| 1 |
706 |
jeremybenn |
------------------------------------------------------------------------------
|
| 2 |
|
|
-- --
|
| 3 |
|
|
-- GNAT COMPILER COMPONENTS --
|
| 4 |
|
|
-- --
|
| 5 |
|
|
-- S Y S T E M . R E G E X P --
|
| 6 |
|
|
-- --
|
| 7 |
|
|
-- S p e c --
|
| 8 |
|
|
-- --
|
| 9 |
|
|
-- Copyright (C) 1998-2010, AdaCore --
|
| 10 |
|
|
-- --
|
| 11 |
|
|
-- GNAT is free software; you can redistribute it and/or modify it under --
|
| 12 |
|
|
-- terms of the GNU General Public License as published by the Free Soft- --
|
| 13 |
|
|
-- ware Foundation; either version 3, or (at your option) any later ver- --
|
| 14 |
|
|
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
| 15 |
|
|
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
| 16 |
|
|
-- or FITNESS FOR A PARTICULAR PURPOSE. --
|
| 17 |
|
|
-- --
|
| 18 |
|
|
-- As a special exception under Section 7 of GPL version 3, you are granted --
|
| 19 |
|
|
-- additional permissions described in the GCC Runtime Library Exception, --
|
| 20 |
|
|
-- version 3.1, as published by the Free Software Foundation. --
|
| 21 |
|
|
-- --
|
| 22 |
|
|
-- You should have received a copy of the GNU General Public License and --
|
| 23 |
|
|
-- a copy of the GCC Runtime Library Exception along with this program; --
|
| 24 |
|
|
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
|
| 25 |
|
|
-- <http://www.gnu.org/licenses/>. --
|
| 26 |
|
|
-- --
|
| 27 |
|
|
-- GNAT was originally developed by the GNAT team at New York University. --
|
| 28 |
|
|
-- Extensive contributions were provided by Ada Core Technologies Inc. --
|
| 29 |
|
|
-- --
|
| 30 |
|
|
------------------------------------------------------------------------------
|
| 31 |
|
|
|
| 32 |
|
|
-- Simple Regular expression matching
|
| 33 |
|
|
|
| 34 |
|
|
-- This package provides a simple implementation of a regular expression
|
| 35 |
|
|
-- pattern matching algorithm, using a subset of the syntax of regular
|
| 36 |
|
|
-- expressions copied from familiar Unix style utilities.
|
| 37 |
|
|
|
| 38 |
|
|
-- Note: this package is in the System hierarchy so that it can be directly
|
| 39 |
|
|
-- used by other predefined packages. User access to this package is via
|
| 40 |
|
|
-- a renaming of this package in GNAT.Regexp (file g-regexp.ads).
|
| 41 |
|
|
|
| 42 |
|
|
with Ada.Finalization;
|
| 43 |
|
|
|
| 44 |
|
|
package System.Regexp is
|
| 45 |
|
|
|
| 46 |
|
|
-- The regular expression must first be compiled, using the Compile
|
| 47 |
|
|
-- function, which creates a finite state matching table, allowing
|
| 48 |
|
|
-- very fast matching once the expression has been compiled.
|
| 49 |
|
|
|
| 50 |
|
|
-- The following is the form of a regular expression, expressed in Ada
|
| 51 |
|
|
-- reference manual style BNF:
|
| 52 |
|
|
|
| 53 |
|
|
-- regexp ::= term
|
| 54 |
|
|
|
| 55 |
|
|
-- regexp ::= term | term -- alternation (term or term ...)
|
| 56 |
|
|
|
| 57 |
|
|
-- term ::= item
|
| 58 |
|
|
|
| 59 |
|
|
-- term ::= item item ... -- concatenation (item then item)
|
| 60 |
|
|
|
| 61 |
|
|
-- item ::= elmt -- match elmt
|
| 62 |
|
|
-- item ::= elmt * -- zero or more elmt's
|
| 63 |
|
|
-- item ::= elmt + -- one or more elmt's
|
| 64 |
|
|
-- item ::= elmt ? -- matches elmt or nothing
|
| 65 |
|
|
|
| 66 |
|
|
-- elmt ::= nchr -- matches given character
|
| 67 |
|
|
-- elmt ::= [nchr nchr ...] -- matches any character listed
|
| 68 |
|
|
-- elmt ::= [^ nchr nchr ...] -- matches any character not listed
|
| 69 |
|
|
-- elmt ::= [char - char] -- matches chars in given range
|
| 70 |
|
|
-- elmt ::= . -- matches any single character
|
| 71 |
|
|
-- elmt ::= ( regexp ) -- parens used for grouping
|
| 72 |
|
|
|
| 73 |
|
|
-- char ::= any character, including special characters
|
| 74 |
|
|
-- nchr ::= any character except \()[].*+?^ or \char to match char
|
| 75 |
|
|
-- ... is used to indicate repetition (one or more terms)
|
| 76 |
|
|
|
| 77 |
|
|
-- See also regexp(1) man page on Unix systems for further details
|
| 78 |
|
|
|
| 79 |
|
|
-- A second kind of regular expression is provided. This one is more
|
| 80 |
|
|
-- like the wild card patterns used in file names by the Unix shell (or
|
| 81 |
|
|
-- DOS prompt) command lines. The grammar is the following:
|
| 82 |
|
|
|
| 83 |
|
|
-- regexp ::= term
|
| 84 |
|
|
|
| 85 |
|
|
-- term ::= elmt
|
| 86 |
|
|
|
| 87 |
|
|
-- term ::= elmt elmt ... -- concatenation (elmt then elmt)
|
| 88 |
|
|
-- term ::= * -- any string of 0 or more characters
|
| 89 |
|
|
-- term ::= ? -- matches any character
|
| 90 |
|
|
-- term ::= [char char ...] -- matches any character listed
|
| 91 |
|
|
-- term ::= [char - char] -- matches any character in given range
|
| 92 |
|
|
-- term ::= {elmt, elmt, ...} -- alternation (matches any of elmt)
|
| 93 |
|
|
|
| 94 |
|
|
-- Important note : This package was mainly intended to match regular
|
| 95 |
|
|
-- expressions against file names. The whole string has to match the
|
| 96 |
|
|
-- regular expression. If only a substring matches, then the function
|
| 97 |
|
|
-- Match will return False.
|
| 98 |
|
|
|
| 99 |
|
|
type Regexp is private;
|
| 100 |
|
|
-- Private type used to represent a regular expression. Values of this
-- type are returned by Compile and are passed to Match to test a string.
|
| 101 |
|
|
|
| 102 |
|
|
Error_In_Regexp : exception;
|
| 103 |
|
|
-- Exception raised when an error is found in the regular expression
-- (raised by Compile when Pattern does not match the selected grammar)
|
| 104 |
|
|
|
| 105 |
|
|
function Compile
|
| 106 |
|
|
(Pattern : String;
|
| 107 |
|
|
Glob : Boolean := False;
|
| 108 |
|
|
Case_Sensitive : Boolean := True) return Regexp;
|
| 109 |
|
|
-- Compiles the regular expression Pattern. If the syntax of the given
|
| 110 |
|
|
-- expression is invalid (does not match above grammar), Error_In_Regexp
|
| 111 |
|
|
-- is raised. If Glob is True, the pattern is considered as a 'globbing
|
| 112 |
|
|
-- pattern', that is a pattern as given by the second grammar above.
|
| 113 |
|
|
-- As a special case, if Pattern is the empty string it will always
|
| 114 |
|
|
-- match.
-- NOTE(review): the effect of Case_Sensitive is not described here;
-- presumably matching ignores letter case when it is False (confirm
-- against the package body).
|
| 115 |
|
|
|
| 116 |
|
|
function Match (S : String; R : Regexp) return Boolean;
|
| 117 |
|
|
-- True if S matches R, otherwise False. Raises Constraint_Error if
|
| 118 |
|
|
-- R is an uninitialized regular expression value.
-- The whole of S has to match R: if only a substring of S matches, the
-- result is False.
|
| 119 |
|
|
|
| 120 |
|
|
private
|
| 121 |
|
|
type Regexp_Value;
-- Incomplete type declaration; the full declaration is not given in this
-- spec (NOTE(review): presumably completed in the package body; confirm).
|
| 122 |
|
|
|
| 123 |
|
|
type Regexp_Access is access Regexp_Value;
-- Access type designating a Regexp_Value
|
| 124 |
|
|
|
| 125 |
|
|
type Regexp is new Ada.Finalization.Controlled with record
-- Full view of Regexp: a controlled type, so the Adjust and Finalize
-- operations declared below apply to objects of this type.
|
| 126 |
|
|
R : Regexp_Access := null;
-- Defaults to null (NOTE(review): presumably null denotes a regular
-- expression that has not been compiled yet; confirm).
|
| 127 |
|
|
end record;
|
| 128 |
|
|
|
| 129 |
|
|
pragma Finalize_Storage_Only (Regexp);
-- GNAT-specific pragma: indicates that finalization of Regexp is only
-- used to reclaim storage, so that the call to Finalize may be omitted
-- for library-level objects when the program terminates.
|
| 130 |
|
|
|
| 131 |
|
|
procedure Finalize (R : in out Regexp);
|
| 132 |
|
|
-- Free the memory occupied by R (finalization operation of the
-- controlled type Regexp).
|
| 133 |
|
|
|
| 134 |
|
|
procedure Adjust (R : in out Regexp);
|
| 135 |
|
|
-- Called after an assignment: makes a copy of the Regexp_Value designated
-- by the access component of R (i.e. a copy of the Regexp_Access.all).
|
| 136 |
|
|
|
| 137 |
|
|
end System.Regexp;
|