OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [ada/] [s-regexp.adb] - Rev 424

Go to most recent revision | Compare with Previous | Blame | View Log

------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                        S Y S T E M . R E G E X P                         --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--                     Copyright (C) 1999-2009, AdaCore                     --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
-- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
-- Boston, MA 02110-1301, USA.                                              --
--                                                                          --
-- As a special exception,  if other files  instantiate  generics from this --
-- unit, or you link  this unit with other files  to produce an executable, --
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
-- covered  by the  GNU  General  Public  License.  This exception does not --
-- however invalidate  any other reasons why  the executable file  might be --
-- covered by the  GNU Public License.                                      --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------
 
with Ada.Unchecked_Deallocation;
 
with System.Case_Util;
 
package body System.Regexp is
 
   Open_Paren    : constant Character := '(';
   Close_Paren   : constant Character := ')';
   Open_Bracket  : constant Character := '[';
   Close_Bracket : constant Character := ']';
 
   type State_Index is new Natural;
   type Column_Index is new Natural;
 
   type Regexp_Array is array
     (State_Index range <>, Column_Index range <>) of State_Index;
   --  First index is for the state number
   --  Second index is for the character type
   --  Contents is the new State
 
   type Regexp_Array_Access is access Regexp_Array;
   --  Use this type through the functions Set below, so that it
   --  can grow dynamically depending on the needs.
 
   type Mapping is array (Character'Range) of Column_Index;
   --  Mapping between characters and column in the Regexp_Array
 
   type Boolean_Array is array (State_Index range <>) of Boolean;
 
   type Regexp_Value
     (Alphabet_Size : Column_Index;
      Num_States    : State_Index) is
   record
      Map            : Mapping;
      States         : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
      Is_Final       : Boolean_Array (1 .. Num_States);
      Case_Sensitive : Boolean;
   end record;
   --  Deterministic finite-state machine
 
   -----------------------
   -- Local Subprograms --
   -----------------------
 
   procedure Set
     (Table  : in out Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index;
      Value  : State_Index);
   --  Sets a value in the table. If the table is too small, reallocate it
   --  dynamically so that (State, Column) is a valid index in it.
 
   function Get
     (Table  : Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index)
      return   State_Index;
   --  Returns the value in the table at (State, Column).
   --  If this index does not exist in the table, returns 0
 
   procedure Free is new Ada.Unchecked_Deallocation
     (Regexp_Array, Regexp_Array_Access);
 
   ------------
   -- Adjust --
   ------------
 
   procedure Adjust (R : in out Regexp) is
      Tmp : Regexp_Access;
 
   begin
      Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
                               Num_States    => R.R.Num_States);
      Tmp.all := R.R.all;
      R.R := Tmp;
   end Adjust;
 
   -------------
   -- Compile --
   -------------
 
   function Compile
     (Pattern        : String;
      Glob           : Boolean := False;
      Case_Sensitive : Boolean := True)
      return           Regexp
   is
      S : String := Pattern;
      --  The pattern which is really compiled (when the pattern is case
      --  insensitive, we convert this string to lower-cases
 
      Map : Mapping := (others => 0);
      --  Mapping between characters and columns in the tables
 
      Alphabet_Size : Column_Index := 0;
      --  Number of significant characters in the regular expression.
      --  This total does not include special operators, such as *, (, ...
 
      procedure Check_Well_Formed_Pattern;
      --  Check that the pattern to compile is well-formed, so that subsequent
      --  code can rely on this without performing each time the checks to
      --  avoid accessing the pattern outside its bounds. However, not all
      --  well-formedness rules are checked. In particular, rules about special
      --  characters not being treated as regular characters are not checked.
 
      procedure Create_Mapping;
      --  Creates a mapping between characters in the regexp and columns
      --  in the tables representing the regexp. Test that the regexp is
      --  well-formed Modifies Alphabet_Size and Map
 
      procedure Create_Primary_Table
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index);
      --  Creates the first version of the regexp (this is a non deterministic
      --  finite state machine, which is unadapted for a fast pattern
      --  matching algorithm). We use a recursive algorithm to process the
      --  parenthesis sub-expressions.
      --
      --  Table : at the end of the procedure : Column 0 is for any character
      --  ('.') and the last columns are for no character (closure)
      --  Num_States is set to the number of states in the table
      --  Start_State is the number of the starting state in the regexp
      --  End_State is the number of the final state when the regexp matches
 
      procedure Create_Primary_Table_Glob
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index);
      --  Same function as above, but it deals with the second possible
      --  grammar for 'globbing pattern', which is a kind of subset of the
      --  whole regular expression grammar.
 
      function Create_Secondary_Table
        (First_Table : Regexp_Array_Access;
         Num_States  : State_Index;
         Start_State : State_Index;
         End_State   : State_Index)
         return        Regexp;
      --  Creates the definitive table representing the regular expression
      --  This is actually a transformation of the primary table First_Table,
      --  where every state is grouped with the states in its 'no-character'
      --  columns. The transitions between the new states are then recalculated
      --  and if necessary some new states are created.
      --
      --  Note that the resulting finite-state machine is not optimized in
      --  terms of the number of states : it would be more time-consuming to
      --  add a third pass to reduce the number of states in the machine, with
      --  no speed improvement...
 
      procedure Raise_Exception (M : String; Index : Integer);
      pragma No_Return (Raise_Exception);
      --  Raise an exception, indicating an error at character Index in S
 
      -------------------------------
      -- Check_Well_Formed_Pattern --
      -------------------------------
 
      procedure Check_Well_Formed_Pattern is
         J : Integer;
 
         Past_Elmt : Boolean := False;
         --  Set to True everywhere an elmt has been parsed, if Glob=False,
         --  meaning there can be now an occurence of '*', '+' and '?'.
 
         Past_Term : Boolean := False;
         --  Set to True everywhere a term has been parsed, if Glob=False,
         --  meaning there can be now an occurence of '|'.
 
         Parenthesis_Level : Integer := 0;
         Curly_Level       : Integer := 0;
 
         Last_Open : Integer := S'First - 1;
         --  The last occurence of an opening parenthesis, if Glob=False,
         --  or the last occurence of an opening curly brace, if Glob=True.
 
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
         --  If no more characters are raised, call Raise_Exception
 
         --------------------------------------
         -- Raise_Exception_If_No_More_Chars --
         --------------------------------------
 
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
         begin
            if J + K > S'Last then
               Raise_Exception ("Ill-formed pattern while parsing", J);
            end if;
         end Raise_Exception_If_No_More_Chars;
 
      --  Start of processing for Check_Well_Formed_Pattern
 
      begin
         J := S'First;
         while J <= S'Last loop
            case S (J) is
               when Open_Bracket =>
                  J := J + 1;
                  Raise_Exception_If_No_More_Chars;
 
                  if not Glob then
                     if S (J) = '^' then
                        J := J + 1;
                        Raise_Exception_If_No_More_Chars;
                     end if;
                  end if;
 
                  --  The first character never has a special meaning
 
                  if S (J) = ']' or else S (J) = '-' then
                     J := J + 1;
                     Raise_Exception_If_No_More_Chars;
                  end if;
 
                  --  The set of characters cannot be empty
 
                  if S (J) = ']' then
                     Raise_Exception
                       ("Set of characters cannot be empty in regular "
                          & "expression", J);
                  end if;
 
                  declare
                     Possible_Range_Start : Boolean := True;
                     --  Set True everywhere a range character '-' can occur
 
                  begin
                     loop
                        exit when S (J) = Close_Bracket;
 
                        --  The current character should be followed by a
                        --  closing bracket.
 
                        Raise_Exception_If_No_More_Chars (1);
 
                        if S (J) = '-'
                          and then S (J + 1) /= Close_Bracket
                        then
                           if not Possible_Range_Start then
                              Raise_Exception
                                ("No mix of ranges is allowed in "
                                   & "regular expression", J);
                           end if;
 
                           J := J + 1;
                           Raise_Exception_If_No_More_Chars;
 
                           --  Range cannot be followed by '-' character,
                           --  except as last character in the set.
 
                           Possible_Range_Start := False;
 
                        else
                           Possible_Range_Start := True;
                        end if;
 
                        if S (J) = '\' then
                           J := J + 1;
                           Raise_Exception_If_No_More_Chars;
                        end if;
 
                        J := J + 1;
                     end loop;
                  end;
 
                  --  A closing bracket can end an elmt or term
 
                  Past_Elmt := True;
                  Past_Term := True;
 
               when Close_Bracket =>
 
                  --  A close bracket must follow a open_bracket, and cannot be
                  --  found alone on the line.
 
                  Raise_Exception
                    ("Incorrect character ']' in regular expression", J);
 
               when '\' =>
                  if J < S'Last then
                     J := J + 1;
 
                     --  Any character can be an elmt or a term
 
                     Past_Elmt := True;
                     Past_Term := True;
 
                  else
                     --  \ not allowed at the end of the regexp
 
                     Raise_Exception
                       ("Incorrect character '\' in regular expression", J);
                  end if;
 
               when Open_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level + 1;
                     Last_Open := J;
 
                     --  An open parenthesis does not end an elmt or term
 
                     Past_Elmt := False;
                     Past_Term := False;
                  end if;
 
               when Close_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level - 1;
 
                     if Parenthesis_Level < 0 then
                        Raise_Exception
                          ("')' is not associated with '(' in regular "
                           & "expression", J);
                     end if;
 
                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty parentheses not allowed in regular "
                           & "expression", J);
                     end if;
 
                     if not Past_Term then
                        Raise_Exception
                          ("Closing parenthesis not allowed here in regular "
                             & "expression", J);
                     end if;
 
                     --  A closing parenthesis can end an elmt or term
 
                     Past_Elmt := True;
                     Past_Term := True;
                  end if;
 
               when '{' =>
                  if Glob then
                     Curly_Level := Curly_Level + 1;
                     Last_Open := J;
 
                  else
                     --  Any character can be an elmt or a term
 
                     Past_Elmt := True;
                     Past_Term := True;
                  end if;
 
                  --  No need to check for ',' as the code always accepts them
 
               when '}' =>
                  if Glob then
                     Curly_Level := Curly_Level - 1;
 
                     if Curly_Level < 0 then
                        Raise_Exception
                          ("'}' is not associated with '{' in regular "
                           & "expression", J);
                     end if;
 
                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty curly braces not allowed in regular "
                           & "expression", J);
                     end if;
 
                  else
                     --  Any character can be an elmt or a term
 
                     Past_Elmt := True;
                     Past_Term := True;
                  end if;
 
               when '*' | '?' | '+' =>
                  if not Glob then
 
                     --  These operators must apply to an elmt sub-expression,
                     --  and cannot be found if one has not just been parsed.
 
                     if not Past_Elmt then
                        Raise_Exception
                          ("'*', '+' and '?' operators must be "
                           & "applied to an element in regular expression", J);
                     end if;
 
                     Past_Elmt := False;
                     Past_Term := True;
                  end if;
 
               when '|' =>
                  if not Glob then
 
                     --  This operator must apply to a term sub-expression,
                     --  and cannot be found if one has not just been parsed.
 
                     if not Past_Term then
                        Raise_Exception
                          ("'|' operator must be "
                           & "applied to a term in regular expression", J);
                     end if;
 
                     Past_Elmt := False;
                     Past_Term := False;
                  end if;
 
               when others =>
                  if not Glob then
 
                     --  Any character can be an elmt or a term
 
                     Past_Elmt := True;
                     Past_Term := True;
                  end if;
            end case;
 
            J := J + 1;
         end loop;
 
         --  A closing parenthesis must follow an open parenthesis
 
         if Parenthesis_Level /= 0 then
            Raise_Exception
              ("'(' must always be associated with a ')'", J);
         end if;
 
         --  A closing curly brace must follow an open curly brace
 
         if Curly_Level /= 0 then
            Raise_Exception
              ("'{' must always be associated with a '}'", J);
         end if;
      end Check_Well_Formed_Pattern;
 
      --------------------
      -- Create_Mapping --
      --------------------
 
      procedure Create_Mapping is
 
         procedure Add_In_Map (C : Character);
         --  Add a character in the mapping, if it is not already defined
 
         ----------------
         -- Add_In_Map --
         ----------------
 
         procedure Add_In_Map (C : Character) is
         begin
            if Map (C) = 0 then
               Alphabet_Size := Alphabet_Size + 1;
               Map (C) := Alphabet_Size;
            end if;
         end Add_In_Map;
 
         J                 : Integer := S'First;
         Parenthesis_Level : Integer := 0;
         Curly_Level       : Integer := 0;
         Last_Open         : Integer := S'First - 1;
 
      --  Start of processing for Create_Mapping
 
      begin
         while J <= S'Last loop
            case S (J) is
               when Open_Bracket =>
                  J := J + 1;
 
                  if S (J) = '^' then
                     J := J + 1;
                  end if;
 
                  if S (J) = ']' or else S (J) = '-' then
                     J := J + 1;
                  end if;
 
                  --  The first character never has a special meaning
 
                  loop
                     if J > S'Last then
                        Raise_Exception
                          ("Ran out of characters while parsing ", J);
                     end if;
 
                     exit when S (J) = Close_Bracket;
 
                     if S (J) = '-'
                       and then S (J + 1) /= Close_Bracket
                     then
                        declare
                           Start : constant Integer := J - 1;
 
                        begin
                           J := J + 1;
 
                           if S (J) = '\' then
                              J := J + 1;
                           end if;
 
                           for Char in S (Start) .. S (J) loop
                              Add_In_Map (Char);
                           end loop;
                        end;
                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;
 
                        Add_In_Map (S (J));
                     end if;
 
                     J := J + 1;
                  end loop;
 
                  --  A close bracket must follow a open_bracket,
                  --  and cannot be found alone on the line
 
               when Close_Bracket =>
                  Raise_Exception
                    ("Incorrect character ']' in regular expression", J);
 
               when '\' =>
                  if J < S'Last  then
                     J := J + 1;
                     Add_In_Map (S (J));
 
                  else
                     --  \ not allowed at the end of the regexp
 
                     Raise_Exception
                       ("Incorrect character '\' in regular expression", J);
                  end if;
 
               when Open_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level + 1;
                     Last_Open := J;
                  else
                     Add_In_Map (Open_Paren);
                  end if;
 
               when Close_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level - 1;
 
                     if Parenthesis_Level < 0 then
                        Raise_Exception
                          ("')' is not associated with '(' in regular "
                           & "expression", J);
                     end if;
 
                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty parenthesis not allowed in regular "
                           & "expression", J);
                     end if;
 
                  else
                     Add_In_Map (Close_Paren);
                  end if;
 
               when '.' =>
                  if Glob then
                     Add_In_Map ('.');
                  end if;
 
               when '{' =>
                  if not Glob then
                     Add_In_Map (S (J));
                  else
                     Curly_Level := Curly_Level + 1;
                  end if;
 
               when '}' =>
                  if not Glob then
                     Add_In_Map (S (J));
                  else
                     Curly_Level := Curly_Level - 1;
                  end if;
 
               when '*' | '?' =>
                  if not Glob then
                     if J = S'First then
                        Raise_Exception
                          ("'*', '+', '?' and '|' operators cannot be in "
                           & "first position in regular expression", J);
                     end if;
                  end if;
 
               when '|' | '+' =>
                  if not Glob then
                     if J = S'First then
 
                        --  These operators must apply to a sub-expression,
                        --  and cannot be found at the beginning of the line
 
                        Raise_Exception
                          ("'*', '+', '?' and '|' operators cannot be in "
                           & "first position in regular expression", J);
                     end if;
 
                  else
                     Add_In_Map (S (J));
                  end if;
 
               when others =>
                  Add_In_Map (S (J));
            end case;
 
            J := J + 1;
         end loop;
 
         --  A closing parenthesis must follow an open parenthesis
 
         if Parenthesis_Level /= 0 then
            Raise_Exception
              ("'(' must always be associated with a ')'", J);
         end if;
 
         if Curly_Level /= 0 then
            Raise_Exception
              ("'{' must always be associated with a '}'", J);
         end if;
      end Create_Mapping;
 
      --------------------------
      -- Create_Primary_Table --
      --------------------------
 
      procedure Create_Primary_Table
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index)
      is
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
 
         Current_State : State_Index := 0;
         --  Index of the last created state
 
         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index);
         --  Add a empty-character transition from State to To_State
 
         procedure Create_Repetition
           (Repetition : Character;
            Start_Prev : State_Index;
            End_Prev   : State_Index;
            New_Start  : out State_Index;
            New_End    : in out State_Index);
         --  Create the table in case we have a '*', '+' or '?'.
         --  Start_Prev .. End_Prev should indicate respectively the start and
         --  end index of the previous expression, to which '*', '+' or '?' is
         --  applied.
 
         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index);
         --  Fill the table for the regexp Simple.
         --  This is the recursive procedure called to handle () expressions
         --  If End_State = 0, then the call to Create_Simple creates an
         --  independent regexp, not a concatenation
         --  Start_Index .. End_Index is the starting index in the string S.
         --
         --  Warning: it may look like we are creating too many empty-string
         --  transitions, but they are needed to get the correct regexp.
         --  The table is filled as follow ( s means start-state, e means
         --  end-state) :
         --
         --  regexp   state_num | a b * empty_string
         --  -------  ------------------------------
         --    a          1 (s) | 2 - - -
         --               2 (e) | - - - -
         --
         --    ab         1 (s) | 2 - - -
         --               2     | - - - 3
         --               3     | - 4 - -
         --               4 (e) | - - - -
         --
         --    a|b        1     | 2 - - -
         --               2     | - - - 6
         --               3     | - 4 - -
         --               4     | - - - 6
         --               5 (s) | - - - 1,3
         --               6 (e) | - - - -
         --
         --    a*         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1,4
         --               4 (e) | - - - 3
         --
         --    (a)        1 (s) | 2 - - -
         --               2 (e) | - - - -
         --
         --    a+         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1
         --               4 (e) | - - - 3
         --
         --    a?         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1,4
         --               4 (e) | - - - -
         --
         --    .          1 (s) | 2 2 2 -
         --               2 (e) | - - - -
 
         function Next_Sub_Expression
           (Start_Index : Integer;
            End_Index   : Integer)
            return        Integer;
         --  Returns the index of the last character of the next sub-expression
         --  in Simple. Index cannot be greater than End_Index.
 
         --------------------
         -- Add_Empty_Char --
         --------------------
 
         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index)
         is
            J : Column_Index := Empty_Char;
 
         begin
            while Get (Table, State, J) /= 0 loop
               J := J + 1;
            end loop;
 
            Set (Table, State, J, To_State);
         end Add_Empty_Char;
 
         -----------------------
         -- Create_Repetition --
         -----------------------
 
         procedure Create_Repetition
           (Repetition : Character;
            Start_Prev : State_Index;
            End_Prev   : State_Index;
            New_Start  : out State_Index;
            New_End    : in out State_Index)
         is
         begin
            New_Start := Current_State + 1;
 
            if New_End /= 0 then
               Add_Empty_Char (New_End, New_Start);
            end if;
 
            Current_State := Current_State + 2;
            New_End   := Current_State;
 
            Add_Empty_Char (End_Prev, New_End);
            Add_Empty_Char (New_Start, Start_Prev);
 
            if Repetition /= '+' then
               Add_Empty_Char (New_Start, New_End);
            end if;
 
            if Repetition /= '?' then
               Add_Empty_Char (New_End, New_Start);
            end if;
         end Create_Repetition;
 
         -------------------
         -- Create_Simple --
         -------------------
 
         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index)
         is
            J          : Integer := Start_Index;
            Last_Start : State_Index := 0;
 
         begin
            Start_State := 0;
            End_State   := 0;
            while J <= End_Index loop
               case S (J) is
                  when Open_Paren =>
                     declare
                        J_Start    : constant Integer := J + 1;
                        Next_Start : State_Index;
                        Next_End   : State_Index;
 
                     begin
                        J := Next_Sub_Expression (J, End_Index);
                        Create_Simple (J_Start, J - 1, Next_Start, Next_End);
 
                        if J < End_Index
                          and then (S (J + 1) = '*' or else
                                    S (J + 1) = '+' or else
                                    S (J + 1) = '?')
                        then
                           J := J + 1;
                           Create_Repetition
                             (S (J),
                              Next_Start,
                              Next_End,
                              Last_Start,
                              End_State);
 
                        else
                           Last_Start := Next_Start;
 
                           if End_State /= 0 then
                              Add_Empty_Char (End_State, Last_Start);
                           end if;
 
                           End_State := Next_End;
                        end if;
                     end;
 
                  when '|' =>
                     declare
                        Start_Prev : constant State_Index := Start_State;
                        End_Prev   : constant State_Index := End_State;
                        Start_J    : constant Integer     := J + 1;
                        Start_Next : State_Index := 0;
                        End_Next   : State_Index := 0;
 
                     begin
                        J := Next_Sub_Expression (J, End_Index);
 
                        --  Create a new state for the start of the alternative
 
                        Current_State := Current_State + 1;
                        Last_Start := Current_State;
                        Start_State := Last_Start;
 
                        --  Create the tree for the second part of alternative
 
                        Create_Simple (Start_J, J, Start_Next, End_Next);
 
                        --  Create the end state
 
                        Add_Empty_Char (Last_Start, Start_Next);
                        Add_Empty_Char (Last_Start, Start_Prev);
                        Current_State := Current_State + 1;
                        End_State := Current_State;
                        Add_Empty_Char (End_Prev, End_State);
                        Add_Empty_Char (End_Next, End_State);
                     end;
 
                  when Open_Bracket =>
                     Current_State := Current_State + 1;
 
                     declare
                        Next_State : State_Index := Current_State + 1;
 
                     begin
                        J := J + 1;
 
                        if S (J) = '^' then
                           J := J + 1;
 
                           Next_State := 0;
 
                           for Column in 0 .. Alphabet_Size loop
                              Set (Table, Current_State, Column,
                                   Value => Current_State + 1);
                           end loop;
                        end if;
 
                        --  Automatically add the first character
 
                        if S (J) = '-' or else S (J) = ']' then
                           Set (Table, Current_State, Map (S (J)),
                                Value => Next_State);
                           J := J + 1;
                        end if;
 
                        --  Loop till closing bracket found
 
                        loop
                           exit when S (J) = Close_Bracket;
 
                           if S (J) = '-'
                             and then S (J + 1) /= ']'
                           then
                              declare
                                 Start : constant Integer := J - 1;
 
                              begin
                                 J := J + 1;
 
                                 if S (J) = '\' then
                                    J := J + 1;
                                 end if;
 
                                 for Char in S (Start) .. S (J) loop
                                    Set (Table, Current_State, Map (Char),
                                         Value => Next_State);
                                 end loop;
                              end;
 
                           else
                              if S (J) = '\' then
                                 J := J + 1;
                              end if;
 
                              Set (Table, Current_State, Map (S (J)),
                                   Value => Next_State);
                           end if;
                           J := J + 1;
                        end loop;
                     end;
 
                     Current_State := Current_State + 1;
 
                     --  If the next symbol is a special symbol
 
                     if J < End_Index
                       and then (S (J + 1) = '*' or else
                                 S (J + 1) = '+' or else
                                 S (J + 1) = '?')
                     then
                        J := J + 1;
                        Create_Repetition
                          (S (J),
                           Current_State - 1,
                           Current_State,
                           Last_Start,
                           End_State);
 
                     else
                        Last_Start := Current_State - 1;
 
                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Last_Start);
                        end if;
 
                        End_State := Current_State;
                     end if;
 
                  when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
                     Raise_Exception
                       ("Incorrect character in regular expression :", J);
 
                  when others =>
                     Current_State := Current_State + 1;
 
                     --  Create the state for the symbol S (J)
 
                     if S (J) = '.' then
                        for K in 0 .. Alphabet_Size loop
                           Set (Table, Current_State, K,
                                Value => Current_State + 1);
                        end loop;
 
                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;
 
                        Set (Table, Current_State, Map (S (J)),
                             Value => Current_State + 1);
                     end if;
 
                     Current_State := Current_State + 1;
 
                     --  If the next symbol is a special symbol
 
                     if J < End_Index
                       and then (S (J + 1) = '*' or else
                                 S (J + 1) = '+' or else
                                 S (J + 1) = '?')
                     then
                        J := J + 1;
                        Create_Repetition
                          (S (J),
                           Current_State - 1,
                           Current_State,
                           Last_Start,
                           End_State);
 
                     else
                        Last_Start := Current_State - 1;
 
                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Last_Start);
                        end if;
 
                        End_State := Current_State;
                     end if;
 
               end case;
 
               if Start_State = 0 then
                  Start_State := Last_Start;
               end if;
 
               J := J + 1;
            end loop;
         end Create_Simple;
 
         -------------------------
         -- Next_Sub_Expression --
         -------------------------
 
         function Next_Sub_Expression
           (Start_Index : Integer;
            End_Index   : Integer)
            return        Integer
         is
            J              : Integer := Start_Index;
            Start_On_Alter : Boolean := False;
 
         begin
            if S (J) = '|' then
               Start_On_Alter := True;
            end if;
 
            loop
               exit when J = End_Index;
               J := J + 1;
 
               case S (J) is
                  when '\' =>
                     J := J + 1;
 
                  when Open_Bracket =>
                     loop
                        J := J + 1;
                        exit when S (J) = Close_Bracket;
 
                        if S (J) = '\' then
                           J := J + 1;
                        end if;
                     end loop;
 
                  when Open_Paren =>
                     J := Next_Sub_Expression (J, End_Index);
 
                  when Close_Paren =>
                     return J;
 
                  when '|' =>
                     if Start_On_Alter then
                        return J - 1;
                     end if;
 
                  when others =>
                     null;
               end case;
            end loop;
 
            return J;
         end Next_Sub_Expression;
 
      --  Start of Create_Primary_Table
 
      begin
         Table.all := (others => (others => 0));
         Create_Simple (S'First, S'Last, Start_State, End_State);
         Num_States := Current_State;
      end Create_Primary_Table;
 
      -------------------------------
      -- Create_Primary_Table_Glob --
      -------------------------------
 
      procedure Create_Primary_Table_Glob
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index)
      is
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
 
         Current_State : State_Index := 0;
         --  Index of the last created state
 
         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index);
         --  Add a empty-character transition from State to To_State
 
         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index);
         --  Fill the table for the S (Start_Index .. End_Index).
         --  This is the recursive procedure called to handle () expressions
 
         --------------------
         -- Add_Empty_Char --
         --------------------
 
         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index)
         is
            J : Column_Index := Empty_Char;
 
         begin
            while Get (Table, State, J) /= 0 loop
               J := J + 1;
            end loop;
 
            Set (Table, State, J,
                 Value => To_State);
         end Add_Empty_Char;
 
         -------------------
         -- Create_Simple --
         -------------------
 
         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index)
         is
            J          : Integer := Start_Index;
            Last_Start : State_Index := 0;
 
         begin
            Start_State := 0;
            End_State   := 0;
 
            while J <= End_Index loop
               case S (J) is
 
                  when Open_Bracket =>
                     Current_State := Current_State + 1;
 
                     declare
                        Next_State : State_Index := Current_State + 1;
 
                     begin
                        J := J + 1;
 
                        if S (J) = '^' then
                           J := J + 1;
                           Next_State := 0;
 
                           for Column in 0 .. Alphabet_Size loop
                              Set (Table, Current_State, Column,
                                   Value => Current_State + 1);
                           end loop;
                        end if;
 
                        --  Automatically add the first character
 
                        if S (J) = '-' or else S (J) = ']' then
                           Set (Table, Current_State, Map (S (J)),
                                Value => Current_State);
                           J := J + 1;
                        end if;
 
                        --  Loop till closing bracket found
 
                        loop
                           exit when S (J) = Close_Bracket;
 
                           if S (J) = '-'
                             and then S (J + 1) /= ']'
                           then
                              declare
                                 Start : constant Integer := J - 1;
                              begin
                                 J := J + 1;
 
                                 if S (J) = '\' then
                                    J := J + 1;
                                 end if;
 
                                 for Char in S (Start) .. S (J) loop
                                    Set (Table, Current_State, Map (Char),
                                         Value => Next_State);
                                 end loop;
                              end;
 
                           else
                              if S (J) = '\' then
                                 J := J + 1;
                              end if;
 
                              Set (Table, Current_State, Map (S (J)),
                                   Value => Next_State);
                           end if;
                           J := J + 1;
                        end loop;
                     end;
 
                     Last_Start := Current_State;
                     Current_State := Current_State + 1;
 
                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Last_Start);
                     end if;
 
                     End_State := Current_State;
 
                  when '{' =>
                     declare
                        End_Sub          : Integer;
                        Start_Regexp_Sub : State_Index;
                        End_Regexp_Sub   : State_Index;
                        Create_Start     : State_Index := 0;
 
                        Create_End : State_Index := 0;
                        --  Initialized to avoid junk warning
 
                     begin
                        while S (J) /= '}' loop
 
                           --  First step : find sub pattern
 
                           End_Sub := J + 1;
                           while S (End_Sub) /= ','
                             and then S (End_Sub) /= '}'
                           loop
                              End_Sub := End_Sub + 1;
                           end loop;
 
                           --  Second step : create a sub pattern
 
                           Create_Simple
                             (J + 1,
                              End_Sub - 1,
                              Start_Regexp_Sub,
                              End_Regexp_Sub);
 
                           J := End_Sub;
 
                           --  Third step : create an alternative
 
                           if Create_Start = 0 then
                              Current_State := Current_State + 1;
                              Create_Start := Current_State;
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                              Current_State := Current_State + 1;
                              Create_End := Current_State;
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
 
                           else
                              Current_State := Current_State + 1;
                              Add_Empty_Char (Current_State, Create_Start);
                              Create_Start := Current_State;
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
                           end if;
                        end loop;
 
                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Create_Start);
                        end if;
 
                        End_State := Create_End;
                        Last_Start := Create_Start;
                     end;
 
                  when '*' =>
                     Current_State := Current_State + 1;
 
                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Current_State);
                     end if;
 
                     Add_Empty_Char (Current_State, Current_State + 1);
                     Add_Empty_Char (Current_State, Current_State + 3);
                     Last_Start := Current_State;
 
                     Current_State := Current_State + 1;
 
                     for K in 0 .. Alphabet_Size loop
                        Set (Table, Current_State, K,
                             Value => Current_State + 1);
                     end loop;
 
                     Current_State := Current_State + 1;
                     Add_Empty_Char (Current_State, Current_State + 1);
 
                     Current_State := Current_State + 1;
                     Add_Empty_Char (Current_State,  Last_Start);
                     End_State := Current_State;
 
                  when others =>
                     Current_State := Current_State + 1;
 
                     if S (J) = '?' then
                        for K in 0 .. Alphabet_Size loop
                           Set (Table, Current_State, K,
                                Value => Current_State + 1);
                        end loop;
 
                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;
 
                        --  Create the state for the symbol S (J)
 
                        Set (Table, Current_State, Map (S (J)),
                             Value => Current_State + 1);
                     end if;
 
                     Last_Start := Current_State;
                     Current_State := Current_State + 1;
 
                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Last_Start);
                     end if;
 
                     End_State := Current_State;
 
               end case;
 
               if Start_State = 0 then
                  Start_State := Last_Start;
               end if;
 
               J := J + 1;
            end loop;
         end Create_Simple;
 
      --  Start of processing for Create_Primary_Table_Glob
 
      begin
         Table.all := (others => (others => 0));
         Create_Simple (S'First, S'Last, Start_State, End_State);
         Num_States := Current_State;
      end Create_Primary_Table_Glob;
 
      ----------------------------
      -- Create_Secondary_Table --
      ----------------------------
 
      function Create_Secondary_Table
        (First_Table : Regexp_Array_Access;
         Num_States  : State_Index;
         Start_State : State_Index;
         End_State   : State_Index) return Regexp
      is
         pragma Warnings (Off, Num_States);
 
         Last_Index : constant State_Index := First_Table'Last (1);
         type Meta_State is array (1 .. Last_Index) of Boolean;
 
         Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
                   (others => (others => 0));
 
         Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
                         (others => (others => False));
 
         Temp_State_Not_Null : Boolean;
 
         Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
 
         Current_State       : State_Index := 1;
         Nb_State            : State_Index := 1;
 
         procedure Closure
           (State : in out Meta_State;
            Item  :        State_Index);
         --  Compute the closure of the state (that is every other state which
         --  has a empty-character transition) and add it to the state
 
         -------------
         -- Closure --
         -------------
 
         procedure Closure
           (State : in out Meta_State;
            Item  : State_Index)
         is
         begin
            if State (Item) then
               return;
            end if;
 
            State (Item) := True;
 
            for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
               if First_Table (Item, Column) = 0 then
                  return;
               end if;
 
               Closure (State, First_Table (Item, Column));
            end loop;
         end Closure;
 
      --  Start of processing for Create_Secondary_Table
 
      begin
         --  Create a new state
 
         Closure (Meta_States (Current_State), Start_State);
 
         while Current_State <= Nb_State loop
 
            --  If this new meta-state includes the primary table end state,
            --  then this meta-state will be a final state in the regexp
 
            if Meta_States (Current_State)(End_State) then
               Is_Final (Current_State) := True;
            end if;
 
            --  For every character in the regexp, calculate the possible
            --  transitions from Current_State
 
            for Column in 0 .. Alphabet_Size loop
               Meta_States (Nb_State + 1) := (others => False);
               Temp_State_Not_Null := False;
 
               for K in Meta_States (Current_State)'Range loop
                  if Meta_States (Current_State)(K)
                    and then First_Table (K, Column) /= 0
                  then
                     Closure
                       (Meta_States (Nb_State + 1), First_Table (K, Column));
                     Temp_State_Not_Null := True;
                  end if;
               end loop;
 
               --  If at least one transition existed
 
               if Temp_State_Not_Null then
 
                  --  Check if this new state corresponds to an old one
 
                  for K in 1 .. Nb_State loop
                     if Meta_States (K) = Meta_States (Nb_State + 1) then
                        Table (Current_State, Column) := K;
                        exit;
                     end if;
                  end loop;
 
                  --  If not, create a new state
 
                  if Table (Current_State, Column) = 0 then
                     Nb_State := Nb_State + 1;
                     Table (Current_State, Column) := Nb_State;
                  end if;
               end if;
            end loop;
 
            Current_State := Current_State + 1;
         end loop;
 
         --  Returns the regexp
 
         declare
            R : Regexp_Access;
 
         begin
            R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
                                   Num_States    => Nb_State);
            R.Map            := Map;
            R.Is_Final       := Is_Final (1 .. Nb_State);
            R.Case_Sensitive := Case_Sensitive;
 
            for State in 1 .. Nb_State loop
               for K in 0 .. Alphabet_Size loop
                  R.States (State, K) := Table (State, K);
               end loop;
            end loop;
 
            return (Ada.Finalization.Controlled with R => R);
         end;
      end Create_Secondary_Table;
 
      ---------------------
      -- Raise_Exception --
      ---------------------
 
      procedure Raise_Exception (M : String; Index : Integer) is
      begin
         raise Error_In_Regexp with M & " at offset" & Index'Img;
      end Raise_Exception;
 
   --  Start of processing for Compile
 
   begin
      --  Special case for the empty string: it always matches, and the
      --  following processing would fail on it.
      if S = "" then
         return (Ada.Finalization.Controlled with
                 R => new Regexp_Value'
                      (Alphabet_Size => 0,
                       Num_States    => 1,
                       Map           => (others => 0),
                       States        => (others => (others => 1)),
                       Is_Final      => (others => True),
                       Case_Sensitive => True));
      end if;
 
      if not Case_Sensitive then
         System.Case_Util.To_Lower (S);
      end if;
 
      --  Check the pattern is well-formed before any treatment
 
      Check_Well_Formed_Pattern;
 
      Create_Mapping;
 
      --  Creates the primary table
 
      declare
         Table       : Regexp_Array_Access;
         Num_States  : State_Index;
         Start_State : State_Index;
         End_State   : State_Index;
         R           : Regexp;
 
      begin
         Table := new Regexp_Array (1 .. 100,
                                    0 .. Alphabet_Size + 10);
         if not Glob then
            Create_Primary_Table (Table, Num_States, Start_State, End_State);
         else
            Create_Primary_Table_Glob
              (Table, Num_States, Start_State, End_State);
         end if;
 
         --  Creates the secondary table
 
         R := Create_Secondary_Table
           (Table, Num_States, Start_State, End_State);
         Free (Table);
         return R;
      end;
   end Compile;
 
   --------------
   -- Finalize --
   --------------
 
   procedure Finalize (R : in out Regexp) is
      procedure Free is new
        Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
 
   begin
      Free (R.R);
   end Finalize;
 
   ---------
   -- Get --
   ---------
 
   function Get
     (Table  : Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index) return State_Index
   is
   begin
      if State <= Table'Last (1)
        and then Column <= Table'Last (2)
      then
         return Table (State, Column);
      else
         return 0;
      end if;
   end Get;
 
   -----------
   -- Match --
   -----------
 
   function Match (S : String; R : Regexp) return Boolean is
      Current_State : State_Index := 1;
 
   begin
      if R.R = null then
         raise Constraint_Error;
      end if;
 
      for Char in S'Range loop
 
         if R.R.Case_Sensitive then
            Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
         else
            Current_State :=
              R.R.States (Current_State,
                          R.R.Map (System.Case_Util.To_Lower (S (Char))));
         end if;
 
         if Current_State = 0 then
            return False;
         end if;
 
      end loop;
 
      return R.R.Is_Final (Current_State);
   end Match;
 
   ---------
   -- Set --
   ---------
 
   procedure Set
     (Table  : in out Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index;
      Value  : State_Index)
   is
      New_Lines   : State_Index;
      New_Columns : Column_Index;
      New_Table   : Regexp_Array_Access;
 
   begin
      if State <= Table'Last (1)
        and then Column <= Table'Last (2)
      then
         Table (State, Column) := Value;
      else
         --  Doubles the size of the table until it is big enough that
         --  (State, Column) is a valid index
 
         New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
         New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
         New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
                                        Table'First (2) .. New_Columns);
         New_Table.all := (others => (others => 0));
 
         for J in Table'Range (1) loop
            for K in Table'Range (2) loop
               New_Table (J, K) := Table (J, K);
            end loop;
         end loop;
 
         Free (Table);
         Table := New_Table;
         Table (State, Column) := Value;
      end if;
   end Set;
 
end System.Regexp;
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.