OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [tags/] [gnu-src/] [gcc-4.5.1/] [gcc-4.5.1-or32-1.0rc3/] [gcc/] [ada/] [s-regexp.adb] - Blame information for rev 516

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 281 jeremybenn
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT COMPILER COMPONENTS                         --
4
--                                                                          --
5
--                        S Y S T E M . R E G E X P                         --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--                     Copyright (C) 1999-2009, AdaCore                     --
10
--                                                                          --
11
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12
-- terms of the  GNU General Public License as published  by the Free Soft- --
13
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17
-- for  more details.  You should have  received  a copy of the GNU General --
18
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19
-- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
20
-- Boston, MA 02110-1301, USA.                                              --
21
--                                                                          --
22
-- As a special exception,  if other files  instantiate  generics from this --
23
-- unit, or you link  this unit with other files  to produce an executable, --
24
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25
-- covered  by the  GNU  General  Public  License.  This exception does not --
26
-- however invalidate  any other reasons why  the executable file  might be --
27
-- covered by the  GNU Public License.                                      --
28
--                                                                          --
29
-- GNAT was originally developed  by the GNAT team at  New York University. --
30
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31
--                                                                          --
32
------------------------------------------------------------------------------
33
 
34
with Ada.Unchecked_Deallocation;
35
 
36
with System.Case_Util;
37
 
38
package body System.Regexp is
39
 
40
   Open_Paren    : constant Character := '(';
41
   Close_Paren   : constant Character := ')';
42
   Open_Bracket  : constant Character := '[';
43
   Close_Bracket : constant Character := ']';
44
 
45
   type State_Index is new Natural;
46
   type Column_Index is new Natural;
47
 
48
   type Regexp_Array is array
49
     (State_Index range <>, Column_Index range <>) of State_Index;
50
   --  First index is for the state number
51
   --  Second index is for the character type
52
   --  Contents is the new State
53
 
54
   type Regexp_Array_Access is access Regexp_Array;
55
   --  Use this type through the functions Set below, so that it
56
   --  can grow dynamically depending on the needs.
57
 
58
   type Mapping is array (Character'Range) of Column_Index;
59
   --  Mapping between characters and column in the Regexp_Array
60
 
61
   type Boolean_Array is array (State_Index range <>) of Boolean;
62
 
63
   type Regexp_Value
64
     (Alphabet_Size : Column_Index;
65
      Num_States    : State_Index) is
66
   record
67
      Map            : Mapping;
68
      States         : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
69
      Is_Final       : Boolean_Array (1 .. Num_States);
70
      Case_Sensitive : Boolean;
71
   end record;
72
   --  Deterministic finite-state machine
73
 
74
   -----------------------
75
   -- Local Subprograms --
76
   -----------------------
77
 
78
   procedure Set
79
     (Table  : in out Regexp_Array_Access;
80
      State  : State_Index;
81
      Column : Column_Index;
82
      Value  : State_Index);
83
   --  Sets a value in the table. If the table is too small, reallocate it
84
   --  dynamically so that (State, Column) is a valid index in it.
85
 
86
   function Get
87
     (Table  : Regexp_Array_Access;
88
      State  : State_Index;
89
      Column : Column_Index)
90
      return   State_Index;
91
   --  Returns the value in the table at (State, Column).
92
   --  If this index does not exist in the table, returns 0
93
 
94
   procedure Free is new Ada.Unchecked_Deallocation
95
     (Regexp_Array, Regexp_Array_Access);
96
 
97
   ------------
98
   -- Adjust --
99
   ------------
100
 
101
   procedure Adjust (R : in out Regexp) is
102
      Tmp : Regexp_Access;
103
 
104
   begin
105
      Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
106
                               Num_States    => R.R.Num_States);
107
      Tmp.all := R.R.all;
108
      R.R := Tmp;
109
   end Adjust;
110
 
111
   -------------
112
   -- Compile --
113
   -------------
114
 
115
   function Compile
116
     (Pattern        : String;
117
      Glob           : Boolean := False;
118
      Case_Sensitive : Boolean := True)
119
      return           Regexp
120
   is
121
      S : String := Pattern;
122
      --  The pattern which is really compiled (when the pattern is case
123
      --  insensitive, we convert this string to lower-cases
124
 
125
      Map : Mapping := (others => 0);
126
      --  Mapping between characters and columns in the tables
127
 
128
      Alphabet_Size : Column_Index := 0;
129
      --  Number of significant characters in the regular expression.
130
      --  This total does not include special operators, such as *, (, ...
131
 
132
      procedure Check_Well_Formed_Pattern;
133
      --  Check that the pattern to compile is well-formed, so that subsequent
134
      --  code can rely on this without performing each time the checks to
135
      --  avoid accessing the pattern outside its bounds. However, not all
136
      --  well-formedness rules are checked. In particular, rules about special
137
      --  characters not being treated as regular characters are not checked.
138
 
139
      procedure Create_Mapping;
140
      --  Creates a mapping between characters in the regexp and columns
141
      --  in the tables representing the regexp. Test that the regexp is
142
      --  well-formed Modifies Alphabet_Size and Map
143
 
144
      procedure Create_Primary_Table
145
        (Table       : out Regexp_Array_Access;
146
         Num_States  : out State_Index;
147
         Start_State : out State_Index;
148
         End_State   : out State_Index);
149
      --  Creates the first version of the regexp (this is a non deterministic
150
      --  finite state machine, which is unadapted for a fast pattern
151
      --  matching algorithm). We use a recursive algorithm to process the
152
      --  parenthesis sub-expressions.
153
      --
154
      --  Table : at the end of the procedure : Column 0 is for any character
155
      --  ('.') and the last columns are for no character (closure)
156
      --  Num_States is set to the number of states in the table
157
      --  Start_State is the number of the starting state in the regexp
158
      --  End_State is the number of the final state when the regexp matches
159
 
160
      procedure Create_Primary_Table_Glob
161
        (Table       : out Regexp_Array_Access;
162
         Num_States  : out State_Index;
163
         Start_State : out State_Index;
164
         End_State   : out State_Index);
165
      --  Same function as above, but it deals with the second possible
166
      --  grammar for 'globbing pattern', which is a kind of subset of the
167
      --  whole regular expression grammar.
168
 
169
      function Create_Secondary_Table
170
        (First_Table : Regexp_Array_Access;
171
         Num_States  : State_Index;
172
         Start_State : State_Index;
173
         End_State   : State_Index)
174
         return        Regexp;
175
      --  Creates the definitive table representing the regular expression
176
      --  This is actually a transformation of the primary table First_Table,
177
      --  where every state is grouped with the states in its 'no-character'
178
      --  columns. The transitions between the new states are then recalculated
179
      --  and if necessary some new states are created.
180
      --
181
      --  Note that the resulting finite-state machine is not optimized in
182
      --  terms of the number of states : it would be more time-consuming to
183
      --  add a third pass to reduce the number of states in the machine, with
184
      --  no speed improvement...
185
 
186
      procedure Raise_Exception (M : String; Index : Integer);
187
      pragma No_Return (Raise_Exception);
188
      --  Raise an exception, indicating an error at character Index in S
189
 
190
      -------------------------------
191
      -- Check_Well_Formed_Pattern --
192
      -------------------------------
193
 
194
      procedure Check_Well_Formed_Pattern is
195
         J : Integer;
196
 
197
         Past_Elmt : Boolean := False;
198
         --  Set to True everywhere an elmt has been parsed, if Glob=False,
199
         --  meaning there can be now an occurence of '*', '+' and '?'.
200
 
201
         Past_Term : Boolean := False;
202
         --  Set to True everywhere a term has been parsed, if Glob=False,
203
         --  meaning there can be now an occurence of '|'.
204
 
205
         Parenthesis_Level : Integer := 0;
206
         Curly_Level       : Integer := 0;
207
 
208
         Last_Open : Integer := S'First - 1;
209
         --  The last occurence of an opening parenthesis, if Glob=False,
210
         --  or the last occurence of an opening curly brace, if Glob=True.
211
 
212
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
213
         --  If no more characters are raised, call Raise_Exception
214
 
215
         --------------------------------------
216
         -- Raise_Exception_If_No_More_Chars --
217
         --------------------------------------
218
 
219
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
220
         begin
221
            if J + K > S'Last then
222
               Raise_Exception ("Ill-formed pattern while parsing", J);
223
            end if;
224
         end Raise_Exception_If_No_More_Chars;
225
 
226
      --  Start of processing for Check_Well_Formed_Pattern
227
 
228
      begin
229
         J := S'First;
230
         while J <= S'Last loop
231
            case S (J) is
232
               when Open_Bracket =>
233
                  J := J + 1;
234
                  Raise_Exception_If_No_More_Chars;
235
 
236
                  if not Glob then
237
                     if S (J) = '^' then
238
                        J := J + 1;
239
                        Raise_Exception_If_No_More_Chars;
240
                     end if;
241
                  end if;
242
 
243
                  --  The first character never has a special meaning
244
 
245
                  if S (J) = ']' or else S (J) = '-' then
246
                     J := J + 1;
247
                     Raise_Exception_If_No_More_Chars;
248
                  end if;
249
 
250
                  --  The set of characters cannot be empty
251
 
252
                  if S (J) = ']' then
253
                     Raise_Exception
254
                       ("Set of characters cannot be empty in regular "
255
                          & "expression", J);
256
                  end if;
257
 
258
                  declare
259
                     Possible_Range_Start : Boolean := True;
260
                     --  Set True everywhere a range character '-' can occur
261
 
262
                  begin
263
                     loop
264
                        exit when S (J) = Close_Bracket;
265
 
266
                        --  The current character should be followed by a
267
                        --  closing bracket.
268
 
269
                        Raise_Exception_If_No_More_Chars (1);
270
 
271
                        if S (J) = '-'
272
                          and then S (J + 1) /= Close_Bracket
273
                        then
274
                           if not Possible_Range_Start then
275
                              Raise_Exception
276
                                ("No mix of ranges is allowed in "
277
                                   & "regular expression", J);
278
                           end if;
279
 
280
                           J := J + 1;
281
                           Raise_Exception_If_No_More_Chars;
282
 
283
                           --  Range cannot be followed by '-' character,
284
                           --  except as last character in the set.
285
 
286
                           Possible_Range_Start := False;
287
 
288
                        else
289
                           Possible_Range_Start := True;
290
                        end if;
291
 
292
                        if S (J) = '\' then
293
                           J := J + 1;
294
                           Raise_Exception_If_No_More_Chars;
295
                        end if;
296
 
297
                        J := J + 1;
298
                     end loop;
299
                  end;
300
 
301
                  --  A closing bracket can end an elmt or term
302
 
303
                  Past_Elmt := True;
304
                  Past_Term := True;
305
 
306
               when Close_Bracket =>
307
 
308
                  --  A close bracket must follow a open_bracket, and cannot be
309
                  --  found alone on the line.
310
 
311
                  Raise_Exception
312
                    ("Incorrect character ']' in regular expression", J);
313
 
314
               when '\' =>
315
                  if J < S'Last then
316
                     J := J + 1;
317
 
318
                     --  Any character can be an elmt or a term
319
 
320
                     Past_Elmt := True;
321
                     Past_Term := True;
322
 
323
                  else
324
                     --  \ not allowed at the end of the regexp
325
 
326
                     Raise_Exception
327
                       ("Incorrect character '\' in regular expression", J);
328
                  end if;
329
 
330
               when Open_Paren =>
331
                  if not Glob then
332
                     Parenthesis_Level := Parenthesis_Level + 1;
333
                     Last_Open := J;
334
 
335
                     --  An open parenthesis does not end an elmt or term
336
 
337
                     Past_Elmt := False;
338
                     Past_Term := False;
339
                  end if;
340
 
341
               when Close_Paren =>
342
                  if not Glob then
343
                     Parenthesis_Level := Parenthesis_Level - 1;
344
 
345
                     if Parenthesis_Level < 0 then
346
                        Raise_Exception
347
                          ("')' is not associated with '(' in regular "
348
                           & "expression", J);
349
                     end if;
350
 
351
                     if J = Last_Open + 1 then
352
                        Raise_Exception
353
                          ("Empty parentheses not allowed in regular "
354
                           & "expression", J);
355
                     end if;
356
 
357
                     if not Past_Term then
358
                        Raise_Exception
359
                          ("Closing parenthesis not allowed here in regular "
360
                             & "expression", J);
361
                     end if;
362
 
363
                     --  A closing parenthesis can end an elmt or term
364
 
365
                     Past_Elmt := True;
366
                     Past_Term := True;
367
                  end if;
368
 
369
               when '{' =>
370
                  if Glob then
371
                     Curly_Level := Curly_Level + 1;
372
                     Last_Open := J;
373
 
374
                  else
375
                     --  Any character can be an elmt or a term
376
 
377
                     Past_Elmt := True;
378
                     Past_Term := True;
379
                  end if;
380
 
381
                  --  No need to check for ',' as the code always accepts them
382
 
383
               when '}' =>
384
                  if Glob then
385
                     Curly_Level := Curly_Level - 1;
386
 
387
                     if Curly_Level < 0 then
388
                        Raise_Exception
389
                          ("'}' is not associated with '{' in regular "
390
                           & "expression", J);
391
                     end if;
392
 
393
                     if J = Last_Open + 1 then
394
                        Raise_Exception
395
                          ("Empty curly braces not allowed in regular "
396
                           & "expression", J);
397
                     end if;
398
 
399
                  else
400
                     --  Any character can be an elmt or a term
401
 
402
                     Past_Elmt := True;
403
                     Past_Term := True;
404
                  end if;
405
 
406
               when '*' | '?' | '+' =>
407
                  if not Glob then
408
 
409
                     --  These operators must apply to an elmt sub-expression,
410
                     --  and cannot be found if one has not just been parsed.
411
 
412
                     if not Past_Elmt then
413
                        Raise_Exception
414
                          ("'*', '+' and '?' operators must be "
415
                           & "applied to an element in regular expression", J);
416
                     end if;
417
 
418
                     Past_Elmt := False;
419
                     Past_Term := True;
420
                  end if;
421
 
422
               when '|' =>
423
                  if not Glob then
424
 
425
                     --  This operator must apply to a term sub-expression,
426
                     --  and cannot be found if one has not just been parsed.
427
 
428
                     if not Past_Term then
429
                        Raise_Exception
430
                          ("'|' operator must be "
431
                           & "applied to a term in regular expression", J);
432
                     end if;
433
 
434
                     Past_Elmt := False;
435
                     Past_Term := False;
436
                  end if;
437
 
438
               when others =>
439
                  if not Glob then
440
 
441
                     --  Any character can be an elmt or a term
442
 
443
                     Past_Elmt := True;
444
                     Past_Term := True;
445
                  end if;
446
            end case;
447
 
448
            J := J + 1;
449
         end loop;
450
 
451
         --  A closing parenthesis must follow an open parenthesis
452
 
453
         if Parenthesis_Level /= 0 then
454
            Raise_Exception
455
              ("'(' must always be associated with a ')'", J);
456
         end if;
457
 
458
         --  A closing curly brace must follow an open curly brace
459
 
460
         if Curly_Level /= 0 then
461
            Raise_Exception
462
              ("'{' must always be associated with a '}'", J);
463
         end if;
464
      end Check_Well_Formed_Pattern;
465
 
466
      --------------------
467
      -- Create_Mapping --
468
      --------------------
469
 
470
      procedure Create_Mapping is
471
 
472
         procedure Add_In_Map (C : Character);
473
         --  Add a character in the mapping, if it is not already defined
474
 
475
         ----------------
476
         -- Add_In_Map --
477
         ----------------
478
 
479
         procedure Add_In_Map (C : Character) is
480
         begin
481
            if Map (C) = 0 then
482
               Alphabet_Size := Alphabet_Size + 1;
483
               Map (C) := Alphabet_Size;
484
            end if;
485
         end Add_In_Map;
486
 
487
         J                 : Integer := S'First;
488
         Parenthesis_Level : Integer := 0;
489
         Curly_Level       : Integer := 0;
490
         Last_Open         : Integer := S'First - 1;
491
 
492
      --  Start of processing for Create_Mapping
493
 
494
      begin
495
         while J <= S'Last loop
496
            case S (J) is
497
               when Open_Bracket =>
498
                  J := J + 1;
499
 
500
                  if S (J) = '^' then
501
                     J := J + 1;
502
                  end if;
503
 
504
                  if S (J) = ']' or else S (J) = '-' then
505
                     J := J + 1;
506
                  end if;
507
 
508
                  --  The first character never has a special meaning
509
 
510
                  loop
511
                     if J > S'Last then
512
                        Raise_Exception
513
                          ("Ran out of characters while parsing ", J);
514
                     end if;
515
 
516
                     exit when S (J) = Close_Bracket;
517
 
518
                     if S (J) = '-'
519
                       and then S (J + 1) /= Close_Bracket
520
                     then
521
                        declare
522
                           Start : constant Integer := J - 1;
523
 
524
                        begin
525
                           J := J + 1;
526
 
527
                           if S (J) = '\' then
528
                              J := J + 1;
529
                           end if;
530
 
531
                           for Char in S (Start) .. S (J) loop
532
                              Add_In_Map (Char);
533
                           end loop;
534
                        end;
535
                     else
536
                        if S (J) = '\' then
537
                           J := J + 1;
538
                        end if;
539
 
540
                        Add_In_Map (S (J));
541
                     end if;
542
 
543
                     J := J + 1;
544
                  end loop;
545
 
546
                  --  A close bracket must follow a open_bracket,
547
                  --  and cannot be found alone on the line
548
 
549
               when Close_Bracket =>
550
                  Raise_Exception
551
                    ("Incorrect character ']' in regular expression", J);
552
 
553
               when '\' =>
554
                  if J < S'Last  then
555
                     J := J + 1;
556
                     Add_In_Map (S (J));
557
 
558
                  else
559
                     --  \ not allowed at the end of the regexp
560
 
561
                     Raise_Exception
562
                       ("Incorrect character '\' in regular expression", J);
563
                  end if;
564
 
565
               when Open_Paren =>
566
                  if not Glob then
567
                     Parenthesis_Level := Parenthesis_Level + 1;
568
                     Last_Open := J;
569
                  else
570
                     Add_In_Map (Open_Paren);
571
                  end if;
572
 
573
               when Close_Paren =>
574
                  if not Glob then
575
                     Parenthesis_Level := Parenthesis_Level - 1;
576
 
577
                     if Parenthesis_Level < 0 then
578
                        Raise_Exception
579
                          ("')' is not associated with '(' in regular "
580
                           & "expression", J);
581
                     end if;
582
 
583
                     if J = Last_Open + 1 then
584
                        Raise_Exception
585
                          ("Empty parenthesis not allowed in regular "
586
                           & "expression", J);
587
                     end if;
588
 
589
                  else
590
                     Add_In_Map (Close_Paren);
591
                  end if;
592
 
593
               when '.' =>
594
                  if Glob then
595
                     Add_In_Map ('.');
596
                  end if;
597
 
598
               when '{' =>
599
                  if not Glob then
600
                     Add_In_Map (S (J));
601
                  else
602
                     Curly_Level := Curly_Level + 1;
603
                  end if;
604
 
605
               when '}' =>
606
                  if not Glob then
607
                     Add_In_Map (S (J));
608
                  else
609
                     Curly_Level := Curly_Level - 1;
610
                  end if;
611
 
612
               when '*' | '?' =>
613
                  if not Glob then
614
                     if J = S'First then
615
                        Raise_Exception
616
                          ("'*', '+', '?' and '|' operators cannot be in "
617
                           & "first position in regular expression", J);
618
                     end if;
619
                  end if;
620
 
621
               when '|' | '+' =>
622
                  if not Glob then
623
                     if J = S'First then
624
 
625
                        --  These operators must apply to a sub-expression,
626
                        --  and cannot be found at the beginning of the line
627
 
628
                        Raise_Exception
629
                          ("'*', '+', '?' and '|' operators cannot be in "
630
                           & "first position in regular expression", J);
631
                     end if;
632
 
633
                  else
634
                     Add_In_Map (S (J));
635
                  end if;
636
 
637
               when others =>
638
                  Add_In_Map (S (J));
639
            end case;
640
 
641
            J := J + 1;
642
         end loop;
643
 
644
         --  A closing parenthesis must follow an open parenthesis
645
 
646
         if Parenthesis_Level /= 0 then
647
            Raise_Exception
648
              ("'(' must always be associated with a ')'", J);
649
         end if;
650
 
651
         if Curly_Level /= 0 then
652
            Raise_Exception
653
              ("'{' must always be associated with a '}'", J);
654
         end if;
655
      end Create_Mapping;
656
 
657
      --------------------------
658
      -- Create_Primary_Table --
659
      --------------------------
660
 
661
      procedure Create_Primary_Table
662
        (Table       : out Regexp_Array_Access;
663
         Num_States  : out State_Index;
664
         Start_State : out State_Index;
665
         End_State   : out State_Index)
666
      is
667
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
668
 
669
         Current_State : State_Index := 0;
670
         --  Index of the last created state
671
 
672
         procedure Add_Empty_Char
673
           (State    : State_Index;
674
            To_State : State_Index);
675
         --  Add a empty-character transition from State to To_State
676
 
677
         procedure Create_Repetition
678
           (Repetition : Character;
679
            Start_Prev : State_Index;
680
            End_Prev   : State_Index;
681
            New_Start  : out State_Index;
682
            New_End    : in out State_Index);
683
         --  Create the table in case we have a '*', '+' or '?'.
684
         --  Start_Prev .. End_Prev should indicate respectively the start and
685
         --  end index of the previous expression, to which '*', '+' or '?' is
686
         --  applied.
687
 
688
         procedure Create_Simple
689
           (Start_Index : Integer;
690
            End_Index   : Integer;
691
            Start_State : out State_Index;
692
            End_State   : out State_Index);
693
         --  Fill the table for the regexp Simple.
694
         --  This is the recursive procedure called to handle () expressions
695
         --  If End_State = 0, then the call to Create_Simple creates an
696
         --  independent regexp, not a concatenation
697
         --  Start_Index .. End_Index is the starting index in the string S.
698
         --
699
         --  Warning: it may look like we are creating too many empty-string
700
         --  transitions, but they are needed to get the correct regexp.
701
         --  The table is filled as follow ( s means start-state, e means
702
         --  end-state) :
703
         --
704
         --  regexp   state_num | a b * empty_string
705
         --  -------  ------------------------------
706
         --    a          1 (s) | 2 - - -
707
         --               2 (e) | - - - -
708
         --
709
         --    ab         1 (s) | 2 - - -
710
         --               2     | - - - 3
711
         --               3     | - 4 - -
712
         --               4 (e) | - - - -
713
         --
714
         --    a|b        1     | 2 - - -
715
         --               2     | - - - 6
716
         --               3     | - 4 - -
717
         --               4     | - - - 6
718
         --               5 (s) | - - - 1,3
719
         --               6 (e) | - - - -
720
         --
721
         --    a*         1     | 2 - - -
722
         --               2     | - - - 4
723
         --               3 (s) | - - - 1,4
724
         --               4 (e) | - - - 3
725
         --
726
         --    (a)        1 (s) | 2 - - -
727
         --               2 (e) | - - - -
728
         --
729
         --    a+         1     | 2 - - -
730
         --               2     | - - - 4
731
         --               3 (s) | - - - 1
732
         --               4 (e) | - - - 3
733
         --
734
         --    a?         1     | 2 - - -
735
         --               2     | - - - 4
736
         --               3 (s) | - - - 1,4
737
         --               4 (e) | - - - -
738
         --
739
         --    .          1 (s) | 2 2 2 -
740
         --               2 (e) | - - - -
741
 
742
         function Next_Sub_Expression
743
           (Start_Index : Integer;
744
            End_Index   : Integer)
745
            return        Integer;
746
         --  Returns the index of the last character of the next sub-expression
747
         --  in Simple. Index cannot be greater than End_Index.
748
 
749
         --------------------
750
         -- Add_Empty_Char --
751
         --------------------
752
 
753
         procedure Add_Empty_Char
754
           (State    : State_Index;
755
            To_State : State_Index)
756
         is
757
            J : Column_Index := Empty_Char;
758
 
759
         begin
760
            while Get (Table, State, J) /= 0 loop
761
               J := J + 1;
762
            end loop;
763
 
764
            Set (Table, State, J, To_State);
765
         end Add_Empty_Char;
766
 
767
         -----------------------
768
         -- Create_Repetition --
769
         -----------------------
770
 
771
         procedure Create_Repetition
772
           (Repetition : Character;
773
            Start_Prev : State_Index;
774
            End_Prev   : State_Index;
775
            New_Start  : out State_Index;
776
            New_End    : in out State_Index)
777
         is
778
         begin
779
            New_Start := Current_State + 1;
780
 
781
            if New_End /= 0 then
782
               Add_Empty_Char (New_End, New_Start);
783
            end if;
784
 
785
            Current_State := Current_State + 2;
786
            New_End   := Current_State;
787
 
788
            Add_Empty_Char (End_Prev, New_End);
789
            Add_Empty_Char (New_Start, Start_Prev);
790
 
791
            if Repetition /= '+' then
792
               Add_Empty_Char (New_Start, New_End);
793
            end if;
794
 
795
            if Repetition /= '?' then
796
               Add_Empty_Char (New_End, New_Start);
797
            end if;
798
         end Create_Repetition;
799
 
800
         -------------------
801
         -- Create_Simple --
802
         -------------------
803
 
804
         procedure Create_Simple
805
           (Start_Index : Integer;
806
            End_Index   : Integer;
807
            Start_State : out State_Index;
808
            End_State   : out State_Index)
809
         is
810
            J          : Integer := Start_Index;
811
            Last_Start : State_Index := 0;
812
 
813
         begin
814
            Start_State := 0;
815
            End_State   := 0;
816
            while J <= End_Index loop
817
               case S (J) is
818
                  when Open_Paren =>
819
                     declare
820
                        J_Start    : constant Integer := J + 1;
821
                        Next_Start : State_Index;
822
                        Next_End   : State_Index;
823
 
824
                     begin
825
                        J := Next_Sub_Expression (J, End_Index);
826
                        Create_Simple (J_Start, J - 1, Next_Start, Next_End);
827
 
828
                        if J < End_Index
829
                          and then (S (J + 1) = '*' or else
830
                                    S (J + 1) = '+' or else
831
                                    S (J + 1) = '?')
832
                        then
833
                           J := J + 1;
834
                           Create_Repetition
835
                             (S (J),
836
                              Next_Start,
837
                              Next_End,
838
                              Last_Start,
839
                              End_State);
840
 
841
                        else
842
                           Last_Start := Next_Start;
843
 
844
                           if End_State /= 0 then
845
                              Add_Empty_Char (End_State, Last_Start);
846
                           end if;
847
 
848
                           End_State := Next_End;
849
                        end if;
850
                     end;
851
 
852
                  when '|' =>
853
                     declare
854
                        Start_Prev : constant State_Index := Start_State;
855
                        End_Prev   : constant State_Index := End_State;
856
                        Start_J    : constant Integer     := J + 1;
857
                        Start_Next : State_Index := 0;
858
                        End_Next   : State_Index := 0;
859
 
860
                     begin
861
                        J := Next_Sub_Expression (J, End_Index);
862
 
863
                        --  Create a new state for the start of the alternative
864
 
865
                        Current_State := Current_State + 1;
866
                        Last_Start := Current_State;
867
                        Start_State := Last_Start;
868
 
869
                        --  Create the tree for the second part of alternative
870
 
871
                        Create_Simple (Start_J, J, Start_Next, End_Next);
872
 
873
                        --  Create the end state
874
 
875
                        Add_Empty_Char (Last_Start, Start_Next);
876
                        Add_Empty_Char (Last_Start, Start_Prev);
877
                        Current_State := Current_State + 1;
878
                        End_State := Current_State;
879
                        Add_Empty_Char (End_Prev, End_State);
880
                        Add_Empty_Char (End_Next, End_State);
881
                     end;
882
 
883
                  when Open_Bracket =>
884
                     Current_State := Current_State + 1;
885
 
886
                     declare
887
                        Next_State : State_Index := Current_State + 1;
888
 
889
                     begin
890
                        J := J + 1;
891
 
892
                        if S (J) = '^' then
893
                           J := J + 1;
894
 
895
                           Next_State := 0;
896
 
897
                           for Column in 0 .. Alphabet_Size loop
898
                              Set (Table, Current_State, Column,
899
                                   Value => Current_State + 1);
900
                           end loop;
901
                        end if;
902
 
903
                        --  Automatically add the first character
904
 
905
                        if S (J) = '-' or else S (J) = ']' then
906
                           Set (Table, Current_State, Map (S (J)),
907
                                Value => Next_State);
908
                           J := J + 1;
909
                        end if;
910
 
911
                        --  Loop till closing bracket found
912
 
913
                        loop
914
                           exit when S (J) = Close_Bracket;
915
 
916
                           if S (J) = '-'
917
                             and then S (J + 1) /= ']'
918
                           then
919
                              declare
920
                                 Start : constant Integer := J - 1;
921
 
922
                              begin
923
                                 J := J + 1;
924
 
925
                                 if S (J) = '\' then
926
                                    J := J + 1;
927
                                 end if;
928
 
929
                                 for Char in S (Start) .. S (J) loop
930
                                    Set (Table, Current_State, Map (Char),
931
                                         Value => Next_State);
932
                                 end loop;
933
                              end;
934
 
935
                           else
936
                              if S (J) = '\' then
937
                                 J := J + 1;
938
                              end if;
939
 
940
                              Set (Table, Current_State, Map (S (J)),
941
                                   Value => Next_State);
942
                           end if;
943
                           J := J + 1;
944
                        end loop;
945
                     end;
946
 
947
                     Current_State := Current_State + 1;
948
 
949
                     --  If the next symbol is a special symbol
950
 
951
                     if J < End_Index
952
                       and then (S (J + 1) = '*' or else
953
                                 S (J + 1) = '+' or else
954
                                 S (J + 1) = '?')
955
                     then
956
                        J := J + 1;
957
                        Create_Repetition
958
                          (S (J),
959
                           Current_State - 1,
960
                           Current_State,
961
                           Last_Start,
962
                           End_State);
963
 
964
                     else
965
                        Last_Start := Current_State - 1;
966
 
967
                        if End_State /= 0 then
968
                           Add_Empty_Char (End_State, Last_Start);
969
                        end if;
970
 
971
                        End_State := Current_State;
972
                     end if;
973
 
974
                  when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
975
                     Raise_Exception
976
                       ("Incorrect character in regular expression :", J);
977
 
978
                  when others =>
979
                     Current_State := Current_State + 1;
980
 
981
                     --  Create the state for the symbol S (J)
982
 
983
                     if S (J) = '.' then
984
                        for K in 0 .. Alphabet_Size loop
985
                           Set (Table, Current_State, K,
986
                                Value => Current_State + 1);
987
                        end loop;
988
 
989
                     else
990
                        if S (J) = '\' then
991
                           J := J + 1;
992
                        end if;
993
 
994
                        Set (Table, Current_State, Map (S (J)),
995
                             Value => Current_State + 1);
996
                     end if;
997
 
998
                     Current_State := Current_State + 1;
999
 
1000
                     --  If the next symbol is a special symbol
1001
 
1002
                     if J < End_Index
1003
                       and then (S (J + 1) = '*' or else
1004
                                 S (J + 1) = '+' or else
1005
                                 S (J + 1) = '?')
1006
                     then
1007
                        J := J + 1;
1008
                        Create_Repetition
1009
                          (S (J),
1010
                           Current_State - 1,
1011
                           Current_State,
1012
                           Last_Start,
1013
                           End_State);
1014
 
1015
                     else
1016
                        Last_Start := Current_State - 1;
1017
 
1018
                        if End_State /= 0 then
1019
                           Add_Empty_Char (End_State, Last_Start);
1020
                        end if;
1021
 
1022
                        End_State := Current_State;
1023
                     end if;
1024
 
1025
               end case;
1026
 
1027
               if Start_State = 0 then
1028
                  Start_State := Last_Start;
1029
               end if;
1030
 
1031
               J := J + 1;
1032
            end loop;
1033
         end Create_Simple;
1034
 
1035
         -------------------------
1036
         -- Next_Sub_Expression --
1037
         -------------------------
1038
 
1039
         function Next_Sub_Expression
1040
           (Start_Index : Integer;
1041
            End_Index   : Integer)
1042
            return        Integer
1043
         is
1044
            J              : Integer := Start_Index;
1045
            Start_On_Alter : Boolean := False;
1046
 
1047
         begin
1048
            if S (J) = '|' then
1049
               Start_On_Alter := True;
1050
            end if;
1051
 
1052
            loop
1053
               exit when J = End_Index;
1054
               J := J + 1;
1055
 
1056
               case S (J) is
1057
                  when '\' =>
1058
                     J := J + 1;
1059
 
1060
                  when Open_Bracket =>
1061
                     loop
1062
                        J := J + 1;
1063
                        exit when S (J) = Close_Bracket;
1064
 
1065
                        if S (J) = '\' then
1066
                           J := J + 1;
1067
                        end if;
1068
                     end loop;
1069
 
1070
                  when Open_Paren =>
1071
                     J := Next_Sub_Expression (J, End_Index);
1072
 
1073
                  when Close_Paren =>
1074
                     return J;
1075
 
1076
                  when '|' =>
1077
                     if Start_On_Alter then
1078
                        return J - 1;
1079
                     end if;
1080
 
1081
                  when others =>
1082
                     null;
1083
               end case;
1084
            end loop;
1085
 
1086
            return J;
1087
         end Next_Sub_Expression;
1088
 
1089
      --  Start of Create_Primary_Table
1090
 
1091
      begin
1092
         Table.all := (others => (others => 0));
1093
         Create_Simple (S'First, S'Last, Start_State, End_State);
1094
         Num_States := Current_State;
1095
      end Create_Primary_Table;
1096
 
1097
      -------------------------------
1098
      -- Create_Primary_Table_Glob --
1099
      -------------------------------
1100
 
1101
      procedure Create_Primary_Table_Glob
1102
        (Table       : out Regexp_Array_Access;
1103
         Num_States  : out State_Index;
1104
         Start_State : out State_Index;
1105
         End_State   : out State_Index)
1106
      is
1107
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
1108
 
1109
         Current_State : State_Index := 0;
1110
         --  Index of the last created state
1111
 
1112
         procedure Add_Empty_Char
1113
           (State    : State_Index;
1114
            To_State : State_Index);
1115
         --  Add a empty-character transition from State to To_State
1116
 
1117
         procedure Create_Simple
1118
           (Start_Index : Integer;
1119
            End_Index   : Integer;
1120
            Start_State : out State_Index;
1121
            End_State   : out State_Index);
1122
         --  Fill the table for the S (Start_Index .. End_Index).
1123
         --  This is the recursive procedure called to handle () expressions
1124
 
1125
         --------------------
1126
         -- Add_Empty_Char --
1127
         --------------------
1128
 
1129
         procedure Add_Empty_Char
1130
           (State    : State_Index;
1131
            To_State : State_Index)
1132
         is
1133
            J : Column_Index := Empty_Char;
1134
 
1135
         begin
1136
            while Get (Table, State, J) /= 0 loop
1137
               J := J + 1;
1138
            end loop;
1139
 
1140
            Set (Table, State, J,
1141
                 Value => To_State);
1142
         end Add_Empty_Char;
1143
 
1144
         -------------------
1145
         -- Create_Simple --
1146
         -------------------
1147
 
1148
         procedure Create_Simple
1149
           (Start_Index : Integer;
1150
            End_Index   : Integer;
1151
            Start_State : out State_Index;
1152
            End_State   : out State_Index)
1153
         is
1154
            J          : Integer := Start_Index;
1155
            Last_Start : State_Index := 0;
1156
 
1157
         begin
1158
            Start_State := 0;
1159
            End_State   := 0;
1160
 
1161
            while J <= End_Index loop
1162
               case S (J) is
1163
 
1164
                  when Open_Bracket =>
1165
                     Current_State := Current_State + 1;
1166
 
1167
                     declare
1168
                        Next_State : State_Index := Current_State + 1;
1169
 
1170
                     begin
1171
                        J := J + 1;
1172
 
1173
                        if S (J) = '^' then
1174
                           J := J + 1;
1175
                           Next_State := 0;
1176
 
1177
                           for Column in 0 .. Alphabet_Size loop
1178
                              Set (Table, Current_State, Column,
1179
                                   Value => Current_State + 1);
1180
                           end loop;
1181
                        end if;
1182
 
1183
                        --  Automatically add the first character
1184
 
1185
                        if S (J) = '-' or else S (J) = ']' then
1186
                           Set (Table, Current_State, Map (S (J)),
1187
                                Value => Current_State);
1188
                           J := J + 1;
1189
                        end if;
1190
 
1191
                        --  Loop till closing bracket found
1192
 
1193
                        loop
1194
                           exit when S (J) = Close_Bracket;
1195
 
1196
                           if S (J) = '-'
1197
                             and then S (J + 1) /= ']'
1198
                           then
1199
                              declare
1200
                                 Start : constant Integer := J - 1;
1201
                              begin
1202
                                 J := J + 1;
1203
 
1204
                                 if S (J) = '\' then
1205
                                    J := J + 1;
1206
                                 end if;
1207
 
1208
                                 for Char in S (Start) .. S (J) loop
1209
                                    Set (Table, Current_State, Map (Char),
1210
                                         Value => Next_State);
1211
                                 end loop;
1212
                              end;
1213
 
1214
                           else
1215
                              if S (J) = '\' then
1216
                                 J := J + 1;
1217
                              end if;
1218
 
1219
                              Set (Table, Current_State, Map (S (J)),
1220
                                   Value => Next_State);
1221
                           end if;
1222
                           J := J + 1;
1223
                        end loop;
1224
                     end;
1225
 
1226
                     Last_Start := Current_State;
1227
                     Current_State := Current_State + 1;
1228
 
1229
                     if End_State /= 0 then
1230
                        Add_Empty_Char (End_State, Last_Start);
1231
                     end if;
1232
 
1233
                     End_State := Current_State;
1234
 
1235
                  when '{' =>
1236
                     declare
1237
                        End_Sub          : Integer;
1238
                        Start_Regexp_Sub : State_Index;
1239
                        End_Regexp_Sub   : State_Index;
1240
                        Create_Start     : State_Index := 0;
1241
 
1242
                        Create_End : State_Index := 0;
1243
                        --  Initialized to avoid junk warning
1244
 
1245
                     begin
1246
                        while S (J) /= '}' loop
1247
 
1248
                           --  First step : find sub pattern
1249
 
1250
                           End_Sub := J + 1;
1251
                           while S (End_Sub) /= ','
1252
                             and then S (End_Sub) /= '}'
1253
                           loop
1254
                              End_Sub := End_Sub + 1;
1255
                           end loop;
1256
 
1257
                           --  Second step : create a sub pattern
1258
 
1259
                           Create_Simple
1260
                             (J + 1,
1261
                              End_Sub - 1,
1262
                              Start_Regexp_Sub,
1263
                              End_Regexp_Sub);
1264
 
1265
                           J := End_Sub;
1266
 
1267
                           --  Third step : create an alternative
1268
 
1269
                           if Create_Start = 0 then
1270
                              Current_State := Current_State + 1;
1271
                              Create_Start := Current_State;
1272
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1273
                              Current_State := Current_State + 1;
1274
                              Create_End := Current_State;
1275
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1276
 
1277
                           else
1278
                              Current_State := Current_State + 1;
1279
                              Add_Empty_Char (Current_State, Create_Start);
1280
                              Create_Start := Current_State;
1281
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1282
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1283
                           end if;
1284
                        end loop;
1285
 
1286
                        if End_State /= 0 then
1287
                           Add_Empty_Char (End_State, Create_Start);
1288
                        end if;
1289
 
1290
                        End_State := Create_End;
1291
                        Last_Start := Create_Start;
1292
                     end;
1293
 
1294
                  when '*' =>
1295
                     Current_State := Current_State + 1;
1296
 
1297
                     if End_State /= 0 then
1298
                        Add_Empty_Char (End_State, Current_State);
1299
                     end if;
1300
 
1301
                     Add_Empty_Char (Current_State, Current_State + 1);
1302
                     Add_Empty_Char (Current_State, Current_State + 3);
1303
                     Last_Start := Current_State;
1304
 
1305
                     Current_State := Current_State + 1;
1306
 
1307
                     for K in 0 .. Alphabet_Size loop
1308
                        Set (Table, Current_State, K,
1309
                             Value => Current_State + 1);
1310
                     end loop;
1311
 
1312
                     Current_State := Current_State + 1;
1313
                     Add_Empty_Char (Current_State, Current_State + 1);
1314
 
1315
                     Current_State := Current_State + 1;
1316
                     Add_Empty_Char (Current_State,  Last_Start);
1317
                     End_State := Current_State;
1318
 
1319
                  when others =>
1320
                     Current_State := Current_State + 1;
1321
 
1322
                     if S (J) = '?' then
1323
                        for K in 0 .. Alphabet_Size loop
1324
                           Set (Table, Current_State, K,
1325
                                Value => Current_State + 1);
1326
                        end loop;
1327
 
1328
                     else
1329
                        if S (J) = '\' then
1330
                           J := J + 1;
1331
                        end if;
1332
 
1333
                        --  Create the state for the symbol S (J)
1334
 
1335
                        Set (Table, Current_State, Map (S (J)),
1336
                             Value => Current_State + 1);
1337
                     end if;
1338
 
1339
                     Last_Start := Current_State;
1340
                     Current_State := Current_State + 1;
1341
 
1342
                     if End_State /= 0 then
1343
                        Add_Empty_Char (End_State, Last_Start);
1344
                     end if;
1345
 
1346
                     End_State := Current_State;
1347
 
1348
               end case;
1349
 
1350
               if Start_State = 0 then
1351
                  Start_State := Last_Start;
1352
               end if;
1353
 
1354
               J := J + 1;
1355
            end loop;
1356
         end Create_Simple;
1357
 
1358
      --  Start of processing for Create_Primary_Table_Glob
1359
 
1360
      begin
1361
         Table.all := (others => (others => 0));
1362
         Create_Simple (S'First, S'Last, Start_State, End_State);
1363
         Num_States := Current_State;
1364
      end Create_Primary_Table_Glob;
1365
 
1366
      ----------------------------
1367
      -- Create_Secondary_Table --
1368
      ----------------------------
1369
 
1370
      function Create_Secondary_Table
1371
        (First_Table : Regexp_Array_Access;
1372
         Num_States  : State_Index;
1373
         Start_State : State_Index;
1374
         End_State   : State_Index) return Regexp
1375
      is
1376
         pragma Warnings (Off, Num_States);
1377
 
1378
         Last_Index : constant State_Index := First_Table'Last (1);
1379
         type Meta_State is array (1 .. Last_Index) of Boolean;
1380
 
1381
         Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1382
                   (others => (others => 0));
1383
 
1384
         Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1385
                         (others => (others => False));
1386
 
1387
         Temp_State_Not_Null : Boolean;
1388
 
1389
         Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1390
 
1391
         Current_State       : State_Index := 1;
1392
         Nb_State            : State_Index := 1;
1393
 
1394
         procedure Closure
1395
           (State : in out Meta_State;
1396
            Item  :        State_Index);
1397
         --  Compute the closure of the state (that is every other state which
1398
         --  has a empty-character transition) and add it to the state
1399
 
1400
         -------------
1401
         -- Closure --
1402
         -------------
1403
 
1404
         procedure Closure
1405
           (State : in out Meta_State;
1406
            Item  : State_Index)
1407
         is
1408
         begin
1409
            if State (Item) then
1410
               return;
1411
            end if;
1412
 
1413
            State (Item) := True;
1414
 
1415
            for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1416
               if First_Table (Item, Column) = 0 then
1417
                  return;
1418
               end if;
1419
 
1420
               Closure (State, First_Table (Item, Column));
1421
            end loop;
1422
         end Closure;
1423
 
1424
      --  Start of processing for Create_Secondary_Table
1425
 
1426
      begin
1427
         --  Create a new state
1428
 
1429
         Closure (Meta_States (Current_State), Start_State);
1430
 
1431
         while Current_State <= Nb_State loop
1432
 
1433
            --  If this new meta-state includes the primary table end state,
1434
            --  then this meta-state will be a final state in the regexp
1435
 
1436
            if Meta_States (Current_State)(End_State) then
1437
               Is_Final (Current_State) := True;
1438
            end if;
1439
 
1440
            --  For every character in the regexp, calculate the possible
1441
            --  transitions from Current_State
1442
 
1443
            for Column in 0 .. Alphabet_Size loop
1444
               Meta_States (Nb_State + 1) := (others => False);
1445
               Temp_State_Not_Null := False;
1446
 
1447
               for K in Meta_States (Current_State)'Range loop
1448
                  if Meta_States (Current_State)(K)
1449
                    and then First_Table (K, Column) /= 0
1450
                  then
1451
                     Closure
1452
                       (Meta_States (Nb_State + 1), First_Table (K, Column));
1453
                     Temp_State_Not_Null := True;
1454
                  end if;
1455
               end loop;
1456
 
1457
               --  If at least one transition existed
1458
 
1459
               if Temp_State_Not_Null then
1460
 
1461
                  --  Check if this new state corresponds to an old one
1462
 
1463
                  for K in 1 .. Nb_State loop
1464
                     if Meta_States (K) = Meta_States (Nb_State + 1) then
1465
                        Table (Current_State, Column) := K;
1466
                        exit;
1467
                     end if;
1468
                  end loop;
1469
 
1470
                  --  If not, create a new state
1471
 
1472
                  if Table (Current_State, Column) = 0 then
1473
                     Nb_State := Nb_State + 1;
1474
                     Table (Current_State, Column) := Nb_State;
1475
                  end if;
1476
               end if;
1477
            end loop;
1478
 
1479
            Current_State := Current_State + 1;
1480
         end loop;
1481
 
1482
         --  Returns the regexp
1483
 
1484
         declare
1485
            R : Regexp_Access;
1486
 
1487
         begin
1488
            R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1489
                                   Num_States    => Nb_State);
1490
            R.Map            := Map;
1491
            R.Is_Final       := Is_Final (1 .. Nb_State);
1492
            R.Case_Sensitive := Case_Sensitive;
1493
 
1494
            for State in 1 .. Nb_State loop
1495
               for K in 0 .. Alphabet_Size loop
1496
                  R.States (State, K) := Table (State, K);
1497
               end loop;
1498
            end loop;
1499
 
1500
            return (Ada.Finalization.Controlled with R => R);
1501
         end;
1502
      end Create_Secondary_Table;
1503
 
1504
      ---------------------
1505
      -- Raise_Exception --
1506
      ---------------------
1507
 
1508
      procedure Raise_Exception (M : String; Index : Integer) is
1509
      begin
1510
         raise Error_In_Regexp with M & " at offset" & Index'Img;
1511
      end Raise_Exception;
1512
 
1513
   --  Start of processing for Compile
1514
 
1515
   begin
1516
      --  Special case for the empty string: it always matches, and the
1517
      --  following processing would fail on it.
1518
      if S = "" then
1519
         return (Ada.Finalization.Controlled with
1520
                 R => new Regexp_Value'
1521
                      (Alphabet_Size => 0,
1522
                       Num_States    => 1,
1523
                       Map           => (others => 0),
1524
                       States        => (others => (others => 1)),
1525
                       Is_Final      => (others => True),
1526
                       Case_Sensitive => True));
1527
      end if;
1528
 
1529
      if not Case_Sensitive then
1530
         System.Case_Util.To_Lower (S);
1531
      end if;
1532
 
1533
      --  Check the pattern is well-formed before any treatment
1534
 
1535
      Check_Well_Formed_Pattern;
1536
 
1537
      Create_Mapping;
1538
 
1539
      --  Creates the primary table
1540
 
1541
      declare
1542
         Table       : Regexp_Array_Access;
1543
         Num_States  : State_Index;
1544
         Start_State : State_Index;
1545
         End_State   : State_Index;
1546
         R           : Regexp;
1547
 
1548
      begin
1549
         Table := new Regexp_Array (1 .. 100,
1550
 
1551
         if not Glob then
1552
            Create_Primary_Table (Table, Num_States, Start_State, End_State);
1553
         else
1554
            Create_Primary_Table_Glob
1555
              (Table, Num_States, Start_State, End_State);
1556
         end if;
1557
 
1558
         --  Creates the secondary table
1559
 
1560
         R := Create_Secondary_Table
1561
           (Table, Num_States, Start_State, End_State);
1562
         Free (Table);
1563
         return R;
1564
      end;
1565
   end Compile;
1566
 
1567
   --------------
1568
   -- Finalize --
1569
   --------------
1570
 
1571
   procedure Finalize (R : in out Regexp) is
1572
      procedure Free is new
1573
        Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1574
 
1575
   begin
1576
      Free (R.R);
1577
   end Finalize;
1578
 
1579
   ---------
1580
   -- Get --
1581
   ---------
1582
 
1583
   function Get
1584
     (Table  : Regexp_Array_Access;
1585
      State  : State_Index;
1586
      Column : Column_Index) return State_Index
1587
   is
1588
   begin
1589
      if State <= Table'Last (1)
1590
        and then Column <= Table'Last (2)
1591
      then
1592
         return Table (State, Column);
1593
      else
1594
         return 0;
1595
      end if;
1596
   end Get;
1597
 
1598
   -----------
1599
   -- Match --
1600
   -----------
1601
 
1602
   function Match (S : String; R : Regexp) return Boolean is
1603
      Current_State : State_Index := 1;
1604
 
1605
   begin
1606
      if R.R = null then
1607
         raise Constraint_Error;
1608
      end if;
1609
 
1610
      for Char in S'Range loop
1611
 
1612
         if R.R.Case_Sensitive then
1613
            Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1614
         else
1615
            Current_State :=
1616
              R.R.States (Current_State,
1617
                          R.R.Map (System.Case_Util.To_Lower (S (Char))));
1618
         end if;
1619
 
1620
         if Current_State = 0 then
1621
            return False;
1622
         end if;
1623
 
1624
      end loop;
1625
 
1626
      return R.R.Is_Final (Current_State);
1627
   end Match;
1628
 
1629
   ---------
1630
   -- Set --
1631
   ---------
1632
 
1633
   procedure Set
1634
     (Table  : in out Regexp_Array_Access;
1635
      State  : State_Index;
1636
      Column : Column_Index;
1637
      Value  : State_Index)
1638
   is
1639
      New_Lines   : State_Index;
1640
      New_Columns : Column_Index;
1641
      New_Table   : Regexp_Array_Access;
1642
 
1643
   begin
1644
      if State <= Table'Last (1)
1645
        and then Column <= Table'Last (2)
1646
      then
1647
         Table (State, Column) := Value;
1648
      else
1649
         --  Doubles the size of the table until it is big enough that
1650
         --  (State, Column) is a valid index
1651
 
1652
         New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1653
         New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1654
         New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1655
                                        Table'First (2) .. New_Columns);
1656
         New_Table.all := (others => (others => 0));
1657
 
1658
         for J in Table'Range (1) loop
1659
            for K in Table'Range (2) loop
1660
               New_Table (J, K) := Table (J, K);
1661
            end loop;
1662
         end loop;
1663
 
1664
         Free (Table);
1665
         Table := New_Table;
1666
         Table (State, Column) := Value;
1667
      end if;
1668
   end Set;
1669
 
1670
end System.Regexp;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.