OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [ada/] [s-regexp.adb] - Blame information for rev 708

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 706 jeremybenn
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT COMPILER COMPONENTS                         --
4
--                                                                          --
5
--                        S Y S T E M . R E G E X P                         --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--                     Copyright (C) 1999-2010, AdaCore                     --
10
--                                                                          --
11
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12
-- terms of the  GNU General Public License as published  by the Free Soft- --
13
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17
--                                                                          --
18
-- As a special exception under Section 7 of GPL version 3, you are granted --
19
-- additional permissions described in the GCC Runtime Library Exception,   --
20
-- version 3.1, as published by the Free Software Foundation.               --
21
--                                                                          --
22
-- You should have received a copy of the GNU General Public License and    --
23
-- a copy of the GCC Runtime Library Exception along with this program;     --
24
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25
-- <http://www.gnu.org/licenses/>.                                          --
26
--                                                                          --
27
-- GNAT was originally developed  by the GNAT team at  New York University. --
28
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29
--                                                                          --
30
------------------------------------------------------------------------------
31
 
32
with Ada.Unchecked_Deallocation;
33
 
34
with System.Case_Util;
35
 
36
package body System.Regexp is
37
 
38
   Open_Paren    : constant Character := '(';
39
   Close_Paren   : constant Character := ')';
40
   Open_Bracket  : constant Character := '[';
41
   Close_Bracket : constant Character := ']';
42
 
43
   type State_Index is new Natural;
44
   type Column_Index is new Natural;
45
 
46
   type Regexp_Array is array
47
     (State_Index range <>, Column_Index range <>) of State_Index;
48
   --  First index is for the state number
49
   --  Second index is for the character type
50
   --  Contents is the new State
51
 
52
   type Regexp_Array_Access is access Regexp_Array;
53
   --  Use this type through the functions Set below, so that it
54
   --  can grow dynamically depending on the needs.
55
 
56
   type Mapping is array (Character'Range) of Column_Index;
57
   --  Mapping between characters and column in the Regexp_Array
58
 
59
   type Boolean_Array is array (State_Index range <>) of Boolean;
60
 
61
   type Regexp_Value
62
     (Alphabet_Size : Column_Index;
63
      Num_States    : State_Index) is
64
   record
65
      Map            : Mapping;
66
      States         : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
67
      Is_Final       : Boolean_Array (1 .. Num_States);
68
      Case_Sensitive : Boolean;
69
   end record;
70
   --  Deterministic finite-state machine
71
 
72
   -----------------------
73
   -- Local Subprograms --
74
   -----------------------
75
 
76
   procedure Set
77
     (Table  : in out Regexp_Array_Access;
78
      State  : State_Index;
79
      Column : Column_Index;
80
      Value  : State_Index);
81
   --  Sets a value in the table. If the table is too small, reallocate it
82
   --  dynamically so that (State, Column) is a valid index in it.
83
 
84
   function Get
85
     (Table  : Regexp_Array_Access;
86
      State  : State_Index;
87
      Column : Column_Index)
88
      return   State_Index;
89
   --  Returns the value in the table at (State, Column).
90
   --  If this index does not exist in the table, returns 0
91
 
92
   procedure Free is new Ada.Unchecked_Deallocation
93
     (Regexp_Array, Regexp_Array_Access);
94
 
95
   ------------
96
   -- Adjust --
97
   ------------
98
 
99
   procedure Adjust (R : in out Regexp) is
100
      Tmp : Regexp_Access;
101
 
102
   begin
103
      Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
104
                               Num_States    => R.R.Num_States);
105
      Tmp.all := R.R.all;
106
      R.R := Tmp;
107
   end Adjust;
108
 
109
   -------------
110
   -- Compile --
111
   -------------
112
 
113
   function Compile
114
     (Pattern        : String;
115
      Glob           : Boolean := False;
116
      Case_Sensitive : Boolean := True)
117
      return           Regexp
118
   is
119
      S : String := Pattern;
120
      --  The pattern which is really compiled (when the pattern is case
121
      --  insensitive, we convert this string to lower-cases
122
 
123
      Map : Mapping := (others => 0);
124
      --  Mapping between characters and columns in the tables
125
 
126
      Alphabet_Size : Column_Index := 0;
127
      --  Number of significant characters in the regular expression.
128
      --  This total does not include special operators, such as *, (, ...
129
 
130
      procedure Check_Well_Formed_Pattern;
131
      --  Check that the pattern to compile is well-formed, so that subsequent
132
      --  code can rely on this without performing each time the checks to
133
      --  avoid accessing the pattern outside its bounds. However, not all
134
      --  well-formedness rules are checked. In particular, rules about special
135
      --  characters not being treated as regular characters are not checked.
136
 
137
      procedure Create_Mapping;
138
      --  Creates a mapping between characters in the regexp and columns
139
      --  in the tables representing the regexp. Test that the regexp is
140
      --  well-formed Modifies Alphabet_Size and Map
141
 
142
      procedure Create_Primary_Table
143
        (Table       : out Regexp_Array_Access;
144
         Num_States  : out State_Index;
145
         Start_State : out State_Index;
146
         End_State   : out State_Index);
147
      --  Creates the first version of the regexp (this is a non deterministic
148
      --  finite state machine, which is unadapted for a fast pattern
149
      --  matching algorithm). We use a recursive algorithm to process the
150
      --  parenthesis sub-expressions.
151
      --
152
      --  Table : at the end of the procedure : Column 0 is for any character
153
      --  ('.') and the last columns are for no character (closure)
154
      --  Num_States is set to the number of states in the table
155
      --  Start_State is the number of the starting state in the regexp
156
      --  End_State is the number of the final state when the regexp matches
157
 
158
      procedure Create_Primary_Table_Glob
159
        (Table       : out Regexp_Array_Access;
160
         Num_States  : out State_Index;
161
         Start_State : out State_Index;
162
         End_State   : out State_Index);
163
      --  Same function as above, but it deals with the second possible
164
      --  grammar for 'globbing pattern', which is a kind of subset of the
165
      --  whole regular expression grammar.
166
 
167
      function Create_Secondary_Table
168
        (First_Table : Regexp_Array_Access;
169
         Num_States  : State_Index;
170
         Start_State : State_Index;
171
         End_State   : State_Index)
172
         return        Regexp;
173
      --  Creates the definitive table representing the regular expression
174
      --  This is actually a transformation of the primary table First_Table,
175
      --  where every state is grouped with the states in its 'no-character'
176
      --  columns. The transitions between the new states are then recalculated
177
      --  and if necessary some new states are created.
178
      --
179
      --  Note that the resulting finite-state machine is not optimized in
180
      --  terms of the number of states : it would be more time-consuming to
181
      --  add a third pass to reduce the number of states in the machine, with
182
      --  no speed improvement...
183
 
184
      procedure Raise_Exception (M : String; Index : Integer);
185
      pragma No_Return (Raise_Exception);
186
      --  Raise an exception, indicating an error at character Index in S
187
 
188
      -------------------------------
189
      -- Check_Well_Formed_Pattern --
190
      -------------------------------
191
 
192
      procedure Check_Well_Formed_Pattern is
193
         J : Integer;
194
 
195
         Past_Elmt : Boolean := False;
196
         --  Set to True everywhere an elmt has been parsed, if Glob=False,
197
         --  meaning there can be now an occurrence of '*', '+' and '?'.
198
 
199
         Past_Term : Boolean := False;
200
         --  Set to True everywhere a term has been parsed, if Glob=False,
201
         --  meaning there can be now an occurrence of '|'.
202
 
203
         Parenthesis_Level : Integer := 0;
204
         Curly_Level       : Integer := 0;
205
 
206
         Last_Open : Integer := S'First - 1;
207
         --  The last occurrence of an opening parenthesis, if Glob=False,
208
         --  or the last occurrence of an opening curly brace, if Glob=True.
209
 
210
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
211
         --  If no more characters are raised, call Raise_Exception
212
 
213
         --------------------------------------
214
         -- Raise_Exception_If_No_More_Chars --
215
         --------------------------------------
216
 
217
         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
218
         begin
219
            if J + K > S'Last then
220
               Raise_Exception ("Ill-formed pattern while parsing", J);
221
            end if;
222
         end Raise_Exception_If_No_More_Chars;
223
 
224
      --  Start of processing for Check_Well_Formed_Pattern
225
 
226
      begin
227
         J := S'First;
228
         while J <= S'Last loop
229
            case S (J) is
230
               when Open_Bracket =>
231
                  J := J + 1;
232
                  Raise_Exception_If_No_More_Chars;
233
 
234
                  if not Glob then
235
                     if S (J) = '^' then
236
                        J := J + 1;
237
                        Raise_Exception_If_No_More_Chars;
238
                     end if;
239
                  end if;
240
 
241
                  --  The first character never has a special meaning
242
 
243
                  if S (J) = ']' or else S (J) = '-' then
244
                     J := J + 1;
245
                     Raise_Exception_If_No_More_Chars;
246
                  end if;
247
 
248
                  --  The set of characters cannot be empty
249
 
250
                  if S (J) = ']' then
251
                     Raise_Exception
252
                       ("Set of characters cannot be empty in regular "
253
                          & "expression", J);
254
                  end if;
255
 
256
                  declare
257
                     Possible_Range_Start : Boolean := True;
258
                     --  Set True everywhere a range character '-' can occur
259
 
260
                  begin
261
                     loop
262
                        exit when S (J) = Close_Bracket;
263
 
264
                        --  The current character should be followed by a
265
                        --  closing bracket.
266
 
267
                        Raise_Exception_If_No_More_Chars (1);
268
 
269
                        if S (J) = '-'
270
                          and then S (J + 1) /= Close_Bracket
271
                        then
272
                           if not Possible_Range_Start then
273
                              Raise_Exception
274
                                ("No mix of ranges is allowed in "
275
                                   & "regular expression", J);
276
                           end if;
277
 
278
                           J := J + 1;
279
                           Raise_Exception_If_No_More_Chars;
280
 
281
                           --  Range cannot be followed by '-' character,
282
                           --  except as last character in the set.
283
 
284
                           Possible_Range_Start := False;
285
 
286
                        else
287
                           Possible_Range_Start := True;
288
                        end if;
289
 
290
                        if S (J) = '\' then
291
                           J := J + 1;
292
                           Raise_Exception_If_No_More_Chars;
293
                        end if;
294
 
295
                        J := J + 1;
296
                     end loop;
297
                  end;
298
 
299
                  --  A closing bracket can end an elmt or term
300
 
301
                  Past_Elmt := True;
302
                  Past_Term := True;
303
 
304
               when Close_Bracket =>
305
 
306
                  --  A close bracket must follow a open_bracket, and cannot be
307
                  --  found alone on the line.
308
 
309
                  Raise_Exception
310
                    ("Incorrect character ']' in regular expression", J);
311
 
312
               when '\' =>
313
                  if J < S'Last then
314
                     J := J + 1;
315
 
316
                     --  Any character can be an elmt or a term
317
 
318
                     Past_Elmt := True;
319
                     Past_Term := True;
320
 
321
                  else
322
                     --  \ not allowed at the end of the regexp
323
 
324
                     Raise_Exception
325
                       ("Incorrect character '\' in regular expression", J);
326
                  end if;
327
 
328
               when Open_Paren =>
329
                  if not Glob then
330
                     Parenthesis_Level := Parenthesis_Level + 1;
331
                     Last_Open := J;
332
 
333
                     --  An open parenthesis does not end an elmt or term
334
 
335
                     Past_Elmt := False;
336
                     Past_Term := False;
337
                  end if;
338
 
339
               when Close_Paren =>
340
                  if not Glob then
341
                     Parenthesis_Level := Parenthesis_Level - 1;
342
 
343
                     if Parenthesis_Level < 0 then
344
                        Raise_Exception
345
                          ("')' is not associated with '(' in regular "
346
                           & "expression", J);
347
                     end if;
348
 
349
                     if J = Last_Open + 1 then
350
                        Raise_Exception
351
                          ("Empty parentheses not allowed in regular "
352
                           & "expression", J);
353
                     end if;
354
 
355
                     if not Past_Term then
356
                        Raise_Exception
357
                          ("Closing parenthesis not allowed here in regular "
358
                             & "expression", J);
359
                     end if;
360
 
361
                     --  A closing parenthesis can end an elmt or term
362
 
363
                     Past_Elmt := True;
364
                     Past_Term := True;
365
                  end if;
366
 
367
               when '{' =>
368
                  if Glob then
369
                     Curly_Level := Curly_Level + 1;
370
                     Last_Open := J;
371
 
372
                  else
373
                     --  Any character can be an elmt or a term
374
 
375
                     Past_Elmt := True;
376
                     Past_Term := True;
377
                  end if;
378
 
379
                  --  No need to check for ',' as the code always accepts them
380
 
381
               when '}' =>
382
                  if Glob then
383
                     Curly_Level := Curly_Level - 1;
384
 
385
                     if Curly_Level < 0 then
386
                        Raise_Exception
387
                          ("'}' is not associated with '{' in regular "
388
                           & "expression", J);
389
                     end if;
390
 
391
                     if J = Last_Open + 1 then
392
                        Raise_Exception
393
                          ("Empty curly braces not allowed in regular "
394
                           & "expression", J);
395
                     end if;
396
 
397
                  else
398
                     --  Any character can be an elmt or a term
399
 
400
                     Past_Elmt := True;
401
                     Past_Term := True;
402
                  end if;
403
 
404
               when '*' | '?' | '+' =>
405
                  if not Glob then
406
 
407
                     --  These operators must apply to an elmt sub-expression,
408
                     --  and cannot be found if one has not just been parsed.
409
 
410
                     if not Past_Elmt then
411
                        Raise_Exception
412
                          ("'*', '+' and '?' operators must be "
413
                           & "applied to an element in regular expression", J);
414
                     end if;
415
 
416
                     Past_Elmt := False;
417
                     Past_Term := True;
418
                  end if;
419
 
420
               when '|' =>
421
                  if not Glob then
422
 
423
                     --  This operator must apply to a term sub-expression,
424
                     --  and cannot be found if one has not just been parsed.
425
 
426
                     if not Past_Term then
427
                        Raise_Exception
428
                          ("'|' operator must be "
429
                           & "applied to a term in regular expression", J);
430
                     end if;
431
 
432
                     Past_Elmt := False;
433
                     Past_Term := False;
434
                  end if;
435
 
436
               when others =>
437
                  if not Glob then
438
 
439
                     --  Any character can be an elmt or a term
440
 
441
                     Past_Elmt := True;
442
                     Past_Term := True;
443
                  end if;
444
            end case;
445
 
446
            J := J + 1;
447
         end loop;
448
 
449
         --  A closing parenthesis must follow an open parenthesis
450
 
451
         if Parenthesis_Level /= 0 then
452
            Raise_Exception
453
              ("'(' must always be associated with a ')'", J);
454
         end if;
455
 
456
         --  A closing curly brace must follow an open curly brace
457
 
458
         if Curly_Level /= 0 then
459
            Raise_Exception
460
              ("'{' must always be associated with a '}'", J);
461
         end if;
462
      end Check_Well_Formed_Pattern;
463
 
464
      --------------------
465
      -- Create_Mapping --
466
      --------------------
467
 
468
      procedure Create_Mapping is
469
 
470
         procedure Add_In_Map (C : Character);
471
         --  Add a character in the mapping, if it is not already defined
472
 
473
         ----------------
474
         -- Add_In_Map --
475
         ----------------
476
 
477
         procedure Add_In_Map (C : Character) is
478
         begin
479
            if Map (C) = 0 then
480
               Alphabet_Size := Alphabet_Size + 1;
481
               Map (C) := Alphabet_Size;
482
            end if;
483
         end Add_In_Map;
484
 
485
         J                 : Integer := S'First;
486
         Parenthesis_Level : Integer := 0;
487
         Curly_Level       : Integer := 0;
488
         Last_Open         : Integer := S'First - 1;
489
 
490
      --  Start of processing for Create_Mapping
491
 
492
      begin
493
         while J <= S'Last loop
494
            case S (J) is
495
               when Open_Bracket =>
496
                  J := J + 1;
497
 
498
                  if S (J) = '^' then
499
                     J := J + 1;
500
                  end if;
501
 
502
                  if S (J) = ']' or else S (J) = '-' then
503
                     J := J + 1;
504
                  end if;
505
 
506
                  --  The first character never has a special meaning
507
 
508
                  loop
509
                     if J > S'Last then
510
                        Raise_Exception
511
                          ("Ran out of characters while parsing ", J);
512
                     end if;
513
 
514
                     exit when S (J) = Close_Bracket;
515
 
516
                     if S (J) = '-'
517
                       and then S (J + 1) /= Close_Bracket
518
                     then
519
                        declare
520
                           Start : constant Integer := J - 1;
521
 
522
                        begin
523
                           J := J + 1;
524
 
525
                           if S (J) = '\' then
526
                              J := J + 1;
527
                           end if;
528
 
529
                           for Char in S (Start) .. S (J) loop
530
                              Add_In_Map (Char);
531
                           end loop;
532
                        end;
533
                     else
534
                        if S (J) = '\' then
535
                           J := J + 1;
536
                        end if;
537
 
538
                        Add_In_Map (S (J));
539
                     end if;
540
 
541
                     J := J + 1;
542
                  end loop;
543
 
544
                  --  A close bracket must follow a open_bracket,
545
                  --  and cannot be found alone on the line
546
 
547
               when Close_Bracket =>
548
                  Raise_Exception
549
                    ("Incorrect character ']' in regular expression", J);
550
 
551
               when '\' =>
552
                  if J < S'Last  then
553
                     J := J + 1;
554
                     Add_In_Map (S (J));
555
 
556
                  else
557
                     --  \ not allowed at the end of the regexp
558
 
559
                     Raise_Exception
560
                       ("Incorrect character '\' in regular expression", J);
561
                  end if;
562
 
563
               when Open_Paren =>
564
                  if not Glob then
565
                     Parenthesis_Level := Parenthesis_Level + 1;
566
                     Last_Open := J;
567
                  else
568
                     Add_In_Map (Open_Paren);
569
                  end if;
570
 
571
               when Close_Paren =>
572
                  if not Glob then
573
                     Parenthesis_Level := Parenthesis_Level - 1;
574
 
575
                     if Parenthesis_Level < 0 then
576
                        Raise_Exception
577
                          ("')' is not associated with '(' in regular "
578
                           & "expression", J);
579
                     end if;
580
 
581
                     if J = Last_Open + 1 then
582
                        Raise_Exception
583
                          ("Empty parenthesis not allowed in regular "
584
                           & "expression", J);
585
                     end if;
586
 
587
                  else
588
                     Add_In_Map (Close_Paren);
589
                  end if;
590
 
591
               when '.' =>
592
                  if Glob then
593
                     Add_In_Map ('.');
594
                  end if;
595
 
596
               when '{' =>
597
                  if not Glob then
598
                     Add_In_Map (S (J));
599
                  else
600
                     Curly_Level := Curly_Level + 1;
601
                  end if;
602
 
603
               when '}' =>
604
                  if not Glob then
605
                     Add_In_Map (S (J));
606
                  else
607
                     Curly_Level := Curly_Level - 1;
608
                  end if;
609
 
610
               when '*' | '?' =>
611
                  if not Glob then
612
                     if J = S'First then
613
                        Raise_Exception
614
                          ("'*', '+', '?' and '|' operators cannot be in "
615
                           & "first position in regular expression", J);
616
                     end if;
617
                  end if;
618
 
619
               when '|' | '+' =>
620
                  if not Glob then
621
                     if J = S'First then
622
 
623
                        --  These operators must apply to a sub-expression,
624
                        --  and cannot be found at the beginning of the line
625
 
626
                        Raise_Exception
627
                          ("'*', '+', '?' and '|' operators cannot be in "
628
                           & "first position in regular expression", J);
629
                     end if;
630
 
631
                  else
632
                     Add_In_Map (S (J));
633
                  end if;
634
 
635
               when others =>
636
                  Add_In_Map (S (J));
637
            end case;
638
 
639
            J := J + 1;
640
         end loop;
641
 
642
         --  A closing parenthesis must follow an open parenthesis
643
 
644
         if Parenthesis_Level /= 0 then
645
            Raise_Exception
646
              ("'(' must always be associated with a ')'", J);
647
         end if;
648
 
649
         if Curly_Level /= 0 then
650
            Raise_Exception
651
              ("'{' must always be associated with a '}'", J);
652
         end if;
653
      end Create_Mapping;
654
 
655
      --------------------------
656
      -- Create_Primary_Table --
657
      --------------------------
658
 
659
      procedure Create_Primary_Table
660
        (Table       : out Regexp_Array_Access;
661
         Num_States  : out State_Index;
662
         Start_State : out State_Index;
663
         End_State   : out State_Index)
664
      is
665
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
666
 
667
         Current_State : State_Index := 0;
668
         --  Index of the last created state
669
 
670
         procedure Add_Empty_Char
671
           (State    : State_Index;
672
            To_State : State_Index);
673
         --  Add a empty-character transition from State to To_State
674
 
675
         procedure Create_Repetition
676
           (Repetition : Character;
677
            Start_Prev : State_Index;
678
            End_Prev   : State_Index;
679
            New_Start  : out State_Index;
680
            New_End    : in out State_Index);
681
         --  Create the table in case we have a '*', '+' or '?'.
682
         --  Start_Prev .. End_Prev should indicate respectively the start and
683
         --  end index of the previous expression, to which '*', '+' or '?' is
684
         --  applied.
685
 
686
         procedure Create_Simple
687
           (Start_Index : Integer;
688
            End_Index   : Integer;
689
            Start_State : out State_Index;
690
            End_State   : out State_Index);
691
         --  Fill the table for the regexp Simple.
692
         --  This is the recursive procedure called to handle () expressions
693
         --  If End_State = 0, then the call to Create_Simple creates an
694
         --  independent regexp, not a concatenation
695
         --  Start_Index .. End_Index is the starting index in the string S.
696
         --
697
         --  Warning: it may look like we are creating too many empty-string
698
         --  transitions, but they are needed to get the correct regexp.
699
         --  The table is filled as follow ( s means start-state, e means
700
         --  end-state) :
701
         --
702
         --  regexp   state_num | a b * empty_string
703
         --  -------  ------------------------------
704
         --    a          1 (s) | 2 - - -
705
         --               2 (e) | - - - -
706
         --
707
         --    ab         1 (s) | 2 - - -
708
         --               2     | - - - 3
709
         --               3     | - 4 - -
710
         --               4 (e) | - - - -
711
         --
712
         --    a|b        1     | 2 - - -
713
         --               2     | - - - 6
714
         --               3     | - 4 - -
715
         --               4     | - - - 6
716
         --               5 (s) | - - - 1,3
717
         --               6 (e) | - - - -
718
         --
719
         --    a*         1     | 2 - - -
720
         --               2     | - - - 4
721
         --               3 (s) | - - - 1,4
722
         --               4 (e) | - - - 3
723
         --
724
         --    (a)        1 (s) | 2 - - -
725
         --               2 (e) | - - - -
726
         --
727
         --    a+         1     | 2 - - -
728
         --               2     | - - - 4
729
         --               3 (s) | - - - 1
730
         --               4 (e) | - - - 3
731
         --
732
         --    a?         1     | 2 - - -
733
         --               2     | - - - 4
734
         --               3 (s) | - - - 1,4
735
         --               4 (e) | - - - -
736
         --
737
         --    .          1 (s) | 2 2 2 -
738
         --               2 (e) | - - - -
739
 
740
         function Next_Sub_Expression
741
           (Start_Index : Integer;
742
            End_Index   : Integer)
743
            return        Integer;
744
         --  Returns the index of the last character of the next sub-expression
745
         --  in Simple. Index cannot be greater than End_Index.
746
 
747
         --------------------
748
         -- Add_Empty_Char --
749
         --------------------
750
 
751
         procedure Add_Empty_Char
752
           (State    : State_Index;
753
            To_State : State_Index)
754
         is
755
            J : Column_Index := Empty_Char;
756
 
757
         begin
758
            while Get (Table, State, J) /= 0 loop
759
               J := J + 1;
760
            end loop;
761
 
762
            Set (Table, State, J, To_State);
763
         end Add_Empty_Char;
764
 
765
         -----------------------
766
         -- Create_Repetition --
767
         -----------------------
768
 
769
         procedure Create_Repetition
770
           (Repetition : Character;
771
            Start_Prev : State_Index;
772
            End_Prev   : State_Index;
773
            New_Start  : out State_Index;
774
            New_End    : in out State_Index)
775
         is
776
         begin
777
            New_Start := Current_State + 1;
778
 
779
            if New_End /= 0 then
780
               Add_Empty_Char (New_End, New_Start);
781
            end if;
782
 
783
            Current_State := Current_State + 2;
784
            New_End   := Current_State;
785
 
786
            Add_Empty_Char (End_Prev, New_End);
787
            Add_Empty_Char (New_Start, Start_Prev);
788
 
789
            if Repetition /= '+' then
790
               Add_Empty_Char (New_Start, New_End);
791
            end if;
792
 
793
            if Repetition /= '?' then
794
               Add_Empty_Char (New_End, New_Start);
795
            end if;
796
         end Create_Repetition;
797
 
798
         -------------------
799
         -- Create_Simple --
800
         -------------------
801
 
802
         procedure Create_Simple
803
           (Start_Index : Integer;
804
            End_Index   : Integer;
805
            Start_State : out State_Index;
806
            End_State   : out State_Index)
807
         is
808
            J          : Integer := Start_Index;
809
            Last_Start : State_Index := 0;
810
 
811
         begin
812
            Start_State := 0;
813
            End_State   := 0;
814
            while J <= End_Index loop
815
               case S (J) is
816
                  when Open_Paren =>
817
                     declare
818
                        J_Start    : constant Integer := J + 1;
819
                        Next_Start : State_Index;
820
                        Next_End   : State_Index;
821
 
822
                     begin
823
                        J := Next_Sub_Expression (J, End_Index);
824
                        Create_Simple (J_Start, J - 1, Next_Start, Next_End);
825
 
826
                        if J < End_Index
827
                          and then (S (J + 1) = '*' or else
828
                                    S (J + 1) = '+' or else
829
                                    S (J + 1) = '?')
830
                        then
831
                           J := J + 1;
832
                           Create_Repetition
833
                             (S (J),
834
                              Next_Start,
835
                              Next_End,
836
                              Last_Start,
837
                              End_State);
838
 
839
                        else
840
                           Last_Start := Next_Start;
841
 
842
                           if End_State /= 0 then
843
                              Add_Empty_Char (End_State, Last_Start);
844
                           end if;
845
 
846
                           End_State := Next_End;
847
                        end if;
848
                     end;
849
 
850
                  when '|' =>
851
                     declare
852
                        Start_Prev : constant State_Index := Start_State;
853
                        End_Prev   : constant State_Index := End_State;
854
                        Start_J    : constant Integer     := J + 1;
855
                        Start_Next : State_Index := 0;
856
                        End_Next   : State_Index := 0;
857
 
858
                     begin
859
                        J := Next_Sub_Expression (J, End_Index);
860
 
861
                        --  Create a new state for the start of the alternative
862
 
863
                        Current_State := Current_State + 1;
864
                        Last_Start := Current_State;
865
                        Start_State := Last_Start;
866
 
867
                        --  Create the tree for the second part of alternative
868
 
869
                        Create_Simple (Start_J, J, Start_Next, End_Next);
870
 
871
                        --  Create the end state
872
 
873
                        Add_Empty_Char (Last_Start, Start_Next);
874
                        Add_Empty_Char (Last_Start, Start_Prev);
875
                        Current_State := Current_State + 1;
876
                        End_State := Current_State;
877
                        Add_Empty_Char (End_Prev, End_State);
878
                        Add_Empty_Char (End_Next, End_State);
879
                     end;
880
 
881
                  when Open_Bracket =>
882
                     Current_State := Current_State + 1;
883
 
884
                     declare
885
                        Next_State : State_Index := Current_State + 1;
886
 
887
                     begin
888
                        J := J + 1;
889
 
890
                        if S (J) = '^' then
891
                           J := J + 1;
892
 
893
                           Next_State := 0;
894
 
895
                           for Column in 0 .. Alphabet_Size loop
896
                              Set (Table, Current_State, Column,
897
                                   Value => Current_State + 1);
898
                           end loop;
899
                        end if;
900
 
901
                        --  Automatically add the first character
902
 
903
                        if S (J) = '-' or else S (J) = ']' then
904
                           Set (Table, Current_State, Map (S (J)),
905
                                Value => Next_State);
906
                           J := J + 1;
907
                        end if;
908
 
909
                        --  Loop till closing bracket found
910
 
911
                        loop
912
                           exit when S (J) = Close_Bracket;
913
 
914
                           if S (J) = '-'
915
                             and then S (J + 1) /= ']'
916
                           then
917
                              declare
918
                                 Start : constant Integer := J - 1;
919
 
920
                              begin
921
                                 J := J + 1;
922
 
923
                                 if S (J) = '\' then
924
                                    J := J + 1;
925
                                 end if;
926
 
927
                                 for Char in S (Start) .. S (J) loop
928
                                    Set (Table, Current_State, Map (Char),
929
                                         Value => Next_State);
930
                                 end loop;
931
                              end;
932
 
933
                           else
934
                              if S (J) = '\' then
935
                                 J := J + 1;
936
                              end if;
937
 
938
                              Set (Table, Current_State, Map (S (J)),
939
                                   Value => Next_State);
940
                           end if;
941
                           J := J + 1;
942
                        end loop;
943
                     end;
944
 
945
                     Current_State := Current_State + 1;
946
 
947
                     --  If the next symbol is a special symbol
948
 
949
                     if J < End_Index
950
                       and then (S (J + 1) = '*' or else
951
                                 S (J + 1) = '+' or else
952
                                 S (J + 1) = '?')
953
                     then
954
                        J := J + 1;
955
                        Create_Repetition
956
                          (S (J),
957
                           Current_State - 1,
958
                           Current_State,
959
                           Last_Start,
960
                           End_State);
961
 
962
                     else
963
                        Last_Start := Current_State - 1;
964
 
965
                        if End_State /= 0 then
966
                           Add_Empty_Char (End_State, Last_Start);
967
                        end if;
968
 
969
                        End_State := Current_State;
970
                     end if;
971
 
972
                  when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
973
                     Raise_Exception
974
                       ("Incorrect character in regular expression :", J);
975
 
976
                  when others =>
977
                     Current_State := Current_State + 1;
978
 
979
                     --  Create the state for the symbol S (J)
980
 
981
                     if S (J) = '.' then
982
                        for K in 0 .. Alphabet_Size loop
983
                           Set (Table, Current_State, K,
984
                                Value => Current_State + 1);
985
                        end loop;
986
 
987
                     else
988
                        if S (J) = '\' then
989
                           J := J + 1;
990
                        end if;
991
 
992
                        Set (Table, Current_State, Map (S (J)),
993
                             Value => Current_State + 1);
994
                     end if;
995
 
996
                     Current_State := Current_State + 1;
997
 
998
                     --  If the next symbol is a special symbol
999
 
1000
                     if J < End_Index
1001
                       and then (S (J + 1) = '*' or else
1002
                                 S (J + 1) = '+' or else
1003
                                 S (J + 1) = '?')
1004
                     then
1005
                        J := J + 1;
1006
                        Create_Repetition
1007
                          (S (J),
1008
                           Current_State - 1,
1009
                           Current_State,
1010
                           Last_Start,
1011
                           End_State);
1012
 
1013
                     else
1014
                        Last_Start := Current_State - 1;
1015
 
1016
                        if End_State /= 0 then
1017
                           Add_Empty_Char (End_State, Last_Start);
1018
                        end if;
1019
 
1020
                        End_State := Current_State;
1021
                     end if;
1022
 
1023
               end case;
1024
 
1025
               if Start_State = 0 then
1026
                  Start_State := Last_Start;
1027
               end if;
1028
 
1029
               J := J + 1;
1030
            end loop;
1031
         end Create_Simple;
1032
 
1033
         -------------------------
1034
         -- Next_Sub_Expression --
1035
         -------------------------
1036
 
1037
         function Next_Sub_Expression
1038
           (Start_Index : Integer;
1039
            End_Index   : Integer)
1040
            return        Integer
1041
         is
1042
            J              : Integer := Start_Index;
1043
            Start_On_Alter : Boolean := False;
1044
 
1045
         begin
1046
            if S (J) = '|' then
1047
               Start_On_Alter := True;
1048
            end if;
1049
 
1050
            loop
1051
               exit when J = End_Index;
1052
               J := J + 1;
1053
 
1054
               case S (J) is
1055
                  when '\' =>
1056
                     J := J + 1;
1057
 
1058
                  when Open_Bracket =>
1059
                     loop
1060
                        J := J + 1;
1061
                        exit when S (J) = Close_Bracket;
1062
 
1063
                        if S (J) = '\' then
1064
                           J := J + 1;
1065
                        end if;
1066
                     end loop;
1067
 
1068
                  when Open_Paren =>
1069
                     J := Next_Sub_Expression (J, End_Index);
1070
 
1071
                  when Close_Paren =>
1072
                     return J;
1073
 
1074
                  when '|' =>
1075
                     if Start_On_Alter then
1076
                        return J - 1;
1077
                     end if;
1078
 
1079
                  when others =>
1080
                     null;
1081
               end case;
1082
            end loop;
1083
 
1084
            return J;
1085
         end Next_Sub_Expression;
1086
 
1087
      --  Start of Create_Primary_Table
1088
 
1089
      begin
1090
         Table.all := (others => (others => 0));
1091
         Create_Simple (S'First, S'Last, Start_State, End_State);
1092
         Num_States := Current_State;
1093
      end Create_Primary_Table;
1094
 
1095
      -------------------------------
1096
      -- Create_Primary_Table_Glob --
1097
      -------------------------------
1098
 
1099
      procedure Create_Primary_Table_Glob
1100
        (Table       : out Regexp_Array_Access;
1101
         Num_States  : out State_Index;
1102
         Start_State : out State_Index;
1103
         End_State   : out State_Index)
1104
      is
1105
         Empty_Char : constant Column_Index := Alphabet_Size + 1;
1106
 
1107
         Current_State : State_Index := 0;
1108
         --  Index of the last created state
1109
 
1110
         procedure Add_Empty_Char
1111
           (State    : State_Index;
1112
            To_State : State_Index);
1113
         --  Add a empty-character transition from State to To_State
1114
 
1115
         procedure Create_Simple
1116
           (Start_Index : Integer;
1117
            End_Index   : Integer;
1118
            Start_State : out State_Index;
1119
            End_State   : out State_Index);
1120
         --  Fill the table for the S (Start_Index .. End_Index).
1121
         --  This is the recursive procedure called to handle () expressions
1122
 
1123
         --------------------
1124
         -- Add_Empty_Char --
1125
         --------------------
1126
 
1127
         procedure Add_Empty_Char
1128
           (State    : State_Index;
1129
            To_State : State_Index)
1130
         is
1131
            J : Column_Index := Empty_Char;
1132
 
1133
         begin
1134
            while Get (Table, State, J) /= 0 loop
1135
               J := J + 1;
1136
            end loop;
1137
 
1138
            Set (Table, State, J,
1139
                 Value => To_State);
1140
         end Add_Empty_Char;
1141
 
1142
         -------------------
1143
         -- Create_Simple --
1144
         -------------------
1145
 
1146
         procedure Create_Simple
1147
           (Start_Index : Integer;
1148
            End_Index   : Integer;
1149
            Start_State : out State_Index;
1150
            End_State   : out State_Index)
1151
         is
1152
            J          : Integer := Start_Index;
1153
            Last_Start : State_Index := 0;
1154
 
1155
         begin
1156
            Start_State := 0;
1157
            End_State   := 0;
1158
 
1159
            while J <= End_Index loop
1160
               case S (J) is
1161
 
1162
                  when Open_Bracket =>
1163
                     Current_State := Current_State + 1;
1164
 
1165
                     declare
1166
                        Next_State : State_Index := Current_State + 1;
1167
 
1168
                     begin
1169
                        J := J + 1;
1170
 
1171
                        if S (J) = '^' then
1172
                           J := J + 1;
1173
                           Next_State := 0;
1174
 
1175
                           for Column in 0 .. Alphabet_Size loop
1176
                              Set (Table, Current_State, Column,
1177
                                   Value => Current_State + 1);
1178
                           end loop;
1179
                        end if;
1180
 
1181
                        --  Automatically add the first character
1182
 
1183
                        if S (J) = '-' or else S (J) = ']' then
1184
                           Set (Table, Current_State, Map (S (J)),
1185
                                Value => Current_State);
1186
                           J := J + 1;
1187
                        end if;
1188
 
1189
                        --  Loop till closing bracket found
1190
 
1191
                        loop
1192
                           exit when S (J) = Close_Bracket;
1193
 
1194
                           if S (J) = '-'
1195
                             and then S (J + 1) /= ']'
1196
                           then
1197
                              declare
1198
                                 Start : constant Integer := J - 1;
1199
                              begin
1200
                                 J := J + 1;
1201
 
1202
                                 if S (J) = '\' then
1203
                                    J := J + 1;
1204
                                 end if;
1205
 
1206
                                 for Char in S (Start) .. S (J) loop
1207
                                    Set (Table, Current_State, Map (Char),
1208
                                         Value => Next_State);
1209
                                 end loop;
1210
                              end;
1211
 
1212
                           else
1213
                              if S (J) = '\' then
1214
                                 J := J + 1;
1215
                              end if;
1216
 
1217
                              Set (Table, Current_State, Map (S (J)),
1218
                                   Value => Next_State);
1219
                           end if;
1220
                           J := J + 1;
1221
                        end loop;
1222
                     end;
1223
 
1224
                     Last_Start := Current_State;
1225
                     Current_State := Current_State + 1;
1226
 
1227
                     if End_State /= 0 then
1228
                        Add_Empty_Char (End_State, Last_Start);
1229
                     end if;
1230
 
1231
                     End_State := Current_State;
1232
 
1233
                  when '{' =>
1234
                     declare
1235
                        End_Sub          : Integer;
1236
                        Start_Regexp_Sub : State_Index;
1237
                        End_Regexp_Sub   : State_Index;
1238
                        Create_Start     : State_Index := 0;
1239
 
1240
                        Create_End : State_Index := 0;
1241
                        --  Initialized to avoid junk warning
1242
 
1243
                     begin
1244
                        while S (J) /= '}' loop
1245
 
1246
                           --  First step : find sub pattern
1247
 
1248
                           End_Sub := J + 1;
1249
                           while S (End_Sub) /= ','
1250
                             and then S (End_Sub) /= '}'
1251
                           loop
1252
                              End_Sub := End_Sub + 1;
1253
                           end loop;
1254
 
1255
                           --  Second step : create a sub pattern
1256
 
1257
                           Create_Simple
1258
                             (J + 1,
1259
                              End_Sub - 1,
1260
                              Start_Regexp_Sub,
1261
                              End_Regexp_Sub);
1262
 
1263
                           J := End_Sub;
1264
 
1265
                           --  Third step : create an alternative
1266
 
1267
                           if Create_Start = 0 then
1268
                              Current_State := Current_State + 1;
1269
                              Create_Start := Current_State;
1270
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1271
                              Current_State := Current_State + 1;
1272
                              Create_End := Current_State;
1273
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1274
 
1275
                           else
1276
                              Current_State := Current_State + 1;
1277
                              Add_Empty_Char (Current_State, Create_Start);
1278
                              Create_Start := Current_State;
1279
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1280
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1281
                           end if;
1282
                        end loop;
1283
 
1284
                        if End_State /= 0 then
1285
                           Add_Empty_Char (End_State, Create_Start);
1286
                        end if;
1287
 
1288
                        End_State := Create_End;
1289
                        Last_Start := Create_Start;
1290
                     end;
1291
 
1292
                  when '*' =>
1293
                     Current_State := Current_State + 1;
1294
 
1295
                     if End_State /= 0 then
1296
                        Add_Empty_Char (End_State, Current_State);
1297
                     end if;
1298
 
1299
                     Add_Empty_Char (Current_State, Current_State + 1);
1300
                     Add_Empty_Char (Current_State, Current_State + 3);
1301
                     Last_Start := Current_State;
1302
 
1303
                     Current_State := Current_State + 1;
1304
 
1305
                     for K in 0 .. Alphabet_Size loop
1306
                        Set (Table, Current_State, K,
1307
                             Value => Current_State + 1);
1308
                     end loop;
1309
 
1310
                     Current_State := Current_State + 1;
1311
                     Add_Empty_Char (Current_State, Current_State + 1);
1312
 
1313
                     Current_State := Current_State + 1;
1314
                     Add_Empty_Char (Current_State,  Last_Start);
1315
                     End_State := Current_State;
1316
 
1317
                  when others =>
1318
                     Current_State := Current_State + 1;
1319
 
1320
                     if S (J) = '?' then
1321
                        for K in 0 .. Alphabet_Size loop
1322
                           Set (Table, Current_State, K,
1323
                                Value => Current_State + 1);
1324
                        end loop;
1325
 
1326
                     else
1327
                        if S (J) = '\' then
1328
                           J := J + 1;
1329
                        end if;
1330
 
1331
                        --  Create the state for the symbol S (J)
1332
 
1333
                        Set (Table, Current_State, Map (S (J)),
1334
                             Value => Current_State + 1);
1335
                     end if;
1336
 
1337
                     Last_Start := Current_State;
1338
                     Current_State := Current_State + 1;
1339
 
1340
                     if End_State /= 0 then
1341
                        Add_Empty_Char (End_State, Last_Start);
1342
                     end if;
1343
 
1344
                     End_State := Current_State;
1345
 
1346
               end case;
1347
 
1348
               if Start_State = 0 then
1349
                  Start_State := Last_Start;
1350
               end if;
1351
 
1352
               J := J + 1;
1353
            end loop;
1354
         end Create_Simple;
1355
 
1356
      --  Start of processing for Create_Primary_Table_Glob
1357
 
1358
      begin
1359
         Table.all := (others => (others => 0));
1360
         Create_Simple (S'First, S'Last, Start_State, End_State);
1361
         Num_States := Current_State;
1362
      end Create_Primary_Table_Glob;
1363
 
1364
      ----------------------------
1365
      -- Create_Secondary_Table --
1366
      ----------------------------
1367
 
1368
      function Create_Secondary_Table
1369
        (First_Table : Regexp_Array_Access;
1370
         Num_States  : State_Index;
1371
         Start_State : State_Index;
1372
         End_State   : State_Index) return Regexp
1373
      is
1374
         pragma Warnings (Off, Num_States);
1375
 
1376
         Last_Index : constant State_Index := First_Table'Last (1);
1377
         type Meta_State is array (1 .. Last_Index) of Boolean;
1378
 
1379
         Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1380
                   (others => (others => 0));
1381
 
1382
         Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1383
                         (others => (others => False));
1384
 
1385
         Temp_State_Not_Null : Boolean;
1386
 
1387
         Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1388
 
1389
         Current_State       : State_Index := 1;
1390
         Nb_State            : State_Index := 1;
1391
 
1392
         procedure Closure
1393
           (State : in out Meta_State;
1394
            Item  :        State_Index);
1395
         --  Compute the closure of the state (that is every other state which
1396
         --  has a empty-character transition) and add it to the state
1397
 
1398
         -------------
1399
         -- Closure --
1400
         -------------
1401
 
1402
         procedure Closure
1403
           (State : in out Meta_State;
1404
            Item  : State_Index)
1405
         is
1406
         begin
1407
            if State (Item) then
1408
               return;
1409
            end if;
1410
 
1411
            State (Item) := True;
1412
 
1413
            for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1414
               if First_Table (Item, Column) = 0 then
1415
                  return;
1416
               end if;
1417
 
1418
               Closure (State, First_Table (Item, Column));
1419
            end loop;
1420
         end Closure;
1421
 
1422
      --  Start of processing for Create_Secondary_Table
1423
 
1424
      begin
1425
         --  Create a new state
1426
 
1427
         Closure (Meta_States (Current_State), Start_State);
1428
 
1429
         while Current_State <= Nb_State loop
1430
 
1431
            --  If this new meta-state includes the primary table end state,
1432
            --  then this meta-state will be a final state in the regexp
1433
 
1434
            if Meta_States (Current_State)(End_State) then
1435
               Is_Final (Current_State) := True;
1436
            end if;
1437
 
1438
            --  For every character in the regexp, calculate the possible
1439
            --  transitions from Current_State
1440
 
1441
            for Column in 0 .. Alphabet_Size loop
1442
               Meta_States (Nb_State + 1) := (others => False);
1443
               Temp_State_Not_Null := False;
1444
 
1445
               for K in Meta_States (Current_State)'Range loop
1446
                  if Meta_States (Current_State)(K)
1447
                    and then First_Table (K, Column) /= 0
1448
                  then
1449
                     Closure
1450
                       (Meta_States (Nb_State + 1), First_Table (K, Column));
1451
                     Temp_State_Not_Null := True;
1452
                  end if;
1453
               end loop;
1454
 
1455
               --  If at least one transition existed
1456
 
1457
               if Temp_State_Not_Null then
1458
 
1459
                  --  Check if this new state corresponds to an old one
1460
 
1461
                  for K in 1 .. Nb_State loop
1462
                     if Meta_States (K) = Meta_States (Nb_State + 1) then
1463
                        Table (Current_State, Column) := K;
1464
                        exit;
1465
                     end if;
1466
                  end loop;
1467
 
1468
                  --  If not, create a new state
1469
 
1470
                  if Table (Current_State, Column) = 0 then
1471
                     Nb_State := Nb_State + 1;
1472
                     Table (Current_State, Column) := Nb_State;
1473
                  end if;
1474
               end if;
1475
            end loop;
1476
 
1477
            Current_State := Current_State + 1;
1478
         end loop;
1479
 
1480
         --  Returns the regexp
1481
 
1482
         declare
1483
            R : Regexp_Access;
1484
 
1485
         begin
1486
            R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1487
                                   Num_States    => Nb_State);
1488
            R.Map            := Map;
1489
            R.Is_Final       := Is_Final (1 .. Nb_State);
1490
            R.Case_Sensitive := Case_Sensitive;
1491
 
1492
            for State in 1 .. Nb_State loop
1493
               for K in 0 .. Alphabet_Size loop
1494
                  R.States (State, K) := Table (State, K);
1495
               end loop;
1496
            end loop;
1497
 
1498
            return (Ada.Finalization.Controlled with R => R);
1499
         end;
1500
      end Create_Secondary_Table;
1501
 
1502
      ---------------------
1503
      -- Raise_Exception --
1504
      ---------------------
1505
 
1506
      procedure Raise_Exception (M : String; Index : Integer) is
1507
      begin
1508
         raise Error_In_Regexp with M & " at offset" & Index'Img;
1509
      end Raise_Exception;
1510
 
1511
   --  Start of processing for Compile
1512
 
1513
   begin
1514
      --  Special case for the empty string: it always matches, and the
1515
      --  following processing would fail on it.
1516
      if S = "" then
1517
         return (Ada.Finalization.Controlled with
1518
                 R => new Regexp_Value'
1519
                      (Alphabet_Size => 0,
1520
                       Num_States    => 1,
1521
                       Map           => (others => 0),
1522
                       States        => (others => (others => 1)),
1523
                       Is_Final      => (others => True),
1524
                       Case_Sensitive => True));
1525
      end if;
1526
 
1527
      if not Case_Sensitive then
1528
         System.Case_Util.To_Lower (S);
1529
      end if;
1530
 
1531
      --  Check the pattern is well-formed before any treatment
1532
 
1533
      Check_Well_Formed_Pattern;
1534
 
1535
      Create_Mapping;
1536
 
1537
      --  Creates the primary table
1538
 
1539
      declare
1540
         Table       : Regexp_Array_Access;
1541
         Num_States  : State_Index;
1542
         Start_State : State_Index;
1543
         End_State   : State_Index;
1544
         R           : Regexp;
1545
 
1546
      begin
1547
         Table := new Regexp_Array (1 .. 100,
1548
 
1549
         if not Glob then
1550
            Create_Primary_Table (Table, Num_States, Start_State, End_State);
1551
         else
1552
            Create_Primary_Table_Glob
1553
              (Table, Num_States, Start_State, End_State);
1554
         end if;
1555
 
1556
         --  Creates the secondary table
1557
 
1558
         R := Create_Secondary_Table
1559
           (Table, Num_States, Start_State, End_State);
1560
         Free (Table);
1561
         return R;
1562
      end;
1563
   end Compile;
1564
 
1565
   --------------
1566
   -- Finalize --
1567
   --------------
1568
 
1569
   procedure Finalize (R : in out Regexp) is
1570
      procedure Free is new
1571
        Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1572
 
1573
   begin
1574
      Free (R.R);
1575
   end Finalize;
1576
 
1577
   ---------
1578
   -- Get --
1579
   ---------
1580
 
1581
   function Get
1582
     (Table  : Regexp_Array_Access;
1583
      State  : State_Index;
1584
      Column : Column_Index) return State_Index
1585
   is
1586
   begin
1587
      if State <= Table'Last (1)
1588
        and then Column <= Table'Last (2)
1589
      then
1590
         return Table (State, Column);
1591
      else
1592
         return 0;
1593
      end if;
1594
   end Get;
1595
 
1596
   -----------
1597
   -- Match --
1598
   -----------
1599
 
1600
   function Match (S : String; R : Regexp) return Boolean is
1601
      Current_State : State_Index := 1;
1602
 
1603
   begin
1604
      if R.R = null then
1605
         raise Constraint_Error;
1606
      end if;
1607
 
1608
      for Char in S'Range loop
1609
 
1610
         if R.R.Case_Sensitive then
1611
            Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1612
         else
1613
            Current_State :=
1614
              R.R.States (Current_State,
1615
                          R.R.Map (System.Case_Util.To_Lower (S (Char))));
1616
         end if;
1617
 
1618
         if Current_State = 0 then
1619
            return False;
1620
         end if;
1621
 
1622
      end loop;
1623
 
1624
      return R.R.Is_Final (Current_State);
1625
   end Match;
1626
 
1627
   ---------
1628
   -- Set --
1629
   ---------
1630
 
1631
   procedure Set
1632
     (Table  : in out Regexp_Array_Access;
1633
      State  : State_Index;
1634
      Column : Column_Index;
1635
      Value  : State_Index)
1636
   is
1637
      New_Lines   : State_Index;
1638
      New_Columns : Column_Index;
1639
      New_Table   : Regexp_Array_Access;
1640
 
1641
   begin
1642
      if State <= Table'Last (1)
1643
        and then Column <= Table'Last (2)
1644
      then
1645
         Table (State, Column) := Value;
1646
      else
1647
         --  Doubles the size of the table until it is big enough that
1648
         --  (State, Column) is a valid index
1649
 
1650
         New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1651
         New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1652
         New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1653
                                        Table'First (2) .. New_Columns);
1654
         New_Table.all := (others => (others => 0));
1655
 
1656
         for J in Table'Range (1) loop
1657
            for K in Table'Range (2) loop
1658
               New_Table (J, K) := Table (J, K);
1659
            end loop;
1660
         end loop;
1661
 
1662
         Free (Table);
1663
         Table := New_Table;
1664
         Table (State, Column) := Value;
1665
      end if;
1666
   end Set;
1667
 
1668
end System.Regexp;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.