OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [ada/] [g-regpat.adb] - Blame information for rev 16

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT LIBRARY COMPONENTS                          --
4
--                                                                          --
5
--                          G N A T . R E G P A T                           --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--               Copyright (C) 1986 by University of Toronto.               --
10
--                      Copyright (C) 1999-2005, AdaCore                    --
11
--                                                                          --
12
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
13
-- terms of the  GNU General Public License as published  by the Free Soft- --
14
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
15
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
16
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
17
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
18
-- for  more details.  You should have  received  a copy of the GNU General --
19
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
20
-- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
21
-- Boston, MA 02110-1301, USA.                                              --
22
--                                                                          --
23
-- As a special exception,  if other files  instantiate  generics from this --
24
-- unit, or you link  this unit with other files  to produce an executable, --
25
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
26
-- covered  by the  GNU  General  Public  License.  This exception does not --
27
-- however invalidate  any other reasons why  the executable file  might be --
28
-- covered by the  GNU Public License.                                      --
29
--                                                                          --
30
-- GNAT was originally developed  by the GNAT team at  New York University. --
31
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
32
--                                                                          --
33
------------------------------------------------------------------------------
34
 
35
--  This is an altered Ada 95 version of the original V8 style regular
36
--  expression library written in C by Henry Spencer. Apart from the
37
--  translation to Ada, the interface has been considerably changed to
38
--  use the Ada String type instead of C-style nul-terminated strings.
39
 
40
--  Beware that some of this code is subtly aware of the way operator
41
--  precedence is structured in regular expressions. Serious changes in
42
--  regular-expression syntax might require a total rethink.
43
 
44
with System.IO;               use System.IO;
45
with Ada.Characters.Handling; use Ada.Characters.Handling;
46
with Unchecked_Conversion;
47
 
48
package body GNAT.Regpat is
49
 
50
   MAGIC : constant Character := Character'Val (10#0234#);
51
   --  The first byte of the regexp internal "program" is actually
52
   --  this magic number; the start node begins in the second byte.
53
   --
54
   --  This is used to make sure that a regular expression was correctly
55
   --  compiled.
56
 
57
   ----------------------------
58
   -- Implementation details --
59
   ----------------------------
60
 
61
   --  This is essentially a linear encoding of a nondeterministic
62
   --  finite-state machine, also known as syntax charts or
63
   --  "railroad normal form" in parsing technology.
64
 
65
   --  Each node is an opcode plus a "next" pointer, possibly plus an
66
   --  operand. "Next" pointers of all nodes except BRANCH implement
67
   --  concatenation; a "next" pointer with a BRANCH on both ends of it
68
   --  is connecting two alternatives.
69
 
70
   --  The operand of some types of node is a literal string; for others,
71
   --  it is a node leading into a sub-FSM. In particular, the operand of
72
   --  a BRANCH node is the first node of the branch.
73
   --  (NB this is *not* a tree structure:  the tail of the branch connects
74
   --  to the thing following the set of BRANCHes).
75
 
76
   --  You can see the exact byte-compiled version by using the Dump
77
   --  subprogram. However, here are a few examples:
78
 
79
   --  (a|b):  1 : MAGIC
80
   --          2 : BRANCH  (next at  10)
81
   --          5 :    EXACT  (next at  18)   operand=a
82
   --         10 : BRANCH  (next at  18)
83
   --         13 :    EXACT  (next at  18)   operand=b
84
   --         18 : EOP  (next at 0)
85
   --
86
   --  (ab)*:  1 : MAGIC
87
   --          2 : CURLYX  (next at  26)  { 0, 32767}
88
   --          9 :    OPEN 1  (next at  13)
89
   --         13 :       EXACT  (next at  19)   operand=ab
90
   --         19 :    CLOSE 1  (next at  23)
91
   --         23 :    WHILEM  (next at 0)
92
   --         26 : NOTHING  (next at  29)
93
   --         29 : EOP  (next at 0)
94
 
95
   --  The opcodes are:
96
 
97
   type Opcode is
98
 
99
      --  Name          Operand?  Meaning
100
 
101
     (EOP,        -- no        End of program
102
      MINMOD,     -- no        Next operator is not greedy
103
 
104
      --  Classes of characters
105
 
106
      ANY,        -- no        Match any one character except newline
107
      SANY,       -- no        Match any character, including new line
108
      ANYOF,      -- class     Match any character in this class
109
      EXACT,      -- str       Match this string exactly
110
      EXACTF,     -- str       Match this string (case-folding is one)
111
      NOTHING,    -- no        Match empty string
112
      SPACE,      -- no        Match any whitespace character
113
      NSPACE,     -- no        Match any non-whitespace character
114
      DIGIT,      -- no        Match any numeric character
115
      NDIGIT,     -- no        Match any non-numeric character
116
      ALNUM,      -- no        Match any alphanumeric character
117
      NALNUM,     -- no        Match any non-alphanumeric character
118
 
119
      --  Branches
120
 
121
      BRANCH,     -- node      Match this alternative, or the next
122
 
123
      --  Simple loops (when the following node is one character in length)
124
 
125
      STAR,       -- node      Match this simple thing 0 or more times
126
      PLUS,       -- node      Match this simple thing 1 or more times
127
      CURLY,      -- 2num node Match this simple thing between n and m times.
128
 
129
      --  Complex loops
130
 
131
      CURLYX,     -- 2num node Match this complex thing {n,m} times
132
      --                       The nums are coded on two characters each
133
 
134
      WHILEM,     -- no        Do curly processing and see if rest matches
135
 
136
      --  Matches after or before a word
137
 
138
      BOL,        -- no        Match "" at beginning of line
139
      MBOL,       -- no        Same, assuming mutiline (match after \n)
140
      SBOL,       -- no        Same, assuming single line (don't match at \n)
141
      EOL,        -- no        Match "" at end of line
142
      MEOL,       -- no        Same, assuming mutiline (match before \n)
143
      SEOL,       -- no        Same, assuming single line (don't match at \n)
144
 
145
      BOUND,      -- no        Match "" at any word boundary
146
      NBOUND,     -- no        Match "" at any word non-boundary
147
 
148
      --  Parenthesis groups handling
149
 
150
      REFF,       -- num       Match some already matched string, folded
151
      OPEN,       -- num       Mark this point in input as start of #n
152
      CLOSE);     -- num       Analogous to OPEN
153
 
154
   for Opcode'Size use 8;
155
 
156
   --  Opcode notes:
157
 
158
   --  BRANCH
159
   --    The set of branches constituting a single choice are hooked
160
   --    together with their "next" pointers, since precedence prevents
161
   --    anything being concatenated to any individual branch. The
162
   --    "next" pointer of the last BRANCH in a choice points to the
163
   --    thing following the whole choice. This is also where the
164
   --    final "next" pointer of each individual branch points; each
165
   --    branch starts with the operand node of a BRANCH node.
166
 
167
   --  STAR,PLUS
168
   --    '?', and complex '*' and '+', are implemented with CURLYX.
169
   --    branches. Simple cases (one character per match) are implemented with
170
   --    STAR and PLUS for speed and to minimize recursive plunges.
171
 
172
   --  OPEN,CLOSE
173
   --    ...are numbered at compile time.
174
 
175
   --  EXACT, EXACTF
176
   --    There are in fact two arguments, the first one is the length (minus
177
   --    one of the string argument), coded on one character, the second
178
   --    argument is the string itself, coded on length + 1 characters.
179
 
180
   --  A node is one char of opcode followed by two chars of "next" pointer.
181
   --  "Next" pointers are stored as two 8-bit pieces, high order first. The
182
   --  value is a positive offset from the opcode of the node containing it.
183
   --  An operand, if any, simply follows the node. (Note that much of the
184
   --  code generation knows about this implicit relationship.)
185
 
186
   --  Using two bytes for the "next" pointer is vast overkill for most
187
   --  things, but allows patterns to get big without disasters.
188
 
189
   -----------------------
190
   -- Character classes --
191
   -----------------------
192
   --  This is the implementation for character classes ([...]) in the
193
   --  syntax for regular expressions. Each character (0..256) has an
194
   --  entry into the table. This makes for a very fast matching
195
   --  algorithm.
196
 
197
   type Class_Byte is mod 256;
198
   type Character_Class is array (Class_Byte range 0 .. 31) of Class_Byte;
199
 
200
   type Bit_Conversion_Array is array (Class_Byte range 0 .. 7) of Class_Byte;
201
   Bit_Conversion : constant Bit_Conversion_Array :=
202
                      (1, 2, 4, 8, 16, 32, 64, 128);
203
 
204
   type Std_Class is (ANYOF_NONE,
205
                      ANYOF_ALNUM,   --  Alphanumeric class [a-zA-Z0-9]
206
                      ANYOF_NALNUM,
207
                      ANYOF_SPACE,   --  Space class [ \t\n\r\f]
208
                      ANYOF_NSPACE,
209
                      ANYOF_DIGIT,   --  Digit class [0-9]
210
                      ANYOF_NDIGIT,
211
                      ANYOF_ALNUMC,  --  Alphanumeric class [a-zA-Z0-9]
212
                      ANYOF_NALNUMC,
213
                      ANYOF_ALPHA,   --  Alpha class [a-zA-Z]
214
                      ANYOF_NALPHA,
215
                      ANYOF_ASCII,   --  Ascii class (7 bits) 0..127
216
                      ANYOF_NASCII,
217
                      ANYOF_CNTRL,   --  Control class
218
                      ANYOF_NCNTRL,
219
                      ANYOF_GRAPH,   --  Graphic class
220
                      ANYOF_NGRAPH,
221
                      ANYOF_LOWER,   --  Lower case class [a-z]
222
                      ANYOF_NLOWER,
223
                      ANYOF_PRINT,   --  printable class
224
                      ANYOF_NPRINT,
225
                      ANYOF_PUNCT,   --
226
                      ANYOF_NPUNCT,
227
                      ANYOF_UPPER,   --  Upper case class [A-Z]
228
                      ANYOF_NUPPER,
229
                      ANYOF_XDIGIT,  --  Hexadecimal digit
230
                      ANYOF_NXDIGIT
231
                      );
232
 
233
   procedure Set_In_Class
234
     (Bitmap : in out Character_Class;
235
      C      : Character);
236
   --  Set the entry to True for C in the class Bitmap
237
 
238
   function Get_From_Class
239
     (Bitmap : Character_Class;
240
      C      : Character) return Boolean;
241
   --  Return True if the entry is set for C in the class Bitmap
242
 
243
   procedure Reset_Class (Bitmap : out Character_Class);
244
   --  Clear all the entries in the class Bitmap
245
 
246
   pragma Inline (Set_In_Class);
247
   pragma Inline (Get_From_Class);
248
   pragma Inline (Reset_Class);
249
 
250
   -----------------------
251
   -- Local Subprograms --
252
   -----------------------
253
 
254
   function "=" (Left : Character; Right : Opcode) return Boolean;
255
 
256
   function Is_Alnum (C : Character) return Boolean;
257
   --  Return True if C is an alphanum character or an underscore ('_')
258
 
259
   function Is_White_Space (C : Character) return Boolean;
260
   --  Return True if C is a whitespace character
261
 
262
   function Is_Printable (C : Character) return Boolean;
263
   --  Return True if C is a printable character
264
 
265
   function Operand (P : Pointer) return Pointer;
266
   --  Return a pointer to the first operand of the node at P
267
 
268
   function String_Length
269
     (Program : Program_Data;
270
      P       : Pointer) return Program_Size;
271
   --  Return the length of the string argument of the node at P
272
 
273
   function String_Operand (P : Pointer) return Pointer;
274
   --  Return a pointer to the string argument of the node at P
275
 
276
   procedure Bitmap_Operand
277
     (Program : Program_Data;
278
      P       : Pointer;
279
      Op      : out Character_Class);
280
   --  Return a pointer to the string argument of the node at P
281
 
282
   function Get_Next_Offset
283
     (Program : Program_Data;
284
      IP      : Pointer) return Pointer;
285
   --  Get the offset field of a node. Used by Get_Next
286
 
287
   function Get_Next
288
     (Program : Program_Data;
289
      IP      : Pointer) return Pointer;
290
   --  Dig the next instruction pointer out of a node
291
 
292
   procedure Optimize (Self : in out Pattern_Matcher);
293
   --  Optimize a Pattern_Matcher by noting certain special cases
294
 
295
   function Read_Natural
296
     (Program : Program_Data;
297
      IP      : Pointer) return Natural;
298
   --  Return the 2-byte natural coded at position IP
299
 
300
   --  All of the subprograms above are tiny and should be inlined
301
 
302
   pragma Inline ("=");
303
   pragma Inline (Is_Alnum);
304
   pragma Inline (Is_White_Space);
305
   pragma Inline (Get_Next);
306
   pragma Inline (Get_Next_Offset);
307
   pragma Inline (Operand);
308
   pragma Inline (Read_Natural);
309
   pragma Inline (String_Length);
310
   pragma Inline (String_Operand);
311
 
312
   type Expression_Flags is record
313
      Has_Width,            -- Known never to match null string
314
      Simple,               -- Simple enough to be STAR/PLUS operand
315
      SP_Start  : Boolean;  -- Starts with * or +
316
   end record;
317
 
318
   Worst_Expression : constant Expression_Flags := (others => False);
319
   --  Worst case
320
 
321
   ---------
322
   -- "=" --
323
   ---------
324
 
325
   function "=" (Left : Character; Right : Opcode) return Boolean is
326
   begin
327
      return Character'Pos (Left) = Opcode'Pos (Right);
328
   end "=";
329
 
330
   --------------------
331
   -- Bitmap_Operand --
332
   --------------------
333
 
334
   procedure Bitmap_Operand
335
     (Program : Program_Data;
336
      P       : Pointer;
337
      Op      : out Character_Class)
338
   is
339
      function Convert is new Unchecked_Conversion
340
        (Program_Data, Character_Class);
341
 
342
   begin
343
      Op (0 .. 31) := Convert (Program (P + 3 .. P + 34));
344
   end Bitmap_Operand;
345
 
346
   -------------
347
   -- Compile --
348
   -------------
349
 
350
   procedure Compile
351
     (Matcher         : out Pattern_Matcher;
352
      Expression      : String;
353
      Final_Code_Size : out Program_Size;
354
      Flags           : Regexp_Flags := No_Flags)
355
   is
356
      --  We can't allocate space until we know how big the compiled form
357
      --  will be, but we can't compile it (and thus know how big it is)
358
      --  until we've got a place to put the code. So we cheat: we compile
359
      --  it twice, once with code generation turned off and size counting
360
      --  turned on, and once "for real".
361
 
362
      --  This also means that we don't allocate space until we are sure
363
      --  that the thing really will compile successfully, and we never
364
      --  have to move the code and thus invalidate pointers into it.
365
 
366
      --  Beware that the optimization-preparation code in here knows
367
      --  about some of the structure of the compiled regexp.
368
 
369
      PM        : Pattern_Matcher renames Matcher;
370
      Program   : Program_Data renames PM.Program;
371
 
372
      Emit_Code : constant Boolean := PM.Size > 0;
373
      Emit_Ptr  : Pointer := Program_First;
374
 
375
      Parse_Pos : Natural := Expression'First; -- Input-scan pointer
376
      Parse_End : constant Natural := Expression'Last;
377
 
378
      ----------------------------
379
      -- Subprograms for Create --
380
      ----------------------------
381
 
382
      procedure Emit (B : Character);
383
      --  Output the Character B to the Program. If code-generation is
384
      --  disabled, simply increments the program counter.
385
 
386
      function  Emit_Node (Op : Opcode) return Pointer;
387
      --  If code-generation is enabled, Emit_Node outputs the
388
      --  opcode Op and reserves space for a pointer to the next node.
389
      --  Return value is the location of new opcode, ie old Emit_Ptr.
390
 
391
      procedure Emit_Natural (IP : Pointer; N : Natural);
392
      --  Split N on two characters at position IP
393
 
394
      procedure Emit_Class (Bitmap : Character_Class);
395
      --  Emits a character class
396
 
397
      procedure Case_Emit (C : Character);
398
      --  Emit C, after converting is to lower-case if the regular
399
      --  expression is case insensitive.
400
 
401
      procedure Parse
402
        (Parenthesized : Boolean;
403
         Flags         : out Expression_Flags;
404
         IP            : out Pointer);
405
      --  Parse regular expression, i.e. main body or parenthesized thing
406
      --  Caller must absorb opening parenthesis.
407
 
408
      procedure Parse_Branch
409
        (Flags         : out Expression_Flags;
410
         First         : Boolean;
411
         IP            : out Pointer);
412
      --  Implements the concatenation operator and handles '|'
413
      --  First should be true if this is the first item of the alternative.
414
 
415
      procedure Parse_Piece
416
        (Expr_Flags : out Expression_Flags;
417
         IP         : out Pointer);
418
      --  Parse something followed by possible [*+?]
419
 
420
      procedure Parse_Atom
421
        (Expr_Flags : out Expression_Flags;
422
         IP         : out Pointer);
423
      --  Parse_Atom is the lowest level parse procedure.
424
      --  Optimization:  gobbles an entire sequence of ordinary characters
425
      --  so that it can turn them into a single node, which is smaller to
426
      --  store and faster to run. Backslashed characters are exceptions,
427
      --  each becoming a separate node; the code is simpler that way and
428
      --  it's not worth fixing.
429
 
430
      procedure Insert_Operator
431
        (Op       : Opcode;
432
         Operand  : Pointer;
433
         Greedy   : Boolean := True);
434
      --  Insert_Operator inserts an operator in front of an
435
      --  already-emitted operand and relocates the operand.
436
      --  This applies to PLUS and STAR.
437
      --  If Minmod is True, then the operator is non-greedy.
438
 
439
      procedure Insert_Curly_Operator
440
        (Op      : Opcode;
441
         Min     : Natural;
442
         Max     : Natural;
443
         Operand : Pointer;
444
         Greedy  : Boolean := True);
445
      --  Insert an operator for CURLY ({Min}, {Min,} or {Min,Max}).
446
      --  If Minmod is True, then the operator is non-greedy.
447
 
448
      procedure Link_Tail (P, Val : Pointer);
449
      --  Link_Tail sets the next-pointer at the end of a node chain
450
 
451
      procedure Link_Operand_Tail (P, Val : Pointer);
452
      --  Link_Tail on operand of first argument; nop if operandless
453
 
454
      function  Next_Instruction (P : Pointer) return Pointer;
455
      --  Dig the "next" pointer out of a node
456
 
457
      procedure Fail (M : String);
458
      pragma No_Return (Fail);
459
      --  Fail with a diagnostic message, if possible
460
 
461
      function Is_Curly_Operator (IP : Natural) return Boolean;
462
      --  Return True if IP is looking at a '{' that is the beginning
463
      --  of a curly operator, ie it matches {\d+,?\d*}
464
 
465
      function Is_Mult (IP : Natural) return Boolean;
466
      --  Return True if C is a regexp multiplier: '+', '*' or '?'
467
 
468
      procedure Get_Curly_Arguments
469
        (IP     : Natural;
470
         Min    : out Natural;
471
         Max    : out Natural;
472
         Greedy : out Boolean);
473
      --  Parse the argument list for a curly operator.
474
      --  It is assumed that IP is indeed pointing at a valid operator.
475
      --  So what is IP and how come IP is not referenced in the body ???
476
 
477
      procedure Parse_Character_Class (IP : out Pointer);
478
      --  Parse a character class.
479
      --  The calling subprogram should consume the opening '[' before.
480
 
481
      procedure Parse_Literal
482
        (Expr_Flags : out Expression_Flags;
483
         IP         : out Pointer);
484
      --  Parse_Literal encodes a string of characters to be matched exactly
485
 
486
      function Parse_Posix_Character_Class return Std_Class;
487
      --  Parse a posic character class, like [:alpha:] or [:^alpha:].
488
      --  The called is suppoed to absorbe the opening [.
489
 
490
      pragma Inline (Is_Mult);
491
      pragma Inline (Emit_Natural);
492
      pragma Inline (Parse_Character_Class); --  since used only once
493
 
494
      ---------------
495
      -- Case_Emit --
496
      ---------------
497
 
498
      procedure Case_Emit (C : Character) is
499
      begin
500
         if (Flags and Case_Insensitive) /= 0 then
501
            Emit (To_Lower (C));
502
 
503
         else
504
            --  Dump current character
505
 
506
            Emit (C);
507
         end if;
508
      end Case_Emit;
509
 
510
      ----------
511
      -- Emit --
512
      ----------
513
 
514
      procedure Emit (B : Character) is
515
      begin
516
         if Emit_Code then
517
            Program (Emit_Ptr) := B;
518
         end if;
519
 
520
         Emit_Ptr := Emit_Ptr + 1;
521
      end Emit;
522
 
523
      ----------------
524
      -- Emit_Class --
525
      ----------------
526
 
527
      procedure Emit_Class (Bitmap : Character_Class) is
528
         subtype Program31 is Program_Data (0 .. 31);
529
 
530
         function Convert is new Unchecked_Conversion
531
           (Character_Class, Program31);
532
 
533
      begin
534
         if Emit_Code then
535
            Program (Emit_Ptr .. Emit_Ptr + 31) := Convert (Bitmap);
536
         end if;
537
 
538
         Emit_Ptr := Emit_Ptr + 32;
539
      end Emit_Class;
540
 
541
      ------------------
542
      -- Emit_Natural --
543
      ------------------
544
 
545
      procedure Emit_Natural (IP : Pointer; N : Natural) is
546
      begin
547
         if Emit_Code then
548
            Program (IP + 1) := Character'Val (N / 256);
549
            Program (IP) := Character'Val (N mod 256);
550
         end if;
551
      end Emit_Natural;
552
 
553
      ---------------
554
      -- Emit_Node --
555
      ---------------
556
 
557
      function Emit_Node (Op : Opcode) return Pointer is
558
         Result : constant Pointer := Emit_Ptr;
559
 
560
      begin
561
         if Emit_Code then
562
            Program (Emit_Ptr) := Character'Val (Opcode'Pos (Op));
563
            Program (Emit_Ptr + 1) := ASCII.NUL;
564
            Program (Emit_Ptr + 2) := ASCII.NUL;
565
         end if;
566
 
567
         Emit_Ptr := Emit_Ptr + 3;
568
         return Result;
569
      end Emit_Node;
570
 
571
      ----------
572
      -- Fail --
573
      ----------
574
 
575
      procedure Fail (M : String) is
576
      begin
577
         raise Expression_Error with M;
578
      end Fail;
579
 
580
      -------------------------
581
      -- Get_Curly_Arguments --
582
      -------------------------
583
 
584
      procedure Get_Curly_Arguments
585
        (IP     : Natural;
586
         Min    : out Natural;
587
         Max    : out Natural;
588
         Greedy : out Boolean)
589
      is
590
         pragma Unreferenced (IP);
591
 
592
         Save_Pos : Natural := Parse_Pos + 1;
593
 
594
      begin
595
         Min := 0;
596
         Max := Max_Curly_Repeat;
597
 
598
         while Expression (Parse_Pos) /= '}'
599
           and then Expression (Parse_Pos) /= ','
600
         loop
601
            Parse_Pos := Parse_Pos + 1;
602
         end loop;
603
 
604
         Min := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
605
 
606
         if Expression (Parse_Pos) = ',' then
607
            Save_Pos := Parse_Pos + 1;
608
            while Expression (Parse_Pos) /= '}' loop
609
               Parse_Pos := Parse_Pos + 1;
610
            end loop;
611
 
612
            if Save_Pos /= Parse_Pos then
613
               Max := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
614
            end if;
615
 
616
         else
617
            Max := Min;
618
         end if;
619
 
620
         if Parse_Pos < Expression'Last
621
           and then Expression (Parse_Pos + 1) = '?'
622
         then
623
            Greedy := False;
624
            Parse_Pos := Parse_Pos + 1;
625
 
626
         else
627
            Greedy := True;
628
         end if;
629
      end Get_Curly_Arguments;
630
 
631
      ---------------------------
632
      -- Insert_Curly_Operator --
633
      ---------------------------
634
 
635
      procedure Insert_Curly_Operator
636
        (Op      : Opcode;
637
         Min     : Natural;
638
         Max     : Natural;
639
         Operand : Pointer;
640
         Greedy  : Boolean := True)
641
      is
642
         Dest   : constant Pointer := Emit_Ptr;
643
         Old    : Pointer;
644
         Size   : Pointer := 7;
645
 
646
      begin
647
         --  If the operand is not greedy, insert an extra operand before it
648
 
649
         if not Greedy then
650
            Size := Size + 3;
651
         end if;
652
 
653
         --  Move the operand in the byte-compilation, so that we can insert
654
         --  the operator before it.
655
 
656
         if Emit_Code then
657
            Program (Operand + Size .. Emit_Ptr + Size) :=
658
              Program (Operand .. Emit_Ptr);
659
         end if;
660
 
661
         --  Insert the operator at the position previously occupied by the
662
         --  operand.
663
 
664
         Emit_Ptr := Operand;
665
 
666
         if not Greedy then
667
            Old := Emit_Node (MINMOD);
668
            Link_Tail (Old, Old + 3);
669
         end if;
670
 
671
         Old := Emit_Node (Op);
672
         Emit_Natural (Old + 3, Min);
673
         Emit_Natural (Old + 5, Max);
674
 
675
         Emit_Ptr := Dest + Size;
676
      end Insert_Curly_Operator;
677
 
678
      ---------------------
679
      -- Insert_Operator --
680
      ---------------------
681
 
682
      procedure Insert_Operator
683
        (Op      : Opcode;
684
         Operand : Pointer;
685
         Greedy  : Boolean := True)
686
      is
687
         Dest   : constant Pointer := Emit_Ptr;
688
         Old    : Pointer;
689
         Size   : Pointer := 3;
690
 
691
      begin
692
         --  If not greedy, we have to emit another opcode first
693
 
694
         if not Greedy then
695
            Size := Size + 3;
696
         end if;
697
 
698
         --  Move the operand in the byte-compilation, so that we can insert
699
         --  the operator before it.
700
 
701
         if Emit_Code then
702
            Program (Operand + Size .. Emit_Ptr + Size) :=
703
              Program (Operand .. Emit_Ptr);
704
         end if;
705
 
706
         --  Insert the operator at the position previously occupied by the
707
         --  operand.
708
 
709
         Emit_Ptr := Operand;
710
 
711
         if not Greedy then
712
            Old := Emit_Node (MINMOD);
713
            Link_Tail (Old, Old + 3);
714
         end if;
715
 
716
         Old := Emit_Node (Op);
717
         Emit_Ptr := Dest + Size;
718
      end Insert_Operator;
719
 
720
      -----------------------
721
      -- Is_Curly_Operator --
722
      -----------------------
723
 
724
      function Is_Curly_Operator (IP : Natural) return Boolean is
725
         Scan : Natural := IP;
726
 
727
      begin
728
         if Expression (Scan) /= '{'
729
           or else Scan + 2 > Expression'Last
730
           or else not Is_Digit (Expression (Scan + 1))
731
         then
732
            return False;
733
         end if;
734
 
735
         Scan := Scan + 1;
736
 
737
         --  The first digit
738
 
739
         loop
740
            Scan := Scan + 1;
741
 
742
            if Scan > Expression'Last then
743
               return False;
744
            end if;
745
 
746
            exit when not Is_Digit (Expression (Scan));
747
         end loop;
748
 
749
         if Expression (Scan) = ',' then
750
            loop
751
               Scan := Scan + 1;
752
 
753
               if Scan > Expression'Last then
754
                  return False;
755
               end if;
756
 
757
               exit when not Is_Digit (Expression (Scan));
758
            end loop;
759
         end if;
760
 
761
         return Expression (Scan) = '}';
762
      end Is_Curly_Operator;
763
 
764
      -------------
765
      -- Is_Mult --
766
      -------------
767
 
768
      function Is_Mult (IP : Natural) return Boolean is
769
         C : constant Character := Expression (IP);
770
 
771
      begin
772
         return     C = '*'
773
           or else  C = '+'
774
           or else  C = '?'
775
           or else (C = '{' and then Is_Curly_Operator (IP));
776
      end Is_Mult;
777
 
778
      -----------------------
779
      -- Link_Operand_Tail --
780
      -----------------------
781
 
782
      procedure Link_Operand_Tail (P, Val : Pointer) is
783
      begin
784
         if Emit_Code and then Program (P) = BRANCH then
785
            Link_Tail (Operand (P), Val);
786
         end if;
787
      end Link_Operand_Tail;
788
 
789
      ---------------
790
      -- Link_Tail --
791
      ---------------
792
 
793
      procedure Link_Tail (P, Val : Pointer) is
794
         Scan   : Pointer;
795
         Temp   : Pointer;
796
         Offset : Pointer;
797
 
798
      begin
799
         if not Emit_Code then
800
            return;
801
         end if;
802
 
803
         --  Find last node
804
 
805
         Scan := P;
806
         loop
807
            Temp := Next_Instruction (Scan);
808
            exit when Temp = 0;
809
            Scan := Temp;
810
         end loop;
811
 
812
         Offset := Val - Scan;
813
 
814
         Emit_Natural (Scan + 1, Natural (Offset));
815
      end Link_Tail;
816
 
817
      ----------------------
818
      -- Next_Instruction --
819
      ----------------------
820
 
821
      function Next_Instruction (P : Pointer) return Pointer is
822
         Offset : Pointer;
823
 
824
      begin
825
         if not Emit_Code then
826
            return 0;
827
         end if;
828
 
829
         Offset := Get_Next_Offset (Program, P);
830
 
831
         if Offset = 0 then
832
            return 0;
833
         end if;
834
 
835
         return P + Offset;
836
      end Next_Instruction;
837
 
838
      -----------
839
      -- Parse --
840
      -----------
841
 
842
      --  Combining parenthesis handling with the base level
843
      --  of regular expression is a trifle forced, but the
844
      --  need to tie the tails of the branches to what follows
845
      --  makes it hard to avoid.
846
 
847
      procedure Parse
848
        (Parenthesized  : Boolean;
849
         Flags          : out Expression_Flags;
850
         IP             : out Pointer)
851
      is
852
         E              : String renames Expression;
853
         Br             : Pointer;
854
         Ender          : Pointer;
855
         Par_No         : Natural;
856
         New_Flags      : Expression_Flags;
857
         Have_Branch    : Boolean := False;
858
 
859
      begin
860
         Flags := (Has_Width => True, others => False);  -- Tentatively
861
 
862
         --  Make an OPEN node, if parenthesized
863
 
864
         if Parenthesized then
865
            if Matcher.Paren_Count > Max_Paren_Count then
866
               Fail ("too many ()");
867
            end if;
868
 
869
            Par_No := Matcher.Paren_Count + 1;
870
            Matcher.Paren_Count := Matcher.Paren_Count + 1;
871
            IP := Emit_Node (OPEN);
872
            Emit (Character'Val (Par_No));
873
 
874
         else
875
            IP := 0;
876
            Par_No := 0;
877
         end if;
878
 
879
         --  Pick up the branches, linking them together
880
 
881
         Parse_Branch (New_Flags, True, Br);
882
 
883
         if Br = 0 then
884
            IP := 0;
885
            return;
886
         end if;
887
 
888
         if Parse_Pos <= Parse_End
889
           and then E (Parse_Pos) = '|'
890
         then
891
            Insert_Operator (BRANCH, Br);
892
            Have_Branch := True;
893
         end if;
894
 
895
         if IP /= 0 then
896
            Link_Tail (IP, Br);   -- OPEN -> first
897
         else
898
            IP := Br;
899
         end if;
900
 
901
         if not New_Flags.Has_Width then
902
            Flags.Has_Width := False;
903
         end if;
904
 
905
         Flags.SP_Start := Flags.SP_Start or New_Flags.SP_Start;
906
 
907
         while Parse_Pos <= Parse_End
908
           and then (E (Parse_Pos) = '|')
909
         loop
910
            Parse_Pos := Parse_Pos + 1;
911
            Parse_Branch (New_Flags, False, Br);
912
 
913
            if Br = 0 then
914
               IP := 0;
915
               return;
916
            end if;
917
 
918
            Link_Tail (IP, Br);   -- BRANCH -> BRANCH
919
 
920
            if not New_Flags.Has_Width then
921
               Flags.Has_Width := False;
922
            end if;
923
 
924
            Flags.SP_Start := Flags.SP_Start or New_Flags.SP_Start;
925
         end loop;
926
 
927
         --  Make a closing node, and hook it on the end
928
 
929
         if Parenthesized then
930
            Ender := Emit_Node (CLOSE);
931
            Emit (Character'Val (Par_No));
932
         else
933
            Ender := Emit_Node (EOP);
934
         end if;
935
 
936
         Link_Tail (IP, Ender);
937
 
938
         if Have_Branch then
939
 
940
            --  Hook the tails of the branches to the closing node
941
 
942
            Br := IP;
943
            loop
944
               exit when Br = 0;
945
               Link_Operand_Tail (Br, Ender);
946
               Br := Next_Instruction (Br);
947
            end loop;
948
         end if;
949
 
950
         --  Check for proper termination
951
 
952
         if Parenthesized then
953
            if Parse_Pos > Parse_End or else E (Parse_Pos) /= ')' then
954
               Fail ("unmatched ()");
955
            end if;
956
 
957
            Parse_Pos := Parse_Pos + 1;
958
 
959
         elsif Parse_Pos <= Parse_End then
960
            if E (Parse_Pos) = ')'  then
961
               Fail ("unmatched ()");
962
            else
963
               Fail ("junk on end");         -- "Can't happen"
964
            end if;
965
         end if;
966
      end Parse;
967
 
968
      ----------------
969
      -- Parse_Atom --
970
      ----------------
971
 
972
      procedure Parse_Atom
973
        (Expr_Flags : out Expression_Flags;
974
         IP         : out Pointer)
975
      is
976
         C : Character;
977
 
978
      begin
979
         --  Tentatively set worst expression case
980
 
981
         Expr_Flags := Worst_Expression;
982
 
983
         C := Expression (Parse_Pos);
984
         Parse_Pos := Parse_Pos + 1;
985
 
986
         case (C) is
987
            when '^' =>
988
               if (Flags and Multiple_Lines) /= 0  then
989
                  IP := Emit_Node (MBOL);
990
               elsif (Flags and Single_Line) /= 0 then
991
                  IP := Emit_Node (SBOL);
992
               else
993
                  IP := Emit_Node (BOL);
994
               end if;
995
 
996
            when '$' =>
997
               if (Flags and Multiple_Lines) /= 0  then
998
                  IP := Emit_Node (MEOL);
999
               elsif (Flags and Single_Line) /= 0 then
1000
                  IP := Emit_Node (SEOL);
1001
               else
1002
                  IP := Emit_Node (EOL);
1003
               end if;
1004
 
1005
            when '.' =>
1006
               if (Flags and Single_Line) /= 0 then
1007
                  IP := Emit_Node (SANY);
1008
               else
1009
                  IP := Emit_Node (ANY);
1010
               end if;
1011
 
1012
               Expr_Flags.Has_Width := True;
1013
               Expr_Flags.Simple := True;
1014
 
1015
            when '[' =>
1016
               Parse_Character_Class (IP);
1017
               Expr_Flags.Has_Width := True;
1018
               Expr_Flags.Simple := True;
1019
 
1020
            when '(' =>
1021
               declare
1022
                  New_Flags : Expression_Flags;
1023
 
1024
               begin
1025
                  Parse (True, New_Flags, IP);
1026
 
1027
                  if IP = 0 then
1028
                     return;
1029
                  end if;
1030
 
1031
                  Expr_Flags.Has_Width :=
1032
                    Expr_Flags.Has_Width or New_Flags.Has_Width;
1033
                  Expr_Flags.SP_Start :=
1034
                    Expr_Flags.SP_Start or New_Flags.SP_Start;
1035
               end;
1036
 
1037
            when '|' | ASCII.LF | ')' =>
1038
               Fail ("internal urp");  --  Supposed to be caught earlier
1039
 
1040
            when '?' | '+' | '*' =>
1041
               Fail (C & " follows nothing");
1042
 
1043
            when '{' =>
1044
               if Is_Curly_Operator (Parse_Pos - 1) then
1045
                  Fail (C & " follows nothing");
1046
               else
1047
                  Parse_Literal (Expr_Flags, IP);
1048
               end if;
1049
 
1050
            when '\' =>
1051
               if Parse_Pos > Parse_End then
1052
                  Fail ("trailing \");
1053
               end if;
1054
 
1055
               Parse_Pos := Parse_Pos + 1;
1056
 
1057
               case Expression (Parse_Pos - 1) is
1058
                  when 'b'        =>
1059
                     IP := Emit_Node (BOUND);
1060
 
1061
                  when 'B'        =>
1062
                     IP := Emit_Node (NBOUND);
1063
 
1064
                  when 's'        =>
1065
                     IP := Emit_Node (SPACE);
1066
                     Expr_Flags.Simple := True;
1067
                     Expr_Flags.Has_Width := True;
1068
 
1069
                  when 'S'        =>
1070
                     IP := Emit_Node (NSPACE);
1071
                     Expr_Flags.Simple := True;
1072
                     Expr_Flags.Has_Width := True;
1073
 
1074
                  when 'd'        =>
1075
                     IP := Emit_Node (DIGIT);
1076
                     Expr_Flags.Simple := True;
1077
                     Expr_Flags.Has_Width := True;
1078
 
1079
                  when 'D'        =>
1080
                     IP := Emit_Node (NDIGIT);
1081
                     Expr_Flags.Simple := True;
1082
                     Expr_Flags.Has_Width := True;
1083
 
1084
                  when 'w'        =>
1085
                     IP := Emit_Node (ALNUM);
1086
                     Expr_Flags.Simple := True;
1087
                     Expr_Flags.Has_Width := True;
1088
 
1089
                  when 'W'        =>
1090
                     IP := Emit_Node (NALNUM);
1091
                     Expr_Flags.Simple := True;
1092
                     Expr_Flags.Has_Width := True;
1093
 
1094
                  when 'A'        =>
1095
                     IP := Emit_Node (SBOL);
1096
 
1097
                  when 'G'        =>
1098
                     IP := Emit_Node (SEOL);
1099
 
1100
                  when '0' .. '9' =>
1101
                     IP := Emit_Node (REFF);
1102
 
1103
                     declare
1104
                        Save : constant Natural := Parse_Pos - 1;
1105
 
1106
                     begin
1107
                        while Parse_Pos <= Expression'Last
1108
                          and then Is_Digit (Expression (Parse_Pos))
1109
                        loop
1110
                           Parse_Pos := Parse_Pos + 1;
1111
                        end loop;
1112
 
1113
                        Emit (Character'Val (Natural'Value
1114
                               (Expression (Save .. Parse_Pos - 1))));
1115
                     end;
1116
 
1117
                  when others =>
1118
                     Parse_Pos := Parse_Pos - 1;
1119
                     Parse_Literal (Expr_Flags, IP);
1120
               end case;
1121
 
1122
            when others =>
1123
               Parse_Literal (Expr_Flags, IP);
1124
         end case;
1125
      end Parse_Atom;
1126
 
1127
      ------------------
1128
      -- Parse_Branch --
1129
      ------------------
1130
 
1131
      procedure Parse_Branch
1132
        (Flags : out Expression_Flags;
1133
         First : Boolean;
1134
         IP    : out Pointer)
1135
      is
1136
         E         : String renames Expression;
1137
         Chain     : Pointer;
1138
         Last      : Pointer;
1139
         New_Flags : Expression_Flags;
1140
 
1141
         Discard : Pointer;
1142
         pragma Warnings (Off, Discard);
1143
 
1144
      begin
1145
         Flags := Worst_Expression;    -- Tentatively
1146
 
1147
         if First then
1148
            IP := Emit_Ptr;
1149
         else
1150
            IP := Emit_Node (BRANCH);
1151
         end if;
1152
 
1153
         Chain := 0;
1154
 
1155
         while Parse_Pos <= Parse_End
1156
           and then E (Parse_Pos) /= ')'
1157
           and then E (Parse_Pos) /= ASCII.LF
1158
           and then E (Parse_Pos) /= '|'
1159
         loop
1160
            Parse_Piece (New_Flags, Last);
1161
 
1162
            if Last = 0 then
1163
               IP := 0;
1164
               return;
1165
            end if;
1166
 
1167
            Flags.Has_Width := Flags.Has_Width or New_Flags.Has_Width;
1168
 
1169
            if Chain = 0 then            -- First piece
1170
               Flags.SP_Start := Flags.SP_Start or New_Flags.SP_Start;
1171
            else
1172
               Link_Tail (Chain, Last);
1173
            end if;
1174
 
1175
            Chain := Last;
1176
         end loop;
1177
 
1178
         --  Case where loop ran zero CURLY
1179
 
1180
         if Chain = 0 then
1181
            Discard := Emit_Node (NOTHING);
1182
         end if;
1183
      end Parse_Branch;
1184
 
1185
      ---------------------------
1186
      -- Parse_Character_Class --
1187
      ---------------------------
1188
 
1189
      procedure Parse_Character_Class (IP : out Pointer) is
1190
         Bitmap      : Character_Class;
1191
         Invert      : Boolean := False;
1192
         In_Range    : Boolean := False;
1193
         Named_Class : Std_Class := ANYOF_NONE;
1194
         Value       : Character;
1195
         Last_Value  : Character := ASCII.Nul;
1196
 
1197
      begin
1198
         Reset_Class (Bitmap);
1199
 
1200
         --  Do we have an invert character class ?
1201
 
1202
         if Parse_Pos <= Parse_End
1203
           and then Expression (Parse_Pos) = '^'
1204
         then
1205
            Invert := True;
1206
            Parse_Pos := Parse_Pos + 1;
1207
         end if;
1208
 
1209
         --  First character can be ] or - without closing the class
1210
 
1211
         if Parse_Pos <= Parse_End
1212
           and then (Expression (Parse_Pos) = ']'
1213
                      or else Expression (Parse_Pos) = '-')
1214
         then
1215
            Set_In_Class (Bitmap, Expression (Parse_Pos));
1216
            Parse_Pos := Parse_Pos + 1;
1217
         end if;
1218
 
1219
         --  While we don't have the end of the class
1220
 
1221
         while Parse_Pos <= Parse_End
1222
           and then Expression (Parse_Pos) /= ']'
1223
         loop
1224
            Named_Class := ANYOF_NONE;
1225
            Value := Expression (Parse_Pos);
1226
            Parse_Pos := Parse_Pos + 1;
1227
 
1228
            --  Do we have a Posix character class
1229
            if Value = '[' then
1230
               Named_Class := Parse_Posix_Character_Class;
1231
 
1232
            elsif Value = '\' then
1233
               if Parse_Pos = Parse_End then
1234
                  Fail ("Trailing \");
1235
               end if;
1236
               Value := Expression (Parse_Pos);
1237
               Parse_Pos := Parse_Pos + 1;
1238
 
1239
               case Value is
1240
                  when 'w' => Named_Class := ANYOF_ALNUM;
1241
                  when 'W' => Named_Class := ANYOF_NALNUM;
1242
                  when 's' => Named_Class := ANYOF_SPACE;
1243
                  when 'S' => Named_Class := ANYOF_NSPACE;
1244
                  when 'd' => Named_Class := ANYOF_DIGIT;
1245
                  when 'D' => Named_Class := ANYOF_NDIGIT;
1246
                  when 'n' => Value := ASCII.LF;
1247
                  when 'r' => Value := ASCII.CR;
1248
                  when 't' => Value := ASCII.HT;
1249
                  when 'f' => Value := ASCII.FF;
1250
                  when 'e' => Value := ASCII.ESC;
1251
                  when 'a' => Value := ASCII.BEL;
1252
 
1253
                  --  when 'x'  => ??? hexadecimal value
1254
                  --  when 'c'  => ??? control character
1255
                  --  when '0'..'9' => ??? octal character
1256
 
1257
                  when others => null;
1258
               end case;
1259
            end if;
1260
 
1261
            --  Do we have a character class?
1262
 
1263
            if Named_Class /= ANYOF_NONE then
1264
 
1265
               --  A range like 'a-\d' or 'a-[:digit:] is not a range
1266
 
1267
               if In_Range then
1268
                  Set_In_Class (Bitmap, Last_Value);
1269
                  Set_In_Class (Bitmap, '-');
1270
                  In_Range := False;
1271
               end if;
1272
 
1273
               --  Expand the range
1274
 
1275
               case Named_Class is
1276
                  when ANYOF_NONE => null;
1277
 
1278
                  when ANYOF_ALNUM | ANYOF_ALNUMC =>
1279
                     for Value in Class_Byte'Range loop
1280
                        if Is_Alnum (Character'Val (Value)) then
1281
                           Set_In_Class (Bitmap, Character'Val (Value));
1282
                        end if;
1283
                     end loop;
1284
 
1285
                  when ANYOF_NALNUM | ANYOF_NALNUMC =>
1286
                     for Value in Class_Byte'Range loop
1287
                        if not Is_Alnum (Character'Val (Value)) then
1288
                           Set_In_Class (Bitmap, Character'Val (Value));
1289
                        end if;
1290
                     end loop;
1291
 
1292
                  when ANYOF_SPACE =>
1293
                     for Value in Class_Byte'Range loop
1294
                        if Is_White_Space (Character'Val (Value)) then
1295
                           Set_In_Class (Bitmap, Character'Val (Value));
1296
                        end if;
1297
                     end loop;
1298
 
1299
                  when ANYOF_NSPACE =>
1300
                     for Value in Class_Byte'Range loop
1301
                        if not Is_White_Space (Character'Val (Value)) then
1302
                           Set_In_Class (Bitmap, Character'Val (Value));
1303
                        end if;
1304
                     end loop;
1305
 
1306
                  when ANYOF_DIGIT =>
1307
                     for Value in Class_Byte'Range loop
1308
                        if Is_Digit (Character'Val (Value)) then
1309
                           Set_In_Class (Bitmap, Character'Val (Value));
1310
                        end if;
1311
                     end loop;
1312
 
1313
                  when ANYOF_NDIGIT =>
1314
                     for Value in Class_Byte'Range loop
1315
                        if not Is_Digit (Character'Val (Value)) then
1316
                           Set_In_Class (Bitmap, Character'Val (Value));
1317
                        end if;
1318
                     end loop;
1319
 
1320
                  when ANYOF_ALPHA =>
1321
                     for Value in Class_Byte'Range loop
1322
                        if Is_Letter (Character'Val (Value)) then
1323
                           Set_In_Class (Bitmap, Character'Val (Value));
1324
                        end if;
1325
                     end loop;
1326
 
1327
                  when ANYOF_NALPHA =>
1328
                     for Value in Class_Byte'Range loop
1329
                        if not Is_Letter (Character'Val (Value)) then
1330
                           Set_In_Class (Bitmap, Character'Val (Value));
1331
                        end if;
1332
                     end loop;
1333
 
1334
                  when ANYOF_ASCII =>
1335
                     for Value in 0 .. 127 loop
1336
                        Set_In_Class (Bitmap, Character'Val (Value));
1337
                     end loop;
1338
 
1339
                  when ANYOF_NASCII =>
1340
                     for Value in 128 .. 255 loop
1341
                        Set_In_Class (Bitmap, Character'Val (Value));
1342
                     end loop;
1343
 
1344
                  when ANYOF_CNTRL =>
1345
                     for Value in Class_Byte'Range loop
1346
                        if Is_Control (Character'Val (Value)) then
1347
                           Set_In_Class (Bitmap, Character'Val (Value));
1348
                        end if;
1349
                     end loop;
1350
 
1351
                  when ANYOF_NCNTRL =>
1352
                     for Value in Class_Byte'Range loop
1353
                        if not Is_Control (Character'Val (Value)) then
1354
                           Set_In_Class (Bitmap, Character'Val (Value));
1355
                        end if;
1356
                     end loop;
1357
 
1358
                  when ANYOF_GRAPH =>
1359
                     for Value in Class_Byte'Range loop
1360
                        if Is_Graphic (Character'Val (Value)) then
1361
                           Set_In_Class (Bitmap, Character'Val (Value));
1362
                        end if;
1363
                     end loop;
1364
 
1365
                  when ANYOF_NGRAPH =>
1366
                     for Value in Class_Byte'Range loop
1367
                        if not Is_Graphic (Character'Val (Value)) then
1368
                           Set_In_Class (Bitmap, Character'Val (Value));
1369
                        end if;
1370
                     end loop;
1371
 
1372
                  when ANYOF_LOWER =>
1373
                     for Value in Class_Byte'Range loop
1374
                        if Is_Lower (Character'Val (Value)) then
1375
                           Set_In_Class (Bitmap, Character'Val (Value));
1376
                        end if;
1377
                     end loop;
1378
 
1379
                  when ANYOF_NLOWER =>
1380
                     for Value in Class_Byte'Range loop
1381
                        if not Is_Lower (Character'Val (Value)) then
1382
                           Set_In_Class (Bitmap, Character'Val (Value));
1383
                        end if;
1384
                     end loop;
1385
 
1386
                  when ANYOF_PRINT =>
1387
                     for Value in Class_Byte'Range loop
1388
                        if Is_Printable (Character'Val (Value)) then
1389
                           Set_In_Class (Bitmap, Character'Val (Value));
1390
                        end if;
1391
                     end loop;
1392
 
1393
                  when ANYOF_NPRINT =>
1394
                     for Value in Class_Byte'Range loop
1395
                        if not Is_Printable (Character'Val (Value)) then
1396
                           Set_In_Class (Bitmap, Character'Val (Value));
1397
                        end if;
1398
                     end loop;
1399
 
1400
                  when ANYOF_PUNCT =>
1401
                     for Value in Class_Byte'Range loop
1402
                        if Is_Printable (Character'Val (Value))
1403
                          and then not Is_White_Space (Character'Val (Value))
1404
                          and then not Is_Alnum (Character'Val (Value))
1405
                        then
1406
                           Set_In_Class (Bitmap, Character'Val (Value));
1407
                        end if;
1408
                     end loop;
1409
 
1410
                  when ANYOF_NPUNCT =>
1411
                     for Value in Class_Byte'Range loop
1412
                        if not Is_Printable (Character'Val (Value))
1413
                          or else Is_White_Space (Character'Val (Value))
1414
                          or else Is_Alnum (Character'Val (Value))
1415
                        then
1416
                           Set_In_Class (Bitmap, Character'Val (Value));
1417
                        end if;
1418
                     end loop;
1419
 
1420
                  when ANYOF_UPPER =>
1421
                     for Value in Class_Byte'Range loop
1422
                        if Is_Upper (Character'Val (Value)) then
1423
                           Set_In_Class (Bitmap, Character'Val (Value));
1424
                        end if;
1425
                     end loop;
1426
 
1427
                  when ANYOF_NUPPER =>
1428
                     for Value in Class_Byte'Range loop
1429
                        if not Is_Upper (Character'Val (Value)) then
1430
                           Set_In_Class (Bitmap, Character'Val (Value));
1431
                        end if;
1432
                     end loop;
1433
 
1434
                  when ANYOF_XDIGIT =>
1435
                     for Value in Class_Byte'Range loop
1436
                        if Is_Hexadecimal_Digit (Character'Val (Value)) then
1437
                           Set_In_Class (Bitmap, Character'Val (Value));
1438
                        end if;
1439
                     end loop;
1440
 
1441
                  when ANYOF_NXDIGIT =>
1442
                     for Value in Class_Byte'Range loop
1443
                        if not Is_Hexadecimal_Digit
1444
                          (Character'Val (Value))
1445
                        then
1446
                           Set_In_Class (Bitmap, Character'Val (Value));
1447
                        end if;
1448
                     end loop;
1449
 
1450
               end case;
1451
 
1452
            --  Not a character range
1453
 
1454
            elsif not In_Range then
1455
               Last_Value := Value;
1456
 
1457
               if Expression (Parse_Pos) = '-'
1458
                 and then Parse_Pos < Parse_End
1459
                 and then Expression (Parse_Pos + 1) /= ']'
1460
               then
1461
                  Parse_Pos := Parse_Pos + 1;
1462
 
1463
                  --  Do we have a range like '\d-a' and '[:space:]-a'
1464
                  --  which is not a real range
1465
 
1466
                  if Named_Class /= ANYOF_NONE then
1467
                     Set_In_Class (Bitmap, '-');
1468
                  else
1469
                     In_Range := True;
1470
                  end if;
1471
 
1472
               else
1473
                  Set_In_Class (Bitmap, Value);
1474
 
1475
               end if;
1476
 
1477
            --  Else in a character range
1478
 
1479
            else
1480
               if Last_Value > Value then
1481
                  Fail ("Invalid Range [" & Last_Value'Img
1482
                        & "-" & Value'Img & "]");
1483
               end if;
1484
 
1485
               while Last_Value <= Value loop
1486
                  Set_In_Class (Bitmap, Last_Value);
1487
                  Last_Value := Character'Succ (Last_Value);
1488
               end loop;
1489
 
1490
               In_Range := False;
1491
 
1492
            end if;
1493
 
1494
         end loop;
1495
 
1496
         --  Optimize case-insensitive ranges (put the upper case or lower
1497
         --  case character into the bitmap)
1498
 
1499
         if (Flags and Case_Insensitive) /= 0 then
1500
            for C in Character'Range loop
1501
               if Get_From_Class (Bitmap, C) then
1502
                  Set_In_Class (Bitmap, To_Lower (C));
1503
                  Set_In_Class (Bitmap, To_Upper (C));
1504
               end if;
1505
            end loop;
1506
         end if;
1507
 
1508
         --  Optimize inverted classes
1509
 
1510
         if Invert then
1511
            for J in Bitmap'Range loop
1512
               Bitmap (J) := not Bitmap (J);
1513
            end loop;
1514
         end if;
1515
 
1516
         Parse_Pos := Parse_Pos + 1;
1517
 
1518
         --  Emit the class
1519
 
1520
         IP := Emit_Node (ANYOF);
1521
         Emit_Class (Bitmap);
1522
      end Parse_Character_Class;
1523
 
1524
      -------------------
1525
      -- Parse_Literal --
1526
      -------------------
1527
 
1528
      --  This is a bit tricky due to quoted chars and due to
1529
      --  the multiplier characters '*', '+', and '?' that
1530
      --  take the SINGLE char previous as their operand.
1531
 
1532
      --  On entry, the character at Parse_Pos - 1 is going to go
1533
      --  into the string, no matter what it is. It could be
1534
      --  following a \ if Parse_Atom was entered from the '\' case.
1535
 
1536
      --  Basic idea is to pick up a good char in C and examine
1537
      --  the next char. If Is_Mult (C) then twiddle, if it's a \
1538
      --  then frozzle and if it's another magic char then push C and
1539
      --  terminate the string. If none of the above, push C on the
1540
      --  string and go around again.
1541
 
1542
      --  Start_Pos is used to remember where "the current character"
1543
      --  starts in the string, if due to an Is_Mult we need to back
1544
      --  up and put the current char in a separate 1-character string.
1545
      --  When Start_Pos is 0, C is the only char in the string;
1546
      --  this is used in Is_Mult handling, and in setting the SIMPLE
1547
      --  flag at the end.
1548
 
1549
      procedure Parse_Literal
1550
        (Expr_Flags : out Expression_Flags;
1551
         IP         : out Pointer)
1552
      is
1553
         Start_Pos  : Natural := 0;
1554
         C          : Character;
1555
         Length_Ptr : Pointer;
1556
 
1557
         Has_Special_Operator : Boolean := False;
1558
 
1559
      begin
1560
         Parse_Pos := Parse_Pos - 1;      --  Look at current character
1561
 
1562
         if (Flags and Case_Insensitive) /= 0 then
1563
            IP := Emit_Node (EXACTF);
1564
         else
1565
            IP := Emit_Node (EXACT);
1566
         end if;
1567
 
1568
         Length_Ptr := Emit_Ptr;
1569
         Emit_Ptr := String_Operand (IP);
1570
 
1571
         Parse_Loop :
1572
         loop
1573
            C := Expression (Parse_Pos); --  Get current character
1574
 
1575
            case C is
1576
               when '.' | '[' | '(' | ')' | '|' | ASCII.LF | '$' | '^' =>
1577
 
1578
                  if Start_Pos = 0 then
1579
                     Start_Pos := Parse_Pos;
1580
                     Emit (C);         --  First character is always emitted
1581
                  else
1582
                     exit Parse_Loop;  --  Else we are done
1583
                  end if;
1584
 
1585
               when '?' | '+' | '*' | '{' =>
1586
 
1587
                  if Start_Pos = 0 then
1588
                     Start_Pos := Parse_Pos;
1589
                     Emit (C);         --  First character is always emitted
1590
 
1591
                  --  Are we looking at an operator, or is this
1592
                  --  simply a normal character ?
1593
 
1594
                  elsif not Is_Mult (Parse_Pos) then
1595
                     Start_Pos := Parse_Pos;
1596
                     Case_Emit (C);
1597
 
1598
                  else
1599
                     --  We've got something like "abc?d".  Mark this as a
1600
                     --  special case. What we want to emit is a first
1601
                     --  constant string for "ab", then one for "c" that will
1602
                     --  ultimately be transformed with a CURLY operator, A
1603
                     --  special case has to be handled for "a?", since there
1604
                     --  is no initial string to emit.
1605
 
1606
                     Has_Special_Operator := True;
1607
                     exit Parse_Loop;
1608
                  end if;
1609
 
1610
               when '\' =>
1611
                  Start_Pos := Parse_Pos;
1612
 
1613
                  if Parse_Pos = Parse_End then
1614
                     Fail ("Trailing \");
1615
 
1616
                  else
1617
                     case Expression (Parse_Pos + 1) is
1618
                        when 'b' | 'B' | 's' | 'S' | 'd' | 'D'
1619
                          | 'w' | 'W' | '0' .. '9' | 'G' | 'A'
1620
                          => exit Parse_Loop;
1621
                        when 'n'         => Emit (ASCII.LF);
1622
                        when 't'         => Emit (ASCII.HT);
1623
                        when 'r'         => Emit (ASCII.CR);
1624
                        when 'f'         => Emit (ASCII.FF);
1625
                        when 'e'         => Emit (ASCII.ESC);
1626
                        when 'a'         => Emit (ASCII.BEL);
1627
                        when others      => Emit (Expression (Parse_Pos + 1));
1628
                     end case;
1629
 
1630
                     Parse_Pos := Parse_Pos + 1;
1631
                  end if;
1632
 
1633
               when others =>
1634
                  Start_Pos := Parse_Pos;
1635
                  Case_Emit (C);
1636
            end case;
1637
 
1638
            exit Parse_Loop when Emit_Ptr - Length_Ptr = 254;
1639
 
1640
            Parse_Pos := Parse_Pos + 1;
1641
 
1642
            exit Parse_Loop when Parse_Pos > Parse_End;
1643
         end loop Parse_Loop;
1644
 
1645
         --  Is the string followed by a '*+?{' operator ? If yes, and if there
1646
         --  is an initial string to emit, do it now.
1647
 
1648
         if Has_Special_Operator
1649
           and then Emit_Ptr >= Length_Ptr + 3
1650
         then
1651
            Emit_Ptr := Emit_Ptr - 1;
1652
            Parse_Pos := Start_Pos;
1653
         end if;
1654
 
1655
         if Emit_Code then
1656
            Program (Length_Ptr) := Character'Val (Emit_Ptr - Length_Ptr - 2);
1657
         end if;
1658
 
1659
         Expr_Flags.Has_Width := True;
1660
 
1661
         --  Slight optimization when there is a single character
1662
 
1663
         if Emit_Ptr = Length_Ptr + 2 then
1664
            Expr_Flags.Simple := True;
1665
         end if;
1666
      end Parse_Literal;
1667
 
1668
      -----------------
1669
      -- Parse_Piece --
1670
      -----------------
1671
 
1672
      --  Note that the branching code sequences used for '?' and the
1673
      --  general cases of '*' and + are somewhat optimized: they use
1674
      --  the same NOTHING node as both the endmarker for their branch
1675
      --  list and the body of the last branch. It might seem that
1676
      --  this node could be dispensed with entirely, but the endmarker
1677
      --  role is not redundant.
1678
 
1679
      procedure Parse_Piece
1680
        (Expr_Flags : out Expression_Flags;
1681
         IP         : out Pointer)
1682
      is
1683
         Op        : Character;
1684
         New_Flags : Expression_Flags;
1685
         Greedy    : Boolean := True;
1686
 
1687
      begin
1688
         Parse_Atom (New_Flags, IP);
1689
 
1690
         if IP = 0 then
1691
            return;
1692
         end if;
1693
 
1694
         if Parse_Pos > Parse_End
1695
           or else not Is_Mult (Parse_Pos)
1696
         then
1697
            Expr_Flags := New_Flags;
1698
            return;
1699
         end if;
1700
 
1701
         Op := Expression (Parse_Pos);
1702
 
1703
         if Op /= '+' then
1704
            Expr_Flags := (SP_Start => True, others => False);
1705
         else
1706
            Expr_Flags := (Has_Width => True, others => False);
1707
         end if;
1708
 
1709
         --  Detect non greedy operators in the easy cases
1710
 
1711
         if Op /= '{'
1712
           and then Parse_Pos + 1 <= Parse_End
1713
           and then Expression (Parse_Pos + 1) = '?'
1714
         then
1715
            Greedy := False;
1716
            Parse_Pos := Parse_Pos + 1;
1717
         end if;
1718
 
1719
         --  Generate the byte code
1720
 
1721
         case Op is
1722
            when '*' =>
1723
 
1724
               if New_Flags.Simple then
1725
                  Insert_Operator (STAR, IP, Greedy);
1726
               else
1727
                  Link_Tail (IP, Emit_Node (WHILEM));
1728
                  Insert_Curly_Operator
1729
                    (CURLYX, 0, Max_Curly_Repeat, IP, Greedy);
1730
                  Link_Tail (IP, Emit_Node (NOTHING));
1731
               end if;
1732
 
1733
            when '+' =>
1734
 
1735
               if New_Flags.Simple then
1736
                  Insert_Operator (PLUS, IP, Greedy);
1737
               else
1738
                  Link_Tail (IP, Emit_Node (WHILEM));
1739
                  Insert_Curly_Operator
1740
                    (CURLYX, 1, Max_Curly_Repeat, IP, Greedy);
1741
                  Link_Tail (IP, Emit_Node (NOTHING));
1742
               end if;
1743
 
1744
            when '?' =>
1745
               if New_Flags.Simple then
1746
                  Insert_Curly_Operator (CURLY, 0, 1, IP, Greedy);
1747
               else
1748
                  Link_Tail (IP, Emit_Node (WHILEM));
1749
                  Insert_Curly_Operator (CURLYX, 0, 1, IP, Greedy);
1750
                  Link_Tail (IP, Emit_Node (NOTHING));
1751
               end if;
1752
 
1753
            when '{' =>
1754
               declare
1755
                  Min, Max : Natural;
1756
 
1757
               begin
1758
                  Get_Curly_Arguments (Parse_Pos, Min, Max, Greedy);
1759
 
1760
                  if New_Flags.Simple then
1761
                     Insert_Curly_Operator (CURLY, Min, Max, IP, Greedy);
1762
                  else
1763
                     Link_Tail (IP, Emit_Node (WHILEM));
1764
                     Insert_Curly_Operator (CURLYX, Min, Max, IP, Greedy);
1765
                     Link_Tail (IP, Emit_Node (NOTHING));
1766
                  end if;
1767
               end;
1768
 
1769
            when others =>
1770
               null;
1771
         end case;
1772
 
1773
         Parse_Pos := Parse_Pos + 1;
1774
 
1775
         if Parse_Pos <= Parse_End
1776
           and then Is_Mult (Parse_Pos)
1777
         then
1778
            Fail ("nested *+{");
1779
         end if;
1780
      end Parse_Piece;
1781
 
1782
      ---------------------------------
1783
      -- Parse_Posix_Character_Class --
1784
      ---------------------------------
1785
 
1786
      function Parse_Posix_Character_Class return Std_Class is
1787
         Invert : Boolean := False;
1788
         Class  : Std_Class := ANYOF_NONE;
1789
         E      : String renames Expression;
1790
 
1791
         --  Class names. Note that code assumes that the length of all
1792
         --  classes starting with the same letter have the same length.
1793
 
1794
         Alnum   : constant String := "alnum:]";
1795
         Alpha   : constant String := "alpha:]";
1796
         Ascii_C : constant String := "ascii:]";
1797
         Cntrl   : constant String := "cntrl:]";
1798
         Digit   : constant String := "digit:]";
1799
         Graph   : constant String := "graph:]";
1800
         Lower   : constant String := "lower:]";
1801
         Print   : constant String := "print:]";
1802
         Punct   : constant String := "punct:]";
1803
         Space   : constant String := "space:]";
1804
         Upper   : constant String := "upper:]";
1805
         Word    : constant String := "word:]";
1806
         Xdigit  : constant String := "xdigit:]";
1807
 
1808
      begin
1809
         --  Case of character class specified
1810
 
1811
         if Parse_Pos <= Parse_End
1812
           and then Expression (Parse_Pos) = ':'
1813
         then
1814
            Parse_Pos := Parse_Pos + 1;
1815
 
1816
            --  Do we have something like:  [[:^alpha:]]
1817
 
1818
            if Parse_Pos <= Parse_End
1819
              and then Expression (Parse_Pos) = '^'
1820
            then
1821
               Invert := True;
1822
               Parse_Pos := Parse_Pos + 1;
1823
            end if;
1824
 
1825
            --  Check for class names based on first letter
1826
 
1827
            case Expression (Parse_Pos) is
1828
 
1829
               when 'a' =>
1830
 
1831
                  --  All 'a' classes have the same length (Alnum'Length)
1832
 
1833
                  if Parse_Pos + Alnum'Length - 1 <= Parse_End then
1834
 
1835
                     if E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) =
1836
                                                                      Alnum
1837
                     then
1838
                        if Invert then
1839
                           Class := ANYOF_NALNUMC;
1840
                        else
1841
                           Class := ANYOF_ALNUMC;
1842
                        end if;
1843
 
1844
                        Parse_Pos := Parse_Pos + Alnum'Length;
1845
 
1846
                     elsif E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) =
1847
                                                                      Alpha
1848
                     then
1849
                        if Invert then
1850
                           Class := ANYOF_NALPHA;
1851
                        else
1852
                           Class := ANYOF_ALPHA;
1853
                        end if;
1854
 
1855
                        Parse_Pos := Parse_Pos + Alpha'Length;
1856
 
1857
                     elsif E (Parse_Pos .. Parse_Pos + Ascii_C'Length - 1) =
1858
                                                                      Ascii_C
1859
                     then
1860
                        if Invert then
1861
                           Class := ANYOF_NASCII;
1862
                        else
1863
                           Class := ANYOF_ASCII;
1864
                        end if;
1865
 
1866
                        Parse_Pos := Parse_Pos + Ascii_C'Length;
1867
                     end if;
1868
                  end if;
1869
 
1870
               when 'c' =>
1871
                  if Parse_Pos + Cntrl'Length - 1 <= Parse_End
1872
                    and then E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) =
1873
                                                                      Cntrl
1874
                  then
1875
                     if Invert then
1876
                        Class := ANYOF_NCNTRL;
1877
                     else
1878
                        Class := ANYOF_CNTRL;
1879
                     end if;
1880
 
1881
                     Parse_Pos := Parse_Pos + Cntrl'Length;
1882
                  end if;
1883
 
1884
               when 'd' =>
1885
                  if Parse_Pos + Digit'Length - 1 <= Parse_End
1886
                    and then E (Parse_Pos .. Parse_Pos + Digit'Length - 1) =
1887
                                                                      Digit
1888
                  then
1889
                     if Invert then
1890
                        Class := ANYOF_NDIGIT;
1891
                     else
1892
                        Class := ANYOF_DIGIT;
1893
                     end if;
1894
 
1895
                     Parse_Pos := Parse_Pos + Digit'Length;
1896
                  end if;
1897
 
1898
               when 'g' =>
1899
                  if Parse_Pos + Graph'Length - 1 <= Parse_End
1900
                    and then E (Parse_Pos .. Parse_Pos + Graph'Length - 1) =
1901
                                                                      Graph
1902
                  then
1903
                     if Invert then
1904
                        Class := ANYOF_NGRAPH;
1905
                     else
1906
                        Class := ANYOF_GRAPH;
1907
                     end if;
1908
                     Parse_Pos := Parse_Pos + Graph'Length;
1909
                  end if;
1910
 
1911
               when 'l' =>
1912
                  if Parse_Pos + Lower'Length - 1 <= Parse_End
1913
                    and then E (Parse_Pos .. Parse_Pos + Lower'Length - 1) =
1914
                                                                      Lower
1915
                  then
1916
                     if Invert then
1917
                        Class := ANYOF_NLOWER;
1918
                     else
1919
                        Class := ANYOF_LOWER;
1920
                     end if;
1921
                     Parse_Pos := Parse_Pos + Lower'Length;
1922
                  end if;
1923
 
1924
               when 'p' =>
1925
 
1926
                  --  All 'p' classes have the same length
1927
 
1928
                  if Parse_Pos + Print'Length - 1 <= Parse_End then
1929
                     if E (Parse_Pos .. Parse_Pos + Print'Length - 1) =
1930
                                                                      Print
1931
                     then
1932
                        if Invert then
1933
                           Class := ANYOF_NPRINT;
1934
                        else
1935
                           Class := ANYOF_PRINT;
1936
                        end if;
1937
 
1938
                        Parse_Pos := Parse_Pos + Print'Length;
1939
 
1940
                     elsif E (Parse_Pos .. Parse_Pos + Punct'Length - 1) =
1941
                                                                      Punct
1942
                     then
1943
                        if Invert then
1944
                           Class := ANYOF_NPUNCT;
1945
                        else
1946
                           Class := ANYOF_PUNCT;
1947
                        end if;
1948
 
1949
                        Parse_Pos := Parse_Pos + Punct'Length;
1950
                     end if;
1951
                  end if;
1952
 
1953
               when 's' =>
1954
                  if Parse_Pos + Space'Length - 1 <= Parse_End
1955
                    and then E (Parse_Pos .. Parse_Pos + Space'Length - 1) =
1956
                                                                      Space
1957
                  then
1958
                     if Invert then
1959
                        Class := ANYOF_NSPACE;
1960
                     else
1961
                        Class := ANYOF_SPACE;
1962
                     end if;
1963
 
1964
                     Parse_Pos := Parse_Pos + Space'Length;
1965
                  end if;
1966
 
1967
               when 'u' =>
1968
 
1969
                  if Parse_Pos + Upper'Length - 1 <= Parse_End
1970
                    and then E (Parse_Pos .. Parse_Pos + Upper'Length - 1) =
1971
                    Upper
1972
                  then
1973
                     if Invert then
1974
                        Class := ANYOF_NUPPER;
1975
                     else
1976
                        Class := ANYOF_UPPER;
1977
                     end if;
1978
                     Parse_Pos := Parse_Pos + Upper'Length;
1979
                  end if;
1980
 
1981
               when 'w' =>
1982
 
1983
                  if Parse_Pos + Word'Length - 1 <= Parse_End
1984
                    and then E (Parse_Pos .. Parse_Pos + Word'Length - 1) =
1985
                    Word
1986
                  then
1987
                     if Invert then
1988
                        Class := ANYOF_NALNUM;
1989
                     else
1990
                        Class := ANYOF_ALNUM;
1991
                     end if;
1992
                     Parse_Pos := Parse_Pos + Word'Length;
1993
                  end if;
1994
 
1995
               when 'x' =>
1996
 
1997
                  if Parse_Pos + Xdigit'Length - 1 <= Parse_End
1998
                    and then E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1)
1999
                    = Digit
2000
                  then
2001
                     if Invert then
2002
                        Class := ANYOF_NXDIGIT;
2003
                     else
2004
                        Class := ANYOF_XDIGIT;
2005
                     end if;
2006
 
2007
                     Parse_Pos := Parse_Pos + Xdigit'Length;
2008
                  end if;
2009
 
2010
               when others =>
2011
                  Fail ("Invalid character class");
2012
            end case;
2013
 
2014
         --  Character class not specified
2015
 
2016
         else
2017
            return ANYOF_NONE;
2018
         end if;
2019
 
2020
         return Class;
2021
      end Parse_Posix_Character_Class;
2022
 
2023
      Expr_Flags : Expression_Flags;
2024
      Result     : Pointer;
2025
 
2026
   --  Start of processing for Compile
2027
 
2028
   begin
2029
      Emit (MAGIC);
2030
      Parse (False, Expr_Flags, Result);
2031
 
2032
      if Result = 0 then
2033
         Fail ("Couldn't compile expression");
2034
      end if;
2035
 
2036
      Final_Code_Size := Emit_Ptr - 1;
2037
 
2038
      --  Do we want to actually compile the expression, or simply get the
2039
      --  code size ???
2040
 
2041
      if Emit_Code then
2042
         Optimize (PM);
2043
      end if;
2044
 
2045
      PM.Flags := Flags;
2046
   end Compile;
2047
 
2048
   function Compile
2049
     (Expression : String;
2050
      Flags      : Regexp_Flags := No_Flags) return Pattern_Matcher
2051
   is
2052
      Size  : Program_Size;
2053
      Dummy : Pattern_Matcher (0);
2054
 
2055
   begin
2056
      Compile (Dummy, Expression, Size, Flags);
2057
 
2058
      declare
2059
         Result : Pattern_Matcher (Size);
2060
      begin
2061
         Compile (Result, Expression, Size, Flags);
2062
         return Result;
2063
      end;
2064
   end Compile;
2065
 
2066
   procedure Compile
2067
     (Matcher    : out Pattern_Matcher;
2068
      Expression : String;
2069
      Flags      : Regexp_Flags := No_Flags)
2070
   is
2071
      Size : Program_Size;
2072
 
2073
   begin
2074
      Compile (Matcher, Expression, Size, Flags);
2075
   end Compile;
2076
 
2077
   ----------
2078
   -- Dump --
2079
   ----------
2080
 
2081
   procedure Dump (Self : Pattern_Matcher) is
2082
 
2083
      --  Index  : Pointer := Program_First + 1;
2084
      --  What is the above line for ???
2085
 
2086
      Op      : Opcode;
2087
      Program : Program_Data renames Self.Program;
2088
 
2089
      procedure Dump_Until
2090
        (Start  : Pointer;
2091
         Till   : Pointer;
2092
         Indent : Natural := 0);
2093
      --  Dump the program until the node Till (not included) is met.
2094
      --  Every line is indented with Index spaces at the beginning
2095
      --  Dumps till the end if Till is 0.
2096
 
2097
      ----------------
2098
      -- Dump_Until --
2099
      ----------------
2100
 
2101
      procedure Dump_Until
2102
        (Start  : Pointer;
2103
         Till   : Pointer;
2104
         Indent : Natural := 0)
2105
      is
2106
         Next : Pointer;
2107
         Index : Pointer := Start;
2108
         Local_Indent : Natural := Indent;
2109
         Length : Pointer;
2110
 
2111
      begin
2112
         while Index < Till loop
2113
 
2114
            Op := Opcode'Val (Character'Pos ((Self.Program (Index))));
2115
 
2116
            if Op = CLOSE then
2117
               Local_Indent := Local_Indent - 3;
2118
            end if;
2119
 
2120
            declare
2121
               Point : constant String := Pointer'Image (Index);
2122
 
2123
            begin
2124
               for J in 1 .. 6 - Point'Length loop
2125
                  Put (' ');
2126
               end loop;
2127
 
2128
               Put (Point
2129
                    & " : "
2130
                    & (1 .. Local_Indent => ' ')
2131
                    & Opcode'Image (Op));
2132
            end;
2133
 
2134
            --  Print the parenthesis number
2135
 
2136
            if Op = OPEN or else Op = CLOSE or else Op = REFF then
2137
               Put (Natural'Image (Character'Pos (Program (Index + 3))));
2138
            end if;
2139
 
2140
            Next := Index + Get_Next_Offset (Program, Index);
2141
 
2142
            if Next = Index then
2143
               Put ("  (next at 0)");
2144
            else
2145
               Put ("  (next at " & Pointer'Image (Next) & ")");
2146
            end if;
2147
 
2148
            case Op is
2149
 
2150
               --  Character class operand
2151
 
2152
               when ANYOF =>  null;
2153
                  declare
2154
                     Bitmap  : Character_Class;
2155
                     Last    : Character := ASCII.Nul;
2156
                     Current : Natural := 0;
2157
 
2158
                     Current_Char : Character;
2159
 
2160
                  begin
2161
                     Bitmap_Operand (Program, Index, Bitmap);
2162
                     Put ("   operand=");
2163
 
2164
                     while Current <= 255 loop
2165
                        Current_Char := Character'Val (Current);
2166
 
2167
                        --  First item in a range
2168
 
2169
                        if Get_From_Class (Bitmap, Current_Char) then
2170
                           Last := Current_Char;
2171
 
2172
                           --  Search for the last item in the range
2173
 
2174
                           loop
2175
                              Current := Current + 1;
2176
                              exit when Current > 255;
2177
                              Current_Char := Character'Val (Current);
2178
                              exit when
2179
                                not Get_From_Class (Bitmap, Current_Char);
2180
 
2181
                           end loop;
2182
 
2183
                           if Last <= ' ' then
2184
                              Put (Last'Img);
2185
                           else
2186
                              Put (Last);
2187
                           end if;
2188
 
2189
                           if Character'Succ (Last) /= Current_Char then
2190
                              Put ("-" & Character'Pred (Current_Char));
2191
                           end if;
2192
 
2193
                        else
2194
                           Current := Current + 1;
2195
                        end if;
2196
                     end loop;
2197
 
2198
                     New_Line;
2199
                     Index := Index + 3 + Bitmap'Length;
2200
                  end;
2201
 
2202
               --  string operand
2203
 
2204
               when EXACT | EXACTF =>
2205
                  Length := String_Length (Program, Index);
2206
                  Put ("   operand (length:" & Program_Size'Image (Length + 1)
2207
                       & ") ="
2208
                       & String (Program (String_Operand (Index)
2209
                                          .. String_Operand (Index)
2210
                                          + Length)));
2211
                  Index := String_Operand (Index) + Length + 1;
2212
                  New_Line;
2213
 
2214
               --  Node operand
2215
 
2216
               when BRANCH =>
2217
                  New_Line;
2218
                  Dump_Until (Index + 3, Next, Local_Indent + 3);
2219
                  Index := Next;
2220
 
2221
               when STAR | PLUS =>
2222
                  New_Line;
2223
 
2224
                  --  Only one instruction
2225
 
2226
                  Dump_Until (Index + 3, Index + 4, Local_Indent + 3);
2227
                  Index := Next;
2228
 
2229
               when CURLY | CURLYX =>
2230
                  Put ("  {"
2231
                       & Natural'Image (Read_Natural (Program, Index + 3))
2232
                       & ","
2233
                       & Natural'Image (Read_Natural (Program, Index + 5))
2234
                       & "}");
2235
                  New_Line;
2236
                  Dump_Until (Index + 7, Next, Local_Indent + 3);
2237
                  Index := Next;
2238
 
2239
               when OPEN =>
2240
                  New_Line;
2241
                  Index := Index + 4;
2242
                  Local_Indent := Local_Indent + 3;
2243
 
2244
               when CLOSE | REFF =>
2245
                  New_Line;
2246
                  Index := Index + 4;
2247
 
2248
               when EOP =>
2249
                  Index := Index + 3;
2250
                  New_Line;
2251
                  exit;
2252
 
2253
               --  No operand
2254
 
2255
               when others =>
2256
                  Index := Index + 3;
2257
                  New_Line;
2258
            end case;
2259
         end loop;
2260
      end Dump_Until;
2261
 
2262
   --  Start of processing for Dump
2263
 
2264
   begin
2265
      pragma Assert (Self.Program (Program_First) = MAGIC,
2266
                     "Corrupted Pattern_Matcher");
2267
 
2268
      Put_Line ("Must start with (Self.First) = "
2269
                & Character'Image (Self.First));
2270
 
2271
      if (Self.Flags and Case_Insensitive) /= 0 then
2272
         Put_Line ("  Case_Insensitive mode");
2273
      end if;
2274
 
2275
      if (Self.Flags and Single_Line) /= 0 then
2276
         Put_Line ("  Single_Line mode");
2277
      end if;
2278
 
2279
      if (Self.Flags and Multiple_Lines) /= 0 then
2280
         Put_Line ("  Multiple_Lines mode");
2281
      end if;
2282
 
2283
      Put_Line ("     1 : MAGIC");
2284
      Dump_Until (Program_First + 1, Self.Program'Last + 1);
2285
   end Dump;
2286
 
2287
   --------------------
2288
   -- Get_From_Class --
2289
   --------------------
2290
 
2291
   function Get_From_Class
2292
     (Bitmap : Character_Class;
2293
      C      : Character) return Boolean
2294
   is
2295
      Value : constant Class_Byte := Character'Pos (C);
2296
 
2297
   begin
2298
      return
2299
        (Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0;
2300
   end Get_From_Class;
2301
 
2302
   --------------
2303
   -- Get_Next --
2304
   --------------
2305
 
2306
   function Get_Next (Program : Program_Data; IP : Pointer) return Pointer is
2307
      Offset : constant Pointer := Get_Next_Offset (Program, IP);
2308
 
2309
   begin
2310
      if Offset = 0 then
2311
         return 0;
2312
      else
2313
         return IP + Offset;
2314
      end if;
2315
   end Get_Next;
2316
 
2317
   ---------------------
2318
   -- Get_Next_Offset --
2319
   ---------------------
2320
 
2321
   function Get_Next_Offset
2322
     (Program : Program_Data;
2323
      IP      : Pointer) return Pointer
2324
   is
2325
   begin
2326
      return Pointer (Read_Natural (Program, IP + 1));
2327
   end Get_Next_Offset;
2328
 
2329
   --------------
2330
   -- Is_Alnum --
2331
   --------------
2332
 
2333
   function Is_Alnum (C : Character) return Boolean is
2334
   begin
2335
      return Is_Alphanumeric (C) or else C = '_';
2336
   end Is_Alnum;
2337
 
2338
   ------------------
2339
   -- Is_Printable --
2340
   ------------------
2341
 
2342
   function Is_Printable (C : Character) return Boolean is
2343
   begin
2344
      --  Printable if space or graphic character or other whitespace
2345
      --  Other white space includes (HT/LF/VT/FF/CR = codes 9-13)
2346
 
2347
      return C in Character'Val (32) .. Character'Val (126)
2348
        or else C in ASCII.HT .. ASCII.CR;
2349
   end Is_Printable;
2350
 
2351
   --------------------
2352
   -- Is_White_Space --
2353
   --------------------
2354
 
2355
   function Is_White_Space (C : Character) return Boolean is
2356
   begin
2357
      --  Note: HT = 9, LF = 10, VT = 11, FF = 12, CR = 13
2358
 
2359
      return C = ' ' or else C in ASCII.HT .. ASCII.CR;
2360
   end Is_White_Space;
2361
 
2362
   -----------
2363
   -- Match --
2364
   -----------
2365
 
2366
   procedure Match
2367
     (Self    : Pattern_Matcher;
2368
      Data    : String;
2369
      Matches : out Match_Array;
2370
      Data_First : Integer := -1;
2371
      Data_Last  : Positive := Positive'Last)
2372
   is
2373
      Program   : Program_Data renames Self.Program; -- Shorter notation
2374
 
2375
      First_In_Data : constant Integer := Integer'Max (Data_First, Data'First);
2376
      Last_In_Data  : constant Integer := Integer'Min (Data_Last, Data'Last);
2377
 
2378
      --  Global work variables
2379
 
2380
      Input_Pos : Natural;          -- String-input pointer
2381
      BOL_Pos   : Natural;          -- Beginning of input, for ^ check
2382
      Matched   : Boolean := False;  -- Until proven True
2383
 
2384
      Matches_Full : Match_Array (0 .. Natural'Max (Self.Paren_Count,
2385
                                                    Matches'Last));
2386
      --  Stores the value of all the parenthesis pairs.
2387
      --  We do not use directly Matches, so that we can also use back
2388
      --  references (REFF) even if Matches is too small.
2389
 
2390
      type Natural_Array is array (Match_Count range <>) of Natural;
2391
      Matches_Tmp : Natural_Array (Matches_Full'Range);
2392
      --  Save the opening position of parenthesis
2393
 
2394
      Last_Paren  : Natural := 0;
2395
      --  Last parenthesis seen
2396
 
2397
      Greedy : Boolean := True;
2398
      --  True if the next operator should be greedy
2399
 
2400
      type Current_Curly_Record;
2401
      type Current_Curly_Access is access all Current_Curly_Record;
2402
      type Current_Curly_Record is record
2403
         Paren_Floor : Natural;  --  How far back to strip parenthesis data
2404
         Cur         : Integer;  --  How many instances of scan we've matched
2405
         Min         : Natural;  --  Minimal number of scans to match
2406
         Max         : Natural;  --  Maximal number of scans to match
2407
         Greedy      : Boolean;  --  Whether to work our way up or down
2408
         Scan        : Pointer;  --  The thing to match
2409
         Next        : Pointer;  --  What has to match after it
2410
         Lastloc     : Natural;  --  Where we started matching this scan
2411
         Old_Cc      : Current_Curly_Access; --  Before we started this one
2412
      end record;
2413
      --  Data used to handle the curly operator and the plus and star
2414
      --  operators for complex expressions.
2415
 
2416
      Current_Curly : Current_Curly_Access := null;
2417
      --  The curly currently being processed
2418
 
2419
      -----------------------
2420
      -- Local Subprograms --
2421
      -----------------------
2422
 
2423
      function Index (Start : Positive; C : Character) return Natural;
2424
      --  Find character C in Data starting at Start and return position
2425
 
2426
      function Repeat
2427
        (IP  : Pointer;
2428
         Max : Natural := Natural'Last) return Natural;
2429
      --  Repeatedly match something simple, report how many
2430
      --  It only matches on things of length 1.
2431
      --  Starting from Input_Pos, it matches at most Max CURLY.
2432
 
2433
      function Try (Pos : Positive) return Boolean;
2434
      --  Try to match at specific point
2435
 
2436
      function Match (IP : Pointer) return Boolean;
2437
      --  This is the main matching routine. Conceptually the strategy
2438
      --  is simple:  check to see whether the current node matches,
2439
      --  call self recursively to see whether the rest matches,
2440
      --  and then act accordingly.
2441
      --
2442
      --  In practice Match makes some effort to avoid recursion, in
2443
      --  particular by going through "ordinary" nodes (that don't
2444
      --  need to know whether the rest of the match failed) by
2445
      --  using a loop instead of recursion.
2446
      --  Why is the above comment part of the spec rather than body ???
2447
 
2448
      function Match_Whilem (IP : Pointer) return Boolean;
2449
      --  Return True if a WHILEM matches
2450
      --  How come IP is unreferenced in the body ???
2451
 
2452
      function Recurse_Match (IP : Pointer; From : Natural) return Boolean;
2453
      pragma Inline (Recurse_Match);
2454
      --  Calls Match recursively. It saves and restores the parenthesis
2455
      --  status and location in the input stream correctly, so that
2456
      --  backtracking is possible
2457
 
2458
      function Match_Simple_Operator
2459
        (Op     : Opcode;
2460
         Scan   : Pointer;
2461
         Next   : Pointer;
2462
         Greedy : Boolean) return Boolean;
2463
      --  Return True it the simple operator (possibly non-greedy) matches
2464
 
2465
      pragma Inline (Index);
2466
      pragma Inline (Repeat);
2467
 
2468
      --  These are two complex functions, but used only once
2469
 
2470
      pragma Inline (Match_Whilem);
2471
      pragma Inline (Match_Simple_Operator);
2472
 
2473
      -----------
2474
      -- Index --
2475
      -----------
2476
 
2477
      function Index (Start : Positive; C : Character) return Natural is
2478
      begin
2479
         for J in Start .. Last_In_Data loop
2480
            if Data (J) = C then
2481
               return J;
2482
            end if;
2483
         end loop;
2484
 
2485
         return 0;
2486
      end Index;
2487
 
2488
      -------------------
2489
      -- Recurse_Match --
2490
      -------------------
2491
 
2492
      function Recurse_Match (IP : Pointer; From : Natural) return Boolean is
2493
         L : constant Natural := Last_Paren;
2494
 
2495
         Tmp_F : constant Match_Array :=
2496
                   Matches_Full (From + 1 .. Matches_Full'Last);
2497
 
2498
         Start : constant Natural_Array :=
2499
                   Matches_Tmp (From + 1 .. Matches_Tmp'Last);
2500
         Input : constant Natural := Input_Pos;
2501
 
2502
      begin
2503
         if Match (IP) then
2504
            return True;
2505
         end if;
2506
 
2507
         Last_Paren := L;
2508
         Matches_Full (Tmp_F'Range) := Tmp_F;
2509
         Matches_Tmp (Start'Range) := Start;
2510
         Input_Pos := Input;
2511
         return False;
2512
      end Recurse_Match;
2513
 
2514
      -----------
2515
      -- Match --
2516
      -----------
2517
 
2518
      function Match (IP : Pointer) return Boolean is
2519
         Scan   : Pointer := IP;
2520
         Next   : Pointer;
2521
         Op     : Opcode;
2522
 
2523
      begin
2524
         State_Machine :
2525
         loop
2526
            pragma Assert (Scan /= 0);
2527
 
2528
            --  Determine current opcode and count its usage in debug mode
2529
 
2530
            Op := Opcode'Val (Character'Pos (Program (Scan)));
2531
 
2532
            --  Calculate offset of next instruction.
2533
            --  Second character is most significant in Program_Data.
2534
 
2535
            Next := Get_Next (Program, Scan);
2536
 
2537
            case Op is
2538
               when EOP =>
2539
                  return True;  --  Success !
2540
 
2541
               when BRANCH =>
2542
                  if Program (Next) /= BRANCH then
2543
                     Next := Operand (Scan); -- No choice, avoid recursion
2544
 
2545
                  else
2546
                     loop
2547
                        if Recurse_Match (Operand (Scan), 0) then
2548
                           return True;
2549
                        end if;
2550
 
2551
                        Scan := Get_Next (Program, Scan);
2552
                        exit when Scan = 0 or else Program (Scan) /= BRANCH;
2553
                     end loop;
2554
 
2555
                     exit State_Machine;
2556
                  end if;
2557
 
2558
               when NOTHING =>
2559
                  null;
2560
 
2561
               when BOL =>
2562
                  exit State_Machine when Input_Pos /= BOL_Pos
2563
                    and then ((Self.Flags and Multiple_Lines) = 0
2564
                              or else Data (Input_Pos - 1) /= ASCII.LF);
2565
 
2566
               when MBOL =>
2567
                  exit State_Machine when Input_Pos /= BOL_Pos
2568
                    and then Data (Input_Pos - 1) /= ASCII.LF;
2569
 
2570
               when SBOL =>
2571
                  exit State_Machine when Input_Pos /= BOL_Pos;
2572
 
2573
               when EOL =>
2574
                  exit State_Machine when Input_Pos <= Data'Last
2575
                    and then ((Self.Flags and Multiple_Lines) = 0
2576
                              or else Data (Input_Pos) /= ASCII.LF);
2577
 
2578
               when MEOL =>
2579
                  exit State_Machine when Input_Pos <= Data'Last
2580
                    and then Data (Input_Pos) /= ASCII.LF;
2581
 
2582
               when SEOL =>
2583
                  exit State_Machine when Input_Pos <= Data'Last;
2584
 
2585
               when BOUND | NBOUND =>
2586
 
2587
                  --  Was last char in word ?
2588
 
2589
                  declare
2590
                     N  : Boolean := False;
2591
                     Ln : Boolean := False;
2592
 
2593
                  begin
2594
                     if Input_Pos /= First_In_Data then
2595
                        N := Is_Alnum (Data (Input_Pos - 1));
2596
                     end if;
2597
 
2598
                     if Input_Pos > Last_In_Data then
2599
                        Ln := False;
2600
                     else
2601
                        Ln := Is_Alnum (Data (Input_Pos));
2602
                     end if;
2603
 
2604
                     if Op = BOUND then
2605
                        if N = Ln then
2606
                           exit State_Machine;
2607
                        end if;
2608
                     else
2609
                        if N /= Ln then
2610
                           exit State_Machine;
2611
                        end if;
2612
                     end if;
2613
                  end;
2614
 
2615
               when SPACE =>
2616
                  exit State_Machine when Input_Pos > Last_In_Data
2617
                    or else not Is_White_Space (Data (Input_Pos));
2618
                  Input_Pos := Input_Pos + 1;
2619
 
2620
               when NSPACE =>
2621
                  exit State_Machine when Input_Pos > Last_In_Data
2622
                    or else Is_White_Space (Data (Input_Pos));
2623
                  Input_Pos := Input_Pos + 1;
2624
 
2625
               when DIGIT =>
2626
                  exit State_Machine when Input_Pos > Last_In_Data
2627
                    or else not Is_Digit (Data (Input_Pos));
2628
                  Input_Pos := Input_Pos + 1;
2629
 
2630
               when NDIGIT =>
2631
                  exit State_Machine when Input_Pos > Last_In_Data
2632
                    or else Is_Digit (Data (Input_Pos));
2633
                  Input_Pos := Input_Pos + 1;
2634
 
2635
               when ALNUM =>
2636
                  exit State_Machine when Input_Pos > Last_In_Data
2637
                    or else not Is_Alnum (Data (Input_Pos));
2638
                  Input_Pos := Input_Pos + 1;
2639
 
2640
               when NALNUM =>
2641
                  exit State_Machine when Input_Pos > Last_In_Data
2642
                    or else Is_Alnum (Data (Input_Pos));
2643
                  Input_Pos := Input_Pos + 1;
2644
 
2645
               when ANY =>
2646
                  exit State_Machine when Input_Pos > Last_In_Data
2647
                    or else Data (Input_Pos) = ASCII.LF;
2648
                  Input_Pos := Input_Pos + 1;
2649
 
2650
               when SANY =>
2651
                  exit State_Machine when Input_Pos > Last_In_Data;
2652
                  Input_Pos := Input_Pos + 1;
2653
 
2654
               when EXACT =>
2655
                  declare
2656
                     Opnd    : Pointer  := String_Operand (Scan);
2657
                     Current : Positive := Input_Pos;
2658
 
2659
                     Last    : constant Pointer :=
2660
                                 Opnd + String_Length (Program, Scan);
2661
 
2662
                  begin
2663
                     while Opnd <= Last loop
2664
                        exit State_Machine when Current > Last_In_Data
2665
                          or else Program (Opnd) /= Data (Current);
2666
                        Current := Current + 1;
2667
                        Opnd := Opnd + 1;
2668
                     end loop;
2669
 
2670
                     Input_Pos := Current;
2671
                  end;
2672
 
2673
               when EXACTF =>
2674
                  declare
2675
                     Opnd    : Pointer  := String_Operand (Scan);
2676
                     Current : Positive := Input_Pos;
2677
 
2678
                     Last    : constant Pointer :=
2679
                                 Opnd + String_Length (Program, Scan);
2680
 
2681
                  begin
2682
                     while Opnd <= Last loop
2683
                        exit State_Machine when Current > Last_In_Data
2684
                          or else Program (Opnd) /= To_Lower (Data (Current));
2685
                        Current := Current + 1;
2686
                        Opnd := Opnd + 1;
2687
                     end loop;
2688
 
2689
                     Input_Pos := Current;
2690
                  end;
2691
 
2692
               when ANYOF =>
2693
                  declare
2694
                     Bitmap : Character_Class;
2695
 
2696
                  begin
2697
                     Bitmap_Operand (Program, Scan, Bitmap);
2698
                     exit State_Machine when Input_Pos > Last_In_Data
2699
                       or else not Get_From_Class (Bitmap, Data (Input_Pos));
2700
                     Input_Pos := Input_Pos + 1;
2701
                  end;
2702
 
2703
               when OPEN =>
2704
                  declare
2705
                     No : constant Natural :=
2706
                            Character'Pos (Program (Operand (Scan)));
2707
 
2708
                  begin
2709
                     Matches_Tmp (No) := Input_Pos;
2710
                  end;
2711
 
2712
               when CLOSE =>
2713
                  declare
2714
                     No : constant Natural :=
2715
                            Character'Pos (Program (Operand (Scan)));
2716
 
2717
                  begin
2718
                     Matches_Full (No) := (Matches_Tmp (No), Input_Pos - 1);
2719
 
2720
                     if Last_Paren < No then
2721
                        Last_Paren := No;
2722
                     end if;
2723
                  end;
2724
 
2725
               when REFF =>
2726
                  declare
2727
                     No : constant Natural :=
2728
                            Character'Pos (Program (Operand (Scan)));
2729
 
2730
                     Data_Pos : Natural;
2731
 
2732
                  begin
2733
                     --  If we haven't seen that parenthesis yet
2734
 
2735
                     if Last_Paren < No then
2736
                        return False;
2737
                     end if;
2738
 
2739
                     Data_Pos := Matches_Full (No).First;
2740
 
2741
                     while Data_Pos <= Matches_Full (No).Last loop
2742
                        if Input_Pos > Last_In_Data
2743
                          or else Data (Input_Pos) /= Data (Data_Pos)
2744
                        then
2745
                           return False;
2746
                        end if;
2747
 
2748
                        Input_Pos := Input_Pos + 1;
2749
                        Data_Pos := Data_Pos + 1;
2750
                     end loop;
2751
                  end;
2752
 
2753
               when MINMOD =>
2754
                  Greedy := False;
2755
 
2756
               when STAR | PLUS | CURLY =>
2757
                  declare
2758
                     Greed : constant Boolean := Greedy;
2759
 
2760
                  begin
2761
                     Greedy := True;
2762
                     return Match_Simple_Operator (Op, Scan, Next, Greed);
2763
                  end;
2764
 
2765
               when CURLYX =>
2766
 
2767
                  --  Looking at something like:
2768
 
2769
                  --    1: CURLYX {n,m}  (->4)
2770
                  --    2:   code for complex thing  (->3)
2771
                  --    3:   WHILEM (->0)
2772
                  --    4: NOTHING
2773
 
2774
                  declare
2775
                     Min : constant Natural :=
2776
                             Read_Natural (Program, Scan + 3);
2777
                     Max : constant Natural :=
2778
                             Read_Natural (Program, Scan + 5);
2779
                     Cc  : aliased Current_Curly_Record;
2780
 
2781
                     Has_Match : Boolean;
2782
 
2783
                  begin
2784
                     Cc := (Paren_Floor => Last_Paren,
2785
                            Cur         => -1,
2786
                            Min         => Min,
2787
                            Max         => Max,
2788
                            Greedy      => Greedy,
2789
                            Scan        => Scan + 7,
2790
                            Next        => Next,
2791
                            Lastloc     => 0,
2792
                            Old_Cc      => Current_Curly);
2793
                     Current_Curly := Cc'Unchecked_Access;
2794
 
2795
                     Has_Match := Match (Next - 3);
2796
 
2797
                     --  Start on the WHILEM
2798
 
2799
                     Current_Curly := Cc.Old_Cc;
2800
                     return Has_Match;
2801
                  end;
2802
 
2803
               when WHILEM =>
2804
                  return Match_Whilem (IP);
2805
            end case;
2806
 
2807
            Scan := Next;
2808
         end loop State_Machine;
2809
 
2810
         --  If we get here, there is no match.
2811
         --  For successful matches when EOP is the terminating point.
2812
 
2813
         return False;
2814
      end Match;
2815
 
2816
      ---------------------------
2817
      -- Match_Simple_Operator --
2818
      ---------------------------
2819
 
2820
      function Match_Simple_Operator
2821
        (Op     : Opcode;
2822
         Scan   : Pointer;
2823
         Next   : Pointer;
2824
         Greedy : Boolean) return Boolean
2825
      is
2826
         Next_Char       : Character := ASCII.Nul;
2827
         Next_Char_Known : Boolean := False;
2828
         No              : Integer;  --  Can be negative
2829
         Min             : Natural;
2830
         Max             : Natural := Natural'Last;
2831
         Operand_Code    : Pointer;
2832
         Old             : Natural;
2833
         Last_Pos        : Natural;
2834
         Save            : constant Natural := Input_Pos;
2835
 
2836
      begin
2837
         --  Lookahead to avoid useless match attempts
2838
         --  when we know what character comes next.
2839
 
2840
         if Program (Next) = EXACT then
2841
            Next_Char := Program (String_Operand (Next));
2842
            Next_Char_Known := True;
2843
         end if;
2844
 
2845
         --  Find the minimal and maximal values for the operator
2846
 
2847
         case Op is
2848
            when STAR =>
2849
               Min := 0;
2850
               Operand_Code := Operand (Scan);
2851
 
2852
            when PLUS =>
2853
               Min := 1;
2854
               Operand_Code := Operand (Scan);
2855
 
2856
            when others =>
2857
               Min := Read_Natural (Program, Scan + 3);
2858
               Max := Read_Natural (Program, Scan + 5);
2859
               Operand_Code := Scan + 7;
2860
         end case;
2861
 
2862
         --  Non greedy operators
2863
 
2864
         if not Greedy then
2865
 
2866
            --  Test the minimal repetitions
2867
 
2868
            if Min /= 0
2869
              and then Repeat (Operand_Code, Min) < Min
2870
            then
2871
               return False;
2872
            end if;
2873
 
2874
            Old := Input_Pos;
2875
 
2876
            --  Find the place where 'next' could work
2877
 
2878
            if Next_Char_Known then
2879
               --  Last position to check
2880
 
2881
               Last_Pos := Input_Pos + Max;
2882
 
2883
               if Last_Pos > Last_In_Data
2884
                 or else Max = Natural'Last
2885
               then
2886
                  Last_Pos := Last_In_Data;
2887
               end if;
2888
 
2889
               --  Look for the first possible opportunity
2890
 
2891
               loop
2892
                  --  Find the next possible position
2893
 
2894
                  while Input_Pos <= Last_Pos
2895
                    and then Data (Input_Pos) /= Next_Char
2896
                  loop
2897
                     Input_Pos := Input_Pos + 1;
2898
                  end loop;
2899
 
2900
                  if Input_Pos > Last_Pos then
2901
                     return False;
2902
                  end if;
2903
 
2904
                  --  Check that we still match if we stop
2905
                  --  at the position we just found.
2906
 
2907
                  declare
2908
                     Num : constant Natural := Input_Pos - Old;
2909
 
2910
                  begin
2911
                     Input_Pos := Old;
2912
 
2913
                     if Repeat (Operand_Code, Num) < Num then
2914
                        return False;
2915
                     end if;
2916
                  end;
2917
 
2918
                  --  Input_Pos now points to the new position
2919
 
2920
                  if Match (Get_Next (Program, Scan)) then
2921
                     return True;
2922
                  end if;
2923
 
2924
                  Old := Input_Pos;
2925
                  Input_Pos := Input_Pos + 1;
2926
               end loop;
2927
 
2928
            --  We know what the next character is
2929
 
2930
            else
2931
               while Max >= Min loop
2932
 
2933
                  --  If the next character matches
2934
 
2935
                  if Match (Next) then
2936
                     return True;
2937
                  end if;
2938
 
2939
                  Input_Pos := Save + Min;
2940
 
2941
                  --  Could not or did not match -- move forward
2942
 
2943
                  if Repeat (Operand_Code, 1) /= 0 then
2944
                     Min := Min + 1;
2945
                  else
2946
                     return False;
2947
                  end if;
2948
               end loop;
2949
            end if;
2950
 
2951
            return False;
2952
 
2953
         --  Greedy operators
2954
 
2955
         else
2956
            No := Repeat (Operand_Code, Max);
2957
 
2958
            --  ??? Perl has some special code here in case the
2959
            --  next instruction is of type EOL, since $ and \Z
2960
            --  can match before *and* after newline at the end.
2961
 
2962
            --  ??? Perl has some special code here in case (paren)
2963
            --  is True.
2964
 
2965
            --  Else, if we don't have any parenthesis
2966
 
2967
            while No >= Min loop
2968
               if not Next_Char_Known
2969
                 or else (Input_Pos <= Last_In_Data
2970
                           and then Data (Input_Pos) = Next_Char)
2971
               then
2972
                  if Match (Next) then
2973
                     return True;
2974
                  end if;
2975
               end if;
2976
 
2977
               --  Could not or did not work, we back up
2978
 
2979
               No := No - 1;
2980
               Input_Pos := Save + No;
2981
            end loop;
2982
 
2983
            return False;
2984
         end if;
2985
      end Match_Simple_Operator;
2986
 
2987
      ------------------
2988
      -- Match_Whilem --
2989
      ------------------
2990
 
2991
      --  This is really hard to understand, because after we match what we
2992
      --  are trying to match, we must make sure the rest of the REx is going
2993
      --  to match for sure, and to do that we have to go back UP the parse
2994
      --  tree by recursing ever deeper.  And if it fails, we have to reset
2995
      --  our parent's current state that we can try again after backing off.
2996
 
2997
      function Match_Whilem (IP : Pointer) return Boolean is
2998
         pragma Unreferenced (IP);
2999
 
3000
         Cc : constant Current_Curly_Access := Current_Curly;
3001
         N  : constant Natural              := Cc.Cur + 1;
3002
         Ln : Natural                       := 0;
3003
 
3004
         Lastloc : constant Natural := Cc.Lastloc;
3005
         --  Detection of 0-len
3006
 
3007
      begin
3008
         --  If degenerate scan matches "", assume scan done
3009
 
3010
         if Input_Pos = Cc.Lastloc
3011
           and then N >= Cc.Min
3012
         then
3013
            --  Temporarily restore the old context, and check that we
3014
            --  match was comes after CURLYX.
3015
 
3016
            Current_Curly := Cc.Old_Cc;
3017
 
3018
            if Current_Curly /= null then
3019
               Ln := Current_Curly.Cur;
3020
            end if;
3021
 
3022
            if Match (Cc.Next) then
3023
               return True;
3024
            end if;
3025
 
3026
            if Current_Curly /= null then
3027
               Current_Curly.Cur := Ln;
3028
            end if;
3029
 
3030
            Current_Curly := Cc;
3031
            return False;
3032
         end if;
3033
 
3034
         --  First, just match a string of min scans
3035
 
3036
         if N < Cc.Min then
3037
            Cc.Cur := N;
3038
            Cc.Lastloc := Input_Pos;
3039
 
3040
            if Match (Cc.Scan) then
3041
               return True;
3042
            end if;
3043
 
3044
            Cc.Cur := N - 1;
3045
            Cc.Lastloc := Lastloc;
3046
            return False;
3047
         end if;
3048
 
3049
         --  Prefer next over scan for minimal matching
3050
 
3051
         if not Cc.Greedy then
3052
            Current_Curly := Cc.Old_Cc;
3053
 
3054
            if Current_Curly /= null then
3055
               Ln := Current_Curly.Cur;
3056
            end if;
3057
 
3058
            if Recurse_Match (Cc.Next, Cc.Paren_Floor) then
3059
               return True;
3060
            end if;
3061
 
3062
            if Current_Curly /= null then
3063
               Current_Curly.Cur := Ln;
3064
            end if;
3065
 
3066
            Current_Curly := Cc;
3067
 
3068
            --  Maximum greed exceeded ?
3069
 
3070
            if N >= Cc.Max then
3071
               return False;
3072
            end if;
3073
 
3074
            --  Try scanning more and see if it helps
3075
            Cc.Cur := N;
3076
            Cc.Lastloc := Input_Pos;
3077
 
3078
            if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3079
               return True;
3080
            end if;
3081
 
3082
            Cc.Cur := N - 1;
3083
            Cc.Lastloc := Lastloc;
3084
            return False;
3085
         end if;
3086
 
3087
         --  Prefer scan over next for maximal matching
3088
 
3089
         if N < Cc.Max then   --  more greed allowed ?
3090
            Cc.Cur := N;
3091
            Cc.Lastloc := Input_Pos;
3092
 
3093
            if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3094
               return True;
3095
            end if;
3096
         end if;
3097
 
3098
         --  Failed deeper matches of scan, so see if this one works
3099
 
3100
         Current_Curly := Cc.Old_Cc;
3101
 
3102
         if Current_Curly /= null then
3103
            Ln := Current_Curly.Cur;
3104
         end if;
3105
 
3106
         if Match (Cc.Next) then
3107
            return True;
3108
         end if;
3109
 
3110
         if Current_Curly /= null then
3111
            Current_Curly.Cur := Ln;
3112
         end if;
3113
 
3114
         Current_Curly := Cc;
3115
         Cc.Cur := N - 1;
3116
         Cc.Lastloc := Lastloc;
3117
         return False;
3118
      end Match_Whilem;
3119
 
3120
      ------------
3121
      -- Repeat --
3122
      ------------
3123
 
3124
      function Repeat
3125
        (IP  : Pointer;
3126
         Max : Natural := Natural'Last) return Natural
3127
      is
3128
         Scan  : Natural := Input_Pos;
3129
         Last  : Natural;
3130
         Op    : constant Opcode := Opcode'Val (Character'Pos (Program (IP)));
3131
         Count : Natural;
3132
         C     : Character;
3133
         Is_First : Boolean := True;
3134
         Bitmap   : Character_Class;
3135
 
3136
      begin
3137
         if Max = Natural'Last or else Scan + Max - 1 > Last_In_Data then
3138
            Last := Last_In_Data;
3139
         else
3140
            Last := Scan + Max - 1;
3141
         end if;
3142
 
3143
         case Op is
3144
            when ANY =>
3145
               while Scan <= Last
3146
                 and then Data (Scan) /= ASCII.LF
3147
               loop
3148
                  Scan := Scan + 1;
3149
               end loop;
3150
 
3151
            when SANY =>
3152
               Scan := Last + 1;
3153
 
3154
            when EXACT =>
3155
 
3156
               --  The string has only one character if Repeat was called
3157
 
3158
               C := Program (String_Operand (IP));
3159
               while Scan <= Last
3160
                 and then C = Data (Scan)
3161
               loop
3162
                  Scan := Scan + 1;
3163
               end loop;
3164
 
3165
            when EXACTF =>
3166
 
3167
               --  The string has only one character if Repeat was called
3168
 
3169
               C := Program (String_Operand (IP));
3170
               while Scan <= Last
3171
                 and then To_Lower (C) = Data (Scan)
3172
               loop
3173
                  Scan := Scan + 1;
3174
               end loop;
3175
 
3176
            when ANYOF =>
3177
               if Is_First then
3178
                  Bitmap_Operand (Program, IP, Bitmap);
3179
                  Is_First := False;
3180
               end if;
3181
 
3182
               while Scan <= Last
3183
                 and then Get_From_Class (Bitmap, Data (Scan))
3184
               loop
3185
                  Scan := Scan + 1;
3186
               end loop;
3187
 
3188
            when ALNUM =>
3189
               while Scan <= Last
3190
                 and then Is_Alnum (Data (Scan))
3191
               loop
3192
                  Scan := Scan + 1;
3193
               end loop;
3194
 
3195
            when NALNUM =>
3196
               while Scan <= Last
3197
                 and then not Is_Alnum (Data (Scan))
3198
               loop
3199
                  Scan := Scan + 1;
3200
               end loop;
3201
 
3202
            when SPACE =>
3203
               while Scan <= Last
3204
                 and then Is_White_Space (Data (Scan))
3205
               loop
3206
                  Scan := Scan + 1;
3207
               end loop;
3208
 
3209
            when NSPACE =>
3210
               while Scan <= Last
3211
                 and then not Is_White_Space (Data (Scan))
3212
               loop
3213
                  Scan := Scan + 1;
3214
               end loop;
3215
 
3216
            when DIGIT  =>
3217
               while Scan <= Last
3218
                 and then Is_Digit (Data (Scan))
3219
               loop
3220
                  Scan := Scan + 1;
3221
               end loop;
3222
 
3223
            when NDIGIT  =>
3224
               while Scan <= Last
3225
                 and then not Is_Digit (Data (Scan))
3226
               loop
3227
                  Scan := Scan + 1;
3228
               end loop;
3229
 
3230
            when others =>
3231
               raise Program_Error;
3232
         end case;
3233
 
3234
         Count := Scan - Input_Pos;
3235
         Input_Pos := Scan;
3236
         return Count;
3237
      end Repeat;
3238
 
3239
      ---------
3240
      -- Try --
3241
      ---------
3242
 
3243
      function Try (Pos : Positive) return Boolean is
3244
      begin
3245
         Input_Pos  := Pos;
3246
         Last_Paren := 0;
3247
         Matches_Full := (others => No_Match);
3248
 
3249
         if Match (Program_First + 1) then
3250
            Matches_Full (0) := (Pos, Input_Pos - 1);
3251
            return True;
3252
         end if;
3253
 
3254
         return False;
3255
      end Try;
3256
 
3257
   --  Start of processing for Match
3258
 
3259
   begin
3260
      --  Do we have the regexp Never_Match?
3261
 
3262
      if Self.Size = 0 then
3263
         Matches (0) := No_Match;
3264
         return;
3265
      end if;
3266
 
3267
      --  Check validity of program
3268
 
3269
      pragma Assert
3270
        (Program (Program_First) = MAGIC,
3271
         "Corrupted Pattern_Matcher");
3272
 
3273
      --  If there is a "must appear" string, look for it
3274
 
3275
      if Self.Must_Have_Length > 0 then
3276
         declare
3277
            First      : constant Character := Program (Self.Must_Have);
3278
            Must_First : constant Pointer := Self.Must_Have;
3279
            Must_Last  : constant Pointer :=
3280
                           Must_First + Pointer (Self.Must_Have_Length - 1);
3281
            Next_Try   : Natural := Index (First_In_Data, First);
3282
 
3283
         begin
3284
            while Next_Try /= 0
3285
              and then Data (Next_Try .. Next_Try + Self.Must_Have_Length - 1)
3286
                          = String (Program (Must_First .. Must_Last))
3287
            loop
3288
               Next_Try := Index (Next_Try + 1, First);
3289
            end loop;
3290
 
3291
            if Next_Try = 0 then
3292
               Matches_Full := (others => No_Match);
3293
               return;                  -- Not present
3294
            end if;
3295
         end;
3296
      end if;
3297
 
3298
      --  Mark beginning of line for ^
3299
 
3300
      BOL_Pos := Data'First;
3301
 
3302
      --  Simplest case first: an anchored match need be tried only once
3303
 
3304
      if Self.Anchored and then (Self.Flags and Multiple_Lines) = 0 then
3305
         Matched := Try (First_In_Data);
3306
 
3307
      elsif Self.Anchored then
3308
         declare
3309
            Next_Try : Natural := First_In_Data;
3310
         begin
3311
            --  Test the first position in the buffer
3312
            Matched := Try (Next_Try);
3313
 
3314
            --  Else only test after newlines
3315
 
3316
            if not Matched then
3317
               while Next_Try <= Last_In_Data loop
3318
                  while Next_Try <= Last_In_Data
3319
                    and then Data (Next_Try) /= ASCII.LF
3320
                  loop
3321
                     Next_Try := Next_Try + 1;
3322
                  end loop;
3323
 
3324
                  Next_Try := Next_Try + 1;
3325
 
3326
                  if Next_Try <= Last_In_Data then
3327
                     Matched := Try (Next_Try);
3328
                     exit when Matched;
3329
                  end if;
3330
               end loop;
3331
            end if;
3332
         end;
3333
 
3334
      elsif Self.First /= ASCII.NUL then
3335
         --  We know what char it must start with
3336
 
3337
         declare
3338
            Next_Try : Natural := Index (First_In_Data, Self.First);
3339
 
3340
         begin
3341
            while Next_Try /= 0 loop
3342
               Matched := Try (Next_Try);
3343
               exit when Matched;
3344
               Next_Try := Index (Next_Try + 1, Self.First);
3345
            end loop;
3346
         end;
3347
 
3348
      else
3349
         --  Messy cases: try all locations (including for the empty string)
3350
 
3351
         Matched := Try (First_In_Data);
3352
 
3353
         if not Matched then
3354
            for S in First_In_Data + 1 .. Last_In_Data loop
3355
               Matched := Try (S);
3356
               exit when Matched;
3357
            end loop;
3358
         end if;
3359
      end if;
3360
 
3361
      --  Matched has its value
3362
 
3363
      for J in Last_Paren + 1 .. Matches'Last loop
3364
         Matches_Full (J) := No_Match;
3365
      end loop;
3366
 
3367
      Matches := Matches_Full (Matches'Range);
3368
      return;
3369
   end Match;
3370
 
3371
   -----------
3372
   -- Match --
3373
   -----------
3374
 
3375
   function Match
3376
     (Self       : Pattern_Matcher;
3377
      Data       : String;
3378
      Data_First : Integer := -1;
3379
      Data_Last  : Positive := Positive'Last) return Natural
3380
   is
3381
      Matches : Match_Array (0 .. 0);
3382
 
3383
   begin
3384
      Match (Self, Data, Matches, Data_First, Data_Last);
3385
      if Matches (0) = No_Match then
3386
         return Data'First - 1;
3387
      else
3388
         return Matches (0).First;
3389
      end if;
3390
   end Match;
3391
 
3392
   function Match
3393
     (Self       : Pattern_Matcher;
3394
      Data       : String;
3395
      Data_First : Integer  := -1;
3396
      Data_Last  : Positive := Positive'Last) return Boolean
3397
   is
3398
      Matches : Match_Array (0 .. 0);
3399
 
3400
   begin
3401
      Match (Self, Data, Matches, Data_First, Data_Last);
3402
      return Matches (0).First >= Data'First;
3403
   end Match;
3404
 
3405
   procedure Match
3406
     (Expression : String;
3407
      Data       : String;
3408
      Matches    : out Match_Array;
3409
      Size       : Program_Size := Auto_Size;
3410
      Data_First : Integer      := -1;
3411
      Data_Last  : Positive     := Positive'Last)
3412
   is
3413
      PM            : Pattern_Matcher (Size);
3414
      Finalize_Size : Program_Size;
3415
 
3416
   begin
3417
      if Size = 0 then
3418
         Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3419
      else
3420
         Compile (PM, Expression, Finalize_Size);
3421
         Match (PM, Data, Matches, Data_First, Data_Last);
3422
      end if;
3423
   end Match;
3424
 
3425
   -----------
3426
   -- Match --
3427
   -----------
3428
 
3429
   function Match
3430
     (Expression : String;
3431
      Data       : String;
3432
      Size       : Program_Size := Auto_Size;
3433
      Data_First : Integer      := -1;
3434
      Data_Last  : Positive     := Positive'Last) return Natural
3435
   is
3436
      PM         : Pattern_Matcher (Size);
3437
      Final_Size : Program_Size; -- unused
3438
 
3439
   begin
3440
      if Size = 0 then
3441
         return Match (Compile (Expression), Data, Data_First, Data_Last);
3442
      else
3443
         Compile (PM, Expression, Final_Size);
3444
         return Match (PM, Data, Data_First, Data_Last);
3445
      end if;
3446
   end Match;
3447
 
3448
   -----------
3449
   -- Match --
3450
   -----------
3451
 
3452
   function  Match
3453
     (Expression : String;
3454
      Data       : String;
3455
      Size       : Program_Size := Auto_Size;
3456
      Data_First : Integer      := -1;
3457
      Data_Last  : Positive     := Positive'Last) return Boolean
3458
   is
3459
      Matches    : Match_Array (0 .. 0);
3460
      PM         : Pattern_Matcher (Size);
3461
      Final_Size : Program_Size; -- unused
3462
 
3463
   begin
3464
      if Size = 0 then
3465
         Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3466
      else
3467
         Compile (PM, Expression, Final_Size);
3468
         Match (PM, Data, Matches, Data_First, Data_Last);
3469
      end if;
3470
 
3471
      return Matches (0).First >= Data'First;
3472
   end Match;
3473
 
3474
   -------------
3475
   -- Operand --
3476
   -------------
3477
 
3478
   function Operand (P : Pointer) return Pointer is
3479
   begin
3480
      return P + 3;
3481
   end Operand;
3482
 
3483
   --------------
3484
   -- Optimize --
3485
   --------------
3486
 
3487
   procedure Optimize (Self : in out Pattern_Matcher) is
3488
      Max_Length  : Program_Size;
3489
      This_Length : Program_Size;
3490
      Longest     : Pointer;
3491
      Scan        : Pointer;
3492
      Program     : Program_Data renames Self.Program;
3493
 
3494
   begin
3495
      --  Start with safe defaults (no optimization):
3496
      --    *  No known first character of match
3497
      --    *  Does not necessarily start at beginning of line
3498
      --    *  No string known that has to appear in data
3499
 
3500
      Self.First := ASCII.NUL;
3501
      Self.Anchored := False;
3502
      Self.Must_Have := Program'Last + 1;
3503
      Self.Must_Have_Length := 0;
3504
 
3505
      Scan := Program_First + 1;  --  First instruction (can be anything)
3506
 
3507
      if Program (Scan) = EXACT then
3508
         Self.First := Program (String_Operand (Scan));
3509
 
3510
      elsif Program (Scan) = BOL
3511
        or else Program (Scan) = SBOL
3512
        or else Program (Scan) = MBOL
3513
      then
3514
         Self.Anchored := True;
3515
      end if;
3516
 
3517
      --  If there's something expensive in the regexp, find the
3518
      --  longest literal string that must appear and make it the
3519
      --  regmust. Resolve ties in favor of later strings, since
3520
      --  the regstart check works with the beginning of the regexp.
3521
      --  and avoiding duplication strengthens checking. Not a
3522
      --  strong reason, but sufficient in the absence of others.
3523
 
3524
      if False then -- if Flags.SP_Start then ???
3525
         Longest := 0;
3526
         Max_Length := 0;
3527
         while Scan /= 0 loop
3528
            if Program (Scan) = EXACT or else Program (Scan) = EXACTF then
3529
               This_Length := String_Length (Program, Scan);
3530
 
3531
               if This_Length >= Max_Length then
3532
                  Longest := String_Operand (Scan);
3533
                  Max_Length := This_Length;
3534
               end if;
3535
            end if;
3536
 
3537
            Scan := Get_Next (Program, Scan);
3538
         end loop;
3539
 
3540
         Self.Must_Have        := Longest;
3541
         Self.Must_Have_Length := Natural (Max_Length) + 1;
3542
      end if;
3543
   end Optimize;
3544
 
3545
   -----------------
3546
   -- Paren_Count --
3547
   -----------------
3548
 
3549
   function Paren_Count (Regexp : Pattern_Matcher) return Match_Count is
3550
   begin
3551
      return Regexp.Paren_Count;
3552
   end Paren_Count;
3553
 
3554
   -----------
3555
   -- Quote --
3556
   -----------
3557
 
3558
   function Quote (Str : String) return String is
3559
      S    : String (1 .. Str'Length * 2);
3560
      Last : Natural := 0;
3561
 
3562
   begin
3563
      for J in Str'Range loop
3564
         case Str (J) is
3565
            when '^' | '$' | '|' | '*' | '+' | '?' | '{' |
3566
                 '}' | '[' | ']' | '(' | ')' | '\' =>
3567
 
3568
               S (Last + 1) := '\';
3569
               S (Last + 2) := Str (J);
3570
               Last := Last + 2;
3571
 
3572
            when others =>
3573
               S (Last + 1) := Str (J);
3574
               Last := Last + 1;
3575
         end case;
3576
      end loop;
3577
 
3578
      return S (1 .. Last);
3579
   end Quote;
3580
 
3581
   ------------------
3582
   -- Read_Natural --
3583
   ------------------
3584
 
3585
   function Read_Natural
3586
     (Program : Program_Data;
3587
      IP      : Pointer) return Natural
3588
   is
3589
   begin
3590
      return Character'Pos (Program (IP)) +
3591
               256 * Character'Pos (Program (IP + 1));
3592
   end Read_Natural;
3593
 
3594
   -----------------
3595
   -- Reset_Class --
3596
   -----------------
3597
 
3598
   procedure Reset_Class (Bitmap : out Character_Class) is
3599
   begin
3600
      Bitmap := (others => 0);
3601
   end Reset_Class;
3602
 
3603
   ------------------
3604
   -- Set_In_Class --
3605
   ------------------
3606
 
3607
   procedure Set_In_Class
3608
     (Bitmap : in out Character_Class;
3609
      C      : Character)
3610
   is
3611
      Value : constant Class_Byte := Character'Pos (C);
3612
   begin
3613
      Bitmap (Value / 8) := Bitmap (Value / 8)
3614
        or Bit_Conversion (Value mod 8);
3615
   end Set_In_Class;
3616
 
3617
   -------------------
3618
   -- String_Length --
3619
   -------------------
3620
 
3621
   function String_Length
3622
     (Program : Program_Data;
3623
      P       : Pointer) return Program_Size
3624
   is
3625
   begin
3626
      pragma Assert (Program (P) = EXACT or else Program (P) = EXACTF);
3627
      return Character'Pos (Program (P + 3));
3628
   end String_Length;
3629
 
3630
   --------------------
3631
   -- String_Operand --
3632
   --------------------
3633
 
3634
   function String_Operand (P : Pointer) return Pointer is
3635
   begin
3636
      return P + 4;
3637
   end String_Operand;
3638
 
3639
end GNAT.Regpat;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.