OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [ada/] [s-regpat.adb] - Blame information for rev 300

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 281 jeremybenn
------------------------------------------------------------------------------
2
--                                                                          --
3
--                         GNAT LIBRARY COMPONENTS                          --
4
--                                                                          --
5
--                          G N A T . R E G P A T                           --
6
--                                                                          --
7
--                                 B o d y                                  --
8
--                                                                          --
9
--               Copyright (C) 1986 by University of Toronto.               --
10
--                      Copyright (C) 1999-2009, AdaCore                    --
11
--                                                                          --
12
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
13
-- terms of the  GNU General Public License as published  by the Free Soft- --
14
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
15
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
16
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
17
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
18
-- for  more details.  You should have  received  a copy of the GNU General --
19
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
20
-- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
21
-- Boston, MA 02110-1301, USA.                                              --
22
--                                                                          --
23
-- As a special exception,  if other files  instantiate  generics from this --
24
-- unit, or you link  this unit with other files  to produce an executable, --
25
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
26
-- covered  by the  GNU  General  Public  License.  This exception does not --
27
-- however invalidate  any other reasons why  the executable file  might be --
28
-- covered by the  GNU Public License.                                      --
29
--                                                                          --
30
-- GNAT was originally developed  by the GNAT team at  New York University. --
31
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
32
--                                                                          --
33
------------------------------------------------------------------------------
34
 
35
--  This is an altered Ada 95 version of the original V8 style regular
36
--  expression library written in C by Henry Spencer. Apart from the
37
--  translation to Ada, the interface has been considerably changed to
38
--  use the Ada String type instead of C-style nul-terminated strings.
39
 
40
--  Beware that some of this code is subtly aware of the way operator
41
--  precedence is structured in regular expressions. Serious changes in
42
--  regular-expression syntax might require a total rethink.
43
 
44
with System.IO;               use System.IO;
45
with Ada.Characters.Handling; use Ada.Characters.Handling;
46
with Ada.Unchecked_Conversion;
47
 
48
package body System.Regpat is
49
 
50
   MAGIC : constant Character := Character'Val (10#0234#);
51
   --  The first byte of the regexp internal "program" is actually
52
   --  this magic number; the start node begins in the second byte.
53
   --
54
   --  This is used to make sure that a regular expression was correctly
55
   --  compiled.
56
 
57
   ----------------------------
58
   -- Implementation details --
59
   ----------------------------
60
 
61
   --  This is essentially a linear encoding of a nondeterministic
62
   --  finite-state machine, also known as syntax charts or
63
   --  "railroad normal form" in parsing technology.
64
 
65
   --  Each node is an opcode plus a "next" pointer, possibly plus an
66
   --  operand. "Next" pointers of all nodes except BRANCH implement
67
   --  concatenation; a "next" pointer with a BRANCH on both ends of it
68
   --  is connecting two alternatives.
69
 
70
   --  The operand of some types of node is a literal string; for others,
71
   --  it is a node leading into a sub-FSM. In particular, the operand of
72
   --  a BRANCH node is the first node of the branch.
73
   --  (NB this is *not* a tree structure:  the tail of the branch connects
74
   --  to the thing following the set of BRANCHes).
75
 
76
   --  You can see the exact byte-compiled version by using the Dump
77
   --  subprogram. However, here are a few examples:
78
 
79
   --  (a|b):  1 : MAGIC
80
   --          2 : BRANCH  (next at  10)
81
   --          5 :    EXACT  (next at  18)   operand=a
82
   --         10 : BRANCH  (next at  18)
83
   --         13 :    EXACT  (next at  18)   operand=b
84
   --         18 : EOP  (next at 0)
85
   --
86
   --  (ab)*:  1 : MAGIC
87
   --          2 : CURLYX  (next at  26)  { 0, 32767}
88
   --          9 :    OPEN 1  (next at  13)
89
   --         13 :       EXACT  (next at  19)   operand=ab
90
   --         19 :    CLOSE 1  (next at  23)
91
   --         23 :    WHILEM  (next at 0)
92
   --         26 : NOTHING  (next at  29)
93
   --         29 : EOP  (next at 0)
94
 
95
   --  The opcodes are:
96
 
97
   type Opcode is
98
 
99
      --  Name          Operand?  Meaning
100
 
101
     (EOP,        -- no        End of program
102
      MINMOD,     -- no        Next operator is not greedy
103
 
104
      --  Classes of characters
105
 
106
      ANY,        -- no        Match any one character except newline
107
      SANY,       -- no        Match any character, including new line
108
      ANYOF,      -- class     Match any character in this class
109
      EXACT,      -- str       Match this string exactly
110
      EXACTF,     -- str       Match this string (case-folding is one)
111
      NOTHING,    -- no        Match empty string
112
      SPACE,      -- no        Match any whitespace character
113
      NSPACE,     -- no        Match any non-whitespace character
114
      DIGIT,      -- no        Match any numeric character
115
      NDIGIT,     -- no        Match any non-numeric character
116
      ALNUM,      -- no        Match any alphanumeric character
117
      NALNUM,     -- no        Match any non-alphanumeric character
118
 
119
      --  Branches
120
 
121
      BRANCH,     -- node      Match this alternative, or the next
122
 
123
      --  Simple loops (when the following node is one character in length)
124
 
125
      STAR,       -- node      Match this simple thing 0 or more times
126
      PLUS,       -- node      Match this simple thing 1 or more times
127
      CURLY,      -- 2num node Match this simple thing between n and m times.
128
 
129
      --  Complex loops
130
 
131
      CURLYX,     -- 2num node Match this complex thing {n,m} times
132
      --                       The nums are coded on two characters each
133
 
134
      WHILEM,     -- no        Do curly processing and see if rest matches
135
 
136
      --  Matches after or before a word
137
 
138
      BOL,        -- no        Match "" at beginning of line
139
      MBOL,       -- no        Same, assuming multiline (match after \n)
140
      SBOL,       -- no        Same, assuming single line (don't match at \n)
141
      EOL,        -- no        Match "" at end of line
142
      MEOL,       -- no        Same, assuming multiline (match before \n)
143
      SEOL,       -- no        Same, assuming single line (don't match at \n)
144
 
145
      BOUND,      -- no        Match "" at any word boundary
146
      NBOUND,     -- no        Match "" at any word non-boundary
147
 
148
      --  Parenthesis groups handling
149
 
150
      REFF,       -- num       Match some already matched string, folded
151
      OPEN,       -- num       Mark this point in input as start of #n
152
      CLOSE);     -- num       Analogous to OPEN
153
 
154
   for Opcode'Size use 8;
155
 
156
   --  Opcode notes:
157
 
158
   --  BRANCH
159
   --    The set of branches constituting a single choice are hooked
160
   --    together with their "next" pointers, since precedence prevents
161
   --    anything being concatenated to any individual branch. The
162
   --    "next" pointer of the last BRANCH in a choice points to the
163
   --    thing following the whole choice. This is also where the
164
   --    final "next" pointer of each individual branch points; each
165
   --    branch starts with the operand node of a BRANCH node.
166
 
167
   --  STAR,PLUS
168
   --    '?', and complex '*' and '+', are implemented with CURLYX.
169
   --    branches. Simple cases (one character per match) are implemented with
170
   --    STAR and PLUS for speed and to minimize recursive plunges.
171
 
172
   --  OPEN,CLOSE
173
   --    ...are numbered at compile time.
174
 
175
   --  EXACT, EXACTF
176
   --    There are in fact two arguments, the first one is the length (minus
177
   --    one of the string argument), coded on one character, the second
178
   --    argument is the string itself, coded on length + 1 characters.
179
 
180
   --  A node is one char of opcode followed by two chars of "next" pointer.
181
   --  "Next" pointers are stored as two 8-bit pieces, high order first. The
182
   --  value is a positive offset from the opcode of the node containing it.
183
   --  An operand, if any, simply follows the node. (Note that much of the
184
   --  code generation knows about this implicit relationship.)
185
 
186
   --  Using two bytes for the "next" pointer is vast overkill for most
187
   --  things, but allows patterns to get big without disasters.
188
 
189
   -----------------------
190
   -- Character classes --
191
   -----------------------
192
   --  This is the implementation for character classes ([...]) in the
193
   --  syntax for regular expressions. Each character (0..256) has an
194
   --  entry into the table. This makes for a very fast matching
195
   --  algorithm.
196
 
197
   type Class_Byte is mod 256;
198
   type Character_Class is array (Class_Byte range 0 .. 31) of Class_Byte;
199
 
200
   type Bit_Conversion_Array is array (Class_Byte range 0 .. 7) of Class_Byte;
201
   Bit_Conversion : constant Bit_Conversion_Array :=
202
                      (1, 2, 4, 8, 16, 32, 64, 128);
203
 
204
   type Std_Class is (ANYOF_NONE,
205
                      ANYOF_ALNUM,   --  Alphanumeric class [a-zA-Z0-9]
206
                      ANYOF_NALNUM,
207
                      ANYOF_SPACE,   --  Space class [ \t\n\r\f]
208
                      ANYOF_NSPACE,
209
                      ANYOF_DIGIT,   --  Digit class [0-9]
210
                      ANYOF_NDIGIT,
211
                      ANYOF_ALNUMC,  --  Alphanumeric class [a-zA-Z0-9]
212
                      ANYOF_NALNUMC,
213
                      ANYOF_ALPHA,   --  Alpha class [a-zA-Z]
214
                      ANYOF_NALPHA,
215
                      ANYOF_ASCII,   --  Ascii class (7 bits) 0..127
216
                      ANYOF_NASCII,
217
                      ANYOF_CNTRL,   --  Control class
218
                      ANYOF_NCNTRL,
219
                      ANYOF_GRAPH,   --  Graphic class
220
                      ANYOF_NGRAPH,
221
                      ANYOF_LOWER,   --  Lower case class [a-z]
222
                      ANYOF_NLOWER,
223
                      ANYOF_PRINT,   --  printable class
224
                      ANYOF_NPRINT,
225
                      ANYOF_PUNCT,   --
226
                      ANYOF_NPUNCT,
227
                      ANYOF_UPPER,   --  Upper case class [A-Z]
228
                      ANYOF_NUPPER,
229
                      ANYOF_XDIGIT,  --  Hexadecimal digit
230
                      ANYOF_NXDIGIT
231
                      );
232
 
233
   procedure Set_In_Class
234
     (Bitmap : in out Character_Class;
235
      C      : Character);
236
   --  Set the entry to True for C in the class Bitmap
237
 
238
   function Get_From_Class
239
     (Bitmap : Character_Class;
240
      C      : Character) return Boolean;
241
   --  Return True if the entry is set for C in the class Bitmap
242
 
243
   procedure Reset_Class (Bitmap : out Character_Class);
244
   --  Clear all the entries in the class Bitmap
245
 
246
   pragma Inline (Set_In_Class);
247
   pragma Inline (Get_From_Class);
248
   pragma Inline (Reset_Class);
249
 
250
   -----------------------
251
   -- Local Subprograms --
252
   -----------------------
253
 
254
   function "=" (Left : Character; Right : Opcode) return Boolean;
255
 
256
   function Is_Alnum (C : Character) return Boolean;
257
   --  Return True if C is an alphanum character or an underscore ('_')
258
 
259
   function Is_White_Space (C : Character) return Boolean;
260
   --  Return True if C is a whitespace character
261
 
262
   function Is_Printable (C : Character) return Boolean;
263
   --  Return True if C is a printable character
264
 
265
   function Operand (P : Pointer) return Pointer;
266
   --  Return a pointer to the first operand of the node at P
267
 
268
   function String_Length
269
     (Program : Program_Data;
270
      P       : Pointer) return Program_Size;
271
   --  Return the length of the string argument of the node at P
272
 
273
   function String_Operand (P : Pointer) return Pointer;
274
   --  Return a pointer to the string argument of the node at P
275
 
276
   procedure Bitmap_Operand
277
     (Program : Program_Data;
278
      P       : Pointer;
279
      Op      : out Character_Class);
280
   --  Return a pointer to the string argument of the node at P
281
 
282
   function Get_Next_Offset
283
     (Program : Program_Data;
284
      IP      : Pointer) return Pointer;
285
   --  Get the offset field of a node. Used by Get_Next
286
 
287
   function Get_Next
288
     (Program : Program_Data;
289
      IP      : Pointer) return Pointer;
290
   --  Dig the next instruction pointer out of a node
291
 
292
   procedure Optimize (Self : in out Pattern_Matcher);
293
   --  Optimize a Pattern_Matcher by noting certain special cases
294
 
295
   function Read_Natural
296
     (Program : Program_Data;
297
      IP      : Pointer) return Natural;
298
   --  Return the 2-byte natural coded at position IP
299
 
300
   --  All of the subprograms above are tiny and should be inlined
301
 
302
   pragma Inline ("=");
303
   pragma Inline (Is_Alnum);
304
   pragma Inline (Is_White_Space);
305
   pragma Inline (Get_Next);
306
   pragma Inline (Get_Next_Offset);
307
   pragma Inline (Operand);
308
   pragma Inline (Read_Natural);
309
   pragma Inline (String_Length);
310
   pragma Inline (String_Operand);
311
 
312
   type Expression_Flags is record
313
      Has_Width,            -- Known never to match null string
314
      Simple,               -- Simple enough to be STAR/PLUS operand
315
      SP_Start  : Boolean;  -- Starts with * or +
316
   end record;
317
 
318
   Worst_Expression : constant Expression_Flags := (others => False);
319
   --  Worst case
320
 
321
   ---------
322
   -- "=" --
323
   ---------
324
 
325
   function "=" (Left : Character; Right : Opcode) return Boolean is
326
   begin
327
      return Character'Pos (Left) = Opcode'Pos (Right);
328
   end "=";
329
 
330
   --------------------
331
   -- Bitmap_Operand --
332
   --------------------
333
 
334
   procedure Bitmap_Operand
335
     (Program : Program_Data;
336
      P       : Pointer;
337
      Op      : out Character_Class)
338
   is
339
      function Convert is new Ada.Unchecked_Conversion
340
        (Program_Data, Character_Class);
341
 
342
   begin
343
      Op (0 .. 31) := Convert (Program (P + 3 .. P + 34));
344
   end Bitmap_Operand;
345
 
346
   -------------
347
   -- Compile --
348
   -------------
349
 
350
   procedure Compile
351
     (Matcher         : out Pattern_Matcher;
352
      Expression      : String;
353
      Final_Code_Size : out Program_Size;
354
      Flags           : Regexp_Flags := No_Flags)
355
   is
356
      --  We can't allocate space until we know how big the compiled form
357
      --  will be, but we can't compile it (and thus know how big it is)
358
      --  until we've got a place to put the code. So we cheat: we compile
359
      --  it twice, once with code generation turned off and size counting
360
      --  turned on, and once "for real".
361
 
362
      --  This also means that we don't allocate space until we are sure
363
      --  that the thing really will compile successfully, and we never
364
      --  have to move the code and thus invalidate pointers into it.
365
 
366
      --  Beware that the optimization-preparation code in here knows
367
      --  about some of the structure of the compiled regexp.
368
 
369
      PM        : Pattern_Matcher renames Matcher;
370
      Program   : Program_Data renames PM.Program;
371
 
372
      Emit_Code : constant Boolean := PM.Size > 0;
373
      Emit_Ptr  : Pointer := Program_First;
374
 
375
      Parse_Pos : Natural := Expression'First; -- Input-scan pointer
376
      Parse_End : constant Natural := Expression'Last;
377
 
378
      ----------------------------
379
      -- Subprograms for Create --
380
      ----------------------------
381
 
382
      procedure Emit (B : Character);
383
      --  Output the Character B to the Program. If code-generation is
384
      --  disabled, simply increments the program counter.
385
 
386
      function  Emit_Node (Op : Opcode) return Pointer;
387
      --  If code-generation is enabled, Emit_Node outputs the
388
      --  opcode Op and reserves space for a pointer to the next node.
389
      --  Return value is the location of new opcode, i.e. old Emit_Ptr.
390
 
391
      procedure Emit_Natural (IP : Pointer; N : Natural);
392
      --  Split N on two characters at position IP
393
 
394
      procedure Emit_Class (Bitmap : Character_Class);
395
      --  Emits a character class
396
 
397
      procedure Case_Emit (C : Character);
398
      --  Emit C, after converting is to lower-case if the regular
399
      --  expression is case insensitive.
400
 
401
      procedure Parse
402
        (Parenthesized : Boolean;
403
         Flags         : out Expression_Flags;
404
         IP            : out Pointer);
405
      --  Parse regular expression, i.e. main body or parenthesized thing
406
      --  Caller must absorb opening parenthesis.
407
 
408
      procedure Parse_Branch
409
        (Flags         : out Expression_Flags;
410
         First         : Boolean;
411
         IP            : out Pointer);
412
      --  Implements the concatenation operator and handles '|'
413
      --  First should be true if this is the first item of the alternative.
414
 
415
      procedure Parse_Piece
416
        (Expr_Flags : out Expression_Flags;
417
         IP         : out Pointer);
418
      --  Parse something followed by possible [*+?]
419
 
420
      procedure Parse_Atom
421
        (Expr_Flags : out Expression_Flags;
422
         IP         : out Pointer);
423
      --  Parse_Atom is the lowest level parse procedure.
424
      --  Optimization:  gobbles an entire sequence of ordinary characters
425
      --  so that it can turn them into a single node, which is smaller to
426
      --  store and faster to run. Backslashed characters are exceptions,
427
      --  each becoming a separate node; the code is simpler that way and
428
      --  it's not worth fixing.
429
 
430
      procedure Insert_Operator
431
        (Op       : Opcode;
432
         Operand  : Pointer;
433
         Greedy   : Boolean := True);
434
      --  Insert_Operator inserts an operator in front of an
435
      --  already-emitted operand and relocates the operand.
436
      --  This applies to PLUS and STAR.
437
      --  If Minmod is True, then the operator is non-greedy.
438
 
439
      procedure Insert_Curly_Operator
440
        (Op      : Opcode;
441
         Min     : Natural;
442
         Max     : Natural;
443
         Operand : Pointer;
444
         Greedy  : Boolean := True);
445
      --  Insert an operator for CURLY ({Min}, {Min,} or {Min,Max}).
446
      --  If Minmod is True, then the operator is non-greedy.
447
 
448
      procedure Link_Tail (P, Val : Pointer);
449
      --  Link_Tail sets the next-pointer at the end of a node chain
450
 
451
      procedure Link_Operand_Tail (P, Val : Pointer);
452
      --  Link_Tail on operand of first argument; noop if operand-less
453
 
454
      function  Next_Instruction (P : Pointer) return Pointer;
455
      --  Dig the "next" pointer out of a node
456
 
457
      procedure Fail (M : String);
458
      pragma No_Return (Fail);
459
      --  Fail with a diagnostic message, if possible
460
 
461
      function Is_Curly_Operator (IP : Natural) return Boolean;
462
      --  Return True if IP is looking at a '{' that is the beginning
463
      --  of a curly operator, i.e. it matches {\d+,?\d*}
464
 
465
      function Is_Mult (IP : Natural) return Boolean;
466
      --  Return True if C is a regexp multiplier: '+', '*' or '?'
467
 
468
      procedure Get_Curly_Arguments
469
        (IP     : Natural;
470
         Min    : out Natural;
471
         Max    : out Natural;
472
         Greedy : out Boolean);
473
      --  Parse the argument list for a curly operator.
474
      --  It is assumed that IP is indeed pointing at a valid operator.
475
      --  So what is IP and how come IP is not referenced in the body ???
476
 
477
      procedure Parse_Character_Class (IP : out Pointer);
478
      --  Parse a character class.
479
      --  The calling subprogram should consume the opening '[' before.
480
 
481
      procedure Parse_Literal
482
        (Expr_Flags : out Expression_Flags;
483
         IP         : out Pointer);
484
      --  Parse_Literal encodes a string of characters to be matched exactly
485
 
486
      function Parse_Posix_Character_Class return Std_Class;
487
      --  Parse a posix character class, like [:alpha:] or [:^alpha:].
488
      --  The caller is supposed to absorb the opening [.
489
 
490
      pragma Inline (Is_Mult);
491
      pragma Inline (Emit_Natural);
492
      pragma Inline (Parse_Character_Class); --  since used only once
493
 
494
      ---------------
495
      -- Case_Emit --
496
      ---------------
497
 
498
      procedure Case_Emit (C : Character) is
499
      begin
500
         if (Flags and Case_Insensitive) /= 0 then
501
            Emit (To_Lower (C));
502
 
503
         else
504
            --  Dump current character
505
 
506
            Emit (C);
507
         end if;
508
      end Case_Emit;
509
 
510
      ----------
511
      -- Emit --
512
      ----------
513
 
514
      procedure Emit (B : Character) is
515
      begin
516
         if Emit_Code then
517
            Program (Emit_Ptr) := B;
518
         end if;
519
 
520
         Emit_Ptr := Emit_Ptr + 1;
521
      end Emit;
522
 
523
      ----------------
524
      -- Emit_Class --
525
      ----------------
526
 
527
      procedure Emit_Class (Bitmap : Character_Class) is
528
         subtype Program31 is Program_Data (0 .. 31);
529
 
530
         function Convert is new Ada.Unchecked_Conversion
531
           (Character_Class, Program31);
532
 
533
      begin
534
         if Emit_Code then
535
            Program (Emit_Ptr .. Emit_Ptr + 31) := Convert (Bitmap);
536
         end if;
537
 
538
         Emit_Ptr := Emit_Ptr + 32;
539
      end Emit_Class;
540
 
541
      ------------------
542
      -- Emit_Natural --
543
      ------------------
544
 
545
      procedure Emit_Natural (IP : Pointer; N : Natural) is
546
      begin
547
         if Emit_Code then
548
            Program (IP + 1) := Character'Val (N / 256);
549
            Program (IP) := Character'Val (N mod 256);
550
         end if;
551
      end Emit_Natural;
552
 
553
      ---------------
554
      -- Emit_Node --
555
      ---------------
556
 
557
      function Emit_Node (Op : Opcode) return Pointer is
558
         Result : constant Pointer := Emit_Ptr;
559
 
560
      begin
561
         if Emit_Code then
562
            Program (Emit_Ptr) := Character'Val (Opcode'Pos (Op));
563
            Program (Emit_Ptr + 1) := ASCII.NUL;
564
            Program (Emit_Ptr + 2) := ASCII.NUL;
565
         end if;
566
 
567
         Emit_Ptr := Emit_Ptr + 3;
568
         return Result;
569
      end Emit_Node;
570
 
571
      ----------
572
      -- Fail --
573
      ----------
574
 
575
      procedure Fail (M : String) is
576
      begin
577
         raise Expression_Error with M;
578
      end Fail;
579
 
580
      -------------------------
581
      -- Get_Curly_Arguments --
582
      -------------------------
583
 
584
      procedure Get_Curly_Arguments
585
        (IP     : Natural;
586
         Min    : out Natural;
587
         Max    : out Natural;
588
         Greedy : out Boolean)
589
      is
590
         pragma Unreferenced (IP);
591
 
592
         Save_Pos : Natural := Parse_Pos + 1;
593
 
594
      begin
595
         Min := 0;
596
         Max := Max_Curly_Repeat;
597
 
598
         while Expression (Parse_Pos) /= '}'
599
           and then Expression (Parse_Pos) /= ','
600
         loop
601
            Parse_Pos := Parse_Pos + 1;
602
         end loop;
603
 
604
         Min := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
605
 
606
         if Expression (Parse_Pos) = ',' then
607
            Save_Pos := Parse_Pos + 1;
608
            while Expression (Parse_Pos) /= '}' loop
609
               Parse_Pos := Parse_Pos + 1;
610
            end loop;
611
 
612
            if Save_Pos /= Parse_Pos then
613
               Max := Natural'Value (Expression (Save_Pos .. Parse_Pos - 1));
614
            end if;
615
 
616
         else
617
            Max := Min;
618
         end if;
619
 
620
         if Parse_Pos < Expression'Last
621
           and then Expression (Parse_Pos + 1) = '?'
622
         then
623
            Greedy := False;
624
            Parse_Pos := Parse_Pos + 1;
625
 
626
         else
627
            Greedy := True;
628
         end if;
629
      end Get_Curly_Arguments;
630
 
631
      ---------------------------
632
      -- Insert_Curly_Operator --
633
      ---------------------------
634
 
635
      procedure Insert_Curly_Operator
636
        (Op      : Opcode;
637
         Min     : Natural;
638
         Max     : Natural;
639
         Operand : Pointer;
640
         Greedy  : Boolean := True)
641
      is
642
         Dest   : constant Pointer := Emit_Ptr;
643
         Old    : Pointer;
644
         Size   : Pointer := 7;
645
 
646
      begin
647
         --  If the operand is not greedy, insert an extra operand before it
648
 
649
         if not Greedy then
650
            Size := Size + 3;
651
         end if;
652
 
653
         --  Move the operand in the byte-compilation, so that we can insert
654
         --  the operator before it.
655
 
656
         if Emit_Code then
657
            Program (Operand + Size .. Emit_Ptr + Size) :=
658
              Program (Operand .. Emit_Ptr);
659
         end if;
660
 
661
         --  Insert the operator at the position previously occupied by the
662
         --  operand.
663
 
664
         Emit_Ptr := Operand;
665
 
666
         if not Greedy then
667
            Old := Emit_Node (MINMOD);
668
            Link_Tail (Old, Old + 3);
669
         end if;
670
 
671
         Old := Emit_Node (Op);
672
         Emit_Natural (Old + 3, Min);
673
         Emit_Natural (Old + 5, Max);
674
 
675
         Emit_Ptr := Dest + Size;
676
      end Insert_Curly_Operator;
677
 
678
      ---------------------
679
      -- Insert_Operator --
680
      ---------------------
681
 
682
      procedure Insert_Operator
683
        (Op      : Opcode;
684
         Operand : Pointer;
685
         Greedy  : Boolean := True)
686
      is
687
         Dest : constant Pointer := Emit_Ptr;
688
         Old  : Pointer;
689
         Size : Pointer := 3;
690
 
691
         Discard : Pointer;
692
         pragma Warnings (Off, Discard);
693
 
694
      begin
695
         --  If not greedy, we have to emit another opcode first
696
 
697
         if not Greedy then
698
            Size := Size + 3;
699
         end if;
700
 
701
         --  Move the operand in the byte-compilation, so that we can insert
702
         --  the operator before it.
703
 
704
         if Emit_Code then
705
            Program (Operand + Size .. Emit_Ptr + Size) :=
706
              Program (Operand .. Emit_Ptr);
707
         end if;
708
 
709
         --  Insert the operator at the position previously occupied by the
710
         --  operand.
711
 
712
         Emit_Ptr := Operand;
713
 
714
         if not Greedy then
715
            Old := Emit_Node (MINMOD);
716
            Link_Tail (Old, Old + 3);
717
         end if;
718
 
719
         Discard := Emit_Node (Op);
720
         Emit_Ptr := Dest + Size;
721
      end Insert_Operator;
722
 
723
      -----------------------
724
      -- Is_Curly_Operator --
725
      -----------------------
726
 
727
      function Is_Curly_Operator (IP : Natural) return Boolean is
728
         Scan : Natural := IP;
729
 
730
      begin
731
         if Expression (Scan) /= '{'
732
           or else Scan + 2 > Expression'Last
733
           or else not Is_Digit (Expression (Scan + 1))
734
         then
735
            return False;
736
         end if;
737
 
738
         Scan := Scan + 1;
739
 
740
         --  The first digit
741
 
742
         loop
743
            Scan := Scan + 1;
744
 
745
            if Scan > Expression'Last then
746
               return False;
747
            end if;
748
 
749
            exit when not Is_Digit (Expression (Scan));
750
         end loop;
751
 
752
         if Expression (Scan) = ',' then
753
            loop
754
               Scan := Scan + 1;
755
 
756
               if Scan > Expression'Last then
757
                  return False;
758
               end if;
759
 
760
               exit when not Is_Digit (Expression (Scan));
761
            end loop;
762
         end if;
763
 
764
         return Expression (Scan) = '}';
765
      end Is_Curly_Operator;
766
 
767
      -------------
768
      -- Is_Mult --
769
      -------------
770
 
771
      function Is_Mult (IP : Natural) return Boolean is
772
         C : constant Character := Expression (IP);
773
 
774
      begin
775
         return     C = '*'
776
           or else  C = '+'
777
           or else  C = '?'
778
           or else (C = '{' and then Is_Curly_Operator (IP));
779
      end Is_Mult;
780
 
781
      -----------------------
782
      -- Link_Operand_Tail --
783
      -----------------------
784
 
785
      procedure Link_Operand_Tail (P, Val : Pointer) is
786
      begin
787
         if Emit_Code and then Program (P) = BRANCH then
788
            Link_Tail (Operand (P), Val);
789
         end if;
790
      end Link_Operand_Tail;
791
 
792
      ---------------
793
      -- Link_Tail --
794
      ---------------
795
 
796
      procedure Link_Tail (P, Val : Pointer) is
797
         Scan   : Pointer;
798
         Temp   : Pointer;
799
         Offset : Pointer;
800
 
801
      begin
802
         if not Emit_Code then
803
            return;
804
         end if;
805
 
806
         --  Find last node
807
 
808
         Scan := P;
809
         loop
810
            Temp := Next_Instruction (Scan);
811
            exit when Temp = 0;
812
            Scan := Temp;
813
         end loop;
814
 
815
         Offset := Val - Scan;
816
 
817
         Emit_Natural (Scan + 1, Natural (Offset));
818
      end Link_Tail;
819
 
820
      ----------------------
821
      -- Next_Instruction --
822
      ----------------------
823
 
824
      function Next_Instruction (P : Pointer) return Pointer is
825
         Offset : Pointer;
826
 
827
      begin
828
         if not Emit_Code then
829
            return 0;
830
         end if;
831
 
832
         Offset := Get_Next_Offset (Program, P);
833
 
834
         if Offset = 0 then
835
            return 0;
836
         end if;
837
 
838
         return P + Offset;
839
      end Next_Instruction;
840
 
841
      -----------
842
      -- Parse --
843
      -----------
844
 
845
      --  Combining parenthesis handling with the base level
846
      --  of regular expression is a trifle forced, but the
847
      --  need to tie the tails of the branches to what follows
848
      --  makes it hard to avoid.
849
 
850
      procedure Parse
851
        (Parenthesized  : Boolean;
852
         Flags          : out Expression_Flags;
853
         IP             : out Pointer)
854
      is
855
         E              : String renames Expression;
856
         Br             : Pointer;
857
         Ender          : Pointer;
858
         Par_No         : Natural;
859
         New_Flags      : Expression_Flags;
860
         Have_Branch    : Boolean := False;
861
 
862
      begin
863
         Flags := (Has_Width => True, others => False);  -- Tentatively
864
 
865
         --  Make an OPEN node, if parenthesized
866
 
867
         if Parenthesized then
868
            if Matcher.Paren_Count > Max_Paren_Count then
869
               Fail ("too many ()");
870
            end if;
871
 
872
            Par_No := Matcher.Paren_Count + 1;
873
            Matcher.Paren_Count := Matcher.Paren_Count + 1;
874
            IP := Emit_Node (OPEN);
875
            Emit (Character'Val (Par_No));
876
 
877
         else
878
            IP := 0;
879
            Par_No := 0;
880
         end if;
881
 
882
         --  Pick up the branches, linking them together
883
 
884
         Parse_Branch (New_Flags, True, Br);
885
 
886
         if Br = 0 then
887
            IP := 0;
888
            return;
889
         end if;
890
 
891
         if Parse_Pos <= Parse_End
892
           and then E (Parse_Pos) = '|'
893
         then
894
            Insert_Operator (BRANCH, Br);
895
            Have_Branch := True;
896
         end if;
897
 
898
         if IP /= 0 then
899
            Link_Tail (IP, Br);   -- OPEN -> first
900
         else
901
            IP := Br;
902
         end if;
903
 
904
         if not New_Flags.Has_Width then
905
            Flags.Has_Width := False;
906
         end if;
907
 
908
         Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
909
 
910
         while Parse_Pos <= Parse_End
911
           and then (E (Parse_Pos) = '|')
912
         loop
913
            Parse_Pos := Parse_Pos + 1;
914
            Parse_Branch (New_Flags, False, Br);
915
 
916
            if Br = 0 then
917
               IP := 0;
918
               return;
919
            end if;
920
 
921
            Link_Tail (IP, Br);   -- BRANCH -> BRANCH
922
 
923
            if not New_Flags.Has_Width then
924
               Flags.Has_Width := False;
925
            end if;
926
 
927
            Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
928
         end loop;
929
 
930
         --  Make a closing node, and hook it on the end
931
 
932
         if Parenthesized then
933
            Ender := Emit_Node (CLOSE);
934
            Emit (Character'Val (Par_No));
935
         else
936
            Ender := Emit_Node (EOP);
937
         end if;
938
 
939
         Link_Tail (IP, Ender);
940
 
941
         if Have_Branch then
942
 
943
            --  Hook the tails of the branches to the closing node
944
 
945
            Br := IP;
946
            loop
947
               exit when Br = 0;
948
               Link_Operand_Tail (Br, Ender);
949
               Br := Next_Instruction (Br);
950
            end loop;
951
         end if;
952
 
953
         --  Check for proper termination
954
 
955
         if Parenthesized then
956
            if Parse_Pos > Parse_End or else E (Parse_Pos) /= ')' then
957
               Fail ("unmatched ()");
958
            end if;
959
 
960
            Parse_Pos := Parse_Pos + 1;
961
 
962
         elsif Parse_Pos <= Parse_End then
963
            if E (Parse_Pos) = ')'  then
964
               Fail ("unmatched ()");
965
            else
966
               Fail ("junk on end");         -- "Can't happen"
967
            end if;
968
         end if;
969
      end Parse;
970
 
971
      ----------------
972
      -- Parse_Atom --
973
      ----------------
974
 
975
      procedure Parse_Atom
976
        (Expr_Flags : out Expression_Flags;
977
         IP         : out Pointer)
978
      is
979
         C : Character;
980
 
981
      begin
982
         --  Tentatively set worst expression case
983
 
984
         Expr_Flags := Worst_Expression;
985
 
986
         C := Expression (Parse_Pos);
987
         Parse_Pos := Parse_Pos + 1;
988
 
989
         case (C) is
990
            when '^' =>
991
               IP :=
992
                 Emit_Node
993
                   (if (Flags and Multiple_Lines) /= 0 then MBOL
994
                    elsif (Flags and Single_Line) /= 0 then SBOL
995
                    else BOL);
996
 
997
            when '$' =>
998
               IP :=
999
                 Emit_Node
1000
                   (if (Flags and Multiple_Lines) /= 0 then MEOL
1001
                    elsif (Flags and Single_Line) /= 0 then SEOL
1002
                    else EOL);
1003
 
1004
            when '.' =>
1005
               IP :=
1006
                 Emit_Node
1007
                   (if (Flags and Single_Line) /= 0 then SANY else ANY);
1008
 
1009
               Expr_Flags.Has_Width := True;
1010
               Expr_Flags.Simple := True;
1011
 
1012
            when '[' =>
1013
               Parse_Character_Class (IP);
1014
               Expr_Flags.Has_Width := True;
1015
               Expr_Flags.Simple := True;
1016
 
1017
            when '(' =>
1018
               declare
1019
                  New_Flags : Expression_Flags;
1020
 
1021
               begin
1022
                  Parse (True, New_Flags, IP);
1023
 
1024
                  if IP = 0 then
1025
                     return;
1026
                  end if;
1027
 
1028
                  Expr_Flags.Has_Width :=
1029
                    Expr_Flags.Has_Width or else New_Flags.Has_Width;
1030
                  Expr_Flags.SP_Start :=
1031
                    Expr_Flags.SP_Start or else New_Flags.SP_Start;
1032
               end;
1033
 
1034
            when '|' | ASCII.LF | ')' =>
1035
               Fail ("internal urp");  --  Supposed to be caught earlier
1036
 
1037
            when '?' | '+' | '*' =>
1038
               Fail (C & " follows nothing");
1039
 
1040
            when '{' =>
1041
               if Is_Curly_Operator (Parse_Pos - 1) then
1042
                  Fail (C & " follows nothing");
1043
               else
1044
                  Parse_Literal (Expr_Flags, IP);
1045
               end if;
1046
 
1047
            when '\' =>
1048
               if Parse_Pos > Parse_End then
1049
                  Fail ("trailing \");
1050
               end if;
1051
 
1052
               Parse_Pos := Parse_Pos + 1;
1053
 
1054
               case Expression (Parse_Pos - 1) is
1055
                  when 'b'        =>
1056
                     IP := Emit_Node (BOUND);
1057
 
1058
                  when 'B'        =>
1059
                     IP := Emit_Node (NBOUND);
1060
 
1061
                  when 's'        =>
1062
                     IP := Emit_Node (SPACE);
1063
                     Expr_Flags.Simple := True;
1064
                     Expr_Flags.Has_Width := True;
1065
 
1066
                  when 'S'        =>
1067
                     IP := Emit_Node (NSPACE);
1068
                     Expr_Flags.Simple := True;
1069
                     Expr_Flags.Has_Width := True;
1070
 
1071
                  when 'd'        =>
1072
                     IP := Emit_Node (DIGIT);
1073
                     Expr_Flags.Simple := True;
1074
                     Expr_Flags.Has_Width := True;
1075
 
1076
                  when 'D'        =>
1077
                     IP := Emit_Node (NDIGIT);
1078
                     Expr_Flags.Simple := True;
1079
                     Expr_Flags.Has_Width := True;
1080
 
1081
                  when 'w'        =>
1082
                     IP := Emit_Node (ALNUM);
1083
                     Expr_Flags.Simple := True;
1084
                     Expr_Flags.Has_Width := True;
1085
 
1086
                  when 'W'        =>
1087
                     IP := Emit_Node (NALNUM);
1088
                     Expr_Flags.Simple := True;
1089
                     Expr_Flags.Has_Width := True;
1090
 
1091
                  when 'A'        =>
1092
                     IP := Emit_Node (SBOL);
1093
 
1094
                  when 'G'        =>
1095
                     IP := Emit_Node (SEOL);
1096
 
1097
                  when '0' .. '9' =>
1098
                     IP := Emit_Node (REFF);
1099
 
1100
                     declare
1101
                        Save : constant Natural := Parse_Pos - 1;
1102
 
1103
                     begin
1104
                        while Parse_Pos <= Expression'Last
1105
                          and then Is_Digit (Expression (Parse_Pos))
1106
                        loop
1107
                           Parse_Pos := Parse_Pos + 1;
1108
                        end loop;
1109
 
1110
                        Emit (Character'Val (Natural'Value
1111
                               (Expression (Save .. Parse_Pos - 1))));
1112
                     end;
1113
 
1114
                  when others =>
1115
                     Parse_Pos := Parse_Pos - 1;
1116
                     Parse_Literal (Expr_Flags, IP);
1117
               end case;
1118
 
1119
            when others =>
1120
               Parse_Literal (Expr_Flags, IP);
1121
         end case;
1122
      end Parse_Atom;
1123
 
1124
      ------------------
1125
      -- Parse_Branch --
1126
      ------------------
1127
 
1128
      procedure Parse_Branch
1129
        (Flags : out Expression_Flags;
1130
         First : Boolean;
1131
         IP    : out Pointer)
1132
      is
1133
         E         : String renames Expression;
1134
         Chain     : Pointer;
1135
         Last      : Pointer;
1136
         New_Flags : Expression_Flags;
1137
 
1138
         Discard : Pointer;
1139
         pragma Warnings (Off, Discard);
1140
 
1141
      begin
1142
         Flags := Worst_Expression;    -- Tentatively
1143
         IP := (if First then Emit_Ptr else Emit_Node (BRANCH));
1144
 
1145
         Chain := 0;
1146
         while Parse_Pos <= Parse_End
1147
           and then E (Parse_Pos) /= ')'
1148
           and then E (Parse_Pos) /= ASCII.LF
1149
           and then E (Parse_Pos) /= '|'
1150
         loop
1151
            Parse_Piece (New_Flags, Last);
1152
 
1153
            if Last = 0 then
1154
               IP := 0;
1155
               return;
1156
            end if;
1157
 
1158
            Flags.Has_Width := Flags.Has_Width or else New_Flags.Has_Width;
1159
 
1160
            if Chain = 0 then            -- First piece
1161
               Flags.SP_Start := Flags.SP_Start or else New_Flags.SP_Start;
1162
            else
1163
               Link_Tail (Chain, Last);
1164
            end if;
1165
 
1166
            Chain := Last;
1167
         end loop;
1168
 
1169
         --  Case where loop ran zero CURLY
1170
 
1171
         if Chain = 0 then
1172
            Discard := Emit_Node (NOTHING);
1173
         end if;
1174
      end Parse_Branch;
1175
 
1176
      ---------------------------
1177
      -- Parse_Character_Class --
1178
      ---------------------------
1179
 
1180
      procedure Parse_Character_Class (IP : out Pointer) is
1181
         Bitmap      : Character_Class;
1182
         Invert      : Boolean := False;
1183
         In_Range    : Boolean := False;
1184
         Named_Class : Std_Class := ANYOF_NONE;
1185
         Value       : Character;
1186
         Last_Value  : Character := ASCII.NUL;
1187
 
1188
      begin
1189
         Reset_Class (Bitmap);
1190
 
1191
         --  Do we have an invert character class ?
1192
 
1193
         if Parse_Pos <= Parse_End
1194
           and then Expression (Parse_Pos) = '^'
1195
         then
1196
            Invert := True;
1197
            Parse_Pos := Parse_Pos + 1;
1198
         end if;
1199
 
1200
         --  First character can be ] or - without closing the class
1201
 
1202
         if Parse_Pos <= Parse_End
1203
           and then (Expression (Parse_Pos) = ']'
1204
                      or else Expression (Parse_Pos) = '-')
1205
         then
1206
            Set_In_Class (Bitmap, Expression (Parse_Pos));
1207
            Parse_Pos := Parse_Pos + 1;
1208
         end if;
1209
 
1210
         --  While we don't have the end of the class
1211
 
1212
         while Parse_Pos <= Parse_End
1213
           and then Expression (Parse_Pos) /= ']'
1214
         loop
1215
            Named_Class := ANYOF_NONE;
1216
            Value := Expression (Parse_Pos);
1217
            Parse_Pos := Parse_Pos + 1;
1218
 
1219
            --  Do we have a Posix character class
1220
            if Value = '[' then
1221
               Named_Class := Parse_Posix_Character_Class;
1222
 
1223
            elsif Value = '\' then
1224
               if Parse_Pos = Parse_End then
1225
                  Fail ("Trailing \");
1226
               end if;
1227
               Value := Expression (Parse_Pos);
1228
               Parse_Pos := Parse_Pos + 1;
1229
 
1230
               case Value is
1231
                  when 'w' => Named_Class := ANYOF_ALNUM;
1232
                  when 'W' => Named_Class := ANYOF_NALNUM;
1233
                  when 's' => Named_Class := ANYOF_SPACE;
1234
                  when 'S' => Named_Class := ANYOF_NSPACE;
1235
                  when 'd' => Named_Class := ANYOF_DIGIT;
1236
                  when 'D' => Named_Class := ANYOF_NDIGIT;
1237
                  when 'n' => Value := ASCII.LF;
1238
                  when 'r' => Value := ASCII.CR;
1239
                  when 't' => Value := ASCII.HT;
1240
                  when 'f' => Value := ASCII.FF;
1241
                  when 'e' => Value := ASCII.ESC;
1242
                  when 'a' => Value := ASCII.BEL;
1243
 
1244
                  --  when 'x'  => ??? hexadecimal value
1245
                  --  when 'c'  => ??? control character
1246
                  --  when '0'..'9' => ??? octal character
1247
 
1248
                  when others => null;
1249
               end case;
1250
            end if;
1251
 
1252
            --  Do we have a character class?
1253
 
1254
            if Named_Class /= ANYOF_NONE then
1255
 
1256
               --  A range like 'a-\d' or 'a-[:digit:] is not a range
1257
 
1258
               if In_Range then
1259
                  Set_In_Class (Bitmap, Last_Value);
1260
                  Set_In_Class (Bitmap, '-');
1261
                  In_Range := False;
1262
               end if;
1263
 
1264
               --  Expand the range
1265
 
1266
               case Named_Class is
1267
                  when ANYOF_NONE => null;
1268
 
1269
                  when ANYOF_ALNUM | ANYOF_ALNUMC =>
1270
                     for Value in Class_Byte'Range loop
1271
                        if Is_Alnum (Character'Val (Value)) then
1272
                           Set_In_Class (Bitmap, Character'Val (Value));
1273
                        end if;
1274
                     end loop;
1275
 
1276
                  when ANYOF_NALNUM | ANYOF_NALNUMC =>
1277
                     for Value in Class_Byte'Range loop
1278
                        if not Is_Alnum (Character'Val (Value)) then
1279
                           Set_In_Class (Bitmap, Character'Val (Value));
1280
                        end if;
1281
                     end loop;
1282
 
1283
                  when ANYOF_SPACE =>
1284
                     for Value in Class_Byte'Range loop
1285
                        if Is_White_Space (Character'Val (Value)) then
1286
                           Set_In_Class (Bitmap, Character'Val (Value));
1287
                        end if;
1288
                     end loop;
1289
 
1290
                  when ANYOF_NSPACE =>
1291
                     for Value in Class_Byte'Range loop
1292
                        if not Is_White_Space (Character'Val (Value)) then
1293
                           Set_In_Class (Bitmap, Character'Val (Value));
1294
                        end if;
1295
                     end loop;
1296
 
1297
                  when ANYOF_DIGIT =>
1298
                     for Value in Class_Byte'Range loop
1299
                        if Is_Digit (Character'Val (Value)) then
1300
                           Set_In_Class (Bitmap, Character'Val (Value));
1301
                        end if;
1302
                     end loop;
1303
 
1304
                  when ANYOF_NDIGIT =>
1305
                     for Value in Class_Byte'Range loop
1306
                        if not Is_Digit (Character'Val (Value)) then
1307
                           Set_In_Class (Bitmap, Character'Val (Value));
1308
                        end if;
1309
                     end loop;
1310
 
1311
                  when ANYOF_ALPHA =>
1312
                     for Value in Class_Byte'Range loop
1313
                        if Is_Letter (Character'Val (Value)) then
1314
                           Set_In_Class (Bitmap, Character'Val (Value));
1315
                        end if;
1316
                     end loop;
1317
 
1318
                  when ANYOF_NALPHA =>
1319
                     for Value in Class_Byte'Range loop
1320
                        if not Is_Letter (Character'Val (Value)) then
1321
                           Set_In_Class (Bitmap, Character'Val (Value));
1322
                        end if;
1323
                     end loop;
1324
 
1325
                  when ANYOF_ASCII =>
1326
                     for Value in 0 .. 127 loop
1327
                        Set_In_Class (Bitmap, Character'Val (Value));
1328
                     end loop;
1329
 
1330
                  when ANYOF_NASCII =>
1331
                     for Value in 128 .. 255 loop
1332
                        Set_In_Class (Bitmap, Character'Val (Value));
1333
                     end loop;
1334
 
1335
                  when ANYOF_CNTRL =>
1336
                     for Value in Class_Byte'Range loop
1337
                        if Is_Control (Character'Val (Value)) then
1338
                           Set_In_Class (Bitmap, Character'Val (Value));
1339
                        end if;
1340
                     end loop;
1341
 
1342
                  when ANYOF_NCNTRL =>
1343
                     for Value in Class_Byte'Range loop
1344
                        if not Is_Control (Character'Val (Value)) then
1345
                           Set_In_Class (Bitmap, Character'Val (Value));
1346
                        end if;
1347
                     end loop;
1348
 
1349
                  when ANYOF_GRAPH =>
1350
                     for Value in Class_Byte'Range loop
1351
                        if Is_Graphic (Character'Val (Value)) then
1352
                           Set_In_Class (Bitmap, Character'Val (Value));
1353
                        end if;
1354
                     end loop;
1355
 
1356
                  when ANYOF_NGRAPH =>
1357
                     for Value in Class_Byte'Range loop
1358
                        if not Is_Graphic (Character'Val (Value)) then
1359
                           Set_In_Class (Bitmap, Character'Val (Value));
1360
                        end if;
1361
                     end loop;
1362
 
1363
                  when ANYOF_LOWER =>
1364
                     for Value in Class_Byte'Range loop
1365
                        if Is_Lower (Character'Val (Value)) then
1366
                           Set_In_Class (Bitmap, Character'Val (Value));
1367
                        end if;
1368
                     end loop;
1369
 
1370
                  when ANYOF_NLOWER =>
1371
                     for Value in Class_Byte'Range loop
1372
                        if not Is_Lower (Character'Val (Value)) then
1373
                           Set_In_Class (Bitmap, Character'Val (Value));
1374
                        end if;
1375
                     end loop;
1376
 
1377
                  when ANYOF_PRINT =>
1378
                     for Value in Class_Byte'Range loop
1379
                        if Is_Printable (Character'Val (Value)) then
1380
                           Set_In_Class (Bitmap, Character'Val (Value));
1381
                        end if;
1382
                     end loop;
1383
 
1384
                  when ANYOF_NPRINT =>
1385
                     for Value in Class_Byte'Range loop
1386
                        if not Is_Printable (Character'Val (Value)) then
1387
                           Set_In_Class (Bitmap, Character'Val (Value));
1388
                        end if;
1389
                     end loop;
1390
 
1391
                  when ANYOF_PUNCT =>
1392
                     for Value in Class_Byte'Range loop
1393
                        if Is_Printable (Character'Val (Value))
1394
                          and then not Is_White_Space (Character'Val (Value))
1395
                          and then not Is_Alnum (Character'Val (Value))
1396
                        then
1397
                           Set_In_Class (Bitmap, Character'Val (Value));
1398
                        end if;
1399
                     end loop;
1400
 
1401
                  when ANYOF_NPUNCT =>
1402
                     for Value in Class_Byte'Range loop
1403
                        if not Is_Printable (Character'Val (Value))
1404
                          or else Is_White_Space (Character'Val (Value))
1405
                          or else Is_Alnum (Character'Val (Value))
1406
                        then
1407
                           Set_In_Class (Bitmap, Character'Val (Value));
1408
                        end if;
1409
                     end loop;
1410
 
1411
                  when ANYOF_UPPER =>
1412
                     for Value in Class_Byte'Range loop
1413
                        if Is_Upper (Character'Val (Value)) then
1414
                           Set_In_Class (Bitmap, Character'Val (Value));
1415
                        end if;
1416
                     end loop;
1417
 
1418
                  when ANYOF_NUPPER =>
1419
                     for Value in Class_Byte'Range loop
1420
                        if not Is_Upper (Character'Val (Value)) then
1421
                           Set_In_Class (Bitmap, Character'Val (Value));
1422
                        end if;
1423
                     end loop;
1424
 
1425
                  when ANYOF_XDIGIT =>
1426
                     for Value in Class_Byte'Range loop
1427
                        if Is_Hexadecimal_Digit (Character'Val (Value)) then
1428
                           Set_In_Class (Bitmap, Character'Val (Value));
1429
                        end if;
1430
                     end loop;
1431
 
1432
                  when ANYOF_NXDIGIT =>
1433
                     for Value in Class_Byte'Range loop
1434
                        if not Is_Hexadecimal_Digit
1435
                          (Character'Val (Value))
1436
                        then
1437
                           Set_In_Class (Bitmap, Character'Val (Value));
1438
                        end if;
1439
                     end loop;
1440
 
1441
               end case;
1442
 
1443
            --  Not a character range
1444
 
1445
            elsif not In_Range then
1446
               Last_Value := Value;
1447
 
1448
               if Parse_Pos > Expression'Last then
1449
                  Fail ("Empty character class []");
1450
               end if;
1451
 
1452
               if Expression (Parse_Pos) = '-'
1453
                 and then Parse_Pos < Parse_End
1454
                 and then Expression (Parse_Pos + 1) /= ']'
1455
               then
1456
                  Parse_Pos := Parse_Pos + 1;
1457
 
1458
                  --  Do we have a range like '\d-a' and '[:space:]-a'
1459
                  --  which is not a real range
1460
 
1461
                  if Named_Class /= ANYOF_NONE then
1462
                     Set_In_Class (Bitmap, '-');
1463
                  else
1464
                     In_Range := True;
1465
                  end if;
1466
 
1467
               else
1468
                  Set_In_Class (Bitmap, Value);
1469
 
1470
               end if;
1471
 
1472
            --  Else in a character range
1473
 
1474
            else
1475
               if Last_Value > Value then
1476
                  Fail ("Invalid Range [" & Last_Value'Img
1477
                        & "-" & Value'Img & "]");
1478
               end if;
1479
 
1480
               while Last_Value <= Value loop
1481
                  Set_In_Class (Bitmap, Last_Value);
1482
                  Last_Value := Character'Succ (Last_Value);
1483
               end loop;
1484
 
1485
               In_Range := False;
1486
 
1487
            end if;
1488
 
1489
         end loop;
1490
 
1491
         --  Optimize case-insensitive ranges (put the upper case or lower
1492
         --  case character into the bitmap)
1493
 
1494
         if (Flags and Case_Insensitive) /= 0 then
1495
            for C in Character'Range loop
1496
               if Get_From_Class (Bitmap, C) then
1497
                  Set_In_Class (Bitmap, To_Lower (C));
1498
                  Set_In_Class (Bitmap, To_Upper (C));
1499
               end if;
1500
            end loop;
1501
         end if;
1502
 
1503
         --  Optimize inverted classes
1504
 
1505
         if Invert then
1506
            for J in Bitmap'Range loop
1507
               Bitmap (J) := not Bitmap (J);
1508
            end loop;
1509
         end if;
1510
 
1511
         Parse_Pos := Parse_Pos + 1;
1512
 
1513
         --  Emit the class
1514
 
1515
         IP := Emit_Node (ANYOF);
1516
         Emit_Class (Bitmap);
1517
      end Parse_Character_Class;
1518
 
1519
      -------------------
1520
      -- Parse_Literal --
1521
      -------------------
1522
 
1523
      --  This is a bit tricky due to quoted chars and due to
1524
      --  the multiplier characters '*', '+', and '?' that
1525
      --  take the SINGLE char previous as their operand.
1526
 
1527
      --  On entry, the character at Parse_Pos - 1 is going to go
1528
      --  into the string, no matter what it is. It could be
1529
      --  following a \ if Parse_Atom was entered from the '\' case.
1530
 
1531
      --  Basic idea is to pick up a good char in C and examine
1532
      --  the next char. If Is_Mult (C) then twiddle, if it's a \
1533
      --  then frozzle and if it's another magic char then push C and
1534
      --  terminate the string. If none of the above, push C on the
1535
      --  string and go around again.
1536
 
1537
      --  Start_Pos is used to remember where "the current character"
1538
      --  starts in the string, if due to an Is_Mult we need to back
1539
      --  up and put the current char in a separate 1-character string.
1540
      --  When Start_Pos is 0, C is the only char in the string;
1541
      --  this is used in Is_Mult handling, and in setting the SIMPLE
1542
      --  flag at the end.
1543
 
1544
      procedure Parse_Literal
1545
        (Expr_Flags : out Expression_Flags;
1546
         IP         : out Pointer)
1547
      is
1548
         Start_Pos  : Natural := 0;
1549
         C          : Character;
1550
         Length_Ptr : Pointer;
1551
 
1552
         Has_Special_Operator : Boolean := False;
1553
 
1554
      begin
1555
         Parse_Pos := Parse_Pos - 1;      --  Look at current character
1556
 
1557
         IP :=
1558
           Emit_Node
1559
             (if (Flags and Case_Insensitive) /= 0 then EXACTF else EXACT);
1560
 
1561
         Length_Ptr := Emit_Ptr;
1562
         Emit_Ptr := String_Operand (IP);
1563
 
1564
         Parse_Loop :
1565
         loop
1566
            C := Expression (Parse_Pos); --  Get current character
1567
 
1568
            case C is
1569
               when '.' | '[' | '(' | ')' | '|' | ASCII.LF | '$' | '^' =>
1570
 
1571
                  if Start_Pos = 0 then
1572
                     Start_Pos := Parse_Pos;
1573
                     Emit (C);         --  First character is always emitted
1574
                  else
1575
                     exit Parse_Loop;  --  Else we are done
1576
                  end if;
1577
 
1578
               when '?' | '+' | '*' | '{' =>
1579
 
1580
                  if Start_Pos = 0 then
1581
                     Start_Pos := Parse_Pos;
1582
                     Emit (C);         --  First character is always emitted
1583
 
1584
                  --  Are we looking at an operator, or is this
1585
                  --  simply a normal character ?
1586
 
1587
                  elsif not Is_Mult (Parse_Pos) then
1588
                     Start_Pos := Parse_Pos;
1589
                     Case_Emit (C);
1590
 
1591
                  else
1592
                     --  We've got something like "abc?d".  Mark this as a
1593
                     --  special case. What we want to emit is a first
1594
                     --  constant string for "ab", then one for "c" that will
1595
                     --  ultimately be transformed with a CURLY operator, A
1596
                     --  special case has to be handled for "a?", since there
1597
                     --  is no initial string to emit.
1598
 
1599
                     Has_Special_Operator := True;
1600
                     exit Parse_Loop;
1601
                  end if;
1602
 
1603
               when '\' =>
1604
                  Start_Pos := Parse_Pos;
1605
 
1606
                  if Parse_Pos = Parse_End then
1607
                     Fail ("Trailing \");
1608
 
1609
                  else
1610
                     case Expression (Parse_Pos + 1) is
1611
                        when 'b' | 'B' | 's' | 'S' | 'd' | 'D'
1612
                          | 'w' | 'W' | '0' .. '9' | 'G' | 'A'
1613
                          => exit Parse_Loop;
1614
                        when 'n'         => Emit (ASCII.LF);
1615
                        when 't'         => Emit (ASCII.HT);
1616
                        when 'r'         => Emit (ASCII.CR);
1617
                        when 'f'         => Emit (ASCII.FF);
1618
                        when 'e'         => Emit (ASCII.ESC);
1619
                        when 'a'         => Emit (ASCII.BEL);
1620
                        when others      => Emit (Expression (Parse_Pos + 1));
1621
                     end case;
1622
 
1623
                     Parse_Pos := Parse_Pos + 1;
1624
                  end if;
1625
 
1626
               when others =>
1627
                  Start_Pos := Parse_Pos;
1628
                  Case_Emit (C);
1629
            end case;
1630
 
1631
            exit Parse_Loop when Emit_Ptr - Length_Ptr = 254;
1632
 
1633
            Parse_Pos := Parse_Pos + 1;
1634
 
1635
            exit Parse_Loop when Parse_Pos > Parse_End;
1636
         end loop Parse_Loop;
1637
 
1638
         --  Is the string followed by a '*+?{' operator ? If yes, and if there
1639
         --  is an initial string to emit, do it now.
1640
 
1641
         if Has_Special_Operator
1642
           and then Emit_Ptr >= Length_Ptr + 3
1643
         then
1644
            Emit_Ptr := Emit_Ptr - 1;
1645
            Parse_Pos := Start_Pos;
1646
         end if;
1647
 
1648
         if Emit_Code then
1649
            Program (Length_Ptr) := Character'Val (Emit_Ptr - Length_Ptr - 2);
1650
         end if;
1651
 
1652
         Expr_Flags.Has_Width := True;
1653
 
1654
         --  Slight optimization when there is a single character
1655
 
1656
         if Emit_Ptr = Length_Ptr + 2 then
1657
            Expr_Flags.Simple := True;
1658
         end if;
1659
      end Parse_Literal;
1660
 
1661
      -----------------
1662
      -- Parse_Piece --
1663
      -----------------
1664
 
1665
      --  Note that the branching code sequences used for '?' and the
1666
      --  general cases of '*' and + are somewhat optimized: they use
1667
      --  the same NOTHING node as both the endmarker for their branch
1668
      --  list and the body of the last branch. It might seem that
1669
      --  this node could be dispensed with entirely, but the endmarker
1670
      --  role is not redundant.
1671
 
1672
      procedure Parse_Piece
1673
        (Expr_Flags : out Expression_Flags;
1674
         IP         : out Pointer)
1675
      is
1676
         Op        : Character;
1677
         New_Flags : Expression_Flags;
1678
         Greedy    : Boolean := True;
1679
 
1680
      begin
1681
         Parse_Atom (New_Flags, IP);
1682
 
1683
         if IP = 0 then
1684
            return;
1685
         end if;
1686
 
1687
         if Parse_Pos > Parse_End
1688
           or else not Is_Mult (Parse_Pos)
1689
         then
1690
            Expr_Flags := New_Flags;
1691
            return;
1692
         end if;
1693
 
1694
         Op := Expression (Parse_Pos);
1695
 
1696
         Expr_Flags :=
1697
           (if Op /= '+'
1698
            then (SP_Start  => True, others => False)
1699
            else (Has_Width => True, others => False));
1700
 
1701
         --  Detect non greedy operators in the easy cases
1702
 
1703
         if Op /= '{'
1704
           and then Parse_Pos + 1 <= Parse_End
1705
           and then Expression (Parse_Pos + 1) = '?'
1706
         then
1707
            Greedy := False;
1708
            Parse_Pos := Parse_Pos + 1;
1709
         end if;
1710
 
1711
         --  Generate the byte code
1712
 
1713
         case Op is
1714
            when '*' =>
1715
 
1716
               if New_Flags.Simple then
1717
                  Insert_Operator (STAR, IP, Greedy);
1718
               else
1719
                  Link_Tail (IP, Emit_Node (WHILEM));
1720
                  Insert_Curly_Operator
1721
                    (CURLYX, 0, Max_Curly_Repeat, IP, Greedy);
1722
                  Link_Tail (IP, Emit_Node (NOTHING));
1723
               end if;
1724
 
1725
            when '+' =>
1726
 
1727
               if New_Flags.Simple then
1728
                  Insert_Operator (PLUS, IP, Greedy);
1729
               else
1730
                  Link_Tail (IP, Emit_Node (WHILEM));
1731
                  Insert_Curly_Operator
1732
                    (CURLYX, 1, Max_Curly_Repeat, IP, Greedy);
1733
                  Link_Tail (IP, Emit_Node (NOTHING));
1734
               end if;
1735
 
1736
            when '?' =>
1737
               if New_Flags.Simple then
1738
                  Insert_Curly_Operator (CURLY, 0, 1, IP, Greedy);
1739
               else
1740
                  Link_Tail (IP, Emit_Node (WHILEM));
1741
                  Insert_Curly_Operator (CURLYX, 0, 1, IP, Greedy);
1742
                  Link_Tail (IP, Emit_Node (NOTHING));
1743
               end if;
1744
 
1745
            when '{' =>
1746
               declare
1747
                  Min, Max : Natural;
1748
 
1749
               begin
1750
                  Get_Curly_Arguments (Parse_Pos, Min, Max, Greedy);
1751
 
1752
                  if New_Flags.Simple then
1753
                     Insert_Curly_Operator (CURLY, Min, Max, IP, Greedy);
1754
                  else
1755
                     Link_Tail (IP, Emit_Node (WHILEM));
1756
                     Insert_Curly_Operator (CURLYX, Min, Max, IP, Greedy);
1757
                     Link_Tail (IP, Emit_Node (NOTHING));
1758
                  end if;
1759
               end;
1760
 
1761
            when others =>
1762
               null;
1763
         end case;
1764
 
1765
         Parse_Pos := Parse_Pos + 1;
1766
 
1767
         if Parse_Pos <= Parse_End
1768
           and then Is_Mult (Parse_Pos)
1769
         then
1770
            Fail ("nested *+{");
1771
         end if;
1772
      end Parse_Piece;
1773
 
1774
      ---------------------------------
1775
      -- Parse_Posix_Character_Class --
1776
      ---------------------------------
1777
 
1778
      function Parse_Posix_Character_Class return Std_Class is
1779
         Invert : Boolean := False;
1780
         Class  : Std_Class := ANYOF_NONE;
1781
         E      : String renames Expression;
1782
 
1783
         --  Class names. Note that code assumes that the length of all
1784
         --  classes starting with the same letter have the same length.
1785
 
1786
         Alnum   : constant String := "alnum:]";
1787
         Alpha   : constant String := "alpha:]";
1788
         Ascii_C : constant String := "ascii:]";
1789
         Cntrl   : constant String := "cntrl:]";
1790
         Digit   : constant String := "digit:]";
1791
         Graph   : constant String := "graph:]";
1792
         Lower   : constant String := "lower:]";
1793
         Print   : constant String := "print:]";
1794
         Punct   : constant String := "punct:]";
1795
         Space   : constant String := "space:]";
1796
         Upper   : constant String := "upper:]";
1797
         Word    : constant String := "word:]";
1798
         Xdigit  : constant String := "xdigit:]";
1799
 
1800
      begin
1801
         --  Case of character class specified
1802
 
1803
         if Parse_Pos <= Parse_End
1804
           and then Expression (Parse_Pos) = ':'
1805
         then
1806
            Parse_Pos := Parse_Pos + 1;
1807
 
1808
            --  Do we have something like:  [[:^alpha:]]
1809
 
1810
            if Parse_Pos <= Parse_End
1811
              and then Expression (Parse_Pos) = '^'
1812
            then
1813
               Invert := True;
1814
               Parse_Pos := Parse_Pos + 1;
1815
            end if;
1816
 
1817
            --  Check for class names based on first letter
1818
 
1819
            case Expression (Parse_Pos) is
1820
               when 'a' =>
1821
 
1822
                  --  All 'a' classes have the same length (Alnum'Length)
1823
 
1824
                  if Parse_Pos + Alnum'Length - 1 <= Parse_End then
1825
                     if
1826
                       E (Parse_Pos .. Parse_Pos + Alnum'Length - 1) = Alnum
1827
                     then
1828
                        Class :=
1829
                          (if Invert then ANYOF_NALNUMC else ANYOF_ALNUMC);
1830
                        Parse_Pos := Parse_Pos + Alnum'Length;
1831
 
1832
                     elsif
1833
                       E (Parse_Pos .. Parse_Pos + Alpha'Length - 1) = Alpha
1834
                     then
1835
                        Class :=
1836
                          (if Invert then ANYOF_NALPHA else ANYOF_ALPHA);
1837
                        Parse_Pos := Parse_Pos + Alpha'Length;
1838
 
1839
                     elsif E (Parse_Pos .. Parse_Pos + Ascii_C'Length - 1) =
1840
                                                                      Ascii_C
1841
                     then
1842
                        Class :=
1843
                          (if Invert then ANYOF_NASCII else ANYOF_ASCII);
1844
                        Parse_Pos := Parse_Pos + Ascii_C'Length;
1845
                     else
1846
                        Fail ("Invalid character class: " & E);
1847
                     end if;
1848
 
1849
                  else
1850
                     Fail ("Invalid character class: " & E);
1851
                  end if;
1852
 
1853
               when 'c' =>
1854
                  if Parse_Pos + Cntrl'Length - 1 <= Parse_End
1855
                    and then
1856
                      E (Parse_Pos .. Parse_Pos + Cntrl'Length - 1) = Cntrl
1857
                  then
1858
                     Class := (if Invert then ANYOF_NCNTRL else ANYOF_CNTRL);
1859
                     Parse_Pos := Parse_Pos + Cntrl'Length;
1860
                  else
1861
                     Fail ("Invalid character class: " & E);
1862
                  end if;
1863
 
1864
               when 'd' =>
1865
                  if Parse_Pos + Digit'Length - 1 <= Parse_End
1866
                    and then
1867
                      E (Parse_Pos .. Parse_Pos + Digit'Length - 1) = Digit
1868
                  then
1869
                     Class := (if Invert then ANYOF_NDIGIT else ANYOF_DIGIT);
1870
                     Parse_Pos := Parse_Pos + Digit'Length;
1871
                  end if;
1872
 
1873
               when 'g' =>
1874
                  if Parse_Pos + Graph'Length - 1 <= Parse_End
1875
                    and then
1876
                      E (Parse_Pos .. Parse_Pos + Graph'Length - 1) = Graph
1877
                  then
1878
                     Class := (if Invert then ANYOF_NGRAPH else ANYOF_GRAPH);
1879
                     Parse_Pos := Parse_Pos + Graph'Length;
1880
                  else
1881
                     Fail ("Invalid character class: " & E);
1882
                  end if;
1883
 
1884
               when 'l' =>
1885
                  if Parse_Pos + Lower'Length - 1 <= Parse_End
1886
                    and then
1887
                      E (Parse_Pos .. Parse_Pos + Lower'Length - 1) = Lower
1888
                  then
1889
                     Class := (if Invert then ANYOF_NLOWER else ANYOF_LOWER);
1890
                     Parse_Pos := Parse_Pos + Lower'Length;
1891
                  else
1892
                     Fail ("Invalid character class: " & E);
1893
                  end if;
1894
 
1895
               when 'p' =>
1896
 
1897
                  --  All 'p' classes have the same length
1898
 
1899
                  if Parse_Pos + Print'Length - 1 <= Parse_End then
1900
                     if
1901
                       E (Parse_Pos .. Parse_Pos + Print'Length - 1) = Print
1902
                     then
1903
                        Class :=
1904
                          (if Invert then ANYOF_NPRINT else ANYOF_PRINT);
1905
                        Parse_Pos := Parse_Pos + Print'Length;
1906
 
1907
                     elsif
1908
                       E (Parse_Pos .. Parse_Pos + Punct'Length - 1) = Punct
1909
                     then
1910
                        Class :=
1911
                          (if Invert then ANYOF_NPUNCT else ANYOF_PUNCT);
1912
                        Parse_Pos := Parse_Pos + Punct'Length;
1913
 
1914
                     else
1915
                        Fail ("Invalid character class: " & E);
1916
                     end if;
1917
 
1918
                  else
1919
                     Fail ("Invalid character class: " & E);
1920
                  end if;
1921
 
1922
               when 's' =>
1923
                  if Parse_Pos + Space'Length - 1 <= Parse_End
1924
                    and then
1925
                      E (Parse_Pos .. Parse_Pos + Space'Length - 1) = Space
1926
                  then
1927
                     Class := (if Invert then ANYOF_NSPACE else ANYOF_SPACE);
1928
                     Parse_Pos := Parse_Pos + Space'Length;
1929
                  else
1930
                     Fail ("Invalid character class: " & E);
1931
                  end if;
1932
 
1933
               when 'u' =>
1934
                  if Parse_Pos + Upper'Length - 1 <= Parse_End
1935
                    and then
1936
                      E (Parse_Pos .. Parse_Pos + Upper'Length - 1) = Upper
1937
                  then
1938
                     Class := (if Invert then ANYOF_NUPPER else ANYOF_UPPER);
1939
                     Parse_Pos := Parse_Pos + Upper'Length;
1940
                  else
1941
                     Fail ("Invalid character class: " & E);
1942
                  end if;
1943
 
1944
               when 'w' =>
1945
                  if Parse_Pos + Word'Length - 1 <= Parse_End
1946
                    and then
1947
                      E (Parse_Pos .. Parse_Pos + Word'Length - 1) = Word
1948
                  then
1949
                     Class := (if Invert then ANYOF_NALNUM else ANYOF_ALNUM);
1950
                     Parse_Pos := Parse_Pos + Word'Length;
1951
                  else
1952
                     Fail ("Invalid character class: " & E);
1953
                  end if;
1954
 
1955
               when 'x' =>
1956
                  if Parse_Pos + Xdigit'Length - 1 <= Parse_End
1957
                    and then
1958
                      E (Parse_Pos .. Parse_Pos + Xdigit'Length - 1) = Xdigit
1959
                  then
1960
                     Class := (if Invert then ANYOF_NXDIGIT else ANYOF_XDIGIT);
1961
                     Parse_Pos := Parse_Pos + Xdigit'Length;
1962
 
1963
                  else
1964
                     Fail ("Invalid character class: " & E);
1965
                  end if;
1966
 
1967
               when others =>
1968
                  Fail ("Invalid character class: " & E);
1969
            end case;
1970
 
1971
         --  Character class not specified
1972
 
1973
         else
1974
            return ANYOF_NONE;
1975
         end if;
1976
 
1977
         return Class;
1978
      end Parse_Posix_Character_Class;
1979
 
1980
      --  Local Declarations
1981
 
1982
      Result : Pointer;
1983
 
1984
      Expr_Flags : Expression_Flags;
1985
      pragma Unreferenced (Expr_Flags);
1986
 
1987
   --  Start of processing for Compile
1988
 
1989
   begin
1990
      Emit (MAGIC);
1991
      Parse (False, Expr_Flags, Result);
1992
 
1993
      if Result = 0 then
1994
         Fail ("Couldn't compile expression");
1995
      end if;
1996
 
1997
      Final_Code_Size := Emit_Ptr - 1;
1998
 
1999
      --  Do we want to actually compile the expression, or simply get the
2000
      --  code size ???
2001
 
2002
      if Emit_Code then
2003
         Optimize (PM);
2004
      end if;
2005
 
2006
      PM.Flags := Flags;
2007
   end Compile;
2008
 
2009
   function Compile
2010
     (Expression : String;
2011
      Flags      : Regexp_Flags := No_Flags) return Pattern_Matcher
2012
   is
2013
      Size  : Program_Size;
2014
      Dummy : Pattern_Matcher (0);
2015
      pragma Unreferenced (Dummy);
2016
 
2017
   begin
2018
      Compile (Dummy, Expression, Size, Flags);
2019
 
2020
      declare
2021
         Result : Pattern_Matcher (Size);
2022
      begin
2023
         Compile (Result, Expression, Size, Flags);
2024
         return Result;
2025
      end;
2026
   end Compile;
2027
 
2028
   procedure Compile
2029
     (Matcher    : out Pattern_Matcher;
2030
      Expression : String;
2031
      Flags      : Regexp_Flags := No_Flags)
2032
   is
2033
      Size : Program_Size;
2034
      pragma Unreferenced (Size);
2035
   begin
2036
      Compile (Matcher, Expression, Size, Flags);
2037
   end Compile;
2038
 
2039
   ----------
2040
   -- Dump --
2041
   ----------
2042
 
2043
   procedure Dump (Self : Pattern_Matcher) is
2044
      Op      : Opcode;
2045
      Program : Program_Data renames Self.Program;
2046
 
2047
      procedure Dump_Until
2048
        (Start  : Pointer;
2049
         Till   : Pointer;
2050
         Indent : Natural := 0);
2051
      --  Dump the program until the node Till (not included) is met.
2052
      --  Every line is indented with Index spaces at the beginning
2053
      --  Dumps till the end if Till is 0.
2054
 
2055
      ----------------
2056
      -- Dump_Until --
2057
      ----------------
2058
 
2059
      procedure Dump_Until
2060
        (Start  : Pointer;
2061
         Till   : Pointer;
2062
         Indent : Natural := 0)
2063
      is
2064
         Next         : Pointer;
2065
         Index        : Pointer;
2066
         Local_Indent : Natural := Indent;
2067
         Length       : Pointer;
2068
 
2069
      begin
2070
         Index := Start;
2071
         while Index < Till loop
2072
            Op := Opcode'Val (Character'Pos ((Self.Program (Index))));
2073
 
2074
            if Op = CLOSE then
2075
               Local_Indent := Local_Indent - 3;
2076
            end if;
2077
 
2078
            declare
2079
               Point : constant String := Pointer'Image (Index);
2080
 
2081
            begin
2082
               for J in 1 .. 6 - Point'Length loop
2083
                  Put (' ');
2084
               end loop;
2085
 
2086
               Put (Point
2087
                    & " : "
2088
                    & (1 .. Local_Indent => ' ')
2089
                    & Opcode'Image (Op));
2090
            end;
2091
 
2092
            --  Print the parenthesis number
2093
 
2094
            if Op = OPEN or else Op = CLOSE or else Op = REFF then
2095
               Put (Natural'Image (Character'Pos (Program (Index + 3))));
2096
            end if;
2097
 
2098
            Next := Index + Get_Next_Offset (Program, Index);
2099
 
2100
            if Next = Index then
2101
               Put ("  (next at 0)");
2102
            else
2103
               Put ("  (next at " & Pointer'Image (Next) & ")");
2104
            end if;
2105
 
2106
            case Op is
2107
 
2108
               --  Character class operand
2109
 
2110
               when ANYOF =>  null;
2111
                  declare
2112
                     Bitmap  : Character_Class;
2113
                     Last    : Character := ASCII.NUL;
2114
                     Current : Natural := 0;
2115
 
2116
                     Current_Char : Character;
2117
 
2118
                  begin
2119
                     Bitmap_Operand (Program, Index, Bitmap);
2120
                     Put ("   operand=");
2121
 
2122
                     while Current <= 255 loop
2123
                        Current_Char := Character'Val (Current);
2124
 
2125
                        --  First item in a range
2126
 
2127
                        if Get_From_Class (Bitmap, Current_Char) then
2128
                           Last := Current_Char;
2129
 
2130
                           --  Search for the last item in the range
2131
 
2132
                           loop
2133
                              Current := Current + 1;
2134
                              exit when Current > 255;
2135
                              Current_Char := Character'Val (Current);
2136
                              exit when
2137
                                not Get_From_Class (Bitmap, Current_Char);
2138
 
2139
                           end loop;
2140
 
2141
                           if Last <= ' ' then
2142
                              Put (Last'Img);
2143
                           else
2144
                              Put (Last);
2145
                           end if;
2146
 
2147
                           if Character'Succ (Last) /= Current_Char then
2148
                              Put ("-" & Character'Pred (Current_Char));
2149
                           end if;
2150
 
2151
                        else
2152
                           Current := Current + 1;
2153
                        end if;
2154
                     end loop;
2155
 
2156
                     New_Line;
2157
                     Index := Index + 3 + Bitmap'Length;
2158
                  end;
2159
 
2160
               --  string operand
2161
 
2162
               when EXACT | EXACTF =>
2163
                  Length := String_Length (Program, Index);
2164
                  Put ("   operand (length:" & Program_Size'Image (Length + 1)
2165
                       & ") ="
2166
                       & String (Program (String_Operand (Index)
2167
                                          .. String_Operand (Index)
2168
                                          + Length)));
2169
                  Index := String_Operand (Index) + Length + 1;
2170
                  New_Line;
2171
 
2172
               --  Node operand
2173
 
2174
               when BRANCH =>
2175
                  New_Line;
2176
                  Dump_Until (Index + 3, Next, Local_Indent + 3);
2177
                  Index := Next;
2178
 
2179
               when STAR | PLUS =>
2180
                  New_Line;
2181
 
2182
                  --  Only one instruction
2183
 
2184
                  Dump_Until (Index + 3, Index + 4, Local_Indent + 3);
2185
                  Index := Next;
2186
 
2187
               when CURLY | CURLYX =>
2188
                  Put ("  {"
2189
                       & Natural'Image (Read_Natural (Program, Index + 3))
2190
                       & ","
2191
                       & Natural'Image (Read_Natural (Program, Index + 5))
2192
                       & "}");
2193
                  New_Line;
2194
                  Dump_Until (Index + 7, Next, Local_Indent + 3);
2195
                  Index := Next;
2196
 
2197
               when OPEN =>
2198
                  New_Line;
2199
                  Index := Index + 4;
2200
                  Local_Indent := Local_Indent + 3;
2201
 
2202
               when CLOSE | REFF =>
2203
                  New_Line;
2204
                  Index := Index + 4;
2205
 
2206
               when EOP =>
2207
                  Index := Index + 3;
2208
                  New_Line;
2209
                  exit;
2210
 
2211
               --  No operand
2212
 
2213
               when others =>
2214
                  Index := Index + 3;
2215
                  New_Line;
2216
            end case;
2217
         end loop;
2218
      end Dump_Until;
2219
 
2220
   --  Start of processing for Dump
2221
 
2222
   begin
2223
      pragma Assert (Self.Program (Program_First) = MAGIC,
2224
                     "Corrupted Pattern_Matcher");
2225
 
2226
      Put_Line ("Must start with (Self.First) = "
2227
                & Character'Image (Self.First));
2228
 
2229
      if (Self.Flags and Case_Insensitive) /= 0 then
2230
         Put_Line ("  Case_Insensitive mode");
2231
      end if;
2232
 
2233
      if (Self.Flags and Single_Line) /= 0 then
2234
         Put_Line ("  Single_Line mode");
2235
      end if;
2236
 
2237
      if (Self.Flags and Multiple_Lines) /= 0 then
2238
         Put_Line ("  Multiple_Lines mode");
2239
      end if;
2240
 
2241
      Put_Line ("     1 : MAGIC");
2242
      Dump_Until (Program_First + 1, Self.Program'Last + 1);
2243
   end Dump;
2244
 
2245
   --------------------
2246
   -- Get_From_Class --
2247
   --------------------
2248
 
2249
   function Get_From_Class
2250
     (Bitmap : Character_Class;
2251
      C      : Character) return Boolean
2252
   is
2253
      Value : constant Class_Byte := Character'Pos (C);
2254
   begin
2255
      return
2256
        (Bitmap (Value / 8) and Bit_Conversion (Value mod 8)) /= 0;
2257
   end Get_From_Class;
2258
 
2259
   --------------
2260
   -- Get_Next --
2261
   --------------
2262
 
2263
   function Get_Next (Program : Program_Data; IP : Pointer) return Pointer is
2264
      Offset : constant Pointer := Get_Next_Offset (Program, IP);
2265
   begin
2266
      if Offset = 0 then
2267
         return 0;
2268
      else
2269
         return IP + Offset;
2270
      end if;
2271
   end Get_Next;
2272
 
2273
   ---------------------
2274
   -- Get_Next_Offset --
2275
   ---------------------
2276
 
2277
   function Get_Next_Offset
2278
     (Program : Program_Data;
2279
      IP      : Pointer) return Pointer
2280
   is
2281
   begin
2282
      return Pointer (Read_Natural (Program, IP + 1));
2283
   end Get_Next_Offset;
2284
 
2285
   --------------
2286
   -- Is_Alnum --
2287
   --------------
2288
 
2289
   function Is_Alnum (C : Character) return Boolean is
2290
   begin
2291
      return Is_Alphanumeric (C) or else C = '_';
2292
   end Is_Alnum;
2293
 
2294
   ------------------
2295
   -- Is_Printable --
2296
   ------------------
2297
 
2298
   function Is_Printable (C : Character) return Boolean is
2299
   begin
2300
      --  Printable if space or graphic character or other whitespace
2301
      --  Other white space includes (HT/LF/VT/FF/CR = codes 9-13)
2302
 
2303
      return C in Character'Val (32) .. Character'Val (126)
2304
        or else C in ASCII.HT .. ASCII.CR;
2305
   end Is_Printable;
2306
 
2307
   --------------------
2308
   -- Is_White_Space --
2309
   --------------------
2310
 
2311
   function Is_White_Space (C : Character) return Boolean is
2312
   begin
2313
      --  Note: HT = 9, LF = 10, VT = 11, FF = 12, CR = 13
2314
 
2315
      return C = ' ' or else C in ASCII.HT .. ASCII.CR;
2316
   end Is_White_Space;
2317
 
2318
   -----------
2319
   -- Match --
2320
   -----------
2321
 
2322
   procedure Match
2323
     (Self       : Pattern_Matcher;
2324
      Data       : String;
2325
      Matches    : out Match_Array;
2326
      Data_First : Integer := -1;
2327
      Data_Last  : Positive := Positive'Last)
2328
   is
2329
      Program : Program_Data renames Self.Program; -- Shorter notation
2330
 
2331
      First_In_Data : constant Integer := Integer'Max (Data_First, Data'First);
2332
      Last_In_Data  : constant Integer := Integer'Min (Data_Last, Data'Last);
2333
 
2334
      --  Global work variables
2335
 
2336
      Input_Pos : Natural;           -- String-input pointer
2337
      BOL_Pos   : Natural;           -- Beginning of input, for ^ check
2338
      Matched   : Boolean := False;  -- Until proven True
2339
 
2340
      Matches_Full : Match_Array (0 .. Natural'Max (Self.Paren_Count,
2341
                                                    Matches'Last));
2342
      --  Stores the value of all the parenthesis pairs.
2343
      --  We do not use directly Matches, so that we can also use back
2344
      --  references (REFF) even if Matches is too small.
2345
 
2346
      type Natural_Array is array (Match_Count range <>) of Natural;
2347
      Matches_Tmp : Natural_Array (Matches_Full'Range);
2348
      --  Save the opening position of parenthesis
2349
 
2350
      Last_Paren  : Natural := 0;
2351
      --  Last parenthesis seen
2352
 
2353
      Greedy : Boolean := True;
2354
      --  True if the next operator should be greedy
2355
 
2356
      type Current_Curly_Record;
2357
      type Current_Curly_Access is access all Current_Curly_Record;
2358
      type Current_Curly_Record is record
2359
         Paren_Floor : Natural;  --  How far back to strip parenthesis data
2360
         Cur         : Integer;  --  How many instances of scan we've matched
2361
         Min         : Natural;  --  Minimal number of scans to match
2362
         Max         : Natural;  --  Maximal number of scans to match
2363
         Greedy      : Boolean;  --  Whether to work our way up or down
2364
         Scan        : Pointer;  --  The thing to match
2365
         Next        : Pointer;  --  What has to match after it
2366
         Lastloc     : Natural;  --  Where we started matching this scan
2367
         Old_Cc      : Current_Curly_Access; --  Before we started this one
2368
      end record;
2369
      --  Data used to handle the curly operator and the plus and star
2370
      --  operators for complex expressions.
2371
 
2372
      Current_Curly : Current_Curly_Access := null;
2373
      --  The curly currently being processed
2374
 
2375
      -----------------------
2376
      -- Local Subprograms --
2377
      -----------------------
2378
 
2379
      function Index (Start : Positive; C : Character) return Natural;
2380
      --  Find character C in Data starting at Start and return position
2381
 
2382
      function Repeat
2383
        (IP  : Pointer;
2384
         Max : Natural := Natural'Last) return Natural;
2385
      --  Repeatedly match something simple, report how many
2386
      --  It only matches on things of length 1.
2387
      --  Starting from Input_Pos, it matches at most Max CURLY.
2388
 
2389
      function Try (Pos : Positive) return Boolean;
2390
      --  Try to match at specific point
2391
 
2392
      function Match (IP : Pointer) return Boolean;
2393
      --  This is the main matching routine. Conceptually the strategy
2394
      --  is simple:  check to see whether the current node matches,
2395
      --  call self recursively to see whether the rest matches,
2396
      --  and then act accordingly.
2397
      --
2398
      --  In practice Match makes some effort to avoid recursion, in
2399
      --  particular by going through "ordinary" nodes (that don't
2400
      --  need to know whether the rest of the match failed) by
2401
      --  using a loop instead of recursion.
2402
      --  Why is the above comment part of the spec rather than body ???
2403
 
2404
      function Match_Whilem (IP : Pointer) return Boolean;
2405
      --  Return True if a WHILEM matches
2406
      --  How come IP is unreferenced in the body ???
2407
 
2408
      function Recurse_Match (IP : Pointer; From : Natural) return Boolean;
2409
      pragma Inline (Recurse_Match);
2410
      --  Calls Match recursively. It saves and restores the parenthesis
2411
      --  status and location in the input stream correctly, so that
2412
      --  backtracking is possible
2413
 
2414
      function Match_Simple_Operator
2415
        (Op     : Opcode;
2416
         Scan   : Pointer;
2417
         Next   : Pointer;
2418
         Greedy : Boolean) return Boolean;
2419
      --  Return True it the simple operator (possibly non-greedy) matches
2420
 
2421
      pragma Inline (Index);
2422
      pragma Inline (Repeat);
2423
 
2424
      --  These are two complex functions, but used only once
2425
 
2426
      pragma Inline (Match_Whilem);
2427
      pragma Inline (Match_Simple_Operator);
2428
 
2429
      -----------
2430
      -- Index --
2431
      -----------
2432
 
2433
      function Index (Start : Positive; C : Character) return Natural is
2434
      begin
2435
         for J in Start .. Last_In_Data loop
2436
            if Data (J) = C then
2437
               return J;
2438
            end if;
2439
         end loop;
2440
 
2441
         return 0;
2442
      end Index;
2443
 
2444
      -------------------
2445
      -- Recurse_Match --
2446
      -------------------
2447
 
2448
      function Recurse_Match (IP : Pointer; From : Natural) return Boolean is
2449
         L : constant Natural := Last_Paren;
2450
 
2451
         Tmp_F : constant Match_Array :=
2452
                   Matches_Full (From + 1 .. Matches_Full'Last);
2453
 
2454
         Start : constant Natural_Array :=
2455
                   Matches_Tmp (From + 1 .. Matches_Tmp'Last);
2456
         Input : constant Natural := Input_Pos;
2457
 
2458
      begin
2459
         if Match (IP) then
2460
            return True;
2461
         end if;
2462
 
2463
         Last_Paren := L;
2464
         Matches_Full (Tmp_F'Range) := Tmp_F;
2465
         Matches_Tmp (Start'Range) := Start;
2466
         Input_Pos := Input;
2467
         return False;
2468
      end Recurse_Match;
2469
 
2470
      -----------
2471
      -- Match --
2472
      -----------
2473
 
2474
      function Match (IP : Pointer) return Boolean is
2475
         Scan   : Pointer := IP;
2476
         Next   : Pointer;
2477
         Op     : Opcode;
2478
 
2479
      begin
2480
         State_Machine :
2481
         loop
2482
            pragma Assert (Scan /= 0);
2483
 
2484
            --  Determine current opcode and count its usage in debug mode
2485
 
2486
            Op := Opcode'Val (Character'Pos (Program (Scan)));
2487
 
2488
            --  Calculate offset of next instruction.
2489
            --  Second character is most significant in Program_Data.
2490
 
2491
            Next := Get_Next (Program, Scan);
2492
 
2493
            case Op is
2494
               when EOP =>
2495
                  return True;  --  Success !
2496
 
2497
               when BRANCH =>
2498
                  if Program (Next) /= BRANCH then
2499
                     Next := Operand (Scan); -- No choice, avoid recursion
2500
 
2501
                  else
2502
                     loop
2503
                        if Recurse_Match (Operand (Scan), 0) then
2504
                           return True;
2505
                        end if;
2506
 
2507
                        Scan := Get_Next (Program, Scan);
2508
                        exit when Scan = 0 or else Program (Scan) /= BRANCH;
2509
                     end loop;
2510
 
2511
                     exit State_Machine;
2512
                  end if;
2513
 
2514
               when NOTHING =>
2515
                  null;
2516
 
2517
               when BOL =>
2518
                  exit State_Machine when Input_Pos /= BOL_Pos
2519
                    and then ((Self.Flags and Multiple_Lines) = 0
2520
                              or else Data (Input_Pos - 1) /= ASCII.LF);
2521
 
2522
               when MBOL =>
2523
                  exit State_Machine when Input_Pos /= BOL_Pos
2524
                    and then Data (Input_Pos - 1) /= ASCII.LF;
2525
 
2526
               when SBOL =>
2527
                  exit State_Machine when Input_Pos /= BOL_Pos;
2528
 
2529
               when EOL =>
2530
                  exit State_Machine when Input_Pos <= Data'Last
2531
                    and then ((Self.Flags and Multiple_Lines) = 0
2532
                              or else Data (Input_Pos) /= ASCII.LF);
2533
 
2534
               when MEOL =>
2535
                  exit State_Machine when Input_Pos <= Data'Last
2536
                    and then Data (Input_Pos) /= ASCII.LF;
2537
 
2538
               when SEOL =>
2539
                  exit State_Machine when Input_Pos <= Data'Last;
2540
 
2541
               when BOUND | NBOUND =>
2542
 
2543
                  --  Was last char in word ?
2544
 
2545
                  declare
2546
                     N  : Boolean := False;
2547
                     Ln : Boolean := False;
2548
 
2549
                  begin
2550
                     if Input_Pos /= First_In_Data then
2551
                        N := Is_Alnum (Data (Input_Pos - 1));
2552
                     end if;
2553
 
2554
                     Ln :=
2555
                       (if Input_Pos > Last_In_Data
2556
                        then False
2557
                        else Is_Alnum (Data (Input_Pos)));
2558
 
2559
                     if Op = BOUND then
2560
                        if N = Ln then
2561
                           exit State_Machine;
2562
                        end if;
2563
                     else
2564
                        if N /= Ln then
2565
                           exit State_Machine;
2566
                        end if;
2567
                     end if;
2568
                  end;
2569
 
2570
               when SPACE =>
2571
                  exit State_Machine when Input_Pos > Last_In_Data
2572
                    or else not Is_White_Space (Data (Input_Pos));
2573
                  Input_Pos := Input_Pos + 1;
2574
 
2575
               when NSPACE =>
2576
                  exit State_Machine when Input_Pos > Last_In_Data
2577
                    or else Is_White_Space (Data (Input_Pos));
2578
                  Input_Pos := Input_Pos + 1;
2579
 
2580
               when DIGIT =>
2581
                  exit State_Machine when Input_Pos > Last_In_Data
2582
                    or else not Is_Digit (Data (Input_Pos));
2583
                  Input_Pos := Input_Pos + 1;
2584
 
2585
               when NDIGIT =>
2586
                  exit State_Machine when Input_Pos > Last_In_Data
2587
                    or else Is_Digit (Data (Input_Pos));
2588
                  Input_Pos := Input_Pos + 1;
2589
 
2590
               when ALNUM =>
2591
                  exit State_Machine when Input_Pos > Last_In_Data
2592
                    or else not Is_Alnum (Data (Input_Pos));
2593
                  Input_Pos := Input_Pos + 1;
2594
 
2595
               when NALNUM =>
2596
                  exit State_Machine when Input_Pos > Last_In_Data
2597
                    or else Is_Alnum (Data (Input_Pos));
2598
                  Input_Pos := Input_Pos + 1;
2599
 
2600
               when ANY =>
2601
                  exit State_Machine when Input_Pos > Last_In_Data
2602
                    or else Data (Input_Pos) = ASCII.LF;
2603
                  Input_Pos := Input_Pos + 1;
2604
 
2605
               when SANY =>
2606
                  exit State_Machine when Input_Pos > Last_In_Data;
2607
                  Input_Pos := Input_Pos + 1;
2608
 
2609
               when EXACT =>
2610
                  declare
2611
                     Opnd    : Pointer  := String_Operand (Scan);
2612
                     Current : Positive := Input_Pos;
2613
 
2614
                     Last    : constant Pointer :=
2615
                                 Opnd + String_Length (Program, Scan);
2616
 
2617
                  begin
2618
                     while Opnd <= Last loop
2619
                        exit State_Machine when Current > Last_In_Data
2620
                          or else Program (Opnd) /= Data (Current);
2621
                        Current := Current + 1;
2622
                        Opnd := Opnd + 1;
2623
                     end loop;
2624
 
2625
                     Input_Pos := Current;
2626
                  end;
2627
 
2628
               when EXACTF =>
2629
                  declare
2630
                     Opnd    : Pointer  := String_Operand (Scan);
2631
                     Current : Positive := Input_Pos;
2632
 
2633
                     Last : constant Pointer :=
2634
                              Opnd + String_Length (Program, Scan);
2635
 
2636
                  begin
2637
                     while Opnd <= Last loop
2638
                        exit State_Machine when Current > Last_In_Data
2639
                          or else Program (Opnd) /= To_Lower (Data (Current));
2640
                        Current := Current + 1;
2641
                        Opnd := Opnd + 1;
2642
                     end loop;
2643
 
2644
                     Input_Pos := Current;
2645
                  end;
2646
 
2647
               when ANYOF =>
2648
                  declare
2649
                     Bitmap : Character_Class;
2650
                  begin
2651
                     Bitmap_Operand (Program, Scan, Bitmap);
2652
                     exit State_Machine when Input_Pos > Last_In_Data
2653
                       or else not Get_From_Class (Bitmap, Data (Input_Pos));
2654
                     Input_Pos := Input_Pos + 1;
2655
                  end;
2656
 
2657
               when OPEN =>
2658
                  declare
2659
                     No : constant Natural :=
2660
                            Character'Pos (Program (Operand (Scan)));
2661
                  begin
2662
                     Matches_Tmp (No) := Input_Pos;
2663
                  end;
2664
 
2665
               when CLOSE =>
2666
                  declare
2667
                     No : constant Natural :=
2668
                            Character'Pos (Program (Operand (Scan)));
2669
 
2670
                  begin
2671
                     Matches_Full (No) := (Matches_Tmp (No), Input_Pos - 1);
2672
 
2673
                     if Last_Paren < No then
2674
                        Last_Paren := No;
2675
                     end if;
2676
                  end;
2677
 
2678
               when REFF =>
2679
                  declare
2680
                     No : constant Natural :=
2681
                            Character'Pos (Program (Operand (Scan)));
2682
 
2683
                     Data_Pos : Natural;
2684
 
2685
                  begin
2686
                     --  If we haven't seen that parenthesis yet
2687
 
2688
                     if Last_Paren < No then
2689
                        return False;
2690
                     end if;
2691
 
2692
                     Data_Pos := Matches_Full (No).First;
2693
 
2694
                     while Data_Pos <= Matches_Full (No).Last loop
2695
                        if Input_Pos > Last_In_Data
2696
                          or else Data (Input_Pos) /= Data (Data_Pos)
2697
                        then
2698
                           return False;
2699
                        end if;
2700
 
2701
                        Input_Pos := Input_Pos + 1;
2702
                        Data_Pos := Data_Pos + 1;
2703
                     end loop;
2704
                  end;
2705
 
2706
               when MINMOD =>
2707
                  Greedy := False;
2708
 
2709
               when STAR | PLUS | CURLY =>
2710
                  declare
2711
                     Greed : constant Boolean := Greedy;
2712
                  begin
2713
                     Greedy := True;
2714
                     return Match_Simple_Operator (Op, Scan, Next, Greed);
2715
                  end;
2716
 
2717
               when CURLYX =>
2718
 
2719
                  --  Looking at something like:
2720
 
2721
                  --    1: CURLYX {n,m}  (->4)
2722
                  --    2:   code for complex thing  (->3)
2723
                  --    3:   WHILEM (->0)
2724
                  --    4: NOTHING
2725
 
2726
                  declare
2727
                     Min : constant Natural :=
2728
                             Read_Natural (Program, Scan + 3);
2729
                     Max : constant Natural :=
2730
                             Read_Natural (Program, Scan + 5);
2731
                     Cc  : aliased Current_Curly_Record;
2732
 
2733
                     Has_Match : Boolean;
2734
 
2735
                  begin
2736
                     Cc := (Paren_Floor => Last_Paren,
2737
                            Cur         => -1,
2738
                            Min         => Min,
2739
                            Max         => Max,
2740
                            Greedy      => Greedy,
2741
                            Scan        => Scan + 7,
2742
                            Next        => Next,
2743
                            Lastloc     => 0,
2744
                            Old_Cc      => Current_Curly);
2745
                     Current_Curly := Cc'Unchecked_Access;
2746
 
2747
                     Has_Match := Match (Next - 3);
2748
 
2749
                     --  Start on the WHILEM
2750
 
2751
                     Current_Curly := Cc.Old_Cc;
2752
                     return Has_Match;
2753
                  end;
2754
 
2755
               when WHILEM =>
2756
                  return Match_Whilem (IP);
2757
            end case;
2758
 
2759
            Scan := Next;
2760
         end loop State_Machine;
2761
 
2762
         --  If we get here, there is no match.
2763
         --  For successful matches when EOP is the terminating point.
2764
 
2765
         return False;
2766
      end Match;
2767
 
2768
      ---------------------------
2769
      -- Match_Simple_Operator --
2770
      ---------------------------
2771
 
2772
      function Match_Simple_Operator
2773
        (Op     : Opcode;
2774
         Scan   : Pointer;
2775
         Next   : Pointer;
2776
         Greedy : Boolean) return Boolean
2777
      is
2778
         Next_Char       : Character := ASCII.NUL;
2779
         Next_Char_Known : Boolean := False;
2780
         No              : Integer;  --  Can be negative
2781
         Min             : Natural;
2782
         Max             : Natural := Natural'Last;
2783
         Operand_Code    : Pointer;
2784
         Old             : Natural;
2785
         Last_Pos        : Natural;
2786
         Save            : constant Natural := Input_Pos;
2787
 
2788
      begin
2789
         --  Lookahead to avoid useless match attempts
2790
         --  when we know what character comes next.
2791
 
2792
         if Program (Next) = EXACT then
2793
            Next_Char := Program (String_Operand (Next));
2794
            Next_Char_Known := True;
2795
         end if;
2796
 
2797
         --  Find the minimal and maximal values for the operator
2798
 
2799
         case Op is
2800
            when STAR =>
2801
               Min := 0;
2802
               Operand_Code := Operand (Scan);
2803
 
2804
            when PLUS =>
2805
               Min := 1;
2806
               Operand_Code := Operand (Scan);
2807
 
2808
            when others =>
2809
               Min := Read_Natural (Program, Scan + 3);
2810
               Max := Read_Natural (Program, Scan + 5);
2811
               Operand_Code := Scan + 7;
2812
         end case;
2813
 
2814
         --  Non greedy operators
2815
 
2816
         if not Greedy then
2817
 
2818
            --  Test the minimal repetitions
2819
 
2820
            if Min /= 0
2821
              and then Repeat (Operand_Code, Min) < Min
2822
            then
2823
               return False;
2824
            end if;
2825
 
2826
            Old := Input_Pos;
2827
 
2828
            --  Find the place where 'next' could work
2829
 
2830
            if Next_Char_Known then
2831
               --  Last position to check
2832
 
2833
               if Max = Natural'Last then
2834
                  Last_Pos := Last_In_Data;
2835
               else
2836
                  Last_Pos := Input_Pos + Max;
2837
 
2838
                  if Last_Pos > Last_In_Data then
2839
                     Last_Pos := Last_In_Data;
2840
                  end if;
2841
               end if;
2842
 
2843
               --  Look for the first possible opportunity
2844
 
2845
               loop
2846
                  --  Find the next possible position
2847
 
2848
                  while Input_Pos <= Last_Pos
2849
                    and then Data (Input_Pos) /= Next_Char
2850
                  loop
2851
                     Input_Pos := Input_Pos + 1;
2852
                  end loop;
2853
 
2854
                  if Input_Pos > Last_Pos then
2855
                     return False;
2856
                  end if;
2857
 
2858
                  --  Check that we still match if we stop
2859
                  --  at the position we just found.
2860
 
2861
                  declare
2862
                     Num : constant Natural := Input_Pos - Old;
2863
 
2864
                  begin
2865
                     Input_Pos := Old;
2866
 
2867
                     if Repeat (Operand_Code, Num) < Num then
2868
                        return False;
2869
                     end if;
2870
                  end;
2871
 
2872
                  --  Input_Pos now points to the new position
2873
 
2874
                  if Match (Get_Next (Program, Scan)) then
2875
                     return True;
2876
                  end if;
2877
 
2878
                  Old := Input_Pos;
2879
                  Input_Pos := Input_Pos + 1;
2880
               end loop;
2881
 
2882
            --  We know what the next character is
2883
 
2884
            else
2885
               while Max >= Min loop
2886
 
2887
                  --  If the next character matches
2888
 
2889
                  if Match (Next) then
2890
                     return True;
2891
                  end if;
2892
 
2893
                  Input_Pos := Save + Min;
2894
 
2895
                  --  Could not or did not match -- move forward
2896
 
2897
                  if Repeat (Operand_Code, 1) /= 0 then
2898
                     Min := Min + 1;
2899
                  else
2900
                     return False;
2901
                  end if;
2902
               end loop;
2903
            end if;
2904
 
2905
            return False;
2906
 
2907
         --  Greedy operators
2908
 
2909
         else
2910
            No := Repeat (Operand_Code, Max);
2911
 
2912
            --  ??? Perl has some special code here in case the
2913
            --  next instruction is of type EOL, since $ and \Z
2914
            --  can match before *and* after newline at the end.
2915
 
2916
            --  ??? Perl has some special code here in case (paren)
2917
            --  is True.
2918
 
2919
            --  Else, if we don't have any parenthesis
2920
 
2921
            while No >= Min loop
2922
               if not Next_Char_Known
2923
                 or else (Input_Pos <= Last_In_Data
2924
                           and then Data (Input_Pos) = Next_Char)
2925
               then
2926
                  if Match (Next) then
2927
                     return True;
2928
                  end if;
2929
               end if;
2930
 
2931
               --  Could not or did not work, we back up
2932
 
2933
               No := No - 1;
2934
               Input_Pos := Save + No;
2935
            end loop;
2936
 
2937
            return False;
2938
         end if;
2939
      end Match_Simple_Operator;
2940
 
2941
      ------------------
2942
      -- Match_Whilem --
2943
      ------------------
2944
 
2945
      --  This is really hard to understand, because after we match what we
2946
      --  are trying to match, we must make sure the rest of the REx is going
2947
      --  to match for sure, and to do that we have to go back UP the parse
2948
      --  tree by recursing ever deeper.  And if it fails, we have to reset
2949
      --  our parent's current state that we can try again after backing off.
2950
 
2951
      function Match_Whilem (IP : Pointer) return Boolean is
2952
         pragma Unreferenced (IP);
2953
 
2954
         Cc : constant Current_Curly_Access := Current_Curly;
2955
         N  : constant Natural              := Cc.Cur + 1;
2956
         Ln : Natural                       := 0;
2957
 
2958
         Lastloc : constant Natural := Cc.Lastloc;
2959
         --  Detection of 0-len
2960
 
2961
      begin
2962
         --  If degenerate scan matches "", assume scan done
2963
 
2964
         if Input_Pos = Cc.Lastloc
2965
           and then N >= Cc.Min
2966
         then
2967
            --  Temporarily restore the old context, and check that we
2968
            --  match was comes after CURLYX.
2969
 
2970
            Current_Curly := Cc.Old_Cc;
2971
 
2972
            if Current_Curly /= null then
2973
               Ln := Current_Curly.Cur;
2974
            end if;
2975
 
2976
            if Match (Cc.Next) then
2977
               return True;
2978
            end if;
2979
 
2980
            if Current_Curly /= null then
2981
               Current_Curly.Cur := Ln;
2982
            end if;
2983
 
2984
            Current_Curly := Cc;
2985
            return False;
2986
         end if;
2987
 
2988
         --  First, just match a string of min scans
2989
 
2990
         if N < Cc.Min then
2991
            Cc.Cur := N;
2992
            Cc.Lastloc := Input_Pos;
2993
 
2994
            if Match (Cc.Scan) then
2995
               return True;
2996
            end if;
2997
 
2998
            Cc.Cur := N - 1;
2999
            Cc.Lastloc := Lastloc;
3000
            return False;
3001
         end if;
3002
 
3003
         --  Prefer next over scan for minimal matching
3004
 
3005
         if not Cc.Greedy then
3006
            Current_Curly := Cc.Old_Cc;
3007
 
3008
            if Current_Curly /= null then
3009
               Ln := Current_Curly.Cur;
3010
            end if;
3011
 
3012
            if Recurse_Match (Cc.Next, Cc.Paren_Floor) then
3013
               return True;
3014
            end if;
3015
 
3016
            if Current_Curly /= null then
3017
               Current_Curly.Cur := Ln;
3018
            end if;
3019
 
3020
            Current_Curly := Cc;
3021
 
3022
            --  Maximum greed exceeded ?
3023
 
3024
            if N >= Cc.Max then
3025
               return False;
3026
            end if;
3027
 
3028
            --  Try scanning more and see if it helps
3029
            Cc.Cur := N;
3030
            Cc.Lastloc := Input_Pos;
3031
 
3032
            if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3033
               return True;
3034
            end if;
3035
 
3036
            Cc.Cur := N - 1;
3037
            Cc.Lastloc := Lastloc;
3038
            return False;
3039
         end if;
3040
 
3041
         --  Prefer scan over next for maximal matching
3042
 
3043
         if N < Cc.Max then   --  more greed allowed ?
3044
            Cc.Cur := N;
3045
            Cc.Lastloc := Input_Pos;
3046
 
3047
            if Recurse_Match (Cc.Scan, Cc.Paren_Floor) then
3048
               return True;
3049
            end if;
3050
         end if;
3051
 
3052
         --  Failed deeper matches of scan, so see if this one works
3053
 
3054
         Current_Curly := Cc.Old_Cc;
3055
 
3056
         if Current_Curly /= null then
3057
            Ln := Current_Curly.Cur;
3058
         end if;
3059
 
3060
         if Match (Cc.Next) then
3061
            return True;
3062
         end if;
3063
 
3064
         if Current_Curly /= null then
3065
            Current_Curly.Cur := Ln;
3066
         end if;
3067
 
3068
         Current_Curly := Cc;
3069
         Cc.Cur := N - 1;
3070
         Cc.Lastloc := Lastloc;
3071
         return False;
3072
      end Match_Whilem;
3073
 
3074
      ------------
3075
      -- Repeat --
3076
      ------------
3077
 
3078
      function Repeat
3079
        (IP  : Pointer;
3080
         Max : Natural := Natural'Last) return Natural
3081
      is
3082
         Scan  : Natural := Input_Pos;
3083
         Last  : Natural;
3084
         Op    : constant Opcode := Opcode'Val (Character'Pos (Program (IP)));
3085
         Count : Natural;
3086
         C     : Character;
3087
         Is_First : Boolean := True;
3088
         Bitmap   : Character_Class;
3089
 
3090
      begin
3091
         if Max = Natural'Last or else Scan + Max - 1 > Last_In_Data then
3092
            Last := Last_In_Data;
3093
         else
3094
            Last := Scan + Max - 1;
3095
         end if;
3096
 
3097
         case Op is
3098
            when ANY =>
3099
               while Scan <= Last
3100
                 and then Data (Scan) /= ASCII.LF
3101
               loop
3102
                  Scan := Scan + 1;
3103
               end loop;
3104
 
3105
            when SANY =>
3106
               Scan := Last + 1;
3107
 
3108
            when EXACT =>
3109
 
3110
               --  The string has only one character if Repeat was called
3111
 
3112
               C := Program (String_Operand (IP));
3113
               while Scan <= Last
3114
                 and then C = Data (Scan)
3115
               loop
3116
                  Scan := Scan + 1;
3117
               end loop;
3118
 
3119
            when EXACTF =>
3120
 
3121
               --  The string has only one character if Repeat was called
3122
 
3123
               C := Program (String_Operand (IP));
3124
               while Scan <= Last
3125
                 and then To_Lower (C) = Data (Scan)
3126
               loop
3127
                  Scan := Scan + 1;
3128
               end loop;
3129
 
3130
            when ANYOF =>
3131
               if Is_First then
3132
                  Bitmap_Operand (Program, IP, Bitmap);
3133
                  Is_First := False;
3134
               end if;
3135
 
3136
               while Scan <= Last
3137
                 and then Get_From_Class (Bitmap, Data (Scan))
3138
               loop
3139
                  Scan := Scan + 1;
3140
               end loop;
3141
 
3142
            when ALNUM =>
3143
               while Scan <= Last
3144
                 and then Is_Alnum (Data (Scan))
3145
               loop
3146
                  Scan := Scan + 1;
3147
               end loop;
3148
 
3149
            when NALNUM =>
3150
               while Scan <= Last
3151
                 and then not Is_Alnum (Data (Scan))
3152
               loop
3153
                  Scan := Scan + 1;
3154
               end loop;
3155
 
3156
            when SPACE =>
3157
               while Scan <= Last
3158
                 and then Is_White_Space (Data (Scan))
3159
               loop
3160
                  Scan := Scan + 1;
3161
               end loop;
3162
 
3163
            when NSPACE =>
3164
               while Scan <= Last
3165
                 and then not Is_White_Space (Data (Scan))
3166
               loop
3167
                  Scan := Scan + 1;
3168
               end loop;
3169
 
3170
            when DIGIT  =>
3171
               while Scan <= Last
3172
                 and then Is_Digit (Data (Scan))
3173
               loop
3174
                  Scan := Scan + 1;
3175
               end loop;
3176
 
3177
            when NDIGIT  =>
3178
               while Scan <= Last
3179
                 and then not Is_Digit (Data (Scan))
3180
               loop
3181
                  Scan := Scan + 1;
3182
               end loop;
3183
 
3184
            when others =>
3185
               raise Program_Error;
3186
         end case;
3187
 
3188
         Count := Scan - Input_Pos;
3189
         Input_Pos := Scan;
3190
         return Count;
3191
      end Repeat;
3192
 
3193
      ---------
3194
      -- Try --
3195
      ---------
3196
 
3197
      function Try (Pos : Positive) return Boolean is
3198
      begin
3199
         Input_Pos  := Pos;
3200
         Last_Paren := 0;
3201
         Matches_Full := (others => No_Match);
3202
 
3203
         if Match (Program_First + 1) then
3204
            Matches_Full (0) := (Pos, Input_Pos - 1);
3205
            return True;
3206
         end if;
3207
 
3208
         return False;
3209
      end Try;
3210
 
3211
   --  Start of processing for Match
3212
 
3213
   begin
3214
      --  Do we have the regexp Never_Match?
3215
 
3216
      if Self.Size = 0 then
3217
         Matches := (others => No_Match);
3218
         return;
3219
      end if;
3220
 
3221
      --  Check validity of program
3222
 
3223
      pragma Assert
3224
        (Program (Program_First) = MAGIC,
3225
         "Corrupted Pattern_Matcher");
3226
 
3227
      --  If there is a "must appear" string, look for it
3228
 
3229
      if Self.Must_Have_Length > 0 then
3230
         declare
3231
            First      : constant Character := Program (Self.Must_Have);
3232
            Must_First : constant Pointer := Self.Must_Have;
3233
            Must_Last  : constant Pointer :=
3234
                           Must_First + Pointer (Self.Must_Have_Length - 1);
3235
            Next_Try   : Natural := Index (First_In_Data, First);
3236
 
3237
         begin
3238
            while Next_Try /= 0
3239
              and then Data (Next_Try .. Next_Try + Self.Must_Have_Length - 1)
3240
                          = String (Program (Must_First .. Must_Last))
3241
            loop
3242
               Next_Try := Index (Next_Try + 1, First);
3243
            end loop;
3244
 
3245
            if Next_Try = 0 then
3246
               Matches := (others => No_Match);
3247
               return;                  -- Not present
3248
            end if;
3249
         end;
3250
      end if;
3251
 
3252
      --  Mark beginning of line for ^
3253
 
3254
      BOL_Pos := Data'First;
3255
 
3256
      --  Simplest case first: an anchored match need be tried only once
3257
 
3258
      if Self.Anchored and then (Self.Flags and Multiple_Lines) = 0 then
3259
         Matched := Try (First_In_Data);
3260
 
3261
      elsif Self.Anchored then
3262
         declare
3263
            Next_Try : Natural := First_In_Data;
3264
         begin
3265
            --  Test the first position in the buffer
3266
            Matched := Try (Next_Try);
3267
 
3268
            --  Else only test after newlines
3269
 
3270
            if not Matched then
3271
               while Next_Try <= Last_In_Data loop
3272
                  while Next_Try <= Last_In_Data
3273
                    and then Data (Next_Try) /= ASCII.LF
3274
                  loop
3275
                     Next_Try := Next_Try + 1;
3276
                  end loop;
3277
 
3278
                  Next_Try := Next_Try + 1;
3279
 
3280
                  if Next_Try <= Last_In_Data then
3281
                     Matched := Try (Next_Try);
3282
                     exit when Matched;
3283
                  end if;
3284
               end loop;
3285
            end if;
3286
         end;
3287
 
3288
      elsif Self.First /= ASCII.NUL then
3289
         --  We know what char it must start with
3290
 
3291
         declare
3292
            Next_Try : Natural := Index (First_In_Data, Self.First);
3293
 
3294
         begin
3295
            while Next_Try /= 0 loop
3296
               Matched := Try (Next_Try);
3297
               exit when Matched;
3298
               Next_Try := Index (Next_Try + 1, Self.First);
3299
            end loop;
3300
         end;
3301
 
3302
      else
3303
         --  Messy cases: try all locations (including for the empty string)
3304
 
3305
         Matched := Try (First_In_Data);
3306
 
3307
         if not Matched then
3308
            for S in First_In_Data + 1 .. Last_In_Data loop
3309
               Matched := Try (S);
3310
               exit when Matched;
3311
            end loop;
3312
         end if;
3313
      end if;
3314
 
3315
      --  Matched has its value
3316
 
3317
      for J in Last_Paren + 1 .. Matches'Last loop
3318
         Matches_Full (J) := No_Match;
3319
      end loop;
3320
 
3321
      Matches := Matches_Full (Matches'Range);
3322
   end Match;
3323
 
3324
   -----------
3325
   -- Match --
3326
   -----------
3327
 
3328
   function Match
3329
     (Self       : Pattern_Matcher;
3330
      Data       : String;
3331
      Data_First : Integer := -1;
3332
      Data_Last  : Positive := Positive'Last) return Natural
3333
   is
3334
      Matches : Match_Array (0 .. 0);
3335
 
3336
   begin
3337
      Match (Self, Data, Matches, Data_First, Data_Last);
3338
      if Matches (0) = No_Match then
3339
         return Data'First - 1;
3340
      else
3341
         return Matches (0).First;
3342
      end if;
3343
   end Match;
3344
 
3345
   function Match
3346
     (Self       : Pattern_Matcher;
3347
      Data       : String;
3348
      Data_First : Integer  := -1;
3349
      Data_Last  : Positive := Positive'Last) return Boolean
3350
   is
3351
      Matches : Match_Array (0 .. 0);
3352
 
3353
   begin
3354
      Match (Self, Data, Matches, Data_First, Data_Last);
3355
      return Matches (0).First >= Data'First;
3356
   end Match;
3357
 
3358
   procedure Match
3359
     (Expression : String;
3360
      Data       : String;
3361
      Matches    : out Match_Array;
3362
      Size       : Program_Size := Auto_Size;
3363
      Data_First : Integer      := -1;
3364
      Data_Last  : Positive     := Positive'Last)
3365
   is
3366
      PM            : Pattern_Matcher (Size);
3367
      Finalize_Size : Program_Size;
3368
      pragma Unreferenced (Finalize_Size);
3369
   begin
3370
      if Size = 0 then
3371
         Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3372
      else
3373
         Compile (PM, Expression, Finalize_Size);
3374
         Match (PM, Data, Matches, Data_First, Data_Last);
3375
      end if;
3376
   end Match;
3377
 
3378
   -----------
3379
   -- Match --
3380
   -----------
3381
 
3382
   function Match
3383
     (Expression : String;
3384
      Data       : String;
3385
      Size       : Program_Size := Auto_Size;
3386
      Data_First : Integer      := -1;
3387
      Data_Last  : Positive     := Positive'Last) return Natural
3388
   is
3389
      PM         : Pattern_Matcher (Size);
3390
      Final_Size : Program_Size;
3391
      pragma Unreferenced (Final_Size);
3392
   begin
3393
      if Size = 0 then
3394
         return Match (Compile (Expression), Data, Data_First, Data_Last);
3395
      else
3396
         Compile (PM, Expression, Final_Size);
3397
         return Match (PM, Data, Data_First, Data_Last);
3398
      end if;
3399
   end Match;
3400
 
3401
   -----------
3402
   -- Match --
3403
   -----------
3404
 
3405
   function  Match
3406
     (Expression : String;
3407
      Data       : String;
3408
      Size       : Program_Size := Auto_Size;
3409
      Data_First : Integer      := -1;
3410
      Data_Last  : Positive     := Positive'Last) return Boolean
3411
   is
3412
      Matches    : Match_Array (0 .. 0);
3413
      PM         : Pattern_Matcher (Size);
3414
      Final_Size : Program_Size;
3415
      pragma Unreferenced (Final_Size);
3416
   begin
3417
      if Size = 0 then
3418
         Match (Compile (Expression), Data, Matches, Data_First, Data_Last);
3419
      else
3420
         Compile (PM, Expression, Final_Size);
3421
         Match (PM, Data, Matches, Data_First, Data_Last);
3422
      end if;
3423
 
3424
      return Matches (0).First >= Data'First;
3425
   end Match;
3426
 
3427
   -------------
3428
   -- Operand --
3429
   -------------
3430
 
3431
   function Operand (P : Pointer) return Pointer is
3432
   begin
3433
      return P + 3;
3434
   end Operand;
3435
 
3436
   --------------
3437
   -- Optimize --
3438
   --------------
3439
 
3440
   procedure Optimize (Self : in out Pattern_Matcher) is
3441
      Scan    : Pointer;
3442
      Program : Program_Data renames Self.Program;
3443
 
3444
   begin
3445
      --  Start with safe defaults (no optimization):
3446
      --    *  No known first character of match
3447
      --    *  Does not necessarily start at beginning of line
3448
      --    *  No string known that has to appear in data
3449
 
3450
      Self.First := ASCII.NUL;
3451
      Self.Anchored := False;
3452
      Self.Must_Have := Program'Last + 1;
3453
      Self.Must_Have_Length := 0;
3454
 
3455
      Scan := Program_First + 1;  --  First instruction (can be anything)
3456
 
3457
      if Program (Scan) = EXACT then
3458
         Self.First := Program (String_Operand (Scan));
3459
 
3460
      elsif Program (Scan) = BOL
3461
        or else Program (Scan) = SBOL
3462
        or else Program (Scan) = MBOL
3463
      then
3464
         Self.Anchored := True;
3465
      end if;
3466
   end Optimize;
3467
 
3468
   -----------------
3469
   -- Paren_Count --
3470
   -----------------
3471
 
3472
   function Paren_Count (Regexp : Pattern_Matcher) return Match_Count is
3473
   begin
3474
      return Regexp.Paren_Count;
3475
   end Paren_Count;
3476
 
3477
   -----------
3478
   -- Quote --
3479
   -----------
3480
 
3481
   function Quote (Str : String) return String is
3482
      S    : String (1 .. Str'Length * 2);
3483
      Last : Natural := 0;
3484
 
3485
   begin
3486
      for J in Str'Range loop
3487
         case Str (J) is
3488
            when '^' | '$' | '|' | '*' | '+' | '?' | '{' |
3489
                 '}' | '[' | ']' | '(' | ')' | '\' | '.' =>
3490
 
3491
               S (Last + 1) := '\';
3492
               S (Last + 2) := Str (J);
3493
               Last := Last + 2;
3494
 
3495
            when others =>
3496
               S (Last + 1) := Str (J);
3497
               Last := Last + 1;
3498
         end case;
3499
      end loop;
3500
 
3501
      return S (1 .. Last);
3502
   end Quote;
3503
 
3504
   ------------------
3505
   -- Read_Natural --
3506
   ------------------
3507
 
3508
   function Read_Natural
3509
     (Program : Program_Data;
3510
      IP      : Pointer) return Natural
3511
   is
3512
   begin
3513
      return Character'Pos (Program (IP)) +
3514
               256 * Character'Pos (Program (IP + 1));
3515
   end Read_Natural;
3516
 
3517
   -----------------
3518
   -- Reset_Class --
3519
   -----------------
3520
 
3521
   procedure Reset_Class (Bitmap : out Character_Class) is
3522
   begin
3523
      Bitmap := (others => 0);
3524
   end Reset_Class;
3525
 
3526
   ------------------
3527
   -- Set_In_Class --
3528
   ------------------
3529
 
3530
   procedure Set_In_Class
3531
     (Bitmap : in out Character_Class;
3532
      C      : Character)
3533
   is
3534
      Value : constant Class_Byte := Character'Pos (C);
3535
   begin
3536
      Bitmap (Value / 8) := Bitmap (Value / 8)
3537
        or Bit_Conversion (Value mod 8);
3538
   end Set_In_Class;
3539
 
3540
   -------------------
3541
   -- String_Length --
3542
   -------------------
3543
 
3544
   function String_Length
3545
     (Program : Program_Data;
3546
      P       : Pointer) return Program_Size
3547
   is
3548
   begin
3549
      pragma Assert (Program (P) = EXACT or else Program (P) = EXACTF);
3550
      return Character'Pos (Program (P + 3));
3551
   end String_Length;
3552
 
3553
   --------------------
3554
   -- String_Operand --
3555
   --------------------
3556
 
3557
   function String_Operand (P : Pointer) return Pointer is
3558
   begin
3559
      return P + 4;
3560
   end String_Operand;
3561
 
3562
end System.Regpat;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.