OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [trunk/] [insight/] [gdb/] [gnu-regex.c] - Blame information for rev 1774

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 578 markom
/* *INDENT-OFF* */ /* keep in sync with glibc */
2
/* Extended regular expression matching and search library,
3
   version 0.12.
4
   (Implements POSIX draft P1003.2/D11.2, except for some of the
5
   internationalization features.)
6
   Copyright 1993, 1994, 1995, 1996, 1998, 1999, 2000
7
   Free Software Foundation, Inc.
8
 
9
   NOTE: The canonical source of this file is maintained with the
10
   GNU C Library.  Bugs can be reported to bug-glibc@gnu.org.
11
 
12
   This program is free software; you can redistribute it and/or modify it
13
   under the terms of the GNU General Public License as published by the
14
   Free Software Foundation; either version 2, or (at your option) any
15
   later version.
16
 
17
   This program is distributed in the hope that it will be useful,
18
   but WITHOUT ANY WARRANTY; without even the implied warranty of
19
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
   GNU General Public License for more details.
21
 
22
   You should have received a copy of the GNU General Public License
23
   along with this program; if not, write to the Free Software Foundation,
24
   Inc., 59 Temple Place - Suite 330,
25
   Boston, MA 02111-1307, USA.  */
26
 
27
/* AIX requires this to be the first thing in the file. */
28
#if defined _AIX && !defined REGEX_MALLOC
29
  #pragma alloca
30
#endif
31
 
32
#undef  _GNU_SOURCE
33
#define _GNU_SOURCE
34
 
35
#ifdef HAVE_CONFIG_H
36
# include <config.h>
37
#endif
38
 
39
#ifndef PARAMS
40
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
41
#  define PARAMS(args) args
42
# else
43
#  define PARAMS(args) ()
44
# endif  /* GCC.  */
45
#endif  /* Not PARAMS.  */
46
 
47
#if defined STDC_HEADERS && !defined emacs
48
# include <stddef.h>
49
#else
50
/* We need this for `gnu-regex.h', and perhaps for the Emacs include files.  */
51
# include <sys/types.h>
52
#endif
53
 
54
/* For platform which support the ISO C amendement 1 functionality we
55
   support user defined character classes.  */
56
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
57
 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
58
# include <wchar.h>
59
# include <wctype.h>
60
#endif
61
 
62
/* This is for other GNU distributions with internationalized messages.  */
63
/* CYGNUS LOCAL: ../intl will handle this for us */
64
#ifdef ENABLE_NLS
65
# include <libintl.h>
66
#else
67
# define gettext(msgid) (msgid)
68
#endif
69
 
70
#ifndef gettext_noop
71
/* This define is so xgettext can find the internationalizable
72
   strings.  */
73
# define gettext_noop(String) String
74
#endif
75
 
76
/* The `emacs' switch turns on certain matching commands
77
   that make sense only in Emacs. */
78
#ifdef emacs
79
 
80
# include "lisp.h"
81
# include "buffer.h"
82
# include "syntax.h"
83
 
84
#else  /* not emacs */
85
 
86
/* If we are not linking with Emacs proper,
87
   we can't use the relocating allocator
88
   even if config.h says that we can.  */
89
# undef REL_ALLOC
90
 
91
# if defined STDC_HEADERS || defined _LIBC
92
#  include <stdlib.h>
93
# else
94
char *malloc ();
95
char *realloc ();
96
# endif
97
 
98
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
99
   If nothing else has been done, use the method below.  */
100
# ifdef INHIBIT_STRING_HEADER
101
#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
102
#   if !defined bzero && !defined bcopy
103
#    undef INHIBIT_STRING_HEADER
104
#   endif
105
#  endif
106
# endif
107
 
108
/* This is the normal way of making sure we have a bcopy and a bzero.
109
   This is used in most programs--a few other programs avoid this
110
   by defining INHIBIT_STRING_HEADER.  */
111
# ifndef INHIBIT_STRING_HEADER
112
#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
113
#   include <string.h>
114
#   ifndef bzero
115
#    ifndef _LIBC
116
#     define bzero(s, n)        (memset (s, '\0', n), (s))
117
#    else
118
#     define bzero(s, n)        __bzero (s, n)
119
#    endif
120
#   endif
121
#  else
122
#   include <strings.h>
123
#   ifndef memcmp
124
#    define memcmp(s1, s2, n)   bcmp (s1, s2, n)
125
#   endif
126
#   ifndef memcpy
127
#    define memcpy(d, s, n)     (bcopy (s, d, n), (d))
128
#   endif
129
#  endif
130
# endif
131
 
132
/* Define the syntax stuff for \<, \>, etc.  */
133
 
134
/* This must be nonzero for the wordchar and notwordchar pattern
135
   commands in re_match_2.  */
136
# ifndef Sword
137
#  define Sword 1
138
# endif
139
 
140
# ifdef SWITCH_ENUM_BUG
141
#  define SWITCH_ENUM_CAST(x) ((int)(x))
142
# else
143
#  define SWITCH_ENUM_CAST(x) (x)
144
# endif
145
 
146
/* How many characters in the character set.  */
147
# define CHAR_SET_SIZE 256
148
 
149
/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
150
#ifndef _REGEX_RE_COMP
151
#define _REGEX_RE_COMP
152
#endif
153
 
154
# ifdef SYNTAX_TABLE
155
 
156
extern char *re_syntax_table;
157
 
158
# else /* not SYNTAX_TABLE */
159
 
160
static char re_syntax_table[CHAR_SET_SIZE];
161
 
162
static void
163
init_syntax_once ()
164
{
165
   register int c;
166
   static int done = 0;
167
 
168
   if (done)
169
     return;
170
 
171
   bzero (re_syntax_table, sizeof re_syntax_table);
172
 
173
   for (c = 'a'; c <= 'z'; c++)
174
     re_syntax_table[c] = Sword;
175
 
176
   for (c = 'A'; c <= 'Z'; c++)
177
     re_syntax_table[c] = Sword;
178
 
179
   for (c = '0'; c <= '9'; c++)
180
     re_syntax_table[c] = Sword;
181
 
182
   re_syntax_table['_'] = Sword;
183
 
184
   done = 1;
185
}
186
 
187
# endif /* not SYNTAX_TABLE */
188
 
189
# define SYNTAX(c) re_syntax_table[c]
190
 
191
#endif /* not emacs */
192
 
193
/* Get the interface, including the syntax bits.  */
194
/* CYGNUS LOCAL: call it gnu-regex.h, not regex.h, to avoid name conflicts */
195
#include "gnu-regex.h"
196
 
197
/* isalpha etc. are used for the character classes.  */
198
#include <ctype.h>
199
 
200
/* Jim Meyering writes:
201
 
202
   "... Some ctype macros are valid only for character codes that
203
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
204
   using /bin/cc or gcc but without giving an ansi option).  So, all
205
   ctype uses should be through macros like ISPRINT...  If
206
   STDC_HEADERS is defined, then autoconf has verified that the ctype
207
   macros don't need to be guarded with references to isascii. ...
208
   Defining isascii to 1 should let any compiler worth its salt
209
   eliminate the && through constant folding."
210
   Solaris defines some of these symbols so we must undefine them first.  */
211
 
212
#undef ISASCII
213
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
214
# define ISASCII(c) 1
215
#else
216
# define ISASCII(c) isascii(c)
217
#endif
218
 
219
#ifdef isblank
220
# define ISBLANK(c) (ISASCII (c) && isblank (c))
221
#else
222
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
223
#endif
224
#ifdef isgraph
225
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
226
#else
227
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
228
#endif
229
 
230
#undef ISPRINT
231
#define ISPRINT(c) (ISASCII (c) && isprint (c))
232
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
233
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
234
#define ISALPHA(c) (ISASCII (c) && isalpha (c))
235
#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
236
#define ISLOWER(c) (ISASCII (c) && islower (c))
237
#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
238
#define ISSPACE(c) (ISASCII (c) && isspace (c))
239
#define ISUPPER(c) (ISASCII (c) && isupper (c))
240
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
241
 
242
#ifndef NULL
243
# define NULL (void *)0
244
#endif
245
 
246
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
247
   since ours (we hope) works properly with all combinations of
248
   machines, compilers, `char' and `unsigned char' argument types.
249
   (Per Bothner suggested the basic approach.)  */
250
#undef SIGN_EXTEND_CHAR
251
#if __STDC__
252
# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
253
#else  /* not __STDC__ */
254
/* As in Harbison and Steele.  */
255
# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
256
#endif
257
 
258
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
259
   use `alloca' instead of `malloc'.  This is because using malloc in
260
   re_search* or re_match* could cause memory leaks when C-g is used in
261
   Emacs; also, malloc is slower and causes storage fragmentation.  On
262
   the other hand, malloc is more portable, and easier to debug.
263
 
264
   Because we sometimes use alloca, some routines have to be macros,
265
   not functions -- `alloca'-allocated space disappears at the end of the
266
   function it is called in.  */
267
 
268
#ifdef REGEX_MALLOC
269
 
270
# define REGEX_ALLOCATE malloc
271
# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
272
# define REGEX_FREE free
273
 
274
#else /* not REGEX_MALLOC  */
275
 
276
/* Emacs already defines alloca, sometimes.  */
277
# ifndef alloca
278
 
279
/* Make alloca work the best possible way.  */
280
#  ifdef __GNUC__
281
#   define alloca __builtin_alloca
282
#  else /* not __GNUC__ */
283
#   if HAVE_ALLOCA_H
284
#    include <alloca.h>
285
#   endif /* HAVE_ALLOCA_H */
286
#  endif /* not __GNUC__ */
287
 
288
# endif /* not alloca */
289
 
290
# define REGEX_ALLOCATE alloca
291
 
292
/* Assumes a `char *destination' variable.  */
293
# define REGEX_REALLOCATE(source, osize, nsize)                         \
294
  (destination = (char *) alloca (nsize),                               \
295
   memcpy (destination, source, osize))
296
 
297
/* No need to do anything to free, after alloca.  */
298
# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
299
 
300
#endif /* not REGEX_MALLOC */
301
 
302
/* Define how to allocate the failure stack.  */
303
 
304
#if defined REL_ALLOC && defined REGEX_MALLOC
305
 
306
# define REGEX_ALLOCATE_STACK(size)                             \
307
  r_alloc (&failure_stack_ptr, (size))
308
# define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
309
  r_re_alloc (&failure_stack_ptr, (nsize))
310
# define REGEX_FREE_STACK(ptr)                                  \
311
  r_alloc_free (&failure_stack_ptr)
312
 
313
#else /* not using relocating allocator */
314
 
315
# ifdef REGEX_MALLOC
316
 
317
#  define REGEX_ALLOCATE_STACK malloc
318
#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
319
#  define REGEX_FREE_STACK free
320
 
321
# else /* not REGEX_MALLOC */
322
 
323
#  define REGEX_ALLOCATE_STACK alloca
324
 
325
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
326
   REGEX_REALLOCATE (source, osize, nsize)
327
/* No need to explicitly free anything.  */
328
#  define REGEX_FREE_STACK(arg)
329
 
330
# endif /* not REGEX_MALLOC */
331
#endif /* not using relocating allocator */
332
 
333
 
334
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
335
   `string1' or just past its end.  This works if PTR is NULL, which is
336
   a good thing.  */
337
#define FIRST_STRING_P(ptr)                                     \
338
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
339
 
340
/* (Re)Allocate N items of type T using malloc, or fail.  */
341
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
342
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
343
#define RETALLOC_IF(addr, n, t) \
344
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
345
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
346
 
347
#define BYTEWIDTH 8 /* In bits.  */
348
 
349
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
350
 
351
#undef MAX
352
#undef MIN
353
#define MAX(a, b) ((a) > (b) ? (a) : (b))
354
#define MIN(a, b) ((a) < (b) ? (a) : (b))
355
 
356
typedef char boolean;
357
#define false 0
358
#define true 1
359
 
360
static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
361
                                        const char *string1, int size1,
362
                                        const char *string2, int size2,
363
                                        int pos,
364
                                        struct re_registers *regs,
365
                                        int stop));
366
 
367
/* These are the command codes that appear in compiled regular
368
   expressions.  Some opcodes are followed by argument bytes.  A
369
   command code can specify any interpretation whatsoever for its
370
   arguments.  Zero bytes may appear in the compiled regular expression.  */
371
 
372
typedef enum
373
{
374
  no_op = 0,
375
 
376
  /* Succeed right away--no more backtracking.  */
377
  succeed,
378
 
379
        /* Followed by one byte giving n, then by n literal bytes.  */
380
  exactn,
381
 
382
        /* Matches any (more or less) character.  */
383
  anychar,
384
 
385
        /* Matches any one char belonging to specified set.  First
386
           following byte is number of bitmap bytes.  Then come bytes
387
           for a bitmap saying which chars are in.  Bits in each byte
388
           are ordered low-bit-first.  A character is in the set if its
389
           bit is 1.  A character too large to have a bit in the map is
390
           automatically not in the set.  */
391
  charset,
392
 
393
        /* Same parameters as charset, but match any character that is
394
           not one of those specified.  */
395
  charset_not,
396
 
397
        /* Start remembering the text that is matched, for storing in a
398
           register.  Followed by one byte with the register number, in
399
           the range 0 to one less than the pattern buffer's re_nsub
400
           field.  Then followed by one byte with the number of groups
401
           inner to this one.  (This last has to be part of the
402
           start_memory only because we need it in the on_failure_jump
403
           of re_match_2.)  */
404
  start_memory,
405
 
406
        /* Stop remembering the text that is matched and store it in a
407
           memory register.  Followed by one byte with the register
408
           number, in the range 0 to one less than `re_nsub' in the
409
           pattern buffer, and one byte with the number of inner groups,
410
           just like `start_memory'.  (We need the number of inner
411
           groups here because we don't have any easy way of finding the
412
           corresponding start_memory when we're at a stop_memory.)  */
413
  stop_memory,
414
 
415
        /* Match a duplicate of something remembered. Followed by one
416
           byte containing the register number.  */
417
  duplicate,
418
 
419
        /* Fail unless at beginning of line.  */
420
  begline,
421
 
422
        /* Fail unless at end of line.  */
423
  endline,
424
 
425
        /* Succeeds if at beginning of buffer (if emacs) or at beginning
426
           of string to be matched (if not).  */
427
  begbuf,
428
 
429
        /* Analogously, for end of buffer/string.  */
430
  endbuf,
431
 
432
        /* Followed by two byte relative address to which to jump.  */
433
  jump,
434
 
435
        /* Same as jump, but marks the end of an alternative.  */
436
  jump_past_alt,
437
 
438
        /* Followed by two-byte relative address of place to resume at
439
           in case of failure.  */
440
  on_failure_jump,
441
 
442
        /* Like on_failure_jump, but pushes a placeholder instead of the
443
           current string position when executed.  */
444
  on_failure_keep_string_jump,
445
 
446
        /* Throw away latest failure point and then jump to following
447
           two-byte relative address.  */
448
  pop_failure_jump,
449
 
450
        /* Change to pop_failure_jump if know won't have to backtrack to
451
           match; otherwise change to jump.  This is used to jump
452
           back to the beginning of a repeat.  If what follows this jump
453
           clearly won't match what the repeat does, such that we can be
454
           sure that there is no use backtracking out of repetitions
455
           already matched, then we change it to a pop_failure_jump.
456
           Followed by two-byte address.  */
457
  maybe_pop_jump,
458
 
459
        /* Jump to following two-byte address, and push a dummy failure
460
           point. This failure point will be thrown away if an attempt
461
           is made to use it for a failure.  A `+' construct makes this
462
           before the first repeat.  Also used as an intermediary kind
463
           of jump when compiling an alternative.  */
464
  dummy_failure_jump,
465
 
466
        /* Push a dummy failure point and continue.  Used at the end of
467
           alternatives.  */
468
  push_dummy_failure,
469
 
470
        /* Followed by two-byte relative address and two-byte number n.
471
           After matching N times, jump to the address upon failure.  */
472
  succeed_n,
473
 
474
        /* Followed by two-byte relative address, and two-byte number n.
475
           Jump to the address N times, then fail.  */
476
  jump_n,
477
 
478
        /* Set the following two-byte relative address to the
479
           subsequent two-byte number.  The address *includes* the two
480
           bytes of number.  */
481
  set_number_at,
482
 
483
  wordchar,     /* Matches any word-constituent character.  */
484
  notwordchar,  /* Matches any char that is not a word-constituent.  */
485
 
486
  wordbeg,      /* Succeeds if at word beginning.  */
487
  wordend,      /* Succeeds if at word end.  */
488
 
489
  wordbound,    /* Succeeds if at a word boundary.  */
490
  notwordbound  /* Succeeds if not at a word boundary.  */
491
 
492
#ifdef emacs
493
  ,before_dot,  /* Succeeds if before point.  */
494
  at_dot,       /* Succeeds if at point.  */
495
  after_dot,    /* Succeeds if after point.  */
496
 
497
        /* Matches any character whose syntax is specified.  Followed by
498
           a byte which contains a syntax code, e.g., Sword.  */
499
  syntaxspec,
500
 
501
        /* Matches any character whose syntax is not that specified.  */
502
  notsyntaxspec
503
#endif /* emacs */
504
} re_opcode_t;
505
 
506
/* Common operations on the compiled pattern.  */
507
 
508
/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
509
 
510
#define STORE_NUMBER(destination, number)                               \
511
  do {                                                                  \
512
    (destination)[0] = (number) & 0377;                                  \
513
    (destination)[1] = (number) >> 8;                                   \
514
  } while (0)
515
 
516
/* Same as STORE_NUMBER, except increment DESTINATION to
517
   the byte after where the number is stored.  Therefore, DESTINATION
518
   must be an lvalue.  */
519
 
520
#define STORE_NUMBER_AND_INCR(destination, number)                      \
521
  do {                                                                  \
522
    STORE_NUMBER (destination, number);                                 \
523
    (destination) += 2;                                                 \
524
  } while (0)
525
 
526
/* Put into DESTINATION a number stored in two contiguous bytes starting
527
   at SOURCE.  */
528
 
529
#define EXTRACT_NUMBER(destination, source)                             \
530
  do {                                                                  \
531
    (destination) = *(source) & 0377;                                   \
532
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
533
  } while (0)
534
 
535
#ifdef DEBUG
536
static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
537
static void
538
extract_number (dest, source)
539
    int *dest;
540
    unsigned char *source;
541
{
542
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
543
  *dest = *source & 0377;
544
  *dest += temp << 8;
545
}
546
 
547
# ifndef EXTRACT_MACROS /* To debug the macros.  */
548
#  undef EXTRACT_NUMBER
549
#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
550
# endif /* not EXTRACT_MACROS */
551
 
552
#endif /* DEBUG */
553
 
554
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
555
   SOURCE must be an lvalue.  */
556
 
557
#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
558
  do {                                                                  \
559
    EXTRACT_NUMBER (destination, source);                               \
560
    (source) += 2;                                                      \
561
  } while (0)
562
 
563
#ifdef DEBUG
564
static void extract_number_and_incr _RE_ARGS ((int *destination,
565
                                               unsigned char **source));
566
static void
567
extract_number_and_incr (destination, source)
568
    int *destination;
569
    unsigned char **source;
570
{
571
  extract_number (destination, *source);
572
  *source += 2;
573
}
574
 
575
# ifndef EXTRACT_MACROS
576
#  undef EXTRACT_NUMBER_AND_INCR
577
#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
578
  extract_number_and_incr (&dest, &src)
579
# endif /* not EXTRACT_MACROS */
580
 
581
#endif /* DEBUG */
582
 
583
/* If DEBUG is defined, Regex prints many voluminous messages about what
584
   it is doing (if the variable `debug' is nonzero).  If linked with the
585
   main program in `iregex.c', you can enter patterns and strings
586
   interactively.  And if linked with the main program in `main.c' and
587
   the other test files, you can run the already-written tests.  */
588
 
589
#ifdef DEBUG
590
 
591
/* We use standard I/O for debugging.  */
592
# include <stdio.h>
593
 
594
/* It is useful to test things that ``must'' be true when debugging.  */
595
# include <assert.h>
596
 
597
static int debug = 0;
598
 
599
# define DEBUG_STATEMENT(e) e
600
# define DEBUG_PRINT1(x) if (debug) printf (x)
601
# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
602
# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
603
# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
604
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
605
  if (debug) print_partial_compiled_pattern (s, e)
606
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
607
  if (debug) print_double_string (w, s1, sz1, s2, sz2)
608
 
609
 
610
/* Print the fastmap in human-readable form.  */
611
 
612
void
613
print_fastmap (fastmap)
614
    char *fastmap;
615
{
616
  unsigned was_a_range = 0;
617
  unsigned i = 0;
618
 
619
  while (i < (1 << BYTEWIDTH))
620
    {
621
      if (fastmap[i++])
622
        {
623
          was_a_range = 0;
624
          putchar (i - 1);
625
          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
626
            {
627
              was_a_range = 1;
628
              i++;
629
            }
630
          if (was_a_range)
631
            {
632
              printf ("-");
633
              putchar (i - 1);
634
            }
635
        }
636
    }
637
  putchar ('\n');
638
}
639
 
640
 
641
/* Print a compiled pattern string in human-readable form, starting at
642
   the START pointer into it and ending just before the pointer END.  */
643
 
644
void
645
print_partial_compiled_pattern (start, end)
646
    unsigned char *start;
647
    unsigned char *end;
648
{
649
  int mcnt, mcnt2;
650
  unsigned char *p1;
651
  unsigned char *p = start;
652
  unsigned char *pend = end;
653
 
654
  if (start == NULL)
655
    {
656
      printf ("(null)\n");
657
      return;
658
    }
659
 
660
  /* Loop over pattern commands.  */
661
  while (p < pend)
662
    {
663
      printf ("%d:\t", p - start);
664
 
665
      switch ((re_opcode_t) *p++)
666
        {
667
        case no_op:
668
          printf ("/no_op");
669
          break;
670
 
671
        case exactn:
672
          mcnt = *p++;
673
          printf ("/exactn/%d", mcnt);
674
          do
675
            {
676
              putchar ('/');
677
              putchar (*p++);
678
            }
679
          while (--mcnt);
680
          break;
681
 
682
        case start_memory:
683
          mcnt = *p++;
684
          printf ("/start_memory/%d/%d", mcnt, *p++);
685
          break;
686
 
687
        case stop_memory:
688
          mcnt = *p++;
689
          printf ("/stop_memory/%d/%d", mcnt, *p++);
690
          break;
691
 
692
        case duplicate:
693
          printf ("/duplicate/%d", *p++);
694
          break;
695
 
696
        case anychar:
697
          printf ("/anychar");
698
          break;
699
 
700
        case charset:
701
        case charset_not:
702
          {
703
            register int c, last = -100;
704
            register int in_range = 0;
705
 
706
            printf ("/charset [%s",
707
                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
708
 
709
            assert (p + *p < pend);
710
 
711
            for (c = 0; c < 256; c++)
712
              if (c / 8 < *p
713
                  && (p[1 + (c/8)] & (1 << (c % 8))))
714
                {
715
                  /* Are we starting a range?  */
716
                  if (last + 1 == c && ! in_range)
717
                    {
718
                      putchar ('-');
719
                      in_range = 1;
720
                    }
721
                  /* Have we broken a range?  */
722
                  else if (last + 1 != c && in_range)
723
              {
724
                      putchar (last);
725
                      in_range = 0;
726
                    }
727
 
728
                  if (! in_range)
729
                    putchar (c);
730
 
731
                  last = c;
732
              }
733
 
734
            if (in_range)
735
              putchar (last);
736
 
737
            putchar (']');
738
 
739
            p += 1 + *p;
740
          }
741
          break;
742
 
743
        case begline:
744
          printf ("/begline");
745
          break;
746
 
747
        case endline:
748
          printf ("/endline");
749
          break;
750
 
751
        case on_failure_jump:
752
          extract_number_and_incr (&mcnt, &p);
753
          printf ("/on_failure_jump to %d", p + mcnt - start);
754
          break;
755
 
756
        case on_failure_keep_string_jump:
757
          extract_number_and_incr (&mcnt, &p);
758
          printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
759
          break;
760
 
761
        case dummy_failure_jump:
762
          extract_number_and_incr (&mcnt, &p);
763
          printf ("/dummy_failure_jump to %d", p + mcnt - start);
764
          break;
765
 
766
        case push_dummy_failure:
767
          printf ("/push_dummy_failure");
768
          break;
769
 
770
        case maybe_pop_jump:
771
          extract_number_and_incr (&mcnt, &p);
772
          printf ("/maybe_pop_jump to %d", p + mcnt - start);
773
          break;
774
 
775
        case pop_failure_jump:
776
          extract_number_and_incr (&mcnt, &p);
777
          printf ("/pop_failure_jump to %d", p + mcnt - start);
778
          break;
779
 
780
        case jump_past_alt:
781
          extract_number_and_incr (&mcnt, &p);
782
          printf ("/jump_past_alt to %d", p + mcnt - start);
783
          break;
784
 
785
        case jump:
786
          extract_number_and_incr (&mcnt, &p);
787
          printf ("/jump to %d", p + mcnt - start);
788
          break;
789
 
790
        case succeed_n:
791
          extract_number_and_incr (&mcnt, &p);
792
          p1 = p + mcnt;
793
          extract_number_and_incr (&mcnt2, &p);
794
          printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
795
          break;
796
 
797
        case jump_n:
798
          extract_number_and_incr (&mcnt, &p);
799
          p1 = p + mcnt;
800
          extract_number_and_incr (&mcnt2, &p);
801
          printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
802
          break;
803
 
804
        case set_number_at:
805
          extract_number_and_incr (&mcnt, &p);
806
          p1 = p + mcnt;
807
          extract_number_and_incr (&mcnt2, &p);
808
          printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
809
          break;
810
 
811
        case wordbound:
812
          printf ("/wordbound");
813
          break;
814
 
815
        case notwordbound:
816
          printf ("/notwordbound");
817
          break;
818
 
819
        case wordbeg:
820
          printf ("/wordbeg");
821
          break;
822
 
823
        case wordend:
824
          printf ("/wordend");
825
 
826
# ifdef emacs
827
        case before_dot:
828
          printf ("/before_dot");
829
          break;
830
 
831
        case at_dot:
832
          printf ("/at_dot");
833
          break;
834
 
835
        case after_dot:
836
          printf ("/after_dot");
837
          break;
838
 
839
        case syntaxspec:
840
          printf ("/syntaxspec");
841
          mcnt = *p++;
842
          printf ("/%d", mcnt);
843
          break;
844
 
845
        case notsyntaxspec:
846
          printf ("/notsyntaxspec");
847
          mcnt = *p++;
848
          printf ("/%d", mcnt);
849
          break;
850
# endif /* emacs */
851
 
852
        case wordchar:
853
          printf ("/wordchar");
854
          break;
855
 
856
        case notwordchar:
857
          printf ("/notwordchar");
858
          break;
859
 
860
        case begbuf:
861
          printf ("/begbuf");
862
          break;
863
 
864
        case endbuf:
865
          printf ("/endbuf");
866
          break;
867
 
868
        default:
869
          printf ("?%d", *(p-1));
870
        }
871
 
872
      putchar ('\n');
873
    }
874
 
875
  printf ("%d:\tend of pattern.\n", p - start);
876
}
877
 
878
 
879
void
880
print_compiled_pattern (bufp)
881
    struct re_pattern_buffer *bufp;
882
{
883
  unsigned char *buffer = bufp->buffer;
884
 
885
  print_partial_compiled_pattern (buffer, buffer + bufp->used);
886
  printf ("%ld bytes used/%ld bytes allocated.\n",
887
          bufp->used, bufp->allocated);
888
 
889
  if (bufp->fastmap_accurate && bufp->fastmap)
890
    {
891
      printf ("fastmap: ");
892
      print_fastmap (bufp->fastmap);
893
    }
894
 
895
  printf ("re_nsub: %d\t", bufp->re_nsub);
896
  printf ("regs_alloc: %d\t", bufp->regs_allocated);
897
  printf ("can_be_null: %d\t", bufp->can_be_null);
898
  printf ("newline_anchor: %d\n", bufp->newline_anchor);
899
  printf ("no_sub: %d\t", bufp->no_sub);
900
  printf ("not_bol: %d\t", bufp->not_bol);
901
  printf ("not_eol: %d\t", bufp->not_eol);
902
  printf ("syntax: %lx\n", bufp->syntax);
903
  /* Perhaps we should print the translate table?  */
904
}
905
 
906
 
907
void
908
print_double_string (where, string1, size1, string2, size2)
909
    const char *where;
910
    const char *string1;
911
    const char *string2;
912
    int size1;
913
    int size2;
914
{
915
  int this_char;
916
 
917
  if (where == NULL)
918
    printf ("(null)");
919
  else
920
    {
921
      if (FIRST_STRING_P (where))
922
        {
923
          for (this_char = where - string1; this_char < size1; this_char++)
924
            putchar (string1[this_char]);
925
 
926
          where = string2;
927
        }
928
 
929
      for (this_char = where - string2; this_char < size2; this_char++)
930
        putchar (string2[this_char]);
931
    }
932
}
933
 
934
void
935
printchar (c)
936
     int c;
937
{
938
  putc (c, stderr);
939
}
940
 
941
#else /* not DEBUG */
942
 
943
# undef assert
944
# define assert(e)
945
 
946
# define DEBUG_STATEMENT(e)
947
# define DEBUG_PRINT1(x)
948
# define DEBUG_PRINT2(x1, x2)
949
# define DEBUG_PRINT3(x1, x2, x3)
950
# define DEBUG_PRINT4(x1, x2, x3, x4)
951
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
952
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
953
 
954
#endif /* not DEBUG */
955
 
956
/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
957
   also be assigned to arbitrarily: each pattern buffer stores its own
958
   syntax, so it can be changed between regex compilations.  */
959
/* This has no initializer because initialized variables in Emacs
960
   become read-only after dumping.  */
961
reg_syntax_t re_syntax_options;
962
 
963
 
964
/* Specify the precise syntax of regexps for compilation.  This provides
965
   for compatibility for various utilities which historically have
966
   different, incompatible syntaxes.
967
 
968
   The argument SYNTAX is a bit mask comprised of the various bits
969
   defined in gnu-regex.h.  We return the old syntax.  */
970
 
971
reg_syntax_t
972
re_set_syntax (syntax)
973
    reg_syntax_t syntax;
974
{
975
  reg_syntax_t ret = re_syntax_options;
976
 
977
  re_syntax_options = syntax;
978
#ifdef DEBUG
979
  if (syntax & RE_DEBUG)
980
    debug = 1;
981
  else if (debug) /* was on but now is not */
982
    debug = 0;
983
#endif /* DEBUG */
984
  return ret;
985
}
986
#ifdef _LIBC
987
weak_alias (__re_set_syntax, re_set_syntax)
988
#endif
989
 
990
/* This table gives an error message for each of the error codes listed
991
   in gnu-regex.h.  Obviously the order here has to be same as there.
992
   POSIX doesn't require that we do anything for REG_NOERROR,
993
   but why not be nice?  */
994
 
995
static const char *re_error_msgid[] =
996
  {
997
    gettext_noop ("Success"),   /* REG_NOERROR */
998
    gettext_noop ("No match"),  /* REG_NOMATCH */
999
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1000
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1001
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1002
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1003
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1004
    gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1005
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1006
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1007
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1008
    gettext_noop ("Invalid range end"), /* REG_ERANGE */
1009
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1010
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1011
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1012
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1013
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1014
  };
1015
 
1016
/* Avoiding alloca during matching, to placate r_alloc.  */
1017
 
1018
/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1019
   searching and matching functions should not call alloca.  On some
1020
   systems, alloca is implemented in terms of malloc, and if we're
1021
   using the relocating allocator routines, then malloc could cause a
1022
   relocation, which might (if the strings being searched are in the
1023
   ralloc heap) shift the data out from underneath the regexp
1024
   routines.
1025
 
1026
   Here's another reason to avoid allocation: Emacs
1027
   processes input from X in a signal handler; processing X input may
1028
   call malloc; if input arrives while a matching routine is calling
1029
   malloc, then we're scrod.  But Emacs can't just block input while
1030
   calling matching routines; then we don't notice interrupts when
1031
   they come in.  So, Emacs blocks input around all regexp calls
1032
   except the matching calls, which it leaves unprotected, in the
1033
   faith that they will not malloc.  */
1034
 
1035
/* Normally, this is fine.  */
1036
#define MATCH_MAY_ALLOCATE
1037
 
1038
/* When using GNU C, we are not REALLY using the C alloca, no matter
1039
   what config.h may say.  So don't take precautions for it.  */
1040
#ifdef __GNUC__
1041
# undef C_ALLOCA
1042
#endif
1043
 
1044
/* The match routines may not allocate if (1) they would do it with malloc
1045
   and (2) it's not safe for them to use malloc.
1046
   Note that if REL_ALLOC is defined, matching would not use malloc for the
1047
   failure stack, but we would still use it for the register vectors;
1048
   so REL_ALLOC should not affect this.  */
1049
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1050
# undef MATCH_MAY_ALLOCATE
1051
#endif
1052
 
1053
 
1054
/* Failure stack declarations and macros; both re_compile_fastmap and
1055
   re_match_2 use a failure stack.  These have to be macros because of
1056
   REGEX_ALLOCATE_STACK.  */
1057
 
1058
 
1059
/* Number of failure points for which to initially allocate space
1060
   when matching.  If this number is exceeded, we allocate more
1061
   space, so it is not a hard limit.  */
1062
#ifndef INIT_FAILURE_ALLOC
1063
# define INIT_FAILURE_ALLOC 5
1064
#endif
1065
 
1066
/* Roughly the maximum number of failure points on the stack.  Would be
1067
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1068
   This is a variable only so users of regex can assign to it; we never
1069
   change it ourselves.  */
1070
 
1071
#ifdef INT_IS_16BIT
1072
 
1073
# if defined MATCH_MAY_ALLOCATE
1074
/* 4400 was enough to cause a crash on Alpha OSF/1,
1075
   whose default stack limit is 2mb.  */
1076
long int re_max_failures = 4000;
1077
# else
1078
long int re_max_failures = 2000;
1079
# endif
1080
 
1081
union fail_stack_elt
1082
{
1083
  unsigned char *pointer;
1084
  long int integer;
1085
};
1086
 
1087
typedef union fail_stack_elt fail_stack_elt_t;
1088
 
1089
typedef struct
1090
{
1091
  fail_stack_elt_t *stack;
1092
  unsigned long int size;
1093
  unsigned long int avail;              /* Offset of next open position.  */
1094
} fail_stack_type;
1095
 
1096
#else /* not INT_IS_16BIT */
1097
 
1098
# if defined MATCH_MAY_ALLOCATE
1099
/* 4400 was enough to cause a crash on Alpha OSF/1,
1100
   whose default stack limit is 2mb.  */
1101
int re_max_failures = 20000;
1102
# else
1103
int re_max_failures = 2000;
1104
# endif
1105
 
1106
union fail_stack_elt
1107
{
1108
  unsigned char *pointer;
1109
  int integer;
1110
};
1111
 
1112
typedef union fail_stack_elt fail_stack_elt_t;
1113
 
1114
typedef struct
1115
{
1116
  fail_stack_elt_t *stack;
1117
  unsigned size;
1118
  unsigned avail;                       /* Offset of next open position.  */
1119
} fail_stack_type;
1120
 
1121
#endif /* INT_IS_16BIT */
1122
 
1123
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1124
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1125
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1126
 
1127
 
1128
/* Define macros to initialize and free the failure stack.
1129
   Do `return -2' if the alloc fails.  */
1130
 
1131
#ifdef MATCH_MAY_ALLOCATE
1132
# define INIT_FAIL_STACK()                                              \
1133
  do {                                                                  \
1134
    fail_stack.stack = (fail_stack_elt_t *)                             \
1135
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1136
                                                                        \
1137
    if (fail_stack.stack == NULL)                                       \
1138
      return -2;                                                        \
1139
                                                                        \
1140
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
1141
    fail_stack.avail = 0;                                                \
1142
  } while (0)
1143
 
1144
# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1145
#else
1146
# define INIT_FAIL_STACK()                                              \
1147
  do {                                                                  \
1148
    fail_stack.avail = 0;                                                \
1149
  } while (0)
1150
 
1151
# define RESET_FAIL_STACK()
1152
#endif
1153
 
1154
 
1155
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1156
 
1157
   Return 1 if succeeds, and 0 if either ran out of memory
1158
   allocating space for it or it was already too large.
1159
 
1160
   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1161
 
1162
#define DOUBLE_FAIL_STACK(fail_stack)                                   \
1163
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
1164
   ? 0                                                                   \
1165
   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1166
        REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1167
          (fail_stack).size * sizeof (fail_stack_elt_t),                \
1168
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1169
                                                                        \
1170
      (fail_stack).stack == NULL                                        \
1171
      ? 0                                                                \
1172
      : ((fail_stack).size <<= 1,                                       \
1173
         1)))
1174
 
1175
 
1176
/* Push pointer POINTER on FAIL_STACK.
1177
   Return 1 if was able to do so and 0 if ran out of memory allocating
1178
   space to do so.  */
1179
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1180
  ((FAIL_STACK_FULL ()                                                  \
1181
    && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1182
   ? 0                                                                   \
1183
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1184
      1))
1185
 
1186
/* Push a pointer value onto the failure stack.
1187
   Assumes the variable `fail_stack'.  Probably should only
1188
   be called from within `PUSH_FAILURE_POINT'.  */
1189
#define PUSH_FAILURE_POINTER(item)                                      \
1190
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1191
 
1192
/* This pushes an integer-valued item onto the failure stack.
1193
   Assumes the variable `fail_stack'.  Probably should only
1194
   be called from within `PUSH_FAILURE_POINT'.  */
1195
#define PUSH_FAILURE_INT(item)                                  \
1196
  fail_stack.stack[fail_stack.avail++].integer = (item)
1197
 
1198
/* Push a fail_stack_elt_t value onto the failure stack.
1199
   Assumes the variable `fail_stack'.  Probably should only
1200
   be called from within `PUSH_FAILURE_POINT'.  */
1201
#define PUSH_FAILURE_ELT(item)                                  \
1202
  fail_stack.stack[fail_stack.avail++] =  (item)
1203
 
1204
/* These three POP... operations complement the three PUSH... operations.
1205
   All assume that `fail_stack' is nonempty.  */
1206
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1207
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1208
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1209
 
1210
/* Used to omit pushing failure point id's when we're not debugging.  */
1211
#ifdef DEBUG
1212
# define DEBUG_PUSH PUSH_FAILURE_INT
1213
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1214
#else
1215
# define DEBUG_PUSH(item)
1216
# define DEBUG_POP(item_addr)
1217
#endif
1218
 
1219
 
1220
/* Push the information about the state we will need
1221
   if we ever fail back to it.
1222
 
1223
   Requires variables fail_stack, regstart, regend, reg_info, and
1224
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1225
   be declared.
1226
 
1227
   Does `return FAILURE_CODE' if runs out of memory.  */
1228
 
1229
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1230
  do {                                                                  \
1231
    char *destination;                                                  \
1232
    /* Must be int, so when we don't save any registers, the arithmetic \
1233
       of 0 + -1 isn't done as unsigned.  */                            \
1234
    /* Can't be int, since there is not a shred of a guarantee that int \
1235
       is wide enough to hold a value of something to which pointer can \
1236
       be assigned */                                                   \
1237
    active_reg_t this_reg;                                              \
1238
                                                                        \
1239
    DEBUG_STATEMENT (failure_id++);                                     \
1240
    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1241
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1242
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1243
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1244
                                                                        \
1245
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);          \
1246
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1247
                                                                        \
1248
    /* Ensure we have enough space allocated for what we will push.  */ \
1249
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1250
      {                                                                 \
1251
        if (!DOUBLE_FAIL_STACK (fail_stack))                            \
1252
          return failure_code;                                          \
1253
                                                                        \
1254
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1255
                       (fail_stack).size);                              \
1256
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1257
      }                                                                 \
1258
                                                                        \
1259
    /* Push the info, starting with the registers.  */                  \
1260
    DEBUG_PRINT1 ("\n");                                                \
1261
                                                                        \
1262
    if (1)                                                              \
1263
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1264
           this_reg++)                                                  \
1265
        {                                                               \
1266
          DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);              \
1267
          DEBUG_STATEMENT (num_regs_pushed++);                          \
1268
                                                                        \
1269
          DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);         \
1270
          PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
1271
                                                                        \
1272
          DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);             \
1273
          PUSH_FAILURE_POINTER (regend[this_reg]);                      \
1274
                                                                        \
1275
          DEBUG_PRINT2 ("    info: %p\n      ",                         \
1276
                        reg_info[this_reg].word.pointer);               \
1277
          DEBUG_PRINT2 (" match_null=%d",                               \
1278
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
1279
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
1280
          DEBUG_PRINT2 (" matched_something=%d",                        \
1281
                        MATCHED_SOMETHING (reg_info[this_reg]));        \
1282
          DEBUG_PRINT2 (" ever_matched=%d",                             \
1283
                        EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
1284
          DEBUG_PRINT1 ("\n");                                          \
1285
          PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
1286
        }                                                               \
1287
                                                                        \
1288
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1289
    PUSH_FAILURE_INT (lowest_active_reg);                               \
1290
                                                                        \
1291
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1292
    PUSH_FAILURE_INT (highest_active_reg);                              \
1293
                                                                        \
1294
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);            \
1295
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1296
    PUSH_FAILURE_POINTER (pattern_place);                               \
1297
                                                                        \
1298
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);              \
1299
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1300
                                 size2);                                \
1301
    DEBUG_PRINT1 ("'\n");                                               \
1302
    PUSH_FAILURE_POINTER (string_place);                                \
1303
                                                                        \
1304
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1305
    DEBUG_PUSH (failure_id);                                            \
1306
  } while (0)
1307
 
1308
/* This is the number of items that are pushed and popped on the stack
1309
   for each register.  */
1310
#define NUM_REG_ITEMS  3
1311
 
1312
/* Individual items aside from the registers.  */
1313
#ifdef DEBUG
1314
# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1315
#else
1316
# define NUM_NONREG_ITEMS 4
1317
#endif
1318
 
1319
/* We push at most this many items on the stack.  */
1320
/* We used to use (num_regs - 1), which is the number of registers
1321
   this regexp will save; but that was changed to 5
1322
   to avoid stack overflow for a regexp with lots of parens.  */
1323
#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1324
 
1325
/* We actually push this many items.  */
1326
#define NUM_FAILURE_ITEMS                               \
1327
  (((0                                                   \
1328
     ? 0 : highest_active_reg - lowest_active_reg + 1)   \
1329
    * NUM_REG_ITEMS)                                    \
1330
   + NUM_NONREG_ITEMS)
1331
 
1332
/* How many items can still be added to the stack without overflowing it.  */
1333
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1334
 
1335
 
1336
/* Pops what PUSH_FAIL_STACK pushes.
1337
 
1338
   We restore into the parameters, all of which should be lvalues:
1339
     STR -- the saved data position.
1340
     PAT -- the saved pattern position.
1341
     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1342
     REGSTART, REGEND -- arrays of string positions.
1343
     REG_INFO -- array of information about each subexpression.
1344
 
1345
   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1346
   `pend', `string1', `size1', `string2', and `size2'.  */
1347
 
1348
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1349
{                                                                       \
1350
  DEBUG_STATEMENT (unsigned failure_id;)                                \
1351
  active_reg_t this_reg;                                                \
1352
  const unsigned char *string_temp;                                     \
1353
                                                                        \
1354
  assert (!FAIL_STACK_EMPTY ());                                        \
1355
                                                                        \
1356
  /* Remove failure points and point to how many regs pushed.  */       \
1357
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1358
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1359
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1360
                                                                        \
1361
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1362
                                                                        \
1363
  DEBUG_POP (&failure_id);                                              \
1364
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1365
                                                                        \
1366
  /* If the saved string location is NULL, it came from an              \
1367
     on_failure_keep_string_jump opcode, and we want to throw away the  \
1368
     saved NULL, thus retaining our current position in the string.  */ \
1369
  string_temp = POP_FAILURE_POINTER ();                                 \
1370
  if (string_temp != NULL)                                              \
1371
    str = (const char *) string_temp;                                   \
1372
                                                                        \
1373
  DEBUG_PRINT2 ("  Popping string %p: `", str);                         \
1374
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1375
  DEBUG_PRINT1 ("'\n");                                                 \
1376
                                                                        \
1377
  pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
1378
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);                        \
1379
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1380
                                                                        \
1381
  /* Restore register info.  */                                         \
1382
  high_reg = (active_reg_t) POP_FAILURE_INT ();                         \
1383
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);          \
1384
                                                                        \
1385
  low_reg = (active_reg_t) POP_FAILURE_INT ();                          \
1386
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);           \
1387
                                                                        \
1388
  if (1)                                                                \
1389
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)          \
1390
      {                                                                 \
1391
        DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);              \
1392
                                                                        \
1393
        reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
1394
        DEBUG_PRINT2 ("      info: %p\n",                               \
1395
                      reg_info[this_reg].word.pointer);                 \
1396
                                                                        \
1397
        regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
1398
        DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);             \
1399
                                                                        \
1400
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
1401
        DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);         \
1402
      }                                                                 \
1403
  else                                                                  \
1404
    {                                                                   \
1405
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1406
        {                                                               \
1407
          reg_info[this_reg].word.integer = 0;                           \
1408
          regend[this_reg] = 0;                                          \
1409
          regstart[this_reg] = 0;                                        \
1410
        }                                                               \
1411
      highest_active_reg = high_reg;                                    \
1412
    }                                                                   \
1413
                                                                        \
1414
  set_regs_matched_done = 0;                                             \
1415
  DEBUG_STATEMENT (nfailure_points_popped++);                           \
1416
} /* POP_FAILURE_POINT */
1417
 
1418
 
1419
 
1420
/* Structure for per-register (a.k.a. per-group) information.
1421
   Other register information, such as the
1422
   starting and ending positions (which are addresses), and the list of
1423
   inner groups (which is a bits list) are maintained in separate
1424
   variables.
1425
 
1426
   We are making a (strictly speaking) nonportable assumption here: that
1427
   the compiler will pack our bit fields into something that fits into
1428
   the type of `word', i.e., is something that fits into one item on the
1429
   failure stack.  */
1430
 
1431
 
1432
/* Declarations and macros for re_match_2.  */
1433
 
1434
typedef union
1435
{
1436
  fail_stack_elt_t word;
1437
  struct
1438
  {
1439
      /* This field is one if this group can match the empty string,
1440
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1441
#define MATCH_NULL_UNSET_VALUE 3
1442
    unsigned match_null_string_p : 2;
1443
    unsigned is_active : 1;
1444
    unsigned matched_something : 1;
1445
    unsigned ever_matched_something : 1;
1446
  } bits;
1447
} register_info_type;
1448
 
1449
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1450
#define IS_ACTIVE(R)  ((R).bits.is_active)
1451
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1452
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1453
 
1454
 
1455
/* Call this when have matched a real character; it sets `matched' flags
1456
   for the subexpressions which we are currently inside.  Also records
1457
   that those subexprs have matched.  */
1458
#define SET_REGS_MATCHED()                                              \
1459
  do                                                                    \
1460
    {                                                                   \
1461
      if (!set_regs_matched_done)                                       \
1462
        {                                                               \
1463
          active_reg_t r;                                               \
1464
          set_regs_matched_done = 1;                                    \
1465
          for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1466
            {                                                           \
1467
              MATCHED_SOMETHING (reg_info[r])                           \
1468
                = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1469
                = 1;                                                    \
1470
            }                                                           \
1471
        }                                                               \
1472
    }                                                                   \
1473
  while (0)
1474
 
1475
/* Registers are set to a sentinel when they haven't yet matched.  */
1476
static char reg_unset_dummy;
1477
#define REG_UNSET_VALUE (&reg_unset_dummy)
1478
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1479
 
1480
/* Subroutine declarations and macros for regex_compile.  */
1481
 
1482
static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1483
                                              reg_syntax_t syntax,
1484
                                              struct re_pattern_buffer *bufp));
1485
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1486
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1487
                                 int arg1, int arg2));
1488
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1489
                                  int arg, unsigned char *end));
1490
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1491
                                  int arg1, int arg2, unsigned char *end));
1492
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1493
                                           reg_syntax_t syntax));
1494
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1495
                                           reg_syntax_t syntax));
1496
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1497
                                              const char *pend,
1498
                                              char *translate,
1499
                                              reg_syntax_t syntax,
1500
                                              unsigned char *b));
1501
 
1502
/* Fetch the next character in the uncompiled pattern---translating it
1503
   if necessary.  Also cast from a signed character in the constant
1504
   string passed to us by the user to an unsigned char that we can use
1505
   as an array index (in, e.g., `translate').  */
1506
#ifndef PATFETCH
1507
# define PATFETCH(c)                                                    \
1508
  do {if (p == pend) return REG_EEND;                                   \
1509
    c = (unsigned char) *p++;                                           \
1510
    if (translate) c = (unsigned char) translate[c];                    \
1511
  } while (0)
1512
#endif
1513
 
1514
/* Fetch the next character in the uncompiled pattern, with no
1515
   translation.  */
1516
#define PATFETCH_RAW(c)                                                 \
1517
  do {if (p == pend) return REG_EEND;                                   \
1518
    c = (unsigned char) *p++;                                           \
1519
  } while (0)
1520
 
1521
/* Go backwards one character in the pattern.  */
1522
#define PATUNFETCH p--
1523
 
1524
 
1525
/* If `translate' is non-null, return translate[D], else just D.  We
1526
   cast the subscript to translate because some data is declared as
1527
   `char *', to avoid warnings when a string constant is passed.  But
1528
   when we use a character as a subscript we must make it unsigned.  */
1529
#ifndef TRANSLATE
1530
# define TRANSLATE(d) \
1531
  (translate ? (char) translate[(unsigned char) (d)] : (d))
1532
#endif
1533
 
1534
 
1535
/* Macros for outputting the compiled pattern into `buffer'.  */
1536
 
1537
/* If the buffer isn't allocated when it comes in, use this.  */
1538
#define INIT_BUF_SIZE  32
1539
 
1540
/* Make sure we have at least N more bytes of space in buffer.  */
1541
#define GET_BUFFER_SPACE(n)                                             \
1542
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
1543
      EXTEND_BUFFER ()
1544
 
1545
/* Make sure we have one more byte of buffer space and then add C to it.  */
1546
#define BUF_PUSH(c)                                                     \
1547
  do {                                                                  \
1548
    GET_BUFFER_SPACE (1);                                               \
1549
    *b++ = (unsigned char) (c);                                         \
1550
  } while (0)
1551
 
1552
 
1553
/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1554
#define BUF_PUSH_2(c1, c2)                                              \
1555
  do {                                                                  \
1556
    GET_BUFFER_SPACE (2);                                               \
1557
    *b++ = (unsigned char) (c1);                                        \
1558
    *b++ = (unsigned char) (c2);                                        \
1559
  } while (0)
1560
 
1561
 
1562
/* As with BUF_PUSH_2, except for three bytes.  */
1563
#define BUF_PUSH_3(c1, c2, c3)                                          \
1564
  do {                                                                  \
1565
    GET_BUFFER_SPACE (3);                                               \
1566
    *b++ = (unsigned char) (c1);                                        \
1567
    *b++ = (unsigned char) (c2);                                        \
1568
    *b++ = (unsigned char) (c3);                                        \
1569
  } while (0)
1570
 
1571
 
1572
/* Store a jump with opcode OP at LOC to location TO.  We store a
1573
   relative address offset by the three bytes the jump itself occupies.  */
1574
#define STORE_JUMP(op, loc, to) \
1575
  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1576
 
1577
/* Likewise, for a two-argument jump.  */
1578
#define STORE_JUMP2(op, loc, to, arg) \
1579
  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1580
 
1581
/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1582
#define INSERT_JUMP(op, loc, to) \
1583
  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1584
 
1585
/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1586
#define INSERT_JUMP2(op, loc, to, arg) \
1587
  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1588
 
1589
 
1590
/* This is not an arbitrary limit: the arguments which represent offsets
1591
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1592
   be too small, many things would have to change.  */
1593
#define MAX_BUF_SIZE (1L << 16)
1594
#define REALLOC(p,s) realloc ((p), (s))
1595
 
1596
/* Extend the buffer by twice its current size via realloc and
1597
   reset the pointers that pointed into the old block to point to the
1598
   correct places in the new one.  If extending the buffer results in it
1599
   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1600
#define EXTEND_BUFFER()                                                 \
1601
  do {                                                                  \
1602
    unsigned char *old_buffer = bufp->buffer;                           \
1603
    if (bufp->allocated == MAX_BUF_SIZE)                                \
1604
      return REG_ESIZE;                                                 \
1605
    bufp->allocated <<= 1;                                              \
1606
    if (bufp->allocated > MAX_BUF_SIZE)                                 \
1607
      bufp->allocated = MAX_BUF_SIZE;                                   \
1608
    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1609
    if (bufp->buffer == NULL)                                           \
1610
      return REG_ESPACE;                                                \
1611
    /* If the buffer moved, move all the pointers into it.  */          \
1612
    if (old_buffer != bufp->buffer)                                     \
1613
      {                                                                 \
1614
        b = (b - old_buffer) + bufp->buffer;                            \
1615
        begalt = (begalt - old_buffer) + bufp->buffer;                  \
1616
        if (fixup_alt_jump)                                             \
1617
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1618
        if (laststart)                                                  \
1619
          laststart = (laststart - old_buffer) + bufp->buffer;          \
1620
        if (pending_exact)                                              \
1621
          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
1622
      }                                                                 \
1623
  } while (0)
1624
 
1625
 
1626
/* Since we have one byte reserved for the register number argument to
1627
   {start,stop}_memory, the maximum number of groups we can report
1628
   things about is what fits in that byte.  */
1629
#define MAX_REGNUM 255
1630
 
1631
/* But patterns can have more than `MAX_REGNUM' registers.  We just
1632
   ignore the excess.  */
1633
typedef unsigned regnum_t;
1634
 
1635
 
1636
/* Macros for the compile stack.  */
1637
 
1638
/* Since offsets can go either forwards or backwards, this type needs to
1639
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1640
/* int may be not enough when sizeof(int) == 2.  */
1641
typedef long pattern_offset_t;
1642
 
1643
typedef struct
1644
{
1645
  pattern_offset_t begalt_offset;
1646
  pattern_offset_t fixup_alt_jump;
1647
  pattern_offset_t inner_group_offset;
1648
  pattern_offset_t laststart_offset;
1649
  regnum_t regnum;
1650
} compile_stack_elt_t;
1651
 
1652
 
1653
typedef struct
1654
{
1655
  compile_stack_elt_t *stack;
1656
  unsigned size;
1657
  unsigned avail;                       /* Offset of next open position.  */
1658
} compile_stack_type;
1659
 
1660
 
1661
#define INIT_COMPILE_STACK_SIZE 32
1662
 
1663
#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1664
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1665
 
1666
/* The next available element.  */
1667
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1668
 
1669
 
1670
/* Set the bit for character C in a list.  */
1671
#define SET_LIST_BIT(c)                               \
1672
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1673
   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1674
 
1675
 
1676
/* Get the next unsigned number in the uncompiled pattern.  */
1677
#define GET_UNSIGNED_NUMBER(num)                                        \
1678
  { if (p != pend)                                                      \
1679
     {                                                                  \
1680
       PATFETCH (c);                                                    \
1681
       while (ISDIGIT (c))                                              \
1682
         {                                                              \
1683
           if (num < 0)                                                  \
1684
              num = 0;                                                   \
1685
           num = num * 10 + c - '0';                                    \
1686
           if (p == pend)                                               \
1687
              break;                                                    \
1688
           PATFETCH (c);                                                \
1689
         }                                                              \
1690
       }                                                                \
1691
    }
1692
 
1693
/* Use this only if they have btowc(), since wctype() is used below
1694
   together with btowc().  btowc() is defined in the 1994 Amendment 1
1695
   to ISO C and may not be present on systems where we have wchar.h
1696
   and wctype.h.  */
1697
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
1698
/* The GNU C library provides support for user-defined character classes
1699
   and the functions from ISO C amendement 1.  */
1700
# ifdef CHARCLASS_NAME_MAX
1701
#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1702
# else
1703
/* This shouldn't happen but some implementation might still have this
1704
   problem.  Use a reasonable default value.  */
1705
#  define CHAR_CLASS_MAX_LENGTH 256
1706
# endif
1707
 
1708
# ifdef _LIBC
1709
#  define IS_CHAR_CLASS(string) __wctype (string)
1710
# else
1711
#  define IS_CHAR_CLASS(string) wctype (string)
1712
# endif
1713
#else
1714
# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1715
 
1716
# define IS_CHAR_CLASS(string)                                          \
1717
   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1718
    || STREQ (string, "lower") || STREQ (string, "digit")               \
1719
    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1720
    || STREQ (string, "space") || STREQ (string, "print")               \
1721
    || STREQ (string, "punct") || STREQ (string, "graph")               \
1722
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1723
#endif
1724
 
1725
#ifndef MATCH_MAY_ALLOCATE
1726
 
1727
/* If we cannot allocate large objects within re_match_2_internal,
1728
   we make the fail stack and register vectors global.
1729
   The fail stack, we grow to the maximum size when a regexp
1730
   is compiled.
1731
   The register vectors, we adjust in size each time we
1732
   compile a regexp, according to the number of registers it needs.  */
1733
 
1734
static fail_stack_type fail_stack;
1735
 
1736
/* Size with which the following vectors are currently allocated.
1737
   That is so we can make them bigger as needed,
1738
   but never make them smaller.  */
1739
static int regs_allocated_size;
1740
 
1741
static const char **     regstart, **     regend;
1742
static const char ** old_regstart, ** old_regend;
1743
static const char **best_regstart, **best_regend;
1744
static register_info_type *reg_info;
1745
static const char **reg_dummy;
1746
static register_info_type *reg_info_dummy;
1747
 
1748
/* Make the register vectors big enough for NUM_REGS registers,
1749
   but don't make them smaller.  */
1750
 
1751
static
1752
regex_grow_registers (num_regs)
1753
     int num_regs;
1754
{
1755
  if (num_regs > regs_allocated_size)
1756
    {
1757
      RETALLOC_IF (regstart,     num_regs, const char *);
1758
      RETALLOC_IF (regend,       num_regs, const char *);
1759
      RETALLOC_IF (old_regstart, num_regs, const char *);
1760
      RETALLOC_IF (old_regend,   num_regs, const char *);
1761
      RETALLOC_IF (best_regstart, num_regs, const char *);
1762
      RETALLOC_IF (best_regend,  num_regs, const char *);
1763
      RETALLOC_IF (reg_info,     num_regs, register_info_type);
1764
      RETALLOC_IF (reg_dummy,    num_regs, const char *);
1765
      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1766
 
1767
      regs_allocated_size = num_regs;
1768
    }
1769
}
1770
 
1771
#endif /* not MATCH_MAY_ALLOCATE */
1772
 
1773
static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1774
                                                 compile_stack,
1775
                                                 regnum_t regnum));
1776
 
1777
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1778
   Returns one of error codes defined in `gnu-regex.h', or zero for success.
1779
 
1780
   Assumes the `allocated' (and perhaps `buffer') and `translate'
1781
   fields are set in BUFP on entry.
1782
 
1783
   If it succeeds, results are put in BUFP (if it returns an error, the
1784
   contents of BUFP are undefined):
1785
     `buffer' is the compiled pattern;
1786
     `syntax' is set to SYNTAX;
1787
     `used' is set to the length of the compiled pattern;
1788
     `fastmap_accurate' is zero;
1789
     `re_nsub' is the number of subexpressions in PATTERN;
1790
     `not_bol' and `not_eol' are zero;
1791
 
1792
   The `fastmap' and `newline_anchor' fields are neither
1793
   examined nor set.  */
1794
 
1795
/* Return, freeing storage we allocated.  */
1796
#define FREE_STACK_RETURN(value)                \
1797
  return (free (compile_stack.stack), value)
1798
 
1799
static reg_errcode_t
1800
regex_compile (pattern, size, syntax, bufp)
1801
     const char *pattern;
1802
     size_t size;
1803
     reg_syntax_t syntax;
1804
     struct re_pattern_buffer *bufp;
1805
{
1806
  /* We fetch characters from PATTERN here.  Even though PATTERN is
1807
     `char *' (i.e., signed), we declare these variables as unsigned, so
1808
     they can be reliably used as array indices.  */
1809
  register unsigned char c, c1;
1810
 
1811
  /* A random temporary spot in PATTERN.  */
1812
  const char *p1;
1813
 
1814
  /* Points to the end of the buffer, where we should append.  */
1815
  register unsigned char *b;
1816
 
1817
  /* Keeps track of unclosed groups.  */
1818
  compile_stack_type compile_stack;
1819
 
1820
  /* Points to the current (ending) position in the pattern.  */
1821
  const char *p = pattern;
1822
  const char *pend = pattern + size;
1823
 
1824
  /* How to translate the characters in the pattern.  */
1825
  RE_TRANSLATE_TYPE translate = bufp->translate;
1826
 
1827
  /* Address of the count-byte of the most recently inserted `exactn'
1828
     command.  This makes it possible to tell if a new exact-match
1829
     character can be added to that command or if the character requires
1830
     a new `exactn' command.  */
1831
  unsigned char *pending_exact = 0;
1832
 
1833
  /* Address of start of the most recently finished expression.
1834
     This tells, e.g., postfix * where to find the start of its
1835
     operand.  Reset at the beginning of groups and alternatives.  */
1836
  unsigned char *laststart = 0;
1837
 
1838
  /* Address of beginning of regexp, or inside of last group.  */
1839
  unsigned char *begalt;
1840
 
1841
  /* Place in the uncompiled pattern (i.e., the {) to
1842
     which to go back if the interval is invalid.  */
1843
  const char *beg_interval;
1844
 
1845
  /* Address of the place where a forward jump should go to the end of
1846
     the containing expression.  Each alternative of an `or' -- except the
1847
     last -- ends with a forward jump of this sort.  */
1848
  unsigned char *fixup_alt_jump = 0;
1849
 
1850
  /* Counts open-groups as they are encountered.  Remembered for the
1851
     matching close-group on the compile stack, so the same register
1852
     number is put in the stop_memory as the start_memory.  */
1853
  regnum_t regnum = 0;
1854
 
1855
#ifdef DEBUG
1856
  DEBUG_PRINT1 ("\nCompiling pattern: ");
1857
  if (debug)
1858
    {
1859
      unsigned debug_count;
1860
 
1861
      for (debug_count = 0; debug_count < size; debug_count++)
1862
        putchar (pattern[debug_count]);
1863
      putchar ('\n');
1864
    }
1865
#endif /* DEBUG */
1866
 
1867
  /* Initialize the compile stack.  */
1868
  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1869
  if (compile_stack.stack == NULL)
1870
    return REG_ESPACE;
1871
 
1872
  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1873
  compile_stack.avail = 0;
1874
 
1875
  /* Initialize the pattern buffer.  */
1876
  bufp->syntax = syntax;
1877
  bufp->fastmap_accurate = 0;
1878
  bufp->not_bol = bufp->not_eol = 0;
1879
 
1880
  /* Set `used' to zero, so that if we return an error, the pattern
1881
     printer (for debugging) will think there's no pattern.  We reset it
1882
     at the end.  */
1883
  bufp->used = 0;
1884
 
1885
  /* Always count groups, whether or not bufp->no_sub is set.  */
1886
  bufp->re_nsub = 0;
1887
 
1888
#if !defined emacs && !defined SYNTAX_TABLE
1889
  /* Initialize the syntax table.  */
1890
   init_syntax_once ();
1891
#endif
1892
 
1893
  if (bufp->allocated == 0)
1894
    {
1895
      if (bufp->buffer)
1896
        { /* If zero allocated, but buffer is non-null, try to realloc
1897
             enough space.  This loses if buffer's address is bogus, but
1898
             that is the user's responsibility.  */
1899
          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1900
        }
1901
      else
1902
        { /* Caller did not allocate a buffer.  Do it for them.  */
1903
          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1904
        }
1905
      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1906
 
1907
      bufp->allocated = INIT_BUF_SIZE;
1908
    }
1909
 
1910
  begalt = b = bufp->buffer;
1911
 
1912
  /* Loop through the uncompiled pattern until we're at the end.  */
1913
  while (p != pend)
1914
    {
1915
      PATFETCH (c);
1916
 
1917
      switch (c)
1918
        {
1919
        case '^':
1920
          {
1921
            if (   /* If at start of pattern, it's an operator.  */
1922
                   p == pattern + 1
1923
                   /* If context independent, it's an operator.  */
1924
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1925
                   /* Otherwise, depends on what's come before.  */
1926
                || at_begline_loc_p (pattern, p, syntax))
1927
              BUF_PUSH (begline);
1928
            else
1929
              goto normal_char;
1930
          }
1931
          break;
1932
 
1933
 
1934
        case '$':
1935
          {
1936
            if (   /* If at end of pattern, it's an operator.  */
1937
                   p == pend
1938
                   /* If context independent, it's an operator.  */
1939
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1940
                   /* Otherwise, depends on what's next.  */
1941
                || at_endline_loc_p (p, pend, syntax))
1942
               BUF_PUSH (endline);
1943
             else
1944
               goto normal_char;
1945
           }
1946
           break;
1947
 
1948
 
1949
        case '+':
1950
        case '?':
1951
          if ((syntax & RE_BK_PLUS_QM)
1952
              || (syntax & RE_LIMITED_OPS))
1953
            goto normal_char;
1954
        handle_plus:
1955
        case '*':
1956
          /* If there is no previous pattern... */
1957
          if (!laststart)
1958
            {
1959
              if (syntax & RE_CONTEXT_INVALID_OPS)
1960
                FREE_STACK_RETURN (REG_BADRPT);
1961
              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1962
                goto normal_char;
1963
            }
1964
 
1965
          {
1966
            /* Are we optimizing this jump?  */
1967
            boolean keep_string_p = false;
1968
 
1969
            /* 1 means zero (many) matches is allowed.  */
1970
            char zero_times_ok = 0, many_times_ok = 0;
1971
 
1972
            /* If there is a sequence of repetition chars, collapse it
1973
               down to just one (the right one).  We can't combine
1974
               interval operators with these because of, e.g., `a{2}*',
1975
               which should only match an even number of `a's.  */
1976
 
1977
            for (;;)
1978
              {
1979
                zero_times_ok |= c != '+';
1980
                many_times_ok |= c != '?';
1981
 
1982
                if (p == pend)
1983
                  break;
1984
 
1985
                PATFETCH (c);
1986
 
1987
                if (c == '*'
1988
                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1989
                  ;
1990
 
1991
                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1992
                  {
1993
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1994
 
1995
                    PATFETCH (c1);
1996
                    if (!(c1 == '+' || c1 == '?'))
1997
                      {
1998
                        PATUNFETCH;
1999
                        PATUNFETCH;
2000
                        break;
2001
                      }
2002
 
2003
                    c = c1;
2004
                  }
2005
                else
2006
                  {
2007
                    PATUNFETCH;
2008
                    break;
2009
                  }
2010
 
2011
                /* If we get here, we found another repeat character.  */
2012
               }
2013
 
2014
            /* Star, etc. applied to an empty pattern is equivalent
2015
               to an empty pattern.  */
2016
            if (!laststart)
2017
              break;
2018
 
2019
            /* Now we know whether or not zero matches is allowed
2020
               and also whether or not two or more matches is allowed.  */
2021
            if (many_times_ok)
2022
              { /* More than one repetition is allowed, so put in at the
2023
                   end a backward relative jump from `b' to before the next
2024
                   jump we're going to put in below (which jumps from
2025
                   laststart to after this jump).
2026
 
2027
                   But if we are at the `*' in the exact sequence `.*\n',
2028
                   insert an unconditional jump backwards to the .,
2029
                   instead of the beginning of the loop.  This way we only
2030
                   push a failure point once, instead of every time
2031
                   through the loop.  */
2032
                assert (p - 1 > pattern);
2033
 
2034
                /* Allocate the space for the jump.  */
2035
                GET_BUFFER_SPACE (3);
2036
 
2037
                /* We know we are not at the first character of the pattern,
2038
                   because laststart was nonzero.  And we've already
2039
                   incremented `p', by the way, to be the character after
2040
                   the `*'.  Do we have to do something analogous here
2041
                   for null bytes, because of RE_DOT_NOT_NULL?  */
2042
                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2043
                    && zero_times_ok
2044
                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2045
                    && !(syntax & RE_DOT_NEWLINE))
2046
                  { /* We have .*\n.  */
2047
                    STORE_JUMP (jump, b, laststart);
2048
                    keep_string_p = true;
2049
                  }
2050
                else
2051
                  /* Anything else.  */
2052
                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2053
 
2054
                /* We've added more stuff to the buffer.  */
2055
                b += 3;
2056
              }
2057
 
2058
            /* On failure, jump from laststart to b + 3, which will be the
2059
               end of the buffer after this jump is inserted.  */
2060
            GET_BUFFER_SPACE (3);
2061
            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2062
                                       : on_failure_jump,
2063
                         laststart, b + 3);
2064
            pending_exact = 0;
2065
            b += 3;
2066
 
2067
            if (!zero_times_ok)
2068
              {
2069
                /* At least one repetition is required, so insert a
2070
                   `dummy_failure_jump' before the initial
2071
                   `on_failure_jump' instruction of the loop. This
2072
                   effects a skip over that instruction the first time
2073
                   we hit that loop.  */
2074
                GET_BUFFER_SPACE (3);
2075
                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2076
                b += 3;
2077
              }
2078
            }
2079
          break;
2080
 
2081
 
2082
        case '.':
2083
          laststart = b;
2084
          BUF_PUSH (anychar);
2085
          break;
2086
 
2087
 
2088
        case '[':
2089
          {
2090
            boolean had_char_class = false;
2091
 
2092
            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2093
 
2094
            /* Ensure that we have enough space to push a charset: the
2095
               opcode, the length count, and the bitset; 34 bytes in all.  */
2096
            GET_BUFFER_SPACE (34);
2097
 
2098
            laststart = b;
2099
 
2100
            /* We test `*p == '^' twice, instead of using an if
2101
               statement, so we only need one BUF_PUSH.  */
2102
            BUF_PUSH (*p == '^' ? charset_not : charset);
2103
            if (*p == '^')
2104
              p++;
2105
 
2106
            /* Remember the first position in the bracket expression.  */
2107
            p1 = p;
2108
 
2109
            /* Push the number of bytes in the bitmap.  */
2110
            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2111
 
2112
            /* Clear the whole map.  */
2113
            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2114
 
2115
            /* charset_not matches newline according to a syntax bit.  */
2116
            if ((re_opcode_t) b[-2] == charset_not
2117
                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2118
              SET_LIST_BIT ('\n');
2119
 
2120
            /* Read in characters and ranges, setting map bits.  */
2121
            for (;;)
2122
              {
2123
                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2124
 
2125
                PATFETCH (c);
2126
 
2127
                /* \ might escape characters inside [...] and [^...].  */
2128
                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2129
                  {
2130
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2131
 
2132
                    PATFETCH (c1);
2133
                    SET_LIST_BIT (c1);
2134
                    continue;
2135
                  }
2136
 
2137
                /* Could be the end of the bracket expression.  If it's
2138
                   not (i.e., when the bracket expression is `[]' so
2139
                   far), the ']' character bit gets set way below.  */
2140
                if (c == ']' && p != p1 + 1)
2141
                  break;
2142
 
2143
                /* Look ahead to see if it's a range when the last thing
2144
                   was a character class.  */
2145
                if (had_char_class && c == '-' && *p != ']')
2146
                  FREE_STACK_RETURN (REG_ERANGE);
2147
 
2148
                /* Look ahead to see if it's a range when the last thing
2149
                   was a character: if this is a hyphen not at the
2150
                   beginning or the end of a list, then it's the range
2151
                   operator.  */
2152
                if (c == '-'
2153
                    && !(p - 2 >= pattern && p[-2] == '[')
2154
                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2155
                    && *p != ']')
2156
                  {
2157
                    reg_errcode_t ret
2158
                      = compile_range (&p, pend, translate, syntax, b);
2159
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2160
                  }
2161
 
2162
                else if (p[0] == '-' && p[1] != ']')
2163
                  { /* This handles ranges made up of characters only.  */
2164
                    reg_errcode_t ret;
2165
 
2166
                    /* Move past the `-'.  */
2167
                    PATFETCH (c1);
2168
 
2169
                    ret = compile_range (&p, pend, translate, syntax, b);
2170
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2171
                  }
2172
 
2173
                /* See if we're at the beginning of a possible character
2174
                   class.  */
2175
 
2176
                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2177
                  { /* Leave room for the null.  */
2178
                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2179
 
2180
                    PATFETCH (c);
2181
                    c1 = 0;
2182
 
2183
                    /* If pattern is `[[:'.  */
2184
                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2185
 
2186
                    for (;;)
2187
                      {
2188
                        PATFETCH (c);
2189
                        if ((c == ':' && *p == ']') || p == pend
2190
                            || c1 == CHAR_CLASS_MAX_LENGTH)
2191
                          break;
2192
                        str[c1++] = c;
2193
                      }
2194
                    str[c1] = '\0';
2195
 
2196
                    /* If isn't a word bracketed by `[:' and `:]':
2197
                       undo the ending character, the letters, and leave
2198
                       the leading `:' and `[' (but set bits for them).  */
2199
                    if (c == ':' && *p == ']')
2200
                      {
2201
/* CYGNUS LOCAL: Skip this code if we don't have btowc().  btowc() is */
2202
/* defined in the 1994 Amendment 1 to ISO C and may not be present on */
2203
/* systems where we have wchar.h and wctype.h.   */
2204
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
2205
                        boolean is_lower = STREQ (str, "lower");
2206
                        boolean is_upper = STREQ (str, "upper");
2207
                        wctype_t wt;
2208
                        int ch;
2209
 
2210
                        wt = IS_CHAR_CLASS (str);
2211
                        if (wt == 0)
2212
                          FREE_STACK_RETURN (REG_ECTYPE);
2213
 
2214
                        /* Throw away the ] at the end of the character
2215
                           class.  */
2216
                        PATFETCH (c);
2217
 
2218
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2219
 
2220
                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2221
                          {
2222
# ifdef _LIBC
2223
                            if (__iswctype (__btowc (ch), wt))
2224
                              SET_LIST_BIT (ch);
2225
#else
2226
                            if (iswctype (btowc (ch), wt))
2227
                              SET_LIST_BIT (ch);
2228
#endif
2229
 
2230
                            if (translate && (is_upper || is_lower)
2231
                                && (ISUPPER (ch) || ISLOWER (ch)))
2232
                              SET_LIST_BIT (ch);
2233
                          }
2234
 
2235
                        had_char_class = true;
2236
#else
2237
                        int ch;
2238
                        boolean is_alnum = STREQ (str, "alnum");
2239
                        boolean is_alpha = STREQ (str, "alpha");
2240
                        boolean is_blank = STREQ (str, "blank");
2241
                        boolean is_cntrl = STREQ (str, "cntrl");
2242
                        boolean is_digit = STREQ (str, "digit");
2243
                        boolean is_graph = STREQ (str, "graph");
2244
                        boolean is_lower = STREQ (str, "lower");
2245
                        boolean is_print = STREQ (str, "print");
2246
                        boolean is_punct = STREQ (str, "punct");
2247
                        boolean is_space = STREQ (str, "space");
2248
                        boolean is_upper = STREQ (str, "upper");
2249
                        boolean is_xdigit = STREQ (str, "xdigit");
2250
 
2251
                        if (!IS_CHAR_CLASS (str))
2252
                          FREE_STACK_RETURN (REG_ECTYPE);
2253
 
2254
                        /* Throw away the ] at the end of the character
2255
                           class.  */
2256
                        PATFETCH (c);
2257
 
2258
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2259
 
2260
                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2261
                          {
2262
                            /* This was split into 3 if's to
2263
                               avoid an arbitrary limit in some compiler.  */
2264
                            if (   (is_alnum  && ISALNUM (ch))
2265
                                || (is_alpha  && ISALPHA (ch))
2266
                                || (is_blank  && ISBLANK (ch))
2267
                                || (is_cntrl  && ISCNTRL (ch)))
2268
                              SET_LIST_BIT (ch);
2269
                            if (   (is_digit  && ISDIGIT (ch))
2270
                                || (is_graph  && ISGRAPH (ch))
2271
                                || (is_lower  && ISLOWER (ch))
2272
                                || (is_print  && ISPRINT (ch)))
2273
                              SET_LIST_BIT (ch);
2274
                            if (   (is_punct  && ISPUNCT (ch))
2275
                                || (is_space  && ISSPACE (ch))
2276
                                || (is_upper  && ISUPPER (ch))
2277
                                || (is_xdigit && ISXDIGIT (ch)))
2278
                              SET_LIST_BIT (ch);
2279
                            if (   translate && (is_upper || is_lower)
2280
                                && (ISUPPER (ch) || ISLOWER (ch)))
2281
                              SET_LIST_BIT (ch);
2282
                          }
2283
                        had_char_class = true;
2284
#endif  /* libc || wctype.h */
2285
                      }
2286
                    else
2287
                      {
2288
                        c1++;
2289
                        while (c1--)
2290
                          PATUNFETCH;
2291
                        SET_LIST_BIT ('[');
2292
                        SET_LIST_BIT (':');
2293
                        had_char_class = false;
2294
                      }
2295
                  }
2296
                else
2297
                  {
2298
                    had_char_class = false;
2299
                    SET_LIST_BIT (c);
2300
                  }
2301
              }
2302
 
2303
            /* Discard any (non)matching list bytes that are all 0 at the
2304
               end of the map.  Decrease the map-length byte too.  */
2305
            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2306
              b[-1]--;
2307
            b += b[-1];
2308
          }
2309
          break;
2310
 
2311
 
2312
        case '(':
2313
          if (syntax & RE_NO_BK_PARENS)
2314
            goto handle_open;
2315
          else
2316
            goto normal_char;
2317
 
2318
 
2319
        case ')':
2320
          if (syntax & RE_NO_BK_PARENS)
2321
            goto handle_close;
2322
          else
2323
            goto normal_char;
2324
 
2325
 
2326
        case '\n':
2327
          if (syntax & RE_NEWLINE_ALT)
2328
            goto handle_alt;
2329
          else
2330
            goto normal_char;
2331
 
2332
 
2333
        case '|':
2334
          if (syntax & RE_NO_BK_VBAR)
2335
            goto handle_alt;
2336
          else
2337
            goto normal_char;
2338
 
2339
 
2340
        case '{':
2341
           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2342
             goto handle_interval;
2343
           else
2344
             goto normal_char;
2345
 
2346
 
2347
        case '\\':
2348
          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2349
 
2350
          /* Do not translate the character after the \, so that we can
2351
             distinguish, e.g., \B from \b, even if we normally would
2352
             translate, e.g., B to b.  */
2353
          PATFETCH_RAW (c);
2354
 
2355
          switch (c)
2356
            {
2357
            case '(':
2358
              if (syntax & RE_NO_BK_PARENS)
2359
                goto normal_backslash;
2360
 
2361
            handle_open:
2362
              bufp->re_nsub++;
2363
              regnum++;
2364
 
2365
              if (COMPILE_STACK_FULL)
2366
                {
2367
                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2368
                            compile_stack_elt_t);
2369
                  if (compile_stack.stack == NULL) return REG_ESPACE;
2370
 
2371
                  compile_stack.size <<= 1;
2372
                }
2373
 
2374
              /* These are the values to restore when we hit end of this
2375
                 group.  They are all relative offsets, so that if the
2376
                 whole pattern moves because of realloc, they will still
2377
                 be valid.  */
2378
              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2379
              COMPILE_STACK_TOP.fixup_alt_jump
2380
                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2381
              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2382
              COMPILE_STACK_TOP.regnum = regnum;
2383
 
2384
              /* We will eventually replace the 0 with the number of
2385
                 groups inner to this one.  But do not push a
2386
                 start_memory for groups beyond the last one we can
2387
                 represent in the compiled pattern.  */
2388
              if (regnum <= MAX_REGNUM)
2389
                {
2390
                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2391
                  BUF_PUSH_3 (start_memory, regnum, 0);
2392
                }
2393
 
2394
              compile_stack.avail++;
2395
 
2396
              fixup_alt_jump = 0;
2397
              laststart = 0;
2398
              begalt = b;
2399
              /* If we've reached MAX_REGNUM groups, then this open
2400
                 won't actually generate any code, so we'll have to
2401
                 clear pending_exact explicitly.  */
2402
              pending_exact = 0;
2403
              break;
2404
 
2405
 
2406
            case ')':
2407
              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2408
 
2409
              if (COMPILE_STACK_EMPTY)
2410
                {
2411
                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2412
                    goto normal_backslash;
2413
                  else
2414
                    FREE_STACK_RETURN (REG_ERPAREN);
2415
                }
2416
 
2417
            handle_close:
2418
              if (fixup_alt_jump)
2419
                { /* Push a dummy failure point at the end of the
2420
                     alternative for a possible future
2421
                     `pop_failure_jump' to pop.  See comments at
2422
                     `push_dummy_failure' in `re_match_2'.  */
2423
                  BUF_PUSH (push_dummy_failure);
2424
 
2425
                  /* We allocated space for this jump when we assigned
2426
                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2427
                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2428
                }
2429
 
2430
              /* See similar code for backslashed left paren above.  */
2431
              if (COMPILE_STACK_EMPTY)
2432
                {
2433
                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2434
                    goto normal_char;
2435
                  else
2436
                    FREE_STACK_RETURN (REG_ERPAREN);
2437
                }
2438
 
2439
              /* Since we just checked for an empty stack above, this
2440
                 ``can't happen''.  */
2441
              assert (compile_stack.avail != 0);
2442
              {
2443
                /* We don't just want to restore into `regnum', because
2444
                   later groups should continue to be numbered higher,
2445
                   as in `(ab)c(de)' -- the second group is #2.  */
2446
                regnum_t this_group_regnum;
2447
 
2448
                compile_stack.avail--;
2449
                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2450
                fixup_alt_jump
2451
                  = COMPILE_STACK_TOP.fixup_alt_jump
2452
                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2453
                    : 0;
2454
                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2455
                this_group_regnum = COMPILE_STACK_TOP.regnum;
2456
                /* If we've reached MAX_REGNUM groups, then this open
2457
                   won't actually generate any code, so we'll have to
2458
                   clear pending_exact explicitly.  */
2459
                pending_exact = 0;
2460
 
2461
                /* We're at the end of the group, so now we know how many
2462
                   groups were inside this one.  */
2463
                if (this_group_regnum <= MAX_REGNUM)
2464
                  {
2465
                    unsigned char *inner_group_loc
2466
                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2467
 
2468
                    *inner_group_loc = regnum - this_group_regnum;
2469
                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2470
                                regnum - this_group_regnum);
2471
                  }
2472
              }
2473
              break;
2474
 
2475
 
2476
            case '|':                                   /* `\|'.  */
2477
              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2478
                goto normal_backslash;
2479
            handle_alt:
2480
              if (syntax & RE_LIMITED_OPS)
2481
                goto normal_char;
2482
 
2483
              /* Insert before the previous alternative a jump which
2484
                 jumps to this alternative if the former fails.  */
2485
              GET_BUFFER_SPACE (3);
2486
              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2487
              pending_exact = 0;
2488
              b += 3;
2489
 
2490
              /* The alternative before this one has a jump after it
2491
                 which gets executed if it gets matched.  Adjust that
2492
                 jump so it will jump to this alternative's analogous
2493
                 jump (put in below, which in turn will jump to the next
2494
                 (if any) alternative's such jump, etc.).  The last such
2495
                 jump jumps to the correct final destination.  A picture:
2496
                          _____ _____
2497
                          |   | |   |
2498
                          |   v |   v
2499
                         a | b   | c
2500
 
2501
                 If we are at `b', then fixup_alt_jump right now points to a
2502
                 three-byte space after `a'.  We'll put in the jump, set
2503
                 fixup_alt_jump to right after `b', and leave behind three
2504
                 bytes which we'll fill in when we get to after `c'.  */
2505
 
2506
              if (fixup_alt_jump)
2507
                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2508
 
2509
              /* Mark and leave space for a jump after this alternative,
2510
                 to be filled in later either by next alternative or
2511
                 when know we're at the end of a series of alternatives.  */
2512
              fixup_alt_jump = b;
2513
              GET_BUFFER_SPACE (3);
2514
              b += 3;
2515
 
2516
              laststart = 0;
2517
              begalt = b;
2518
              break;
2519
 
2520
 
2521
            case '{':
2522
              /* If \{ is a literal.  */
2523
              if (!(syntax & RE_INTERVALS)
2524
                     /* If we're at `\{' and it's not the open-interval
2525
                        operator.  */
2526
                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2527
                  || (p - 2 == pattern  &&  p == pend))
2528
                goto normal_backslash;
2529
 
2530
            handle_interval:
2531
              {
2532
                /* If got here, then the syntax allows intervals.  */
2533
 
2534
                /* At least (most) this many matches must be made.  */
2535
                int lower_bound = -1, upper_bound = -1;
2536
 
2537
                beg_interval = p - 1;
2538
 
2539
                if (p == pend)
2540
                  {
2541
                    if (syntax & RE_NO_BK_BRACES)
2542
                      goto unfetch_interval;
2543
                    else
2544
                      FREE_STACK_RETURN (REG_EBRACE);
2545
                  }
2546
 
2547
                GET_UNSIGNED_NUMBER (lower_bound);
2548
 
2549
                if (c == ',')
2550
                  {
2551
                    GET_UNSIGNED_NUMBER (upper_bound);
2552
                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2553
                  }
2554
                else
2555
                  /* Interval such as `{1}' => match exactly once. */
2556
                  upper_bound = lower_bound;
2557
 
2558
                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2559
                    || lower_bound > upper_bound)
2560
                  {
2561
                    if (syntax & RE_NO_BK_BRACES)
2562
                      goto unfetch_interval;
2563
                    else
2564
                      FREE_STACK_RETURN (REG_BADBR);
2565
                  }
2566
 
2567
                if (!(syntax & RE_NO_BK_BRACES))
2568
                  {
2569
                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2570
 
2571
                    PATFETCH (c);
2572
                  }
2573
 
2574
                if (c != '}')
2575
                  {
2576
                    if (syntax & RE_NO_BK_BRACES)
2577
                      goto unfetch_interval;
2578
                    else
2579
                      FREE_STACK_RETURN (REG_BADBR);
2580
                  }
2581
 
2582
                /* We just parsed a valid interval.  */
2583
 
2584
                /* If it's invalid to have no preceding re.  */
2585
                if (!laststart)
2586
                  {
2587
                    if (syntax & RE_CONTEXT_INVALID_OPS)
2588
                      FREE_STACK_RETURN (REG_BADRPT);
2589
                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2590
                      laststart = b;
2591
                    else
2592
                      goto unfetch_interval;
2593
                  }
2594
 
2595
                /* If the upper bound is zero, don't want to succeed at
2596
                   all; jump from `laststart' to `b + 3', which will be
2597
                   the end of the buffer after we insert the jump.  */
2598
                 if (upper_bound == 0)
2599
                   {
2600
                     GET_BUFFER_SPACE (3);
2601
                     INSERT_JUMP (jump, laststart, b + 3);
2602
                     b += 3;
2603
                   }
2604
 
2605
                 /* Otherwise, we have a nontrivial interval.  When
2606
                    we're all done, the pattern will look like:
2607
                      set_number_at <jump count> <upper bound>
2608
                      set_number_at <succeed_n count> <lower bound>
2609
                      succeed_n <after jump addr> <succeed_n count>
2610
                      <body of loop>
2611
                      jump_n <succeed_n addr> <jump count>
2612
                    (The upper bound and `jump_n' are omitted if
2613
                    `upper_bound' is 1, though.)  */
2614
                 else
2615
                   { /* If the upper bound is > 1, we need to insert
2616
                        more at the end of the loop.  */
2617
                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2618
 
2619
                     GET_BUFFER_SPACE (nbytes);
2620
 
2621
                     /* Initialize lower bound of the `succeed_n', even
2622
                        though it will be set during matching by its
2623
                        attendant `set_number_at' (inserted next),
2624
                        because `re_compile_fastmap' needs to know.
2625
                        Jump to the `jump_n' we might insert below.  */
2626
                     INSERT_JUMP2 (succeed_n, laststart,
2627
                                   b + 5 + (upper_bound > 1) * 5,
2628
                                   lower_bound);
2629
                     b += 5;
2630
 
2631
                     /* Code to initialize the lower bound.  Insert
2632
                        before the `succeed_n'.  The `5' is the last two
2633
                        bytes of this `set_number_at', plus 3 bytes of
2634
                        the following `succeed_n'.  */
2635
                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2636
                     b += 5;
2637
 
2638
                     if (upper_bound > 1)
2639
                       { /* More than one repetition is allowed, so
2640
                            append a backward jump to the `succeed_n'
2641
                            that starts this interval.
2642
 
2643
                            When we've reached this during matching,
2644
                            we'll have matched the interval once, so
2645
                            jump back only `upper_bound - 1' times.  */
2646
                         STORE_JUMP2 (jump_n, b, laststart + 5,
2647
                                      upper_bound - 1);
2648
                         b += 5;
2649
 
2650
                         /* The location we want to set is the second
2651
                            parameter of the `jump_n'; that is `b-2' as
2652
                            an absolute address.  `laststart' will be
2653
                            the `set_number_at' we're about to insert;
2654
                            `laststart+3' the number to set, the source
2655
                            for the relative address.  But we are
2656
                            inserting into the middle of the pattern --
2657
                            so everything is getting moved up by 5.
2658
                            Conclusion: (b - 2) - (laststart + 3) + 5,
2659
                            i.e., b - laststart.
2660
 
2661
                            We insert this at the beginning of the loop
2662
                            so that if we fail during matching, we'll
2663
                            reinitialize the bounds.  */
2664
                         insert_op2 (set_number_at, laststart, b - laststart,
2665
                                     upper_bound - 1, b);
2666
                         b += 5;
2667
                       }
2668
                   }
2669
                pending_exact = 0;
2670
                beg_interval = NULL;
2671
              }
2672
              break;
2673
 
2674
            unfetch_interval:
2675
              /* If an invalid interval, match the characters as literals.  */
2676
               assert (beg_interval);
2677
               p = beg_interval;
2678
               beg_interval = NULL;
2679
 
2680
               /* normal_char and normal_backslash need `c'.  */
2681
               PATFETCH (c);
2682
 
2683
               if (!(syntax & RE_NO_BK_BRACES))
2684
                 {
2685
                   if (p > pattern  &&  p[-1] == '\\')
2686
                     goto normal_backslash;
2687
                 }
2688
               goto normal_char;
2689
 
2690
#ifdef emacs
2691
            /* There is no way to specify the before_dot and after_dot
2692
               operators.  rms says this is ok.  --karl  */
2693
            case '=':
2694
              BUF_PUSH (at_dot);
2695
              break;
2696
 
2697
            case 's':
2698
              laststart = b;
2699
              PATFETCH (c);
2700
              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2701
              break;
2702
 
2703
            case 'S':
2704
              laststart = b;
2705
              PATFETCH (c);
2706
              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2707
              break;
2708
#endif /* emacs */
2709
 
2710
 
2711
            case 'w':
2712
              if (syntax & RE_NO_GNU_OPS)
2713
                goto normal_char;
2714
              laststart = b;
2715
              BUF_PUSH (wordchar);
2716
              break;
2717
 
2718
 
2719
            case 'W':
2720
              if (syntax & RE_NO_GNU_OPS)
2721
                goto normal_char;
2722
              laststart = b;
2723
              BUF_PUSH (notwordchar);
2724
              break;
2725
 
2726
 
2727
            case '<':
2728
              if (syntax & RE_NO_GNU_OPS)
2729
                goto normal_char;
2730
              BUF_PUSH (wordbeg);
2731
              break;
2732
 
2733
            case '>':
2734
              if (syntax & RE_NO_GNU_OPS)
2735
                goto normal_char;
2736
              BUF_PUSH (wordend);
2737
              break;
2738
 
2739
            case 'b':
2740
              if (syntax & RE_NO_GNU_OPS)
2741
                goto normal_char;
2742
              BUF_PUSH (wordbound);
2743
              break;
2744
 
2745
            case 'B':
2746
              if (syntax & RE_NO_GNU_OPS)
2747
                goto normal_char;
2748
              BUF_PUSH (notwordbound);
2749
              break;
2750
 
2751
            case '`':
2752
              if (syntax & RE_NO_GNU_OPS)
2753
                goto normal_char;
2754
              BUF_PUSH (begbuf);
2755
              break;
2756
 
2757
            case '\'':
2758
              if (syntax & RE_NO_GNU_OPS)
2759
                goto normal_char;
2760
              BUF_PUSH (endbuf);
2761
              break;
2762
 
2763
            case '1': case '2': case '3': case '4': case '5':
2764
            case '6': case '7': case '8': case '9':
2765
              if (syntax & RE_NO_BK_REFS)
2766
                goto normal_char;
2767
 
2768
              c1 = c - '0';
2769
 
2770
              if (c1 > regnum)
2771
                FREE_STACK_RETURN (REG_ESUBREG);
2772
 
2773
              /* Can't back reference to a subexpression if inside of it.  */
2774
              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2775
                goto normal_char;
2776
 
2777
              laststart = b;
2778
              BUF_PUSH_2 (duplicate, c1);
2779
              break;
2780
 
2781
 
2782
            case '+':
2783
            case '?':
2784
              if (syntax & RE_BK_PLUS_QM)
2785
                goto handle_plus;
2786
              else
2787
                goto normal_backslash;
2788
 
2789
            default:
2790
            normal_backslash:
2791
              /* You might think it would be useful for \ to mean
2792
                 not to translate; but if we don't translate it
2793
                 it will never match anything.  */
2794
              c = TRANSLATE (c);
2795
              goto normal_char;
2796
            }
2797
          break;
2798
 
2799
 
2800
        default:
2801
        /* Expects the character in `c'.  */
2802
        normal_char:
2803
              /* If no exactn currently being built.  */
2804
          if (!pending_exact
2805
 
2806
              /* If last exactn not at current position.  */
2807
              || pending_exact + *pending_exact + 1 != b
2808
 
2809
              /* We have only one byte following the exactn for the count.  */
2810
              || *pending_exact == (1 << BYTEWIDTH) - 1
2811
 
2812
              /* If followed by a repetition operator.  */
2813
              || *p == '*' || *p == '^'
2814
              || ((syntax & RE_BK_PLUS_QM)
2815
                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2816
                  : (*p == '+' || *p == '?'))
2817
              || ((syntax & RE_INTERVALS)
2818
                  && ((syntax & RE_NO_BK_BRACES)
2819
                      ? *p == '{'
2820
                      : (p[0] == '\\' && p[1] == '{'))))
2821
            {
2822
              /* Start building a new exactn.  */
2823
 
2824
              laststart = b;
2825
 
2826
              BUF_PUSH_2 (exactn, 0);
2827
              pending_exact = b - 1;
2828
            }
2829
 
2830
          BUF_PUSH (c);
2831
          (*pending_exact)++;
2832
          break;
2833
        } /* switch (c) */
2834
    } /* while p != pend */
2835
 
2836
 
2837
  /* Through the pattern now.  */
2838
 
2839
  if (fixup_alt_jump)
2840
    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2841
 
2842
  if (!COMPILE_STACK_EMPTY)
2843
    FREE_STACK_RETURN (REG_EPAREN);
2844
 
2845
  /* If we don't want backtracking, force success
2846
     the first time we reach the end of the compiled pattern.  */
2847
  if (syntax & RE_NO_POSIX_BACKTRACKING)
2848
    BUF_PUSH (succeed);
2849
 
2850
  free (compile_stack.stack);
2851
 
2852
  /* We have succeeded; set the length of the buffer.  */
2853
  bufp->used = b - bufp->buffer;
2854
 
2855
#ifdef DEBUG
2856
  if (debug)
2857
    {
2858
      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2859
      print_compiled_pattern (bufp);
2860
    }
2861
#endif /* DEBUG */
2862
 
2863
#ifndef MATCH_MAY_ALLOCATE
2864
  /* Initialize the failure stack to the largest possible stack.  This
2865
     isn't necessary unless we're trying to avoid calling alloca in
2866
     the search and match routines.  */
2867
  {
2868
    int num_regs = bufp->re_nsub + 1;
2869
 
2870
    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2871
       is strictly greater than re_max_failures, the largest possible stack
2872
       is 2 * re_max_failures failure points.  */
2873
    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2874
      {
2875
        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2876
 
2877
# ifdef emacs
2878
        if (! fail_stack.stack)
2879
          fail_stack.stack
2880
            = (fail_stack_elt_t *) xmalloc (fail_stack.size
2881
                                            * sizeof (fail_stack_elt_t));
2882
        else
2883
          fail_stack.stack
2884
            = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2885
                                             (fail_stack.size
2886
                                              * sizeof (fail_stack_elt_t)));
2887
# else /* not emacs */
2888
        if (! fail_stack.stack)
2889
          fail_stack.stack
2890
            = (fail_stack_elt_t *) malloc (fail_stack.size
2891
                                           * sizeof (fail_stack_elt_t));
2892
        else
2893
          fail_stack.stack
2894
            = (fail_stack_elt_t *) realloc (fail_stack.stack,
2895
                                            (fail_stack.size
2896
                                             * sizeof (fail_stack_elt_t)));
2897
# endif /* not emacs */
2898
      }
2899
 
2900
    regex_grow_registers (num_regs);
2901
  }
2902
#endif /* not MATCH_MAY_ALLOCATE */
2903
 
2904
  return REG_NOERROR;
2905
} /* regex_compile */
2906
 
2907
/* Subroutines for `regex_compile'.  */
2908
 
2909
/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2910
 
2911
static void
2912
store_op1 (op, loc, arg)
2913
    re_opcode_t op;
2914
    unsigned char *loc;
2915
    int arg;
2916
{
2917
  *loc = (unsigned char) op;
2918
  STORE_NUMBER (loc + 1, arg);
2919
}
2920
 
2921
 
2922
/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2923
 
2924
static void
2925
store_op2 (op, loc, arg1, arg2)
2926
    re_opcode_t op;
2927
    unsigned char *loc;
2928
    int arg1, arg2;
2929
{
2930
  *loc = (unsigned char) op;
2931
  STORE_NUMBER (loc + 1, arg1);
2932
  STORE_NUMBER (loc + 3, arg2);
2933
}
2934
 
2935
 
2936
/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2937
   for OP followed by two-byte integer parameter ARG.  */
2938
 
2939
static void
2940
insert_op1 (op, loc, arg, end)
2941
    re_opcode_t op;
2942
    unsigned char *loc;
2943
    int arg;
2944
    unsigned char *end;
2945
{
2946
  register unsigned char *pfrom = end;
2947
  register unsigned char *pto = end + 3;
2948
 
2949
  while (pfrom != loc)
2950
    *--pto = *--pfrom;
2951
 
2952
  store_op1 (op, loc, arg);
2953
}
2954
 
2955
 
2956
/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2957
 
2958
static void
2959
insert_op2 (op, loc, arg1, arg2, end)
2960
    re_opcode_t op;
2961
    unsigned char *loc;
2962
    int arg1, arg2;
2963
    unsigned char *end;
2964
{
2965
  register unsigned char *pfrom = end;
2966
  register unsigned char *pto = end + 5;
2967
 
2968
  while (pfrom != loc)
2969
    *--pto = *--pfrom;
2970
 
2971
  store_op2 (op, loc, arg1, arg2);
2972
}
2973
 
2974
 
2975
/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2976
   after an alternative or a begin-subexpression.  We assume there is at
2977
   least one character before the ^.  */
2978
 
2979
static boolean
2980
at_begline_loc_p (pattern, p, syntax)
2981
    const char *pattern, *p;
2982
    reg_syntax_t syntax;
2983
{
2984
  const char *prev = p - 2;
2985
  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2986
 
2987
  return
2988
       /* After a subexpression?  */
2989
       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2990
       /* After an alternative?  */
2991
    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2992
}
2993
 
2994
 
2995
/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2996
   at least one character after the $, i.e., `P < PEND'.  */
2997
 
2998
static boolean
2999
at_endline_loc_p (p, pend, syntax)
3000
    const char *p, *pend;
3001
    reg_syntax_t syntax;
3002
{
3003
  const char *next = p;
3004
  boolean next_backslash = *next == '\\';
3005
  const char *next_next = p + 1 < pend ? p + 1 : 0;
3006
 
3007
  return
3008
       /* Before a subexpression?  */
3009
       (syntax & RE_NO_BK_PARENS ? *next == ')'
3010
        : next_backslash && next_next && *next_next == ')')
3011
       /* Before an alternative?  */
3012
    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3013
        : next_backslash && next_next && *next_next == '|');
3014
}
3015
 
3016
 
3017
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3018
   false if it's not.  */
3019
 
3020
static boolean
3021
group_in_compile_stack (compile_stack, regnum)
3022
    compile_stack_type compile_stack;
3023
    regnum_t regnum;
3024
{
3025
  int this_element;
3026
 
3027
  for (this_element = compile_stack.avail - 1;
3028
       this_element >= 0;
3029
       this_element--)
3030
    if (compile_stack.stack[this_element].regnum == regnum)
3031
      return true;
3032
 
3033
  return false;
3034
}
3035
 
3036
 
3037
/* Read the ending character of a range (in a bracket expression) from the
3038
   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3039
   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3040
   Then we set the translation of all bits between the starting and
3041
   ending characters (inclusive) in the compiled pattern B.
3042
 
3043
   Return an error code.
3044
 
3045
   We use these short variable names so we can use the same macros as
3046
   `regex_compile' itself.  */
3047
 
3048
static reg_errcode_t
3049
compile_range (p_ptr, pend, translate, syntax, b)
3050
    const char **p_ptr, *pend;
3051
    RE_TRANSLATE_TYPE translate;
3052
    reg_syntax_t syntax;
3053
    unsigned char *b;
3054
{
3055
  unsigned this_char;
3056
 
3057
  const char *p = *p_ptr;
3058
  unsigned int range_start, range_end;
3059
 
3060
  if (p == pend)
3061
    return REG_ERANGE;
3062
 
3063
  /* Even though the pattern is a signed `char *', we need to fetch
3064
     with unsigned char *'s; if the high bit of the pattern character
3065
     is set, the range endpoints will be negative if we fetch using a
3066
     signed char *.
3067
 
3068
     We also want to fetch the endpoints without translating them; the
3069
     appropriate translation is done in the bit-setting loop below.  */
3070
  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3071
  range_start = ((const unsigned char *) p)[-2];
3072
  range_end   = ((const unsigned char *) p)[0];
3073
 
3074
  /* Have to increment the pointer into the pattern string, so the
3075
     caller isn't still at the ending character.  */
3076
  (*p_ptr)++;
3077
 
3078
  /* If the start is after the end, the range is empty.  */
3079
  if (range_start > range_end)
3080
    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3081
 
3082
  /* Here we see why `this_char' has to be larger than an `unsigned
3083
     char' -- the range is inclusive, so if `range_end' == 0xff
3084
     (assuming 8-bit characters), we would otherwise go into an infinite
3085
     loop, since all characters <= 0xff.  */
3086
  for (this_char = range_start; this_char <= range_end; this_char++)
3087
    {
3088
      SET_LIST_BIT (TRANSLATE (this_char));
3089
    }
3090
 
3091
  return REG_NOERROR;
3092
}
3093
 
3094
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3095
   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3096
   characters can start a string that matches the pattern.  This fastmap
3097
   is used by re_search to skip quickly over impossible starting points.
3098
 
3099
   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3100
   area as BUFP->fastmap.
3101
 
3102
   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3103
   the pattern buffer.
3104
 
3105
   Returns 0 if we succeed, -2 if an internal error.   */
3106
 
3107
int
3108
re_compile_fastmap (bufp)
3109
     struct re_pattern_buffer *bufp;
3110
{
3111
  int j, k;
3112
#ifdef MATCH_MAY_ALLOCATE
3113
  fail_stack_type fail_stack;
3114
#endif
3115
#ifndef REGEX_MALLOC
3116
  char *destination;
3117
#endif
3118
 
3119
  register char *fastmap = bufp->fastmap;
3120
  unsigned char *pattern = bufp->buffer;
3121
  unsigned char *p = pattern;
3122
  register unsigned char *pend = pattern + bufp->used;
3123
 
3124
#ifdef REL_ALLOC
3125
  /* This holds the pointer to the failure stack, when
3126
     it is allocated relocatably.  */
3127
  fail_stack_elt_t *failure_stack_ptr;
3128
#endif
3129
 
3130
  /* Assume that each path through the pattern can be null until
3131
     proven otherwise.  We set this false at the bottom of switch
3132
     statement, to which we get only if a particular path doesn't
3133
     match the empty string.  */
3134
  boolean path_can_be_null = true;
3135
 
3136
  /* We aren't doing a `succeed_n' to begin with.  */
3137
  boolean succeed_n_p = false;
3138
 
3139
  assert (fastmap != NULL && p != NULL);
3140
 
3141
  INIT_FAIL_STACK ();
3142
  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3143
  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
3144
  bufp->can_be_null = 0;
3145
 
3146
  while (1)
3147
    {
3148
      if (p == pend || *p == succeed)
3149
        {
3150
          /* We have reached the (effective) end of pattern.  */
3151
          if (!FAIL_STACK_EMPTY ())
3152
            {
3153
              bufp->can_be_null |= path_can_be_null;
3154
 
3155
              /* Reset for next path.  */
3156
              path_can_be_null = true;
3157
 
3158
              p = fail_stack.stack[--fail_stack.avail].pointer;
3159
 
3160
              continue;
3161
            }
3162
          else
3163
            break;
3164
        }
3165
 
3166
      /* We should never be about to go beyond the end of the pattern.  */
3167
      assert (p < pend);
3168
 
3169
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3170
        {
3171
 
3172
        /* I guess the idea here is to simply not bother with a fastmap
3173
           if a backreference is used, since it's too hard to figure out
3174
           the fastmap for the corresponding group.  Setting
3175
           `can_be_null' stops `re_search_2' from using the fastmap, so
3176
           that is all we do.  */
3177
        case duplicate:
3178
          bufp->can_be_null = 1;
3179
          goto done;
3180
 
3181
 
3182
      /* Following are the cases which match a character.  These end
3183
         with `break'.  */
3184
 
3185
        case exactn:
3186
          fastmap[p[1]] = 1;
3187
          break;
3188
 
3189
 
3190
        case charset:
3191
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3192
            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3193
              fastmap[j] = 1;
3194
          break;
3195
 
3196
 
3197
        case charset_not:
3198
          /* Chars beyond end of map must be allowed.  */
3199
          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3200
            fastmap[j] = 1;
3201
 
3202
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3203
            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3204
              fastmap[j] = 1;
3205
          break;
3206
 
3207
 
3208
        case wordchar:
3209
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3210
            if (SYNTAX (j) == Sword)
3211
              fastmap[j] = 1;
3212
          break;
3213
 
3214
 
3215
        case notwordchar:
3216
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3217
            if (SYNTAX (j) != Sword)
3218
              fastmap[j] = 1;
3219
          break;
3220
 
3221
 
3222
        case anychar:
3223
          {
3224
            int fastmap_newline = fastmap['\n'];
3225
 
3226
            /* `.' matches anything ...  */
3227
            for (j = 0; j < (1 << BYTEWIDTH); j++)
3228
              fastmap[j] = 1;
3229
 
3230
            /* ... except perhaps newline.  */
3231
            if (!(bufp->syntax & RE_DOT_NEWLINE))
3232
              fastmap['\n'] = fastmap_newline;
3233
 
3234
            /* Return if we have already set `can_be_null'; if we have,
3235
               then the fastmap is irrelevant.  Something's wrong here.  */
3236
            else if (bufp->can_be_null)
3237
              goto done;
3238
 
3239
            /* Otherwise, have to check alternative paths.  */
3240
            break;
3241
          }
3242
 
3243
#ifdef emacs
3244
        case syntaxspec:
3245
          k = *p++;
3246
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3247
            if (SYNTAX (j) == (enum syntaxcode) k)
3248
              fastmap[j] = 1;
3249
          break;
3250
 
3251
 
3252
        case notsyntaxspec:
3253
          k = *p++;
3254
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3255
            if (SYNTAX (j) != (enum syntaxcode) k)
3256
              fastmap[j] = 1;
3257
          break;
3258
 
3259
 
3260
      /* All cases after this match the empty string.  These end with
3261
         `continue'.  */
3262
 
3263
 
3264
        case before_dot:
3265
        case at_dot:
3266
        case after_dot:
3267
          continue;
3268
#endif /* emacs */
3269
 
3270
 
3271
        case no_op:
3272
        case begline:
3273
        case endline:
3274
        case begbuf:
3275
        case endbuf:
3276
        case wordbound:
3277
        case notwordbound:
3278
        case wordbeg:
3279
        case wordend:
3280
        case push_dummy_failure:
3281
          continue;
3282
 
3283
 
3284
        case jump_n:
3285
        case pop_failure_jump:
3286
        case maybe_pop_jump:
3287
        case jump:
3288
        case jump_past_alt:
3289
        case dummy_failure_jump:
3290
          EXTRACT_NUMBER_AND_INCR (j, p);
3291
          p += j;
3292
          if (j > 0)
3293
            continue;
3294
 
3295
          /* Jump backward implies we just went through the body of a
3296
             loop and matched nothing.  Opcode jumped to should be
3297
             `on_failure_jump' or `succeed_n'.  Just treat it like an
3298
             ordinary jump.  For a * loop, it has pushed its failure
3299
             point already; if so, discard that as redundant.  */
3300
          if ((re_opcode_t) *p != on_failure_jump
3301
              && (re_opcode_t) *p != succeed_n)
3302
            continue;
3303
 
3304
          p++;
3305
          EXTRACT_NUMBER_AND_INCR (j, p);
3306
          p += j;
3307
 
3308
          /* If what's on the stack is where we are now, pop it.  */
3309
          if (!FAIL_STACK_EMPTY ()
3310
              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3311
            fail_stack.avail--;
3312
 
3313
          continue;
3314
 
3315
 
3316
        case on_failure_jump:
3317
        case on_failure_keep_string_jump:
3318
        handle_on_failure_jump:
3319
          EXTRACT_NUMBER_AND_INCR (j, p);
3320
 
3321
          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3322
             end of the pattern.  We don't want to push such a point,
3323
             since when we restore it above, entering the switch will
3324
             increment `p' past the end of the pattern.  We don't need
3325
             to push such a point since we obviously won't find any more
3326
             fastmap entries beyond `pend'.  Such a pattern can match
3327
             the null string, though.  */
3328
          if (p + j < pend)
3329
            {
3330
              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3331
                {
3332
                  RESET_FAIL_STACK ();
3333
                  return -2;
3334
                }
3335
            }
3336
          else
3337
            bufp->can_be_null = 1;
3338
 
3339
          if (succeed_n_p)
3340
            {
3341
              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
3342
              succeed_n_p = false;
3343
            }
3344
 
3345
          continue;
3346
 
3347
 
3348
        case succeed_n:
3349
          /* Get to the number of times to succeed.  */
3350
          p += 2;
3351
 
3352
          /* Increment p past the n for when k != 0.  */
3353
          EXTRACT_NUMBER_AND_INCR (k, p);
3354
          if (k == 0)
3355
            {
3356
              p -= 4;
3357
              succeed_n_p = true;  /* Spaghetti code alert.  */
3358
              goto handle_on_failure_jump;
3359
            }
3360
          continue;
3361
 
3362
 
3363
        case set_number_at:
3364
          p += 4;
3365
          continue;
3366
 
3367
 
3368
        case start_memory:
3369
        case stop_memory:
3370
          p += 2;
3371
          continue;
3372
 
3373
 
3374
        default:
3375
          abort (); /* We have listed all the cases.  */
3376
        } /* switch *p++ */
3377
 
3378
      /* Getting here means we have found the possible starting
3379
         characters for one path of the pattern -- and that the empty
3380
         string does not match.  We need not follow this path further.
3381
         Instead, look at the next alternative (remembered on the
3382
         stack), or quit if no more.  The test at the top of the loop
3383
         does these things.  */
3384
      path_can_be_null = false;
3385
      p = pend;
3386
    } /* while p */
3387
 
3388
  /* Set `can_be_null' for the last path (also the first path, if the
3389
     pattern is empty).  */
3390
  bufp->can_be_null |= path_can_be_null;
3391
 
3392
 done:
3393
  RESET_FAIL_STACK ();
3394
  return 0;
3395
} /* re_compile_fastmap */
3396
#ifdef _LIBC
3397
weak_alias (__re_compile_fastmap, re_compile_fastmap)
3398
#endif
3399
 
3400
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3401
   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3402
   this memory for recording register information.  STARTS and ENDS
3403
   must be allocated using the malloc library routine, and must each
3404
   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3405
 
3406
   If NUM_REGS == 0, then subsequent matches should allocate their own
3407
   register data.
3408
 
3409
   Unless this function is called, the first search or match using
3410
   PATTERN_BUFFER will allocate its own register data, without
3411
   freeing the old data.  */
3412
 
3413
void
3414
re_set_registers (bufp, regs, num_regs, starts, ends)
3415
    struct re_pattern_buffer *bufp;
3416
    struct re_registers *regs;
3417
    unsigned num_regs;
3418
    regoff_t *starts, *ends;
3419
{
3420
  if (num_regs)
3421
    {
3422
      bufp->regs_allocated = REGS_REALLOCATE;
3423
      regs->num_regs = num_regs;
3424
      regs->start = starts;
3425
      regs->end = ends;
3426
    }
3427
  else
3428
    {
3429
      bufp->regs_allocated = REGS_UNALLOCATED;
3430
      regs->num_regs = 0;
3431
      regs->start = regs->end = (regoff_t *) 0;
3432
    }
3433
}
3434
#ifdef _LIBC
3435
weak_alias (__re_set_registers, re_set_registers)
3436
#endif
3437
 
3438
/* Searching routines.  */
3439
 
3440
/* Like re_search_2, below, but only one string is specified, and
3441
   doesn't let you say where to stop matching. */
3442
 
3443
int
3444
re_search (bufp, string, size, startpos, range, regs)
3445
     struct re_pattern_buffer *bufp;
3446
     const char *string;
3447
     int size, startpos, range;
3448
     struct re_registers *regs;
3449
{
3450
  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3451
                      regs, size);
3452
}
3453
#ifdef _LIBC
3454
weak_alias (__re_search, re_search)
3455
#endif
3456
 
3457
 
3458
/* Using the compiled pattern in BUFP->buffer, first tries to match the
3459
   virtual concatenation of STRING1 and STRING2, starting first at index
3460
   STARTPOS, then at STARTPOS + 1, and so on.
3461
 
3462
   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3463
 
3464
   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3465
   only at STARTPOS; in general, the last start tried is STARTPOS +
3466
   RANGE.
3467
 
3468
   In REGS, return the indices of the virtual concatenation of STRING1
3469
   and STRING2 that matched the entire BUFP->buffer and its contained
3470
   subexpressions.
3471
 
3472
   Do not consider matching one past the index STOP in the virtual
3473
   concatenation of STRING1 and STRING2.
3474
 
3475
   We return either the position in the strings at which the match was
3476
   found, -1 if no match, or -2 if error (such as failure
3477
   stack overflow).  */
3478
 
3479
int
3480
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3481
     struct re_pattern_buffer *bufp;
3482
     const char *string1, *string2;
3483
     int size1, size2;
3484
     int startpos;
3485
     int range;
3486
     struct re_registers *regs;
3487
     int stop;
3488
{
3489
  int val;
3490
  register char *fastmap = bufp->fastmap;
3491
  register RE_TRANSLATE_TYPE translate = bufp->translate;
3492
  int total_size = size1 + size2;
3493
  int endpos = startpos + range;
3494
 
3495
  /* Check for out-of-range STARTPOS.  */
3496
  if (startpos < 0 || startpos > total_size)
3497
    return -1;
3498
 
3499
  /* Fix up RANGE if it might eventually take us outside
3500
     the virtual concatenation of STRING1 and STRING2.
3501
     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3502
  if (endpos < 0)
3503
    range = 0 - startpos;
3504
  else if (endpos > total_size)
3505
    range = total_size - startpos;
3506
 
3507
  /* If the search isn't to be a backwards one, don't waste time in a
3508
     search for a pattern that must be anchored.  */
3509
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3510
    {
3511
      if (startpos > 0)
3512
        return -1;
3513
      else
3514
        range = 1;
3515
    }
3516
 
3517
#ifdef emacs
3518
  /* In a forward search for something that starts with \=.
3519
     don't keep searching past point.  */
3520
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3521
    {
3522
      range = PT - startpos;
3523
      if (range <= 0)
3524
        return -1;
3525
    }
3526
#endif /* emacs */
3527
 
3528
  /* Update the fastmap now if not correct already.  */
3529
  if (fastmap && !bufp->fastmap_accurate)
3530
    if (re_compile_fastmap (bufp) == -2)
3531
      return -2;
3532
 
3533
  /* Loop through the string, looking for a place to start matching.  */
3534
  for (;;)
3535
    {
3536
      /* If a fastmap is supplied, skip quickly over characters that
3537
         cannot be the start of a match.  If the pattern can match the
3538
         null string, however, we don't need to skip characters; we want
3539
         the first null string.  */
3540
      if (fastmap && startpos < total_size && !bufp->can_be_null)
3541
        {
3542
          if (range > 0) /* Searching forwards.  */
3543
            {
3544
              register const char *d;
3545
              register int lim = 0;
3546
              int irange = range;
3547
 
3548
              if (startpos < size1 && startpos + range >= size1)
3549
                lim = range - (size1 - startpos);
3550
 
3551
              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3552
 
3553
              /* Written out as an if-else to avoid testing `translate'
3554
                 inside the loop.  */
3555
              if (translate)
3556
                while (range > lim
3557
                       && !fastmap[(unsigned char)
3558
                                   translate[(unsigned char) *d++]])
3559
                  range--;
3560
              else
3561
                while (range > lim && !fastmap[(unsigned char) *d++])
3562
                  range--;
3563
 
3564
              startpos += irange - range;
3565
            }
3566
          else                          /* Searching backwards.  */
3567
            {
3568
              register char c = (size1 == 0 || startpos >= size1
3569
                                 ? string2[startpos - size1]
3570
                                 : string1[startpos]);
3571
 
3572
              if (!fastmap[(unsigned char) TRANSLATE (c)])
3573
                goto advance;
3574
            }
3575
        }
3576
 
3577
      /* If can't match the null string, and that's all we have left, fail.  */
3578
      if (range >= 0 && startpos == total_size && fastmap
3579
          && !bufp->can_be_null)
3580
        return -1;
3581
 
3582
      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3583
                                 startpos, regs, stop);
3584
#ifndef REGEX_MALLOC
3585
# ifdef C_ALLOCA
3586
      alloca (0);
3587
# endif
3588
#endif
3589
 
3590
      if (val >= 0)
3591
        return startpos;
3592
 
3593
      if (val == -2)
3594
        return -2;
3595
 
3596
    advance:
3597
      if (!range)
3598
        break;
3599
      else if (range > 0)
3600
        {
3601
          range--;
3602
          startpos++;
3603
        }
3604
      else
3605
        {
3606
          range++;
3607
          startpos--;
3608
        }
3609
    }
3610
  return -1;
3611
} /* re_search_2 */
3612
#ifdef _LIBC
3613
weak_alias (__re_search_2, re_search_2)
3614
#endif
3615
 
3616
/* This converts PTR, a pointer into one of the search strings `string1'
3617
   and `string2' into an offset from the beginning of that string.  */
3618
#define POINTER_TO_OFFSET(ptr)                  \
3619
  (FIRST_STRING_P (ptr)                         \
3620
   ? ((regoff_t) ((ptr) - string1))             \
3621
   : ((regoff_t) ((ptr) - string2 + size1)))
3622
 
3623
/* Macros for dealing with the split strings in re_match_2.  */
3624
 
3625
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3626
 
3627
/* Call before fetching a character with *d.  This switches over to
3628
   string2 if necessary.  */
3629
#define PREFETCH()                                                      \
3630
  while (d == dend)                                                     \
3631
    {                                                                   \
3632
      /* End of string2 => fail.  */                                    \
3633
      if (dend == end_match_2)                                          \
3634
        goto fail;                                                      \
3635
      /* End of string1 => advance to string2.  */                      \
3636
      d = string2;                                                      \
3637
      dend = end_match_2;                                               \
3638
    }
3639
 
3640
 
3641
/* Test if at very beginning or at very end of the virtual concatenation
3642
   of `string1' and `string2'.  If only one string, it's `string2'.  */
3643
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3644
#define AT_STRINGS_END(d) ((d) == end2)
3645
 
3646
 
3647
/* Test if D points to a character which is word-constituent.  We have
3648
   two special cases to check for: if past the end of string1, look at
3649
   the first character in string2; and if before the beginning of
3650
   string2, look at the last character in string1.  */
3651
#define WORDCHAR_P(d)                                                   \
3652
  (SYNTAX ((d) == end1 ? *string2                                       \
3653
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3654
   == Sword)
3655
 
3656
/* Disabled due to a compiler bug -- see comment at case wordbound */
3657
#if 0
3658
/* Test if the character before D and the one at D differ with respect
3659
   to being word-constituent.  */
3660
#define AT_WORD_BOUNDARY(d)                                             \
3661
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3662
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3663
#endif
3664
 
3665
/* Free everything we malloc.  */
3666
#ifdef MATCH_MAY_ALLOCATE
3667
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3668
# define FREE_VARIABLES()                                               \
3669
  do {                                                                  \
3670
    REGEX_FREE_STACK (fail_stack.stack);                                \
3671
    FREE_VAR (regstart);                                                \
3672
    FREE_VAR (regend);                                                  \
3673
    FREE_VAR (old_regstart);                                            \
3674
    FREE_VAR (old_regend);                                              \
3675
    FREE_VAR (best_regstart);                                           \
3676
    FREE_VAR (best_regend);                                             \
3677
    FREE_VAR (reg_info);                                                \
3678
    FREE_VAR (reg_dummy);                                               \
3679
    FREE_VAR (reg_info_dummy);                                          \
3680
  } while (0)
3681
#else
3682
# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3683
#endif /* not MATCH_MAY_ALLOCATE */
3684
 
3685
/* These values must meet several constraints.  They must not be valid
3686
   register values; since we have a limit of 255 registers (because
3687
   we use only one byte in the pattern for the register number), we can
3688
   use numbers larger than 255.  They must differ by 1, because of
3689
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3690
   be larger than the value for the highest register, so we do not try
3691
   to actually save any registers when none are active.  */
3692
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3693
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3694
 
3695
/* Matching routines.  */
3696
 
3697
#ifndef emacs   /* Emacs never uses this.  */
3698
/* re_match is like re_match_2 except it takes only a single string.  */
3699
 
3700
int
3701
re_match (bufp, string, size, pos, regs)
3702
     struct re_pattern_buffer *bufp;
3703
     const char *string;
3704
     int size, pos;
3705
     struct re_registers *regs;
3706
{
3707
  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3708
                                    pos, regs, size);
3709
# ifndef REGEX_MALLOC
3710
#  ifdef C_ALLOCA
3711
  alloca (0);
3712
#  endif
3713
# endif
3714
  return result;
3715
}
3716
# ifdef _LIBC
3717
weak_alias (__re_match, re_match)
3718
# endif
3719
#endif /* not emacs */
3720
 
3721
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3722
                                                    unsigned char *end,
3723
                                                register_info_type *reg_info));
3724
static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3725
                                                  unsigned char *end,
3726
                                                register_info_type *reg_info));
3727
static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3728
                                                        unsigned char *end,
3729
                                                register_info_type *reg_info));
3730
static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3731
                                     int len, char *translate));
3732
 
3733
/* re_match_2 matches the compiled pattern in BUFP against the
3734
   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3735
   and SIZE2, respectively).  We start matching at POS, and stop
3736
   matching at STOP.
3737
 
3738
   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3739
   store offsets for the substring each group matched in REGS.  See the
3740
   documentation for exactly how many groups we fill.
3741
 
3742
   We return -1 if no match, -2 if an internal error (such as the
3743
   failure stack overflowing).  Otherwise, we return the length of the
3744
   matched substring.  */
3745
 
3746
int
3747
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3748
     struct re_pattern_buffer *bufp;
3749
     const char *string1, *string2;
3750
     int size1, size2;
3751
     int pos;
3752
     struct re_registers *regs;
3753
     int stop;
3754
{
3755
  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3756
                                    pos, regs, stop);
3757
#ifndef REGEX_MALLOC
3758
# ifdef C_ALLOCA
3759
  alloca (0);
3760
# endif
3761
#endif
3762
  return result;
3763
}
3764
#ifdef _LIBC
3765
weak_alias (__re_match_2, re_match_2)
3766
#endif
3767
 
3768
/* This is a separate function so that we can force an alloca cleanup
3769
   afterwards.  */
3770
static int
3771
re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3772
     struct re_pattern_buffer *bufp;
3773
     const char *string1, *string2;
3774
     int size1, size2;
3775
     int pos;
3776
     struct re_registers *regs;
3777
     int stop;
3778
{
3779
  /* General temporaries.  */
3780
  int mcnt;
3781
  unsigned char *p1;
3782
 
3783
  /* Just past the end of the corresponding string.  */
3784
  const char *end1, *end2;
3785
 
3786
  /* Pointers into string1 and string2, just past the last characters in
3787
     each to consider matching.  */
3788
  const char *end_match_1, *end_match_2;
3789
 
3790
  /* Where we are in the data, and the end of the current string.  */
3791
  const char *d, *dend;
3792
 
3793
  /* Where we are in the pattern, and the end of the pattern.  */
3794
  unsigned char *p = bufp->buffer;
3795
  register unsigned char *pend = p + bufp->used;
3796
 
3797
  /* Mark the opcode just after a start_memory, so we can test for an
3798
     empty subpattern when we get to the stop_memory.  */
3799
  unsigned char *just_past_start_mem = 0;
3800
 
3801
  /* We use this to map every character in the string.  */
3802
  RE_TRANSLATE_TYPE translate = bufp->translate;
3803
 
3804
  /* Failure point stack.  Each place that can handle a failure further
3805
     down the line pushes a failure point on this stack.  It consists of
3806
     restart, regend, and reg_info for all registers corresponding to
3807
     the subexpressions we're currently inside, plus the number of such
3808
     registers, and, finally, two char *'s.  The first char * is where
3809
     to resume scanning the pattern; the second one is where to resume
3810
     scanning the strings.  If the latter is zero, the failure point is
3811
     a ``dummy''; if a failure happens and the failure point is a dummy,
3812
     it gets discarded and the next next one is tried.  */
3813
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3814
  fail_stack_type fail_stack;
3815
#endif
3816
#ifdef DEBUG
3817
  static unsigned failure_id = 0;
3818
  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3819
#endif
3820
 
3821
#ifdef REL_ALLOC
3822
  /* This holds the pointer to the failure stack, when
3823
     it is allocated relocatably.  */
3824
  fail_stack_elt_t *failure_stack_ptr;
3825
#endif
3826
 
3827
  /* We fill all the registers internally, independent of what we
3828
     return, for use in backreferences.  The number here includes
3829
     an element for register zero.  */
3830
  size_t num_regs = bufp->re_nsub + 1;
3831
 
3832
  /* The currently active registers.  */
3833
  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3834
  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3835
 
3836
  /* Information on the contents of registers. These are pointers into
3837
     the input strings; they record just what was matched (on this
3838
     attempt) by a subexpression part of the pattern, that is, the
3839
     regnum-th regstart pointer points to where in the pattern we began
3840
     matching and the regnum-th regend points to right after where we
3841
     stopped matching the regnum-th subexpression.  (The zeroth register
3842
     keeps track of what the whole pattern matches.)  */
3843
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3844
  const char **regstart, **regend;
3845
#endif
3846
 
3847
  /* If a group that's operated upon by a repetition operator fails to
3848
     match anything, then the register for its start will need to be
3849
     restored because it will have been set to wherever in the string we
3850
     are when we last see its open-group operator.  Similarly for a
3851
     register's end.  */
3852
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3853
  const char **old_regstart, **old_regend;
3854
#endif
3855
 
3856
  /* The is_active field of reg_info helps us keep track of which (possibly
3857
     nested) subexpressions we are currently in. The matched_something
3858
     field of reg_info[reg_num] helps us tell whether or not we have
3859
     matched any of the pattern so far this time through the reg_num-th
3860
     subexpression.  These two fields get reset each time through any
3861
     loop their register is in.  */
3862
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3863
  register_info_type *reg_info;
3864
#endif
3865
 
3866
  /* The following record the register info as found in the above
3867
     variables when we find a match better than any we've seen before.
3868
     This happens as we backtrack through the failure points, which in
3869
     turn happens only if we have not yet matched the entire string. */
3870
  unsigned best_regs_set = false;
3871
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3872
  const char **best_regstart, **best_regend;
3873
#endif
3874
 
3875
  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3876
     allocate space for that if we're not allocating space for anything
3877
     else (see below).  Also, we never need info about register 0 for
3878
     any of the other register vectors, and it seems rather a kludge to
3879
     treat `best_regend' differently than the rest.  So we keep track of
3880
     the end of the best match so far in a separate variable.  We
3881
     initialize this to NULL so that when we backtrack the first time
3882
     and need to test it, it's not garbage.  */
3883
  const char *match_end = NULL;
3884
 
3885
  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3886
  int set_regs_matched_done = 0;
3887
 
3888
  /* Used when we pop values we don't care about.  */
3889
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3890
  const char **reg_dummy;
3891
  register_info_type *reg_info_dummy;
3892
#endif
3893
 
3894
#ifdef DEBUG
3895
  /* Counts the total number of registers pushed.  */
3896
  unsigned num_regs_pushed = 0;
3897
#endif
3898
 
3899
  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3900
 
3901
  INIT_FAIL_STACK ();
3902
 
3903
#ifdef MATCH_MAY_ALLOCATE
3904
  /* Do not bother to initialize all the register variables if there are
3905
     no groups in the pattern, as it takes a fair amount of time.  If
3906
     there are groups, we include space for register 0 (the whole
3907
     pattern), even though we never use it, since it simplifies the
3908
     array indexing.  We should fix this.  */
3909
  if (bufp->re_nsub)
3910
    {
3911
      regstart = REGEX_TALLOC (num_regs, const char *);
3912
      regend = REGEX_TALLOC (num_regs, const char *);
3913
      old_regstart = REGEX_TALLOC (num_regs, const char *);
3914
      old_regend = REGEX_TALLOC (num_regs, const char *);
3915
      best_regstart = REGEX_TALLOC (num_regs, const char *);
3916
      best_regend = REGEX_TALLOC (num_regs, const char *);
3917
      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3918
      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3919
      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3920
 
3921
      if (!(regstart && regend && old_regstart && old_regend && reg_info
3922
            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3923
        {
3924
          FREE_VARIABLES ();
3925
          return -2;
3926
        }
3927
    }
3928
  else
3929
    {
3930
      /* We must initialize all our variables to NULL, so that
3931
         `FREE_VARIABLES' doesn't try to free them.  */
3932
      regstart = regend = old_regstart = old_regend = best_regstart
3933
        = best_regend = reg_dummy = NULL;
3934
      reg_info = reg_info_dummy = (register_info_type *) NULL;
3935
    }
3936
#endif /* MATCH_MAY_ALLOCATE */
3937
 
3938
  /* The starting position is bogus.  */
3939
  if (pos < 0 || pos > size1 + size2)
3940
    {
3941
      FREE_VARIABLES ();
3942
      return -1;
3943
    }
3944
 
3945
  /* Initialize subexpression text positions to -1 to mark ones that no
3946
     start_memory/stop_memory has been seen for. Also initialize the
3947
     register information struct.  */
3948
  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3949
    {
3950
      regstart[mcnt] = regend[mcnt]
3951
        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3952
 
3953
      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3954
      IS_ACTIVE (reg_info[mcnt]) = 0;
3955
      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3956
      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3957
    }
3958
 
3959
  /* We move `string1' into `string2' if the latter's empty -- but not if
3960
     `string1' is null.  */
3961
  if (size2 == 0 && string1 != NULL)
3962
    {
3963
      string2 = string1;
3964
      size2 = size1;
3965
      string1 = 0;
3966
      size1 = 0;
3967
    }
3968
  end1 = string1 + size1;
3969
  end2 = string2 + size2;
3970
 
3971
  /* Compute where to stop matching, within the two strings.  */
3972
  if (stop <= size1)
3973
    {
3974
      end_match_1 = string1 + stop;
3975
      end_match_2 = string2;
3976
    }
3977
  else
3978
    {
3979
      end_match_1 = end1;
3980
      end_match_2 = string2 + stop - size1;
3981
    }
3982
 
3983
  /* `p' scans through the pattern as `d' scans through the data.
3984
     `dend' is the end of the input string that `d' points within.  `d'
3985
     is advanced into the following input string whenever necessary, but
3986
     this happens before fetching; therefore, at the beginning of the
3987
     loop, `d' can be pointing at the end of a string, but it cannot
3988
     equal `string2'.  */
3989
  if (size1 > 0 && pos <= size1)
3990
    {
3991
      d = string1 + pos;
3992
      dend = end_match_1;
3993
    }
3994
  else
3995
    {
3996
      d = string2 + pos - size1;
3997
      dend = end_match_2;
3998
    }
3999
 
4000
  DEBUG_PRINT1 ("The compiled pattern is:\n");
4001
  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4002
  DEBUG_PRINT1 ("The string to match is: `");
4003
  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4004
  DEBUG_PRINT1 ("'\n");
4005
 
4006
  /* This loops over pattern commands.  It exits by returning from the
4007
     function if the match is complete, or it drops through if the match
4008
     fails at this starting point in the input data.  */
4009
  for (;;)
4010
    {
4011
#ifdef _LIBC
4012
      DEBUG_PRINT2 ("\n%p: ", p);
4013
#else
4014
      DEBUG_PRINT2 ("\n0x%x: ", p);
4015
#endif
4016
 
4017
      if (p == pend)
4018
        { /* End of pattern means we might have succeeded.  */
4019
          DEBUG_PRINT1 ("end of pattern ... ");
4020
 
4021
          /* If we haven't matched the entire string, and we want the
4022
             longest match, try backtracking.  */
4023
          if (d != end_match_2)
4024
            {
4025
              /* 1 if this match ends in the same string (string1 or string2)
4026
                 as the best previous match.  */
4027
              boolean same_str_p = (FIRST_STRING_P (match_end)
4028
                                    == MATCHING_IN_FIRST_STRING);
4029
              /* 1 if this match is the best seen so far.  */
4030
              boolean best_match_p;
4031
 
4032
              /* AIX compiler got confused when this was combined
4033
                 with the previous declaration.  */
4034
              if (same_str_p)
4035
                best_match_p = d > match_end;
4036
              else
4037
                best_match_p = !MATCHING_IN_FIRST_STRING;
4038
 
4039
              DEBUG_PRINT1 ("backtracking.\n");
4040
 
4041
              if (!FAIL_STACK_EMPTY ())
4042
                { /* More failure points to try.  */
4043
 
4044
                  /* If exceeds best match so far, save it.  */
4045
                  if (!best_regs_set || best_match_p)
4046
                    {
4047
                      best_regs_set = true;
4048
                      match_end = d;
4049
 
4050
                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4051
 
4052
                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4053
                        {
4054
                          best_regstart[mcnt] = regstart[mcnt];
4055
                          best_regend[mcnt] = regend[mcnt];
4056
                        }
4057
                    }
4058
                  goto fail;
4059
                }
4060
 
4061
              /* If no failure points, don't restore garbage.  And if
4062
                 last match is real best match, don't restore second
4063
                 best one. */
4064
              else if (best_regs_set && !best_match_p)
4065
                {
4066
                restore_best_regs:
4067
                  /* Restore best match.  It may happen that `dend ==
4068
                     end_match_1' while the restored d is in string2.
4069
                     For example, the pattern `x.*y.*z' against the
4070
                     strings `x-' and `y-z-', if the two strings are
4071
                     not consecutive in memory.  */
4072
                  DEBUG_PRINT1 ("Restoring best registers.\n");
4073
 
4074
                  d = match_end;
4075
                  dend = ((d >= string1 && d <= end1)
4076
                           ? end_match_1 : end_match_2);
4077
 
4078
                  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4079
                    {
4080
                      regstart[mcnt] = best_regstart[mcnt];
4081
                      regend[mcnt] = best_regend[mcnt];
4082
                    }
4083
                }
4084
            } /* d != end_match_2 */
4085
 
4086
        succeed_label:
4087
          DEBUG_PRINT1 ("Accepting match.\n");
4088
 
4089
          /* If caller wants register contents data back, do it.  */
4090
          if (regs && !bufp->no_sub)
4091
            {
4092
              /* Have the register data arrays been allocated?  */
4093
              if (bufp->regs_allocated == REGS_UNALLOCATED)
4094
                { /* No.  So allocate them with malloc.  We need one
4095
                     extra element beyond `num_regs' for the `-1' marker
4096
                     GNU code uses.  */
4097
                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4098
                  regs->start = TALLOC (regs->num_regs, regoff_t);
4099
                  regs->end = TALLOC (regs->num_regs, regoff_t);
4100
                  if (regs->start == NULL || regs->end == NULL)
4101
                    {
4102
                      FREE_VARIABLES ();
4103
                      return -2;
4104
                    }
4105
                  bufp->regs_allocated = REGS_REALLOCATE;
4106
                }
4107
              else if (bufp->regs_allocated == REGS_REALLOCATE)
4108
                { /* Yes.  If we need more elements than were already
4109
                     allocated, reallocate them.  If we need fewer, just
4110
                     leave it alone.  */
4111
                  if (regs->num_regs < num_regs + 1)
4112
                    {
4113
                      regs->num_regs = num_regs + 1;
4114
                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4115
                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4116
                      if (regs->start == NULL || regs->end == NULL)
4117
                        {
4118
                          FREE_VARIABLES ();
4119
                          return -2;
4120
                        }
4121
                    }
4122
                }
4123
              else
4124
                {
4125
                  /* These braces fend off a "empty body in an else-statement"
4126
                     warning under GCC when assert expands to nothing.  */
4127
                  assert (bufp->regs_allocated == REGS_FIXED);
4128
                }
4129
 
4130
              /* Convert the pointer data in `regstart' and `regend' to
4131
                 indices.  Register zero has to be set differently,
4132
                 since we haven't kept track of any info for it.  */
4133
              if (regs->num_regs > 0)
4134
                {
4135
                  regs->start[0] = pos;
4136
                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4137
                                  ? ((regoff_t) (d - string1))
4138
                                  : ((regoff_t) (d - string2 + size1)));
4139
                }
4140
 
4141
              /* Go through the first `min (num_regs, regs->num_regs)'
4142
                 registers, since that is all we initialized.  */
4143
              for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4144
                   mcnt++)
4145
                {
4146
                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4147
                    regs->start[mcnt] = regs->end[mcnt] = -1;
4148
                  else
4149
                    {
4150
                      regs->start[mcnt]
4151
                        = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4152
                      regs->end[mcnt]
4153
                        = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4154
                    }
4155
                }
4156
 
4157
              /* If the regs structure we return has more elements than
4158
                 were in the pattern, set the extra elements to -1.  If
4159
                 we (re)allocated the registers, this is the case,
4160
                 because we always allocate enough to have at least one
4161
                 -1 at the end.  */
4162
              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4163
                regs->start[mcnt] = regs->end[mcnt] = -1;
4164
            } /* regs && !bufp->no_sub */
4165
 
4166
          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4167
                        nfailure_points_pushed, nfailure_points_popped,
4168
                        nfailure_points_pushed - nfailure_points_popped);
4169
          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4170
 
4171
          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4172
                            ? string1
4173
                            : string2 - size1);
4174
 
4175
          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4176
 
4177
          FREE_VARIABLES ();
4178
          return mcnt;
4179
        }
4180
 
4181
      /* Otherwise match next pattern command.  */
4182
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4183
        {
4184
        /* Ignore these.  Used to ignore the n of succeed_n's which
4185
           currently have n == 0.  */
4186
        case no_op:
4187
          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4188
          break;
4189
 
4190
        case succeed:
4191
          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4192
          goto succeed_label;
4193
 
4194
        /* Match the next n pattern characters exactly.  The following
4195
           byte in the pattern defines n, and the n bytes after that
4196
           are the characters to match.  */
4197
        case exactn:
4198
          mcnt = *p++;
4199
          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4200
 
4201
          /* This is written out as an if-else so we don't waste time
4202
             testing `translate' inside the loop.  */
4203
          if (translate)
4204
            {
4205
              do
4206
                {
4207
                  PREFETCH ();
4208
                  if ((unsigned char) translate[(unsigned char) *d++]
4209
                      != (unsigned char) *p++)
4210
                    goto fail;
4211
                }
4212
              while (--mcnt);
4213
            }
4214
          else
4215
            {
4216
              do
4217
                {
4218
                  PREFETCH ();
4219
                  if (*d++ != (char) *p++) goto fail;
4220
                }
4221
              while (--mcnt);
4222
            }
4223
          SET_REGS_MATCHED ();
4224
          break;
4225
 
4226
 
4227
        /* Match any character except possibly a newline or a null.  */
4228
        case anychar:
4229
          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4230
 
4231
          PREFETCH ();
4232
 
4233
          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4234
              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4235
            goto fail;
4236
 
4237
          SET_REGS_MATCHED ();
4238
          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4239
          d++;
4240
          break;
4241
 
4242
 
4243
        case charset:
4244
        case charset_not:
4245
          {
4246
            register unsigned char c;
4247
            boolean not = (re_opcode_t) *(p - 1) == charset_not;
4248
 
4249
            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4250
 
4251
            PREFETCH ();
4252
            c = TRANSLATE (*d); /* The character to match.  */
4253
 
4254
            /* Cast to `unsigned' instead of `unsigned char' in case the
4255
               bit list is a full 32 bytes long.  */
4256
            if (c < (unsigned) (*p * BYTEWIDTH)
4257
                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4258
              not = !not;
4259
 
4260
            p += 1 + *p;
4261
 
4262
            if (!not) goto fail;
4263
 
4264
            SET_REGS_MATCHED ();
4265
            d++;
4266
            break;
4267
          }
4268
 
4269
 
4270
        /* The beginning of a group is represented by start_memory.
4271
           The arguments are the register number in the next byte, and the
4272
           number of groups inner to this one in the next.  The text
4273
           matched within the group is recorded (in the internal
4274
           registers data structure) under the register number.  */
4275
        case start_memory:
4276
          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4277
 
4278
          /* Find out if this group can match the empty string.  */
4279
          p1 = p;               /* To send to group_match_null_string_p.  */
4280
 
4281
          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4282
            REG_MATCH_NULL_STRING_P (reg_info[*p])
4283
              = group_match_null_string_p (&p1, pend, reg_info);
4284
 
4285
          /* Save the position in the string where we were the last time
4286
             we were at this open-group operator in case the group is
4287
             operated upon by a repetition operator, e.g., with `(a*)*b'
4288
             against `ab'; then we want to ignore where we are now in
4289
             the string in case this attempt to match fails.  */
4290
          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4291
                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4292
                             : regstart[*p];
4293
          DEBUG_PRINT2 ("  old_regstart: %d\n",
4294
                         POINTER_TO_OFFSET (old_regstart[*p]));
4295
 
4296
          regstart[*p] = d;
4297
          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4298
 
4299
          IS_ACTIVE (reg_info[*p]) = 1;
4300
          MATCHED_SOMETHING (reg_info[*p]) = 0;
4301
 
4302
          /* Clear this whenever we change the register activity status.  */
4303
          set_regs_matched_done = 0;
4304
 
4305
          /* This is the new highest active register.  */
4306
          highest_active_reg = *p;
4307
 
4308
          /* If nothing was active before, this is the new lowest active
4309
             register.  */
4310
          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4311
            lowest_active_reg = *p;
4312
 
4313
          /* Move past the register number and inner group count.  */
4314
          p += 2;
4315
          just_past_start_mem = p;
4316
 
4317
          break;
4318
 
4319
 
4320
        /* The stop_memory opcode represents the end of a group.  Its
4321
           arguments are the same as start_memory's: the register
4322
           number, and the number of inner groups.  */
4323
        case stop_memory:
4324
          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4325
 
4326
          /* We need to save the string position the last time we were at
4327
             this close-group operator in case the group is operated
4328
             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4329
             against `aba'; then we want to ignore where we are now in
4330
             the string in case this attempt to match fails.  */
4331
          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4332
                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4333
                           : regend[*p];
4334
          DEBUG_PRINT2 ("      old_regend: %d\n",
4335
                         POINTER_TO_OFFSET (old_regend[*p]));
4336
 
4337
          regend[*p] = d;
4338
          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4339
 
4340
          /* This register isn't active anymore.  */
4341
          IS_ACTIVE (reg_info[*p]) = 0;
4342
 
4343
          /* Clear this whenever we change the register activity status.  */
4344
          set_regs_matched_done = 0;
4345
 
4346
          /* If this was the only register active, nothing is active
4347
             anymore.  */
4348
          if (lowest_active_reg == highest_active_reg)
4349
            {
4350
              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4351
              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4352
            }
4353
          else
4354
            { /* We must scan for the new highest active register, since
4355
                 it isn't necessarily one less than now: consider
4356
                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4357
                 new highest active register is 1.  */
4358
              unsigned char r = *p - 1;
4359
              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4360
                r--;
4361
 
4362
              /* If we end up at register zero, that means that we saved
4363
                 the registers as the result of an `on_failure_jump', not
4364
                 a `start_memory', and we jumped to past the innermost
4365
                 `stop_memory'.  For example, in ((.)*) we save
4366
                 registers 1 and 2 as a result of the *, but when we pop
4367
                 back to the second ), we are at the stop_memory 1.
4368
                 Thus, nothing is active.  */
4369
              if (r == 0)
4370
                {
4371
                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4372
                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4373
                }
4374
              else
4375
                highest_active_reg = r;
4376
            }
4377
 
4378
          /* If just failed to match something this time around with a
4379
             group that's operated on by a repetition operator, try to
4380
             force exit from the ``loop'', and restore the register
4381
             information for this group that we had before trying this
4382
             last match.  */
4383
          if ((!MATCHED_SOMETHING (reg_info[*p])
4384
               || just_past_start_mem == p - 1)
4385
              && (p + 2) < pend)
4386
            {
4387
              boolean is_a_jump_n = false;
4388
 
4389
              p1 = p + 2;
4390
              mcnt = 0;
4391
              switch ((re_opcode_t) *p1++)
4392
                {
4393
                  case jump_n:
4394
                    is_a_jump_n = true;
4395
                  case pop_failure_jump:
4396
                  case maybe_pop_jump:
4397
                  case jump:
4398
                  case dummy_failure_jump:
4399
                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4400
                    if (is_a_jump_n)
4401
                      p1 += 2;
4402
                    break;
4403
 
4404
                  default:
4405
                    /* do nothing */ ;
4406
                }
4407
              p1 += mcnt;
4408
 
4409
              /* If the next operation is a jump backwards in the pattern
4410
                 to an on_failure_jump right before the start_memory
4411
                 corresponding to this stop_memory, exit from the loop
4412
                 by forcing a failure after pushing on the stack the
4413
                 on_failure_jump's jump in the pattern, and d.  */
4414
              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4415
                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4416
                {
4417
                  /* If this group ever matched anything, then restore
4418
                     what its registers were before trying this last
4419
                     failed match, e.g., with `(a*)*b' against `ab' for
4420
                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4421
                     against `aba' for regend[3].
4422
 
4423
                     Also restore the registers for inner groups for,
4424
                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4425
                     otherwise get trashed).  */
4426
 
4427
                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4428
                    {
4429
                      unsigned r;
4430
 
4431
                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4432
 
4433
                      /* Restore this and inner groups' (if any) registers.  */
4434
                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4435
                           r++)
4436
                        {
4437
                          regstart[r] = old_regstart[r];
4438
 
4439
                          /* xx why this test?  */
4440
                          if (old_regend[r] >= regstart[r])
4441
                            regend[r] = old_regend[r];
4442
                        }
4443
                    }
4444
                  p1++;
4445
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4446
                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4447
 
4448
                  goto fail;
4449
                }
4450
            }
4451
 
4452
          /* Move past the register number and the inner group count.  */
4453
          p += 2;
4454
          break;
4455
 
4456
 
4457
        /* \<digit> has been turned into a `duplicate' command which is
4458
           followed by the numeric value of <digit> as the register number.  */
4459
        case duplicate:
4460
          {
4461
            register const char *d2, *dend2;
4462
            int regno = *p++;   /* Get which register to match against.  */
4463
            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4464
 
4465
            /* Can't back reference a group which we've never matched.  */
4466
            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4467
              goto fail;
4468
 
4469
            /* Where in input to try to start matching.  */
4470
            d2 = regstart[regno];
4471
 
4472
            /* Where to stop matching; if both the place to start and
4473
               the place to stop matching are in the same string, then
4474
               set to the place to stop, otherwise, for now have to use
4475
               the end of the first string.  */
4476
 
4477
            dend2 = ((FIRST_STRING_P (regstart[regno])
4478
                      == FIRST_STRING_P (regend[regno]))
4479
                     ? regend[regno] : end_match_1);
4480
            for (;;)
4481
              {
4482
                /* If necessary, advance to next segment in register
4483
                   contents.  */
4484
                while (d2 == dend2)
4485
                  {
4486
                    if (dend2 == end_match_2) break;
4487
                    if (dend2 == regend[regno]) break;
4488
 
4489
                    /* End of string1 => advance to string2. */
4490
                    d2 = string2;
4491
                    dend2 = regend[regno];
4492
                  }
4493
                /* At end of register contents => success */
4494
                if (d2 == dend2) break;
4495
 
4496
                /* If necessary, advance to next segment in data.  */
4497
                PREFETCH ();
4498
 
4499
                /* How many characters left in this segment to match.  */
4500
                mcnt = dend - d;
4501
 
4502
                /* Want how many consecutive characters we can match in
4503
                   one shot, so, if necessary, adjust the count.  */
4504
                if (mcnt > dend2 - d2)
4505
                  mcnt = dend2 - d2;
4506
 
4507
                /* Compare that many; failure if mismatch, else move
4508
                   past them.  */
4509
                if (translate
4510
                    ? bcmp_translate (d, d2, mcnt, translate)
4511
                    : memcmp (d, d2, mcnt))
4512
                  goto fail;
4513
                d += mcnt, d2 += mcnt;
4514
 
4515
                /* Do this because we've match some characters.  */
4516
                SET_REGS_MATCHED ();
4517
              }
4518
          }
4519
          break;
4520
 
4521
 
4522
        /* begline matches the empty string at the beginning of the string
4523
           (unless `not_bol' is set in `bufp'), and, if
4524
           `newline_anchor' is set, after newlines.  */
4525
        case begline:
4526
          DEBUG_PRINT1 ("EXECUTING begline.\n");
4527
 
4528
          if (AT_STRINGS_BEG (d))
4529
            {
4530
              if (!bufp->not_bol) break;
4531
            }
4532
          else if (d[-1] == '\n' && bufp->newline_anchor)
4533
            {
4534
              break;
4535
            }
4536
          /* In all other cases, we fail.  */
4537
          goto fail;
4538
 
4539
 
4540
        /* endline is the dual of begline.  */
4541
        case endline:
4542
          DEBUG_PRINT1 ("EXECUTING endline.\n");
4543
 
4544
          if (AT_STRINGS_END (d))
4545
            {
4546
              if (!bufp->not_eol) break;
4547
            }
4548
 
4549
          /* We have to ``prefetch'' the next character.  */
4550
          else if ((d == end1 ? *string2 : *d) == '\n'
4551
                   && bufp->newline_anchor)
4552
            {
4553
              break;
4554
            }
4555
          goto fail;
4556
 
4557
 
4558
        /* Match at the very beginning of the data.  */
4559
        case begbuf:
4560
          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4561
          if (AT_STRINGS_BEG (d))
4562
            break;
4563
          goto fail;
4564
 
4565
 
4566
        /* Match at the very end of the data.  */
4567
        case endbuf:
4568
          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4569
          if (AT_STRINGS_END (d))
4570
            break;
4571
          goto fail;
4572
 
4573
 
4574
        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4575
           pushes NULL as the value for the string on the stack.  Then
4576
           `pop_failure_point' will keep the current value for the
4577
           string, instead of restoring it.  To see why, consider
4578
           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4579
           then the . fails against the \n.  But the next thing we want
4580
           to do is match the \n against the \n; if we restored the
4581
           string value, we would be back at the foo.
4582
 
4583
           Because this is used only in specific cases, we don't need to
4584
           check all the things that `on_failure_jump' does, to make
4585
           sure the right things get saved on the stack.  Hence we don't
4586
           share its code.  The only reason to push anything on the
4587
           stack at all is that otherwise we would have to change
4588
           `anychar's code to do something besides goto fail in this
4589
           case; that seems worse than this.  */
4590
        case on_failure_keep_string_jump:
4591
          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4592
 
4593
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4594
#ifdef _LIBC
4595
          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4596
#else
4597
          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4598
#endif
4599
 
4600
          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4601
          break;
4602
 
4603
 
4604
        /* Uses of on_failure_jump:
4605
 
4606
           Each alternative starts with an on_failure_jump that points
4607
           to the beginning of the next alternative.  Each alternative
4608
           except the last ends with a jump that in effect jumps past
4609
           the rest of the alternatives.  (They really jump to the
4610
           ending jump of the following alternative, because tensioning
4611
           these jumps is a hassle.)
4612
 
4613
           Repeats start with an on_failure_jump that points past both
4614
           the repetition text and either the following jump or
4615
           pop_failure_jump back to this on_failure_jump.  */
4616
        case on_failure_jump:
4617
        on_failure:
4618
          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4619
 
4620
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4621
#ifdef _LIBC
4622
          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4623
#else
4624
          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4625
#endif
4626
 
4627
          /* If this on_failure_jump comes right before a group (i.e.,
4628
             the original * applied to a group), save the information
4629
             for that group and all inner ones, so that if we fail back
4630
             to this point, the group's information will be correct.
4631
             For example, in \(a*\)*\1, we need the preceding group,
4632
             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4633
 
4634
          /* We can't use `p' to check ahead because we push
4635
             a failure point to `p + mcnt' after we do this.  */
4636
          p1 = p;
4637
 
4638
          /* We need to skip no_op's before we look for the
4639
             start_memory in case this on_failure_jump is happening as
4640
             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4641
             against aba.  */
4642
          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4643
            p1++;
4644
 
4645
          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4646
            {
4647
              /* We have a new highest active register now.  This will
4648
                 get reset at the start_memory we are about to get to,
4649
                 but we will have saved all the registers relevant to
4650
                 this repetition op, as described above.  */
4651
              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4652
              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4653
                lowest_active_reg = *(p1 + 1);
4654
            }
4655
 
4656
          DEBUG_PRINT1 (":\n");
4657
          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4658
          break;
4659
 
4660
 
4661
        /* A smart repeat ends with `maybe_pop_jump'.
4662
           We change it to either `pop_failure_jump' or `jump'.  */
4663
        case maybe_pop_jump:
4664
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4665
          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4666
          {
4667
            register unsigned char *p2 = p;
4668
 
4669
            /* Compare the beginning of the repeat with what in the
4670
               pattern follows its end. If we can establish that there
4671
               is nothing that they would both match, i.e., that we
4672
               would have to backtrack because of (as in, e.g., `a*a')
4673
               then we can change to pop_failure_jump, because we'll
4674
               never have to backtrack.
4675
 
4676
               This is not true in the case of alternatives: in
4677
               `(a|ab)*' we do need to backtrack to the `ab' alternative
4678
               (e.g., if the string was `ab').  But instead of trying to
4679
               detect that here, the alternative has put on a dummy
4680
               failure point which is what we will end up popping.  */
4681
 
4682
            /* Skip over open/close-group commands.
4683
               If what follows this loop is a ...+ construct,
4684
               look at what begins its body, since we will have to
4685
               match at least one of that.  */
4686
            while (1)
4687
              {
4688
                if (p2 + 2 < pend
4689
                    && ((re_opcode_t) *p2 == stop_memory
4690
                        || (re_opcode_t) *p2 == start_memory))
4691
                  p2 += 3;
4692
                else if (p2 + 6 < pend
4693
                         && (re_opcode_t) *p2 == dummy_failure_jump)
4694
                  p2 += 6;
4695
                else
4696
                  break;
4697
              }
4698
 
4699
            p1 = p + mcnt;
4700
            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4701
               to the `maybe_finalize_jump' of this case.  Examine what
4702
               follows.  */
4703
 
4704
            /* If we're at the end of the pattern, we can change.  */
4705
            if (p2 == pend)
4706
              {
4707
                /* Consider what happens when matching ":\(.*\)"
4708
                   against ":/".  I don't really understand this code
4709
                   yet.  */
4710
                p[-3] = (unsigned char) pop_failure_jump;
4711
                DEBUG_PRINT1
4712
                  ("  End of pattern: change to `pop_failure_jump'.\n");
4713
              }
4714
 
4715
            else if ((re_opcode_t) *p2 == exactn
4716
                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4717
              {
4718
                register unsigned char c
4719
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4720
 
4721
                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4722
                  {
4723
                    p[-3] = (unsigned char) pop_failure_jump;
4724
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4725
                                  c, p1[5]);
4726
                  }
4727
 
4728
                else if ((re_opcode_t) p1[3] == charset
4729
                         || (re_opcode_t) p1[3] == charset_not)
4730
                  {
4731
                    int not = (re_opcode_t) p1[3] == charset_not;
4732
 
4733
                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4734
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4735
                      not = !not;
4736
 
4737
                    /* `not' is equal to 1 if c would match, which means
4738
                        that we can't change to pop_failure_jump.  */
4739
                    if (!not)
4740
                      {
4741
                        p[-3] = (unsigned char) pop_failure_jump;
4742
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4743
                      }
4744
                  }
4745
              }
4746
            else if ((re_opcode_t) *p2 == charset)
4747
              {
4748
#ifdef DEBUG
4749
                register unsigned char c
4750
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4751
#endif
4752
 
4753
#if 0
4754
                if ((re_opcode_t) p1[3] == exactn
4755
                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4756
                          && (p2[2 + p1[5] / BYTEWIDTH]
4757
                              & (1 << (p1[5] % BYTEWIDTH)))))
4758
#else
4759
                if ((re_opcode_t) p1[3] == exactn
4760
                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4761
                          && (p2[2 + p1[4] / BYTEWIDTH]
4762
                              & (1 << (p1[4] % BYTEWIDTH)))))
4763
#endif
4764
                  {
4765
                    p[-3] = (unsigned char) pop_failure_jump;
4766
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4767
                                  c, p1[5]);
4768
                  }
4769
 
4770
                else if ((re_opcode_t) p1[3] == charset_not)
4771
                  {
4772
                    int idx;
4773
                    /* We win if the charset_not inside the loop
4774
                       lists every character listed in the charset after.  */
4775
                    for (idx = 0; idx < (int) p2[1]; idx++)
4776
                      if (! (p2[2 + idx] == 0
4777
                             || (idx < (int) p1[4]
4778
                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4779
                        break;
4780
 
4781
                    if (idx == p2[1])
4782
                      {
4783
                        p[-3] = (unsigned char) pop_failure_jump;
4784
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4785
                      }
4786
                  }
4787
                else if ((re_opcode_t) p1[3] == charset)
4788
                  {
4789
                    int idx;
4790
                    /* We win if the charset inside the loop
4791
                       has no overlap with the one after the loop.  */
4792
                    for (idx = 0;
4793
                         idx < (int) p2[1] && idx < (int) p1[4];
4794
                         idx++)
4795
                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4796
                        break;
4797
 
4798
                    if (idx == p2[1] || idx == p1[4])
4799
                      {
4800
                        p[-3] = (unsigned char) pop_failure_jump;
4801
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4802
                      }
4803
                  }
4804
              }
4805
          }
4806
          p -= 2;               /* Point at relative address again.  */
4807
          if ((re_opcode_t) p[-1] != pop_failure_jump)
4808
            {
4809
              p[-1] = (unsigned char) jump;
4810
              DEBUG_PRINT1 ("  Match => jump.\n");
4811
              goto unconditional_jump;
4812
            }
4813
        /* Note fall through.  */
4814
 
4815
 
4816
        /* The end of a simple repeat has a pop_failure_jump back to
4817
           its matching on_failure_jump, where the latter will push a
4818
           failure point.  The pop_failure_jump takes off failure
4819
           points put on by this pop_failure_jump's matching
4820
           on_failure_jump; we got through the pattern to here from the
4821
           matching on_failure_jump, so didn't fail.  */
4822
        case pop_failure_jump:
4823
          {
4824
            /* We need to pass separate storage for the lowest and
4825
               highest registers, even though we don't care about the
4826
               actual values.  Otherwise, we will restore only one
4827
               register from the stack, since lowest will == highest in
4828
               `pop_failure_point'.  */
4829
            active_reg_t dummy_low_reg, dummy_high_reg;
4830
            unsigned char *pdummy;
4831
            const char *sdummy;
4832
 
4833
            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4834
            POP_FAILURE_POINT (sdummy, pdummy,
4835
                               dummy_low_reg, dummy_high_reg,
4836
                               reg_dummy, reg_dummy, reg_info_dummy);
4837
          }
4838
          /* Note fall through.  */
4839
 
4840
        unconditional_jump:
4841
#ifdef _LIBC
4842
          DEBUG_PRINT2 ("\n%p: ", p);
4843
#else
4844
          DEBUG_PRINT2 ("\n0x%x: ", p);
4845
#endif
4846
          /* Note fall through.  */
4847
 
4848
        /* Unconditionally jump (without popping any failure points).  */
4849
        case jump:
4850
          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4851
          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4852
          p += mcnt;                            /* Do the jump.  */
4853
#ifdef _LIBC
4854
          DEBUG_PRINT2 ("(to %p).\n", p);
4855
#else
4856
          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4857
#endif
4858
          break;
4859
 
4860
 
4861
        /* We need this opcode so we can detect where alternatives end
4862
           in `group_match_null_string_p' et al.  */
4863
        case jump_past_alt:
4864
          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4865
          goto unconditional_jump;
4866
 
4867
 
4868
        /* Normally, the on_failure_jump pushes a failure point, which
4869
           then gets popped at pop_failure_jump.  We will end up at
4870
           pop_failure_jump, also, and with a pattern of, say, `a+', we
4871
           are skipping over the on_failure_jump, so we have to push
4872
           something meaningless for pop_failure_jump to pop.  */
4873
        case dummy_failure_jump:
4874
          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4875
          /* It doesn't matter what we push for the string here.  What
4876
             the code at `fail' tests is the value for the pattern.  */
4877
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4878
          goto unconditional_jump;
4879
 
4880
 
4881
        /* At the end of an alternative, we need to push a dummy failure
4882
           point in case we are followed by a `pop_failure_jump', because
4883
           we don't want the failure point for the alternative to be
4884
           popped.  For example, matching `(a|ab)*' against `aab'
4885
           requires that we match the `ab' alternative.  */
4886
        case push_dummy_failure:
4887
          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4888
          /* See comments just above at `dummy_failure_jump' about the
4889
             two zeroes.  */
4890
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4891
          break;
4892
 
4893
        /* Have to succeed matching what follows at least n times.
4894
           After that, handle like `on_failure_jump'.  */
4895
        case succeed_n:
4896
          EXTRACT_NUMBER (mcnt, p + 2);
4897
          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4898
 
4899
          assert (mcnt >= 0);
4900
          /* Originally, this is how many times we HAVE to succeed.  */
4901
          if (mcnt > 0)
4902
            {
4903
               mcnt--;
4904
               p += 2;
4905
               STORE_NUMBER_AND_INCR (p, mcnt);
4906
#ifdef _LIBC
4907
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4908
#else
4909
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4910
#endif
4911
            }
4912
          else if (mcnt == 0)
4913
            {
4914
#ifdef _LIBC
4915
              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4916
#else
4917
              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4918
#endif
4919
              p[2] = (unsigned char) no_op;
4920
              p[3] = (unsigned char) no_op;
4921
              goto on_failure;
4922
            }
4923
          break;
4924
 
4925
        case jump_n:
4926
          EXTRACT_NUMBER (mcnt, p + 2);
4927
          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4928
 
4929
          /* Originally, this is how many times we CAN jump.  */
4930
          if (mcnt)
4931
            {
4932
               mcnt--;
4933
               STORE_NUMBER (p + 2, mcnt);
4934
#ifdef _LIBC
4935
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4936
#else
4937
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4938
#endif
4939
               goto unconditional_jump;
4940
            }
4941
          /* If don't have to jump any more, skip over the rest of command.  */
4942
          else
4943
            p += 4;
4944
          break;
4945
 
4946
        case set_number_at:
4947
          {
4948
            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4949
 
4950
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4951
            p1 = p + mcnt;
4952
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4953
#ifdef _LIBC
4954
            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4955
#else
4956
            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4957
#endif
4958
            STORE_NUMBER (p1, mcnt);
4959
            break;
4960
          }
4961
 
4962
#if 0
4963
        /* The DEC Alpha C compiler 3.x generates incorrect code for the
4964
           test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4965
           AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4966
           macro and introducing temporary variables works around the bug.  */
4967
 
4968
        case wordbound:
4969
          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4970
          if (AT_WORD_BOUNDARY (d))
4971
            break;
4972
          goto fail;
4973
 
4974
        case notwordbound:
4975
          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4976
          if (AT_WORD_BOUNDARY (d))
4977
            goto fail;
4978
          break;
4979
#else
4980
        case wordbound:
4981
        {
4982
          boolean prevchar, thischar;
4983
 
4984
          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4985
          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4986
            break;
4987
 
4988
          prevchar = WORDCHAR_P (d - 1);
4989
          thischar = WORDCHAR_P (d);
4990
          if (prevchar != thischar)
4991
            break;
4992
          goto fail;
4993
        }
4994
 
4995
      case notwordbound:
4996
        {
4997
          boolean prevchar, thischar;
4998
 
4999
          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5000
          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5001
            goto fail;
5002
 
5003
          prevchar = WORDCHAR_P (d - 1);
5004
          thischar = WORDCHAR_P (d);
5005
          if (prevchar != thischar)
5006
            goto fail;
5007
          break;
5008
        }
5009
#endif
5010
 
5011
        case wordbeg:
5012
          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5013
          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5014
            break;
5015
          goto fail;
5016
 
5017
        case wordend:
5018
          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5019
          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5020
              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5021
            break;
5022
          goto fail;
5023
 
5024
#ifdef emacs
5025
        case before_dot:
5026
          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5027
          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5028
            goto fail;
5029
          break;
5030
 
5031
        case at_dot:
5032
          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5033
          if (PTR_CHAR_POS ((unsigned char *) d) != point)
5034
            goto fail;
5035
          break;
5036
 
5037
        case after_dot:
5038
          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5039
          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5040
            goto fail;
5041
          break;
5042
 
5043
        case syntaxspec:
5044
          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5045
          mcnt = *p++;
5046
          goto matchsyntax;
5047
 
5048
        case wordchar:
5049
          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5050
          mcnt = (int) Sword;
5051
        matchsyntax:
5052
          PREFETCH ();
5053
          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5054
          d++;
5055
          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5056
            goto fail;
5057
          SET_REGS_MATCHED ();
5058
          break;
5059
 
5060
        case notsyntaxspec:
5061
          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5062
          mcnt = *p++;
5063
          goto matchnotsyntax;
5064
 
5065
        case notwordchar:
5066
          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5067
          mcnt = (int) Sword;
5068
        matchnotsyntax:
5069
          PREFETCH ();
5070
          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5071
          d++;
5072
          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5073
            goto fail;
5074
          SET_REGS_MATCHED ();
5075
          break;
5076
 
5077
#else /* not emacs */
5078
        case wordchar:
5079
          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5080
          PREFETCH ();
5081
          if (!WORDCHAR_P (d))
5082
            goto fail;
5083
          SET_REGS_MATCHED ();
5084
          d++;
5085
          break;
5086
 
5087
        case notwordchar:
5088
          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5089
          PREFETCH ();
5090
          if (WORDCHAR_P (d))
5091
            goto fail;
5092
          SET_REGS_MATCHED ();
5093
          d++;
5094
          break;
5095
#endif /* not emacs */
5096
 
5097
        default:
5098
          abort ();
5099
        }
5100
      continue;  /* Successfully executed one pattern command; keep going.  */
5101
 
5102
 
5103
    /* We goto here if a matching operation fails. */
5104
    fail:
5105
      if (!FAIL_STACK_EMPTY ())
5106
        { /* A restart point is known.  Restore to that state.  */
5107
          DEBUG_PRINT1 ("\nFAIL:\n");
5108
          POP_FAILURE_POINT (d, p,
5109
                             lowest_active_reg, highest_active_reg,
5110
                             regstart, regend, reg_info);
5111
 
5112
          /* If this failure point is a dummy, try the next one.  */
5113
          if (!p)
5114
            goto fail;
5115
 
5116
          /* If we failed to the end of the pattern, don't examine *p.  */
5117
          assert (p <= pend);
5118
          if (p < pend)
5119
            {
5120
              boolean is_a_jump_n = false;
5121
 
5122
              /* If failed to a backwards jump that's part of a repetition
5123
                 loop, need to pop this failure point and use the next one.  */
5124
              switch ((re_opcode_t) *p)
5125
                {
5126
                case jump_n:
5127
                  is_a_jump_n = true;
5128
                case maybe_pop_jump:
5129
                case pop_failure_jump:
5130
                case jump:
5131
                  p1 = p + 1;
5132
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5133
                  p1 += mcnt;
5134
 
5135
                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5136
                      || (!is_a_jump_n
5137
                          && (re_opcode_t) *p1 == on_failure_jump))
5138
                    goto fail;
5139
                  break;
5140
                default:
5141
                  /* do nothing */ ;
5142
                }
5143
            }
5144
 
5145
          if (d >= string1 && d <= end1)
5146
            dend = end_match_1;
5147
        }
5148
      else
5149
        break;   /* Matching at this starting point really fails.  */
5150
    } /* for (;;) */
5151
 
5152
  if (best_regs_set)
5153
    goto restore_best_regs;
5154
 
5155
  FREE_VARIABLES ();
5156
 
5157
  return -1;                            /* Failure to match.  */
5158
} /* re_match_2 */
5159
 
5160
/* Subroutine definitions for re_match_2.  */
5161
 
5162
 
5163
/* We are passed P pointing to a register number after a start_memory.
5164
 
5165
   Return true if the pattern up to the corresponding stop_memory can
5166
   match the empty string, and false otherwise.
5167
 
5168
   If we find the matching stop_memory, sets P to point to one past its number.
5169
   Otherwise, sets P to an undefined byte less than or equal to END.
5170
 
5171
   We don't handle duplicates properly (yet).  */
5172
 
5173
static boolean
5174
group_match_null_string_p (p, end, reg_info)
5175
    unsigned char **p, *end;
5176
    register_info_type *reg_info;
5177
{
5178
  int mcnt;
5179
  /* Point to after the args to the start_memory.  */
5180
  unsigned char *p1 = *p + 2;
5181
 
5182
  while (p1 < end)
5183
    {
5184
      /* Skip over opcodes that can match nothing, and return true or
5185
         false, as appropriate, when we get to one that can't, or to the
5186
         matching stop_memory.  */
5187
 
5188
      switch ((re_opcode_t) *p1)
5189
        {
5190
        /* Could be either a loop or a series of alternatives.  */
5191
        case on_failure_jump:
5192
          p1++;
5193
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5194
 
5195
          /* If the next operation is not a jump backwards in the
5196
             pattern.  */
5197
 
5198
          if (mcnt >= 0)
5199
            {
5200
              /* Go through the on_failure_jumps of the alternatives,
5201
                 seeing if any of the alternatives cannot match nothing.
5202
                 The last alternative starts with only a jump,
5203
                 whereas the rest start with on_failure_jump and end
5204
                 with a jump, e.g., here is the pattern for `a|b|c':
5205
 
5206
                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5207
                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5208
                 /exactn/1/c
5209
 
5210
                 So, we have to first go through the first (n-1)
5211
                 alternatives and then deal with the last one separately.  */
5212
 
5213
 
5214
              /* Deal with the first (n-1) alternatives, which start
5215
                 with an on_failure_jump (see above) that jumps to right
5216
                 past a jump_past_alt.  */
5217
 
5218
              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5219
                {
5220
                  /* `mcnt' holds how many bytes long the alternative
5221
                     is, including the ending `jump_past_alt' and
5222
                     its number.  */
5223
 
5224
                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5225
                                                      reg_info))
5226
                    return false;
5227
 
5228
                  /* Move to right after this alternative, including the
5229
                     jump_past_alt.  */
5230
                  p1 += mcnt;
5231
 
5232
                  /* Break if it's the beginning of an n-th alternative
5233
                     that doesn't begin with an on_failure_jump.  */
5234
                  if ((re_opcode_t) *p1 != on_failure_jump)
5235
                    break;
5236
 
5237
                  /* Still have to check that it's not an n-th
5238
                     alternative that starts with an on_failure_jump.  */
5239
                  p1++;
5240
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5241
                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5242
                    {
5243
                      /* Get to the beginning of the n-th alternative.  */
5244
                      p1 -= 3;
5245
                      break;
5246
                    }
5247
                }
5248
 
5249
              /* Deal with the last alternative: go back and get number
5250
                 of the `jump_past_alt' just before it.  `mcnt' contains
5251
                 the length of the alternative.  */
5252
              EXTRACT_NUMBER (mcnt, p1 - 2);
5253
 
5254
              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5255
                return false;
5256
 
5257
              p1 += mcnt;       /* Get past the n-th alternative.  */
5258
            } /* if mcnt > 0 */
5259
          break;
5260
 
5261
 
5262
        case stop_memory:
5263
          assert (p1[1] == **p);
5264
          *p = p1 + 2;
5265
          return true;
5266
 
5267
 
5268
        default:
5269
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5270
            return false;
5271
        }
5272
    } /* while p1 < end */
5273
 
5274
  return false;
5275
} /* group_match_null_string_p */
5276
 
5277
 
5278
/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5279
   It expects P to be the first byte of a single alternative and END one
5280
   byte past the last. The alternative can contain groups.  */
5281
 
5282
static boolean
5283
alt_match_null_string_p (p, end, reg_info)
5284
    unsigned char *p, *end;
5285
    register_info_type *reg_info;
5286
{
5287
  int mcnt;
5288
  unsigned char *p1 = p;
5289
 
5290
  while (p1 < end)
5291
    {
5292
      /* Skip over opcodes that can match nothing, and break when we get
5293
         to one that can't.  */
5294
 
5295
      switch ((re_opcode_t) *p1)
5296
        {
5297
        /* It's a loop.  */
5298
        case on_failure_jump:
5299
          p1++;
5300
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5301
          p1 += mcnt;
5302
          break;
5303
 
5304
        default:
5305
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5306
            return false;
5307
        }
5308
    }  /* while p1 < end */
5309
 
5310
  return true;
5311
} /* alt_match_null_string_p */
5312
 
5313
 
5314
/* Deals with the ops common to group_match_null_string_p and
5315
   alt_match_null_string_p.
5316
 
5317
   Sets P to one after the op and its arguments, if any.  */
5318
 
5319
static boolean
5320
common_op_match_null_string_p (p, end, reg_info)
5321
    unsigned char **p, *end;
5322
    register_info_type *reg_info;
5323
{
5324
  int mcnt;
5325
  boolean ret;
5326
  int reg_no;
5327
  unsigned char *p1 = *p;
5328
 
5329
  switch ((re_opcode_t) *p1++)
5330
    {
5331
    case no_op:
5332
    case begline:
5333
    case endline:
5334
    case begbuf:
5335
    case endbuf:
5336
    case wordbeg:
5337
    case wordend:
5338
    case wordbound:
5339
    case notwordbound:
5340
#ifdef emacs
5341
    case before_dot:
5342
    case at_dot:
5343
    case after_dot:
5344
#endif
5345
      break;
5346
 
5347
    case start_memory:
5348
      reg_no = *p1;
5349
      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5350
      ret = group_match_null_string_p (&p1, end, reg_info);
5351
 
5352
      /* Have to set this here in case we're checking a group which
5353
         contains a group and a back reference to it.  */
5354
 
5355
      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5356
        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5357
 
5358
      if (!ret)
5359
        return false;
5360
      break;
5361
 
5362
    /* If this is an optimized succeed_n for zero times, make the jump.  */
5363
    case jump:
5364
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5365
      if (mcnt >= 0)
5366
        p1 += mcnt;
5367
      else
5368
        return false;
5369
      break;
5370
 
5371
    case succeed_n:
5372
      /* Get to the number of times to succeed.  */
5373
      p1 += 2;
5374
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5375
 
5376
      if (mcnt == 0)
5377
        {
5378
          p1 -= 4;
5379
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5380
          p1 += mcnt;
5381
        }
5382
      else
5383
        return false;
5384
      break;
5385
 
5386
    case duplicate:
5387
      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5388
        return false;
5389
      break;
5390
 
5391
    case set_number_at:
5392
      p1 += 4;
5393
 
5394
    default:
5395
      /* All other opcodes mean we cannot match the empty string.  */
5396
      return false;
5397
  }
5398
 
5399
  *p = p1;
5400
  return true;
5401
} /* common_op_match_null_string_p */
5402
 
5403
 
5404
/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5405
   bytes; nonzero otherwise.  */
5406
 
5407
static int
5408
bcmp_translate (s1, s2, len, translate)
5409
     const char *s1, *s2;
5410
     register int len;
5411
     RE_TRANSLATE_TYPE translate;
5412
{
5413
  register const unsigned char *p1 = (const unsigned char *) s1;
5414
  register const unsigned char *p2 = (const unsigned char *) s2;
5415
  while (len)
5416
    {
5417
      if (translate[*p1++] != translate[*p2++]) return 1;
5418
      len--;
5419
    }
5420
  return 0;
5421
}
5422
 
5423
/* Entry points for GNU code.  */
5424
 
5425
/* re_compile_pattern is the GNU regular expression compiler: it
5426
   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5427
   Returns 0 if the pattern was valid, otherwise an error string.
5428
 
5429
   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5430
   are set in BUFP on entry.
5431
 
5432
   We call regex_compile to do the actual compilation.  */
5433
 
5434
const char *
5435
re_compile_pattern (pattern, length, bufp)
5436
     const char *pattern;
5437
     size_t length;
5438
     struct re_pattern_buffer *bufp;
5439
{
5440
  reg_errcode_t ret;
5441
 
5442
  /* GNU code is written to assume at least RE_NREGS registers will be set
5443
     (and at least one extra will be -1).  */
5444
  bufp->regs_allocated = REGS_UNALLOCATED;
5445
 
5446
  /* And GNU code determines whether or not to get register information
5447
     by passing null for the REGS argument to re_match, etc., not by
5448
     setting no_sub.  */
5449
  bufp->no_sub = 0;
5450
 
5451
  /* Match anchors at newline.  */
5452
  bufp->newline_anchor = 1;
5453
 
5454
  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5455
 
5456
  if (!ret)
5457
    return NULL;
5458
  return gettext (re_error_msgid[(int) ret]);
5459
}
5460
#ifdef _LIBC
5461
weak_alias (__re_compile_pattern, re_compile_pattern)
5462
#endif
5463
 
5464
/* Entry points compatible with 4.2 BSD regex library.  We don't define
5465
   them unless specifically requested.  */
5466
 
5467
#if defined _REGEX_RE_COMP || defined _LIBC
5468
 
5469
/* BSD has one and only one pattern buffer.  */
5470
static struct re_pattern_buffer re_comp_buf;
5471
 
5472
char *
5473
#ifdef _LIBC
5474
/* Make these definitions weak in libc, so POSIX programs can redefine
5475
   these names if they don't use our functions, and still use
5476
   regcomp/regexec below without link errors.  */
5477
weak_function
5478
#endif
5479
re_comp (s)
5480
    const char *s;
5481
{
5482
  reg_errcode_t ret;
5483
 
5484
  if (!s)
5485
    {
5486
      if (!re_comp_buf.buffer)
5487
        return gettext ("No previous regular expression");
5488
      return 0;
5489
    }
5490
 
5491
  if (!re_comp_buf.buffer)
5492
    {
5493
      re_comp_buf.buffer = (unsigned char *) malloc (200);
5494
      if (re_comp_buf.buffer == NULL)
5495
        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5496
      re_comp_buf.allocated = 200;
5497
 
5498
      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5499
      if (re_comp_buf.fastmap == NULL)
5500
        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5501
    }
5502
 
5503
  /* Since `re_exec' always passes NULL for the `regs' argument, we
5504
     don't need to initialize the pattern buffer fields which affect it.  */
5505
 
5506
  /* Match anchors at newlines.  */
5507
  re_comp_buf.newline_anchor = 1;
5508
 
5509
  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5510
 
5511
  if (!ret)
5512
    return NULL;
5513
 
5514
  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5515
  return (char *) gettext (re_error_msgid[(int) ret]);
5516
}
5517
 
5518
 
5519
int
5520
#ifdef _LIBC
5521
weak_function
5522
#endif
5523
re_exec (s)
5524
    const char *s;
5525
{
5526
  const int len = strlen (s);
5527
  return
5528
 
5529
}
5530
 
5531
#endif /* _REGEX_RE_COMP */
5532
 
5533
/* POSIX.2 functions.  Don't define these for Emacs.  */
5534
 
5535
#ifndef emacs
5536
 
5537
/* regcomp takes a regular expression as a string and compiles it.
5538
 
5539
   PREG is a regex_t *.  We do not expect any fields to be initialized,
5540
   since POSIX says we shouldn't.  Thus, we set
5541
 
5542
     `buffer' to the compiled pattern;
5543
     `used' to the length of the compiled pattern;
5544
     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5545
       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5546
       RE_SYNTAX_POSIX_BASIC;
5547
     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5548
     `fastmap' and `fastmap_accurate' to zero;
5549
     `re_nsub' to the number of subexpressions in PATTERN.
5550
 
5551
   PATTERN is the address of the pattern string.
5552
 
5553
   CFLAGS is a series of bits which affect compilation.
5554
 
5555
     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5556
     use POSIX basic syntax.
5557
 
5558
     If REG_NEWLINE is set, then . and [^...] don't match newline.
5559
     Also, regexec will try a match beginning after every newline.
5560
 
5561
     If REG_ICASE is set, then we considers upper- and lowercase
5562
     versions of letters to be equivalent when matching.
5563
 
5564
     If REG_NOSUB is set, then when PREG is passed to regexec, that
5565
     routine will report only success or failure, and nothing about the
5566
     registers.
5567
 
5568
   It returns 0 if it succeeds, nonzero if it doesn't.  (See gnu-regex.h for
5569
   the return codes and their meanings.)  */
5570
 
5571
int
5572
regcomp (preg, pattern, cflags)
5573
    regex_t *preg;
5574
    const char *pattern;
5575
    int cflags;
5576
{
5577
  reg_errcode_t ret;
5578
  reg_syntax_t syntax
5579
    = (cflags & REG_EXTENDED) ?
5580
      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5581
 
5582
  /* regex_compile will allocate the space for the compiled pattern.  */
5583
  preg->buffer = 0;
5584
  preg->allocated = 0;
5585
  preg->used = 0;
5586
 
5587
  /* Don't bother to use a fastmap when searching.  This simplifies the
5588
     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5589
     characters after newlines into the fastmap.  This way, we just try
5590
     every character.  */
5591
  preg->fastmap = 0;
5592
 
5593
  if (cflags & REG_ICASE)
5594
    {
5595
      unsigned i;
5596
 
5597
      preg->translate
5598
        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5599
                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
5600
      if (preg->translate == NULL)
5601
        return (int) REG_ESPACE;
5602
 
5603
      /* Map uppercase characters to corresponding lowercase ones.  */
5604
      for (i = 0; i < CHAR_SET_SIZE; i++)
5605
        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5606
    }
5607
  else
5608
    preg->translate = NULL;
5609
 
5610
  /* If REG_NEWLINE is set, newlines are treated differently.  */
5611
  if (cflags & REG_NEWLINE)
5612
    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5613
      syntax &= ~RE_DOT_NEWLINE;
5614
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5615
      /* It also changes the matching behavior.  */
5616
      preg->newline_anchor = 1;
5617
    }
5618
  else
5619
    preg->newline_anchor = 0;
5620
 
5621
  preg->no_sub = !!(cflags & REG_NOSUB);
5622
 
5623
  /* POSIX says a null character in the pattern terminates it, so we
5624
     can use strlen here in compiling the pattern.  */
5625
  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5626
 
5627
  /* POSIX doesn't distinguish between an unmatched open-group and an
5628
     unmatched close-group: both are REG_EPAREN.  */
5629
  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5630
 
5631
  return (int) ret;
5632
}
5633
#ifdef _LIBC
5634
weak_alias (__regcomp, regcomp)
5635
#endif
5636
 
5637
 
5638
/* regexec searches for a given pattern, specified by PREG, in the
5639
   string STRING.
5640
 
5641
   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5642
   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5643
   least NMATCH elements, and we set them to the offsets of the
5644
   corresponding matched substrings.
5645
 
5646
   EFLAGS specifies `execution flags' which affect matching: if
5647
   REG_NOTBOL is set, then ^ does not match at the beginning of the
5648
   string; if REG_NOTEOL is set, then $ does not match at the end.
5649
 
5650
   We return 0 if we find a match and REG_NOMATCH if not.  */
5651
 
5652
int
5653
regexec (preg, string, nmatch, pmatch, eflags)
5654
    const regex_t *preg;
5655
    const char *string;
5656
    size_t nmatch;
5657
    regmatch_t pmatch[];
5658
    int eflags;
5659
{
5660
  int ret;
5661
  struct re_registers regs;
5662
  regex_t private_preg;
5663
  int len = strlen (string);
5664
  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5665
 
5666
  private_preg = *preg;
5667
 
5668
  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5669
  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5670
 
5671
  /* The user has told us exactly how many registers to return
5672
     information about, via `nmatch'.  We have to pass that on to the
5673
     matching routines.  */
5674
  private_preg.regs_allocated = REGS_FIXED;
5675
 
5676
  if (want_reg_info)
5677
    {
5678
      regs.num_regs = nmatch;
5679
      regs.start = TALLOC (nmatch, regoff_t);
5680
      regs.end = TALLOC (nmatch, regoff_t);
5681
      if (regs.start == NULL || regs.end == NULL)
5682
        return (int) REG_NOMATCH;
5683
    }
5684
 
5685
  /* Perform the searching operation.  */
5686
  ret = re_search (&private_preg, string, len,
5687
                   /* start: */ 0, /* range: */ len,
5688
                   want_reg_info ? &regs : (struct re_registers *) 0);
5689
 
5690
  /* Copy the register information to the POSIX structure.  */
5691
  if (want_reg_info)
5692
    {
5693
      if (ret >= 0)
5694
        {
5695
          unsigned r;
5696
 
5697
          for (r = 0; r < nmatch; r++)
5698
            {
5699
              pmatch[r].rm_so = regs.start[r];
5700
              pmatch[r].rm_eo = regs.end[r];
5701
            }
5702
        }
5703
 
5704
      /* If we needed the temporary register info, free the space now.  */
5705
      free (regs.start);
5706
      free (regs.end);
5707
    }
5708
 
5709
  /* We want zero return to mean success, unlike `re_search'.  */
5710
  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5711
}
5712
#ifdef _LIBC
5713
weak_alias (__regexec, regexec)
5714
#endif
5715
 
5716
 
5717
/* Returns a message corresponding to an error code, ERRCODE, returned
5718
   from either regcomp or regexec.   We don't use PREG here.  */
5719
 
5720
size_t
5721
regerror (errcode, preg, errbuf, errbuf_size)
5722
    int errcode;
5723
    const regex_t *preg;
5724
    char *errbuf;
5725
    size_t errbuf_size;
5726
{
5727
  const char *msg;
5728
  size_t msg_size;
5729
 
5730
  if (errcode < 0
5731
      || errcode >= (int) (sizeof (re_error_msgid)
5732
                           / sizeof (re_error_msgid[0])))
5733
    /* Only error codes returned by the rest of the code should be passed
5734
       to this routine.  If we are given anything else, or if other regex
5735
       code generates an invalid error code, then the program has a bug.
5736
       Dump core so we can fix it.  */
5737
    abort ();
5738
 
5739
  msg = gettext (re_error_msgid[errcode]);
5740
 
5741
  msg_size = strlen (msg) + 1; /* Includes the null.  */
5742
 
5743
  if (errbuf_size != 0)
5744
    {
5745
      if (msg_size > errbuf_size)
5746
        {
5747
#if defined HAVE_MEMPCPY || defined _LIBC
5748
          *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5749
#else
5750
          memcpy (errbuf, msg, errbuf_size - 1);
5751
          errbuf[errbuf_size - 1] = 0;
5752
#endif
5753
        }
5754
      else
5755
        memcpy (errbuf, msg, msg_size);
5756
    }
5757
 
5758
  return msg_size;
5759
}
5760
#ifdef _LIBC
5761
weak_alias (__regerror, regerror)
5762
#endif
5763
 
5764
 
5765
/* Free dynamically allocated space used by PREG.  */
5766
 
5767
void
5768
regfree (preg)
5769
    regex_t *preg;
5770
{
5771
  if (preg->buffer != NULL)
5772
    free (preg->buffer);
5773
  preg->buffer = NULL;
5774
 
5775
  preg->allocated = 0;
5776
  preg->used = 0;
5777
 
5778
  if (preg->fastmap != NULL)
5779
    free (preg->fastmap);
5780
  preg->fastmap = NULL;
5781
  preg->fastmap_accurate = 0;
5782
 
5783
  if (preg->translate != NULL)
5784
    free (preg->translate);
5785
  preg->translate = NULL;
5786
}
5787
#ifdef _LIBC
5788
weak_alias (__regfree, regfree)
5789
#endif
5790
 
5791
#endif /* not emacs  */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.