OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [branches/] [oc/] [gdb-5.0/] [gdb/] [gnu-regex.c] - Blame information for rev 1771

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 104 markom
/* *INDENT-OFF* */ /* keep in sync with glibc */
2
/* Extended regular expression matching and search library,
3
   version 0.12.
4
   (Implements POSIX draft P1003.2/D11.2, except for some of the
5
   internationalization features.)
6
   Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
7
 
8
   NOTE: The canonical source of this file is maintained with the
9
   GNU C Library.  Bugs can be reported to bug-glibc@gnu.org.
10
 
11
   This program is free software; you can redistribute it and/or modify it
12
   under the terms of the GNU General Public License as published by the
13
   Free Software Foundation; either version 2, or (at your option) any
14
   later version.
15
 
16
   This program is distributed in the hope that it will be useful,
17
   but WITHOUT ANY WARRANTY; without even the implied warranty of
18
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
   GNU General Public License for more details.
20
 
21
   You should have received a copy of the GNU General Public License
22
   along with this program; if not, write to the Free Software Foundation,
23
   Inc., 59 Temple Place - Suite 330,
24
   Boston, MA 02111-1307, USA.  */
25
 
26
/* AIX requires this to be the first thing in the file. */
27
#if defined _AIX && !defined REGEX_MALLOC
28
  #pragma alloca
29
#endif
30
 
31
#undef  _GNU_SOURCE
32
#define _GNU_SOURCE
33
 
34
#ifdef HAVE_CONFIG_H
35
# include <config.h>
36
#endif
37
 
38
#ifndef PARAMS
39
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
40
#  define PARAMS(args) args
41
# else
42
#  define PARAMS(args) ()
43
# endif  /* GCC.  */
44
#endif  /* Not PARAMS.  */
45
 
46
#if defined STDC_HEADERS && !defined emacs
47
# include <stddef.h>
48
#else
49
/* We need this for `gnu-regex.h', and perhaps for the Emacs include files.  */
50
# include <sys/types.h>
51
#endif
52
 
53
/* For platform which support the ISO C amendement 1 functionality we
54
   support user defined character classes.  */
55
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
56
 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
57
# include <wchar.h>
58
# include <wctype.h>
59
#endif
60
 
61
/* This is for other GNU distributions with internationalized messages.  */
62
/* CYGNUS LOCAL: ../intl will handle this for us */
63
#ifdef ENABLE_NLS
64
# include <libintl.h>
65
#else
66
# define gettext(msgid) (msgid)
67
#endif
68
 
69
#ifndef gettext_noop
70
/* This define is so xgettext can find the internationalizable
71
   strings.  */
72
# define gettext_noop(String) String
73
#endif
74
 
75
/* The `emacs' switch turns on certain matching commands
76
   that make sense only in Emacs. */
77
#ifdef emacs
78
 
79
# include "lisp.h"
80
# include "buffer.h"
81
# include "syntax.h"
82
 
83
#else  /* not emacs */
84
 
85
/* If we are not linking with Emacs proper,
86
   we can't use the relocating allocator
87
   even if config.h says that we can.  */
88
# undef REL_ALLOC
89
 
90
# if defined STDC_HEADERS || defined _LIBC
91
#  include <stdlib.h>
92
# else
93
char *malloc ();
94
char *realloc ();
95
# endif
96
 
97
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
98
   If nothing else has been done, use the method below.  */
99
# ifdef INHIBIT_STRING_HEADER
100
#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
101
#   if !defined bzero && !defined bcopy
102
#    undef INHIBIT_STRING_HEADER
103
#   endif
104
#  endif
105
# endif
106
 
107
/* This is the normal way of making sure we have a bcopy and a bzero.
108
   This is used in most programs--a few other programs avoid this
109
   by defining INHIBIT_STRING_HEADER.  */
110
# ifndef INHIBIT_STRING_HEADER
111
#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
112
#   include <string.h>
113
#   ifndef bzero
114
#    ifndef _LIBC
115
#     define bzero(s, n)        (memset (s, '\0', n), (s))
116
#    else
117
#     define bzero(s, n)        __bzero (s, n)
118
#    endif
119
#   endif
120
#  else
121
#   include <strings.h>
122
#   ifndef memcmp
123
#    define memcmp(s1, s2, n)   bcmp (s1, s2, n)
124
#   endif
125
#   ifndef memcpy
126
#    define memcpy(d, s, n)     (bcopy (s, d, n), (d))
127
#   endif
128
#  endif
129
# endif
130
 
131
/* Define the syntax stuff for \<, \>, etc.  */
132
 
133
/* This must be nonzero for the wordchar and notwordchar pattern
134
   commands in re_match_2.  */
135
# ifndef Sword
136
#  define Sword 1
137
# endif
138
 
139
# ifdef SWITCH_ENUM_BUG
140
#  define SWITCH_ENUM_CAST(x) ((int)(x))
141
# else
142
#  define SWITCH_ENUM_CAST(x) (x)
143
# endif
144
 
145
/* How many characters in the character set.  */
146
# define CHAR_SET_SIZE 256
147
 
148
/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
149
#ifndef _REGEX_RE_COMP
150
#define _REGEX_RE_COMP
151
#endif
152
 
153
# ifdef SYNTAX_TABLE
154
 
155
extern char *re_syntax_table;
156
 
157
# else /* not SYNTAX_TABLE */
158
 
159
static char re_syntax_table[CHAR_SET_SIZE];
160
 
161
static void
162
init_syntax_once ()
163
{
164
   register int c;
165
   static int done = 0;
166
 
167
   if (done)
168
     return;
169
 
170
   bzero (re_syntax_table, sizeof re_syntax_table);
171
 
172
   for (c = 'a'; c <= 'z'; c++)
173
     re_syntax_table[c] = Sword;
174
 
175
   for (c = 'A'; c <= 'Z'; c++)
176
     re_syntax_table[c] = Sword;
177
 
178
   for (c = '0'; c <= '9'; c++)
179
     re_syntax_table[c] = Sword;
180
 
181
   re_syntax_table['_'] = Sword;
182
 
183
   done = 1;
184
}
185
 
186
# endif /* not SYNTAX_TABLE */
187
 
188
# define SYNTAX(c) re_syntax_table[c]
189
 
190
#endif /* not emacs */
191
 
192
/* Get the interface, including the syntax bits.  */
193
/* CYGNUS LOCAL: call it gnu-regex.h, not regex.h, to avoid name conflicts */
194
#include "gnu-regex.h"
195
 
196
/* isalpha etc. are used for the character classes.  */
197
#include <ctype.h>
198
 
199
/* Jim Meyering writes:
200
 
201
   "... Some ctype macros are valid only for character codes that
202
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
203
   using /bin/cc or gcc but without giving an ansi option).  So, all
204
   ctype uses should be through macros like ISPRINT...  If
205
   STDC_HEADERS is defined, then autoconf has verified that the ctype
206
   macros don't need to be guarded with references to isascii. ...
207
   Defining isascii to 1 should let any compiler worth its salt
208
   eliminate the && through constant folding."
209
   Solaris defines some of these symbols so we must undefine them first.  */
210
 
211
#undef ISASCII
212
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
213
# define ISASCII(c) 1
214
#else
215
# define ISASCII(c) isascii(c)
216
#endif
217
 
218
#ifdef isblank
219
# define ISBLANK(c) (ISASCII (c) && isblank (c))
220
#else
221
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
222
#endif
223
#ifdef isgraph
224
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
225
#else
226
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
227
#endif
228
 
229
#undef ISPRINT
230
#define ISPRINT(c) (ISASCII (c) && isprint (c))
231
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
232
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
233
#define ISALPHA(c) (ISASCII (c) && isalpha (c))
234
#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
235
#define ISLOWER(c) (ISASCII (c) && islower (c))
236
#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
237
#define ISSPACE(c) (ISASCII (c) && isspace (c))
238
#define ISUPPER(c) (ISASCII (c) && isupper (c))
239
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
240
 
241
#ifndef NULL
242
# define NULL (void *)0
243
#endif
244
 
245
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
246
   since ours (we hope) works properly with all combinations of
247
   machines, compilers, `char' and `unsigned char' argument types.
248
   (Per Bothner suggested the basic approach.)  */
249
#undef SIGN_EXTEND_CHAR
250
#if __STDC__
251
# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
252
#else  /* not __STDC__ */
253
/* As in Harbison and Steele.  */
254
# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
255
#endif
256
 
257
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
258
   use `alloca' instead of `malloc'.  This is because using malloc in
259
   re_search* or re_match* could cause memory leaks when C-g is used in
260
   Emacs; also, malloc is slower and causes storage fragmentation.  On
261
   the other hand, malloc is more portable, and easier to debug.
262
 
263
   Because we sometimes use alloca, some routines have to be macros,
264
   not functions -- `alloca'-allocated space disappears at the end of the
265
   function it is called in.  */
266
 
267
#ifdef REGEX_MALLOC
268
 
269
# define REGEX_ALLOCATE malloc
270
# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
271
# define REGEX_FREE free
272
 
273
#else /* not REGEX_MALLOC  */
274
 
275
/* Emacs already defines alloca, sometimes.  */
276
# ifndef alloca
277
 
278
/* Make alloca work the best possible way.  */
279
#  ifdef __GNUC__
280
#   define alloca __builtin_alloca
281
#  else /* not __GNUC__ */
282
#   if HAVE_ALLOCA_H
283
#    include <alloca.h>
284
#   endif /* HAVE_ALLOCA_H */
285
#  endif /* not __GNUC__ */
286
 
287
# endif /* not alloca */
288
 
289
# define REGEX_ALLOCATE alloca
290
 
291
/* Assumes a `char *destination' variable.  */
292
# define REGEX_REALLOCATE(source, osize, nsize)                         \
293
  (destination = (char *) alloca (nsize),                               \
294
   memcpy (destination, source, osize))
295
 
296
/* No need to do anything to free, after alloca.  */
297
# define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
298
 
299
#endif /* not REGEX_MALLOC */
300
 
301
/* Define how to allocate the failure stack.  */
302
 
303
#if defined REL_ALLOC && defined REGEX_MALLOC
304
 
305
# define REGEX_ALLOCATE_STACK(size)                             \
306
  r_alloc (&failure_stack_ptr, (size))
307
# define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
308
  r_re_alloc (&failure_stack_ptr, (nsize))
309
# define REGEX_FREE_STACK(ptr)                                  \
310
  r_alloc_free (&failure_stack_ptr)
311
 
312
#else /* not using relocating allocator */
313
 
314
# ifdef REGEX_MALLOC
315
 
316
#  define REGEX_ALLOCATE_STACK malloc
317
#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
318
#  define REGEX_FREE_STACK free
319
 
320
# else /* not REGEX_MALLOC */
321
 
322
#  define REGEX_ALLOCATE_STACK alloca
323
 
324
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
325
   REGEX_REALLOCATE (source, osize, nsize)
326
/* No need to explicitly free anything.  */
327
#  define REGEX_FREE_STACK(arg)
328
 
329
# endif /* not REGEX_MALLOC */
330
#endif /* not using relocating allocator */
331
 
332
 
333
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
334
   `string1' or just past its end.  This works if PTR is NULL, which is
335
   a good thing.  */
336
#define FIRST_STRING_P(ptr)                                     \
337
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
338
 
339
/* (Re)Allocate N items of type T using malloc, or fail.  */
340
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
341
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
342
#define RETALLOC_IF(addr, n, t) \
343
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
344
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
345
 
346
#define BYTEWIDTH 8 /* In bits.  */
347
 
348
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
349
 
350
#undef MAX
351
#undef MIN
352
#define MAX(a, b) ((a) > (b) ? (a) : (b))
353
#define MIN(a, b) ((a) < (b) ? (a) : (b))
354
 
355
typedef char boolean;
356
#define false 0
357
#define true 1
358
 
359
static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
360
                                        const char *string1, int size1,
361
                                        const char *string2, int size2,
362
                                        int pos,
363
                                        struct re_registers *regs,
364
                                        int stop));
365
 
366
/* These are the command codes that appear in compiled regular
367
   expressions.  Some opcodes are followed by argument bytes.  A
368
   command code can specify any interpretation whatsoever for its
369
   arguments.  Zero bytes may appear in the compiled regular expression.  */
370
 
371
typedef enum
372
{
373
  no_op = 0,
374
 
375
  /* Succeed right away--no more backtracking.  */
376
  succeed,
377
 
378
        /* Followed by one byte giving n, then by n literal bytes.  */
379
  exactn,
380
 
381
        /* Matches any (more or less) character.  */
382
  anychar,
383
 
384
        /* Matches any one char belonging to specified set.  First
385
           following byte is number of bitmap bytes.  Then come bytes
386
           for a bitmap saying which chars are in.  Bits in each byte
387
           are ordered low-bit-first.  A character is in the set if its
388
           bit is 1.  A character too large to have a bit in the map is
389
           automatically not in the set.  */
390
  charset,
391
 
392
        /* Same parameters as charset, but match any character that is
393
           not one of those specified.  */
394
  charset_not,
395
 
396
        /* Start remembering the text that is matched, for storing in a
397
           register.  Followed by one byte with the register number, in
398
           the range 0 to one less than the pattern buffer's re_nsub
399
           field.  Then followed by one byte with the number of groups
400
           inner to this one.  (This last has to be part of the
401
           start_memory only because we need it in the on_failure_jump
402
           of re_match_2.)  */
403
  start_memory,
404
 
405
        /* Stop remembering the text that is matched and store it in a
406
           memory register.  Followed by one byte with the register
407
           number, in the range 0 to one less than `re_nsub' in the
408
           pattern buffer, and one byte with the number of inner groups,
409
           just like `start_memory'.  (We need the number of inner
410
           groups here because we don't have any easy way of finding the
411
           corresponding start_memory when we're at a stop_memory.)  */
412
  stop_memory,
413
 
414
        /* Match a duplicate of something remembered. Followed by one
415
           byte containing the register number.  */
416
  duplicate,
417
 
418
        /* Fail unless at beginning of line.  */
419
  begline,
420
 
421
        /* Fail unless at end of line.  */
422
  endline,
423
 
424
        /* Succeeds if at beginning of buffer (if emacs) or at beginning
425
           of string to be matched (if not).  */
426
  begbuf,
427
 
428
        /* Analogously, for end of buffer/string.  */
429
  endbuf,
430
 
431
        /* Followed by two byte relative address to which to jump.  */
432
  jump,
433
 
434
        /* Same as jump, but marks the end of an alternative.  */
435
  jump_past_alt,
436
 
437
        /* Followed by two-byte relative address of place to resume at
438
           in case of failure.  */
439
  on_failure_jump,
440
 
441
        /* Like on_failure_jump, but pushes a placeholder instead of the
442
           current string position when executed.  */
443
  on_failure_keep_string_jump,
444
 
445
        /* Throw away latest failure point and then jump to following
446
           two-byte relative address.  */
447
  pop_failure_jump,
448
 
449
        /* Change to pop_failure_jump if know won't have to backtrack to
450
           match; otherwise change to jump.  This is used to jump
451
           back to the beginning of a repeat.  If what follows this jump
452
           clearly won't match what the repeat does, such that we can be
453
           sure that there is no use backtracking out of repetitions
454
           already matched, then we change it to a pop_failure_jump.
455
           Followed by two-byte address.  */
456
  maybe_pop_jump,
457
 
458
        /* Jump to following two-byte address, and push a dummy failure
459
           point. This failure point will be thrown away if an attempt
460
           is made to use it for a failure.  A `+' construct makes this
461
           before the first repeat.  Also used as an intermediary kind
462
           of jump when compiling an alternative.  */
463
  dummy_failure_jump,
464
 
465
        /* Push a dummy failure point and continue.  Used at the end of
466
           alternatives.  */
467
  push_dummy_failure,
468
 
469
        /* Followed by two-byte relative address and two-byte number n.
470
           After matching N times, jump to the address upon failure.  */
471
  succeed_n,
472
 
473
        /* Followed by two-byte relative address, and two-byte number n.
474
           Jump to the address N times, then fail.  */
475
  jump_n,
476
 
477
        /* Set the following two-byte relative address to the
478
           subsequent two-byte number.  The address *includes* the two
479
           bytes of number.  */
480
  set_number_at,
481
 
482
  wordchar,     /* Matches any word-constituent character.  */
483
  notwordchar,  /* Matches any char that is not a word-constituent.  */
484
 
485
  wordbeg,      /* Succeeds if at word beginning.  */
486
  wordend,      /* Succeeds if at word end.  */
487
 
488
  wordbound,    /* Succeeds if at a word boundary.  */
489
  notwordbound  /* Succeeds if not at a word boundary.  */
490
 
491
#ifdef emacs
492
  ,before_dot,  /* Succeeds if before point.  */
493
  at_dot,       /* Succeeds if at point.  */
494
  after_dot,    /* Succeeds if after point.  */
495
 
496
        /* Matches any character whose syntax is specified.  Followed by
497
           a byte which contains a syntax code, e.g., Sword.  */
498
  syntaxspec,
499
 
500
        /* Matches any character whose syntax is not that specified.  */
501
  notsyntaxspec
502
#endif /* emacs */
503
} re_opcode_t;
504
 
505
/* Common operations on the compiled pattern.  */
506
 
507
/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
508
 
509
#define STORE_NUMBER(destination, number)                               \
510
  do {                                                                  \
511
    (destination)[0] = (number) & 0377;                                  \
512
    (destination)[1] = (number) >> 8;                                   \
513
  } while (0)
514
 
515
/* Same as STORE_NUMBER, except increment DESTINATION to
516
   the byte after where the number is stored.  Therefore, DESTINATION
517
   must be an lvalue.  */
518
 
519
#define STORE_NUMBER_AND_INCR(destination, number)                      \
520
  do {                                                                  \
521
    STORE_NUMBER (destination, number);                                 \
522
    (destination) += 2;                                                 \
523
  } while (0)
524
 
525
/* Put into DESTINATION a number stored in two contiguous bytes starting
526
   at SOURCE.  */
527
 
528
#define EXTRACT_NUMBER(destination, source)                             \
529
  do {                                                                  \
530
    (destination) = *(source) & 0377;                                   \
531
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
532
  } while (0)
533
 
534
#ifdef DEBUG
535
static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
536
static void
537
extract_number (dest, source)
538
    int *dest;
539
    unsigned char *source;
540
{
541
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
542
  *dest = *source & 0377;
543
  *dest += temp << 8;
544
}
545
 
546
# ifndef EXTRACT_MACROS /* To debug the macros.  */
547
#  undef EXTRACT_NUMBER
548
#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
549
# endif /* not EXTRACT_MACROS */
550
 
551
#endif /* DEBUG */
552
 
553
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
554
   SOURCE must be an lvalue.  */
555
 
556
#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
557
  do {                                                                  \
558
    EXTRACT_NUMBER (destination, source);                               \
559
    (source) += 2;                                                      \
560
  } while (0)
561
 
562
#ifdef DEBUG
563
static void extract_number_and_incr _RE_ARGS ((int *destination,
564
                                               unsigned char **source));
565
static void
566
extract_number_and_incr (destination, source)
567
    int *destination;
568
    unsigned char **source;
569
{
570
  extract_number (destination, *source);
571
  *source += 2;
572
}
573
 
574
# ifndef EXTRACT_MACROS
575
#  undef EXTRACT_NUMBER_AND_INCR
576
#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
577
  extract_number_and_incr (&dest, &src)
578
# endif /* not EXTRACT_MACROS */
579
 
580
#endif /* DEBUG */
581
 
582
/* If DEBUG is defined, Regex prints many voluminous messages about what
583
   it is doing (if the variable `debug' is nonzero).  If linked with the
584
   main program in `iregex.c', you can enter patterns and strings
585
   interactively.  And if linked with the main program in `main.c' and
586
   the other test files, you can run the already-written tests.  */
587
 
588
#ifdef DEBUG
589
 
590
/* We use standard I/O for debugging.  */
591
# include <stdio.h>
592
 
593
/* It is useful to test things that ``must'' be true when debugging.  */
594
# include <assert.h>
595
 
596
static int debug = 0;
597
 
598
# define DEBUG_STATEMENT(e) e
599
# define DEBUG_PRINT1(x) if (debug) printf (x)
600
# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
601
# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
602
# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
603
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
604
  if (debug) print_partial_compiled_pattern (s, e)
605
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
606
  if (debug) print_double_string (w, s1, sz1, s2, sz2)
607
 
608
 
609
/* Print the fastmap in human-readable form.  */
610
 
611
void
612
print_fastmap (fastmap)
613
    char *fastmap;
614
{
615
  unsigned was_a_range = 0;
616
  unsigned i = 0;
617
 
618
  while (i < (1 << BYTEWIDTH))
619
    {
620
      if (fastmap[i++])
621
        {
622
          was_a_range = 0;
623
          putchar (i - 1);
624
          while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
625
            {
626
              was_a_range = 1;
627
              i++;
628
            }
629
          if (was_a_range)
630
            {
631
              printf ("-");
632
              putchar (i - 1);
633
            }
634
        }
635
    }
636
  putchar ('\n');
637
}
638
 
639
 
640
/* Print a compiled pattern string in human-readable form, starting at
641
   the START pointer into it and ending just before the pointer END.  */
642
 
643
void
644
print_partial_compiled_pattern (start, end)
645
    unsigned char *start;
646
    unsigned char *end;
647
{
648
  int mcnt, mcnt2;
649
  unsigned char *p1;
650
  unsigned char *p = start;
651
  unsigned char *pend = end;
652
 
653
  if (start == NULL)
654
    {
655
      printf ("(null)\n");
656
      return;
657
    }
658
 
659
  /* Loop over pattern commands.  */
660
  while (p < pend)
661
    {
662
      printf ("%d:\t", p - start);
663
 
664
      switch ((re_opcode_t) *p++)
665
        {
666
        case no_op:
667
          printf ("/no_op");
668
          break;
669
 
670
        case exactn:
671
          mcnt = *p++;
672
          printf ("/exactn/%d", mcnt);
673
          do
674
            {
675
              putchar ('/');
676
              putchar (*p++);
677
            }
678
          while (--mcnt);
679
          break;
680
 
681
        case start_memory:
682
          mcnt = *p++;
683
          printf ("/start_memory/%d/%d", mcnt, *p++);
684
          break;
685
 
686
        case stop_memory:
687
          mcnt = *p++;
688
          printf ("/stop_memory/%d/%d", mcnt, *p++);
689
          break;
690
 
691
        case duplicate:
692
          printf ("/duplicate/%d", *p++);
693
          break;
694
 
695
        case anychar:
696
          printf ("/anychar");
697
          break;
698
 
699
        case charset:
700
        case charset_not:
701
          {
702
            register int c, last = -100;
703
            register int in_range = 0;
704
 
705
            printf ("/charset [%s",
706
                    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
707
 
708
            assert (p + *p < pend);
709
 
710
            for (c = 0; c < 256; c++)
711
              if (c / 8 < *p
712
                  && (p[1 + (c/8)] & (1 << (c % 8))))
713
                {
714
                  /* Are we starting a range?  */
715
                  if (last + 1 == c && ! in_range)
716
                    {
717
                      putchar ('-');
718
                      in_range = 1;
719
                    }
720
                  /* Have we broken a range?  */
721
                  else if (last + 1 != c && in_range)
722
              {
723
                      putchar (last);
724
                      in_range = 0;
725
                    }
726
 
727
                  if (! in_range)
728
                    putchar (c);
729
 
730
                  last = c;
731
              }
732
 
733
            if (in_range)
734
              putchar (last);
735
 
736
            putchar (']');
737
 
738
            p += 1 + *p;
739
          }
740
          break;
741
 
742
        case begline:
743
          printf ("/begline");
744
          break;
745
 
746
        case endline:
747
          printf ("/endline");
748
          break;
749
 
750
        case on_failure_jump:
751
          extract_number_and_incr (&mcnt, &p);
752
          printf ("/on_failure_jump to %d", p + mcnt - start);
753
          break;
754
 
755
        case on_failure_keep_string_jump:
756
          extract_number_and_incr (&mcnt, &p);
757
          printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
758
          break;
759
 
760
        case dummy_failure_jump:
761
          extract_number_and_incr (&mcnt, &p);
762
          printf ("/dummy_failure_jump to %d", p + mcnt - start);
763
          break;
764
 
765
        case push_dummy_failure:
766
          printf ("/push_dummy_failure");
767
          break;
768
 
769
        case maybe_pop_jump:
770
          extract_number_and_incr (&mcnt, &p);
771
          printf ("/maybe_pop_jump to %d", p + mcnt - start);
772
          break;
773
 
774
        case pop_failure_jump:
775
          extract_number_and_incr (&mcnt, &p);
776
          printf ("/pop_failure_jump to %d", p + mcnt - start);
777
          break;
778
 
779
        case jump_past_alt:
780
          extract_number_and_incr (&mcnt, &p);
781
          printf ("/jump_past_alt to %d", p + mcnt - start);
782
          break;
783
 
784
        case jump:
785
          extract_number_and_incr (&mcnt, &p);
786
          printf ("/jump to %d", p + mcnt - start);
787
          break;
788
 
789
        case succeed_n:
790
          extract_number_and_incr (&mcnt, &p);
791
          p1 = p + mcnt;
792
          extract_number_and_incr (&mcnt2, &p);
793
          printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
794
          break;
795
 
796
        case jump_n:
797
          extract_number_and_incr (&mcnt, &p);
798
          p1 = p + mcnt;
799
          extract_number_and_incr (&mcnt2, &p);
800
          printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
801
          break;
802
 
803
        case set_number_at:
804
          extract_number_and_incr (&mcnt, &p);
805
          p1 = p + mcnt;
806
          extract_number_and_incr (&mcnt2, &p);
807
          printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
808
          break;
809
 
810
        case wordbound:
811
          printf ("/wordbound");
812
          break;
813
 
814
        case notwordbound:
815
          printf ("/notwordbound");
816
          break;
817
 
818
        case wordbeg:
819
          printf ("/wordbeg");
820
          break;
821
 
822
        case wordend:
823
          printf ("/wordend");
824
 
825
# ifdef emacs
826
        case before_dot:
827
          printf ("/before_dot");
828
          break;
829
 
830
        case at_dot:
831
          printf ("/at_dot");
832
          break;
833
 
834
        case after_dot:
835
          printf ("/after_dot");
836
          break;
837
 
838
        case syntaxspec:
839
          printf ("/syntaxspec");
840
          mcnt = *p++;
841
          printf ("/%d", mcnt);
842
          break;
843
 
844
        case notsyntaxspec:
845
          printf ("/notsyntaxspec");
846
          mcnt = *p++;
847
          printf ("/%d", mcnt);
848
          break;
849
# endif /* emacs */
850
 
851
        case wordchar:
852
          printf ("/wordchar");
853
          break;
854
 
855
        case notwordchar:
856
          printf ("/notwordchar");
857
          break;
858
 
859
        case begbuf:
860
          printf ("/begbuf");
861
          break;
862
 
863
        case endbuf:
864
          printf ("/endbuf");
865
          break;
866
 
867
        default:
868
          printf ("?%d", *(p-1));
869
        }
870
 
871
      putchar ('\n');
872
    }
873
 
874
  printf ("%d:\tend of pattern.\n", p - start);
875
}
876
 
877
 
878
void
879
print_compiled_pattern (bufp)
880
    struct re_pattern_buffer *bufp;
881
{
882
  unsigned char *buffer = bufp->buffer;
883
 
884
  print_partial_compiled_pattern (buffer, buffer + bufp->used);
885
  printf ("%ld bytes used/%ld bytes allocated.\n",
886
          bufp->used, bufp->allocated);
887
 
888
  if (bufp->fastmap_accurate && bufp->fastmap)
889
    {
890
      printf ("fastmap: ");
891
      print_fastmap (bufp->fastmap);
892
    }
893
 
894
  printf ("re_nsub: %d\t", bufp->re_nsub);
895
  printf ("regs_alloc: %d\t", bufp->regs_allocated);
896
  printf ("can_be_null: %d\t", bufp->can_be_null);
897
  printf ("newline_anchor: %d\n", bufp->newline_anchor);
898
  printf ("no_sub: %d\t", bufp->no_sub);
899
  printf ("not_bol: %d\t", bufp->not_bol);
900
  printf ("not_eol: %d\t", bufp->not_eol);
901
  printf ("syntax: %lx\n", bufp->syntax);
902
  /* Perhaps we should print the translate table?  */
903
}
904
 
905
 
906
void
907
print_double_string (where, string1, size1, string2, size2)
908
    const char *where;
909
    const char *string1;
910
    const char *string2;
911
    int size1;
912
    int size2;
913
{
914
  int this_char;
915
 
916
  if (where == NULL)
917
    printf ("(null)");
918
  else
919
    {
920
      if (FIRST_STRING_P (where))
921
        {
922
          for (this_char = where - string1; this_char < size1; this_char++)
923
            putchar (string1[this_char]);
924
 
925
          where = string2;
926
        }
927
 
928
      for (this_char = where - string2; this_char < size2; this_char++)
929
        putchar (string2[this_char]);
930
    }
931
}
932
 
933
void
934
printchar (c)
935
     int c;
936
{
937
  putc (c, stderr);
938
}
939
 
940
#else /* not DEBUG */
941
 
942
# undef assert
943
# define assert(e)
944
 
945
# define DEBUG_STATEMENT(e)
946
# define DEBUG_PRINT1(x)
947
# define DEBUG_PRINT2(x1, x2)
948
# define DEBUG_PRINT3(x1, x2, x3)
949
# define DEBUG_PRINT4(x1, x2, x3, x4)
950
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
951
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
952
 
953
#endif /* not DEBUG */
954
 
955
/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
956
   also be assigned to arbitrarily: each pattern buffer stores its own
957
   syntax, so it can be changed between regex compilations.  */
958
/* This has no initializer because initialized variables in Emacs
959
   become read-only after dumping.  */
960
reg_syntax_t re_syntax_options;
961
 
962
 
963
/* Specify the precise syntax of regexps for compilation.  This provides
964
   for compatibility for various utilities which historically have
965
   different, incompatible syntaxes.
966
 
967
   The argument SYNTAX is a bit mask comprised of the various bits
968
   defined in gnu-regex.h.  We return the old syntax.  */
969
 
970
reg_syntax_t
971
re_set_syntax (syntax)
972
    reg_syntax_t syntax;
973
{
974
  reg_syntax_t ret = re_syntax_options;
975
 
976
  re_syntax_options = syntax;
977
#ifdef DEBUG
978
  if (syntax & RE_DEBUG)
979
    debug = 1;
980
  else if (debug) /* was on but now is not */
981
    debug = 0;
982
#endif /* DEBUG */
983
  return ret;
984
}
985
#ifdef _LIBC
986
weak_alias (__re_set_syntax, re_set_syntax)
987
#endif
988
 
989
/* This table gives an error message for each of the error codes listed
990
   in gnu-regex.h.  Obviously the order here has to be same as there.
991
   POSIX doesn't require that we do anything for REG_NOERROR,
992
   but why not be nice?  */
993
 
994
static const char *re_error_msgid[] =
995
  {
996
    gettext_noop ("Success"),   /* REG_NOERROR */
997
    gettext_noop ("No match"),  /* REG_NOMATCH */
998
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
999
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1000
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1001
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1002
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1003
    gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1004
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1005
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1006
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1007
    gettext_noop ("Invalid range end"), /* REG_ERANGE */
1008
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1009
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1010
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1011
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1012
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1013
  };
1014
 
1015
/* Avoiding alloca during matching, to placate r_alloc.  */
1016
 
1017
/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1018
   searching and matching functions should not call alloca.  On some
1019
   systems, alloca is implemented in terms of malloc, and if we're
1020
   using the relocating allocator routines, then malloc could cause a
1021
   relocation, which might (if the strings being searched are in the
1022
   ralloc heap) shift the data out from underneath the regexp
1023
   routines.
1024
 
1025
   Here's another reason to avoid allocation: Emacs
1026
   processes input from X in a signal handler; processing X input may
1027
   call malloc; if input arrives while a matching routine is calling
1028
   malloc, then we're scrod.  But Emacs can't just block input while
1029
   calling matching routines; then we don't notice interrupts when
1030
   they come in.  So, Emacs blocks input around all regexp calls
1031
   except the matching calls, which it leaves unprotected, in the
1032
   faith that they will not malloc.  */
1033
 
1034
/* Normally, this is fine.  */
1035
#define MATCH_MAY_ALLOCATE
1036
 
1037
/* When using GNU C, we are not REALLY using the C alloca, no matter
1038
   what config.h may say.  So don't take precautions for it.  */
1039
#ifdef __GNUC__
1040
# undef C_ALLOCA
1041
#endif
1042
 
1043
/* The match routines may not allocate if (1) they would do it with malloc
1044
   and (2) it's not safe for them to use malloc.
1045
   Note that if REL_ALLOC is defined, matching would not use malloc for the
1046
   failure stack, but we would still use it for the register vectors;
1047
   so REL_ALLOC should not affect this.  */
1048
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1049
# undef MATCH_MAY_ALLOCATE
1050
#endif
1051
 
1052
 
1053
/* Failure stack declarations and macros; both re_compile_fastmap and
1054
   re_match_2 use a failure stack.  These have to be macros because of
1055
   REGEX_ALLOCATE_STACK.  */
1056
 
1057
 
1058
/* Number of failure points for which to initially allocate space
1059
   when matching.  If this number is exceeded, we allocate more
1060
   space, so it is not a hard limit.  */
1061
#ifndef INIT_FAILURE_ALLOC
1062
# define INIT_FAILURE_ALLOC 5
1063
#endif
1064
 
1065
/* Roughly the maximum number of failure points on the stack.  Would be
1066
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1067
   This is a variable only so users of regex can assign to it; we never
1068
   change it ourselves.  */
1069
 
1070
#ifdef INT_IS_16BIT
1071
 
1072
# if defined MATCH_MAY_ALLOCATE
1073
/* 4400 was enough to cause a crash on Alpha OSF/1,
1074
   whose default stack limit is 2mb.  */
1075
long int re_max_failures = 4000;
1076
# else
1077
long int re_max_failures = 2000;
1078
# endif
1079
 
1080
union fail_stack_elt
1081
{
1082
  unsigned char *pointer;
1083
  long int integer;
1084
};
1085
 
1086
typedef union fail_stack_elt fail_stack_elt_t;
1087
 
1088
typedef struct
1089
{
1090
  fail_stack_elt_t *stack;
1091
  unsigned long int size;
1092
  unsigned long int avail;              /* Offset of next open position.  */
1093
} fail_stack_type;
1094
 
1095
#else /* not INT_IS_16BIT */
1096
 
1097
# if defined MATCH_MAY_ALLOCATE
1098
/* 4400 was enough to cause a crash on Alpha OSF/1,
1099
   whose default stack limit is 2mb.  */
1100
int re_max_failures = 20000;
1101
# else
1102
int re_max_failures = 2000;
1103
# endif
1104
 
1105
union fail_stack_elt
1106
{
1107
  unsigned char *pointer;
1108
  int integer;
1109
};
1110
 
1111
typedef union fail_stack_elt fail_stack_elt_t;
1112
 
1113
typedef struct
1114
{
1115
  fail_stack_elt_t *stack;
1116
  unsigned size;
1117
  unsigned avail;                       /* Offset of next open position.  */
1118
} fail_stack_type;
1119
 
1120
#endif /* INT_IS_16BIT */
1121
 
1122
#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1123
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1124
#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1125
 
1126
 
1127
/* Define macros to initialize and free the failure stack.
1128
   Do `return -2' if the alloc fails.  */
1129
 
1130
#ifdef MATCH_MAY_ALLOCATE
1131
# define INIT_FAIL_STACK()                                              \
1132
  do {                                                                  \
1133
    fail_stack.stack = (fail_stack_elt_t *)                             \
1134
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1135
                                                                        \
1136
    if (fail_stack.stack == NULL)                                       \
1137
      return -2;                                                        \
1138
                                                                        \
1139
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
1140
    fail_stack.avail = 0;                                                \
1141
  } while (0)
1142
 
1143
# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1144
#else
1145
# define INIT_FAIL_STACK()                                              \
1146
  do {                                                                  \
1147
    fail_stack.avail = 0;                                                \
1148
  } while (0)
1149
 
1150
# define RESET_FAIL_STACK()
1151
#endif
1152
 
1153
 
1154
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1155
 
1156
   Return 1 if succeeds, and 0 if either ran out of memory
1157
   allocating space for it or it was already too large.
1158
 
1159
   REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1160
 
1161
#define DOUBLE_FAIL_STACK(fail_stack)                                   \
1162
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
1163
   ? 0                                                                   \
1164
   : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1165
        REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1166
          (fail_stack).size * sizeof (fail_stack_elt_t),                \
1167
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1168
                                                                        \
1169
      (fail_stack).stack == NULL                                        \
1170
      ? 0                                                                \
1171
      : ((fail_stack).size <<= 1,                                       \
1172
         1)))
1173
 
1174
 
1175
/* Push pointer POINTER on FAIL_STACK.
1176
   Return 1 if was able to do so and 0 if ran out of memory allocating
1177
   space to do so.  */
1178
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1179
  ((FAIL_STACK_FULL ()                                                  \
1180
    && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1181
   ? 0                                                                   \
1182
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1183
      1))
1184
 
1185
/* Push a pointer value onto the failure stack.
1186
   Assumes the variable `fail_stack'.  Probably should only
1187
   be called from within `PUSH_FAILURE_POINT'.  */
1188
#define PUSH_FAILURE_POINTER(item)                                      \
1189
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1190
 
1191
/* This pushes an integer-valued item onto the failure stack.
1192
   Assumes the variable `fail_stack'.  Probably should only
1193
   be called from within `PUSH_FAILURE_POINT'.  */
1194
#define PUSH_FAILURE_INT(item)                                  \
1195
  fail_stack.stack[fail_stack.avail++].integer = (item)
1196
 
1197
/* Push a fail_stack_elt_t value onto the failure stack.
1198
   Assumes the variable `fail_stack'.  Probably should only
1199
   be called from within `PUSH_FAILURE_POINT'.  */
1200
#define PUSH_FAILURE_ELT(item)                                  \
1201
  fail_stack.stack[fail_stack.avail++] =  (item)
1202
 
1203
/* These three POP... operations complement the three PUSH... operations.
1204
   All assume that `fail_stack' is nonempty.  */
1205
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1206
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1207
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1208
 
1209
/* Used to omit pushing failure point id's when we're not debugging.  */
1210
#ifdef DEBUG
1211
# define DEBUG_PUSH PUSH_FAILURE_INT
1212
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1213
#else
1214
# define DEBUG_PUSH(item)
1215
# define DEBUG_POP(item_addr)
1216
#endif
1217
 
1218
 
1219
/* Push the information about the state we will need
1220
   if we ever fail back to it.
1221
 
1222
   Requires variables fail_stack, regstart, regend, reg_info, and
1223
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1224
   be declared.
1225
 
1226
   Does `return FAILURE_CODE' if runs out of memory.  */
1227
 
1228
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1229
  do {                                                                  \
1230
    char *destination;                                                  \
1231
    /* Must be int, so when we don't save any registers, the arithmetic \
1232
       of 0 + -1 isn't done as unsigned.  */                            \
1233
    /* Can't be int, since there is not a shred of a guarantee that int \
1234
       is wide enough to hold a value of something to which pointer can \
1235
       be assigned */                                                   \
1236
    active_reg_t this_reg;                                              \
1237
                                                                        \
1238
    DEBUG_STATEMENT (failure_id++);                                     \
1239
    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1240
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1241
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1242
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1243
                                                                        \
1244
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);          \
1245
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1246
                                                                        \
1247
    /* Ensure we have enough space allocated for what we will push.  */ \
1248
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1249
      {                                                                 \
1250
        if (!DOUBLE_FAIL_STACK (fail_stack))                            \
1251
          return failure_code;                                          \
1252
                                                                        \
1253
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1254
                       (fail_stack).size);                              \
1255
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1256
      }                                                                 \
1257
                                                                        \
1258
    /* Push the info, starting with the registers.  */                  \
1259
    DEBUG_PRINT1 ("\n");                                                \
1260
                                                                        \
1261
    if (1)                                                              \
1262
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1263
           this_reg++)                                                  \
1264
        {                                                               \
1265
          DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);              \
1266
          DEBUG_STATEMENT (num_regs_pushed++);                          \
1267
                                                                        \
1268
          DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);         \
1269
          PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
1270
                                                                        \
1271
          DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);             \
1272
          PUSH_FAILURE_POINTER (regend[this_reg]);                      \
1273
                                                                        \
1274
          DEBUG_PRINT2 ("    info: %p\n      ",                         \
1275
                        reg_info[this_reg].word.pointer);               \
1276
          DEBUG_PRINT2 (" match_null=%d",                               \
1277
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
1278
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
1279
          DEBUG_PRINT2 (" matched_something=%d",                        \
1280
                        MATCHED_SOMETHING (reg_info[this_reg]));        \
1281
          DEBUG_PRINT2 (" ever_matched=%d",                             \
1282
                        EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
1283
          DEBUG_PRINT1 ("\n");                                          \
1284
          PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
1285
        }                                                               \
1286
                                                                        \
1287
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1288
    PUSH_FAILURE_INT (lowest_active_reg);                               \
1289
                                                                        \
1290
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1291
    PUSH_FAILURE_INT (highest_active_reg);                              \
1292
                                                                        \
1293
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);            \
1294
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1295
    PUSH_FAILURE_POINTER (pattern_place);                               \
1296
                                                                        \
1297
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);              \
1298
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1299
                                 size2);                                \
1300
    DEBUG_PRINT1 ("'\n");                                               \
1301
    PUSH_FAILURE_POINTER (string_place);                                \
1302
                                                                        \
1303
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1304
    DEBUG_PUSH (failure_id);                                            \
1305
  } while (0)
1306
 
1307
/* This is the number of items that are pushed and popped on the stack
1308
   for each register.  */
1309
#define NUM_REG_ITEMS  3
1310
 
1311
/* Individual items aside from the registers.  */
1312
#ifdef DEBUG
1313
# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1314
#else
1315
# define NUM_NONREG_ITEMS 4
1316
#endif
1317
 
1318
/* We push at most this many items on the stack.  */
1319
/* We used to use (num_regs - 1), which is the number of registers
1320
   this regexp will save; but that was changed to 5
1321
   to avoid stack overflow for a regexp with lots of parens.  */
1322
#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1323
 
1324
/* We actually push this many items.  */
1325
#define NUM_FAILURE_ITEMS                               \
1326
  (((0                                                   \
1327
     ? 0 : highest_active_reg - lowest_active_reg + 1)   \
1328
    * NUM_REG_ITEMS)                                    \
1329
   + NUM_NONREG_ITEMS)
1330
 
1331
/* How many items can still be added to the stack without overflowing it.  */
1332
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1333
 
1334
 
1335
/* Pops what PUSH_FAIL_STACK pushes.
1336
 
1337
   We restore into the parameters, all of which should be lvalues:
1338
     STR -- the saved data position.
1339
     PAT -- the saved pattern position.
1340
     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1341
     REGSTART, REGEND -- arrays of string positions.
1342
     REG_INFO -- array of information about each subexpression.
1343
 
1344
   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1345
   `pend', `string1', `size1', `string2', and `size2'.  */
1346
 
1347
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1348
{                                                                       \
1349
  DEBUG_STATEMENT (unsigned failure_id;)                                \
1350
  active_reg_t this_reg;                                                \
1351
  const unsigned char *string_temp;                                     \
1352
                                                                        \
1353
  assert (!FAIL_STACK_EMPTY ());                                        \
1354
                                                                        \
1355
  /* Remove failure points and point to how many regs pushed.  */       \
1356
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1357
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1358
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1359
                                                                        \
1360
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1361
                                                                        \
1362
  DEBUG_POP (&failure_id);                                              \
1363
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1364
                                                                        \
1365
  /* If the saved string location is NULL, it came from an              \
1366
     on_failure_keep_string_jump opcode, and we want to throw away the  \
1367
     saved NULL, thus retaining our current position in the string.  */ \
1368
  string_temp = POP_FAILURE_POINTER ();                                 \
1369
  if (string_temp != NULL)                                              \
1370
    str = (const char *) string_temp;                                   \
1371
                                                                        \
1372
  DEBUG_PRINT2 ("  Popping string %p: `", str);                         \
1373
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1374
  DEBUG_PRINT1 ("'\n");                                                 \
1375
                                                                        \
1376
  pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
1377
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);                        \
1378
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1379
                                                                        \
1380
  /* Restore register info.  */                                         \
1381
  high_reg = (active_reg_t) POP_FAILURE_INT ();                         \
1382
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);          \
1383
                                                                        \
1384
  low_reg = (active_reg_t) POP_FAILURE_INT ();                          \
1385
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);           \
1386
                                                                        \
1387
  if (1)                                                                \
1388
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)          \
1389
      {                                                                 \
1390
        DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);              \
1391
                                                                        \
1392
        reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
1393
        DEBUG_PRINT2 ("      info: %p\n",                               \
1394
                      reg_info[this_reg].word.pointer);                 \
1395
                                                                        \
1396
        regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
1397
        DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);             \
1398
                                                                        \
1399
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
1400
        DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);         \
1401
      }                                                                 \
1402
  else                                                                  \
1403
    {                                                                   \
1404
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1405
        {                                                               \
1406
          reg_info[this_reg].word.integer = 0;                           \
1407
          regend[this_reg] = 0;                                          \
1408
          regstart[this_reg] = 0;                                        \
1409
        }                                                               \
1410
      highest_active_reg = high_reg;                                    \
1411
    }                                                                   \
1412
                                                                        \
1413
  set_regs_matched_done = 0;                                             \
1414
  DEBUG_STATEMENT (nfailure_points_popped++);                           \
1415
} /* POP_FAILURE_POINT */
1416
 
1417
 
1418
 
1419
/* Structure for per-register (a.k.a. per-group) information.
1420
   Other register information, such as the
1421
   starting and ending positions (which are addresses), and the list of
1422
   inner groups (which is a bits list) are maintained in separate
1423
   variables.
1424
 
1425
   We are making a (strictly speaking) nonportable assumption here: that
1426
   the compiler will pack our bit fields into something that fits into
1427
   the type of `word', i.e., is something that fits into one item on the
1428
   failure stack.  */
1429
 
1430
 
1431
/* Declarations and macros for re_match_2.  */
1432
 
1433
typedef union
1434
{
1435
  fail_stack_elt_t word;
1436
  struct
1437
  {
1438
      /* This field is one if this group can match the empty string,
1439
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1440
#define MATCH_NULL_UNSET_VALUE 3
1441
    unsigned match_null_string_p : 2;
1442
    unsigned is_active : 1;
1443
    unsigned matched_something : 1;
1444
    unsigned ever_matched_something : 1;
1445
  } bits;
1446
} register_info_type;
1447
 
1448
#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1449
#define IS_ACTIVE(R)  ((R).bits.is_active)
1450
#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1451
#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1452
 
1453
 
1454
/* Call this when have matched a real character; it sets `matched' flags
1455
   for the subexpressions which we are currently inside.  Also records
1456
   that those subexprs have matched.  */
1457
#define SET_REGS_MATCHED()                                              \
1458
  do                                                                    \
1459
    {                                                                   \
1460
      if (!set_regs_matched_done)                                       \
1461
        {                                                               \
1462
          active_reg_t r;                                               \
1463
          set_regs_matched_done = 1;                                    \
1464
          for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1465
            {                                                           \
1466
              MATCHED_SOMETHING (reg_info[r])                           \
1467
                = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1468
                = 1;                                                    \
1469
            }                                                           \
1470
        }                                                               \
1471
    }                                                                   \
1472
  while (0)
1473
 
1474
/* Registers are set to a sentinel when they haven't yet matched.  */
1475
static char reg_unset_dummy;
1476
#define REG_UNSET_VALUE (&reg_unset_dummy)
1477
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1478
 
1479
/* Subroutine declarations and macros for regex_compile.  */
1480
 
1481
static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1482
                                              reg_syntax_t syntax,
1483
                                              struct re_pattern_buffer *bufp));
1484
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1485
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1486
                                 int arg1, int arg2));
1487
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1488
                                  int arg, unsigned char *end));
1489
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1490
                                  int arg1, int arg2, unsigned char *end));
1491
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1492
                                           reg_syntax_t syntax));
1493
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1494
                                           reg_syntax_t syntax));
1495
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1496
                                              const char *pend,
1497
                                              char *translate,
1498
                                              reg_syntax_t syntax,
1499
                                              unsigned char *b));
1500
 
1501
/* Fetch the next character in the uncompiled pattern---translating it
1502
   if necessary.  Also cast from a signed character in the constant
1503
   string passed to us by the user to an unsigned char that we can use
1504
   as an array index (in, e.g., `translate').  */
1505
#ifndef PATFETCH
1506
# define PATFETCH(c)                                                    \
1507
  do {if (p == pend) return REG_EEND;                                   \
1508
    c = (unsigned char) *p++;                                           \
1509
    if (translate) c = (unsigned char) translate[c];                    \
1510
  } while (0)
1511
#endif
1512
 
1513
/* Fetch the next character in the uncompiled pattern, with no
1514
   translation.  */
1515
#define PATFETCH_RAW(c)                                                 \
1516
  do {if (p == pend) return REG_EEND;                                   \
1517
    c = (unsigned char) *p++;                                           \
1518
  } while (0)
1519
 
1520
/* Go backwards one character in the pattern.  */
1521
#define PATUNFETCH p--
1522
 
1523
 
1524
/* If `translate' is non-null, return translate[D], else just D.  We
1525
   cast the subscript to translate because some data is declared as
1526
   `char *', to avoid warnings when a string constant is passed.  But
1527
   when we use a character as a subscript we must make it unsigned.  */
1528
#ifndef TRANSLATE
1529
# define TRANSLATE(d) \
1530
  (translate ? (char) translate[(unsigned char) (d)] : (d))
1531
#endif
1532
 
1533
 
1534
/* Macros for outputting the compiled pattern into `buffer'.  */
1535
 
1536
/* If the buffer isn't allocated when it comes in, use this.  */
1537
#define INIT_BUF_SIZE  32
1538
 
1539
/* Make sure we have at least N more bytes of space in buffer.  */
1540
#define GET_BUFFER_SPACE(n)                                             \
1541
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
1542
      EXTEND_BUFFER ()
1543
 
1544
/* Make sure we have one more byte of buffer space and then add C to it.  */
1545
#define BUF_PUSH(c)                                                     \
1546
  do {                                                                  \
1547
    GET_BUFFER_SPACE (1);                                               \
1548
    *b++ = (unsigned char) (c);                                         \
1549
  } while (0)
1550
 
1551
 
1552
/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1553
#define BUF_PUSH_2(c1, c2)                                              \
1554
  do {                                                                  \
1555
    GET_BUFFER_SPACE (2);                                               \
1556
    *b++ = (unsigned char) (c1);                                        \
1557
    *b++ = (unsigned char) (c2);                                        \
1558
  } while (0)
1559
 
1560
 
1561
/* As with BUF_PUSH_2, except for three bytes.  */
1562
#define BUF_PUSH_3(c1, c2, c3)                                          \
1563
  do {                                                                  \
1564
    GET_BUFFER_SPACE (3);                                               \
1565
    *b++ = (unsigned char) (c1);                                        \
1566
    *b++ = (unsigned char) (c2);                                        \
1567
    *b++ = (unsigned char) (c3);                                        \
1568
  } while (0)
1569
 
1570
 
1571
/* Store a jump with opcode OP at LOC to location TO.  We store a
1572
   relative address offset by the three bytes the jump itself occupies.  */
1573
#define STORE_JUMP(op, loc, to) \
1574
  store_op1 (op, loc, (int) ((to) - (loc) - 3))
1575
 
1576
/* Likewise, for a two-argument jump.  */
1577
#define STORE_JUMP2(op, loc, to, arg) \
1578
  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1579
 
1580
/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1581
#define INSERT_JUMP(op, loc, to) \
1582
  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1583
 
1584
/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1585
#define INSERT_JUMP2(op, loc, to, arg) \
1586
  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1587
 
1588
 
1589
/* This is not an arbitrary limit: the arguments which represent offsets
1590
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
1591
   be too small, many things would have to change.  */
1592
/* Any other compiler which, like MSC, has allocation limit below 2^16
1593
   bytes will have to use approach similar to what was done below for
1594
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1595
   reallocating to 0 bytes.  Such thing is not going to work too well.
1596
   You have been warned!!  */
1597
#if defined _MSC_VER  && !defined WIN32
1598
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1599
   The REALLOC define eliminates a flurry of conversion warnings,
1600
   but is not required. */
1601
# define MAX_BUF_SIZE  65500L
1602
# define REALLOC(p,s) realloc ((p), (size_t) (s))
1603
#else
1604
# define MAX_BUF_SIZE (1L << 16)
1605
# define REALLOC(p,s) realloc ((p), (s))
1606
#endif
1607
 
1608
/* Extend the buffer by twice its current size via realloc and
1609
   reset the pointers that pointed into the old block to point to the
1610
   correct places in the new one.  If extending the buffer results in it
1611
   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1612
#define EXTEND_BUFFER()                                                 \
1613
  do {                                                                  \
1614
    unsigned char *old_buffer = bufp->buffer;                           \
1615
    if (bufp->allocated == MAX_BUF_SIZE)                                \
1616
      return REG_ESIZE;                                                 \
1617
    bufp->allocated <<= 1;                                              \
1618
    if (bufp->allocated > MAX_BUF_SIZE)                                 \
1619
      bufp->allocated = MAX_BUF_SIZE;                                   \
1620
    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1621
    if (bufp->buffer == NULL)                                           \
1622
      return REG_ESPACE;                                                \
1623
    /* If the buffer moved, move all the pointers into it.  */          \
1624
    if (old_buffer != bufp->buffer)                                     \
1625
      {                                                                 \
1626
        b = (b - old_buffer) + bufp->buffer;                            \
1627
        begalt = (begalt - old_buffer) + bufp->buffer;                  \
1628
        if (fixup_alt_jump)                                             \
1629
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1630
        if (laststart)                                                  \
1631
          laststart = (laststart - old_buffer) + bufp->buffer;          \
1632
        if (pending_exact)                                              \
1633
          pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
1634
      }                                                                 \
1635
  } while (0)
1636
 
1637
 
1638
/* Since we have one byte reserved for the register number argument to
1639
   {start,stop}_memory, the maximum number of groups we can report
1640
   things about is what fits in that byte.  */
1641
#define MAX_REGNUM 255
1642
 
1643
/* But patterns can have more than `MAX_REGNUM' registers.  We just
1644
   ignore the excess.  */
1645
typedef unsigned regnum_t;
1646
 
1647
 
1648
/* Macros for the compile stack.  */
1649
 
1650
/* Since offsets can go either forwards or backwards, this type needs to
1651
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1652
/* int may be not enough when sizeof(int) == 2.  */
1653
typedef long pattern_offset_t;
1654
 
1655
typedef struct
1656
{
1657
  pattern_offset_t begalt_offset;
1658
  pattern_offset_t fixup_alt_jump;
1659
  pattern_offset_t inner_group_offset;
1660
  pattern_offset_t laststart_offset;
1661
  regnum_t regnum;
1662
} compile_stack_elt_t;
1663
 
1664
 
1665
typedef struct
1666
{
1667
  compile_stack_elt_t *stack;
1668
  unsigned size;
1669
  unsigned avail;                       /* Offset of next open position.  */
1670
} compile_stack_type;
1671
 
1672
 
1673
#define INIT_COMPILE_STACK_SIZE 32
1674
 
1675
#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1676
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1677
 
1678
/* The next available element.  */
1679
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1680
 
1681
 
1682
/* Set the bit for character C in a list.  */
1683
#define SET_LIST_BIT(c)                               \
1684
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
1685
   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1686
 
1687
 
1688
/* Get the next unsigned number in the uncompiled pattern.  */
1689
#define GET_UNSIGNED_NUMBER(num)                                        \
1690
  { if (p != pend)                                                      \
1691
     {                                                                  \
1692
       PATFETCH (c);                                                    \
1693
       while (ISDIGIT (c))                                              \
1694
         {                                                              \
1695
           if (num < 0)                                                  \
1696
              num = 0;                                                   \
1697
           num = num * 10 + c - '0';                                    \
1698
           if (p == pend)                                               \
1699
              break;                                                    \
1700
           PATFETCH (c);                                                \
1701
         }                                                              \
1702
       }                                                                \
1703
    }
1704
 
1705
/* Use this only if they have btowc(), since wctype() is used below
1706
   together with btowc().  btowc() is defined in the 1994 Amendment 1
1707
   to ISO C and may not be present on systems where we have wchar.h
1708
   and wctype.h.  */
1709
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
1710
/* The GNU C library provides support for user-defined character classes
1711
   and the functions from ISO C amendement 1.  */
1712
# ifdef CHARCLASS_NAME_MAX
1713
#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1714
# else
1715
/* This shouldn't happen but some implementation might still have this
1716
   problem.  Use a reasonable default value.  */
1717
#  define CHAR_CLASS_MAX_LENGTH 256
1718
# endif
1719
 
1720
# ifdef _LIBC
1721
#  define IS_CHAR_CLASS(string) __wctype (string)
1722
# else
1723
#  define IS_CHAR_CLASS(string) wctype (string)
1724
# endif
1725
#else
1726
# define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1727
 
1728
# define IS_CHAR_CLASS(string)                                          \
1729
   (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1730
    || STREQ (string, "lower") || STREQ (string, "digit")               \
1731
    || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1732
    || STREQ (string, "space") || STREQ (string, "print")               \
1733
    || STREQ (string, "punct") || STREQ (string, "graph")               \
1734
    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1735
#endif
1736
 
1737
#ifndef MATCH_MAY_ALLOCATE
1738
 
1739
/* If we cannot allocate large objects within re_match_2_internal,
1740
   we make the fail stack and register vectors global.
1741
   The fail stack, we grow to the maximum size when a regexp
1742
   is compiled.
1743
   The register vectors, we adjust in size each time we
1744
   compile a regexp, according to the number of registers it needs.  */
1745
 
1746
static fail_stack_type fail_stack;
1747
 
1748
/* Size with which the following vectors are currently allocated.
1749
   That is so we can make them bigger as needed,
1750
   but never make them smaller.  */
1751
static int regs_allocated_size;
1752
 
1753
static const char **     regstart, **     regend;
1754
static const char ** old_regstart, ** old_regend;
1755
static const char **best_regstart, **best_regend;
1756
static register_info_type *reg_info;
1757
static const char **reg_dummy;
1758
static register_info_type *reg_info_dummy;
1759
 
1760
/* Make the register vectors big enough for NUM_REGS registers,
1761
   but don't make them smaller.  */
1762
 
1763
static
1764
regex_grow_registers (num_regs)
1765
     int num_regs;
1766
{
1767
  if (num_regs > regs_allocated_size)
1768
    {
1769
      RETALLOC_IF (regstart,     num_regs, const char *);
1770
      RETALLOC_IF (regend,       num_regs, const char *);
1771
      RETALLOC_IF (old_regstart, num_regs, const char *);
1772
      RETALLOC_IF (old_regend,   num_regs, const char *);
1773
      RETALLOC_IF (best_regstart, num_regs, const char *);
1774
      RETALLOC_IF (best_regend,  num_regs, const char *);
1775
      RETALLOC_IF (reg_info,     num_regs, register_info_type);
1776
      RETALLOC_IF (reg_dummy,    num_regs, const char *);
1777
      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1778
 
1779
      regs_allocated_size = num_regs;
1780
    }
1781
}
1782
 
1783
#endif /* not MATCH_MAY_ALLOCATE */
1784
 
1785
static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1786
                                                 compile_stack,
1787
                                                 regnum_t regnum));
1788
 
1789
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1790
   Returns one of error codes defined in `gnu-regex.h', or zero for success.
1791
 
1792
   Assumes the `allocated' (and perhaps `buffer') and `translate'
1793
   fields are set in BUFP on entry.
1794
 
1795
   If it succeeds, results are put in BUFP (if it returns an error, the
1796
   contents of BUFP are undefined):
1797
     `buffer' is the compiled pattern;
1798
     `syntax' is set to SYNTAX;
1799
     `used' is set to the length of the compiled pattern;
1800
     `fastmap_accurate' is zero;
1801
     `re_nsub' is the number of subexpressions in PATTERN;
1802
     `not_bol' and `not_eol' are zero;
1803
 
1804
   The `fastmap' and `newline_anchor' fields are neither
1805
   examined nor set.  */
1806
 
1807
/* Return, freeing storage we allocated.  */
1808
#define FREE_STACK_RETURN(value)                \
1809
  return (free (compile_stack.stack), value)
1810
 
1811
static reg_errcode_t
1812
regex_compile (pattern, size, syntax, bufp)
1813
     const char *pattern;
1814
     size_t size;
1815
     reg_syntax_t syntax;
1816
     struct re_pattern_buffer *bufp;
1817
{
1818
  /* We fetch characters from PATTERN here.  Even though PATTERN is
1819
     `char *' (i.e., signed), we declare these variables as unsigned, so
1820
     they can be reliably used as array indices.  */
1821
  register unsigned char c, c1;
1822
 
1823
  /* A random temporary spot in PATTERN.  */
1824
  const char *p1;
1825
 
1826
  /* Points to the end of the buffer, where we should append.  */
1827
  register unsigned char *b;
1828
 
1829
  /* Keeps track of unclosed groups.  */
1830
  compile_stack_type compile_stack;
1831
 
1832
  /* Points to the current (ending) position in the pattern.  */
1833
  const char *p = pattern;
1834
  const char *pend = pattern + size;
1835
 
1836
  /* How to translate the characters in the pattern.  */
1837
  RE_TRANSLATE_TYPE translate = bufp->translate;
1838
 
1839
  /* Address of the count-byte of the most recently inserted `exactn'
1840
     command.  This makes it possible to tell if a new exact-match
1841
     character can be added to that command or if the character requires
1842
     a new `exactn' command.  */
1843
  unsigned char *pending_exact = 0;
1844
 
1845
  /* Address of start of the most recently finished expression.
1846
     This tells, e.g., postfix * where to find the start of its
1847
     operand.  Reset at the beginning of groups and alternatives.  */
1848
  unsigned char *laststart = 0;
1849
 
1850
  /* Address of beginning of regexp, or inside of last group.  */
1851
  unsigned char *begalt;
1852
 
1853
  /* Place in the uncompiled pattern (i.e., the {) to
1854
     which to go back if the interval is invalid.  */
1855
  const char *beg_interval;
1856
 
1857
  /* Address of the place where a forward jump should go to the end of
1858
     the containing expression.  Each alternative of an `or' -- except the
1859
     last -- ends with a forward jump of this sort.  */
1860
  unsigned char *fixup_alt_jump = 0;
1861
 
1862
  /* Counts open-groups as they are encountered.  Remembered for the
1863
     matching close-group on the compile stack, so the same register
1864
     number is put in the stop_memory as the start_memory.  */
1865
  regnum_t regnum = 0;
1866
 
1867
#ifdef DEBUG
1868
  DEBUG_PRINT1 ("\nCompiling pattern: ");
1869
  if (debug)
1870
    {
1871
      unsigned debug_count;
1872
 
1873
      for (debug_count = 0; debug_count < size; debug_count++)
1874
        putchar (pattern[debug_count]);
1875
      putchar ('\n');
1876
    }
1877
#endif /* DEBUG */
1878
 
1879
  /* Initialize the compile stack.  */
1880
  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1881
  if (compile_stack.stack == NULL)
1882
    return REG_ESPACE;
1883
 
1884
  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1885
  compile_stack.avail = 0;
1886
 
1887
  /* Initialize the pattern buffer.  */
1888
  bufp->syntax = syntax;
1889
  bufp->fastmap_accurate = 0;
1890
  bufp->not_bol = bufp->not_eol = 0;
1891
 
1892
  /* Set `used' to zero, so that if we return an error, the pattern
1893
     printer (for debugging) will think there's no pattern.  We reset it
1894
     at the end.  */
1895
  bufp->used = 0;
1896
 
1897
  /* Always count groups, whether or not bufp->no_sub is set.  */
1898
  bufp->re_nsub = 0;
1899
 
1900
#if !defined emacs && !defined SYNTAX_TABLE
1901
  /* Initialize the syntax table.  */
1902
   init_syntax_once ();
1903
#endif
1904
 
1905
  if (bufp->allocated == 0)
1906
    {
1907
      if (bufp->buffer)
1908
        { /* If zero allocated, but buffer is non-null, try to realloc
1909
             enough space.  This loses if buffer's address is bogus, but
1910
             that is the user's responsibility.  */
1911
          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1912
        }
1913
      else
1914
        { /* Caller did not allocate a buffer.  Do it for them.  */
1915
          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1916
        }
1917
      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1918
 
1919
      bufp->allocated = INIT_BUF_SIZE;
1920
    }
1921
 
1922
  begalt = b = bufp->buffer;
1923
 
1924
  /* Loop through the uncompiled pattern until we're at the end.  */
1925
  while (p != pend)
1926
    {
1927
      PATFETCH (c);
1928
 
1929
      switch (c)
1930
        {
1931
        case '^':
1932
          {
1933
            if (   /* If at start of pattern, it's an operator.  */
1934
                   p == pattern + 1
1935
                   /* If context independent, it's an operator.  */
1936
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1937
                   /* Otherwise, depends on what's come before.  */
1938
                || at_begline_loc_p (pattern, p, syntax))
1939
              BUF_PUSH (begline);
1940
            else
1941
              goto normal_char;
1942
          }
1943
          break;
1944
 
1945
 
1946
        case '$':
1947
          {
1948
            if (   /* If at end of pattern, it's an operator.  */
1949
                   p == pend
1950
                   /* If context independent, it's an operator.  */
1951
                || syntax & RE_CONTEXT_INDEP_ANCHORS
1952
                   /* Otherwise, depends on what's next.  */
1953
                || at_endline_loc_p (p, pend, syntax))
1954
               BUF_PUSH (endline);
1955
             else
1956
               goto normal_char;
1957
           }
1958
           break;
1959
 
1960
 
1961
        case '+':
1962
        case '?':
1963
          if ((syntax & RE_BK_PLUS_QM)
1964
              || (syntax & RE_LIMITED_OPS))
1965
            goto normal_char;
1966
        handle_plus:
1967
        case '*':
1968
          /* If there is no previous pattern... */
1969
          if (!laststart)
1970
            {
1971
              if (syntax & RE_CONTEXT_INVALID_OPS)
1972
                FREE_STACK_RETURN (REG_BADRPT);
1973
              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1974
                goto normal_char;
1975
            }
1976
 
1977
          {
1978
            /* Are we optimizing this jump?  */
1979
            boolean keep_string_p = false;
1980
 
1981
            /* 1 means zero (many) matches is allowed.  */
1982
            char zero_times_ok = 0, many_times_ok = 0;
1983
 
1984
            /* If there is a sequence of repetition chars, collapse it
1985
               down to just one (the right one).  We can't combine
1986
               interval operators with these because of, e.g., `a{2}*',
1987
               which should only match an even number of `a's.  */
1988
 
1989
            for (;;)
1990
              {
1991
                zero_times_ok |= c != '+';
1992
                many_times_ok |= c != '?';
1993
 
1994
                if (p == pend)
1995
                  break;
1996
 
1997
                PATFETCH (c);
1998
 
1999
                if (c == '*'
2000
                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2001
                  ;
2002
 
2003
                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2004
                  {
2005
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2006
 
2007
                    PATFETCH (c1);
2008
                    if (!(c1 == '+' || c1 == '?'))
2009
                      {
2010
                        PATUNFETCH;
2011
                        PATUNFETCH;
2012
                        break;
2013
                      }
2014
 
2015
                    c = c1;
2016
                  }
2017
                else
2018
                  {
2019
                    PATUNFETCH;
2020
                    break;
2021
                  }
2022
 
2023
                /* If we get here, we found another repeat character.  */
2024
               }
2025
 
2026
            /* Star, etc. applied to an empty pattern is equivalent
2027
               to an empty pattern.  */
2028
            if (!laststart)
2029
              break;
2030
 
2031
            /* Now we know whether or not zero matches is allowed
2032
               and also whether or not two or more matches is allowed.  */
2033
            if (many_times_ok)
2034
              { /* More than one repetition is allowed, so put in at the
2035
                   end a backward relative jump from `b' to before the next
2036
                   jump we're going to put in below (which jumps from
2037
                   laststart to after this jump).
2038
 
2039
                   But if we are at the `*' in the exact sequence `.*\n',
2040
                   insert an unconditional jump backwards to the .,
2041
                   instead of the beginning of the loop.  This way we only
2042
                   push a failure point once, instead of every time
2043
                   through the loop.  */
2044
                assert (p - 1 > pattern);
2045
 
2046
                /* Allocate the space for the jump.  */
2047
                GET_BUFFER_SPACE (3);
2048
 
2049
                /* We know we are not at the first character of the pattern,
2050
                   because laststart was nonzero.  And we've already
2051
                   incremented `p', by the way, to be the character after
2052
                   the `*'.  Do we have to do something analogous here
2053
                   for null bytes, because of RE_DOT_NOT_NULL?  */
2054
                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2055
                    && zero_times_ok
2056
                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2057
                    && !(syntax & RE_DOT_NEWLINE))
2058
                  { /* We have .*\n.  */
2059
                    STORE_JUMP (jump, b, laststart);
2060
                    keep_string_p = true;
2061
                  }
2062
                else
2063
                  /* Anything else.  */
2064
                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2065
 
2066
                /* We've added more stuff to the buffer.  */
2067
                b += 3;
2068
              }
2069
 
2070
            /* On failure, jump from laststart to b + 3, which will be the
2071
               end of the buffer after this jump is inserted.  */
2072
            GET_BUFFER_SPACE (3);
2073
            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2074
                                       : on_failure_jump,
2075
                         laststart, b + 3);
2076
            pending_exact = 0;
2077
            b += 3;
2078
 
2079
            if (!zero_times_ok)
2080
              {
2081
                /* At least one repetition is required, so insert a
2082
                   `dummy_failure_jump' before the initial
2083
                   `on_failure_jump' instruction of the loop. This
2084
                   effects a skip over that instruction the first time
2085
                   we hit that loop.  */
2086
                GET_BUFFER_SPACE (3);
2087
                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2088
                b += 3;
2089
              }
2090
            }
2091
          break;
2092
 
2093
 
2094
        case '.':
2095
          laststart = b;
2096
          BUF_PUSH (anychar);
2097
          break;
2098
 
2099
 
2100
        case '[':
2101
          {
2102
            boolean had_char_class = false;
2103
 
2104
            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2105
 
2106
            /* Ensure that we have enough space to push a charset: the
2107
               opcode, the length count, and the bitset; 34 bytes in all.  */
2108
            GET_BUFFER_SPACE (34);
2109
 
2110
            laststart = b;
2111
 
2112
            /* We test `*p == '^' twice, instead of using an if
2113
               statement, so we only need one BUF_PUSH.  */
2114
            BUF_PUSH (*p == '^' ? charset_not : charset);
2115
            if (*p == '^')
2116
              p++;
2117
 
2118
            /* Remember the first position in the bracket expression.  */
2119
            p1 = p;
2120
 
2121
            /* Push the number of bytes in the bitmap.  */
2122
            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2123
 
2124
            /* Clear the whole map.  */
2125
            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2126
 
2127
            /* charset_not matches newline according to a syntax bit.  */
2128
            if ((re_opcode_t) b[-2] == charset_not
2129
                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2130
              SET_LIST_BIT ('\n');
2131
 
2132
            /* Read in characters and ranges, setting map bits.  */
2133
            for (;;)
2134
              {
2135
                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2136
 
2137
                PATFETCH (c);
2138
 
2139
                /* \ might escape characters inside [...] and [^...].  */
2140
                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2141
                  {
2142
                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2143
 
2144
                    PATFETCH (c1);
2145
                    SET_LIST_BIT (c1);
2146
                    continue;
2147
                  }
2148
 
2149
                /* Could be the end of the bracket expression.  If it's
2150
                   not (i.e., when the bracket expression is `[]' so
2151
                   far), the ']' character bit gets set way below.  */
2152
                if (c == ']' && p != p1 + 1)
2153
                  break;
2154
 
2155
                /* Look ahead to see if it's a range when the last thing
2156
                   was a character class.  */
2157
                if (had_char_class && c == '-' && *p != ']')
2158
                  FREE_STACK_RETURN (REG_ERANGE);
2159
 
2160
                /* Look ahead to see if it's a range when the last thing
2161
                   was a character: if this is a hyphen not at the
2162
                   beginning or the end of a list, then it's the range
2163
                   operator.  */
2164
                if (c == '-'
2165
                    && !(p - 2 >= pattern && p[-2] == '[')
2166
                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2167
                    && *p != ']')
2168
                  {
2169
                    reg_errcode_t ret
2170
                      = compile_range (&p, pend, translate, syntax, b);
2171
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2172
                  }
2173
 
2174
                else if (p[0] == '-' && p[1] != ']')
2175
                  { /* This handles ranges made up of characters only.  */
2176
                    reg_errcode_t ret;
2177
 
2178
                    /* Move past the `-'.  */
2179
                    PATFETCH (c1);
2180
 
2181
                    ret = compile_range (&p, pend, translate, syntax, b);
2182
                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2183
                  }
2184
 
2185
                /* See if we're at the beginning of a possible character
2186
                   class.  */
2187
 
2188
                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2189
                  { /* Leave room for the null.  */
2190
                    char str[CHAR_CLASS_MAX_LENGTH + 1];
2191
 
2192
                    PATFETCH (c);
2193
                    c1 = 0;
2194
 
2195
                    /* If pattern is `[[:'.  */
2196
                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2197
 
2198
                    for (;;)
2199
                      {
2200
                        PATFETCH (c);
2201
                        if ((c == ':' && *p == ']') || p == pend
2202
                            || c1 == CHAR_CLASS_MAX_LENGTH)
2203
                          break;
2204
                        str[c1++] = c;
2205
                      }
2206
                    str[c1] = '\0';
2207
 
2208
                    /* If isn't a word bracketed by `[:' and `:]':
2209
                       undo the ending character, the letters, and leave
2210
                       the leading `:' and `[' (but set bits for them).  */
2211
                    if (c == ':' && *p == ']')
2212
                      {
2213
/* CYGNUS LOCAL: Skip this code if we don't have btowc().  btowc() is */
2214
/* defined in the 1994 Amendment 1 to ISO C and may not be present on */
2215
/* systems where we have wchar.h and wctype.h.   */
2216
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
2217
                        boolean is_lower = STREQ (str, "lower");
2218
                        boolean is_upper = STREQ (str, "upper");
2219
                        wctype_t wt;
2220
                        int ch;
2221
 
2222
                        wt = IS_CHAR_CLASS (str);
2223
                        if (wt == 0)
2224
                          FREE_STACK_RETURN (REG_ECTYPE);
2225
 
2226
                        /* Throw away the ] at the end of the character
2227
                           class.  */
2228
                        PATFETCH (c);
2229
 
2230
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2231
 
2232
                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2233
                          {
2234
# ifdef _LIBC
2235
                            if (__iswctype (__btowc (ch), wt))
2236
                              SET_LIST_BIT (ch);
2237
#else
2238
                            if (iswctype (btowc (ch), wt))
2239
                              SET_LIST_BIT (ch);
2240
#endif
2241
 
2242
                            if (translate && (is_upper || is_lower)
2243
                                && (ISUPPER (ch) || ISLOWER (ch)))
2244
                              SET_LIST_BIT (ch);
2245
                          }
2246
 
2247
                        had_char_class = true;
2248
#else
2249
                        int ch;
2250
                        boolean is_alnum = STREQ (str, "alnum");
2251
                        boolean is_alpha = STREQ (str, "alpha");
2252
                        boolean is_blank = STREQ (str, "blank");
2253
                        boolean is_cntrl = STREQ (str, "cntrl");
2254
                        boolean is_digit = STREQ (str, "digit");
2255
                        boolean is_graph = STREQ (str, "graph");
2256
                        boolean is_lower = STREQ (str, "lower");
2257
                        boolean is_print = STREQ (str, "print");
2258
                        boolean is_punct = STREQ (str, "punct");
2259
                        boolean is_space = STREQ (str, "space");
2260
                        boolean is_upper = STREQ (str, "upper");
2261
                        boolean is_xdigit = STREQ (str, "xdigit");
2262
 
2263
                        if (!IS_CHAR_CLASS (str))
2264
                          FREE_STACK_RETURN (REG_ECTYPE);
2265
 
2266
                        /* Throw away the ] at the end of the character
2267
                           class.  */
2268
                        PATFETCH (c);
2269
 
2270
                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2271
 
2272
                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2273
                          {
2274
                            /* This was split into 3 if's to
2275
                               avoid an arbitrary limit in some compiler.  */
2276
                            if (   (is_alnum  && ISALNUM (ch))
2277
                                || (is_alpha  && ISALPHA (ch))
2278
                                || (is_blank  && ISBLANK (ch))
2279
                                || (is_cntrl  && ISCNTRL (ch)))
2280
                              SET_LIST_BIT (ch);
2281
                            if (   (is_digit  && ISDIGIT (ch))
2282
                                || (is_graph  && ISGRAPH (ch))
2283
                                || (is_lower  && ISLOWER (ch))
2284
                                || (is_print  && ISPRINT (ch)))
2285
                              SET_LIST_BIT (ch);
2286
                            if (   (is_punct  && ISPUNCT (ch))
2287
                                || (is_space  && ISSPACE (ch))
2288
                                || (is_upper  && ISUPPER (ch))
2289
                                || (is_xdigit && ISXDIGIT (ch)))
2290
                              SET_LIST_BIT (ch);
2291
                            if (   translate && (is_upper || is_lower)
2292
                                && (ISUPPER (ch) || ISLOWER (ch)))
2293
                              SET_LIST_BIT (ch);
2294
                          }
2295
                        had_char_class = true;
2296
#endif  /* libc || wctype.h */
2297
                      }
2298
                    else
2299
                      {
2300
                        c1++;
2301
                        while (c1--)
2302
                          PATUNFETCH;
2303
                        SET_LIST_BIT ('[');
2304
                        SET_LIST_BIT (':');
2305
                        had_char_class = false;
2306
                      }
2307
                  }
2308
                else
2309
                  {
2310
                    had_char_class = false;
2311
                    SET_LIST_BIT (c);
2312
                  }
2313
              }
2314
 
2315
            /* Discard any (non)matching list bytes that are all 0 at the
2316
               end of the map.  Decrease the map-length byte too.  */
2317
            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2318
              b[-1]--;
2319
            b += b[-1];
2320
          }
2321
          break;
2322
 
2323
 
2324
        case '(':
2325
          if (syntax & RE_NO_BK_PARENS)
2326
            goto handle_open;
2327
          else
2328
            goto normal_char;
2329
 
2330
 
2331
        case ')':
2332
          if (syntax & RE_NO_BK_PARENS)
2333
            goto handle_close;
2334
          else
2335
            goto normal_char;
2336
 
2337
 
2338
        case '\n':
2339
          if (syntax & RE_NEWLINE_ALT)
2340
            goto handle_alt;
2341
          else
2342
            goto normal_char;
2343
 
2344
 
2345
        case '|':
2346
          if (syntax & RE_NO_BK_VBAR)
2347
            goto handle_alt;
2348
          else
2349
            goto normal_char;
2350
 
2351
 
2352
        case '{':
2353
           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2354
             goto handle_interval;
2355
           else
2356
             goto normal_char;
2357
 
2358
 
2359
        case '\\':
2360
          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2361
 
2362
          /* Do not translate the character after the \, so that we can
2363
             distinguish, e.g., \B from \b, even if we normally would
2364
             translate, e.g., B to b.  */
2365
          PATFETCH_RAW (c);
2366
 
2367
          switch (c)
2368
            {
2369
            case '(':
2370
              if (syntax & RE_NO_BK_PARENS)
2371
                goto normal_backslash;
2372
 
2373
            handle_open:
2374
              bufp->re_nsub++;
2375
              regnum++;
2376
 
2377
              if (COMPILE_STACK_FULL)
2378
                {
2379
                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2380
                            compile_stack_elt_t);
2381
                  if (compile_stack.stack == NULL) return REG_ESPACE;
2382
 
2383
                  compile_stack.size <<= 1;
2384
                }
2385
 
2386
              /* These are the values to restore when we hit end of this
2387
                 group.  They are all relative offsets, so that if the
2388
                 whole pattern moves because of realloc, they will still
2389
                 be valid.  */
2390
              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2391
              COMPILE_STACK_TOP.fixup_alt_jump
2392
                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2393
              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2394
              COMPILE_STACK_TOP.regnum = regnum;
2395
 
2396
              /* We will eventually replace the 0 with the number of
2397
                 groups inner to this one.  But do not push a
2398
                 start_memory for groups beyond the last one we can
2399
                 represent in the compiled pattern.  */
2400
              if (regnum <= MAX_REGNUM)
2401
                {
2402
                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2403
                  BUF_PUSH_3 (start_memory, regnum, 0);
2404
                }
2405
 
2406
              compile_stack.avail++;
2407
 
2408
              fixup_alt_jump = 0;
2409
              laststart = 0;
2410
              begalt = b;
2411
              /* If we've reached MAX_REGNUM groups, then this open
2412
                 won't actually generate any code, so we'll have to
2413
                 clear pending_exact explicitly.  */
2414
              pending_exact = 0;
2415
              break;
2416
 
2417
 
2418
            case ')':
2419
              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2420
 
2421
              if (COMPILE_STACK_EMPTY)
2422
                {
2423
                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2424
                    goto normal_backslash;
2425
                  else
2426
                    FREE_STACK_RETURN (REG_ERPAREN);
2427
                }
2428
 
2429
            handle_close:
2430
              if (fixup_alt_jump)
2431
                { /* Push a dummy failure point at the end of the
2432
                     alternative for a possible future
2433
                     `pop_failure_jump' to pop.  See comments at
2434
                     `push_dummy_failure' in `re_match_2'.  */
2435
                  BUF_PUSH (push_dummy_failure);
2436
 
2437
                  /* We allocated space for this jump when we assigned
2438
                     to `fixup_alt_jump', in the `handle_alt' case below.  */
2439
                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2440
                }
2441
 
2442
              /* See similar code for backslashed left paren above.  */
2443
              if (COMPILE_STACK_EMPTY)
2444
                {
2445
                  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2446
                    goto normal_char;
2447
                  else
2448
                    FREE_STACK_RETURN (REG_ERPAREN);
2449
                }
2450
 
2451
              /* Since we just checked for an empty stack above, this
2452
                 ``can't happen''.  */
2453
              assert (compile_stack.avail != 0);
2454
              {
2455
                /* We don't just want to restore into `regnum', because
2456
                   later groups should continue to be numbered higher,
2457
                   as in `(ab)c(de)' -- the second group is #2.  */
2458
                regnum_t this_group_regnum;
2459
 
2460
                compile_stack.avail--;
2461
                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2462
                fixup_alt_jump
2463
                  = COMPILE_STACK_TOP.fixup_alt_jump
2464
                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2465
                    : 0;
2466
                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2467
                this_group_regnum = COMPILE_STACK_TOP.regnum;
2468
                /* If we've reached MAX_REGNUM groups, then this open
2469
                   won't actually generate any code, so we'll have to
2470
                   clear pending_exact explicitly.  */
2471
                pending_exact = 0;
2472
 
2473
                /* We're at the end of the group, so now we know how many
2474
                   groups were inside this one.  */
2475
                if (this_group_regnum <= MAX_REGNUM)
2476
                  {
2477
                    unsigned char *inner_group_loc
2478
                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2479
 
2480
                    *inner_group_loc = regnum - this_group_regnum;
2481
                    BUF_PUSH_3 (stop_memory, this_group_regnum,
2482
                                regnum - this_group_regnum);
2483
                  }
2484
              }
2485
              break;
2486
 
2487
 
2488
            case '|':                                   /* `\|'.  */
2489
              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2490
                goto normal_backslash;
2491
            handle_alt:
2492
              if (syntax & RE_LIMITED_OPS)
2493
                goto normal_char;
2494
 
2495
              /* Insert before the previous alternative a jump which
2496
                 jumps to this alternative if the former fails.  */
2497
              GET_BUFFER_SPACE (3);
2498
              INSERT_JUMP (on_failure_jump, begalt, b + 6);
2499
              pending_exact = 0;
2500
              b += 3;
2501
 
2502
              /* The alternative before this one has a jump after it
2503
                 which gets executed if it gets matched.  Adjust that
2504
                 jump so it will jump to this alternative's analogous
2505
                 jump (put in below, which in turn will jump to the next
2506
                 (if any) alternative's such jump, etc.).  The last such
2507
                 jump jumps to the correct final destination.  A picture:
2508
                          _____ _____
2509
                          |   | |   |
2510
                          |   v |   v
2511
                         a | b   | c
2512
 
2513
                 If we are at `b', then fixup_alt_jump right now points to a
2514
                 three-byte space after `a'.  We'll put in the jump, set
2515
                 fixup_alt_jump to right after `b', and leave behind three
2516
                 bytes which we'll fill in when we get to after `c'.  */
2517
 
2518
              if (fixup_alt_jump)
2519
                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2520
 
2521
              /* Mark and leave space for a jump after this alternative,
2522
                 to be filled in later either by next alternative or
2523
                 when know we're at the end of a series of alternatives.  */
2524
              fixup_alt_jump = b;
2525
              GET_BUFFER_SPACE (3);
2526
              b += 3;
2527
 
2528
              laststart = 0;
2529
              begalt = b;
2530
              break;
2531
 
2532
 
2533
            case '{':
2534
              /* If \{ is a literal.  */
2535
              if (!(syntax & RE_INTERVALS)
2536
                     /* If we're at `\{' and it's not the open-interval
2537
                        operator.  */
2538
                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2539
                  || (p - 2 == pattern  &&  p == pend))
2540
                goto normal_backslash;
2541
 
2542
            handle_interval:
2543
              {
2544
                /* If got here, then the syntax allows intervals.  */
2545
 
2546
                /* At least (most) this many matches must be made.  */
2547
                int lower_bound = -1, upper_bound = -1;
2548
 
2549
                beg_interval = p - 1;
2550
 
2551
                if (p == pend)
2552
                  {
2553
                    if (syntax & RE_NO_BK_BRACES)
2554
                      goto unfetch_interval;
2555
                    else
2556
                      FREE_STACK_RETURN (REG_EBRACE);
2557
                  }
2558
 
2559
                GET_UNSIGNED_NUMBER (lower_bound);
2560
 
2561
                if (c == ',')
2562
                  {
2563
                    GET_UNSIGNED_NUMBER (upper_bound);
2564
                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2565
                  }
2566
                else
2567
                  /* Interval such as `{1}' => match exactly once. */
2568
                  upper_bound = lower_bound;
2569
 
2570
                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2571
                    || lower_bound > upper_bound)
2572
                  {
2573
                    if (syntax & RE_NO_BK_BRACES)
2574
                      goto unfetch_interval;
2575
                    else
2576
                      FREE_STACK_RETURN (REG_BADBR);
2577
                  }
2578
 
2579
                if (!(syntax & RE_NO_BK_BRACES))
2580
                  {
2581
                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2582
 
2583
                    PATFETCH (c);
2584
                  }
2585
 
2586
                if (c != '}')
2587
                  {
2588
                    if (syntax & RE_NO_BK_BRACES)
2589
                      goto unfetch_interval;
2590
                    else
2591
                      FREE_STACK_RETURN (REG_BADBR);
2592
                  }
2593
 
2594
                /* We just parsed a valid interval.  */
2595
 
2596
                /* If it's invalid to have no preceding re.  */
2597
                if (!laststart)
2598
                  {
2599
                    if (syntax & RE_CONTEXT_INVALID_OPS)
2600
                      FREE_STACK_RETURN (REG_BADRPT);
2601
                    else if (syntax & RE_CONTEXT_INDEP_OPS)
2602
                      laststart = b;
2603
                    else
2604
                      goto unfetch_interval;
2605
                  }
2606
 
2607
                /* If the upper bound is zero, don't want to succeed at
2608
                   all; jump from `laststart' to `b + 3', which will be
2609
                   the end of the buffer after we insert the jump.  */
2610
                 if (upper_bound == 0)
2611
                   {
2612
                     GET_BUFFER_SPACE (3);
2613
                     INSERT_JUMP (jump, laststart, b + 3);
2614
                     b += 3;
2615
                   }
2616
 
2617
                 /* Otherwise, we have a nontrivial interval.  When
2618
                    we're all done, the pattern will look like:
2619
                      set_number_at <jump count> <upper bound>
2620
                      set_number_at <succeed_n count> <lower bound>
2621
                      succeed_n <after jump addr> <succeed_n count>
2622
                      <body of loop>
2623
                      jump_n <succeed_n addr> <jump count>
2624
                    (The upper bound and `jump_n' are omitted if
2625
                    `upper_bound' is 1, though.)  */
2626
                 else
2627
                   { /* If the upper bound is > 1, we need to insert
2628
                        more at the end of the loop.  */
2629
                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2630
 
2631
                     GET_BUFFER_SPACE (nbytes);
2632
 
2633
                     /* Initialize lower bound of the `succeed_n', even
2634
                        though it will be set during matching by its
2635
                        attendant `set_number_at' (inserted next),
2636
                        because `re_compile_fastmap' needs to know.
2637
                        Jump to the `jump_n' we might insert below.  */
2638
                     INSERT_JUMP2 (succeed_n, laststart,
2639
                                   b + 5 + (upper_bound > 1) * 5,
2640
                                   lower_bound);
2641
                     b += 5;
2642
 
2643
                     /* Code to initialize the lower bound.  Insert
2644
                        before the `succeed_n'.  The `5' is the last two
2645
                        bytes of this `set_number_at', plus 3 bytes of
2646
                        the following `succeed_n'.  */
2647
                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2648
                     b += 5;
2649
 
2650
                     if (upper_bound > 1)
2651
                       { /* More than one repetition is allowed, so
2652
                            append a backward jump to the `succeed_n'
2653
                            that starts this interval.
2654
 
2655
                            When we've reached this during matching,
2656
                            we'll have matched the interval once, so
2657
                            jump back only `upper_bound - 1' times.  */
2658
                         STORE_JUMP2 (jump_n, b, laststart + 5,
2659
                                      upper_bound - 1);
2660
                         b += 5;
2661
 
2662
                         /* The location we want to set is the second
2663
                            parameter of the `jump_n'; that is `b-2' as
2664
                            an absolute address.  `laststart' will be
2665
                            the `set_number_at' we're about to insert;
2666
                            `laststart+3' the number to set, the source
2667
                            for the relative address.  But we are
2668
                            inserting into the middle of the pattern --
2669
                            so everything is getting moved up by 5.
2670
                            Conclusion: (b - 2) - (laststart + 3) + 5,
2671
                            i.e., b - laststart.
2672
 
2673
                            We insert this at the beginning of the loop
2674
                            so that if we fail during matching, we'll
2675
                            reinitialize the bounds.  */
2676
                         insert_op2 (set_number_at, laststart, b - laststart,
2677
                                     upper_bound - 1, b);
2678
                         b += 5;
2679
                       }
2680
                   }
2681
                pending_exact = 0;
2682
                beg_interval = NULL;
2683
              }
2684
              break;
2685
 
2686
            unfetch_interval:
2687
              /* If an invalid interval, match the characters as literals.  */
2688
               assert (beg_interval);
2689
               p = beg_interval;
2690
               beg_interval = NULL;
2691
 
2692
               /* normal_char and normal_backslash need `c'.  */
2693
               PATFETCH (c);
2694
 
2695
               if (!(syntax & RE_NO_BK_BRACES))
2696
                 {
2697
                   if (p > pattern  &&  p[-1] == '\\')
2698
                     goto normal_backslash;
2699
                 }
2700
               goto normal_char;
2701
 
2702
#ifdef emacs
2703
            /* There is no way to specify the before_dot and after_dot
2704
               operators.  rms says this is ok.  --karl  */
2705
            case '=':
2706
              BUF_PUSH (at_dot);
2707
              break;
2708
 
2709
            case 's':
2710
              laststart = b;
2711
              PATFETCH (c);
2712
              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2713
              break;
2714
 
2715
            case 'S':
2716
              laststart = b;
2717
              PATFETCH (c);
2718
              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2719
              break;
2720
#endif /* emacs */
2721
 
2722
 
2723
            case 'w':
2724
              if (syntax & RE_NO_GNU_OPS)
2725
                goto normal_char;
2726
              laststart = b;
2727
              BUF_PUSH (wordchar);
2728
              break;
2729
 
2730
 
2731
            case 'W':
2732
              if (syntax & RE_NO_GNU_OPS)
2733
                goto normal_char;
2734
              laststart = b;
2735
              BUF_PUSH (notwordchar);
2736
              break;
2737
 
2738
 
2739
            case '<':
2740
              if (syntax & RE_NO_GNU_OPS)
2741
                goto normal_char;
2742
              BUF_PUSH (wordbeg);
2743
              break;
2744
 
2745
            case '>':
2746
              if (syntax & RE_NO_GNU_OPS)
2747
                goto normal_char;
2748
              BUF_PUSH (wordend);
2749
              break;
2750
 
2751
            case 'b':
2752
              if (syntax & RE_NO_GNU_OPS)
2753
                goto normal_char;
2754
              BUF_PUSH (wordbound);
2755
              break;
2756
 
2757
            case 'B':
2758
              if (syntax & RE_NO_GNU_OPS)
2759
                goto normal_char;
2760
              BUF_PUSH (notwordbound);
2761
              break;
2762
 
2763
            case '`':
2764
              if (syntax & RE_NO_GNU_OPS)
2765
                goto normal_char;
2766
              BUF_PUSH (begbuf);
2767
              break;
2768
 
2769
            case '\'':
2770
              if (syntax & RE_NO_GNU_OPS)
2771
                goto normal_char;
2772
              BUF_PUSH (endbuf);
2773
              break;
2774
 
2775
            case '1': case '2': case '3': case '4': case '5':
2776
            case '6': case '7': case '8': case '9':
2777
              if (syntax & RE_NO_BK_REFS)
2778
                goto normal_char;
2779
 
2780
              c1 = c - '0';
2781
 
2782
              if (c1 > regnum)
2783
                FREE_STACK_RETURN (REG_ESUBREG);
2784
 
2785
              /* Can't back reference to a subexpression if inside of it.  */
2786
              if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2787
                goto normal_char;
2788
 
2789
              laststart = b;
2790
              BUF_PUSH_2 (duplicate, c1);
2791
              break;
2792
 
2793
 
2794
            case '+':
2795
            case '?':
2796
              if (syntax & RE_BK_PLUS_QM)
2797
                goto handle_plus;
2798
              else
2799
                goto normal_backslash;
2800
 
2801
            default:
2802
            normal_backslash:
2803
              /* You might think it would be useful for \ to mean
2804
                 not to translate; but if we don't translate it
2805
                 it will never match anything.  */
2806
              c = TRANSLATE (c);
2807
              goto normal_char;
2808
            }
2809
          break;
2810
 
2811
 
2812
        default:
2813
        /* Expects the character in `c'.  */
2814
        normal_char:
2815
              /* If no exactn currently being built.  */
2816
          if (!pending_exact
2817
 
2818
              /* If last exactn not at current position.  */
2819
              || pending_exact + *pending_exact + 1 != b
2820
 
2821
              /* We have only one byte following the exactn for the count.  */
2822
              || *pending_exact == (1 << BYTEWIDTH) - 1
2823
 
2824
              /* If followed by a repetition operator.  */
2825
              || *p == '*' || *p == '^'
2826
              || ((syntax & RE_BK_PLUS_QM)
2827
                  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2828
                  : (*p == '+' || *p == '?'))
2829
              || ((syntax & RE_INTERVALS)
2830
                  && ((syntax & RE_NO_BK_BRACES)
2831
                      ? *p == '{'
2832
                      : (p[0] == '\\' && p[1] == '{'))))
2833
            {
2834
              /* Start building a new exactn.  */
2835
 
2836
              laststart = b;
2837
 
2838
              BUF_PUSH_2 (exactn, 0);
2839
              pending_exact = b - 1;
2840
            }
2841
 
2842
          BUF_PUSH (c);
2843
          (*pending_exact)++;
2844
          break;
2845
        } /* switch (c) */
2846
    } /* while p != pend */
2847
 
2848
 
2849
  /* Through the pattern now.  */
2850
 
2851
  if (fixup_alt_jump)
2852
    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2853
 
2854
  if (!COMPILE_STACK_EMPTY)
2855
    FREE_STACK_RETURN (REG_EPAREN);
2856
 
2857
  /* If we don't want backtracking, force success
2858
     the first time we reach the end of the compiled pattern.  */
2859
  if (syntax & RE_NO_POSIX_BACKTRACKING)
2860
    BUF_PUSH (succeed);
2861
 
2862
  free (compile_stack.stack);
2863
 
2864
  /* We have succeeded; set the length of the buffer.  */
2865
  bufp->used = b - bufp->buffer;
2866
 
2867
#ifdef DEBUG
2868
  if (debug)
2869
    {
2870
      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2871
      print_compiled_pattern (bufp);
2872
    }
2873
#endif /* DEBUG */
2874
 
2875
#ifndef MATCH_MAY_ALLOCATE
2876
  /* Initialize the failure stack to the largest possible stack.  This
2877
     isn't necessary unless we're trying to avoid calling alloca in
2878
     the search and match routines.  */
2879
  {
2880
    int num_regs = bufp->re_nsub + 1;
2881
 
2882
    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2883
       is strictly greater than re_max_failures, the largest possible stack
2884
       is 2 * re_max_failures failure points.  */
2885
    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2886
      {
2887
        fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2888
 
2889
# ifdef emacs
2890
        if (! fail_stack.stack)
2891
          fail_stack.stack
2892
            = (fail_stack_elt_t *) xmalloc (fail_stack.size
2893
                                            * sizeof (fail_stack_elt_t));
2894
        else
2895
          fail_stack.stack
2896
            = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2897
                                             (fail_stack.size
2898
                                              * sizeof (fail_stack_elt_t)));
2899
# else /* not emacs */
2900
        if (! fail_stack.stack)
2901
          fail_stack.stack
2902
            = (fail_stack_elt_t *) malloc (fail_stack.size
2903
                                           * sizeof (fail_stack_elt_t));
2904
        else
2905
          fail_stack.stack
2906
            = (fail_stack_elt_t *) realloc (fail_stack.stack,
2907
                                            (fail_stack.size
2908
                                             * sizeof (fail_stack_elt_t)));
2909
# endif /* not emacs */
2910
      }
2911
 
2912
    regex_grow_registers (num_regs);
2913
  }
2914
#endif /* not MATCH_MAY_ALLOCATE */
2915
 
2916
  return REG_NOERROR;
2917
} /* regex_compile */
2918
 
2919
/* Subroutines for `regex_compile'.  */
2920
 
2921
/* Store OP at LOC followed by two-byte integer parameter ARG.  */
2922
 
2923
static void
2924
store_op1 (op, loc, arg)
2925
    re_opcode_t op;
2926
    unsigned char *loc;
2927
    int arg;
2928
{
2929
  *loc = (unsigned char) op;
2930
  STORE_NUMBER (loc + 1, arg);
2931
}
2932
 
2933
 
2934
/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2935
 
2936
static void
2937
store_op2 (op, loc, arg1, arg2)
2938
    re_opcode_t op;
2939
    unsigned char *loc;
2940
    int arg1, arg2;
2941
{
2942
  *loc = (unsigned char) op;
2943
  STORE_NUMBER (loc + 1, arg1);
2944
  STORE_NUMBER (loc + 3, arg2);
2945
}
2946
 
2947
 
2948
/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2949
   for OP followed by two-byte integer parameter ARG.  */
2950
 
2951
static void
2952
insert_op1 (op, loc, arg, end)
2953
    re_opcode_t op;
2954
    unsigned char *loc;
2955
    int arg;
2956
    unsigned char *end;
2957
{
2958
  register unsigned char *pfrom = end;
2959
  register unsigned char *pto = end + 3;
2960
 
2961
  while (pfrom != loc)
2962
    *--pto = *--pfrom;
2963
 
2964
  store_op1 (op, loc, arg);
2965
}
2966
 
2967
 
2968
/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2969
 
2970
static void
2971
insert_op2 (op, loc, arg1, arg2, end)
2972
    re_opcode_t op;
2973
    unsigned char *loc;
2974
    int arg1, arg2;
2975
    unsigned char *end;
2976
{
2977
  register unsigned char *pfrom = end;
2978
  register unsigned char *pto = end + 5;
2979
 
2980
  while (pfrom != loc)
2981
    *--pto = *--pfrom;
2982
 
2983
  store_op2 (op, loc, arg1, arg2);
2984
}
2985
 
2986
 
2987
/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2988
   after an alternative or a begin-subexpression.  We assume there is at
2989
   least one character before the ^.  */
2990
 
2991
static boolean
2992
at_begline_loc_p (pattern, p, syntax)
2993
    const char *pattern, *p;
2994
    reg_syntax_t syntax;
2995
{
2996
  const char *prev = p - 2;
2997
  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2998
 
2999
  return
3000
       /* After a subexpression?  */
3001
       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3002
       /* After an alternative?  */
3003
    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3004
}
3005
 
3006
 
3007
/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3008
   at least one character after the $, i.e., `P < PEND'.  */
3009
 
3010
static boolean
3011
at_endline_loc_p (p, pend, syntax)
3012
    const char *p, *pend;
3013
    reg_syntax_t syntax;
3014
{
3015
  const char *next = p;
3016
  boolean next_backslash = *next == '\\';
3017
  const char *next_next = p + 1 < pend ? p + 1 : 0;
3018
 
3019
  return
3020
       /* Before a subexpression?  */
3021
       (syntax & RE_NO_BK_PARENS ? *next == ')'
3022
        : next_backslash && next_next && *next_next == ')')
3023
       /* Before an alternative?  */
3024
    || (syntax & RE_NO_BK_VBAR ? *next == '|'
3025
        : next_backslash && next_next && *next_next == '|');
3026
}
3027
 
3028
 
3029
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3030
   false if it's not.  */
3031
 
3032
static boolean
3033
group_in_compile_stack (compile_stack, regnum)
3034
    compile_stack_type compile_stack;
3035
    regnum_t regnum;
3036
{
3037
  int this_element;
3038
 
3039
  for (this_element = compile_stack.avail - 1;
3040
       this_element >= 0;
3041
       this_element--)
3042
    if (compile_stack.stack[this_element].regnum == regnum)
3043
      return true;
3044
 
3045
  return false;
3046
}
3047
 
3048
 
3049
/* Read the ending character of a range (in a bracket expression) from the
3050
   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3051
   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3052
   Then we set the translation of all bits between the starting and
3053
   ending characters (inclusive) in the compiled pattern B.
3054
 
3055
   Return an error code.
3056
 
3057
   We use these short variable names so we can use the same macros as
3058
   `regex_compile' itself.  */
3059
 
3060
static reg_errcode_t
3061
compile_range (p_ptr, pend, translate, syntax, b)
3062
    const char **p_ptr, *pend;
3063
    RE_TRANSLATE_TYPE translate;
3064
    reg_syntax_t syntax;
3065
    unsigned char *b;
3066
{
3067
  unsigned this_char;
3068
 
3069
  const char *p = *p_ptr;
3070
  unsigned int range_start, range_end;
3071
 
3072
  if (p == pend)
3073
    return REG_ERANGE;
3074
 
3075
  /* Even though the pattern is a signed `char *', we need to fetch
3076
     with unsigned char *'s; if the high bit of the pattern character
3077
     is set, the range endpoints will be negative if we fetch using a
3078
     signed char *.
3079
 
3080
     We also want to fetch the endpoints without translating them; the
3081
     appropriate translation is done in the bit-setting loop below.  */
3082
  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3083
  range_start = ((const unsigned char *) p)[-2];
3084
  range_end   = ((const unsigned char *) p)[0];
3085
 
3086
  /* Have to increment the pointer into the pattern string, so the
3087
     caller isn't still at the ending character.  */
3088
  (*p_ptr)++;
3089
 
3090
  /* If the start is after the end, the range is empty.  */
3091
  if (range_start > range_end)
3092
    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3093
 
3094
  /* Here we see why `this_char' has to be larger than an `unsigned
3095
     char' -- the range is inclusive, so if `range_end' == 0xff
3096
     (assuming 8-bit characters), we would otherwise go into an infinite
3097
     loop, since all characters <= 0xff.  */
3098
  for (this_char = range_start; this_char <= range_end; this_char++)
3099
    {
3100
      SET_LIST_BIT (TRANSLATE (this_char));
3101
    }
3102
 
3103
  return REG_NOERROR;
3104
}
3105
 
3106
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3107
   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3108
   characters can start a string that matches the pattern.  This fastmap
3109
   is used by re_search to skip quickly over impossible starting points.
3110
 
3111
   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3112
   area as BUFP->fastmap.
3113
 
3114
   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3115
   the pattern buffer.
3116
 
3117
   Returns 0 if we succeed, -2 if an internal error.   */
3118
 
3119
int
3120
re_compile_fastmap (bufp)
3121
     struct re_pattern_buffer *bufp;
3122
{
3123
  int j, k;
3124
#ifdef MATCH_MAY_ALLOCATE
3125
  fail_stack_type fail_stack;
3126
#endif
3127
#ifndef REGEX_MALLOC
3128
  char *destination;
3129
#endif
3130
 
3131
  register char *fastmap = bufp->fastmap;
3132
  unsigned char *pattern = bufp->buffer;
3133
  unsigned char *p = pattern;
3134
  register unsigned char *pend = pattern + bufp->used;
3135
 
3136
#ifdef REL_ALLOC
3137
  /* This holds the pointer to the failure stack, when
3138
     it is allocated relocatably.  */
3139
  fail_stack_elt_t *failure_stack_ptr;
3140
#endif
3141
 
3142
  /* Assume that each path through the pattern can be null until
3143
     proven otherwise.  We set this false at the bottom of switch
3144
     statement, to which we get only if a particular path doesn't
3145
     match the empty string.  */
3146
  boolean path_can_be_null = true;
3147
 
3148
  /* We aren't doing a `succeed_n' to begin with.  */
3149
  boolean succeed_n_p = false;
3150
 
3151
  assert (fastmap != NULL && p != NULL);
3152
 
3153
  INIT_FAIL_STACK ();
3154
  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3155
  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
3156
  bufp->can_be_null = 0;
3157
 
3158
  while (1)
3159
    {
3160
      if (p == pend || *p == succeed)
3161
        {
3162
          /* We have reached the (effective) end of pattern.  */
3163
          if (!FAIL_STACK_EMPTY ())
3164
            {
3165
              bufp->can_be_null |= path_can_be_null;
3166
 
3167
              /* Reset for next path.  */
3168
              path_can_be_null = true;
3169
 
3170
              p = fail_stack.stack[--fail_stack.avail].pointer;
3171
 
3172
              continue;
3173
            }
3174
          else
3175
            break;
3176
        }
3177
 
3178
      /* We should never be about to go beyond the end of the pattern.  */
3179
      assert (p < pend);
3180
 
3181
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3182
        {
3183
 
3184
        /* I guess the idea here is to simply not bother with a fastmap
3185
           if a backreference is used, since it's too hard to figure out
3186
           the fastmap for the corresponding group.  Setting
3187
           `can_be_null' stops `re_search_2' from using the fastmap, so
3188
           that is all we do.  */
3189
        case duplicate:
3190
          bufp->can_be_null = 1;
3191
          goto done;
3192
 
3193
 
3194
      /* Following are the cases which match a character.  These end
3195
         with `break'.  */
3196
 
3197
        case exactn:
3198
          fastmap[p[1]] = 1;
3199
          break;
3200
 
3201
 
3202
        case charset:
3203
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3204
            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3205
              fastmap[j] = 1;
3206
          break;
3207
 
3208
 
3209
        case charset_not:
3210
          /* Chars beyond end of map must be allowed.  */
3211
          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3212
            fastmap[j] = 1;
3213
 
3214
          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3215
            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3216
              fastmap[j] = 1;
3217
          break;
3218
 
3219
 
3220
        case wordchar:
3221
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3222
            if (SYNTAX (j) == Sword)
3223
              fastmap[j] = 1;
3224
          break;
3225
 
3226
 
3227
        case notwordchar:
3228
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3229
            if (SYNTAX (j) != Sword)
3230
              fastmap[j] = 1;
3231
          break;
3232
 
3233
 
3234
        case anychar:
3235
          {
3236
            int fastmap_newline = fastmap['\n'];
3237
 
3238
            /* `.' matches anything ...  */
3239
            for (j = 0; j < (1 << BYTEWIDTH); j++)
3240
              fastmap[j] = 1;
3241
 
3242
            /* ... except perhaps newline.  */
3243
            if (!(bufp->syntax & RE_DOT_NEWLINE))
3244
              fastmap['\n'] = fastmap_newline;
3245
 
3246
            /* Return if we have already set `can_be_null'; if we have,
3247
               then the fastmap is irrelevant.  Something's wrong here.  */
3248
            else if (bufp->can_be_null)
3249
              goto done;
3250
 
3251
            /* Otherwise, have to check alternative paths.  */
3252
            break;
3253
          }
3254
 
3255
#ifdef emacs
3256
        case syntaxspec:
3257
          k = *p++;
3258
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3259
            if (SYNTAX (j) == (enum syntaxcode) k)
3260
              fastmap[j] = 1;
3261
          break;
3262
 
3263
 
3264
        case notsyntaxspec:
3265
          k = *p++;
3266
          for (j = 0; j < (1 << BYTEWIDTH); j++)
3267
            if (SYNTAX (j) != (enum syntaxcode) k)
3268
              fastmap[j] = 1;
3269
          break;
3270
 
3271
 
3272
      /* All cases after this match the empty string.  These end with
3273
         `continue'.  */
3274
 
3275
 
3276
        case before_dot:
3277
        case at_dot:
3278
        case after_dot:
3279
          continue;
3280
#endif /* emacs */
3281
 
3282
 
3283
        case no_op:
3284
        case begline:
3285
        case endline:
3286
        case begbuf:
3287
        case endbuf:
3288
        case wordbound:
3289
        case notwordbound:
3290
        case wordbeg:
3291
        case wordend:
3292
        case push_dummy_failure:
3293
          continue;
3294
 
3295
 
3296
        case jump_n:
3297
        case pop_failure_jump:
3298
        case maybe_pop_jump:
3299
        case jump:
3300
        case jump_past_alt:
3301
        case dummy_failure_jump:
3302
          EXTRACT_NUMBER_AND_INCR (j, p);
3303
          p += j;
3304
          if (j > 0)
3305
            continue;
3306
 
3307
          /* Jump backward implies we just went through the body of a
3308
             loop and matched nothing.  Opcode jumped to should be
3309
             `on_failure_jump' or `succeed_n'.  Just treat it like an
3310
             ordinary jump.  For a * loop, it has pushed its failure
3311
             point already; if so, discard that as redundant.  */
3312
          if ((re_opcode_t) *p != on_failure_jump
3313
              && (re_opcode_t) *p != succeed_n)
3314
            continue;
3315
 
3316
          p++;
3317
          EXTRACT_NUMBER_AND_INCR (j, p);
3318
          p += j;
3319
 
3320
          /* If what's on the stack is where we are now, pop it.  */
3321
          if (!FAIL_STACK_EMPTY ()
3322
              && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3323
            fail_stack.avail--;
3324
 
3325
          continue;
3326
 
3327
 
3328
        case on_failure_jump:
3329
        case on_failure_keep_string_jump:
3330
        handle_on_failure_jump:
3331
          EXTRACT_NUMBER_AND_INCR (j, p);
3332
 
3333
          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3334
             end of the pattern.  We don't want to push such a point,
3335
             since when we restore it above, entering the switch will
3336
             increment `p' past the end of the pattern.  We don't need
3337
             to push such a point since we obviously won't find any more
3338
             fastmap entries beyond `pend'.  Such a pattern can match
3339
             the null string, though.  */
3340
          if (p + j < pend)
3341
            {
3342
              if (!PUSH_PATTERN_OP (p + j, fail_stack))
3343
                {
3344
                  RESET_FAIL_STACK ();
3345
                  return -2;
3346
                }
3347
            }
3348
          else
3349
            bufp->can_be_null = 1;
3350
 
3351
          if (succeed_n_p)
3352
            {
3353
              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
3354
              succeed_n_p = false;
3355
            }
3356
 
3357
          continue;
3358
 
3359
 
3360
        case succeed_n:
3361
          /* Get to the number of times to succeed.  */
3362
          p += 2;
3363
 
3364
          /* Increment p past the n for when k != 0.  */
3365
          EXTRACT_NUMBER_AND_INCR (k, p);
3366
          if (k == 0)
3367
            {
3368
              p -= 4;
3369
              succeed_n_p = true;  /* Spaghetti code alert.  */
3370
              goto handle_on_failure_jump;
3371
            }
3372
          continue;
3373
 
3374
 
3375
        case set_number_at:
3376
          p += 4;
3377
          continue;
3378
 
3379
 
3380
        case start_memory:
3381
        case stop_memory:
3382
          p += 2;
3383
          continue;
3384
 
3385
 
3386
        default:
3387
          abort (); /* We have listed all the cases.  */
3388
        } /* switch *p++ */
3389
 
3390
      /* Getting here means we have found the possible starting
3391
         characters for one path of the pattern -- and that the empty
3392
         string does not match.  We need not follow this path further.
3393
         Instead, look at the next alternative (remembered on the
3394
         stack), or quit if no more.  The test at the top of the loop
3395
         does these things.  */
3396
      path_can_be_null = false;
3397
      p = pend;
3398
    } /* while p */
3399
 
3400
  /* Set `can_be_null' for the last path (also the first path, if the
3401
     pattern is empty).  */
3402
  bufp->can_be_null |= path_can_be_null;
3403
 
3404
 done:
3405
  RESET_FAIL_STACK ();
3406
  return 0;
3407
} /* re_compile_fastmap */
3408
#ifdef _LIBC
3409
weak_alias (__re_compile_fastmap, re_compile_fastmap)
3410
#endif
3411
 
3412
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3413
   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3414
   this memory for recording register information.  STARTS and ENDS
3415
   must be allocated using the malloc library routine, and must each
3416
   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3417
 
3418
   If NUM_REGS == 0, then subsequent matches should allocate their own
3419
   register data.
3420
 
3421
   Unless this function is called, the first search or match using
3422
   PATTERN_BUFFER will allocate its own register data, without
3423
   freeing the old data.  */
3424
 
3425
void
3426
re_set_registers (bufp, regs, num_regs, starts, ends)
3427
    struct re_pattern_buffer *bufp;
3428
    struct re_registers *regs;
3429
    unsigned num_regs;
3430
    regoff_t *starts, *ends;
3431
{
3432
  if (num_regs)
3433
    {
3434
      bufp->regs_allocated = REGS_REALLOCATE;
3435
      regs->num_regs = num_regs;
3436
      regs->start = starts;
3437
      regs->end = ends;
3438
    }
3439
  else
3440
    {
3441
      bufp->regs_allocated = REGS_UNALLOCATED;
3442
      regs->num_regs = 0;
3443
      regs->start = regs->end = (regoff_t *) 0;
3444
    }
3445
}
3446
#ifdef _LIBC
3447
weak_alias (__re_set_registers, re_set_registers)
3448
#endif
3449
 
3450
/* Searching routines.  */
3451
 
3452
/* Like re_search_2, below, but only one string is specified, and
3453
   doesn't let you say where to stop matching. */
3454
 
3455
int
3456
re_search (bufp, string, size, startpos, range, regs)
3457
     struct re_pattern_buffer *bufp;
3458
     const char *string;
3459
     int size, startpos, range;
3460
     struct re_registers *regs;
3461
{
3462
  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3463
                      regs, size);
3464
}
3465
#ifdef _LIBC
3466
weak_alias (__re_search, re_search)
3467
#endif
3468
 
3469
 
3470
/* Using the compiled pattern in BUFP->buffer, first tries to match the
3471
   virtual concatenation of STRING1 and STRING2, starting first at index
3472
   STARTPOS, then at STARTPOS + 1, and so on.
3473
 
3474
   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3475
 
3476
   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3477
   only at STARTPOS; in general, the last start tried is STARTPOS +
3478
   RANGE.
3479
 
3480
   In REGS, return the indices of the virtual concatenation of STRING1
3481
   and STRING2 that matched the entire BUFP->buffer and its contained
3482
   subexpressions.
3483
 
3484
   Do not consider matching one past the index STOP in the virtual
3485
   concatenation of STRING1 and STRING2.
3486
 
3487
   We return either the position in the strings at which the match was
3488
   found, -1 if no match, or -2 if error (such as failure
3489
   stack overflow).  */
3490
 
3491
int
3492
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3493
     struct re_pattern_buffer *bufp;
3494
     const char *string1, *string2;
3495
     int size1, size2;
3496
     int startpos;
3497
     int range;
3498
     struct re_registers *regs;
3499
     int stop;
3500
{
3501
  int val;
3502
  register char *fastmap = bufp->fastmap;
3503
  register RE_TRANSLATE_TYPE translate = bufp->translate;
3504
  int total_size = size1 + size2;
3505
  int endpos = startpos + range;
3506
 
3507
  /* Check for out-of-range STARTPOS.  */
3508
  if (startpos < 0 || startpos > total_size)
3509
    return -1;
3510
 
3511
  /* Fix up RANGE if it might eventually take us outside
3512
     the virtual concatenation of STRING1 and STRING2.
3513
     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3514
  if (endpos < 0)
3515
    range = 0 - startpos;
3516
  else if (endpos > total_size)
3517
    range = total_size - startpos;
3518
 
3519
  /* If the search isn't to be a backwards one, don't waste time in a
3520
     search for a pattern that must be anchored.  */
3521
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3522
    {
3523
      if (startpos > 0)
3524
        return -1;
3525
      else
3526
        range = 1;
3527
    }
3528
 
3529
#ifdef emacs
3530
  /* In a forward search for something that starts with \=.
3531
     don't keep searching past point.  */
3532
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3533
    {
3534
      range = PT - startpos;
3535
      if (range <= 0)
3536
        return -1;
3537
    }
3538
#endif /* emacs */
3539
 
3540
  /* Update the fastmap now if not correct already.  */
3541
  if (fastmap && !bufp->fastmap_accurate)
3542
    if (re_compile_fastmap (bufp) == -2)
3543
      return -2;
3544
 
3545
  /* Loop through the string, looking for a place to start matching.  */
3546
  for (;;)
3547
    {
3548
      /* If a fastmap is supplied, skip quickly over characters that
3549
         cannot be the start of a match.  If the pattern can match the
3550
         null string, however, we don't need to skip characters; we want
3551
         the first null string.  */
3552
      if (fastmap && startpos < total_size && !bufp->can_be_null)
3553
        {
3554
          if (range > 0) /* Searching forwards.  */
3555
            {
3556
              register const char *d;
3557
              register int lim = 0;
3558
              int irange = range;
3559
 
3560
              if (startpos < size1 && startpos + range >= size1)
3561
                lim = range - (size1 - startpos);
3562
 
3563
              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3564
 
3565
              /* Written out as an if-else to avoid testing `translate'
3566
                 inside the loop.  */
3567
              if (translate)
3568
                while (range > lim
3569
                       && !fastmap[(unsigned char)
3570
                                   translate[(unsigned char) *d++]])
3571
                  range--;
3572
              else
3573
                while (range > lim && !fastmap[(unsigned char) *d++])
3574
                  range--;
3575
 
3576
              startpos += irange - range;
3577
            }
3578
          else                          /* Searching backwards.  */
3579
            {
3580
              register char c = (size1 == 0 || startpos >= size1
3581
                                 ? string2[startpos - size1]
3582
                                 : string1[startpos]);
3583
 
3584
              if (!fastmap[(unsigned char) TRANSLATE (c)])
3585
                goto advance;
3586
            }
3587
        }
3588
 
3589
      /* If can't match the null string, and that's all we have left, fail.  */
3590
      if (range >= 0 && startpos == total_size && fastmap
3591
          && !bufp->can_be_null)
3592
        return -1;
3593
 
3594
      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3595
                                 startpos, regs, stop);
3596
#ifndef REGEX_MALLOC
3597
# ifdef C_ALLOCA
3598
      alloca (0);
3599
# endif
3600
#endif
3601
 
3602
      if (val >= 0)
3603
        return startpos;
3604
 
3605
      if (val == -2)
3606
        return -2;
3607
 
3608
    advance:
3609
      if (!range)
3610
        break;
3611
      else if (range > 0)
3612
        {
3613
          range--;
3614
          startpos++;
3615
        }
3616
      else
3617
        {
3618
          range++;
3619
          startpos--;
3620
        }
3621
    }
3622
  return -1;
3623
} /* re_search_2 */
3624
#ifdef _LIBC
3625
weak_alias (__re_search_2, re_search_2)
3626
#endif
3627
 
3628
/* This converts PTR, a pointer into one of the search strings `string1'
3629
   and `string2' into an offset from the beginning of that string.  */
3630
#define POINTER_TO_OFFSET(ptr)                  \
3631
  (FIRST_STRING_P (ptr)                         \
3632
   ? ((regoff_t) ((ptr) - string1))             \
3633
   : ((regoff_t) ((ptr) - string2 + size1)))
3634
 
3635
/* Macros for dealing with the split strings in re_match_2.  */
3636
 
3637
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3638
 
3639
/* Call before fetching a character with *d.  This switches over to
3640
   string2 if necessary.  */
3641
#define PREFETCH()                                                      \
3642
  while (d == dend)                                                     \
3643
    {                                                                   \
3644
      /* End of string2 => fail.  */                                    \
3645
      if (dend == end_match_2)                                          \
3646
        goto fail;                                                      \
3647
      /* End of string1 => advance to string2.  */                      \
3648
      d = string2;                                                      \
3649
      dend = end_match_2;                                               \
3650
    }
3651
 
3652
 
3653
/* Test if at very beginning or at very end of the virtual concatenation
3654
   of `string1' and `string2'.  If only one string, it's `string2'.  */
3655
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3656
#define AT_STRINGS_END(d) ((d) == end2)
3657
 
3658
 
3659
/* Test if D points to a character which is word-constituent.  We have
3660
   two special cases to check for: if past the end of string1, look at
3661
   the first character in string2; and if before the beginning of
3662
   string2, look at the last character in string1.  */
3663
#define WORDCHAR_P(d)                                                   \
3664
  (SYNTAX ((d) == end1 ? *string2                                       \
3665
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3666
   == Sword)
3667
 
3668
/* Disabled due to a compiler bug -- see comment at case wordbound */
3669
#if 0
3670
/* Test if the character before D and the one at D differ with respect
3671
   to being word-constituent.  */
3672
#define AT_WORD_BOUNDARY(d)                                             \
3673
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3674
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3675
#endif
3676
 
3677
/* Free everything we malloc.  */
3678
#ifdef MATCH_MAY_ALLOCATE
3679
# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3680
# define FREE_VARIABLES()                                               \
3681
  do {                                                                  \
3682
    REGEX_FREE_STACK (fail_stack.stack);                                \
3683
    FREE_VAR (regstart);                                                \
3684
    FREE_VAR (regend);                                                  \
3685
    FREE_VAR (old_regstart);                                            \
3686
    FREE_VAR (old_regend);                                              \
3687
    FREE_VAR (best_regstart);                                           \
3688
    FREE_VAR (best_regend);                                             \
3689
    FREE_VAR (reg_info);                                                \
3690
    FREE_VAR (reg_dummy);                                               \
3691
    FREE_VAR (reg_info_dummy);                                          \
3692
  } while (0)
3693
#else
3694
# define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3695
#endif /* not MATCH_MAY_ALLOCATE */
3696
 
3697
/* These values must meet several constraints.  They must not be valid
3698
   register values; since we have a limit of 255 registers (because
3699
   we use only one byte in the pattern for the register number), we can
3700
   use numbers larger than 255.  They must differ by 1, because of
3701
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3702
   be larger than the value for the highest register, so we do not try
3703
   to actually save any registers when none are active.  */
3704
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3705
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3706
 
3707
/* Matching routines.  */
3708
 
3709
#ifndef emacs   /* Emacs never uses this.  */
3710
/* re_match is like re_match_2 except it takes only a single string.  */
3711
 
3712
int
3713
re_match (bufp, string, size, pos, regs)
3714
     struct re_pattern_buffer *bufp;
3715
     const char *string;
3716
     int size, pos;
3717
     struct re_registers *regs;
3718
{
3719
  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3720
                                    pos, regs, size);
3721
# ifndef REGEX_MALLOC
3722
#  ifdef C_ALLOCA
3723
  alloca (0);
3724
#  endif
3725
# endif
3726
  return result;
3727
}
3728
# ifdef _LIBC
3729
weak_alias (__re_match, re_match)
3730
# endif
3731
#endif /* not emacs */
3732
 
3733
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3734
                                                    unsigned char *end,
3735
                                                register_info_type *reg_info));
3736
static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3737
                                                  unsigned char *end,
3738
                                                register_info_type *reg_info));
3739
static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3740
                                                        unsigned char *end,
3741
                                                register_info_type *reg_info));
3742
static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3743
                                     int len, char *translate));
3744
 
3745
/* re_match_2 matches the compiled pattern in BUFP against the
3746
   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3747
   and SIZE2, respectively).  We start matching at POS, and stop
3748
   matching at STOP.
3749
 
3750
   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3751
   store offsets for the substring each group matched in REGS.  See the
3752
   documentation for exactly how many groups we fill.
3753
 
3754
   We return -1 if no match, -2 if an internal error (such as the
3755
   failure stack overflowing).  Otherwise, we return the length of the
3756
   matched substring.  */
3757
 
3758
int
3759
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3760
     struct re_pattern_buffer *bufp;
3761
     const char *string1, *string2;
3762
     int size1, size2;
3763
     int pos;
3764
     struct re_registers *regs;
3765
     int stop;
3766
{
3767
  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3768
                                    pos, regs, stop);
3769
#ifndef REGEX_MALLOC
3770
# ifdef C_ALLOCA
3771
  alloca (0);
3772
# endif
3773
#endif
3774
  return result;
3775
}
3776
#ifdef _LIBC
3777
weak_alias (__re_match_2, re_match_2)
3778
#endif
3779
 
3780
/* This is a separate function so that we can force an alloca cleanup
3781
   afterwards.  */
3782
static int
3783
re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3784
     struct re_pattern_buffer *bufp;
3785
     const char *string1, *string2;
3786
     int size1, size2;
3787
     int pos;
3788
     struct re_registers *regs;
3789
     int stop;
3790
{
3791
  /* General temporaries.  */
3792
  int mcnt;
3793
  unsigned char *p1;
3794
 
3795
  /* Just past the end of the corresponding string.  */
3796
  const char *end1, *end2;
3797
 
3798
  /* Pointers into string1 and string2, just past the last characters in
3799
     each to consider matching.  */
3800
  const char *end_match_1, *end_match_2;
3801
 
3802
  /* Where we are in the data, and the end of the current string.  */
3803
  const char *d, *dend;
3804
 
3805
  /* Where we are in the pattern, and the end of the pattern.  */
3806
  unsigned char *p = bufp->buffer;
3807
  register unsigned char *pend = p + bufp->used;
3808
 
3809
  /* Mark the opcode just after a start_memory, so we can test for an
3810
     empty subpattern when we get to the stop_memory.  */
3811
  unsigned char *just_past_start_mem = 0;
3812
 
3813
  /* We use this to map every character in the string.  */
3814
  RE_TRANSLATE_TYPE translate = bufp->translate;
3815
 
3816
  /* Failure point stack.  Each place that can handle a failure further
3817
     down the line pushes a failure point on this stack.  It consists of
3818
     restart, regend, and reg_info for all registers corresponding to
3819
     the subexpressions we're currently inside, plus the number of such
3820
     registers, and, finally, two char *'s.  The first char * is where
3821
     to resume scanning the pattern; the second one is where to resume
3822
     scanning the strings.  If the latter is zero, the failure point is
3823
     a ``dummy''; if a failure happens and the failure point is a dummy,
3824
     it gets discarded and the next next one is tried.  */
3825
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3826
  fail_stack_type fail_stack;
3827
#endif
3828
#ifdef DEBUG
3829
  static unsigned failure_id = 0;
3830
  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3831
#endif
3832
 
3833
#ifdef REL_ALLOC
3834
  /* This holds the pointer to the failure stack, when
3835
     it is allocated relocatably.  */
3836
  fail_stack_elt_t *failure_stack_ptr;
3837
#endif
3838
 
3839
  /* We fill all the registers internally, independent of what we
3840
     return, for use in backreferences.  The number here includes
3841
     an element for register zero.  */
3842
  size_t num_regs = bufp->re_nsub + 1;
3843
 
3844
  /* The currently active registers.  */
3845
  active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3846
  active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3847
 
3848
  /* Information on the contents of registers. These are pointers into
3849
     the input strings; they record just what was matched (on this
3850
     attempt) by a subexpression part of the pattern, that is, the
3851
     regnum-th regstart pointer points to where in the pattern we began
3852
     matching and the regnum-th regend points to right after where we
3853
     stopped matching the regnum-th subexpression.  (The zeroth register
3854
     keeps track of what the whole pattern matches.)  */
3855
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3856
  const char **regstart, **regend;
3857
#endif
3858
 
3859
  /* If a group that's operated upon by a repetition operator fails to
3860
     match anything, then the register for its start will need to be
3861
     restored because it will have been set to wherever in the string we
3862
     are when we last see its open-group operator.  Similarly for a
3863
     register's end.  */
3864
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3865
  const char **old_regstart, **old_regend;
3866
#endif
3867
 
3868
  /* The is_active field of reg_info helps us keep track of which (possibly
3869
     nested) subexpressions we are currently in. The matched_something
3870
     field of reg_info[reg_num] helps us tell whether or not we have
3871
     matched any of the pattern so far this time through the reg_num-th
3872
     subexpression.  These two fields get reset each time through any
3873
     loop their register is in.  */
3874
#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3875
  register_info_type *reg_info;
3876
#endif
3877
 
3878
  /* The following record the register info as found in the above
3879
     variables when we find a match better than any we've seen before.
3880
     This happens as we backtrack through the failure points, which in
3881
     turn happens only if we have not yet matched the entire string. */
3882
  unsigned best_regs_set = false;
3883
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3884
  const char **best_regstart, **best_regend;
3885
#endif
3886
 
3887
  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3888
     allocate space for that if we're not allocating space for anything
3889
     else (see below).  Also, we never need info about register 0 for
3890
     any of the other register vectors, and it seems rather a kludge to
3891
     treat `best_regend' differently than the rest.  So we keep track of
3892
     the end of the best match so far in a separate variable.  We
3893
     initialize this to NULL so that when we backtrack the first time
3894
     and need to test it, it's not garbage.  */
3895
  const char *match_end = NULL;
3896
 
3897
  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3898
  int set_regs_matched_done = 0;
3899
 
3900
  /* Used when we pop values we don't care about.  */
3901
#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3902
  const char **reg_dummy;
3903
  register_info_type *reg_info_dummy;
3904
#endif
3905
 
3906
#ifdef DEBUG
3907
  /* Counts the total number of registers pushed.  */
3908
  unsigned num_regs_pushed = 0;
3909
#endif
3910
 
3911
  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3912
 
3913
  INIT_FAIL_STACK ();
3914
 
3915
#ifdef MATCH_MAY_ALLOCATE
3916
  /* Do not bother to initialize all the register variables if there are
3917
     no groups in the pattern, as it takes a fair amount of time.  If
3918
     there are groups, we include space for register 0 (the whole
3919
     pattern), even though we never use it, since it simplifies the
3920
     array indexing.  We should fix this.  */
3921
  if (bufp->re_nsub)
3922
    {
3923
      regstart = REGEX_TALLOC (num_regs, const char *);
3924
      regend = REGEX_TALLOC (num_regs, const char *);
3925
      old_regstart = REGEX_TALLOC (num_regs, const char *);
3926
      old_regend = REGEX_TALLOC (num_regs, const char *);
3927
      best_regstart = REGEX_TALLOC (num_regs, const char *);
3928
      best_regend = REGEX_TALLOC (num_regs, const char *);
3929
      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3930
      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3931
      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3932
 
3933
      if (!(regstart && regend && old_regstart && old_regend && reg_info
3934
            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3935
        {
3936
          FREE_VARIABLES ();
3937
          return -2;
3938
        }
3939
    }
3940
  else
3941
    {
3942
      /* We must initialize all our variables to NULL, so that
3943
         `FREE_VARIABLES' doesn't try to free them.  */
3944
      regstart = regend = old_regstart = old_regend = best_regstart
3945
        = best_regend = reg_dummy = NULL;
3946
      reg_info = reg_info_dummy = (register_info_type *) NULL;
3947
    }
3948
#endif /* MATCH_MAY_ALLOCATE */
3949
 
3950
  /* The starting position is bogus.  */
3951
  if (pos < 0 || pos > size1 + size2)
3952
    {
3953
      FREE_VARIABLES ();
3954
      return -1;
3955
    }
3956
 
3957
  /* Initialize subexpression text positions to -1 to mark ones that no
3958
     start_memory/stop_memory has been seen for. Also initialize the
3959
     register information struct.  */
3960
  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3961
    {
3962
      regstart[mcnt] = regend[mcnt]
3963
        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3964
 
3965
      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3966
      IS_ACTIVE (reg_info[mcnt]) = 0;
3967
      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3968
      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3969
    }
3970
 
3971
  /* We move `string1' into `string2' if the latter's empty -- but not if
3972
     `string1' is null.  */
3973
  if (size2 == 0 && string1 != NULL)
3974
    {
3975
      string2 = string1;
3976
      size2 = size1;
3977
      string1 = 0;
3978
      size1 = 0;
3979
    }
3980
  end1 = string1 + size1;
3981
  end2 = string2 + size2;
3982
 
3983
  /* Compute where to stop matching, within the two strings.  */
3984
  if (stop <= size1)
3985
    {
3986
      end_match_1 = string1 + stop;
3987
      end_match_2 = string2;
3988
    }
3989
  else
3990
    {
3991
      end_match_1 = end1;
3992
      end_match_2 = string2 + stop - size1;
3993
    }
3994
 
3995
  /* `p' scans through the pattern as `d' scans through the data.
3996
     `dend' is the end of the input string that `d' points within.  `d'
3997
     is advanced into the following input string whenever necessary, but
3998
     this happens before fetching; therefore, at the beginning of the
3999
     loop, `d' can be pointing at the end of a string, but it cannot
4000
     equal `string2'.  */
4001
  if (size1 > 0 && pos <= size1)
4002
    {
4003
      d = string1 + pos;
4004
      dend = end_match_1;
4005
    }
4006
  else
4007
    {
4008
      d = string2 + pos - size1;
4009
      dend = end_match_2;
4010
    }
4011
 
4012
  DEBUG_PRINT1 ("The compiled pattern is:\n");
4013
  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4014
  DEBUG_PRINT1 ("The string to match is: `");
4015
  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4016
  DEBUG_PRINT1 ("'\n");
4017
 
4018
  /* This loops over pattern commands.  It exits by returning from the
4019
     function if the match is complete, or it drops through if the match
4020
     fails at this starting point in the input data.  */
4021
  for (;;)
4022
    {
4023
#ifdef _LIBC
4024
      DEBUG_PRINT2 ("\n%p: ", p);
4025
#else
4026
      DEBUG_PRINT2 ("\n0x%x: ", p);
4027
#endif
4028
 
4029
      if (p == pend)
4030
        { /* End of pattern means we might have succeeded.  */
4031
          DEBUG_PRINT1 ("end of pattern ... ");
4032
 
4033
          /* If we haven't matched the entire string, and we want the
4034
             longest match, try backtracking.  */
4035
          if (d != end_match_2)
4036
            {
4037
              /* 1 if this match ends in the same string (string1 or string2)
4038
                 as the best previous match.  */
4039
              boolean same_str_p = (FIRST_STRING_P (match_end)
4040
                                    == MATCHING_IN_FIRST_STRING);
4041
              /* 1 if this match is the best seen so far.  */
4042
              boolean best_match_p;
4043
 
4044
              /* AIX compiler got confused when this was combined
4045
                 with the previous declaration.  */
4046
              if (same_str_p)
4047
                best_match_p = d > match_end;
4048
              else
4049
                best_match_p = !MATCHING_IN_FIRST_STRING;
4050
 
4051
              DEBUG_PRINT1 ("backtracking.\n");
4052
 
4053
              if (!FAIL_STACK_EMPTY ())
4054
                { /* More failure points to try.  */
4055
 
4056
                  /* If exceeds best match so far, save it.  */
4057
                  if (!best_regs_set || best_match_p)
4058
                    {
4059
                      best_regs_set = true;
4060
                      match_end = d;
4061
 
4062
                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4063
 
4064
                      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4065
                        {
4066
                          best_regstart[mcnt] = regstart[mcnt];
4067
                          best_regend[mcnt] = regend[mcnt];
4068
                        }
4069
                    }
4070
                  goto fail;
4071
                }
4072
 
4073
              /* If no failure points, don't restore garbage.  And if
4074
                 last match is real best match, don't restore second
4075
                 best one. */
4076
              else if (best_regs_set && !best_match_p)
4077
                {
4078
                restore_best_regs:
4079
                  /* Restore best match.  It may happen that `dend ==
4080
                     end_match_1' while the restored d is in string2.
4081
                     For example, the pattern `x.*y.*z' against the
4082
                     strings `x-' and `y-z-', if the two strings are
4083
                     not consecutive in memory.  */
4084
                  DEBUG_PRINT1 ("Restoring best registers.\n");
4085
 
4086
                  d = match_end;
4087
                  dend = ((d >= string1 && d <= end1)
4088
                           ? end_match_1 : end_match_2);
4089
 
4090
                  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4091
                    {
4092
                      regstart[mcnt] = best_regstart[mcnt];
4093
                      regend[mcnt] = best_regend[mcnt];
4094
                    }
4095
                }
4096
            } /* d != end_match_2 */
4097
 
4098
        succeed_label:
4099
          DEBUG_PRINT1 ("Accepting match.\n");
4100
 
4101
          /* If caller wants register contents data back, do it.  */
4102
          if (regs && !bufp->no_sub)
4103
            {
4104
              /* Have the register data arrays been allocated?  */
4105
              if (bufp->regs_allocated == REGS_UNALLOCATED)
4106
                { /* No.  So allocate them with malloc.  We need one
4107
                     extra element beyond `num_regs' for the `-1' marker
4108
                     GNU code uses.  */
4109
                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4110
                  regs->start = TALLOC (regs->num_regs, regoff_t);
4111
                  regs->end = TALLOC (regs->num_regs, regoff_t);
4112
                  if (regs->start == NULL || regs->end == NULL)
4113
                    {
4114
                      FREE_VARIABLES ();
4115
                      return -2;
4116
                    }
4117
                  bufp->regs_allocated = REGS_REALLOCATE;
4118
                }
4119
              else if (bufp->regs_allocated == REGS_REALLOCATE)
4120
                { /* Yes.  If we need more elements than were already
4121
                     allocated, reallocate them.  If we need fewer, just
4122
                     leave it alone.  */
4123
                  if (regs->num_regs < num_regs + 1)
4124
                    {
4125
                      regs->num_regs = num_regs + 1;
4126
                      RETALLOC (regs->start, regs->num_regs, regoff_t);
4127
                      RETALLOC (regs->end, regs->num_regs, regoff_t);
4128
                      if (regs->start == NULL || regs->end == NULL)
4129
                        {
4130
                          FREE_VARIABLES ();
4131
                          return -2;
4132
                        }
4133
                    }
4134
                }
4135
              else
4136
                {
4137
                  /* These braces fend off a "empty body in an else-statement"
4138
                     warning under GCC when assert expands to nothing.  */
4139
                  assert (bufp->regs_allocated == REGS_FIXED);
4140
                }
4141
 
4142
              /* Convert the pointer data in `regstart' and `regend' to
4143
                 indices.  Register zero has to be set differently,
4144
                 since we haven't kept track of any info for it.  */
4145
              if (regs->num_regs > 0)
4146
                {
4147
                  regs->start[0] = pos;
4148
                  regs->end[0] = (MATCHING_IN_FIRST_STRING
4149
                                  ? ((regoff_t) (d - string1))
4150
                                  : ((regoff_t) (d - string2 + size1)));
4151
                }
4152
 
4153
              /* Go through the first `min (num_regs, regs->num_regs)'
4154
                 registers, since that is all we initialized.  */
4155
              for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4156
                   mcnt++)
4157
                {
4158
                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4159
                    regs->start[mcnt] = regs->end[mcnt] = -1;
4160
                  else
4161
                    {
4162
                      regs->start[mcnt]
4163
                        = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4164
                      regs->end[mcnt]
4165
                        = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4166
                    }
4167
                }
4168
 
4169
              /* If the regs structure we return has more elements than
4170
                 were in the pattern, set the extra elements to -1.  If
4171
                 we (re)allocated the registers, this is the case,
4172
                 because we always allocate enough to have at least one
4173
                 -1 at the end.  */
4174
              for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4175
                regs->start[mcnt] = regs->end[mcnt] = -1;
4176
            } /* regs && !bufp->no_sub */
4177
 
4178
          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4179
                        nfailure_points_pushed, nfailure_points_popped,
4180
                        nfailure_points_pushed - nfailure_points_popped);
4181
          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4182
 
4183
          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4184
                            ? string1
4185
                            : string2 - size1);
4186
 
4187
          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4188
 
4189
          FREE_VARIABLES ();
4190
          return mcnt;
4191
        }
4192
 
4193
      /* Otherwise match next pattern command.  */
4194
      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4195
        {
4196
        /* Ignore these.  Used to ignore the n of succeed_n's which
4197
           currently have n == 0.  */
4198
        case no_op:
4199
          DEBUG_PRINT1 ("EXECUTING no_op.\n");
4200
          break;
4201
 
4202
        case succeed:
4203
          DEBUG_PRINT1 ("EXECUTING succeed.\n");
4204
          goto succeed_label;
4205
 
4206
        /* Match the next n pattern characters exactly.  The following
4207
           byte in the pattern defines n, and the n bytes after that
4208
           are the characters to match.  */
4209
        case exactn:
4210
          mcnt = *p++;
4211
          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4212
 
4213
          /* This is written out as an if-else so we don't waste time
4214
             testing `translate' inside the loop.  */
4215
          if (translate)
4216
            {
4217
              do
4218
                {
4219
                  PREFETCH ();
4220
                  if ((unsigned char) translate[(unsigned char) *d++]
4221
                      != (unsigned char) *p++)
4222
                    goto fail;
4223
                }
4224
              while (--mcnt);
4225
            }
4226
          else
4227
            {
4228
              do
4229
                {
4230
                  PREFETCH ();
4231
                  if (*d++ != (char) *p++) goto fail;
4232
                }
4233
              while (--mcnt);
4234
            }
4235
          SET_REGS_MATCHED ();
4236
          break;
4237
 
4238
 
4239
        /* Match any character except possibly a newline or a null.  */
4240
        case anychar:
4241
          DEBUG_PRINT1 ("EXECUTING anychar.\n");
4242
 
4243
          PREFETCH ();
4244
 
4245
          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4246
              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4247
            goto fail;
4248
 
4249
          SET_REGS_MATCHED ();
4250
          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4251
          d++;
4252
          break;
4253
 
4254
 
4255
        case charset:
4256
        case charset_not:
4257
          {
4258
            register unsigned char c;
4259
            boolean not = (re_opcode_t) *(p - 1) == charset_not;
4260
 
4261
            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4262
 
4263
            PREFETCH ();
4264
            c = TRANSLATE (*d); /* The character to match.  */
4265
 
4266
            /* Cast to `unsigned' instead of `unsigned char' in case the
4267
               bit list is a full 32 bytes long.  */
4268
            if (c < (unsigned) (*p * BYTEWIDTH)
4269
                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4270
              not = !not;
4271
 
4272
            p += 1 + *p;
4273
 
4274
            if (!not) goto fail;
4275
 
4276
            SET_REGS_MATCHED ();
4277
            d++;
4278
            break;
4279
          }
4280
 
4281
 
4282
        /* The beginning of a group is represented by start_memory.
4283
           The arguments are the register number in the next byte, and the
4284
           number of groups inner to this one in the next.  The text
4285
           matched within the group is recorded (in the internal
4286
           registers data structure) under the register number.  */
4287
        case start_memory:
4288
          DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4289
 
4290
          /* Find out if this group can match the empty string.  */
4291
          p1 = p;               /* To send to group_match_null_string_p.  */
4292
 
4293
          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4294
            REG_MATCH_NULL_STRING_P (reg_info[*p])
4295
              = group_match_null_string_p (&p1, pend, reg_info);
4296
 
4297
          /* Save the position in the string where we were the last time
4298
             we were at this open-group operator in case the group is
4299
             operated upon by a repetition operator, e.g., with `(a*)*b'
4300
             against `ab'; then we want to ignore where we are now in
4301
             the string in case this attempt to match fails.  */
4302
          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4303
                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4304
                             : regstart[*p];
4305
          DEBUG_PRINT2 ("  old_regstart: %d\n",
4306
                         POINTER_TO_OFFSET (old_regstart[*p]));
4307
 
4308
          regstart[*p] = d;
4309
          DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4310
 
4311
          IS_ACTIVE (reg_info[*p]) = 1;
4312
          MATCHED_SOMETHING (reg_info[*p]) = 0;
4313
 
4314
          /* Clear this whenever we change the register activity status.  */
4315
          set_regs_matched_done = 0;
4316
 
4317
          /* This is the new highest active register.  */
4318
          highest_active_reg = *p;
4319
 
4320
          /* If nothing was active before, this is the new lowest active
4321
             register.  */
4322
          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4323
            lowest_active_reg = *p;
4324
 
4325
          /* Move past the register number and inner group count.  */
4326
          p += 2;
4327
          just_past_start_mem = p;
4328
 
4329
          break;
4330
 
4331
 
4332
        /* The stop_memory opcode represents the end of a group.  Its
4333
           arguments are the same as start_memory's: the register
4334
           number, and the number of inner groups.  */
4335
        case stop_memory:
4336
          DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4337
 
4338
          /* We need to save the string position the last time we were at
4339
             this close-group operator in case the group is operated
4340
             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4341
             against `aba'; then we want to ignore where we are now in
4342
             the string in case this attempt to match fails.  */
4343
          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4344
                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
4345
                           : regend[*p];
4346
          DEBUG_PRINT2 ("      old_regend: %d\n",
4347
                         POINTER_TO_OFFSET (old_regend[*p]));
4348
 
4349
          regend[*p] = d;
4350
          DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4351
 
4352
          /* This register isn't active anymore.  */
4353
          IS_ACTIVE (reg_info[*p]) = 0;
4354
 
4355
          /* Clear this whenever we change the register activity status.  */
4356
          set_regs_matched_done = 0;
4357
 
4358
          /* If this was the only register active, nothing is active
4359
             anymore.  */
4360
          if (lowest_active_reg == highest_active_reg)
4361
            {
4362
              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4363
              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4364
            }
4365
          else
4366
            { /* We must scan for the new highest active register, since
4367
                 it isn't necessarily one less than now: consider
4368
                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4369
                 new highest active register is 1.  */
4370
              unsigned char r = *p - 1;
4371
              while (r > 0 && !IS_ACTIVE (reg_info[r]))
4372
                r--;
4373
 
4374
              /* If we end up at register zero, that means that we saved
4375
                 the registers as the result of an `on_failure_jump', not
4376
                 a `start_memory', and we jumped to past the innermost
4377
                 `stop_memory'.  For example, in ((.)*) we save
4378
                 registers 1 and 2 as a result of the *, but when we pop
4379
                 back to the second ), we are at the stop_memory 1.
4380
                 Thus, nothing is active.  */
4381
              if (r == 0)
4382
                {
4383
                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4384
                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4385
                }
4386
              else
4387
                highest_active_reg = r;
4388
            }
4389
 
4390
          /* If just failed to match something this time around with a
4391
             group that's operated on by a repetition operator, try to
4392
             force exit from the ``loop'', and restore the register
4393
             information for this group that we had before trying this
4394
             last match.  */
4395
          if ((!MATCHED_SOMETHING (reg_info[*p])
4396
               || just_past_start_mem == p - 1)
4397
              && (p + 2) < pend)
4398
            {
4399
              boolean is_a_jump_n = false;
4400
 
4401
              p1 = p + 2;
4402
              mcnt = 0;
4403
              switch ((re_opcode_t) *p1++)
4404
                {
4405
                  case jump_n:
4406
                    is_a_jump_n = true;
4407
                  case pop_failure_jump:
4408
                  case maybe_pop_jump:
4409
                  case jump:
4410
                  case dummy_failure_jump:
4411
                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4412
                    if (is_a_jump_n)
4413
                      p1 += 2;
4414
                    break;
4415
 
4416
                  default:
4417
                    /* do nothing */ ;
4418
                }
4419
              p1 += mcnt;
4420
 
4421
              /* If the next operation is a jump backwards in the pattern
4422
                 to an on_failure_jump right before the start_memory
4423
                 corresponding to this stop_memory, exit from the loop
4424
                 by forcing a failure after pushing on the stack the
4425
                 on_failure_jump's jump in the pattern, and d.  */
4426
              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4427
                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4428
                {
4429
                  /* If this group ever matched anything, then restore
4430
                     what its registers were before trying this last
4431
                     failed match, e.g., with `(a*)*b' against `ab' for
4432
                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4433
                     against `aba' for regend[3].
4434
 
4435
                     Also restore the registers for inner groups for,
4436
                     e.g., `((a*)(b*))*' against `aba' (register 3 would
4437
                     otherwise get trashed).  */
4438
 
4439
                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4440
                    {
4441
                      unsigned r;
4442
 
4443
                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4444
 
4445
                      /* Restore this and inner groups' (if any) registers.  */
4446
                      for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4447
                           r++)
4448
                        {
4449
                          regstart[r] = old_regstart[r];
4450
 
4451
                          /* xx why this test?  */
4452
                          if (old_regend[r] >= regstart[r])
4453
                            regend[r] = old_regend[r];
4454
                        }
4455
                    }
4456
                  p1++;
4457
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4458
                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4459
 
4460
                  goto fail;
4461
                }
4462
            }
4463
 
4464
          /* Move past the register number and the inner group count.  */
4465
          p += 2;
4466
          break;
4467
 
4468
 
4469
        /* \<digit> has been turned into a `duplicate' command which is
4470
           followed by the numeric value of <digit> as the register number.  */
4471
        case duplicate:
4472
          {
4473
            register const char *d2, *dend2;
4474
            int regno = *p++;   /* Get which register to match against.  */
4475
            DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4476
 
4477
            /* Can't back reference a group which we've never matched.  */
4478
            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4479
              goto fail;
4480
 
4481
            /* Where in input to try to start matching.  */
4482
            d2 = regstart[regno];
4483
 
4484
            /* Where to stop matching; if both the place to start and
4485
               the place to stop matching are in the same string, then
4486
               set to the place to stop, otherwise, for now have to use
4487
               the end of the first string.  */
4488
 
4489
            dend2 = ((FIRST_STRING_P (regstart[regno])
4490
                      == FIRST_STRING_P (regend[regno]))
4491
                     ? regend[regno] : end_match_1);
4492
            for (;;)
4493
              {
4494
                /* If necessary, advance to next segment in register
4495
                   contents.  */
4496
                while (d2 == dend2)
4497
                  {
4498
                    if (dend2 == end_match_2) break;
4499
                    if (dend2 == regend[regno]) break;
4500
 
4501
                    /* End of string1 => advance to string2. */
4502
                    d2 = string2;
4503
                    dend2 = regend[regno];
4504
                  }
4505
                /* At end of register contents => success */
4506
                if (d2 == dend2) break;
4507
 
4508
                /* If necessary, advance to next segment in data.  */
4509
                PREFETCH ();
4510
 
4511
                /* How many characters left in this segment to match.  */
4512
                mcnt = dend - d;
4513
 
4514
                /* Want how many consecutive characters we can match in
4515
                   one shot, so, if necessary, adjust the count.  */
4516
                if (mcnt > dend2 - d2)
4517
                  mcnt = dend2 - d2;
4518
 
4519
                /* Compare that many; failure if mismatch, else move
4520
                   past them.  */
4521
                if (translate
4522
                    ? bcmp_translate (d, d2, mcnt, translate)
4523
                    : memcmp (d, d2, mcnt))
4524
                  goto fail;
4525
                d += mcnt, d2 += mcnt;
4526
 
4527
                /* Do this because we've match some characters.  */
4528
                SET_REGS_MATCHED ();
4529
              }
4530
          }
4531
          break;
4532
 
4533
 
4534
        /* begline matches the empty string at the beginning of the string
4535
           (unless `not_bol' is set in `bufp'), and, if
4536
           `newline_anchor' is set, after newlines.  */
4537
        case begline:
4538
          DEBUG_PRINT1 ("EXECUTING begline.\n");
4539
 
4540
          if (AT_STRINGS_BEG (d))
4541
            {
4542
              if (!bufp->not_bol) break;
4543
            }
4544
          else if (d[-1] == '\n' && bufp->newline_anchor)
4545
            {
4546
              break;
4547
            }
4548
          /* In all other cases, we fail.  */
4549
          goto fail;
4550
 
4551
 
4552
        /* endline is the dual of begline.  */
4553
        case endline:
4554
          DEBUG_PRINT1 ("EXECUTING endline.\n");
4555
 
4556
          if (AT_STRINGS_END (d))
4557
            {
4558
              if (!bufp->not_eol) break;
4559
            }
4560
 
4561
          /* We have to ``prefetch'' the next character.  */
4562
          else if ((d == end1 ? *string2 : *d) == '\n'
4563
                   && bufp->newline_anchor)
4564
            {
4565
              break;
4566
            }
4567
          goto fail;
4568
 
4569
 
4570
        /* Match at the very beginning of the data.  */
4571
        case begbuf:
4572
          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4573
          if (AT_STRINGS_BEG (d))
4574
            break;
4575
          goto fail;
4576
 
4577
 
4578
        /* Match at the very end of the data.  */
4579
        case endbuf:
4580
          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4581
          if (AT_STRINGS_END (d))
4582
            break;
4583
          goto fail;
4584
 
4585
 
4586
        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4587
           pushes NULL as the value for the string on the stack.  Then
4588
           `pop_failure_point' will keep the current value for the
4589
           string, instead of restoring it.  To see why, consider
4590
           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4591
           then the . fails against the \n.  But the next thing we want
4592
           to do is match the \n against the \n; if we restored the
4593
           string value, we would be back at the foo.
4594
 
4595
           Because this is used only in specific cases, we don't need to
4596
           check all the things that `on_failure_jump' does, to make
4597
           sure the right things get saved on the stack.  Hence we don't
4598
           share its code.  The only reason to push anything on the
4599
           stack at all is that otherwise we would have to change
4600
           `anychar's code to do something besides goto fail in this
4601
           case; that seems worse than this.  */
4602
        case on_failure_keep_string_jump:
4603
          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4604
 
4605
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4606
#ifdef _LIBC
4607
          DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4608
#else
4609
          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4610
#endif
4611
 
4612
          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4613
          break;
4614
 
4615
 
4616
        /* Uses of on_failure_jump:
4617
 
4618
           Each alternative starts with an on_failure_jump that points
4619
           to the beginning of the next alternative.  Each alternative
4620
           except the last ends with a jump that in effect jumps past
4621
           the rest of the alternatives.  (They really jump to the
4622
           ending jump of the following alternative, because tensioning
4623
           these jumps is a hassle.)
4624
 
4625
           Repeats start with an on_failure_jump that points past both
4626
           the repetition text and either the following jump or
4627
           pop_failure_jump back to this on_failure_jump.  */
4628
        case on_failure_jump:
4629
        on_failure:
4630
          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4631
 
4632
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4633
#ifdef _LIBC
4634
          DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4635
#else
4636
          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4637
#endif
4638
 
4639
          /* If this on_failure_jump comes right before a group (i.e.,
4640
             the original * applied to a group), save the information
4641
             for that group and all inner ones, so that if we fail back
4642
             to this point, the group's information will be correct.
4643
             For example, in \(a*\)*\1, we need the preceding group,
4644
             and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4645
 
4646
          /* We can't use `p' to check ahead because we push
4647
             a failure point to `p + mcnt' after we do this.  */
4648
          p1 = p;
4649
 
4650
          /* We need to skip no_op's before we look for the
4651
             start_memory in case this on_failure_jump is happening as
4652
             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4653
             against aba.  */
4654
          while (p1 < pend && (re_opcode_t) *p1 == no_op)
4655
            p1++;
4656
 
4657
          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4658
            {
4659
              /* We have a new highest active register now.  This will
4660
                 get reset at the start_memory we are about to get to,
4661
                 but we will have saved all the registers relevant to
4662
                 this repetition op, as described above.  */
4663
              highest_active_reg = *(p1 + 1) + *(p1 + 2);
4664
              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4665
                lowest_active_reg = *(p1 + 1);
4666
            }
4667
 
4668
          DEBUG_PRINT1 (":\n");
4669
          PUSH_FAILURE_POINT (p + mcnt, d, -2);
4670
          break;
4671
 
4672
 
4673
        /* A smart repeat ends with `maybe_pop_jump'.
4674
           We change it to either `pop_failure_jump' or `jump'.  */
4675
        case maybe_pop_jump:
4676
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
4677
          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4678
          {
4679
            register unsigned char *p2 = p;
4680
 
4681
            /* Compare the beginning of the repeat with what in the
4682
               pattern follows its end. If we can establish that there
4683
               is nothing that they would both match, i.e., that we
4684
               would have to backtrack because of (as in, e.g., `a*a')
4685
               then we can change to pop_failure_jump, because we'll
4686
               never have to backtrack.
4687
 
4688
               This is not true in the case of alternatives: in
4689
               `(a|ab)*' we do need to backtrack to the `ab' alternative
4690
               (e.g., if the string was `ab').  But instead of trying to
4691
               detect that here, the alternative has put on a dummy
4692
               failure point which is what we will end up popping.  */
4693
 
4694
            /* Skip over open/close-group commands.
4695
               If what follows this loop is a ...+ construct,
4696
               look at what begins its body, since we will have to
4697
               match at least one of that.  */
4698
            while (1)
4699
              {
4700
                if (p2 + 2 < pend
4701
                    && ((re_opcode_t) *p2 == stop_memory
4702
                        || (re_opcode_t) *p2 == start_memory))
4703
                  p2 += 3;
4704
                else if (p2 + 6 < pend
4705
                         && (re_opcode_t) *p2 == dummy_failure_jump)
4706
                  p2 += 6;
4707
                else
4708
                  break;
4709
              }
4710
 
4711
            p1 = p + mcnt;
4712
            /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4713
               to the `maybe_finalize_jump' of this case.  Examine what
4714
               follows.  */
4715
 
4716
            /* If we're at the end of the pattern, we can change.  */
4717
            if (p2 == pend)
4718
              {
4719
                /* Consider what happens when matching ":\(.*\)"
4720
                   against ":/".  I don't really understand this code
4721
                   yet.  */
4722
                p[-3] = (unsigned char) pop_failure_jump;
4723
                DEBUG_PRINT1
4724
                  ("  End of pattern: change to `pop_failure_jump'.\n");
4725
              }
4726
 
4727
            else if ((re_opcode_t) *p2 == exactn
4728
                     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4729
              {
4730
                register unsigned char c
4731
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4732
 
4733
                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4734
                  {
4735
                    p[-3] = (unsigned char) pop_failure_jump;
4736
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4737
                                  c, p1[5]);
4738
                  }
4739
 
4740
                else if ((re_opcode_t) p1[3] == charset
4741
                         || (re_opcode_t) p1[3] == charset_not)
4742
                  {
4743
                    int not = (re_opcode_t) p1[3] == charset_not;
4744
 
4745
                    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4746
                        && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4747
                      not = !not;
4748
 
4749
                    /* `not' is equal to 1 if c would match, which means
4750
                        that we can't change to pop_failure_jump.  */
4751
                    if (!not)
4752
                      {
4753
                        p[-3] = (unsigned char) pop_failure_jump;
4754
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4755
                      }
4756
                  }
4757
              }
4758
            else if ((re_opcode_t) *p2 == charset)
4759
              {
4760
#ifdef DEBUG
4761
                register unsigned char c
4762
                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4763
#endif
4764
 
4765
#if 0
4766
                if ((re_opcode_t) p1[3] == exactn
4767
                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4768
                          && (p2[2 + p1[5] / BYTEWIDTH]
4769
                              & (1 << (p1[5] % BYTEWIDTH)))))
4770
#else
4771
                if ((re_opcode_t) p1[3] == exactn
4772
                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4773
                          && (p2[2 + p1[4] / BYTEWIDTH]
4774
                              & (1 << (p1[4] % BYTEWIDTH)))))
4775
#endif
4776
                  {
4777
                    p[-3] = (unsigned char) pop_failure_jump;
4778
                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4779
                                  c, p1[5]);
4780
                  }
4781
 
4782
                else if ((re_opcode_t) p1[3] == charset_not)
4783
                  {
4784
                    int idx;
4785
                    /* We win if the charset_not inside the loop
4786
                       lists every character listed in the charset after.  */
4787
                    for (idx = 0; idx < (int) p2[1]; idx++)
4788
                      if (! (p2[2 + idx] == 0
4789
                             || (idx < (int) p1[4]
4790
                                 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4791
                        break;
4792
 
4793
                    if (idx == p2[1])
4794
                      {
4795
                        p[-3] = (unsigned char) pop_failure_jump;
4796
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4797
                      }
4798
                  }
4799
                else if ((re_opcode_t) p1[3] == charset)
4800
                  {
4801
                    int idx;
4802
                    /* We win if the charset inside the loop
4803
                       has no overlap with the one after the loop.  */
4804
                    for (idx = 0;
4805
                         idx < (int) p2[1] && idx < (int) p1[4];
4806
                         idx++)
4807
                      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4808
                        break;
4809
 
4810
                    if (idx == p2[1] || idx == p1[4])
4811
                      {
4812
                        p[-3] = (unsigned char) pop_failure_jump;
4813
                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4814
                      }
4815
                  }
4816
              }
4817
          }
4818
          p -= 2;               /* Point at relative address again.  */
4819
          if ((re_opcode_t) p[-1] != pop_failure_jump)
4820
            {
4821
              p[-1] = (unsigned char) jump;
4822
              DEBUG_PRINT1 ("  Match => jump.\n");
4823
              goto unconditional_jump;
4824
            }
4825
        /* Note fall through.  */
4826
 
4827
 
4828
        /* The end of a simple repeat has a pop_failure_jump back to
4829
           its matching on_failure_jump, where the latter will push a
4830
           failure point.  The pop_failure_jump takes off failure
4831
           points put on by this pop_failure_jump's matching
4832
           on_failure_jump; we got through the pattern to here from the
4833
           matching on_failure_jump, so didn't fail.  */
4834
        case pop_failure_jump:
4835
          {
4836
            /* We need to pass separate storage for the lowest and
4837
               highest registers, even though we don't care about the
4838
               actual values.  Otherwise, we will restore only one
4839
               register from the stack, since lowest will == highest in
4840
               `pop_failure_point'.  */
4841
            active_reg_t dummy_low_reg, dummy_high_reg;
4842
            unsigned char *pdummy;
4843
            const char *sdummy;
4844
 
4845
            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4846
            POP_FAILURE_POINT (sdummy, pdummy,
4847
                               dummy_low_reg, dummy_high_reg,
4848
                               reg_dummy, reg_dummy, reg_info_dummy);
4849
          }
4850
          /* Note fall through.  */
4851
 
4852
        unconditional_jump:
4853
#ifdef _LIBC
4854
          DEBUG_PRINT2 ("\n%p: ", p);
4855
#else
4856
          DEBUG_PRINT2 ("\n0x%x: ", p);
4857
#endif
4858
          /* Note fall through.  */
4859
 
4860
        /* Unconditionally jump (without popping any failure points).  */
4861
        case jump:
4862
          EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
4863
          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4864
          p += mcnt;                            /* Do the jump.  */
4865
#ifdef _LIBC
4866
          DEBUG_PRINT2 ("(to %p).\n", p);
4867
#else
4868
          DEBUG_PRINT2 ("(to 0x%x).\n", p);
4869
#endif
4870
          break;
4871
 
4872
 
4873
        /* We need this opcode so we can detect where alternatives end
4874
           in `group_match_null_string_p' et al.  */
4875
        case jump_past_alt:
4876
          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4877
          goto unconditional_jump;
4878
 
4879
 
4880
        /* Normally, the on_failure_jump pushes a failure point, which
4881
           then gets popped at pop_failure_jump.  We will end up at
4882
           pop_failure_jump, also, and with a pattern of, say, `a+', we
4883
           are skipping over the on_failure_jump, so we have to push
4884
           something meaningless for pop_failure_jump to pop.  */
4885
        case dummy_failure_jump:
4886
          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4887
          /* It doesn't matter what we push for the string here.  What
4888
             the code at `fail' tests is the value for the pattern.  */
4889
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4890
          goto unconditional_jump;
4891
 
4892
 
4893
        /* At the end of an alternative, we need to push a dummy failure
4894
           point in case we are followed by a `pop_failure_jump', because
4895
           we don't want the failure point for the alternative to be
4896
           popped.  For example, matching `(a|ab)*' against `aab'
4897
           requires that we match the `ab' alternative.  */
4898
        case push_dummy_failure:
4899
          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4900
          /* See comments just above at `dummy_failure_jump' about the
4901
             two zeroes.  */
4902
          PUSH_FAILURE_POINT (NULL, NULL, -2);
4903
          break;
4904
 
4905
        /* Have to succeed matching what follows at least n times.
4906
           After that, handle like `on_failure_jump'.  */
4907
        case succeed_n:
4908
          EXTRACT_NUMBER (mcnt, p + 2);
4909
          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4910
 
4911
          assert (mcnt >= 0);
4912
          /* Originally, this is how many times we HAVE to succeed.  */
4913
          if (mcnt > 0)
4914
            {
4915
               mcnt--;
4916
               p += 2;
4917
               STORE_NUMBER_AND_INCR (p, mcnt);
4918
#ifdef _LIBC
4919
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4920
#else
4921
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4922
#endif
4923
            }
4924
          else if (mcnt == 0)
4925
            {
4926
#ifdef _LIBC
4927
              DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4928
#else
4929
              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4930
#endif
4931
              p[2] = (unsigned char) no_op;
4932
              p[3] = (unsigned char) no_op;
4933
              goto on_failure;
4934
            }
4935
          break;
4936
 
4937
        case jump_n:
4938
          EXTRACT_NUMBER (mcnt, p + 2);
4939
          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4940
 
4941
          /* Originally, this is how many times we CAN jump.  */
4942
          if (mcnt)
4943
            {
4944
               mcnt--;
4945
               STORE_NUMBER (p + 2, mcnt);
4946
#ifdef _LIBC
4947
               DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4948
#else
4949
               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4950
#endif
4951
               goto unconditional_jump;
4952
            }
4953
          /* If don't have to jump any more, skip over the rest of command.  */
4954
          else
4955
            p += 4;
4956
          break;
4957
 
4958
        case set_number_at:
4959
          {
4960
            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4961
 
4962
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4963
            p1 = p + mcnt;
4964
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
4965
#ifdef _LIBC
4966
            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4967
#else
4968
            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4969
#endif
4970
            STORE_NUMBER (p1, mcnt);
4971
            break;
4972
          }
4973
 
4974
#if 0
4975
        /* The DEC Alpha C compiler 3.x generates incorrect code for the
4976
           test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4977
           AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4978
           macro and introducing temporary variables works around the bug.  */
4979
 
4980
        case wordbound:
4981
          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4982
          if (AT_WORD_BOUNDARY (d))
4983
            break;
4984
          goto fail;
4985
 
4986
        case notwordbound:
4987
          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4988
          if (AT_WORD_BOUNDARY (d))
4989
            goto fail;
4990
          break;
4991
#else
4992
        case wordbound:
4993
        {
4994
          boolean prevchar, thischar;
4995
 
4996
          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4997
          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4998
            break;
4999
 
5000
          prevchar = WORDCHAR_P (d - 1);
5001
          thischar = WORDCHAR_P (d);
5002
          if (prevchar != thischar)
5003
            break;
5004
          goto fail;
5005
        }
5006
 
5007
      case notwordbound:
5008
        {
5009
          boolean prevchar, thischar;
5010
 
5011
          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5012
          if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5013
            goto fail;
5014
 
5015
          prevchar = WORDCHAR_P (d - 1);
5016
          thischar = WORDCHAR_P (d);
5017
          if (prevchar != thischar)
5018
            goto fail;
5019
          break;
5020
        }
5021
#endif
5022
 
5023
        case wordbeg:
5024
          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5025
          if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5026
            break;
5027
          goto fail;
5028
 
5029
        case wordend:
5030
          DEBUG_PRINT1 ("EXECUTING wordend.\n");
5031
          if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5032
              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5033
            break;
5034
          goto fail;
5035
 
5036
#ifdef emacs
5037
        case before_dot:
5038
          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5039
          if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5040
            goto fail;
5041
          break;
5042
 
5043
        case at_dot:
5044
          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5045
          if (PTR_CHAR_POS ((unsigned char *) d) != point)
5046
            goto fail;
5047
          break;
5048
 
5049
        case after_dot:
5050
          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5051
          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5052
            goto fail;
5053
          break;
5054
 
5055
        case syntaxspec:
5056
          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5057
          mcnt = *p++;
5058
          goto matchsyntax;
5059
 
5060
        case wordchar:
5061
          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5062
          mcnt = (int) Sword;
5063
        matchsyntax:
5064
          PREFETCH ();
5065
          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5066
          d++;
5067
          if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5068
            goto fail;
5069
          SET_REGS_MATCHED ();
5070
          break;
5071
 
5072
        case notsyntaxspec:
5073
          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5074
          mcnt = *p++;
5075
          goto matchnotsyntax;
5076
 
5077
        case notwordchar:
5078
          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5079
          mcnt = (int) Sword;
5080
        matchnotsyntax:
5081
          PREFETCH ();
5082
          /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5083
          d++;
5084
          if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5085
            goto fail;
5086
          SET_REGS_MATCHED ();
5087
          break;
5088
 
5089
#else /* not emacs */
5090
        case wordchar:
5091
          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5092
          PREFETCH ();
5093
          if (!WORDCHAR_P (d))
5094
            goto fail;
5095
          SET_REGS_MATCHED ();
5096
          d++;
5097
          break;
5098
 
5099
        case notwordchar:
5100
          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5101
          PREFETCH ();
5102
          if (WORDCHAR_P (d))
5103
            goto fail;
5104
          SET_REGS_MATCHED ();
5105
          d++;
5106
          break;
5107
#endif /* not emacs */
5108
 
5109
        default:
5110
          abort ();
5111
        }
5112
      continue;  /* Successfully executed one pattern command; keep going.  */
5113
 
5114
 
5115
    /* We goto here if a matching operation fails. */
5116
    fail:
5117
      if (!FAIL_STACK_EMPTY ())
5118
        { /* A restart point is known.  Restore to that state.  */
5119
          DEBUG_PRINT1 ("\nFAIL:\n");
5120
          POP_FAILURE_POINT (d, p,
5121
                             lowest_active_reg, highest_active_reg,
5122
                             regstart, regend, reg_info);
5123
 
5124
          /* If this failure point is a dummy, try the next one.  */
5125
          if (!p)
5126
            goto fail;
5127
 
5128
          /* If we failed to the end of the pattern, don't examine *p.  */
5129
          assert (p <= pend);
5130
          if (p < pend)
5131
            {
5132
              boolean is_a_jump_n = false;
5133
 
5134
              /* If failed to a backwards jump that's part of a repetition
5135
                 loop, need to pop this failure point and use the next one.  */
5136
              switch ((re_opcode_t) *p)
5137
                {
5138
                case jump_n:
5139
                  is_a_jump_n = true;
5140
                case maybe_pop_jump:
5141
                case pop_failure_jump:
5142
                case jump:
5143
                  p1 = p + 1;
5144
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5145
                  p1 += mcnt;
5146
 
5147
                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5148
                      || (!is_a_jump_n
5149
                          && (re_opcode_t) *p1 == on_failure_jump))
5150
                    goto fail;
5151
                  break;
5152
                default:
5153
                  /* do nothing */ ;
5154
                }
5155
            }
5156
 
5157
          if (d >= string1 && d <= end1)
5158
            dend = end_match_1;
5159
        }
5160
      else
5161
        break;   /* Matching at this starting point really fails.  */
5162
    } /* for (;;) */
5163
 
5164
  if (best_regs_set)
5165
    goto restore_best_regs;
5166
 
5167
  FREE_VARIABLES ();
5168
 
5169
  return -1;                            /* Failure to match.  */
5170
} /* re_match_2 */
5171
 
5172
/* Subroutine definitions for re_match_2.  */
5173
 
5174
 
5175
/* We are passed P pointing to a register number after a start_memory.
5176
 
5177
   Return true if the pattern up to the corresponding stop_memory can
5178
   match the empty string, and false otherwise.
5179
 
5180
   If we find the matching stop_memory, sets P to point to one past its number.
5181
   Otherwise, sets P to an undefined byte less than or equal to END.
5182
 
5183
   We don't handle duplicates properly (yet).  */
5184
 
5185
static boolean
5186
group_match_null_string_p (p, end, reg_info)
5187
    unsigned char **p, *end;
5188
    register_info_type *reg_info;
5189
{
5190
  int mcnt;
5191
  /* Point to after the args to the start_memory.  */
5192
  unsigned char *p1 = *p + 2;
5193
 
5194
  while (p1 < end)
5195
    {
5196
      /* Skip over opcodes that can match nothing, and return true or
5197
         false, as appropriate, when we get to one that can't, or to the
5198
         matching stop_memory.  */
5199
 
5200
      switch ((re_opcode_t) *p1)
5201
        {
5202
        /* Could be either a loop or a series of alternatives.  */
5203
        case on_failure_jump:
5204
          p1++;
5205
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5206
 
5207
          /* If the next operation is not a jump backwards in the
5208
             pattern.  */
5209
 
5210
          if (mcnt >= 0)
5211
            {
5212
              /* Go through the on_failure_jumps of the alternatives,
5213
                 seeing if any of the alternatives cannot match nothing.
5214
                 The last alternative starts with only a jump,
5215
                 whereas the rest start with on_failure_jump and end
5216
                 with a jump, e.g., here is the pattern for `a|b|c':
5217
 
5218
                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5219
                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5220
                 /exactn/1/c
5221
 
5222
                 So, we have to first go through the first (n-1)
5223
                 alternatives and then deal with the last one separately.  */
5224
 
5225
 
5226
              /* Deal with the first (n-1) alternatives, which start
5227
                 with an on_failure_jump (see above) that jumps to right
5228
                 past a jump_past_alt.  */
5229
 
5230
              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5231
                {
5232
                  /* `mcnt' holds how many bytes long the alternative
5233
                     is, including the ending `jump_past_alt' and
5234
                     its number.  */
5235
 
5236
                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5237
                                                      reg_info))
5238
                    return false;
5239
 
5240
                  /* Move to right after this alternative, including the
5241
                     jump_past_alt.  */
5242
                  p1 += mcnt;
5243
 
5244
                  /* Break if it's the beginning of an n-th alternative
5245
                     that doesn't begin with an on_failure_jump.  */
5246
                  if ((re_opcode_t) *p1 != on_failure_jump)
5247
                    break;
5248
 
5249
                  /* Still have to check that it's not an n-th
5250
                     alternative that starts with an on_failure_jump.  */
5251
                  p1++;
5252
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5253
                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5254
                    {
5255
                      /* Get to the beginning of the n-th alternative.  */
5256
                      p1 -= 3;
5257
                      break;
5258
                    }
5259
                }
5260
 
5261
              /* Deal with the last alternative: go back and get number
5262
                 of the `jump_past_alt' just before it.  `mcnt' contains
5263
                 the length of the alternative.  */
5264
              EXTRACT_NUMBER (mcnt, p1 - 2);
5265
 
5266
              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5267
                return false;
5268
 
5269
              p1 += mcnt;       /* Get past the n-th alternative.  */
5270
            } /* if mcnt > 0 */
5271
          break;
5272
 
5273
 
5274
        case stop_memory:
5275
          assert (p1[1] == **p);
5276
          *p = p1 + 2;
5277
          return true;
5278
 
5279
 
5280
        default:
5281
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5282
            return false;
5283
        }
5284
    } /* while p1 < end */
5285
 
5286
  return false;
5287
} /* group_match_null_string_p */
5288
 
5289
 
5290
/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5291
   It expects P to be the first byte of a single alternative and END one
5292
   byte past the last. The alternative can contain groups.  */
5293
 
5294
static boolean
5295
alt_match_null_string_p (p, end, reg_info)
5296
    unsigned char *p, *end;
5297
    register_info_type *reg_info;
5298
{
5299
  int mcnt;
5300
  unsigned char *p1 = p;
5301
 
5302
  while (p1 < end)
5303
    {
5304
      /* Skip over opcodes that can match nothing, and break when we get
5305
         to one that can't.  */
5306
 
5307
      switch ((re_opcode_t) *p1)
5308
        {
5309
        /* It's a loop.  */
5310
        case on_failure_jump:
5311
          p1++;
5312
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5313
          p1 += mcnt;
5314
          break;
5315
 
5316
        default:
5317
          if (!common_op_match_null_string_p (&p1, end, reg_info))
5318
            return false;
5319
        }
5320
    }  /* while p1 < end */
5321
 
5322
  return true;
5323
} /* alt_match_null_string_p */
5324
 
5325
 
5326
/* Deals with the ops common to group_match_null_string_p and
5327
   alt_match_null_string_p.
5328
 
5329
   Sets P to one after the op and its arguments, if any.  */
5330
 
5331
static boolean
5332
common_op_match_null_string_p (p, end, reg_info)
5333
    unsigned char **p, *end;
5334
    register_info_type *reg_info;
5335
{
5336
  int mcnt;
5337
  boolean ret;
5338
  int reg_no;
5339
  unsigned char *p1 = *p;
5340
 
5341
  switch ((re_opcode_t) *p1++)
5342
    {
5343
    case no_op:
5344
    case begline:
5345
    case endline:
5346
    case begbuf:
5347
    case endbuf:
5348
    case wordbeg:
5349
    case wordend:
5350
    case wordbound:
5351
    case notwordbound:
5352
#ifdef emacs
5353
    case before_dot:
5354
    case at_dot:
5355
    case after_dot:
5356
#endif
5357
      break;
5358
 
5359
    case start_memory:
5360
      reg_no = *p1;
5361
      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5362
      ret = group_match_null_string_p (&p1, end, reg_info);
5363
 
5364
      /* Have to set this here in case we're checking a group which
5365
         contains a group and a back reference to it.  */
5366
 
5367
      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5368
        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5369
 
5370
      if (!ret)
5371
        return false;
5372
      break;
5373
 
5374
    /* If this is an optimized succeed_n for zero times, make the jump.  */
5375
    case jump:
5376
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5377
      if (mcnt >= 0)
5378
        p1 += mcnt;
5379
      else
5380
        return false;
5381
      break;
5382
 
5383
    case succeed_n:
5384
      /* Get to the number of times to succeed.  */
5385
      p1 += 2;
5386
      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5387
 
5388
      if (mcnt == 0)
5389
        {
5390
          p1 -= 4;
5391
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5392
          p1 += mcnt;
5393
        }
5394
      else
5395
        return false;
5396
      break;
5397
 
5398
    case duplicate:
5399
      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5400
        return false;
5401
      break;
5402
 
5403
    case set_number_at:
5404
      p1 += 4;
5405
 
5406
    default:
5407
      /* All other opcodes mean we cannot match the empty string.  */
5408
      return false;
5409
  }
5410
 
5411
  *p = p1;
5412
  return true;
5413
} /* common_op_match_null_string_p */
5414
 
5415
 
5416
/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5417
   bytes; nonzero otherwise.  */
5418
 
5419
static int
5420
bcmp_translate (s1, s2, len, translate)
5421
     const char *s1, *s2;
5422
     register int len;
5423
     RE_TRANSLATE_TYPE translate;
5424
{
5425
  register const unsigned char *p1 = (const unsigned char *) s1;
5426
  register const unsigned char *p2 = (const unsigned char *) s2;
5427
  while (len)
5428
    {
5429
      if (translate[*p1++] != translate[*p2++]) return 1;
5430
      len--;
5431
    }
5432
  return 0;
5433
}
5434
 
5435
/* Entry points for GNU code.  */
5436
 
5437
/* re_compile_pattern is the GNU regular expression compiler: it
5438
   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5439
   Returns 0 if the pattern was valid, otherwise an error string.
5440
 
5441
   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5442
   are set in BUFP on entry.
5443
 
5444
   We call regex_compile to do the actual compilation.  */
5445
 
5446
const char *
5447
re_compile_pattern (pattern, length, bufp)
5448
     const char *pattern;
5449
     size_t length;
5450
     struct re_pattern_buffer *bufp;
5451
{
5452
  reg_errcode_t ret;
5453
 
5454
  /* GNU code is written to assume at least RE_NREGS registers will be set
5455
     (and at least one extra will be -1).  */
5456
  bufp->regs_allocated = REGS_UNALLOCATED;
5457
 
5458
  /* And GNU code determines whether or not to get register information
5459
     by passing null for the REGS argument to re_match, etc., not by
5460
     setting no_sub.  */
5461
  bufp->no_sub = 0;
5462
 
5463
  /* Match anchors at newline.  */
5464
  bufp->newline_anchor = 1;
5465
 
5466
  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5467
 
5468
  if (!ret)
5469
    return NULL;
5470
  return gettext (re_error_msgid[(int) ret]);
5471
}
5472
#ifdef _LIBC
5473
weak_alias (__re_compile_pattern, re_compile_pattern)
5474
#endif
5475
 
5476
/* Entry points compatible with 4.2 BSD regex library.  We don't define
5477
   them unless specifically requested.  */
5478
 
5479
#if defined _REGEX_RE_COMP || defined _LIBC
5480
 
5481
/* BSD has one and only one pattern buffer.  */
5482
static struct re_pattern_buffer re_comp_buf;
5483
 
5484
char *
5485
#ifdef _LIBC
5486
/* Make these definitions weak in libc, so POSIX programs can redefine
5487
   these names if they don't use our functions, and still use
5488
   regcomp/regexec below without link errors.  */
5489
weak_function
5490
#endif
5491
re_comp (s)
5492
    const char *s;
5493
{
5494
  reg_errcode_t ret;
5495
 
5496
  if (!s)
5497
    {
5498
      if (!re_comp_buf.buffer)
5499
        return gettext ("No previous regular expression");
5500
      return 0;
5501
    }
5502
 
5503
  if (!re_comp_buf.buffer)
5504
    {
5505
      re_comp_buf.buffer = (unsigned char *) malloc (200);
5506
      if (re_comp_buf.buffer == NULL)
5507
        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5508
      re_comp_buf.allocated = 200;
5509
 
5510
      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5511
      if (re_comp_buf.fastmap == NULL)
5512
        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5513
    }
5514
 
5515
  /* Since `re_exec' always passes NULL for the `regs' argument, we
5516
     don't need to initialize the pattern buffer fields which affect it.  */
5517
 
5518
  /* Match anchors at newlines.  */
5519
  re_comp_buf.newline_anchor = 1;
5520
 
5521
  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5522
 
5523
  if (!ret)
5524
    return NULL;
5525
 
5526
  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5527
  return (char *) gettext (re_error_msgid[(int) ret]);
5528
}
5529
 
5530
 
5531
int
5532
#ifdef _LIBC
5533
weak_function
5534
#endif
5535
re_exec (s)
5536
    const char *s;
5537
{
5538
  const int len = strlen (s);
5539
  return
5540
 
5541
}
5542
 
5543
#endif /* _REGEX_RE_COMP */
5544
 
5545
/* POSIX.2 functions.  Don't define these for Emacs.  */
5546
 
5547
#ifndef emacs
5548
 
5549
/* regcomp takes a regular expression as a string and compiles it.
5550
 
5551
   PREG is a regex_t *.  We do not expect any fields to be initialized,
5552
   since POSIX says we shouldn't.  Thus, we set
5553
 
5554
     `buffer' to the compiled pattern;
5555
     `used' to the length of the compiled pattern;
5556
     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5557
       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5558
       RE_SYNTAX_POSIX_BASIC;
5559
     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5560
     `fastmap' and `fastmap_accurate' to zero;
5561
     `re_nsub' to the number of subexpressions in PATTERN.
5562
 
5563
   PATTERN is the address of the pattern string.
5564
 
5565
   CFLAGS is a series of bits which affect compilation.
5566
 
5567
     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5568
     use POSIX basic syntax.
5569
 
5570
     If REG_NEWLINE is set, then . and [^...] don't match newline.
5571
     Also, regexec will try a match beginning after every newline.
5572
 
5573
     If REG_ICASE is set, then we considers upper- and lowercase
5574
     versions of letters to be equivalent when matching.
5575
 
5576
     If REG_NOSUB is set, then when PREG is passed to regexec, that
5577
     routine will report only success or failure, and nothing about the
5578
     registers.
5579
 
5580
   It returns 0 if it succeeds, nonzero if it doesn't.  (See gnu-regex.h for
5581
   the return codes and their meanings.)  */
5582
 
5583
int
5584
regcomp (preg, pattern, cflags)
5585
    regex_t *preg;
5586
    const char *pattern;
5587
    int cflags;
5588
{
5589
  reg_errcode_t ret;
5590
  reg_syntax_t syntax
5591
    = (cflags & REG_EXTENDED) ?
5592
      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5593
 
5594
  /* regex_compile will allocate the space for the compiled pattern.  */
5595
  preg->buffer = 0;
5596
  preg->allocated = 0;
5597
  preg->used = 0;
5598
 
5599
  /* Don't bother to use a fastmap when searching.  This simplifies the
5600
     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5601
     characters after newlines into the fastmap.  This way, we just try
5602
     every character.  */
5603
  preg->fastmap = 0;
5604
 
5605
  if (cflags & REG_ICASE)
5606
    {
5607
      unsigned i;
5608
 
5609
      preg->translate
5610
        = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5611
                                      * sizeof (*(RE_TRANSLATE_TYPE)0));
5612
      if (preg->translate == NULL)
5613
        return (int) REG_ESPACE;
5614
 
5615
      /* Map uppercase characters to corresponding lowercase ones.  */
5616
      for (i = 0; i < CHAR_SET_SIZE; i++)
5617
        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5618
    }
5619
  else
5620
    preg->translate = NULL;
5621
 
5622
  /* If REG_NEWLINE is set, newlines are treated differently.  */
5623
  if (cflags & REG_NEWLINE)
5624
    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5625
      syntax &= ~RE_DOT_NEWLINE;
5626
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5627
      /* It also changes the matching behavior.  */
5628
      preg->newline_anchor = 1;
5629
    }
5630
  else
5631
    preg->newline_anchor = 0;
5632
 
5633
  preg->no_sub = !!(cflags & REG_NOSUB);
5634
 
5635
  /* POSIX says a null character in the pattern terminates it, so we
5636
     can use strlen here in compiling the pattern.  */
5637
  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5638
 
5639
  /* POSIX doesn't distinguish between an unmatched open-group and an
5640
     unmatched close-group: both are REG_EPAREN.  */
5641
  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5642
 
5643
  return (int) ret;
5644
}
5645
#ifdef _LIBC
5646
weak_alias (__regcomp, regcomp)
5647
#endif
5648
 
5649
 
5650
/* regexec searches for a given pattern, specified by PREG, in the
5651
   string STRING.
5652
 
5653
   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5654
   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5655
   least NMATCH elements, and we set them to the offsets of the
5656
   corresponding matched substrings.
5657
 
5658
   EFLAGS specifies `execution flags' which affect matching: if
5659
   REG_NOTBOL is set, then ^ does not match at the beginning of the
5660
   string; if REG_NOTEOL is set, then $ does not match at the end.
5661
 
5662
   We return 0 if we find a match and REG_NOMATCH if not.  */
5663
 
5664
int
5665
regexec (preg, string, nmatch, pmatch, eflags)
5666
    const regex_t *preg;
5667
    const char *string;
5668
    size_t nmatch;
5669
    regmatch_t pmatch[];
5670
    int eflags;
5671
{
5672
  int ret;
5673
  struct re_registers regs;
5674
  regex_t private_preg;
5675
  int len = strlen (string);
5676
  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5677
 
5678
  private_preg = *preg;
5679
 
5680
  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5681
  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5682
 
5683
  /* The user has told us exactly how many registers to return
5684
     information about, via `nmatch'.  We have to pass that on to the
5685
     matching routines.  */
5686
  private_preg.regs_allocated = REGS_FIXED;
5687
 
5688
  if (want_reg_info)
5689
    {
5690
      regs.num_regs = nmatch;
5691
      regs.start = TALLOC (nmatch, regoff_t);
5692
      regs.end = TALLOC (nmatch, regoff_t);
5693
      if (regs.start == NULL || regs.end == NULL)
5694
        return (int) REG_NOMATCH;
5695
    }
5696
 
5697
  /* Perform the searching operation.  */
5698
  ret = re_search (&private_preg, string, len,
5699
                   /* start: */ 0, /* range: */ len,
5700
                   want_reg_info ? &regs : (struct re_registers *) 0);
5701
 
5702
  /* Copy the register information to the POSIX structure.  */
5703
  if (want_reg_info)
5704
    {
5705
      if (ret >= 0)
5706
        {
5707
          unsigned r;
5708
 
5709
          for (r = 0; r < nmatch; r++)
5710
            {
5711
              pmatch[r].rm_so = regs.start[r];
5712
              pmatch[r].rm_eo = regs.end[r];
5713
            }
5714
        }
5715
 
5716
      /* If we needed the temporary register info, free the space now.  */
5717
      free (regs.start);
5718
      free (regs.end);
5719
    }
5720
 
5721
  /* We want zero return to mean success, unlike `re_search'.  */
5722
  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5723
}
5724
#ifdef _LIBC
5725
weak_alias (__regexec, regexec)
5726
#endif
5727
 
5728
 
5729
/* Returns a message corresponding to an error code, ERRCODE, returned
5730
   from either regcomp or regexec.   We don't use PREG here.  */
5731
 
5732
size_t
5733
regerror (errcode, preg, errbuf, errbuf_size)
5734
    int errcode;
5735
    const regex_t *preg;
5736
    char *errbuf;
5737
    size_t errbuf_size;
5738
{
5739
  const char *msg;
5740
  size_t msg_size;
5741
 
5742
  if (errcode < 0
5743
      || errcode >= (int) (sizeof (re_error_msgid)
5744
                           / sizeof (re_error_msgid[0])))
5745
    /* Only error codes returned by the rest of the code should be passed
5746
       to this routine.  If we are given anything else, or if other regex
5747
       code generates an invalid error code, then the program has a bug.
5748
       Dump core so we can fix it.  */
5749
    abort ();
5750
 
5751
  msg = gettext (re_error_msgid[errcode]);
5752
 
5753
  msg_size = strlen (msg) + 1; /* Includes the null.  */
5754
 
5755
  if (errbuf_size != 0)
5756
    {
5757
      if (msg_size > errbuf_size)
5758
        {
5759
#if defined HAVE_MEMPCPY || defined _LIBC
5760
          *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5761
#else
5762
          memcpy (errbuf, msg, errbuf_size - 1);
5763
          errbuf[errbuf_size - 1] = 0;
5764
#endif
5765
        }
5766
      else
5767
        memcpy (errbuf, msg, msg_size);
5768
    }
5769
 
5770
  return msg_size;
5771
}
5772
#ifdef _LIBC
5773
weak_alias (__regerror, regerror)
5774
#endif
5775
 
5776
 
5777
/* Free dynamically allocated space used by PREG.  */
5778
 
5779
void
5780
regfree (preg)
5781
    regex_t *preg;
5782
{
5783
  if (preg->buffer != NULL)
5784
    free (preg->buffer);
5785
  preg->buffer = NULL;
5786
 
5787
  preg->allocated = 0;
5788
  preg->used = 0;
5789
 
5790
  if (preg->fastmap != NULL)
5791
    free (preg->fastmap);
5792
  preg->fastmap = NULL;
5793
  preg->fastmap_accurate = 0;
5794
 
5795
  if (preg->translate != NULL)
5796
    free (preg->translate);
5797
  preg->translate = NULL;
5798
}
5799
#ifdef _LIBC
5800
weak_alias (__regfree, regfree)
5801
#endif
5802
 
5803
#endif /* not emacs  */

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.