OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.18.0/] [newlib/] [libc/] [posix/] [regcomp.c] - Blame information for rev 207

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 207 jeremybenn
/*-
2
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3
 * Copyright (c) 1992, 1993, 1994
4
 *      The Regents of the University of California.  All rights reserved.
5
 *
6
 * This code is derived from software contributed to Berkeley by
7
 * Henry Spencer.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 * 4. Neither the name of the University nor the names of its contributors
18
 *    may be used to endorse or promote products derived from this software
19
 *    without specific prior written permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31
 * SUCH DAMAGE.
32
 *
33
 *      @(#)regcomp.c   8.5 (Berkeley) 3/20/94
34
 */
35
 
36
#ifndef _NO_REGEX
37
 
38
#if defined(LIBC_SCCS) && !defined(lint)
39
static char sccsid[] = "@(#)regcomp.c   8.5 (Berkeley) 3/20/94";
40
#endif /* LIBC_SCCS and not lint */
41
#include <sys/cdefs.h>
42
 
43
#include <sys/types.h>
44
#include <stdio.h>
45
#include <string.h>
46
#include <ctype.h>
47
#include <limits.h>
48
#include <stdlib.h>
49
#include <regex.h>
50
 
51
#include "collate.h"
52
 
53
#include "utils.h"
54
#include "regex2.h"
55
 
56
#include "cclass.h"
57
#include "cname.h"
58
 
59
/*
60
 * parse structure, passed up and down to avoid global variables and
61
 * other clumsinesses
62
 */
63
struct parse {
64
        char *next;             /* next character in RE */
65
        char *end;              /* end of string (-> NUL normally) */
66
        int error;              /* has an error been seen? */
67
        sop *strip;             /* malloced strip */
68
        sopno ssize;            /* malloced strip size (allocated) */
69
        sopno slen;             /* malloced strip length (used) */
70
        int ncsalloc;           /* number of csets allocated */
71
        struct re_guts *g;
72
#       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
73
        sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
74
        sopno pend[NPAREN];     /* -> ) ([0] unused) */
75
};
76
 
77
/* ========= begin header generated by ./mkh ========= */
78
#ifdef __cplusplus
79
extern "C" {
80
#endif
81
 
82
/* === regcomp.c === */
83
static void p_ere(struct parse *p, int stop);
84
static void p_ere_exp(struct parse *p);
85
static void p_str(struct parse *p);
86
static void p_bre(struct parse *p, int end1, int end2);
87
static int p_simp_re(struct parse *p, int starordinary);
88
static int p_count(struct parse *p);
89
static void p_bracket(struct parse *p);
90
static void p_b_term(struct parse *p, cset *cs);
91
static void p_b_cclass(struct parse *p, cset *cs);
92
static void p_b_eclass(struct parse *p, cset *cs);
93
static char p_b_symbol(struct parse *p);
94
static char p_b_coll_elem(struct parse *p, int endc);
95
static char othercase(int ch);
96
static void bothcases(struct parse *p, int ch);
97
static void ordinary(struct parse *p, int ch);
98
static void nonnewline(struct parse *p);
99
static void repeat(struct parse *p, sopno start, int from, int to);
100
static int seterr(struct parse *p, int e);
101
static cset *allocset(struct parse *p);
102
static void freeset(struct parse *p, cset *cs);
103
static int freezeset(struct parse *p, cset *cs);
104
static int firstch(struct parse *p, cset *cs);
105
static int nch(struct parse *p, cset *cs);
106
static void mcadd(struct parse *p, cset *cs, char *cp);
107
#if used
108
static void mcsub(cset *cs, char *cp);
109
static int mcin(cset *cs, char *cp);
110
static char *mcfind(cset *cs, char *cp);
111
#endif
112
static void mcinvert(struct parse *p, cset *cs);
113
static void mccase(struct parse *p, cset *cs);
114
static int isinsets(struct re_guts *g, int c);
115
static int samesets(struct re_guts *g, int c1, int c2);
116
static void categorize(struct parse *p, struct re_guts *g);
117
static sopno dupl(struct parse *p, sopno start, sopno finish);
118
static void doemit(struct parse *p, sop op, size_t opnd);
119
static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
120
static void dofwd(struct parse *p, sopno pos, sop value);
121
static void enlarge(struct parse *p, sopno size);
122
static void stripsnug(struct parse *p, struct re_guts *g);
123
static void findmust(struct parse *p, struct re_guts *g);
124
static int altoffset(sop *scan, int offset, int mccs);
125
static void computejumps(struct parse *p, struct re_guts *g);
126
static void computematchjumps(struct parse *p, struct re_guts *g);
127
static sopno pluscount(struct parse *p, struct re_guts *g);
128
 
129
#ifdef __cplusplus
130
}
131
#endif
132
/* ========= end header generated by ./mkh ========= */
133
 
134
static char nuls[10];           /* place to point scanner in event of error */
135
 
136
/*
137
 * macros for use with parse structure
138
 * BEWARE:  these know that the parse structure is named `p' !!!
139
 */
140
#define PEEK()  (*p->next)
141
#define PEEK2() (*(p->next+1))
142
#define MORE()  (p->next < p->end)
143
#define MORE2() (p->next+1 < p->end)
144
#define SEE(c)  (MORE() && PEEK() == (c))
145
#define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
146
#define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
147
#define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
148
#define NEXT()  (p->next++)
149
#define NEXT2() (p->next += 2)
150
#define NEXTn(n)        (p->next += (n))
151
#define GETNEXT()       (*p->next++)
152
#define SETERROR(e)     seterr(p, (e))
153
#define REQUIRE(co, e)  ((co) || SETERROR(e))
154
#define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
155
#define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
156
#define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
157
#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
158
#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
159
#define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
160
#define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
161
#define HERE()          (p->slen)
162
#define THERE()         (p->slen - 1)
163
#define THERETHERE()    (p->slen - 2)
164
#define DROP(n) (p->slen -= (n))
165
 
166
#ifndef NDEBUG
167
static int never = 0;            /* for use in asserts; shuts lint up */
168
#else
169
#define never   0                /* some <assert.h>s have bugs too */
170
#endif
171
 
172
/* Macro used by computejump()/computematchjump() */
173
#define MIN(a,b)        ((a)<(b)?(a):(b))
174
 
175
/*
176
 - regcomp - interface for parser and compilation
177
 = extern int regcomp(regex_t *, const char *, int);
178
 = #define      REG_BASIC       0000
179
 = #define      REG_EXTENDED    0001
180
 = #define      REG_ICASE       0002
181
 = #define      REG_NOSUB       0004
182
 = #define      REG_NEWLINE     0010
183
 = #define      REG_NOSPEC      0020
184
 = #define      REG_PEND        0040
185
 = #define      REG_DUMP        0200
186
 */
187
int                             /* 0 success, otherwise REG_something */
188
regcomp(preg, pattern, cflags)
189
regex_t *preg;
190
const char *pattern;
191
int cflags;
192
{
193
        struct parse pa;
194
        struct re_guts *g;
195
        struct parse *p = &pa;
196
        int i;
197
        size_t len;
198
#ifdef REDEBUG
199
#       define  GOODFLAGS(f)    (f)
200
#else
201
#       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
202
#endif
203
 
204
        cflags = GOODFLAGS(cflags);
205
        if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
206
                return(REG_INVARG);
207
 
208
        if (cflags&REG_PEND) {
209
                if (preg->re_endp < pattern)
210
                        return(REG_INVARG);
211
                len = preg->re_endp - pattern;
212
        } else
213
                len = strlen((char *)pattern);
214
 
215
        /* do the mallocs early so failure handling is easy */
216
        g = (struct re_guts *)malloc(sizeof(struct re_guts) +
217
                                                        (NC-1)*sizeof(cat_t));
218
        if (g == NULL)
219
                return(REG_ESPACE);
220
        p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
221
        p->strip = (sop *)malloc(p->ssize * sizeof(sop));
222
        p->slen = 0;
223
        if (p->strip == NULL) {
224
                free((char *)g);
225
                return(REG_ESPACE);
226
        }
227
 
228
        /* set things up */
229
        p->g = g;
230
        p->next = (char *)pattern;      /* convenience; we do not modify it */
231
        p->end = p->next + len;
232
        p->error = 0;
233
        p->ncsalloc = 0;
234
        for (i = 0; i < NPAREN; i++) {
235
                p->pbegin[i] = 0;
236
                p->pend[i] = 0;
237
        }
238
        g->csetsize = NC;
239
        g->sets = NULL;
240
        g->setbits = NULL;
241
        g->ncsets = 0;
242
        g->cflags = cflags;
243
        g->iflags = 0;
244
        g->nbol = 0;
245
        g->neol = 0;
246
        g->must = NULL;
247
        g->moffset = -1;
248
        g->charjump = NULL;
249
        g->matchjump = NULL;
250
        g->mlen = 0;
251
        g->nsub = 0;
252
        g->ncategories = 1;     /* category 0 is "everything else" */
253
        g->categories = &g->catspace[-(CHAR_MIN)];
254
        (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
255
        g->backrefs = 0;
256
 
257
        /* do it */
258
        EMIT(OEND, 0);
259
        g->firststate = THERE();
260
        if (cflags&REG_EXTENDED)
261
                p_ere(p, OUT);
262
        else if (cflags&REG_NOSPEC)
263
                p_str(p);
264
        else
265
                p_bre(p, OUT, OUT);
266
        EMIT(OEND, 0);
267
        g->laststate = THERE();
268
 
269
        /* tidy up loose ends and fill things in */
270
        categorize(p, g);
271
        stripsnug(p, g);
272
        findmust(p, g);
273
        /* only use Boyer-Moore algorithm if the pattern is bigger
274
         * than three characters
275
         */
276
        if(g->mlen > 3) {
277
                computejumps(p, g);
278
                computematchjumps(p, g);
279
                if(g->matchjump == NULL && g->charjump != NULL) {
280
                        free(g->charjump);
281
                        g->charjump = NULL;
282
                }
283
        }
284
        g->nplus = pluscount(p, g);
285
        g->magic = MAGIC2;
286
        preg->re_nsub = g->nsub;
287
        preg->re_g = g;
288
        preg->re_magic = MAGIC1;
289
#ifndef REDEBUG
290
        /* not debugging, so can't rely on the assert() in regexec() */
291
        if (g->iflags&BAD)
292
                SETERROR(REG_ASSERT);
293
#endif
294
 
295
        /* win or lose, we're done */
296
        if (p->error != 0)       /* lose */
297
                regfree(preg);
298
        return(p->error);
299
}
300
 
301
/*
302
 - p_ere - ERE parser top level, concatenation and alternation
303
 == static void p_ere(struct parse *p, int stop);
304
 */
305
static void
306
p_ere(p, stop)
307
struct parse *p;
308
int stop;                       /* character this ERE should end at */
309
{
310
        char c;
311
        sopno prevback;
312
        sopno prevfwd;
313
        sopno conc;
314
        int first = 1;          /* is this the first alternative? */
315
 
316
        for (;;) {
317
                /* do a bunch of concatenated expressions */
318
                conc = HERE();
319
                while (MORE() && (c = PEEK()) != '|' && c != stop)
320
                        p_ere_exp(p);
321
                (void)REQUIRE(HERE() != conc, REG_EMPTY);       /* require nonempty */
322
 
323
                if (!EAT('|'))
324
                        break;          /* NOTE BREAK OUT */
325
 
326
                if (first) {
327
                        INSERT(OCH_, conc);     /* offset is wrong */
328
                        prevfwd = conc;
329
                        prevback = conc;
330
                        first = 0;
331
                }
332
                ASTERN(OOR1, prevback);
333
                prevback = THERE();
334
                AHEAD(prevfwd);                 /* fix previous offset */
335
                prevfwd = HERE();
336
                EMIT(OOR2, 0);                   /* offset is very wrong */
337
        }
338
 
339
        if (!first) {           /* tail-end fixups */
340
                AHEAD(prevfwd);
341
                ASTERN(O_CH, prevback);
342
        }
343
 
344
        assert(!MORE() || SEE(stop));
345
}
346
 
347
/*
348
 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
349
 == static void p_ere_exp(struct parse *p);
350
 */
351
static void
352
p_ere_exp(p)
353
struct parse *p;
354
{
355
        char c;
356
        sopno pos;
357
        int count;
358
        int count2;
359
        sopno subno;
360
        int wascaret = 0;
361
 
362
        assert(MORE());         /* caller should have ensured this */
363
        c = GETNEXT();
364
 
365
        pos = HERE();
366
        switch (c) {
367
        case '(':
368
                (void)REQUIRE(MORE(), REG_EPAREN);
369
                p->g->nsub++;
370
                subno = p->g->nsub;
371
                if (subno < NPAREN)
372
                        p->pbegin[subno] = HERE();
373
                EMIT(OLPAREN, subno);
374
                if (!SEE(')'))
375
                        p_ere(p, ')');
376
                if (subno < NPAREN) {
377
                        p->pend[subno] = HERE();
378
                        assert(p->pend[subno] != 0);
379
                }
380
                EMIT(ORPAREN, subno);
381
                (void)MUSTEAT(')', REG_EPAREN);
382
                break;
383
#ifndef POSIX_MISTAKE
384
        case ')':               /* happens only if no current unmatched ( */
385
                /*
386
                 * You may ask, why the ifndef?  Because I didn't notice
387
                 * this until slightly too late for 1003.2, and none of the
388
                 * other 1003.2 regular-expression reviewers noticed it at
389
                 * all.  So an unmatched ) is legal POSIX, at least until
390
                 * we can get it fixed.
391
                 */
392
                SETERROR(REG_EPAREN);
393
                break;
394
#endif
395
        case '^':
396
                EMIT(OBOL, 0);
397
                p->g->iflags |= USEBOL;
398
                p->g->nbol++;
399
                wascaret = 1;
400
                break;
401
        case '$':
402
                EMIT(OEOL, 0);
403
                p->g->iflags |= USEEOL;
404
                p->g->neol++;
405
                break;
406
        case '|':
407
                SETERROR(REG_EMPTY);
408
                break;
409
        case '*':
410
        case '+':
411
        case '?':
412
                SETERROR(REG_BADRPT);
413
                break;
414
        case '.':
415
                if (p->g->cflags&REG_NEWLINE)
416
                        nonnewline(p);
417
                else
418
                        EMIT(OANY, 0);
419
                break;
420
        case '[':
421
                p_bracket(p);
422
                break;
423
        case '\\':
424
                (void)REQUIRE(MORE(), REG_EESCAPE);
425
                c = GETNEXT();
426
                ordinary(p, c);
427
                break;
428
        case '{':               /* okay as ordinary except if digit follows */
429
                (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
430
                /* FALLTHROUGH */
431
        default:
432
                ordinary(p, c);
433
                break;
434
        }
435
 
436
        if (!MORE())
437
                return;
438
        c = PEEK();
439
        /* we call { a repetition if followed by a digit */
440
        if (!( c == '*' || c == '+' || c == '?' ||
441
                                (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
442
                return;         /* no repetition, we're done */
443
        NEXT();
444
 
445
        (void)REQUIRE(!wascaret, REG_BADRPT);
446
        switch (c) {
447
        case '*':       /* implemented as +? */
448
                /* this case does not require the (y|) trick, noKLUDGE */
449
                INSERT(OPLUS_, pos);
450
                ASTERN(O_PLUS, pos);
451
                INSERT(OQUEST_, pos);
452
                ASTERN(O_QUEST, pos);
453
                break;
454
        case '+':
455
                INSERT(OPLUS_, pos);
456
                ASTERN(O_PLUS, pos);
457
                break;
458
        case '?':
459
                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
460
                INSERT(OCH_, pos);              /* offset slightly wrong */
461
                ASTERN(OOR1, pos);              /* this one's right */
462
                AHEAD(pos);                     /* fix the OCH_ */
463
                EMIT(OOR2, 0);                   /* offset very wrong... */
464
                AHEAD(THERE());                 /* ...so fix it */
465
                ASTERN(O_CH, THERETHERE());
466
                break;
467
        case '{':
468
                count = p_count(p);
469
                if (EAT(',')) {
470
                        if (isdigit((uch)PEEK())) {
471
                                count2 = p_count(p);
472
                                (void)REQUIRE(count <= count2, REG_BADBR);
473
                        } else          /* single number with comma */
474
                                count2 = INFINITY;
475
                } else          /* just a single number */
476
                        count2 = count;
477
                repeat(p, pos, count, count2);
478
                if (!EAT('}')) {        /* error heuristics */
479
                        while (MORE() && PEEK() != '}')
480
                                NEXT();
481
                        (void)REQUIRE(MORE(), REG_EBRACE);
482
                        SETERROR(REG_BADBR);
483
                }
484
                break;
485
        }
486
 
487
        if (!MORE())
488
                return;
489
        c = PEEK();
490
        if (!( c == '*' || c == '+' || c == '?' ||
491
                                (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
492
                return;
493
        SETERROR(REG_BADRPT);
494
}
495
 
496
/*
497
 - p_str - string (no metacharacters) "parser"
498
 == static void p_str(struct parse *p);
499
 */
500
static void
501
p_str(p)
502
struct parse *p;
503
{
504
        (void)REQUIRE(MORE(), REG_EMPTY);
505
        while (MORE())
506
                ordinary(p, GETNEXT());
507
}
508
 
509
/*
510
 - p_bre - BRE parser top level, anchoring and concatenation
511
 == static void p_bre(struct parse *p, int end1, \
512
 ==     int end2);
513
 * Giving end1 as OUT essentially eliminates the end1/end2 check.
514
 *
515
 * This implementation is a bit of a kludge, in that a trailing $ is first
516
 * taken as an ordinary character and then revised to be an anchor.  The
517
 * only undesirable side effect is that '$' gets included as a character
518
 * category in such cases.  This is fairly harmless; not worth fixing.
519
 * The amount of lookahead needed to avoid this kludge is excessive.
520
 */
521
static void
522
p_bre(p, end1, end2)
523
struct parse *p;
524
int end1;                       /* first terminating character */
525
int end2;                       /* second terminating character */
526
{
527
        sopno start = HERE();
528
        int first = 1;                  /* first subexpression? */
529
        int wasdollar = 0;
530
 
531
        if (EAT('^')) {
532
                EMIT(OBOL, 0);
533
                p->g->iflags |= USEBOL;
534
                p->g->nbol++;
535
        }
536
        while (MORE() && !SEETWO(end1, end2)) {
537
                wasdollar = p_simp_re(p, first);
538
                first = 0;
539
        }
540
        if (wasdollar) {        /* oops, that was a trailing anchor */
541
                DROP(1);
542
                EMIT(OEOL, 0);
543
                p->g->iflags |= USEEOL;
544
                p->g->neol++;
545
        }
546
 
547
        (void)REQUIRE(HERE() != start, REG_EMPTY);      /* require nonempty */
548
}
549
 
550
/*
551
 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
552
 == static int p_simp_re(struct parse *p, int starordinary);
553
 */
554
static int                      /* was the simple RE an unbackslashed $? */
555
p_simp_re(p, starordinary)
556
struct parse *p;
557
int starordinary;               /* is a leading * an ordinary character? */
558
{
559
        int c;
560
        int count;
561
        int count2;
562
        sopno pos;
563
        int i;
564
        sopno subno;
565
#       define  BACKSL  (1<<CHAR_BIT)
566
 
567
        pos = HERE();           /* repetion op, if any, covers from here */
568
 
569
        assert(MORE());         /* caller should have ensured this */
570
        c = GETNEXT();
571
        if (c == '\\') {
572
                (void)REQUIRE(MORE(), REG_EESCAPE);
573
                c = BACKSL | GETNEXT();
574
        }
575
        switch (c) {
576
        case '.':
577
                if (p->g->cflags&REG_NEWLINE)
578
                        nonnewline(p);
579
                else
580
                        EMIT(OANY, 0);
581
                break;
582
        case '[':
583
                p_bracket(p);
584
                break;
585
        case BACKSL|'{':
586
                SETERROR(REG_BADRPT);
587
                break;
588
        case BACKSL|'(':
589
                p->g->nsub++;
590
                subno = p->g->nsub;
591
                if (subno < NPAREN)
592
                        p->pbegin[subno] = HERE();
593
                EMIT(OLPAREN, subno);
594
                /* the MORE here is an error heuristic */
595
                if (MORE() && !SEETWO('\\', ')'))
596
                        p_bre(p, '\\', ')');
597
                if (subno < NPAREN) {
598
                        p->pend[subno] = HERE();
599
                        assert(p->pend[subno] != 0);
600
                }
601
                EMIT(ORPAREN, subno);
602
                (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
603
                break;
604
        case BACKSL|')':        /* should not get here -- must be user */
605
        case BACKSL|'}':
606
                SETERROR(REG_EPAREN);
607
                break;
608
        case BACKSL|'1':
609
        case BACKSL|'2':
610
        case BACKSL|'3':
611
        case BACKSL|'4':
612
        case BACKSL|'5':
613
        case BACKSL|'6':
614
        case BACKSL|'7':
615
        case BACKSL|'8':
616
        case BACKSL|'9':
617
                i = (c&~BACKSL) - '0';
618
                assert(i < NPAREN);
619
                if (p->pend[i] != 0) {
620
                        assert(i <= p->g->nsub);
621
                        EMIT(OBACK_, i);
622
                        assert(p->pbegin[i] != 0);
623
                        assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
624
                        assert(OP(p->strip[p->pend[i]]) == ORPAREN);
625
                        (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
626
                        EMIT(O_BACK, i);
627
                } else
628
                        SETERROR(REG_ESUBREG);
629
                p->g->backrefs = 1;
630
                break;
631
        case '*':
632
                (void)REQUIRE(starordinary, REG_BADRPT);
633
                /* FALLTHROUGH */
634
        default:
635
                ordinary(p, (char)c);
636
                break;
637
        }
638
 
639
        if (EAT('*')) {         /* implemented as +? */
640
                /* this case does not require the (y|) trick, noKLUDGE */
641
                INSERT(OPLUS_, pos);
642
                ASTERN(O_PLUS, pos);
643
                INSERT(OQUEST_, pos);
644
                ASTERN(O_QUEST, pos);
645
        } else if (EATTWO('\\', '{')) {
646
                count = p_count(p);
647
                if (EAT(',')) {
648
                        if (MORE() && isdigit((uch)PEEK())) {
649
                                count2 = p_count(p);
650
                                (void)REQUIRE(count <= count2, REG_BADBR);
651
                        } else          /* single number with comma */
652
                                count2 = INFINITY;
653
                } else          /* just a single number */
654
                        count2 = count;
655
                repeat(p, pos, count, count2);
656
                if (!EATTWO('\\', '}')) {       /* error heuristics */
657
                        while (MORE() && !SEETWO('\\', '}'))
658
                                NEXT();
659
                        (void)REQUIRE(MORE(), REG_EBRACE);
660
                        SETERROR(REG_BADBR);
661
                }
662
        } else if (c == '$')     /* $ (but not \$) ends it */
663
                return(1);
664
 
665
        return(0);
666
}
667
 
668
/*
669
 - p_count - parse a repetition count
670
 == static int p_count(struct parse *p);
671
 */
672
static int                      /* the value */
673
p_count(p)
674
struct parse *p;
675
{
676
        int count = 0;
677
        int ndigits = 0;
678
 
679
        while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
680
                count = count*10 + (GETNEXT() - '0');
681
                ndigits++;
682
        }
683
 
684
        (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
685
        return(count);
686
}
687
 
688
/*
689
 - p_bracket - parse a bracketed character list
690
 == static void p_bracket(struct parse *p);
691
 *
692
 * Note a significant property of this code:  if the allocset() did SETERROR,
693
 * no set operations are done.
694
 */
695
static void
696
p_bracket(p)
697
struct parse *p;
698
{
699
        cset *cs = allocset(p);
700
        int invert = 0;
701
 
702
        /* Dept of Truly Sickening Special-Case Kludges */
703
        if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
704
                EMIT(OBOW, 0);
705
                NEXTn(6);
706
                return;
707
        }
708
        if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
709
                EMIT(OEOW, 0);
710
                NEXTn(6);
711
                return;
712
        }
713
 
714
        if (EAT('^'))
715
                invert++;       /* make note to invert set at end */
716
        if (EAT(']'))
717
                CHadd(cs, ']');
718
        else if (EAT('-'))
719
                CHadd(cs, '-');
720
        while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
721
                p_b_term(p, cs);
722
        if (EAT('-'))
723
                CHadd(cs, '-');
724
        (void)MUSTEAT(']', REG_EBRACK);
725
 
726
        if (p->error != 0)       /* don't mess things up further */
727
                return;
728
 
729
        if (p->g->cflags&REG_ICASE) {
730
                int i;
731
                int ci;
732
 
733
                for (i = p->g->csetsize - 1; i >= 0; i--)
734
                        if (CHIN(cs, i) && isalpha(i)) {
735
                                ci = othercase(i);
736
                                if (ci != i)
737
                                        CHadd(cs, ci);
738
                        }
739
                if (cs->multis != NULL)
740
                        mccase(p, cs);
741
        }
742
        if (invert) {
743
                int i;
744
 
745
                for (i = p->g->csetsize - 1; i >= 0; i--)
746
                        if (CHIN(cs, i))
747
                                CHsub(cs, i);
748
                        else
749
                                CHadd(cs, i);
750
                if (p->g->cflags&REG_NEWLINE)
751
                        CHsub(cs, '\n');
752
                if (cs->multis != NULL)
753
                        mcinvert(p, cs);
754
        }
755
 
756
        assert(cs->multis == NULL);             /* xxx */
757
 
758
        if (nch(p, cs) == 1) {          /* optimize singleton sets */
759
                ordinary(p, firstch(p, cs));
760
                freeset(p, cs);
761
        } else
762
                EMIT(OANYOF, freezeset(p, cs));
763
}
764
 
765
/*
766
 - p_b_term - parse one term of a bracketed character list
767
 == static void p_b_term(struct parse *p, cset *cs);
768
 */
769
static void
770
p_b_term(p, cs)
771
struct parse *p;
772
cset *cs;
773
{
774
        char c;
775
        char start, finish;
776
        int i;
777
 
778
        /* classify what we've got */
779
        switch ((MORE()) ? PEEK() : '\0') {
780
        case '[':
781
                c = (MORE2()) ? PEEK2() : '\0';
782
                break;
783
        case '-':
784
                SETERROR(REG_ERANGE);
785
                return;                 /* NOTE RETURN */
786
                break;
787
        default:
788
                c = '\0';
789
                break;
790
        }
791
 
792
        switch (c) {
793
        case ':':               /* character class */
794
                NEXT2();
795
                (void)REQUIRE(MORE(), REG_EBRACK);
796
                c = PEEK();
797
                (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
798
                p_b_cclass(p, cs);
799
                (void)REQUIRE(MORE(), REG_EBRACK);
800
                (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
801
                break;
802
        case '=':               /* equivalence class */
803
                NEXT2();
804
                (void)REQUIRE(MORE(), REG_EBRACK);
805
                c = PEEK();
806
                (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
807
                p_b_eclass(p, cs);
808
                (void)REQUIRE(MORE(), REG_EBRACK);
809
                (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
810
                break;
811
        default:                /* symbol, ordinary character, or range */
812
/* xxx revision needed for multichar stuff */
813
                start = p_b_symbol(p);
814
                if (SEE('-') && MORE2() && PEEK2() != ']') {
815
                        /* range */
816
                        NEXT();
817
                        if (EAT('-'))
818
                                finish = '-';
819
                        else
820
                                finish = p_b_symbol(p);
821
                } else
822
                        finish = start;
823
                if (start == finish)
824
                        CHadd(cs, start);
825
                else {
826
                        if (__collate_load_error) {
827
                                (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
828
                                for (i = (uch)start; i <= (uch)finish; i++)
829
                                        CHadd(cs, i);
830
                        } else {
831
                                (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
832
                                for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
833
                                        if (   __collate_range_cmp(start, i) <= 0
834
                                            && __collate_range_cmp(i, finish) <= 0
835
                                           )
836
                                                CHadd(cs, i);
837
                                }
838
                        }
839
                }
840
                break;
841
        }
842
}
843
 
844
/*
845
 - p_b_cclass - parse a character-class name and deal with it
846
 == static void p_b_cclass(struct parse *p, cset *cs);
847
 */
848
static void
849
p_b_cclass(p, cs)
850
struct parse *p;
851
cset *cs;
852
{
853
        int c;
854
        char *sp = p->next;
855
        struct cclass *cp;
856
        size_t len;
857
 
858
        while (MORE() && isalpha((uch)PEEK()))
859
                NEXT();
860
        len = p->next - sp;
861
        for (cp = cclasses; cp->name != NULL; cp++)
862
                if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
863
                        break;
864
        if (cp->name == NULL) {
865
                /* oops, didn't find it */
866
                SETERROR(REG_ECTYPE);
867
                return;
868
        }
869
 
870
        switch (cp->fidx) {
871
        case CALNUM:
872
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
873
                        if (isalnum((uch)c))
874
                                CHadd(cs, c);
875
                break;
876
        case CALPHA:
877
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
878
                        if (isalpha((uch)c))
879
                                CHadd(cs, c);
880
                break;
881
        case CBLANK:
882
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
883
                        if (isblank((uch)c))
884
                                CHadd(cs, c);
885
                break;
886
        case CCNTRL:
887
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
888
                        if (iscntrl((uch)c))
889
                                CHadd(cs, c);
890
                break;
891
        case CDIGIT:
892
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
893
                        if (isdigit((uch)c))
894
                                CHadd(cs, c);
895
                break;
896
        case CGRAPH:
897
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
898
                        if (isgraph((uch)c))
899
                                CHadd(cs, c);
900
                break;
901
        case CLOWER:
902
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
903
                        if (islower((uch)c))
904
                                CHadd(cs, c);
905
                break;
906
        case CPRINT:
907
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
908
                        if (isprint((uch)c))
909
                                CHadd(cs, c);
910
                break;
911
        case CPUNCT:
912
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
913
                        if (ispunct((uch)c))
914
                                CHadd(cs, c);
915
                break;
916
        case CSPACE:
917
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
918
                        if (isspace((uch)c))
919
                                CHadd(cs, c);
920
                break;
921
        case CUPPER:
922
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
923
                        if (isupper((uch)c))
924
                                CHadd(cs, c);
925
                break;
926
        case CXDIGIT:
927
                for (c = CHAR_MIN; c <= CHAR_MAX; c++)
928
                        if (isxdigit((uch)c))
929
                                CHadd(cs, c);
930
                break;
931
        }
932
#if 0
933
        for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
934
                MCadd(p, cs, u);
935
#endif
936
}
937
 
938
/*
939
 - p_b_eclass - parse an equivalence-class name and deal with it
940
 == static void p_b_eclass(struct parse *p, cset *cs);
941
 *
942
 * This implementation is incomplete. xxx
943
 */
944
static void
945
p_b_eclass(p, cs)
946
struct parse *p;
947
cset *cs;
948
{
949
        char c;
950
 
951
        c = p_b_coll_elem(p, '=');
952
        CHadd(cs, c);
953
}
954
 
955
/*
956
 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
957
 == static char p_b_symbol(struct parse *p);
958
 */
959
static char                     /* value of symbol */
960
p_b_symbol(p)
961
struct parse *p;
962
{
963
        char value;
964
 
965
        (void)REQUIRE(MORE(), REG_EBRACK);
966
        if (!EATTWO('[', '.'))
967
                return(GETNEXT());
968
 
969
        /* collating symbol */
970
        value = p_b_coll_elem(p, '.');
971
        (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
972
        return(value);
973
}
974
 
975
/*
976
 - p_b_coll_elem - parse a collating-element name and look it up
977
 == static char p_b_coll_elem(struct parse *p, int endc);
978
 */
979
static char                     /* value of collating element */
980
p_b_coll_elem(p, endc)
981
struct parse *p;
982
int endc;                       /* name ended by endc,']' */
983
{
984
        char *sp = p->next;
985
        struct cname *cp;
986
        int len;
987
 
988
        while (MORE() && !SEETWO(endc, ']'))
989
                NEXT();
990
        if (!MORE()) {
991
                SETERROR(REG_EBRACK);
992
                return(0);
993
        }
994
        len = p->next - sp;
995
        for (cp = cnames; cp->name != NULL; cp++)
996
                if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
997
                        return(cp->code);       /* known name */
998
        if (len == 1)
999
                return(*sp);    /* single character */
1000
        SETERROR(REG_ECOLLATE);                 /* neither */
1001
        return(0);
1002
}
1003
 
1004
/*
1005
 - othercase - return the case counterpart of an alphabetic
1006
 == static char othercase(int ch);
1007
 */
1008
static char                     /* if no counterpart, return ch */
1009
othercase(ch)
1010
int ch;
1011
{
1012
        ch = (uch)ch;
1013
        assert(isalpha(ch));
1014
        if (isupper(ch))
1015
                return(tolower(ch));
1016
        else if (islower(ch))
1017
                return(toupper(ch));
1018
        else                    /* peculiar, but could happen */
1019
                return(ch);
1020
}
1021
 
1022
/*
1023
 - bothcases - emit a dualcase version of a two-case character
1024
 == static void bothcases(struct parse *p, int ch);
1025
 *
1026
 * Boy, is this implementation ever a kludge...
1027
 */
1028
static void
1029
bothcases(p, ch)
1030
struct parse *p;
1031
int ch;
1032
{
1033
        char *oldnext = p->next;
1034
        char *oldend = p->end;
1035
        char bracket[3];
1036
 
1037
        ch = (uch)ch;
1038
        assert(othercase(ch) != ch);    /* p_bracket() would recurse */
1039
        p->next = bracket;
1040
        p->end = bracket+2;
1041
        bracket[0] = ch;
1042
        bracket[1] = ']';
1043
        bracket[2] = '\0';
1044
        p_bracket(p);
1045
        assert(p->next == bracket+2);
1046
        p->next = oldnext;
1047
        p->end = oldend;
1048
}
1049
 
1050
/*
1051
 - ordinary - emit an ordinary character
1052
 == static void ordinary(struct parse *p, int ch);
1053
 */
1054
static void
1055
ordinary(p, ch)
1056
struct parse *p;
1057
int ch;
1058
{
1059
        cat_t *cap = p->g->categories;
1060
 
1061
        if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
1062
                bothcases(p, ch);
1063
        else {
1064
                EMIT(OCHAR, (uch)ch);
1065
                if (cap[ch] == 0)
1066
                        cap[ch] = p->g->ncategories++;
1067
        }
1068
}
1069
 
1070
/*
1071
 - nonnewline - emit REG_NEWLINE version of OANY
1072
 == static void nonnewline(struct parse *p);
1073
 *
1074
 * Boy, is this implementation ever a kludge...
1075
 */
1076
static void
1077
nonnewline(p)
1078
struct parse *p;
1079
{
1080
        char *oldnext = p->next;
1081
        char *oldend = p->end;
1082
        char bracket[4];
1083
 
1084
        p->next = bracket;
1085
        p->end = bracket+3;
1086
        bracket[0] = '^';
1087
        bracket[1] = '\n';
1088
        bracket[2] = ']';
1089
        bracket[3] = '\0';
1090
        p_bracket(p);
1091
        assert(p->next == bracket+3);
1092
        p->next = oldnext;
1093
        p->end = oldend;
1094
}
1095
 
1096
/*
1097
 - repeat - generate code for a bounded repetition, recursively if needed
1098
 == static void repeat(struct parse *p, sopno start, int from, int to);
1099
 */
1100
static void
1101
repeat(p, start, from, to)
1102
struct parse *p;
1103
sopno start;                    /* operand from here to end of strip */
1104
int from;                       /* repeated from this number */
1105
int to;                         /* to this number of times (maybe INFINITY) */
1106
{
1107
        sopno finish = HERE();
1108
#       define  N       2
1109
#       define  INF     3
1110
#       define  REP(f, t)       ((f)*8 + (t))
1111
#       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1112
        sopno copy;
1113
 
1114
        if (p->error != 0)       /* head off possible runaway recursion */
1115
                return;
1116
 
1117
        assert(from <= to);
1118
 
1119
        switch (REP(MAP(from), MAP(to))) {
1120
        case REP(0, 0):                   /* must be user doing this */
1121
                DROP(finish-start);     /* drop the operand */
1122
                break;
1123
        case REP(0, 1):                  /* as x{1,1}? */
1124
        case REP(0, N):                  /* as x{1,n}? */
1125
        case REP(0, INF):                /* as x{1,}? */
1126
                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1127
                INSERT(OCH_, start);            /* offset is wrong... */
1128
                repeat(p, start+1, 1, to);
1129
                ASTERN(OOR1, start);
1130
                AHEAD(start);                   /* ... fix it */
1131
                EMIT(OOR2, 0);
1132
                AHEAD(THERE());
1133
                ASTERN(O_CH, THERETHERE());
1134
                break;
1135
        case REP(1, 1):                 /* trivial case */
1136
                /* done */
1137
                break;
1138
        case REP(1, N):                 /* as x?x{1,n-1} */
1139
                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1140
                INSERT(OCH_, start);
1141
                ASTERN(OOR1, start);
1142
                AHEAD(start);
1143
                EMIT(OOR2, 0);                   /* offset very wrong... */
1144
                AHEAD(THERE());                 /* ...so fix it */
1145
                ASTERN(O_CH, THERETHERE());
1146
                copy = dupl(p, start+1, finish+1);
1147
                assert(copy == finish+4);
1148
                repeat(p, copy, 1, to-1);
1149
                break;
1150
        case REP(1, INF):               /* as x+ */
1151
                INSERT(OPLUS_, start);
1152
                ASTERN(O_PLUS, start);
1153
                break;
1154
        case REP(N, N):                 /* as xx{m-1,n-1} */
1155
                copy = dupl(p, start, finish);
1156
                repeat(p, copy, from-1, to-1);
1157
                break;
1158
        case REP(N, INF):               /* as xx{n-1,INF} */
1159
                copy = dupl(p, start, finish);
1160
                repeat(p, copy, from-1, to);
1161
                break;
1162
        default:                        /* "can't happen" */
1163
                SETERROR(REG_ASSERT);   /* just in case */
1164
                break;
1165
        }
1166
}
1167
 
1168
/*
1169
 - seterr - set an error condition
1170
 == static int seterr(struct parse *p, int e);
1171
 */
1172
static int                      /* useless but makes type checking happy */
1173
seterr(p, e)
1174
struct parse *p;
1175
int e;
1176
{
1177
        if (p->error == 0)       /* keep earliest error condition */
1178
                p->error = e;
1179
        p->next = nuls;         /* try to bring things to a halt */
1180
        p->end = nuls;
1181
        return(0);               /* make the return value well-defined */
1182
}
1183
 
1184
/*
1185
 - allocset - allocate a set of characters for []
1186
 == static cset *allocset(struct parse *p);
1187
 */
1188
static cset *
1189
allocset(p)
1190
struct parse *p;
1191
{
1192
        int no = p->g->ncsets++;
1193
        size_t nc;
1194
        size_t nbytes;
1195
        cset *cs;
1196
        size_t css = (size_t)p->g->csetsize;
1197
        int i;
1198
 
1199
        if (no >= p->ncsalloc) {        /* need another column of space */
1200
                p->ncsalloc += CHAR_BIT;
1201
                nc = p->ncsalloc;
1202
                assert(nc % CHAR_BIT == 0);
1203
                nbytes = nc / CHAR_BIT * css;
1204
                if (p->g->sets == NULL)
1205
                        p->g->sets = (cset *)malloc(nc * sizeof(cset));
1206
                else
1207
                        p->g->sets = (cset *)reallocf((char *)p->g->sets,
1208
                                                        nc * sizeof(cset));
1209
                if (p->g->setbits == NULL)
1210
                        p->g->setbits = (uch *)malloc(nbytes);
1211
                else {
1212
                        p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
1213
                                                                nbytes);
1214
                        /* xxx this isn't right if setbits is now NULL */
1215
                        for (i = 0; i < no; i++)
1216
                                p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1217
                }
1218
                if (p->g->sets != NULL && p->g->setbits != NULL)
1219
                        (void) memset((char *)p->g->setbits + (nbytes - css),
1220
                                                                0, css);
1221
                else {
1222
                        no = 0;
1223
                        SETERROR(REG_ESPACE);
1224
                        /* caller's responsibility not to do set ops */
1225
                }
1226
        }
1227
 
1228
        assert(p->g->sets != NULL);     /* xxx */
1229
        cs = &p->g->sets[no];
1230
        cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1231
        cs->mask = 1 << ((no) % CHAR_BIT);
1232
        cs->hash = 0;
1233
        cs->smultis = 0;
1234
        cs->multis = NULL;
1235
 
1236
        return(cs);
1237
}
1238
 
1239
/*
1240
 - freeset - free a now-unused set
1241
 == static void freeset(struct parse *p, cset *cs);
1242
 */
1243
static void
1244
freeset(p, cs)
1245
struct parse *p;
1246
cset *cs;
1247
{
1248
        int i;
1249
        cset *top = &p->g->sets[p->g->ncsets];
1250
        size_t css = (size_t)p->g->csetsize;
1251
 
1252
        for (i = 0; i < css; i++)
1253
                CHsub(cs, i);
1254
        if (cs == top-1)        /* recover only the easy case */
1255
                p->g->ncsets--;
1256
}
1257
 
1258
/*
1259
 - freezeset - final processing on a set of characters
1260
 == static int freezeset(struct parse *p, cset *cs);
1261
 *
1262
 * The main task here is merging identical sets.  This is usually a waste
1263
 * of time (although the hash code minimizes the overhead), but can win
1264
 * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1265
 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1266
 * the same value!
1267
 */
1268
static int                      /* set number */
1269
freezeset(p, cs)
1270
struct parse *p;
1271
cset *cs;
1272
{
1273
        short h = cs->hash;
1274
        int i;
1275
        cset *top = &p->g->sets[p->g->ncsets];
1276
        cset *cs2;
1277
        size_t css = (size_t)p->g->csetsize;
1278
 
1279
        /* look for an earlier one which is the same */
1280
        for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1281
                if (cs2->hash == h && cs2 != cs) {
1282
                        /* maybe */
1283
                        for (i = 0; i < css; i++)
1284
                                if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1285
                                        break;          /* no */
1286
                        if (i == css)
1287
                                break;                  /* yes */
1288
                }
1289
 
1290
        if (cs2 < top) {        /* found one */
1291
                freeset(p, cs);
1292
                cs = cs2;
1293
        }
1294
 
1295
        return((int)(cs - p->g->sets));
1296
}
1297
 
1298
/*
1299
 - firstch - return first character in a set (which must have at least one)
1300
 == static int firstch(struct parse *p, cset *cs);
1301
 */
1302
static int                      /* character; there is no "none" value */
1303
firstch(p, cs)
1304
struct parse *p;
1305
cset *cs;
1306
{
1307
        int i;
1308
        size_t css = (size_t)p->g->csetsize;
1309
 
1310
        for (i = 0; i < css; i++)
1311
                if (CHIN(cs, i))
1312
                        return((char)i);
1313
        assert(never);
1314
        return(0);               /* arbitrary */
1315
}
1316
 
1317
/*
1318
 - nch - number of characters in a set
1319
 == static int nch(struct parse *p, cset *cs);
1320
 */
1321
static int
1322
nch(p, cs)
1323
struct parse *p;
1324
cset *cs;
1325
{
1326
        int i;
1327
        size_t css = (size_t)p->g->csetsize;
1328
        int n = 0;
1329
 
1330
        for (i = 0; i < css; i++)
1331
                if (CHIN(cs, i))
1332
                        n++;
1333
        return(n);
1334
}
1335
 
1336
/*
1337
 - mcadd - add a collating element to a cset
1338
 == static void mcadd(struct parse *p, cset *cs, \
1339
 ==     char *cp);
1340
 */
1341
static void
1342
mcadd(p, cs, cp)
1343
struct parse *p;
1344
cset *cs;
1345
char *cp;
1346
{
1347
        size_t oldend = cs->smultis;
1348
 
1349
        cs->smultis += strlen(cp) + 1;
1350
        if (cs->multis == NULL)
1351
                cs->multis = malloc(cs->smultis);
1352
        else
1353
                cs->multis = reallocf(cs->multis, cs->smultis);
1354
        if (cs->multis == NULL) {
1355
                SETERROR(REG_ESPACE);
1356
                return;
1357
        }
1358
 
1359
        (void) strcpy(cs->multis + oldend - 1, cp);
1360
        cs->multis[cs->smultis - 1] = '\0';
1361
}
1362
 
1363
#if used
1364
/*
1365
 - mcsub - subtract a collating element from a cset
1366
 == static void mcsub(cset *cs, char *cp);
1367
 */
1368
static void
1369
mcsub(cs, cp)
1370
cset *cs;
1371
char *cp;
1372
{
1373
        char *fp = mcfind(cs, cp);
1374
        size_t len = strlen(fp);
1375
 
1376
        assert(fp != NULL);
1377
        (void) memmove(fp, fp + len + 1,
1378
                                cs->smultis - (fp + len + 1 - cs->multis));
1379
        cs->smultis -= len;
1380
 
1381
        if (cs->smultis == 0) {
1382
                free(cs->multis);
1383
                cs->multis = NULL;
1384
                return;
1385
        }
1386
 
1387
        cs->multis = reallocf(cs->multis, cs->smultis);
1388
        assert(cs->multis != NULL);
1389
}
1390
 
1391
/*
1392
 - mcin - is a collating element in a cset?
1393
 == static int mcin(cset *cs, char *cp);
1394
 */
1395
static int
1396
mcin(cs, cp)
1397
cset *cs;
1398
char *cp;
1399
{
1400
        return(mcfind(cs, cp) != NULL);
1401
}
1402
 
1403
/*
1404
 - mcfind - find a collating element in a cset
1405
 == static char *mcfind(cset *cs, char *cp);
1406
 */
1407
static char *
1408
mcfind(cs, cp)
1409
cset *cs;
1410
char *cp;
1411
{
1412
        char *p;
1413
 
1414
        if (cs->multis == NULL)
1415
                return(NULL);
1416
        for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1417
                if (strcmp(cp, p) == 0)
1418
                        return(p);
1419
        return(NULL);
1420
}
1421
#endif
1422
 
1423
/*
1424
 - mcinvert - invert the list of collating elements in a cset
1425
 == static void mcinvert(struct parse *p, cset *cs);
1426
 *
1427
 * This would have to know the set of possibilities.  Implementation
1428
 * is deferred.
1429
 */
1430
static void
1431
mcinvert(p, cs)
1432
struct parse *p;
1433
cset *cs;
1434
{
1435
        assert(cs->multis == NULL);     /* xxx */
1436
}
1437
 
1438
/*
1439
 - mccase - add case counterparts of the list of collating elements in a cset
1440
 == static void mccase(struct parse *p, cset *cs);
1441
 *
1442
 * This would have to know the set of possibilities.  Implementation
1443
 * is deferred.
1444
 */
1445
static void
1446
mccase(p, cs)
1447
struct parse *p;
1448
cset *cs;
1449
{
1450
        assert(cs->multis == NULL);     /* xxx */
1451
}
1452
 
1453
/*
1454
 - isinsets - is this character in any sets?
1455
 == static int isinsets(struct re_guts *g, int c);
1456
 */
1457
static int                      /* predicate */
1458
isinsets(g, c)
1459
struct re_guts *g;
1460
int c;
1461
{
1462
        uch *col;
1463
        int i;
1464
        int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1465
        unsigned uc = (uch)c;
1466
 
1467
        for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1468
                if (col[uc] != 0)
1469
                        return(1);
1470
        return(0);
1471
}
1472
 
1473
/*
1474
 - samesets - are these two characters in exactly the same sets?
1475
 == static int samesets(struct re_guts *g, int c1, int c2);
1476
 */
1477
static int                      /* predicate */
1478
samesets(g, c1, c2)
1479
struct re_guts *g;
1480
int c1;
1481
int c2;
1482
{
1483
        uch *col;
1484
        int i;
1485
        int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1486
        unsigned uc1 = (uch)c1;
1487
        unsigned uc2 = (uch)c2;
1488
 
1489
        for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1490
                if (col[uc1] != col[uc2])
1491
                        return(0);
1492
        return(1);
1493
}
1494
 
1495
/*
1496
 - categorize - sort out character categories
1497
 == static void categorize(struct parse *p, struct re_guts *g);
1498
 */
1499
static void
1500
categorize(p, g)
1501
struct parse *p;
1502
struct re_guts *g;
1503
{
1504
        cat_t *cats = g->categories;
1505
        int c;
1506
        int c2;
1507
        cat_t cat;
1508
 
1509
        /* avoid making error situations worse */
1510
        if (p->error != 0)
1511
                return;
1512
 
1513
        for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1514
                if (cats[c] == 0 && isinsets(g, c)) {
1515
                        cat = g->ncategories++;
1516
                        cats[c] = cat;
1517
                        for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1518
                                if (cats[c2] == 0 && samesets(g, c, c2))
1519
                                        cats[c2] = cat;
1520
                }
1521
}
1522
 
1523
/*
1524
 - dupl - emit a duplicate of a bunch of sops
1525
 == static sopno dupl(struct parse *p, sopno start, sopno finish);
1526
 */
1527
static sopno                    /* start of duplicate */
1528
dupl(p, start, finish)
1529
struct parse *p;
1530
sopno start;                    /* from here */
1531
sopno finish;                   /* to this less one */
1532
{
1533
        sopno ret = HERE();
1534
        sopno len = finish - start;
1535
 
1536
        assert(finish >= start);
1537
        if (len == 0)
1538
                return(ret);
1539
        enlarge(p, p->ssize + len);     /* this many unexpected additions */
1540
        assert(p->ssize >= p->slen + len);
1541
        (void) memcpy((char *)(p->strip + p->slen),
1542
                (char *)(p->strip + start), (size_t)len*sizeof(sop));
1543
        p->slen += len;
1544
        return(ret);
1545
}
1546
 
1547
/*
1548
 - doemit - emit a strip operator
1549
 == static void doemit(struct parse *p, sop op, size_t opnd);
1550
 *
1551
 * It might seem better to implement this as a macro with a function as
1552
 * hard-case backup, but it's just too big and messy unless there are
1553
 * some changes to the data structures.  Maybe later.
1554
 */
1555
static void
1556
doemit(p, op, opnd)
1557
struct parse *p;
1558
sop op;
1559
size_t opnd;
1560
{
1561
        /* avoid making error situations worse */
1562
        if (p->error != 0)
1563
                return;
1564
 
1565
        /* deal with oversize operands ("can't happen", more or less) */
1566
        assert(opnd < 1<<OPSHIFT);
1567
 
1568
        /* deal with undersized strip */
1569
        if (p->slen >= p->ssize)
1570
                enlarge(p, (p->ssize+1) / 2 * 3);       /* +50% */
1571
        assert(p->slen < p->ssize);
1572
 
1573
        /* finally, it's all reduced to the easy case */
1574
        p->strip[p->slen++] = SOP(op, opnd);
1575
}
1576
 
1577
/*
1578
 - doinsert - insert a sop into the strip
1579
 == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
1580
 */
1581
static void
1582
doinsert(p, op, opnd, pos)
1583
struct parse *p;
1584
sop op;
1585
size_t opnd;
1586
sopno pos;
1587
{
1588
        sopno sn;
1589
        sop s;
1590
        int i;
1591
 
1592
        /* avoid making error situations worse */
1593
        if (p->error != 0)
1594
                return;
1595
 
1596
        sn = HERE();
1597
        EMIT(op, opnd);         /* do checks, ensure space */
1598
        assert(HERE() == sn+1);
1599
        s = p->strip[sn];
1600
 
1601
        /* adjust paren pointers */
1602
        assert(pos > 0);
1603
        for (i = 1; i < NPAREN; i++) {
1604
                if (p->pbegin[i] >= pos) {
1605
                        p->pbegin[i]++;
1606
                }
1607
                if (p->pend[i] >= pos) {
1608
                        p->pend[i]++;
1609
                }
1610
        }
1611
 
1612
        memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1613
                                                (HERE()-pos-1)*sizeof(sop));
1614
        p->strip[pos] = s;
1615
}
1616
 
1617
/*
1618
 - dofwd - complete a forward reference
1619
 == static void dofwd(struct parse *p, sopno pos, sop value);
1620
 */
1621
static void
1622
dofwd(p, pos, value)
1623
struct parse *p;
1624
sopno pos;
1625
sop value;
1626
{
1627
        /* avoid making error situations worse */
1628
        if (p->error != 0)
1629
                return;
1630
 
1631
        assert(value < 1<<OPSHIFT);
1632
        p->strip[pos] = OP(p->strip[pos]) | value;
1633
}
1634
 
1635
/*
1636
 - enlarge - enlarge the strip
1637
 == static void enlarge(struct parse *p, sopno size);
1638
 */
1639
static void
1640
enlarge(p, size)
1641
struct parse *p;
1642
sopno size;
1643
{
1644
        sop *sp;
1645
 
1646
        if (p->ssize >= size)
1647
                return;
1648
 
1649
        sp = (sop *)realloc(p->strip, size*sizeof(sop));
1650
        if (sp == NULL) {
1651
                SETERROR(REG_ESPACE);
1652
                return;
1653
        }
1654
        p->strip = sp;
1655
        p->ssize = size;
1656
}
1657
 
1658
/*
1659
 - stripsnug - compact the strip
1660
 == static void stripsnug(struct parse *p, struct re_guts *g);
1661
 */
1662
static void
1663
stripsnug(p, g)
1664
struct parse *p;
1665
struct re_guts *g;
1666
{
1667
        g->nstates = p->slen;
1668
        g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1669
        if (g->strip == NULL) {
1670
                SETERROR(REG_ESPACE);
1671
                g->strip = p->strip;
1672
        }
1673
}
1674
 
1675
/*
1676
 - findmust - fill in must and mlen with longest mandatory literal string
1677
 == static void findmust(struct parse *p, struct re_guts *g);
1678
 *
1679
 * This algorithm could do fancy things like analyzing the operands of |
1680
 * for common subsequences.  Someday.  This code is simple and finds most
1681
 * of the interesting cases.
1682
 *
1683
 * Note that must and mlen got initialized during setup.
1684
 */
1685
static void
1686
findmust(p, g)
1687
struct parse *p;
1688
struct re_guts *g;
1689
{
1690
        sop *scan;
1691
        sop *start;
1692
        sop *newstart;
1693
        sopno newlen;
1694
        sop s;
1695
        char *cp;
1696
        sopno i;
1697
        int offset;
1698
        int cs, mccs;
1699
 
1700
        /* avoid making error situations worse */
1701
        if (p->error != 0)
1702
                return;
1703
 
1704
        /* Find out if we can handle OANYOF or not */
1705
        mccs = 0;
1706
        for (cs = 0; cs < g->ncsets; cs++)
1707
                if (g->sets[cs].multis != NULL)
1708
                        mccs = 1;
1709
 
1710
        /* find the longest OCHAR sequence in strip */
1711
        newlen = 0;
1712
        offset = 0;
1713
        g->moffset = 0;
1714
        scan = g->strip + 1;
1715
        do {
1716
                s = *scan++;
1717
                switch (OP(s)) {
1718
                case OCHAR:             /* sequence member */
1719
                        if (newlen == 0)         /* new sequence */
1720
                                newstart = scan - 1;
1721
                        newlen++;
1722
                        break;
1723
                case OPLUS_:            /* things that don't break one */
1724
                case OLPAREN:
1725
                case ORPAREN:
1726
                        break;
1727
                case OQUEST_:           /* things that must be skipped */
1728
                case OCH_:
1729
                        offset = altoffset(scan, offset, mccs);
1730
                        scan--;
1731
                        do {
1732
                                scan += OPND(s);
1733
                                s = *scan;
1734
                                /* assert() interferes w debug printouts */
1735
                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1736
                                                        OP(s) != OOR2) {
1737
                                        g->iflags |= BAD;
1738
                                        return;
1739
                                }
1740
                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
1741
                        /* fallthrough */
1742
                case OBOW:              /* things that break a sequence */
1743
                case OEOW:
1744
                case OBOL:
1745
                case OEOL:
1746
                case O_QUEST:
1747
                case O_CH:
1748
                case OEND:
1749
                        if (newlen > g->mlen) {         /* ends one */
1750
                                start = newstart;
1751
                                g->mlen = newlen;
1752
                                if (offset > -1) {
1753
                                        g->moffset += offset;
1754
                                        offset = newlen;
1755
                                } else
1756
                                        g->moffset = offset;
1757
                        } else {
1758
                                if (offset > -1)
1759
                                        offset += newlen;
1760
                        }
1761
                        newlen = 0;
1762
                        break;
1763
                case OANY:
1764
                        if (newlen > g->mlen) {         /* ends one */
1765
                                start = newstart;
1766
                                g->mlen = newlen;
1767
                                if (offset > -1) {
1768
                                        g->moffset += offset;
1769
                                        offset = newlen;
1770
                                } else
1771
                                        g->moffset = offset;
1772
                        } else {
1773
                                if (offset > -1)
1774
                                        offset += newlen;
1775
                        }
1776
                        if (offset > -1)
1777
                                offset++;
1778
                        newlen = 0;
1779
                        break;
1780
                case OANYOF:            /* may or may not invalidate offset */
1781
                        /* First, everything as OANY */
1782
                        if (newlen > g->mlen) {         /* ends one */
1783
                                start = newstart;
1784
                                g->mlen = newlen;
1785
                                if (offset > -1) {
1786
                                        g->moffset += offset;
1787
                                        offset = newlen;
1788
                                } else
1789
                                        g->moffset = offset;
1790
                        } else {
1791
                                if (offset > -1)
1792
                                        offset += newlen;
1793
                        }
1794
                        if (offset > -1)
1795
                                offset++;
1796
                        newlen = 0;
1797
                        /* And, now, if we found out we can't deal with
1798
                         * it, make offset = -1.
1799
                         */
1800
                        if (mccs)
1801
                                offset = -1;
1802
                        break;
1803
                default:
1804
                        /* Anything here makes it impossible or too hard
1805
                         * to calculate the offset -- so we give up;
1806
                         * save the last known good offset, in case the
1807
                         * must sequence doesn't occur later.
1808
                         */
1809
                        if (newlen > g->mlen) {         /* ends one */
1810
                                start = newstart;
1811
                                g->mlen = newlen;
1812
                                if (offset > -1)
1813
                                        g->moffset += offset;
1814
                                else
1815
                                        g->moffset = offset;
1816
                        }
1817
                        offset = -1;
1818
                        newlen = 0;
1819
                        break;
1820
                }
1821
        } while (OP(s) != OEND);
1822
 
1823
        if (g->mlen == 0) {              /* there isn't one */
1824
                g->moffset = -1;
1825
                return;
1826
        }
1827
 
1828
        /* turn it into a character string */
1829
        g->must = malloc((size_t)g->mlen + 1);
1830
        if (g->must == NULL) {          /* argh; just forget it */
1831
                g->mlen = 0;
1832
                g->moffset = -1;
1833
                return;
1834
        }
1835
        cp = g->must;
1836
        scan = start;
1837
        for (i = g->mlen; i > 0; i--) {
1838
                while (OP(s = *scan++) != OCHAR)
1839
                        continue;
1840
                assert(cp < g->must + g->mlen);
1841
                *cp++ = (char)OPND(s);
1842
        }
1843
        assert(cp == g->must + g->mlen);
1844
        *cp++ = '\0';           /* just on general principles */
1845
}
1846
 
1847
/*
1848
 - altoffset - choose biggest offset among multiple choices
1849
 == static int altoffset(sop *scan, int offset, int mccs);
1850
 *
1851
 * Compute, recursively if necessary, the largest offset among multiple
1852
 * re paths.
1853
 */
1854
static int
1855
altoffset(scan, offset, mccs)
1856
sop *scan;
1857
int offset;
1858
int mccs;
1859
{
1860
        int largest;
1861
        int try;
1862
        sop s;
1863
 
1864
        /* If we gave up already on offsets, return */
1865
        if (offset == -1)
1866
                return -1;
1867
 
1868
        largest = 0;
1869
        try = 0;
1870
        s = *scan++;
1871
        while (OP(s) != O_QUEST && OP(s) != O_CH) {
1872
                switch (OP(s)) {
1873
                case OOR1:
1874
                        if (try > largest)
1875
                                largest = try;
1876
                        try = 0;
1877
                        break;
1878
                case OQUEST_:
1879
                case OCH_:
1880
                        try = altoffset(scan, try, mccs);
1881
                        if (try == -1)
1882
                                return -1;
1883
                        scan--;
1884
                        do {
1885
                                scan += OPND(s);
1886
                                s = *scan;
1887
                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1888
                                                        OP(s) != OOR2)
1889
                                        return -1;
1890
                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
1891
                        /* We must skip to the next position, or we'll
1892
                         * leave altoffset() too early.
1893
                         */
1894
                        scan++;
1895
                        break;
1896
                case OANYOF:
1897
                        if (mccs)
1898
                                return -1;
1899
                case OCHAR:
1900
                case OANY:
1901
                        try++;
1902
                case OBOW:
1903
                case OEOW:
1904
                case OLPAREN:
1905
                case ORPAREN:
1906
                case OOR2:
1907
                        break;
1908
                default:
1909
                        try = -1;
1910
                        break;
1911
                }
1912
                if (try == -1)
1913
                        return -1;
1914
                s = *scan++;
1915
        }
1916
 
1917
        if (try > largest)
1918
                largest = try;
1919
 
1920
        return largest+offset;
1921
}
1922
 
1923
/*
1924
 - computejumps - compute char jumps for BM scan
1925
 == static void computejumps(struct parse *p, struct re_guts *g);
1926
 *
1927
 * This algorithm assumes g->must exists and is has size greater than
1928
 * zero. It's based on the algorithm found on Computer Algorithms by
1929
 * Sara Baase.
1930
 *
1931
 * A char jump is the number of characters one needs to jump based on
1932
 * the value of the character from the text that was mismatched.
1933
 */
1934
static void
1935
computejumps(p, g)
1936
struct parse *p;
1937
struct re_guts *g;
1938
{
1939
        int ch;
1940
        int mindex;
1941
 
1942
        /* Avoid making errors worse */
1943
        if (p->error != 0)
1944
                return;
1945
 
1946
        g->charjump = (int*) malloc((NC + 1) * sizeof(int));
1947
        if (g->charjump == NULL)        /* Not a fatal error */
1948
                return;
1949
        /* Adjust for signed chars, if necessary */
1950
        g->charjump = &g->charjump[-(CHAR_MIN)];
1951
 
1952
        /* If the character does not exist in the pattern, the jump
1953
         * is equal to the number of characters in the pattern.
1954
         */
1955
        for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
1956
                g->charjump[ch] = g->mlen;
1957
 
1958
        /* If the character does exist, compute the jump that would
1959
         * take us to the last character in the pattern equal to it
1960
         * (notice that we match right to left, so that last character
1961
         * is the first one that would be matched).
1962
         */
1963
        for (mindex = 0; mindex < g->mlen; mindex++)
1964
                g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
1965
}
1966
 
1967
/*
1968
 - computematchjumps - compute match jumps for BM scan
1969
 == static void computematchjumps(struct parse *p, struct re_guts *g);
1970
 *
1971
 * This algorithm assumes g->must exists and is has size greater than
1972
 * zero. It's based on the algorithm found on Computer Algorithms by
1973
 * Sara Baase.
1974
 *
1975
 * A match jump is the number of characters one needs to advance based
1976
 * on the already-matched suffix.
1977
 * Notice that all values here are minus (g->mlen-1), because of the way
1978
 * the search algorithm works.
1979
 */
1980
static void
1981
computematchjumps(p, g)
1982
struct parse *p;
1983
struct re_guts *g;
1984
{
1985
        int mindex;             /* General "must" iterator */
1986
        int suffix;             /* Keeps track of matching suffix */
1987
        int ssuffix;            /* Keeps track of suffixes' suffix */
1988
        int* pmatches;          /* pmatches[k] points to the next i
1989
                                 * such that i+1...mlen is a substring
1990
                                 * of k+1...k+mlen-i-1
1991
                                 */
1992
 
1993
        /* Avoid making errors worse */
1994
        if (p->error != 0)
1995
                return;
1996
 
1997
        pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));
1998
        if (pmatches == NULL) {
1999
                g->matchjump = NULL;
2000
                return;
2001
        }
2002
 
2003
        g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));
2004
        if (g->matchjump == NULL)       /* Not a fatal error */
2005
                return;
2006
 
2007
        /* Set maximum possible jump for each character in the pattern */
2008
        for (mindex = 0; mindex < g->mlen; mindex++)
2009
                g->matchjump[mindex] = 2*g->mlen - mindex - 1;
2010
 
2011
        /* Compute pmatches[] */
2012
        for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
2013
            mindex--, suffix--) {
2014
                pmatches[mindex] = suffix;
2015
 
2016
                /* If a mismatch is found, interrupting the substring,
2017
                 * compute the matchjump for that position. If no
2018
                 * mismatch is found, then a text substring mismatched
2019
                 * against the suffix will also mismatch against the
2020
                 * substring.
2021
                 */
2022
                while (suffix < g->mlen
2023
                    && g->must[mindex] != g->must[suffix]) {
2024
                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
2025
                            g->mlen - mindex - 1);
2026
                        suffix = pmatches[suffix];
2027
                }
2028
        }
2029
 
2030
        /* Compute the matchjump up to the last substring found to jump
2031
         * to the beginning of the largest must pattern prefix matching
2032
         * it's own suffix.
2033
         */
2034
        for (mindex = 0; mindex <= suffix; mindex++)
2035
                g->matchjump[mindex] = MIN(g->matchjump[mindex],
2036
                    g->mlen + suffix - mindex);
2037
 
2038
        ssuffix = pmatches[suffix];
2039
        while (suffix < g->mlen) {
2040
                while (suffix <= ssuffix && suffix < g->mlen) {
2041
                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
2042
                            g->mlen + ssuffix - suffix);
2043
                        suffix++;
2044
                }
2045
                if (suffix < g->mlen)
2046
                        ssuffix = pmatches[ssuffix];
2047
        }
2048
 
2049
        free(pmatches);
2050
}
2051
 
2052
/*
2053
 - pluscount - count + nesting
2054
 == static sopno pluscount(struct parse *p, struct re_guts *g);
2055
 */
2056
static sopno                    /* nesting depth */
2057
pluscount(p, g)
2058
struct parse *p;
2059
struct re_guts *g;
2060
{
2061
        sop *scan;
2062
        sop s;
2063
        sopno plusnest = 0;
2064
        sopno maxnest = 0;
2065
 
2066
        if (p->error != 0)
2067
                return(0);       /* there may not be an OEND */
2068
 
2069
        scan = g->strip + 1;
2070
        do {
2071
                s = *scan++;
2072
                switch (OP(s)) {
2073
                case OPLUS_:
2074
                        plusnest++;
2075
                        break;
2076
                case O_PLUS:
2077
                        if (plusnest > maxnest)
2078
                                maxnest = plusnest;
2079
                        plusnest--;
2080
                        break;
2081
                }
2082
        } while (OP(s) != OEND);
2083
        if (plusnest != 0)
2084
                g->iflags |= BAD;
2085
        return(maxnest);
2086
}
2087
 
2088
#endif /* !_NO_REGEX  */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.