OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [newlib-1.17.0/] [newlib/] [libc/] [posix/] [engine.c] - Blame information for rev 158

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 148 jeremybenn
/*-
2
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3
 * Copyright (c) 1992, 1993, 1994
4
 *      The Regents of the University of California.  All rights reserved.
5
 *
6
 * This code is derived from software contributed to Berkeley by
7
 * Henry Spencer.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 * 4. Neither the name of the University nor the names of its contributors
18
 *    may be used to endorse or promote products derived from this software
19
 *    without specific prior written permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31
 * SUCH DAMAGE.
32
 *
33
 *      @(#)engine.c    8.5 (Berkeley) 3/20/94
34
 */
35
 
36
#include <sys/cdefs.h>
37
 
38
/*
39
 * The matching engine and friends.  This file is #included by regexec.c
40
 * after suitable #defines of a variety of macros used herein, so that
41
 * different state representations can be used without duplicating masses
42
 * of code.
43
 */
44
 
45
#ifdef SNAMES
46
#define matcher smatcher
47
#define fast    sfast
48
#define slow    sslow
49
#define dissect sdissect
50
#define backref sbackref
51
#define step    sstep
52
#define print   sprint
53
#define at      sat
54
#define match   smat
55
#endif
56
#ifdef LNAMES
57
#define matcher lmatcher
58
#define fast    lfast
59
#define slow    lslow
60
#define dissect ldissect
61
#define backref lbackref
62
#define step    lstep
63
#define print   lprint
64
#define at      lat
65
#define match   lmat
66
#endif
67
 
68
/* another structure passed up and down to avoid zillions of parameters */
69
struct match {
70
        struct re_guts *g;
71
        int eflags;
72
        regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
73
        char *offp;             /* offsets work from here */
74
        char *beginp;           /* start of string -- virtual NUL precedes */
75
        char *endp;             /* end of string -- virtual NUL here */
76
        char *coldp;            /* can be no match starting before here */
77
        char **lastpos;         /* [nplus+1] */
78
        STATEVARS;
79
        states st;              /* current states */
80
        states fresh;           /* states for a fresh start */
81
        states tmp;             /* temporary */
82
        states empty;           /* empty set of states */
83
};
84
 
85
/* ========= begin header generated by ./mkh ========= */
86
#ifdef __cplusplus
87
extern "C" {
88
#endif
89
 
90
/* === engine.c === */
91
static int matcher(struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
92
static char *dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
93
static char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
94
static char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
95
static char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
96
static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
97
#define BOL     (OUT+1)
98
#define EOL     (BOL+1)
99
#define BOLEOL  (BOL+2)
100
#define NOTHING (BOL+3)
101
#define BOW     (BOL+4)
102
#define EOW     (BOL+5)
103
#define CODEMAX (BOL+5)         /* highest code used */
104
#define NONCHAR(c)      ((c) > CHAR_MAX)
105
#define NNONCHAR        (CODEMAX-CHAR_MAX)
106
#ifdef REDEBUG
107
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
108
#endif
109
#ifdef REDEBUG
110
static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
111
#endif
112
#ifdef REDEBUG
113
static char *pchar(int ch);
114
#endif
115
 
116
#ifdef __cplusplus
117
}
118
#endif
119
/* ========= end header generated by ./mkh ========= */
120
 
121
#ifdef REDEBUG
122
#define SP(t, s, c)     print(m, t, s, c, stdout)
123
#define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
124
#define NOTE(str)       { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
125
#else
126
#define SP(t, s, c)     /* nothing */
127
#define AT(t, p1, p2, s1, s2)   /* nothing */
128
#define NOTE(s) /* nothing */
129
#endif
130
 
131
/*
132
 - matcher - the actual matching engine
133
 == static int matcher(struct re_guts *g, char *string, \
134
 ==     size_t nmatch, regmatch_t pmatch[], int eflags);
135
 */
136
static int                      /* 0 success, REG_NOMATCH failure */
137
matcher(g, string, nmatch, pmatch, eflags)
138
struct re_guts *g;
139
char *string;
140
size_t nmatch;
141
regmatch_t pmatch[];
142
int eflags;
143
{
144
        char *endp;
145
        int i;
146
        struct match mv;
147
        struct match *m = &mv;
148
        char *dp;
149
        const sopno gf = g->firststate+1;       /* +1 for OEND */
150
        const sopno gl = g->laststate;
151
        char *start;
152
        char *stop;
153
        /* Boyer-Moore algorithms variables */
154
        char *pp;
155
        int cj, mj;
156
        char *mustfirst;
157
        char *mustlast;
158
        int *matchjump;
159
        int *charjump;
160
 
161
        /* simplify the situation where possible */
162
        if (g->cflags&REG_NOSUB)
163
                nmatch = 0;
164
        if (eflags&REG_STARTEND) {
165
                start = string + pmatch[0].rm_so;
166
                stop = string + pmatch[0].rm_eo;
167
        } else {
168
                start = string;
169
                stop = start + strlen(start);
170
        }
171
        if (stop < start)
172
                return(REG_INVARG);
173
 
174
        /* prescreening; this does wonders for this rather slow code */
175
        if (g->must != NULL) {
176
                if (g->charjump != NULL && g->matchjump != NULL) {
177
                        mustfirst = g->must;
178
                        mustlast = g->must + g->mlen - 1;
179
                        charjump = g->charjump;
180
                        matchjump = g->matchjump;
181
                        pp = mustlast;
182
                        for (dp = start+g->mlen-1; dp < stop;) {
183
                                /* Fast skip non-matches */
184
                                while (dp < stop && charjump[*dp])
185
                                        dp += charjump[*dp];
186
 
187
                                if (dp >= stop)
188
                                        break;
189
 
190
                                /* Greedy matcher */
191
                                /* We depend on not being used for
192
                                 * for strings of length 1
193
                                 */
194
                                while (*--dp == *--pp && pp != mustfirst);
195
 
196
                                if (*dp == *pp)
197
                                        break;
198
 
199
                                /* Jump to next possible match */
200
                                mj = matchjump[pp - mustfirst];
201
                                cj = charjump[*dp];
202
                                dp += (cj < mj ? mj : cj);
203
                                pp = mustlast;
204
                        }
205
                        if (pp != mustfirst)
206
                                return(REG_NOMATCH);
207
                } else {
208
                        for (dp = start; dp < stop; dp++)
209
                                if (*dp == g->must[0] &&
210
                                    stop - dp >= g->mlen &&
211
                                    memcmp(dp, g->must, (size_t)g->mlen) == 0)
212
                                        break;
213
                        if (dp == stop)         /* we didn't find g->must */
214
                                return(REG_NOMATCH);
215
                }
216
        }
217
 
218
        /* match struct setup */
219
        m->g = g;
220
        m->eflags = eflags;
221
        m->pmatch = NULL;
222
        m->lastpos = NULL;
223
        m->offp = string;
224
        m->beginp = start;
225
        m->endp = stop;
226
        STATESETUP(m, 4);
227
        SETUP(m->st);
228
        SETUP(m->fresh);
229
        SETUP(m->tmp);
230
        SETUP(m->empty);
231
        CLEAR(m->empty);
232
 
233
        /* Adjust start according to moffset, to speed things up */
234
        if (g->moffset > -1)
235
                start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
236
 
237
        /* this loop does only one repetition except for backrefs */
238
        for (;;) {
239
                endp = fast(m, start, stop, gf, gl);
240
                if (endp == NULL) {             /* a miss */
241
                        STATETEARDOWN(m);
242
                        return(REG_NOMATCH);
243
                }
244
                if (nmatch == 0 && !g->backrefs)
245
                        break;          /* no further info needed */
246
 
247
                /* where? */
248
                assert(m->coldp != NULL);
249
                for (;;) {
250
                        NOTE("finding start");
251
                        endp = slow(m, m->coldp, stop, gf, gl);
252
                        if (endp != NULL)
253
                                break;
254
                        assert(m->coldp < m->endp);
255
                        m->coldp++;
256
                }
257
                if (nmatch == 1 && !g->backrefs)
258
                        break;          /* no further info needed */
259
 
260
                /* oh my, he wants the subexpressions... */
261
                if (m->pmatch == NULL)
262
                        m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
263
                                                        sizeof(regmatch_t));
264
                if (m->pmatch == NULL) {
265
                        STATETEARDOWN(m);
266
                        return(REG_ESPACE);
267
                }
268
                for (i = 1; i <= m->g->nsub; i++)
269
                        m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
270
                if (!g->backrefs && !(m->eflags&REG_BACKR)) {
271
                        NOTE("dissecting");
272
                        dp = dissect(m, m->coldp, endp, gf, gl);
273
                } else {
274
                        if (g->nplus > 0 && m->lastpos == NULL)
275
                                m->lastpos = (char **)malloc((g->nplus+1) *
276
                                                        sizeof(char *));
277
                        if (g->nplus > 0 && m->lastpos == NULL) {
278
                                free(m->pmatch);
279
                                STATETEARDOWN(m);
280
                                return(REG_ESPACE);
281
                        }
282
                        NOTE("backref dissect");
283
                        dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
284
                }
285
                if (dp != NULL)
286
                        break;
287
 
288
                /* uh-oh... we couldn't find a subexpression-level match */
289
                assert(g->backrefs);    /* must be back references doing it */
290
                assert(g->nplus == 0 || m->lastpos != NULL);
291
                for (;;) {
292
                        if (dp != NULL || endp <= m->coldp)
293
                                break;          /* defeat */
294
                        NOTE("backoff");
295
                        endp = slow(m, m->coldp, endp-1, gf, gl);
296
                        if (endp == NULL)
297
                                break;          /* defeat */
298
                        /* try it on a shorter possibility */
299
#ifndef NDEBUG
300
                        for (i = 1; i <= m->g->nsub; i++) {
301
                                assert(m->pmatch[i].rm_so == -1);
302
                                assert(m->pmatch[i].rm_eo == -1);
303
                        }
304
#endif
305
                        NOTE("backoff dissect");
306
                        dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
307
                }
308
                assert(dp == NULL || dp == endp);
309
                if (dp != NULL)         /* found a shorter one */
310
                        break;
311
 
312
                /* despite initial appearances, there is no match here */
313
                NOTE("false alarm");
314
                start = m->coldp + 1;   /* recycle starting later */
315
                assert(start <= stop);
316
        }
317
 
318
        /* fill in the details if requested */
319
        if (nmatch > 0) {
320
                pmatch[0].rm_so = m->coldp - m->offp;
321
                pmatch[0].rm_eo = endp - m->offp;
322
        }
323
        if (nmatch > 1) {
324
                assert(m->pmatch != NULL);
325
                for (i = 1; i < nmatch; i++)
326
                        if (i <= m->g->nsub)
327
                                pmatch[i] = m->pmatch[i];
328
                        else {
329
                                pmatch[i].rm_so = -1;
330
                                pmatch[i].rm_eo = -1;
331
                        }
332
        }
333
 
334
        if (m->pmatch != NULL)
335
                free((char *)m->pmatch);
336
        if (m->lastpos != NULL)
337
                free((char *)m->lastpos);
338
        STATETEARDOWN(m);
339
        return(0);
340
}
341
 
342
/*
343
 - dissect - figure out what matched what, no back references
344
 == static char *dissect(struct match *m, char *start, \
345
 ==     char *stop, sopno startst, sopno stopst);
346
 */
347
static char *                   /* == stop (success) always */
348
dissect(m, start, stop, startst, stopst)
349
struct match *m;
350
char *start;
351
char *stop;
352
sopno startst;
353
sopno stopst;
354
{
355
        int i;
356
        sopno ss;               /* start sop of current subRE */
357
        sopno es;               /* end sop of current subRE */
358
        char *sp;               /* start of string matched by it */
359
        char *stp;              /* string matched by it cannot pass here */
360
        char *rest;             /* start of rest of string */
361
        char *tail;             /* string unmatched by rest of RE */
362
        sopno ssub;             /* start sop of subsubRE */
363
        sopno esub;             /* end sop of subsubRE */
364
        char *ssp;              /* start of string matched by subsubRE */
365
        char *sep;              /* end of string matched by subsubRE */
366
        char *oldssp;           /* previous ssp */
367
        char *dp;
368
 
369
        AT("diss", start, stop, startst, stopst);
370
        sp = start;
371
        for (ss = startst; ss < stopst; ss = es) {
372
                /* identify end of subRE */
373
                es = ss;
374
                switch (OP(m->g->strip[es])) {
375
                case OPLUS_:
376
                case OQUEST_:
377
                        es += OPND(m->g->strip[es]);
378
                        break;
379
                case OCH_:
380
                        while (OP(m->g->strip[es]) != O_CH)
381
                                es += OPND(m->g->strip[es]);
382
                        break;
383
                }
384
                es++;
385
 
386
                /* figure out what it matched */
387
                switch (OP(m->g->strip[ss])) {
388
                case OEND:
389
                        assert(nope);
390
                        break;
391
                case OCHAR:
392
                        sp++;
393
                        break;
394
                case OBOL:
395
                case OEOL:
396
                case OBOW:
397
                case OEOW:
398
                        break;
399
                case OANY:
400
                case OANYOF:
401
                        sp++;
402
                        break;
403
                case OBACK_:
404
                case O_BACK:
405
                        assert(nope);
406
                        break;
407
                /* cases where length of match is hard to find */
408
                case OQUEST_:
409
                        stp = stop;
410
                        for (;;) {
411
                                /* how long could this one be? */
412
                                rest = slow(m, sp, stp, ss, es);
413
                                assert(rest != NULL);   /* it did match */
414
                                /* could the rest match the rest? */
415
                                tail = slow(m, rest, stop, es, stopst);
416
                                if (tail == stop)
417
                                        break;          /* yes! */
418
                                /* no -- try a shorter match for this one */
419
                                stp = rest - 1;
420
                                assert(stp >= sp);      /* it did work */
421
                        }
422
                        ssub = ss + 1;
423
                        esub = es - 1;
424
                        /* did innards match? */
425
                        if (slow(m, sp, rest, ssub, esub) != NULL) {
426
                                dp = dissect(m, sp, rest, ssub, esub);
427
                                assert(dp == rest);
428
                        } else          /* no */
429
                                assert(sp == rest);
430
                        sp = rest;
431
                        break;
432
                case OPLUS_:
433
                        stp = stop;
434
                        for (;;) {
435
                                /* how long could this one be? */
436
                                rest = slow(m, sp, stp, ss, es);
437
                                assert(rest != NULL);   /* it did match */
438
                                /* could the rest match the rest? */
439
                                tail = slow(m, rest, stop, es, stopst);
440
                                if (tail == stop)
441
                                        break;          /* yes! */
442
                                /* no -- try a shorter match for this one */
443
                                stp = rest - 1;
444
                                assert(stp >= sp);      /* it did work */
445
                        }
446
                        ssub = ss + 1;
447
                        esub = es - 1;
448
                        ssp = sp;
449
                        oldssp = ssp;
450
                        for (;;) {      /* find last match of innards */
451
                                sep = slow(m, ssp, rest, ssub, esub);
452
                                if (sep == NULL || sep == ssp)
453
                                        break;  /* failed or matched null */
454
                                oldssp = ssp;   /* on to next try */
455
                                ssp = sep;
456
                        }
457
                        if (sep == NULL) {
458
                                /* last successful match */
459
                                sep = ssp;
460
                                ssp = oldssp;
461
                        }
462
                        assert(sep == rest);    /* must exhaust substring */
463
                        assert(slow(m, ssp, sep, ssub, esub) == rest);
464
                        dp = dissect(m, ssp, sep, ssub, esub);
465
                        assert(dp == sep);
466
                        sp = rest;
467
                        break;
468
                case OCH_:
469
                        stp = stop;
470
                        for (;;) {
471
                                /* how long could this one be? */
472
                                rest = slow(m, sp, stp, ss, es);
473
                                assert(rest != NULL);   /* it did match */
474
                                /* could the rest match the rest? */
475
                                tail = slow(m, rest, stop, es, stopst);
476
                                if (tail == stop)
477
                                        break;          /* yes! */
478
                                /* no -- try a shorter match for this one */
479
                                stp = rest - 1;
480
                                assert(stp >= sp);      /* it did work */
481
                        }
482
                        ssub = ss + 1;
483
                        esub = ss + OPND(m->g->strip[ss]) - 1;
484
                        assert(OP(m->g->strip[esub]) == OOR1);
485
                        for (;;) {      /* find first matching branch */
486
                                if (slow(m, sp, rest, ssub, esub) == rest)
487
                                        break;  /* it matched all of it */
488
                                /* that one missed, try next one */
489
                                assert(OP(m->g->strip[esub]) == OOR1);
490
                                esub++;
491
                                assert(OP(m->g->strip[esub]) == OOR2);
492
                                ssub = esub + 1;
493
                                esub += OPND(m->g->strip[esub]);
494
                                if (OP(m->g->strip[esub]) == OOR2)
495
                                        esub--;
496
                                else
497
                                        assert(OP(m->g->strip[esub]) == O_CH);
498
                        }
499
                        dp = dissect(m, sp, rest, ssub, esub);
500
                        assert(dp == rest);
501
                        sp = rest;
502
                        break;
503
                case O_PLUS:
504
                case O_QUEST:
505
                case OOR1:
506
                case OOR2:
507
                case O_CH:
508
                        assert(nope);
509
                        break;
510
                case OLPAREN:
511
                        i = OPND(m->g->strip[ss]);
512
                        assert(0 < i && i <= m->g->nsub);
513
                        m->pmatch[i].rm_so = sp - m->offp;
514
                        break;
515
                case ORPAREN:
516
                        i = OPND(m->g->strip[ss]);
517
                        assert(0 < i && i <= m->g->nsub);
518
                        m->pmatch[i].rm_eo = sp - m->offp;
519
                        break;
520
                default:                /* uh oh */
521
                        assert(nope);
522
                        break;
523
                }
524
        }
525
 
526
        assert(sp == stop);
527
        return(sp);
528
}
529
 
530
/*
531
 - backref - figure out what matched what, figuring in back references
532
 == static char *backref(struct match *m, char *start, \
533
 ==     char *stop, sopno startst, sopno stopst, sopno lev);
534
 */
535
static char *                   /* == stop (success) or NULL (failure) */
536
backref(m, start, stop, startst, stopst, lev)
537
struct match *m;
538
char *start;
539
char *stop;
540
sopno startst;
541
sopno stopst;
542
sopno lev;                      /* PLUS nesting level */
543
{
544
        int i;
545
        sopno ss;               /* start sop of current subRE */
546
        char *sp;               /* start of string matched by it */
547
        sopno ssub;             /* start sop of subsubRE */
548
        sopno esub;             /* end sop of subsubRE */
549
        char *ssp;              /* start of string matched by subsubRE */
550
        char *dp;
551
        size_t len;
552
        int hard;
553
        sop s;
554
        regoff_t offsave;
555
        cset *cs;
556
 
557
        AT("back", start, stop, startst, stopst);
558
        sp = start;
559
 
560
        /* get as far as we can with easy stuff */
561
        hard = 0;
562
        for (ss = startst; !hard && ss < stopst; ss++)
563
                switch (OP(s = m->g->strip[ss])) {
564
                case OCHAR:
565
                        if (sp == stop || *sp++ != (char)OPND(s))
566
                                return(NULL);
567
                        break;
568
                case OANY:
569
                        if (sp == stop)
570
                                return(NULL);
571
                        sp++;
572
                        break;
573
                case OANYOF:
574
                        cs = &m->g->sets[OPND(s)];
575
                        if (sp == stop || !CHIN(cs, *sp++))
576
                                return(NULL);
577
                        break;
578
                case OBOL:
579
                        if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
580
                                        (sp < m->endp && *(sp-1) == '\n' &&
581
                                                (m->g->cflags&REG_NEWLINE)) )
582
                                { /* yes */ }
583
                        else
584
                                return(NULL);
585
                        break;
586
                case OEOL:
587
                        if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
588
                                        (sp < m->endp && *sp == '\n' &&
589
                                                (m->g->cflags&REG_NEWLINE)) )
590
                                { /* yes */ }
591
                        else
592
                                return(NULL);
593
                        break;
594
                case OBOW:
595
                        if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
596
                                        (sp < m->endp && *(sp-1) == '\n' &&
597
                                                (m->g->cflags&REG_NEWLINE)) ||
598
                                        (sp > m->beginp &&
599
                                                        !ISWORD(*(sp-1))) ) &&
600
                                        (sp < m->endp && ISWORD(*sp)) )
601
                                { /* yes */ }
602
                        else
603
                                return(NULL);
604
                        break;
605
                case OEOW:
606
                        if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
607
                                        (sp < m->endp && *sp == '\n' &&
608
                                                (m->g->cflags&REG_NEWLINE)) ||
609
                                        (sp < m->endp && !ISWORD(*sp)) ) &&
610
                                        (sp > m->beginp && ISWORD(*(sp-1))) )
611
                                { /* yes */ }
612
                        else
613
                                return(NULL);
614
                        break;
615
                case O_QUEST:
616
                        break;
617
                case OOR1:      /* matches null but needs to skip */
618
                        ss++;
619
                        s = m->g->strip[ss];
620
                        do {
621
                                assert(OP(s) == OOR2);
622
                                ss += OPND(s);
623
                        } while (OP(s = m->g->strip[ss]) != O_CH);
624
                        /* note that the ss++ gets us past the O_CH */
625
                        break;
626
                default:        /* have to make a choice */
627
                        hard = 1;
628
                        break;
629
                }
630
        if (!hard) {            /* that was it! */
631
                if (sp != stop)
632
                        return(NULL);
633
                return(sp);
634
        }
635
        ss--;                   /* adjust for the for's final increment */
636
 
637
        /* the hard stuff */
638
        AT("hard", sp, stop, ss, stopst);
639
        s = m->g->strip[ss];
640
        switch (OP(s)) {
641
        case OBACK_:            /* the vilest depths */
642
                i = OPND(s);
643
                assert(0 < i && i <= m->g->nsub);
644
                if (m->pmatch[i].rm_eo == -1)
645
                        return(NULL);
646
                assert(m->pmatch[i].rm_so != -1);
647
                len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
648
                assert(stop - m->beginp >= len);
649
                if (sp > stop - len)
650
                        return(NULL);   /* not enough left to match */
651
                ssp = m->offp + m->pmatch[i].rm_so;
652
                if (memcmp(sp, ssp, len) != 0)
653
                        return(NULL);
654
                while (m->g->strip[ss] != SOP(O_BACK, i))
655
                        ss++;
656
                return(backref(m, sp+len, stop, ss+1, stopst, lev));
657
                break;
658
        case OQUEST_:           /* to null or not */
659
                dp = backref(m, sp, stop, ss+1, stopst, lev);
660
                if (dp != NULL)
661
                        return(dp);     /* not */
662
                return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
663
                break;
664
        case OPLUS_:
665
                assert(m->lastpos != NULL);
666
                assert(lev+1 <= m->g->nplus);
667
                m->lastpos[lev+1] = sp;
668
                return(backref(m, sp, stop, ss+1, stopst, lev+1));
669
                break;
670
        case O_PLUS:
671
                if (sp == m->lastpos[lev])      /* last pass matched null */
672
                        return(backref(m, sp, stop, ss+1, stopst, lev-1));
673
                /* try another pass */
674
                m->lastpos[lev] = sp;
675
                dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
676
                if (dp == NULL)
677
                        return(backref(m, sp, stop, ss+1, stopst, lev-1));
678
                else
679
                        return(dp);
680
                break;
681
        case OCH_:              /* find the right one, if any */
682
                ssub = ss + 1;
683
                esub = ss + OPND(s) - 1;
684
                assert(OP(m->g->strip[esub]) == OOR1);
685
                for (;;) {      /* find first matching branch */
686
                        dp = backref(m, sp, stop, ssub, esub, lev);
687
                        if (dp != NULL)
688
                                return(dp);
689
                        /* that one missed, try next one */
690
                        if (OP(m->g->strip[esub]) == O_CH)
691
                                return(NULL);   /* there is none */
692
                        esub++;
693
                        assert(OP(m->g->strip[esub]) == OOR2);
694
                        ssub = esub + 1;
695
                        esub += OPND(m->g->strip[esub]);
696
                        if (OP(m->g->strip[esub]) == OOR2)
697
                                esub--;
698
                        else
699
                                assert(OP(m->g->strip[esub]) == O_CH);
700
                }
701
                break;
702
        case OLPAREN:           /* must undo assignment if rest fails */
703
                i = OPND(s);
704
                assert(0 < i && i <= m->g->nsub);
705
                offsave = m->pmatch[i].rm_so;
706
                m->pmatch[i].rm_so = sp - m->offp;
707
                dp = backref(m, sp, stop, ss+1, stopst, lev);
708
                if (dp != NULL)
709
                        return(dp);
710
                m->pmatch[i].rm_so = offsave;
711
                return(NULL);
712
                break;
713
        case ORPAREN:           /* must undo assignment if rest fails */
714
                i = OPND(s);
715
                assert(0 < i && i <= m->g->nsub);
716
                offsave = m->pmatch[i].rm_eo;
717
                m->pmatch[i].rm_eo = sp - m->offp;
718
                dp = backref(m, sp, stop, ss+1, stopst, lev);
719
                if (dp != NULL)
720
                        return(dp);
721
                m->pmatch[i].rm_eo = offsave;
722
                return(NULL);
723
                break;
724
        default:                /* uh oh */
725
                assert(nope);
726
                break;
727
        }
728
 
729
        /* "can't happen" */
730
        assert(nope);
731
        /* NOTREACHED */
732
        return "shut up gcc";
733
}
734
 
735
/*
736
 - fast - step through the string at top speed
737
 == static char *fast(struct match *m, char *start, \
738
 ==     char *stop, sopno startst, sopno stopst);
739
 */
740
static char *                   /* where tentative match ended, or NULL */
741
fast(m, start, stop, startst, stopst)
742
struct match *m;
743
char *start;
744
char *stop;
745
sopno startst;
746
sopno stopst;
747
{
748
        states st = m->st;
749
        states fresh = m->fresh;
750
        states tmp = m->tmp;
751
        char *p = start;
752
        int c = (start == m->beginp) ? OUT : *(start-1);
753
        int lastc;              /* previous c */
754
        int flagch;
755
        int i;
756
        char *coldp;            /* last p after which no match was underway */
757
 
758
        CLEAR(st);
759
        SET1(st, startst);
760
        st = step(m->g, startst, stopst, st, NOTHING, st);
761
        ASSIGN(fresh, st);
762
        SP("start", st, *p);
763
        coldp = NULL;
764
        for (;;) {
765
                /* next character */
766
                lastc = c;
767
                c = (p == m->endp) ? OUT : *p;
768
                if (EQ(st, fresh))
769
                        coldp = p;
770
 
771
                /* is there an EOL and/or BOL between lastc and c? */
772
                flagch = '\0';
773
                i = 0;
774
                if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
775
                                (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
776
                        flagch = BOL;
777
                        i = m->g->nbol;
778
                }
779
                if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
780
                                (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
781
                        flagch = (flagch == BOL) ? BOLEOL : EOL;
782
                        i += m->g->neol;
783
                }
784
                if (i != 0) {
785
                        for (; i > 0; i--)
786
                                st = step(m->g, startst, stopst, st, flagch, st);
787
                        SP("boleol", st, c);
788
                }
789
 
790
                /* how about a word boundary? */
791
                if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
792
                                        (c != OUT && ISWORD(c)) ) {
793
                        flagch = BOW;
794
                }
795
                if ( (lastc != OUT && ISWORD(lastc)) &&
796
                                (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
797
                        flagch = EOW;
798
                }
799
                if (flagch == BOW || flagch == EOW) {
800
                        st = step(m->g, startst, stopst, st, flagch, st);
801
                        SP("boweow", st, c);
802
                }
803
 
804
                /* are we done? */
805
                if (ISSET(st, stopst) || p == stop)
806
                        break;          /* NOTE BREAK OUT */
807
 
808
                /* no, we must deal with this character */
809
                ASSIGN(tmp, st);
810
                ASSIGN(st, fresh);
811
                assert(c != OUT);
812
                st = step(m->g, startst, stopst, tmp, c, st);
813
                SP("aft", st, c);
814
                assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
815
                p++;
816
        }
817
 
818
        assert(coldp != NULL);
819
        m->coldp = coldp;
820
        if (ISSET(st, stopst))
821
                return(p+1);
822
        else
823
                return(NULL);
824
}
825
 
826
/*
827
 - slow - step through the string more deliberately
828
 == static char *slow(struct match *m, char *start, \
829
 ==     char *stop, sopno startst, sopno stopst);
830
 */
831
static char *                   /* where it ended */
832
slow(m, start, stop, startst, stopst)
833
struct match *m;
834
char *start;
835
char *stop;
836
sopno startst;
837
sopno stopst;
838
{
839
        states st = m->st;
840
        states empty = m->empty;
841
        states tmp = m->tmp;
842
        char *p = start;
843
        int c = (start == m->beginp) ? OUT : *(start-1);
844
        int lastc;              /* previous c */
845
        int flagch;
846
        int i;
847
        char *matchp;           /* last p at which a match ended */
848
 
849
        AT("slow", start, stop, startst, stopst);
850
        CLEAR(st);
851
        SET1(st, startst);
852
        SP("sstart", st, *p);
853
        st = step(m->g, startst, stopst, st, NOTHING, st);
854
        matchp = NULL;
855
        for (;;) {
856
                /* next character */
857
                lastc = c;
858
                c = (p == m->endp) ? OUT : *p;
859
 
860
                /* is there an EOL and/or BOL between lastc and c? */
861
                flagch = '\0';
862
                i = 0;
863
                if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
864
                                (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
865
                        flagch = BOL;
866
                        i = m->g->nbol;
867
                }
868
                if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
869
                                (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
870
                        flagch = (flagch == BOL) ? BOLEOL : EOL;
871
                        i += m->g->neol;
872
                }
873
                if (i != 0) {
874
                        for (; i > 0; i--)
875
                                st = step(m->g, startst, stopst, st, flagch, st);
876
                        SP("sboleol", st, c);
877
                }
878
 
879
                /* how about a word boundary? */
880
                if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
881
                                        (c != OUT && ISWORD(c)) ) {
882
                        flagch = BOW;
883
                }
884
                if ( (lastc != OUT && ISWORD(lastc)) &&
885
                                (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
886
                        flagch = EOW;
887
                }
888
                if (flagch == BOW || flagch == EOW) {
889
                        st = step(m->g, startst, stopst, st, flagch, st);
890
                        SP("sboweow", st, c);
891
                }
892
 
893
                /* are we done? */
894
                if (ISSET(st, stopst))
895
                        matchp = p;
896
                if (EQ(st, empty) || p == stop)
897
                        break;          /* NOTE BREAK OUT */
898
 
899
                /* no, we must deal with this character */
900
                ASSIGN(tmp, st);
901
                ASSIGN(st, empty);
902
                assert(c != OUT);
903
                st = step(m->g, startst, stopst, tmp, c, st);
904
                SP("saft", st, c);
905
                assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
906
                p++;
907
        }
908
 
909
        return(matchp);
910
}
911
 
912
 
913
/*
914
 - step - map set of states reachable before char to set reachable after
915
 == static states step(struct re_guts *g, sopno start, sopno stop, \
916
 ==     states bef, int ch, states aft);
917
 == #define     BOL     (OUT+1)
918
 == #define     EOL     (BOL+1)
919
 == #define     BOLEOL  (BOL+2)
920
 == #define     NOTHING (BOL+3)
921
 == #define     BOW     (BOL+4)
922
 == #define     EOW     (BOL+5)
923
 == #define     CODEMAX (BOL+5)         // highest code used
924
 == #define     NONCHAR(c)      ((c) > CHAR_MAX)
925
 == #define     NNONCHAR        (CODEMAX-CHAR_MAX)
926
 */
927
static states
928
step(g, start, stop, bef, ch, aft)
929
struct re_guts *g;
930
sopno start;                    /* start state within strip */
931
sopno stop;                     /* state after stop state within strip */
932
states bef;                     /* states reachable before */
933
int ch;                         /* character or NONCHAR code */
934
states aft;                     /* states already known reachable after */
935
{
936
        cset *cs;
937
        sop s;
938
        sopno pc;
939
        onestate here;          /* note, macros know this name */
940
        sopno look;
941
        int i;
942
 
943
        for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
944
                s = g->strip[pc];
945
                switch (OP(s)) {
946
                case OEND:
947
                        assert(pc == stop-1);
948
                        break;
949
                case OCHAR:
950
                        /* only characters can match */
951
                        assert(!NONCHAR(ch) || ch != (char)OPND(s));
952
                        if (ch == (char)OPND(s))
953
                                FWD(aft, bef, 1);
954
                        break;
955
                case OBOL:
956
                        if (ch == BOL || ch == BOLEOL)
957
                                FWD(aft, bef, 1);
958
                        break;
959
                case OEOL:
960
                        if (ch == EOL || ch == BOLEOL)
961
                                FWD(aft, bef, 1);
962
                        break;
963
                case OBOW:
964
                        if (ch == BOW)
965
                                FWD(aft, bef, 1);
966
                        break;
967
                case OEOW:
968
                        if (ch == EOW)
969
                                FWD(aft, bef, 1);
970
                        break;
971
                case OANY:
972
                        if (!NONCHAR(ch))
973
                                FWD(aft, bef, 1);
974
                        break;
975
                case OANYOF:
976
                        cs = &g->sets[OPND(s)];
977
                        if (!NONCHAR(ch) && CHIN(cs, ch))
978
                                FWD(aft, bef, 1);
979
                        break;
980
                case OBACK_:            /* ignored here */
981
                case O_BACK:
982
                        FWD(aft, aft, 1);
983
                        break;
984
                case OPLUS_:            /* forward, this is just an empty */
985
                        FWD(aft, aft, 1);
986
                        break;
987
                case O_PLUS:            /* both forward and back */
988
                        FWD(aft, aft, 1);
989
                        i = ISSETBACK(aft, OPND(s));
990
                        BACK(aft, aft, OPND(s));
991
                        if (!i && ISSETBACK(aft, OPND(s))) {
992
                                /* oho, must reconsider loop body */
993
                                pc -= OPND(s) + 1;
994
                                INIT(here, pc);
995
                        }
996
                        break;
997
                case OQUEST_:           /* two branches, both forward */
998
                        FWD(aft, aft, 1);
999
                        FWD(aft, aft, OPND(s));
1000
                        break;
1001
                case O_QUEST:           /* just an empty */
1002
                        FWD(aft, aft, 1);
1003
                        break;
1004
                case OLPAREN:           /* not significant here */
1005
                case ORPAREN:
1006
                        FWD(aft, aft, 1);
1007
                        break;
1008
                case OCH_:              /* mark the first two branches */
1009
                        FWD(aft, aft, 1);
1010
                        assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1011
                        FWD(aft, aft, OPND(s));
1012
                        break;
1013
                case OOR1:              /* done a branch, find the O_CH */
1014
                        if (ISSTATEIN(aft, here)) {
1015
                                for (look = 1;
1016
                                                OP(s = g->strip[pc+look]) != O_CH;
1017
                                                look += OPND(s))
1018
                                        assert(OP(s) == OOR2);
1019
                                FWD(aft, aft, look);
1020
                        }
1021
                        break;
1022
                case OOR2:              /* propagate OCH_'s marking */
1023
                        FWD(aft, aft, 1);
1024
                        if (OP(g->strip[pc+OPND(s)]) != O_CH) {
1025
                                assert(OP(g->strip[pc+OPND(s)]) == OOR2);
1026
                                FWD(aft, aft, OPND(s));
1027
                        }
1028
                        break;
1029
                case O_CH:              /* just empty */
1030
                        FWD(aft, aft, 1);
1031
                        break;
1032
                default:                /* ooooops... */
1033
                        assert(nope);
1034
                        break;
1035
                }
1036
        }
1037
 
1038
        return(aft);
1039
}
1040
 
1041
#ifdef REDEBUG
1042
/*
1043
 - print - print a set of states
1044
 == #ifdef REDEBUG
1045
 == static void print(struct match *m, char *caption, states st, \
1046
 ==     int ch, FILE *d);
1047
 == #endif
1048
 */
1049
static void
1050
print(m, caption, st, ch, d)
1051
struct match *m;
1052
char *caption;
1053
states st;
1054
int ch;
1055
FILE *d;
1056
{
1057
        struct re_guts *g = m->g;
1058
        int i;
1059
        int first = 1;
1060
 
1061
        if (!(m->eflags&REG_TRACE))
1062
                return;
1063
 
1064
        fprintf(d, "%s", caption);
1065
        if (ch != '\0')
1066
                fprintf(d, " %s", pchar(ch));
1067
        for (i = 0; i < g->nstates; i++)
1068
                if (ISSET(st, i)) {
1069
                        fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1070
                        first = 0;
1071
                }
1072
        fprintf(d, "\n");
1073
}
1074
 
1075
/*
1076
 - at - print current situation
1077
 == #ifdef REDEBUG
1078
 == static void at(struct match *m, char *title, char *start, char *stop, \
1079
 ==                                             sopno startst, sopno stopst);
1080
 == #endif
1081
 */
1082
static void
1083
at(m, title, start, stop, startst, stopst)
1084
struct match *m;
1085
char *title;
1086
char *start;
1087
char *stop;
1088
sopno startst;
1089
sopno stopst;
1090
{
1091
        if (!(m->eflags&REG_TRACE))
1092
                return;
1093
 
1094
        printf("%s %s-", title, pchar(*start));
1095
        printf("%s ", pchar(*stop));
1096
        printf("%ld-%ld\n", (long)startst, (long)stopst);
1097
}
1098
 
1099
#ifndef PCHARDONE
1100
#define PCHARDONE       /* never again */
1101
/*
1102
 - pchar - make a character printable
1103
 == #ifdef REDEBUG
1104
 == static char *pchar(int ch);
1105
 == #endif
1106
 *
1107
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1108
 * duplicate here avoids having a debugging-capable regexec.o tied to
1109
 * a matching debug.o, and this is convenient.  It all disappears in
1110
 * the non-debug compilation anyway, so it doesn't matter much.
1111
 */
1112
static char *                   /* -> representation */
1113
pchar(ch)
1114
int ch;
1115
{
1116
        static char pbuf[10];
1117
 
1118
        if (isprint((uch)ch) || ch == ' ')
1119
                sprintf(pbuf, "%c", ch);
1120
        else
1121
                sprintf(pbuf, "\\%o", ch);
1122
        return(pbuf);
1123
}
1124
#endif
1125
#endif
1126
 
1127
#undef  matcher
1128
#undef  fast
1129
#undef  slow
1130
#undef  dissect
1131
#undef  backref
1132
#undef  step
1133
#undef  print
1134
#undef  at
1135
#undef  match

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.