OpenCores
URL https://opencores.org/ocsvn/eco32/eco32/trunk

Subversion Repositories eco32

[/] [eco32/] [trunk/] [lcc/] [cpp/] [lex.c] - Blame information for rev 130

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 4 hellwig
#include <stdio.h>
2
#include <stdlib.h>
3
#include <string.h>
4
#include "cpp.h"
5
 
6
/*
 * lexical FSM encoding
 *   when in state state, and one of the characters
 *   in ch arrives, enter nextstate.
 *   States >= S_SELF are either final, or at least require special action.
 *   In 'fsm' there is a line for each state X charset X nextstate.
 *   List chars that overwrite previous entries later (e.g. C_ALPH
 *   can be overridden by '_' by a later entry); C_XX is
 *   the universal set, and should always be first.
 *   States >= S_SELF are represented in the big table as negative values.
 *   S_SELF and S_SELFB encode the resulting token type in the upper bits.
 *   These actions differ in that S_SELF doesn't have a lookahead char,
 *   S_SELFB does.
 *
 *   The encoding is blown out into a big table for time-efficiency.
 *   Entries have
 *      nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
 */
24
 
25
/*
 * Packed table-entry helpers.
 *   MAXSTATE  number of state columns in the big table (also the value of
 *             the first special state)
 *   ACT       pack a token type (shifted up 7 bits) with an action code
 *   QBSBIT    bit 6 of an entry: the '?' and '\' marker
 *   GETACT    recover the 9-bit token type from a packed entry
 * Arguments and results are fully parenthesized so that the macros can be
 * used safely inside larger expressions.
 */
#define MAXSTATE        32
#define ACT(tok,act)    (((tok)<<7)+(act))
#define QBSBIT          0100
#define GETACT(st)      (((st)>>7)&0x1ff)
29
 
30
/*
 * Character classes.
 * These small values can be listed in the ch[] field of an fsm row in place
 * of a literal character.  expandlex() gives C_XX (every character),
 * C_ALPH (letters and '_') and C_NUM (decimal digits) special treatment;
 * any other value is used directly as a character index.
 */
#define C_WS    1
#define C_ALPH  2
#define C_NUM   3
#define C_EOF   4
#define C_XX    5
36
 
37
enum state {
38
        START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4,
39
        CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
40
        CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
41
        S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
42
        S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
43
};
44
 
45
/*
 * Global counters/tables defined here; neither is read or written by the
 * code visible in this file.
 */
int	tottok;
int	tokkind[256];
47
struct  fsm {
48
        int     state;          /* if in this state */
49
        uchar   ch[4];          /* and see one of these characters */
50
        int     nextstate;      /* enter this state if +ve */
51
};
52
 
53
/*const*/ struct fsm fsm[] = {
54
        /* start state */
55
        START,  { C_XX },       ACT(UNCLASS,S_SELF),
56
        START,  { ' ', '\t', '\v' },    WS1,
57
        START,  { C_NUM },      NUM1,
58
        START,  { '.' },        NUM3,
59
        START,  { C_ALPH },     ID1,
60
        START,  { 'L' },        ST1,
61
        START,  { '"' },        ST2,
62
        START,  { '\'' },       CC1,
63
        START,  { '/' },        COM1,
64
        START,  { EOFC },       S_EOF,
65
        START,  { '\n' },       S_NL,
66
        START,  { '-' },        MINUS1,
67
        START,  { '+' },        PLUS1,
68
        START,  { '<' },        LT1,
69
        START,  { '>' },        GT1,
70
        START,  { '=' },        ASG1,
71
        START,  { '!' },        NOT1,
72
        START,  { '&' },        AND1,
73
        START,  { '|' },        OR1,
74
        START,  { '#' },        SHARP1,
75
        START,  { '%' },        PCT1,
76
        START,  { '[' },        ACT(SBRA,S_SELF),
77
        START,  { ']' },        ACT(SKET,S_SELF),
78
        START,  { '(' },        ACT(LP,S_SELF),
79
        START,  { ')' },        ACT(RP,S_SELF),
80
        START,  { '*' },        STAR1,
81
        START,  { ',' },        ACT(COMMA,S_SELF),
82
        START,  { '?' },        ACT(QUEST,S_SELF),
83
        START,  { ':' },        ACT(COLON,S_SELF),
84
        START,  { ';' },        ACT(SEMIC,S_SELF),
85
        START,  { '{' },        ACT(CBRA,S_SELF),
86
        START,  { '}' },        ACT(CKET,S_SELF),
87
        START,  { '~' },        ACT(TILDE,S_SELF),
88
        START,  { '^' },        CIRC1,
89
 
90
        /* saw a digit */
91
        NUM1,   { C_XX },       ACT(NUMBER,S_SELFB),
92
        NUM1,   { C_NUM, C_ALPH, '.' }, NUM1,
93
        NUM1,   { 'E', 'e' },   NUM2,
94
        NUM1,   { '_' },        ACT(NUMBER,S_SELFB),
95
 
96
        /* saw possible start of exponent, digits-e */
97
        NUM2,   { C_XX },       ACT(NUMBER,S_SELFB),
98
        NUM2,   { '+', '-' },   NUM1,
99
        NUM2,   { C_NUM, C_ALPH },      NUM1,
100
        NUM2,   { '_' },        ACT(NUMBER,S_SELFB),
101
 
102
        /* saw a '.', which could be a number or an operator */
103
        NUM3,   { C_XX },       ACT(DOT,S_SELFB),
104
        NUM3,   { '.' },        DOTS1,
105
        NUM3,   { C_NUM },      NUM1,
106
 
107
        DOTS1,  { C_XX },       ACT(UNCLASS, S_SELFB),
108
        DOTS1,  { C_NUM },      NUM1,
109
        DOTS1,  { '.' },        ACT(ELLIPS, S_SELF),
110
 
111
        /* saw a letter or _ */
112
        ID1,    { C_XX },       ACT(NAME,S_NAME),
113
        ID1,    { C_ALPH, C_NUM },      ID1,
114
 
115
        /* saw L (start of wide string?) */
116
        ST1,    { C_XX },       ACT(NAME,S_NAME),
117
        ST1,    { C_ALPH, C_NUM },      ID1,
118
        ST1,    { '"' },        ST2,
119
        ST1,    { '\'' },       CC1,
120
 
121
        /* saw " beginning string */
122
        ST2,    { C_XX },       ST2,
123
        ST2,    { '"' },        ACT(STRING, S_SELF),
124
        ST2,    { '\\' },       ST3,
125
        ST2,    { '\n' },       S_STNL,
126
        ST2,    { EOFC },       S_EOFSTR,
127
 
128
        /* saw \ in string */
129
        ST3,    { C_XX },       ST2,
130
        ST3,    { '\n' },       S_STNL,
131
        ST3,    { EOFC },       S_EOFSTR,
132
 
133
        /* saw ' beginning character const */
134
        CC1,    { C_XX },       CC1,
135
        CC1,    { '\'' },       ACT(CCON, S_SELF),
136
        CC1,    { '\\' },       CC2,
137
        CC1,    { '\n' },       S_STNL,
138
        CC1,    { EOFC },       S_EOFSTR,
139
 
140
        /* saw \ in ccon */
141
        CC2,    { C_XX },       CC1,
142
        CC2,    { '\n' },       S_STNL,
143
        CC2,    { EOFC },       S_EOFSTR,
144
 
145
        /* saw /, perhaps start of comment */
146
        COM1,   { C_XX },       ACT(SLASH, S_SELFB),
147
        COM1,   { '=' },        ACT(ASSLASH, S_SELF),
148
        COM1,   { '*' },        COM2,
149
        COM1,   { '/' },        COM4,
150
 
151
        /* saw / then *, start of comment */
152
        COM2,   { C_XX },       COM2,
153
        COM2,   { '\n' },       S_COMNL,
154
        COM2,   { '*' },        COM3,
155
        COM2,   { EOFC },       S_EOFCOM,
156
 
157
        /* saw the * possibly ending a comment */
158
        COM3,   { C_XX },       COM2,
159
        COM3,   { '\n' },       S_COMNL,
160
        COM3,   { '*' },        COM3,
161
        COM3,   { '/' },        S_COMMENT,
162
 
163
        /* // comment */
164
        COM4,   { C_XX },       COM4,
165
        COM4,   { '\n' },       S_NL,
166
        COM4,   { EOFC },       S_EOFCOM,
167
 
168
        /* saw white space, eat it up */
169
        WS1,    { C_XX },       S_WS,
170
        WS1,    { ' ', '\t', '\v' },    WS1,
171
 
172
        /* saw -, check --, -=, -> */
173
        MINUS1, { C_XX },       ACT(MINUS, S_SELFB),
174
        MINUS1, { '-' },        ACT(MMINUS, S_SELF),
175
        MINUS1, { '=' },        ACT(ASMINUS,S_SELF),
176
        MINUS1, { '>' },        ACT(ARROW,S_SELF),
177
 
178
        /* saw +, check ++, += */
179
        PLUS1,  { C_XX },       ACT(PLUS, S_SELFB),
180
        PLUS1,  { '+' },        ACT(PPLUS, S_SELF),
181
        PLUS1,  { '=' },        ACT(ASPLUS, S_SELF),
182
 
183
        /* saw <, check <<, <<=, <= */
184
        LT1,    { C_XX },       ACT(LT, S_SELFB),
185
        LT1,    { '<' },        LT2,
186
        LT1,    { '=' },        ACT(LEQ, S_SELF),
187
        LT2,    { C_XX },       ACT(LSH, S_SELFB),
188
        LT2,    { '=' },        ACT(ASLSH, S_SELF),
189
 
190
        /* saw >, check >>, >>=, >= */
191
        GT1,    { C_XX },       ACT(GT, S_SELFB),
192
        GT1,    { '>' },        GT2,
193
        GT1,    { '=' },        ACT(GEQ, S_SELF),
194
        GT2,    { C_XX },       ACT(RSH, S_SELFB),
195
        GT2,    { '=' },        ACT(ASRSH, S_SELF),
196
 
197
        /* = */
198
        ASG1,   { C_XX },       ACT(ASGN, S_SELFB),
199
        ASG1,   { '=' },        ACT(EQ, S_SELF),
200
 
201
        /* ! */
202
        NOT1,   { C_XX },       ACT(NOT, S_SELFB),
203
        NOT1,   { '=' },        ACT(NEQ, S_SELF),
204
 
205
        /* & */
206
        AND1,   { C_XX },       ACT(AND, S_SELFB),
207
        AND1,   { '&' },        ACT(LAND, S_SELF),
208
        AND1,   { '=' },        ACT(ASAND, S_SELF),
209
 
210
        /* | */
211
        OR1,    { C_XX },       ACT(OR, S_SELFB),
212
        OR1,    { '|' },        ACT(LOR, S_SELF),
213
        OR1,    { '=' },        ACT(ASOR, S_SELF),
214
 
215
        /* # */
216
        SHARP1, { C_XX },       ACT(SHARP, S_SELFB),
217
        SHARP1, { '#' },        ACT(DSHARP, S_SELF),
218
 
219
        /* % */
220
        PCT1,   { C_XX },       ACT(PCT, S_SELFB),
221
        PCT1,   { '=' },        ACT(ASPCT, S_SELF),
222
 
223
        /* * */
224
        STAR1,  { C_XX },       ACT(STAR, S_SELFB),
225
        STAR1,  { '=' },        ACT(ASSTAR, S_SELF),
226
 
227
        /* ^ */
228
        CIRC1,  { C_XX },       ACT(CIRC, S_SELFB),
229
        CIRC1,  { '=' },        ACT(ASCIRC, S_SELF),
230
 
231
        -1
232
};
233
 
234
/* first index is char, second is state */
235
/* increase #states to power of 2 to encourage use of shift */
236
short   bigfsm[256][MAXSTATE];
237
 
238
void
239
expandlex(void)
240
{
241
        /*const*/ struct fsm *fp;
242
        int i, j, nstate;
243
 
244
        for (fp = fsm; fp->state>=0; fp++) {
245
                for (i=0; fp->ch[i]; i++) {
246
                        nstate = fp->nextstate;
247
                        if (nstate >= S_SELF)
248
                                nstate = ~nstate;
249
                        switch (fp->ch[i]) {
250
 
251
                        case C_XX:              /* random characters */
252
                                for (j=0; j<256; j++)
253
                                        bigfsm[j][fp->state] = nstate;
254
                                continue;
255
                        case C_ALPH:
256
                                for (j=0; j<=256; j++)
257
                                        if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
258
                                          || j=='_')
259
                                                bigfsm[j][fp->state] = nstate;
260
                                continue;
261
                        case C_NUM:
262
                                for (j='0'; j<='9'; j++)
263
                                        bigfsm[j][fp->state] = nstate;
264
                                continue;
265
                        default:
266
                                bigfsm[fp->ch[i]][fp->state] = nstate;
267
                        }
268
                }
269
        }
270
        /* install special cases for ? (trigraphs),  \ (splicing), runes, and EOB */
271
        for (i=0; i<MAXSTATE; i++) {
272
                for (j=0; j<0xFF; j++)
273
                        if (j=='?' || j=='\\') {
274
                                if (bigfsm[j][i]>0)
275
                                        bigfsm[j][i] = ~bigfsm[j][i];
276
                                bigfsm[j][i] &= ~QBSBIT;
277
                        }
278
                bigfsm[EOB][i] = ~S_EOB;
279
                if (bigfsm[EOFC][i]>=0)
280
                        bigfsm[EOFC][i] = ~S_EOF;
281
        }
282
}
283
 
284
void
285
fixlex(void)
286
{
287
        /* do C++ comments? */
288
        if (Cplusplus==0)
289
                bigfsm['/'][COM1] = bigfsm['x'][COM1];
290
}
291
 
292
/*
293
 * fill in a row of tokens from input, terminated by NL or END
294
 * First token is put at trp->lp.
295
 * Reset is non-zero when the input buffer can be "rewound."
296
 * The value is a flag indicating that possible macros have
297
 * been seen in the row.
298
 */
299
int
300
gettokens(Tokenrow *trp, int reset)
301
{
302
        register int c, state, oldstate;
303
        register uchar *ip;
304
        register Token *tp, *maxp;
305
        int runelen;
306
        Source *s = cursource;
307
        int nmac = 0;
308
        extern char outbuf[];
309
 
310
        tp = trp->lp;
311
        ip = s->inp;
312
        if (reset) {
313
                s->lineinc = 0;
314
                if (ip>=s->inl) {               /* nothing in buffer */
315
                        s->inl = s->inb;
316
                        fillbuf(s);
317
                        ip = s->inp = s->inb;
318
                } else if (ip >= s->inb+(3*INS/4)) {
319
                        memmove(s->inb, ip, 4+s->inl-ip);
320
                        s->inl = s->inb+(s->inl-ip);
321
                        ip = s->inp = s->inb;
322
                }
323
        }
324
        maxp = &trp->bp[trp->max];
325
        runelen = 1;
326
        for (;;) {
327
           continue2:
328
                if (tp>=maxp) {
329
                        trp->lp = tp;
330
                        tp = growtokenrow(trp);
331
                        maxp = &trp->bp[trp->max];
332
                }
333
                tp->type = UNCLASS;
334
                tp->hideset = 0;
335
                tp->t = ip;
336
                tp->wslen = 0;
337
                tp->flag = 0;
338
                state = START;
339
                for (;;) {
340
                        oldstate = state;
341
                        c = *ip;
342
                        if ((state = bigfsm[c][state]) >= 0) {
343
                                ip += runelen;
344
                                runelen = 1;
345
                                continue;
346
                        }
347
                        state = ~state;
348
                reswitch:
349
                        switch (state&0177) {
350
                        case S_SELF:
351
                                ip += runelen;
352
                                runelen = 1;
353
                        case S_SELFB:
354
                                tp->type = GETACT(state);
355
                                tp->len = ip - tp->t;
356
                                tp++;
357
                                goto continue2;
358
 
359
                        case S_NAME:    /* like S_SELFB but with nmac check */
360
                                tp->type = NAME;
361
                                tp->len = ip - tp->t;
362
                                nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0);
363
                                tp++;
364
                                goto continue2;
365
 
366
                        case S_WS:
367
                                tp->wslen = ip - tp->t;
368
                                tp->t = ip;
369
                                state = START;
370
                                continue;
371
 
372
                        default:
373
                                if ((state&QBSBIT)==0) {
374
                                        ip += runelen;
375
                                        runelen = 1;
376
                                        continue;
377
                                }
378
                                state &= ~QBSBIT;
379
                                s->inp = ip;
380
                                if (c=='?') {   /* check trigraph */
381
                                        if (trigraph(s)) {
382
                                                state = oldstate;
383
                                                continue;
384
                                        }
385
                                        goto reswitch;
386
                                }
387
                                if (c=='\\') { /* line-folding */
388
                                        if (foldline(s)) {
389
                                                s->lineinc++;
390
                                                state = oldstate;
391
                                                continue;
392
                                        }
393
                                        goto reswitch;
394
                                }
395
                                error(WARNING, "Lexical botch in cpp");
396
                                ip += runelen;
397
                                runelen = 1;
398
                                continue;
399
 
400
                        case S_EOB:
401
                                s->inp = ip;
402
                                fillbuf(cursource);
403
                                state = oldstate;
404
                                continue;
405
 
406
                        case S_EOF:
407
                                tp->type = END;
408
                                tp->len = 0;
409
                                s->inp = ip;
410
                                if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=NULL)
411
                                        error(WARNING,"No newline at end of file");
412
                                trp->lp = tp+1;
413
                                return nmac;
414
 
415
                        case S_STNL:
416
                                error(ERROR, "Unterminated string or char const");
417
                        case S_NL:
418
                                tp->t = ip;
419
                                tp->type = NL;
420
                                tp->len = 1;
421
                                tp->wslen = 0;
422
                                s->lineinc++;
423
                                s->inp = ip+1;
424
                                trp->lp = tp+1;
425
                                return nmac;
426
 
427
                        case S_EOFSTR:
428
                                error(FATAL, "EOF in string or char constant");
429
                                break;
430
 
431
                        case S_COMNL:
432
                                s->lineinc++;
433
                                state = COM2;
434
                                ip += runelen;
435
                                runelen = 1;
436
                                if (ip >= s->inb+(7*INS/8)) { /* very long comment */
437
                                        memmove(tp->t, ip, 4+s->inl-ip);
438
                                        s->inl -= ip-tp->t;
439
                                        ip = tp->t+1;
440
                                }
441
                                continue;
442
 
443
                        case S_EOFCOM:
444
                                error(WARNING, "EOF inside comment");
445
                                --ip;
446
                        case S_COMMENT:
447
                                ++ip;
448
                                tp->t = ip;
449
                                tp->t[-1] = ' ';
450
                                tp->wslen = 1;
451
                                state = START;
452
                                continue;
453
                        }
454
                        break;
455
                }
456
                ip += runelen;
457
                runelen = 1;
458
                tp->len = ip - tp->t;
459
                tp++;
460
        }
461
}
462
 
463
/* have seen ?; handle the trigraph it starts (if any) else 0 */
464
int
465
trigraph(Source *s)
466
{
467
        int c;
468
 
469
        while (s->inp+2 >= s->inl && fillbuf(s)!=EOF)
470
                ;
471
        if (s->inp[1]!='?')
472
                return 0;
473
        c = 0;
474
        switch(s->inp[2]) {
475
        case '=':
476
                c = '#'; break;
477
        case '(':
478
                c = '['; break;
479
        case '/':
480
                c = '\\'; break;
481
        case ')':
482
                c = ']'; break;
483
        case '\'':
484
                c = '^'; break;
485
        case '<':
486
                c = '{'; break;
487
        case '!':
488
                c = '|'; break;
489
        case '>':
490
                c = '}'; break;
491
        case '-':
492
                c = '~'; break;
493
        }
494
        if (c) {
495
                *s->inp = c;
496
                memmove(s->inp+1, s->inp+3, s->inl-s->inp+2);
497
                s->inl -= 2;
498
        }
499
        return c;
500
}
501
 
502
int
503
foldline(Source *s)
504
{
505
        while (s->inp+1 >= s->inl && fillbuf(s)!=EOF)
506
                ;
507
        if (s->inp[1] == '\n') {
508
                memmove(s->inp, s->inp+2, s->inl-s->inp+3);
509
                s->inl -= 2;
510
                return 1;
511
        }
512
        return 0;
513
}
514
 
515
int
516
fillbuf(Source *s)
517
{
518
        int n, nr;
519
 
520
        nr = INS/8;
521
        if ((char *)s->inl+nr > (char *)s->inb+INS)
522
                error(FATAL, "Input buffer overflow");
523
        if (s->fd==NULL || (n=fread((char *)s->inl, 1, INS/8, s->fd)) <= 0)
524
                n = 0;
525
        if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */
526
                *s->inp = EOFC;
527
        s->inl += n;
528
        s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB;
529
        if (n==0) {
530
                s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC;
531
                return EOF;
532
        }
533
        return 0;
534
}
535
 
536
/*
537
 * Push down to new source of characters.
538
 * If fd!=NULL and str==NULL, then from a file `name';
539
 * if fd==NULL and str, then from the string.
540
 */
541
Source *
542
setsource(char *name, FILE *fd, char *str)
543
{
544
        Source *s = new(Source);
545
        int len;
546
 
547
        s->line = 1;
548
        s->lineinc = 0;
549
        s->fd = fd;
550
        s->filename = name;
551
        s->next = cursource;
552
        s->ifdepth = 0;
553
        cursource = s;
554
        /* slop at right for EOB */
555
        if (str) {
556
                len = strlen(str);
557
                s->inb = domalloc(len+4);
558
                s->inp = s->inb;
559
                strncpy((char *)s->inp, str, len);
560
        } else {
561
                s->inb = domalloc(INS+4);
562
                s->inp = s->inb;
563
                len = 0;
564
        }
565
        s->inl = s->inp+len;
566
        s->inl[0] = s->inl[1] = EOB;
567
        return s;
568
}
569
 
570
void
571
unsetsource(void)
572
{
573
        Source *s = cursource;
574
 
575
        if (s->fd != NULL) {
576
                fclose(s->fd);
577
                dofree(s->inb);
578
        }
579
        cursource = s->next;
580
        dofree(s);
581
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.