OpenCores
URL https://opencores.org/ocsvn/copyblaze/copyblaze/trunk

Subversion Repositories copyblaze

[/] [copyblaze/] [trunk/] [copyblaze/] [sw/] [tools/] [asm/] [pBlazASM/] [pBlazASM/] [pbLexer.c] - Blame information for rev 46

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 ameziti
/*
2
 *  Copyright © 2003..2010 : Henk van Kampen <henk@mediatronix.com>
3
 *
4
 *      This file is part of pBlazASM.
5
 *
6
 *  pBlazASM is free software: you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation, either version 3 of the License, or
9
 *  (at your option) any later version.
10
 *
11
 *  pBlazASM is distributed in the hope that it will be useful,
12
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *  GNU General Public License for more details.
15
 *
16
 *  You should have received a copy of the GNU General Public License
17
 *  along with pBlazASM.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
 
20
#include <ctype.h>
21
#include <string.h>
22
#include <stdint.h>
23
#include <stdlib.h>
24
 
25
#include "pbTypes.h"
26
#include "pbErrors.h"
27
 
28
// Lexer states: one enumerator per phase of the scanner state machine in lex().
// The enumerator order, and therefore every numeric value, is unchanged.
typedef enum {
        lsBin,          // consuming binary digits (after '%' or "0b")
        lsChar,         // inside a 'c' character literal
        lsComment,      // after ';', consuming up to the end of the line
        lsDec,          // consuming decimal digits
        lsCopy,         // token boundaries known: store its text and advance
        lsError,        // unacceptable input: token is marked tERROR, lexing stops
        lsHex,          // consuming hexadecimal digits (after '$' or "0x")
        lsHexBin,       // leading '0' seen: decide between "0x", "0b" and decimal
        lsIdent,        // consuming identifier characters
        lsIdle,         // between tokens: classify the next character
        lsInit,         // reset the current token slot before scanning
        lsOperator,     // single character operator
        lsDoubleOp,     // two character operator, "<<" or ">>"
        lsPunct,        // punctuation ':', ',', '(' or ')'
        lsIndex,        // '.'-prefixed item (directive, indexing, local label)
        lsString        // inside a "string" literal
} LexState ;
47
 
48
// global token list
49
static symbol_t tokens[ 256 ] ; // global token list
50
static symbol_t * ptok = 0 ; // pointer to current token, index in 'tokens[]'
51
 
52
symbol_t * tok_first( void ) {
53
        ptok = tokens ;
54
        return ptok ;
55
}
56
 
57
symbol_t * tok_current( void ) {
58
        return ptok ;
59
}
60
 
61
symbol_t * tok_next( void ) {
62
        if ( ptok < &tokens[ 256 ] )
63
                return ptok++ ;
64
        else {
65
                ptok->type = tNONE ;
66
                return ptok ;
67
        }
68
}
69
 
70
// Reposition the token cursor on 'back'.
// The pointer is stored as given; no validation is performed here.
void tok_back( symbol_t * back ) {
        ptok = back ;
}
73
 
74
void tok_free( void ) {
75
        for ( ptok = tokens ; ptok->text != NULL ; ptok++ ) {
76
                free( ptok->text ) ;
77
 
78
                ptok->type = tNONE ;
79
                ptok->subtype = stNONE ;
80
                ptok->text = NULL ;
81
                ptok->value = 0 ;
82
        }
83
}
84
 
85
// state machine based lexer
86
// tokens are recorded in 'tokens', ended by a NONE token
87
bool lex( char * line, const bool mode ) {
88
        char * start = NULL, *end = NULL, *s = line ;
89
        char term[ 256 ], *pterm = NULL ;
90
        LexState state = lsInit ;
91
 
92
        // state machine
93
        for ( ptok = tokens ; ptok < &tokens[ 256 ] ; ) {
94
                switch ( state ) {
95
                case lsInit :
96
                        ptok->type = tNONE ;
97
                        ptok->subtype = stNONE ;
98
                        ptok->value = 0 ;
99
                        ptok->text = NULL ;
100
 
101
                        pterm = term ;
102
                        *pterm = '\0' ;
103
                        state = lsIdle ;
104
                        break ;
105
 
106
                case lsIdle :
107
                        // starting characters of tokens to be
108
                        if ( *s == '\0' || *s == '\r' || *s == '\n' ) {
109
                                // end of line
110
                                return true ;
111
                        } else if ( *s == ' ' || iscntrl( *s ) ) {
112
                                // white space, 'space' and all control characters, except \0, \r and \n
113
                                s++ ;
114 46 ameziti
                        } else if ( mode && ( isalnum( *s ) || *s == '_'  ) ) {
115 2 ameziti
                                // KCPSM mode, all alphanum is accepted for idents, could be hex values
116
                                // ident
117
                                start = s++ ;
118
                                state = lsIdent ;
119
                        } else if ( !mode && ( isalpha( *s ) || *s == '_' ) ) {
120
                                // ident
121
                                start = s++ ;
122
                                state = lsIdent ;
123
                        } else if ( *s == ';' ) {
124
                                // comment
125
                                start = s++ ;
126
                                state = lsComment ;
127
                        } else if ( *s == '0' ) {
128
                                // maybe hex or bin
129
                                start = s++ ;
130
                                state = lsHexBin ;
131
                        } else if ( isdigit( *s ) ) {
132
                                // decimal number
133
                                start = s++ ;
134
                                state = lsDec ;
135
                        } else if ( *s == '$' ) {
136
                                // hexadecimal number
137
                                start = ++s ;
138
                                state = lsHex ;
139
                        } else if ( *s == '%' ) {
140
                                // binary number
141
                                start = ++s ;
142
                                state = lsBin ;
143
                        } else if ( *s == '.' ) {
144
                                // directives, indexing, local labels, etc
145
                                start = s++ ;
146
                                state = lsIndex ;
147
                        } else if ( *s == ':' || *s == ',' || *s == '(' || *s == ')' ) {
148
                                // punctuation ',', ':', '(', ')', '~'
149
                                start = s++ ;
150
                                state = lsPunct ;
151
                        } else if ( *s == '*' || *s == '/' || *s == '#' || *s == '+' || *s == '-' ||
152
                                        *s == '|' || *s == '&' || *s == '^' || *s == '~' ) {
153
                                // operators
154
                                start = s++ ;
155
                                state = lsOperator ;
156
                        } else if ( *s == '<' || *s == '>' ) {
157
                                // double char operators
158
                                start = s++ ;
159
                                state = lsDoubleOp ;
160
                        } else if ( *s == '\'' ) {
161
                                // 'c'
162
                                start = ++s ;
163
                                state = lsChar ;
164
                        } else if ( *s == '"' ) {
165
                                // "string"
166
                                start = ++s ;
167
                                state = lsString ;
168
                        } else
169
                                state = lsError ;
170
                        break ;
171
 
172
                case lsComment :
173
                        if ( *s != '\0' && *s != '\r' && *s != '\n' )
174
                                // anything till end of line
175
                                s++ ;
176
                        else {
177
                                end = s ;
178
                                ptok->type = tNONE ;
179
                                ptok->subtype = stCOMMENT ;
180
                                state = lsCopy ;
181
                        }
182
                        break ;
183
 
184
                case lsChar :
185
                        if ( *s == '\'' ) {
186
                                ptok->type = tCHAR ;
187
                                end = s++ ;
188
                                state = lsCopy ;
189
                        } else if ( *s == '\\' ) {
190
                                s += 1 ;
191
                                if ( *s != '\0' )
192
                                        s += 1 ;
193
                        } else if ( isgraph( *s ) || *s == ' ' ) {
194
                                s++ ;
195
                        } else
196
                                state = lsError ;
197
                        break ;
198
 
199
                case lsString :
200
                        if ( *s == '"' ) {
201
                                ptok->type = tSTRING ;
202
                                end = s++ ;
203
                                state = lsCopy ;
204
                        } else if ( *s == '\\' ) {
205
                                s += 1 ;
206
                                if ( *s != '\0' )
207
                                        s += 1 ;
208
                        } else if ( isgraph( *s ) || *s == ' ' )
209
                                s++ ;
210
                        else
211
                                state = lsError ;
212
                        break ;
213
 
214
                case lsIdent :
215
                        if ( isalnum( *s ) || *s == '_' )
216
                                s++ ;
217
                        else {
218
                                end = s ;
219
                                ptok->type = tIDENT ;
220
                                ptok->subtype = stNONE ;
221
                                state = lsCopy ;
222
                        }
223
                        break ;
224
 
225
                case lsHexBin :
226
                        if ( *s == 'x' ) {
227
                                start = ++s ;
228
                                state = lsHex ;
229
                        } else if ( *s == 'b' ) {
230
                                start = ++s ;
231
                                state = lsBin ;
232
                        } else
233
                                // missing the first '0' doesn't hurt here
234
                                state = lsDec ;
235
                        break ;
236
 
237
                case lsHex :
238
                        if ( isxdigit( *s ) )
239
                                s++ ;
240
                        else {
241
                                end = s ;
242
                                ptok->type = tHEX ;
243
                                state = lsCopy ;
244
                        }
245
                        break ;
246
 
247
                case lsBin :
248
                        if ( *s == '0' || *s == '1' )
249
                                s++ ;
250
                        else {
251
                                end = s ;
252
                                ptok->type = tBIN ;
253
                                state = lsCopy ;
254
                        }
255
                        break ;
256
 
257
                case lsDec :
258
                        if ( isdigit( *s ) )
259
                                s++ ;
260
                        else {
261
                                end = s ;
262
                                ptok->type = tDEC ;
263
                                state = lsCopy ;
264
                        }
265
                        break ;
266
 
267
                case lsOperator :
268
                        ptok->type = tOPERATOR ;
269
                        switch ( *start ) {
270
                        case '*' :
271
                                ptok->subtype = stMUL ;
272
                                break ;
273
                        case '/' :
274
                                ptok->subtype = stDIV ;
275
                                break ;
276
                        case '#' :
277
                                ptok->subtype = stMOD ;
278
                                break ;
279
                        case '+' :
280
                                ptok->subtype = stADD ;
281
                                break ;
282
                        case '-' :
283
                                ptok->subtype = stSUB ;
284
                                break ;
285
                        case '|' :
286
                                ptok->subtype = stIOR ;
287
                                break ;
288
                        case '&' :
289
                                ptok->subtype = stAND ;
290
                                break ;
291
                        case '^' :
292
                                ptok->subtype = stXOR ;
293
                                break ;
294
                        case '~' :
295
                                ptok->subtype = stTILDA ;
296
                                break ;
297
                        }
298
                        end = s ;
299
                        state = lsCopy ;
300
                        break ;
301
 
302
                case lsDoubleOp :
303
                        if ( *start == *s ) { // << or >>
304
                                ptok->type = tOPERATOR ;
305
                                switch ( *start ) {
306
                                case '<' :
307
                                        ptok->subtype = stSHL ;
308
                                        break ;
309
                                case '>' :
310
                                        ptok->subtype = stSHR ;
311
                                        break ;
312
                                }
313
                                end = ++s ;
314
                                state = lsCopy ;
315
                        } else
316
                                state = lsError ;
317
                        break ;
318
 
319
                case lsPunct :
320
                        end = s ;
321
                        state = lsCopy ;
322
                        switch ( *start ) {
323
                        case ':' :
324
                                ptok->type = tCOLON ;
325
                                break ;
326
                        case '(' :
327
                                ptok->type = tLPAREN ;
328
                                break ;
329
                        case ')' :
330
                                ptok->type = tRPAREN ;
331
                                break ;
332
                        case ',' :
333
                                ptok->type = tCOMMA ;
334
                                break ;
335
                        default :
336
                                state = lsError ;
337
                        }
338
                        break ;
339
 
340
                case lsIndex :
341
                        // any of .IX, .IX++, .--IX, .-IX+
342
                        if ( isalnum( *s ) || *s == '-' || *s == '+' )
343
                                s++ ;
344
                        else {
345
                                end = s ;
346
                                ptok->type = tIDENT ;
347
                                ptok->subtype = stDOT ;
348
                                state = lsCopy ;
349
                        }
350
                        break ;
351
 
352
                        // final token collector
353
                case lsCopy :
354
                        while ( start < end )
355
                                *pterm++ = *start++ ;
356
                        *pterm = '\0' ;
357
                        ptok->text = strdup( term ) ;
358
                        ptok++ ;
359
                        state = lsInit ;
360
                        break ;
361
 
362
                        // any errors
363
                case lsError :
364
                        *pterm = '\0' ;
365
                        ptok->type = tERROR ;
366
                        return false ;
367
                }
368
        }
369
        return false ;
370
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.