OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [assem1.cpp] - Blame information for rev 41

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 41 Agner
/****************************    assem1.cpp    ********************************
2
* Author:        Agner Fog
3
* Date created:  2017-04-17
4
* Last modified: 2021-07-10
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Module:        assem.cpp
8
* Description:
9
* Module for assembling ForwardCom .as files. Contains:
10
* pass1(): Split input file into lines and tokens. Remove comments. Find symbol definitions
11
* pass2(): Handle meta code. Classify lines. Identify symbol names, sections, functions
12
*
13
* Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses
14
******************************************************************************/
15
#include "stdafx.h"
16
 
17
// Options controlling what the tokenizer accepts in symbol names and comments.

// Punctuation characters permitted in symbol names, in addition to letters and digits.
// Characters that are used as operators must not be listed here.
const char * allowedInNames = "_$@";

// true: bytes with the high bit set (UTF-8 multi-byte sequences) may occur in symbol names
const bool allowUTF8 = true;

// true: block comments may be nested, e.g. /* /* */ */
const bool allowNestedComments = true;
20
 
21
                                       // Operator for sorting symbols by name. Used by assembler
22
// List of operators
23
SOperator operatorsList[] = {
24
    // name, id, priority
25
    {"(", '(',      1},
26
    {")", ')',      1},
27
    {"[", '[',      1},
28
    {"]", ']',      1},
29
    {"{", '{',      1},
30
    {"}", '}',      1},
31
    {"'", 39,       1},
32
    {"\"", '"',     1},           // "
33
    {"/*", 'c',     1},           // comment begin
34
    {"*/", 'd',     1},           // comment end
35
    {".", '.',      2},
36
    {"!", '!',      3},
37
    {"~", '~',      3},
38
    {"++", '+'+D2,  3},
39
    {"--", '-'+D2,  3},
40
    {"*", '*',      4},
41
    {"/", '/',      4},
42
    {"%", '%',      4},
43
    {"+", '+',      5},
44
    {"-", '-',      5},
45
    {"<<", '<'+D2,  6},
46
    {">>", '>'+D2,  6},           // signed shift right
47
    {">>>", '>'+D3, 6},           // unsigned shift right
48
    {"<", '<',      7},
49
    {"<=", '<'+EQ,  7},
50
    {">", '>',      7},
51
    {">=", '>'+EQ,  7},
52
    {"==", '='+D2,  8},
53
    {"!=", '!'+EQ,  8},
54
    {"&", '&',      9},
55
    {"^", '^',     10},
56
    {"|", '|',     11},
57
    {"&&", '&'+D2, 12},
58
    {"||", '|'+D2, 13},
59
    {"^^", '^'+D2, 13},           // boolean XOR. non-standard operator
60
    {"?", '?',     14},
61
    {":", ':',     14},
62
    {"=", '=',     15},
63
    {"+=", '+'+EQ, 15},
64
    {"-=", '-'+EQ, 15},
65
    {"*=", '*'+EQ, 15},
66
    {"/=", '/'+EQ, 15},
67
    {"%=", '%'+EQ, 15},
68
    {"<<=", '<'+D2+EQ,  15},
69
    {">>=", '>'+D2+EQ,  15},      // signed shift right
70
    {">>>=", '>'+D3+EQ, 15},      // unsigned shift right
71
    {"&=", '&'+EQ, 15},
72
    {"^=", '^'+EQ, 15},
73
    {"|=", '|'+EQ, 15},
74
    {",", ',',     16},
75
    {"//", '/'+D2, 20},           // comment, end of line
76
    {";", ';',     20}            // comment, end of line
77
};
78
 
79
 
80
// List of keywords
81
SKeyword keywordsList[] = {
82
    // name, id
83
    {"section",        DIR_SECTION},        // TOK_DIR: section, functions directives
84
    {"function",       DIR_FUNCTION},
85
    {"end",            DIR_END},
86
    {"public",         DIR_PUBLIC},
87
    {"extern",         DIR_EXTERN},
88
 
89
    // TOK_ATT: attributes of sections, functions and symbols
90
    {"read",           ATT_READ},           // readable section
91
    {"write",          ATT_WRITE},          // writeable section
92
    {"execute",        ATT_EXEC},           // executable section
93
    {"align",          ATT_ALIGN},          // align section, data, or code
94
    {"weak",           ATT_WEAK},           // weak linking
95
    {"reguse",         ATT_REGUSE},         // register use    
96
    {"constant",       ATT_CONSTANT},       // external constant
97
    {"uninitialized",  ATT_UNINIT},         // uninitialized section (BSS)
98
    {"communal",       ATT_COMDAT},         // communal section. duplicates and unreferenced sections are removed
99
    {"exception_hand", ATT_EXCEPTION},      // exception handler and stack unroll information
100
    {"event_hand",     ATT_EVENT},          // event handler list, including constructors and destructors
101
    {"debug_info",     ATT_DEBUG},          // debug information
102
    {"comment_info",   ATT_COMMENT},        // comments, including copyright and required libraries
103
 
104
    // TOK_TYP: type names
105
    {"int8",           TYP_INT8},
106
    {"uint8",          TYP_INT8+TYP_UNS},
107
    {"int16",          TYP_INT16},
108
    {"uint16",         TYP_INT16+TYP_UNS},
109
    {"int32",          TYP_INT32},
110
    {"uint32",         TYP_INT32+TYP_UNS},
111
    {"int64",          TYP_INT64},
112
    {"uint64",         TYP_INT64+TYP_UNS},
113
    {"int128",         TYP_INT128},
114
    {"uint128",        TYP_INT128+TYP_UNS},
115
    {"int",            TYP_INT32},
116
    {"uint",           TYP_INT32+TYP_UNS},
117
    {"float",          TYP_FLOAT32},
118
    {"double",         TYP_FLOAT64},
119
    {"float16",        TYP_FLOAT16},
120
    {"float32",        TYP_FLOAT32},
121
    {"float64",        TYP_FLOAT64},
122
    {"float128",       TYP_FLOAT128},
123
    {"string",         TYP_STRING},
124
 
125
    // TOK_OPT: options of instructions and operands
126
    {"mask",           OPT_MASK},
127
    {"fallback",       OPT_FALLBACK},
128
    {"length",         OPT_LENGTH},
129
    {"broadcast",      OPT_BROADCAST},
130
    {"limit",          OPT_LIMIT},
131
    {"scalar",         OPT_SCALAR},
132
    {"options",        OPT_OPTIONS},
133
    {"option",         OPT_OPTIONS},  // alias
134
 
135
    // TOK_REG: register names    
136
    {"numcontr",       REG_NUMCONTR},
137
    {"threadp",        REG_THREADP},
138
    {"datap",          REG_DATAP},
139
    {"ip",             REG_IP},
140
    {"sp",             REG_SP},
141
 
142
    // TOK_HLL: high level language keywords
143
    {"if",             HLL_IF},
144
    {"else",           HLL_ELSE},
145
    {"switch",         HLL_SWITCH},         // switch (r1, scratch registers) { case 0: break; ...}
146
    {"case",           HLL_CASE},
147
    {"for",            HLL_FOR},            // for (r1 = 1; r1 <= r2; r1++) {}
148
    {"in",             HLL_IN},             // for (float v1 in [r1-r2], nocheck) // (r2 counts down)
149
    {"while",          HLL_WHILE},          // while (r1 > 0) {}
150
    {"do",             HLL_DO},             // do {} while ()
151
    {"break",          HLL_BREAK},          // break out of switch or loop
152
    {"continue",       HLL_CONTINUE},       // continue loop 
153
    {"true",           HLL_TRUE},           // constant = 1
154
    {"false",          HLL_FALSE},          // constant = 0
155
 
156
    // temporary additions. will be replaced by macros later:
157
    {"push",           HLL_PUSH},           // push registers
158
    {"pop",            HLL_POP},            // pop registers
159
 
160
};
161
 
162
// List of register name prefixes
163
SKeyword registerNames[] = {
164
    // name, id
165
    {"r",            REG_R},
166
    {"v",            REG_V},
167
    {"spec",         REG_SPEC},
168
    {"capab",        REG_CAPAB},
169
    {"perf",         REG_PERF},
170
    {"sys",          REG_SYS}
171
};
172
 
173
 
174
CAssembler::CAssembler() {
    // Constructor.
    // Pre-size the line and token lists from a rough guess based on the current data size
    const int bytesPerLineGuess  = 16;
    const int tokensPerLineGuess = 10;
    int lineCountGuess = dataSize() / bytesPerLineGuess;
    lines.setNum(lineCountGuess);
    tokens.setNum(lineCountGuess * tokensPerLineGuess);

    // Let the error list know which assembler it belongs to
    errors.setOwner(this);

    // Initialize and sort the word lists
    initializeWordLists();

    // Section header number 0 is an all-zero header
    ElfFwcShdr emptyHeader;
    zeroAllMembers(emptyHeader);
    sectionHeaders.push(emptyHeader);
}
188
 
189
void CAssembler::go() {
190
 
191
    // Write feedback text to console
192
    feedBackText1();
193
 
194
    // Set default options
195
    if (cmd.codeSizeOption == 0) cmd.codeSizeOption = 1 << 24;
196
    if (cmd.dataSizeOption == 0) cmd.dataSizeOption = 1 << 15;
197
    // initialize options
198
    code_size = cmd.codeSizeOption;
199
    data_size = cmd.dataSizeOption;
200
 
201
    do {  // This loop is repeated only once. Just convenient to break out of in case of errors
202
        pass = 1;
203
        // Split input file into lines and tokens. Find symbol definitions
204
        pass1();
205
        if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS);  break;}
206
 
207
        pass = 2;
208
        // A. Handle metaprogramming directives
209
        // B. Classify lines
210
        // C. Identify symbol names, sections, labels, functions 
211
        pass2();
212
        if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS);  break;}
213
 
214
        //showTokens();  //!! for debugging only
215
        //showSymbols(); //!! for debugging only        
216
 
217
        pass = 3;
218
        // Interpret lines. Generate code and data
219
        pass3();
220
        if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS);  break;}
221
 
222
        pass = 4;
223
        // Resolve internal cross references, optimize forward references
224
        pass4();
225
        if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS);  break;}
226
 
227
        pass = 5;
228
        // Make binary file
229
        pass5();
230
        if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS);  break;}
231
 
232
    } while (false);
233
 
234
    // output any error messages
235
    errors.outputErrors();
236
    if (errors.numErrors()) cmd.mainReturnValue = 1; // make sure makefile process stops on error
237
 
238
    // output object file
239
    outFile.write(cmd.getFilename(cmd.outputFile));
240
}
241
 
242
 
243
// Character can be the start of a symbol name
244
inline bool nameChar1(char c) {
245
    return ((c | 0x20) >= 'a' && (c | 0x20) <= 'z') || ((c & 0x80) && allowUTF8) || strchr(allowedInNames, c);
246
}
247
 
248
// Character can be the part of a symbol name
249
inline bool nameChar2(char c) {
250
    return nameChar1(c) || (c >= '0' && c <= '9');
251
}
252
 
253
// Check if string begins with a number. Can be decimal, binary (0b), octal (0o),
// hexadecimal (0x), or floating point (with '.' and/or exponent E).
// s:       text to examine. Need not be zero-terminated
// maxlen:  number of characters available in s. No character at or beyond s[maxlen] is read
// isFloat: if not null, receives true if a '.' or exponent was seen. Written only when
//          the text starts like a number
// Returns the length of the part of the string that belongs to the number, or 0 if
// the string does not start with a digit or with '.' followed by a digit.
// The digits are not validated against the radix here. The value is extracted later
uint32_t isNumber(const char * s, int maxlen, bool * isFloat) {
    if (maxlen <= 0) return 0;                             // nothing to read
    bool is_float = false;
    char c = s[0];
    const bool beginsWithDigit = c >= '0' && c <= '9';
    // a leading '.' counts only if a digit follows within the available length
    const bool beginsWithPoint = c == '.' && maxlen > 1 && s[1] >= '0' && s[1] <= '9';
    if (!beginsWithDigit && !beginsWithPoint) return 0;
    int i = 0;
    int state = 0;
    // 0: begin
    // 1: after 0
    // 2: after digits 0-9
    // 3: after 0x
    // 4: after 0b or 0o
    // 5: after .
    // 6: after E
    // 7: after E09
    // 8: after E+-
    for (i = 0; i < maxlen; i++) {
        c = s[i];
        char cl = c | 0x20;   // lower case letter
        if (c == '0' && state == 0) {state = 1; continue;}
        if (cl == 'x' && state == 1) {state = 3; continue;}
        if ((cl == 'b' || cl == 'o') && state == 1) {state = 4; continue;}
        if (c == '.' && state <= 2)  {state = 5; is_float = true; continue;}
        if (cl == 'e' && (state <= 2 || state == 5)) {state = 6; is_float = true; continue;}
        if ((c == '+' || c == '-') && state == 6) {state = 8; continue;}
        if (c >= '0' && c <= '9') {
            if (state < 2) state = 2;
            if (state == 6) state = 7;
            continue;
        }
        if (cl >= 'a' && cl <= 'f' && state == 3) continue;  // hexadecimal digit
        // Anything else: stop here
        break;
    }
    if (isFloat) *isFloat = is_float;       // return isFloat
    return i;                               // return length
}
291
 
292
// Check if string is a register name
293
uint32_t isRegister(const char * s, uint32_t len) {
294
    uint32_t i, j, nl, num;
295
    for (i = 0; i < TableSize(registerNames); i++) {
296
        if ((s[0] | 0x20) == registerNames[i].name[0]) {   // first character match, lower case
297
            nl = (uint32_t)strlen(registerNames[i].name);  // length of register name prefix
298
            if (len < nl + 1 || len > nl + 2) continue;    // continue search if length wrong
299
            for (j = 0; j < nl; j++) {                     // check if each character matches
300
                if ((s[j] | 0x20) != registerNames[i].name[j]) { // lower case compare
301
                    j = 0xFFFFFFFF; break;
302
                }
303
            }
304
            if (j == 0xFFFFFFFF) continue;                 // no match
305
            if (s[j] < '0' || s[j] > '9') continue;        // not a number
306
            num = s[j] - '0';                              // get number, first digit
307
            if (len == nl + 2) {                           // two digit number
308
                if (s[j+1] < '0' || s[j+1] > '9') continue;// second digit not a number            
309
                num = num * 10 + (s[j+1] - '0');
310
            }
311
            if (num >= 32) continue;                       // number too high
312
            return num + registerNames[i].id;              // everyting matches
313
        }
314
    }
315
    return 0;       // not found. return 0
316
}
317
 
318
// write feedback text on stdout
319
void CAssembler::feedBackText1() {
320
    if (cmd.verbose) {
321
        // Tell what we are doing:
322
        printf("\nAssembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(cmd.outputFile));
323
    }
324
}
325
 
326
 
327
// Split input file into lines and tokens. Handle preprocessing directives. Find symbol definitions
328
void CAssembler::pass1() {
329
    uint32_t n = 0;                // offset into assembly file
330
    uint32_t m;                    // end of current token
331
    int32_t  i, f;                 // temporary
332
    int32_t  comment = 0;          // 0: normal, 1: inside comment to end of line, 2: inside /* */ comment
333
    uint32_t commentStart = 0;     // start position of multiline comment
334
    uint32_t commentStartColumn = 0;// start column of multiline comment
335
    char c;                        // current character or byte
336
    SToken token = {0};            // current token
337
    SKeyword keywSearch;           // record to search for keyword
338
    SOperator opSearch;            // record to search for operator
339
    SInstruction instructSearch;   // record to search for instruction
340
    SLine line = {0,0,0,0,0,0,0};  // line record
341
    lines.push(line);              // empty records for line 0
342
    linei = 1;                     // start at line 1
343
    numSwitch = 0;              // count switch statements
344
    tokens.push(token);            // unused token 0
345
 
346
    if (dataSize() >= 3 && (get<uint32_t>(0) & 0xFFFFFF) == 0xBFBBEF) {
347
        n += 3;                    // skip UTF-8 byte order mark
348
    }
349
 
350
    line.beginPos = n;             // start of line 1
351
    line.firstToken = tokens.numEntries();
352
    line.file = filei;
353
 
354
    // loop through file
355
    while (n < dataSize()) {
356
        c = get<char>(n);              // get character
357
 
358
        // is it space or a control character?
359
        if (uint8_t(c) <= 0x20) {
360
            if (c == ' ' || c == '\t') {   // skip space and tab
361
                n++;
362
                continue;
363
            }
364
            if (c == '\r' || c == '\n') {  // newline
365
                n++;
366
                if (c == '\r' && get<char>(n) == '\n') n++;  // "\r\n" windows newline
367
                if (comment == 1) comment = 0;                  // end comment
368
                if (n <= dataSize()) {
369
                    // finish current line
370
                    line.numTokens = tokens.numEntries() - line.firstToken;
371
                    line.linenum = linei++;
372
                    if (line.numTokens) {  // save line if not empty                  
373
                        lines.push(line);
374
                    }
375
                    // start next line
376
                    line.type = 0;
377
                    line.file = filei;
378
                    line.beginPos = n;
379
                    line.firstToken = tokens.numEntries();
380
                }
381
                continue;
382
            }
383
            // illegal control character
384
            token.type = TOK_ERR;
385
            line.type = LINE_ERROR;
386
            comment = 1;              // ignore rest of line
387
            m = tokens.push(token);     // save error token
388
            errors.report(n, 1, ERR_CONTROL_CHAR);
389
        }
390
        // prepare token of any type
391
        token.pos  = n;
392
        token.stringLength = 1;
393
        token.id   = 0;
394
        //token.column = n - line.beginPos;
395
 
396
        // is it a name?
397
        if (!comment && nameChar1(c)) {
398
            // start of a name
399
            m = n+1;
400
            while (m < dataSize() && nameChar2(get<char>(m))) m++;
401
            // name goes from position n to m-1. make token
402
            token.type = TOK_NAM;
403
            token.pos = n;
404
            token.stringLength = m - n;
405
 
406
            // is it a register name
407
            f = isRegister((char*)buf()+n, token.stringLength);
408
            if (f) {
409
                token.type = TOK_REG;
410
                token.id = f;
411
            }
412
            // is it a keyword?
413
            if (token.type == TOK_NAM && m-n < sizeof(keywSearch.name)) {
414
                memcpy(keywSearch.name, buf()+n, m-n);
415
                keywSearch.name[m-n] = 0;
416
                f = keywords.findFirst(keywSearch);
417
                if (f >= 0) {  // keyword found
418
                    token.id = keywords[f].id;
419
                    token.type = keywords[f].id >> 24;
420
                    if (token.id == HLL_SWITCH) numSwitch++;
421
                }
422
            }
423
            // is it an instruction?
424
            if (token.type == TOK_NAM && m-n < sizeof(instructSearch.name)) {
425
                memcpy(instructSearch.name, buf()+n, m-n);
426
                instructSearch.name[m-n] = 0;
427
                f = instructionlistNm.findFirst(instructSearch);
428
                if (f >= 0) {  // instruction name found
429
                    token.type = TOK_INS;
430
                    token.id = instructionlistNm[f].id;
431
                }
432
            }
433
            n = m;
434
            tokens.push(token);     // save token
435
            continue;
436
        }
437
 
438
        // Is it a number?
439
        if (!comment) {
440
            bool isFloat;
441
            f = isNumber((char*)buf() + n, dataSize() - n, &isFloat);
442
            if (f) {
443
                token.type = TOK_NUM + isFloat;
444
                token.id = n;               // save number as string. The value is extracted later
445
                token.stringLength = f;
446
                n += f;
447
                tokens.push(token);     // save token
448
                continue;
449
            }
450
        }
451
 
452
        // is it an operator?
453
        opSearch.name[0] = c;
454
        opSearch.name[1] = 0;
455
        f = operators.findFirst(opSearch);
456
        if (f >= 0) {
457
            // found single-character operator
458
            // make a greedy search for multi-character operators
459
            i = f;
460
            for (i = f+1; (uint32_t)i < operators.numEntries(); i++) {
461
                if (operators[i].name[0] != c) break;
462
                if (memcmp((char*)buf()+n, operators[i].name, strlen(operators[i].name)) == 0) f = i;
463
            }
464
            token.type = TOK_OPR;
465
            token.id = operators[f].id;
466
            token.priority = operators[f].priority;
467
            token.stringLength = (uint32_t)strlen(operators[f].name);
468
 
469
            // search for operators that need consideration here
470
            switch (token.id) {
471
 
472
            case 39: case '"':  // quoted string in single or double quotes
473
                if (comment) break;
474
                // search for end of string
475
                token.type = token.id == 39 ? TOK_CHA : TOK_STR;
476
                token.pos = n + 1;
477
                m = n;
478
                while (true) {
479
                    if (get<char>(m+1) == '\r' || get<char>(m+1) == '\n' || m == dataSize()) {
480
                        // end of line without matching end quote. multi-line quotes not allowed
481
                        token.type = TOK_ERR;
482
                        errors.report(token.pos-1, 1, ERR_QUOTE_BEGIN);
483
                        comment = 1; // skip rest of line
484
                        break;
485
                    }
486
                    if (get<char>(m+1) == c && get<char>(m) != '\\') {  // matching end quote not preceded by escape backslash
487
                        token.stringLength = m - n;
488
                        n += 2;
489
                        break;
490
                    }
491
                    m++;
492
                }
493
                break;
494
 
495
            case '/'+D2:            // "//". comment to end of line
496
                if (comment == 0) {
497
                    comment = 1;
498
                }
499
                break;
500
            case 'c':                                 // "/*" start of comment
501
                if (comment == 1) {
502
                    n += token.stringLength;         // skip and don't save token
503
                    continue;
504
                }
505
                if (comment == 2) {                     // nested comment
506
                    if (allowNestedComments) {
507
                        comment++;
508
                    }
509
                    else {
510
                        token.type = TOK_ERR;
511
                        errors.report(n, 2, ERR_COMMENT_BEGIN);
512
                    }
513
                    break;
514
                }
515
                comment = 2;
516
                commentStart = n;  commentStartColumn = n - line.beginPos;
517
                break;
518
            case 'd':                                // "*/" end of comment
519
                if (comment == 1) {
520
                    n += token.stringLength;         // skip and don't save token
521
                    continue;
522
                }
523
                if (comment == 2) {
524
                    comment = 0;
525
                    n += token.stringLength;         // skip and don't save token
526
                    continue;
527
                }
528
                else if (comment > 2 && allowNestedComments) {
529
                    comment--;
530
                    n += token.stringLength;         // skip and don't save token
531
                    continue;
532
                }
533
                else {
534
                    token.type = TOK_ERR;           // unmatched end comment
535
                    errors.report(n, 2, ERR_COMMENT_END);
536
                    comment = 1;
537
                }
538
                break;
539
            case ';':
540
                // semicolon starts a new pseudo-line
541
                if (comment) break;
542
                // finish current line
543
                tokens.push(token);     // the ';' token is used only in for(;;) loops. should be ignored at the end of the line otherwise
544
                n += token.stringLength;
545
                line.numTokens = tokens.numEntries() - line.firstToken;
546
                line.linenum = linei;
547
                if (line.numTokens) {  // save line if not empty                  
548
                    lines.push(line);
549
                }
550
                // start next line
551
                line.beginPos = n;
552
                line.firstToken = tokens.numEntries();
553
                continue;  // don't save ';' token twice
554
            case '{':  case '}':
555
                if (comment) break;
556
                // put each bracket in a separate pseudo-line to ease high level language parsing
557
                // finish current line
558
                line.numTokens = tokens.numEntries() - line.firstToken;
559
                line.linenum = linei;
560
                if (line.numTokens) {  // save line if not empty                  
561
                    lines.push(line);
562
                }
563
                // start line with bracket only
564
                line.beginPos = n;
565
                line.firstToken = tokens.numEntries();
566
                tokens.push(token);      // save token
567
                n += token.stringLength;
568
                line.numTokens = 1;
569
                lines.push(line);
570
                // start line after bracket
571
                line.beginPos = n;
572
                line.firstToken = tokens.numEntries();
573
                continue;
574
            }
575
            if (comment == 0 && token.type != TOK_ERR) {
576
                // save token unless we are inside a comment or an error has occurred           
577
                tokens.push(token);     // save token
578
            }
579
            n += token.stringLength;
580
            continue;
581
        }
582
 
583
        if (comment) {
584
            // we are inside a comment. Continue search only for end of line or end of comment
585
            n++;
586
            continue;
587
        }
588
 
589
        // none of the above. Make token for illegal character
590
        token.type = TOK_ERR;
591
        line.type = LINE_ERROR;
592
        errors.report(n, 1, ERR_ILLEGAL_CHAR);
593
        comment = 1;              // ignore rest of line
594
        n++;
595
    }
596
    // finish last line
597
   // tokens.push(token);
598
    line.numTokens = tokens.numEntries() - line.firstToken;
599
    lines.push(line);
600
    // start pseudo line
601
    line.beginPos = n;
602
    line.firstToken = tokens.numEntries();
603
    line.type = 0;
604
 
605
    // check for unmatched comment
606
    if (comment >= 2) {
607
        token.type = TOK_ERR;
608
        errors.report(commentStart, commentStartColumn, ERR_COMMENT_BEGIN);
609
    }
610
    // make EOF token in the end
611
    line.type = 0;
612
    line.beginPos = n;
613
    line.firstToken = tokens.numEntries();
614
    line.numTokens = 1;
615
    lines.push(line);
616
    token.pos   = n;
617
    token.stringLength   = 0;
618
    token.type = TOK_EOF;    // end of file
619
    tokens.push(token);     // save eof token
620
}
621
 
622
 
623
void CAssembler::interpretSectionDirective() {
624
    // Interpret section directive during pass 2 or 3
625
    // pass 2: identify section name and type, and give it a number
626
    // pass 3: make section header
627
 
628
    // to do: nested sections
629
 
630
    uint32_t tok;                                          // token number
631
    ElfFWC_Sym2 sym;                                       // symbol record
632
    int32_t sectionsym = 0;                                // index to symbol record defining current section name
633
    uint32_t state = 0;                                    // 1: after align, 2: after '='
634
    ElfFwcShdr sectionHeader;                             // section header
635
    zeroAllMembers(sym);                                   // reset symbol
636
    zeroAllMembers(sectionHeader);                         // reset section header
637
    sectionHeader.sh_type = SHT_PROGBITS;                  // default section type
638
 
639
    sectionFlags = 0;
640
    for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get section attributes
641
        if (tokens[tok].type == TOK_ATT) {
642
            if (tokens[tok].id == ATT_UNINIT && state != 2) {
643
                sectionHeader.sh_type = SHT_NOBITS;                // uninitialized section (BSS)
644
                sectionFlags |= SHF_READ | SHF_WRITE;
645
            }
646
            else if (tokens[tok].id == ATT_COMDAT && state != 2) {
647
                sectionHeader.sh_type = SHT_COMDAT;                // communal section. duplicates and unreferenced sections are removed
648
            }
649
            else if (tokens[tok].id != ATT_ALIGN && state == 0) {
650
                sectionFlags |= tokens[tok].id & 0xFFFFFF;
651
                if (sectionFlags & SHF_EXEC) sectionFlags |= SHF_IP;  // executable section must be IP based
652
            }
653
            else if (tokens[tok].id == ATT_ALIGN && state == 0) {
654
                state = 1;
655
            }
656
            else {
657
                errors.report(tokens[tok]);  break;
658
            }
659
        }
660
        else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_IP    && state == 0)   sectionFlags |= SHF_IP;
661
        else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_DATAP && state == 0)   sectionFlags |= SHF_DATAP;
662
        else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_THREADP && state == 0) sectionFlags |= SHF_THREADP;
663
        else if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=' && state == 1) state = 2;
664
        else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',' && state != 2) ; // comma, ignore
665
        else if (tokens[tok].type == TOK_NUM && state == 2) {
666
            if (pass >= 3) {  // alignment value
667
                uint32_t alignm = expression(tok, 1, 0).value.w;
668
                if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) errors.reportLine(ERR_ALIGNMENT);
669
                else {
670
                    sectionHeader.sh_align = bitScanReverse(alignm);
671
                }
672
            }
673
            state = 0;
674
        }
675
        else {
676
            errors.report(tokens[tok]);  break;
677
        }
678
    }
679
    // find or define symbol with section name
680
    sectionsym = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
681
    if (sectionsym <= 0) {
682
        // symbol not previously defined. Define it now
683
        sym.st_type = STT_SECTION;
684
        sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
685
        sym.st_bind = sectionFlags;
686
        sectionsym = addSymbol(sym);         // save symbol with section name
687
    }
688
    else {
689
        // symbol already defined. check that it is a section name
690
        if (symbols[sectionsym].st_type != STT_SECTION) {
691
            errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);
692
        }
693
    }
694
    sectionFlags |= SHF_ALLOC;
695
    lines[linei].type = LINE_SECTION;                          // line is section directive
696
    lines[linei].sectionType = sectionFlags;
697
    if (symbols[sectionsym].st_section == 0) {
698
        // new section. make section header
699
        sectionHeader.sh_name = symbols[sectionsym].st_name;
700
        if (sectionFlags & SHF_EXEC) {
701
            sectionHeader.sh_entsize = 4;
702
            if (sectionHeader.sh_align < 2) sectionHeader.sh_align = 2;
703
            sectionFlags |= SHF_IP;
704
        }
705
        else { // data section
706
            if (!(sectionFlags & (SHF_READ | SHF_WRITE))) sectionFlags |= SHF_READ | SHF_WRITE; // read or write attributes not specified, default is both
707
            if (!(sectionFlags & (SHF_IP | SHF_DATAP | SHF_THREADP))) {  // address reference not specified. assume datap if writeable, ip if readonly
708
                if (sectionFlags & SHF_WRITE) sectionFlags |= SHF_DATAP;
709
                else sectionFlags |= SHF_IP;
710
            }
711
        }
712
        sectionHeader.sh_flags = sectionFlags;
713
        section = sectionHeaders.push(sectionHeader);
714
        symbols[sectionsym].st_section = section;
715
    }
716
    else {  // this section is seen before
717
        section = symbols[sectionsym].st_section;
718
        if (sectionHeaders[section].sh_align < sectionHeader.sh_align) sectionHeaders[section].sh_align = sectionHeader.sh_align;
719
        if (sectionFlags && (sectionFlags & ~sectionHeaders[section].sh_flags)) errors.reportLine(ERR_SECTION_DIFFERENT_TYPE);
720
        sectionFlags = (uint32_t)sectionHeaders[section].sh_flags;
721
        if (sectionHeader.sh_align > 2) {
722
            // insert alignment code
723
            SCode code;
724
            zeroAllMembers(code);
725
            code.instruction = II_ALIGN;
726
            code.value.u = (int64_t)1 << sectionHeader.sh_align;
727
            code.sizeUnknown = 0x80;
728
            code.section = section;
729
            codeBuffer.push(code);
730
        }
731
    }
732
}
733
 
734
void CAssembler::interpretFunctionDirective() {
735
    // Interpret function directive during pass 2
736
    uint32_t tok;                     // token number
737
    ElfFWC_Sym2 sym;                  // symbol record
738
    zeroAllMembers(sym);              // reset symbol
739
    int32_t symi;
740
 
741
    symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
742
    if (symi > 0) {
743
        if (pass == 2) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);  // symbol already defined
744
    }
745
    else {
746
        // define symbol
747
        sym.st_type = STT_FUNC;
748
        sym.st_other = STV_IP;
749
        sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
750
        sym.st_bind = 0;
751
        sym.st_section = section;
752
        for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get function attributes
753
            if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') continue;
754
            if (tokens[tok].id == ATT_WEAK) sym.st_bind |= STB_WEAK;
755
            if (tokens[tok].id == ATT_REGUSE) {
756
                if (tokens[tok+1].id == '=' && tokens[tok+2].type == TOK_NUM) {
757
                    tok += 2;
758
                    sym.st_reguse1 = expression(tok, 1, 0).value.w;
759
                    sym.st_other |= STV_REGUSE;
760
                    if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) {
761
                        tok += 2;
762
                        sym.st_reguse2 = expression(tok, 1, 0).value.w;
763
                    }
764
                }
765
            }
766
            else if (tokens[tok].type == TOK_DIR && tokens[tok].id == DIR_PUBLIC) sym.st_bind |= STB_GLOBAL;
767
            else {
768
                errors.report(tokens[tok]);  // unexpected token
769
            }
770
        }
771
        symi = addSymbol(sym);          // save symbol with function name
772
    }
773
    lines[linei].type = LINE_FUNCTION;           // line is function directive
774
 
775
    if (pass == 3 && symi) {
776
        // make a label here. The final address will be calculated in pass 4
777
        SCode code;                              // current instruction code
778
        zeroAllMembers(code);                    // reset code structure
779
        code.label = symbols[symi].st_name;
780
        code.section = section;
781
        codeBuffer.push(code);
782
    }
783
}
784
 
785
void CAssembler::interpretEndDirective() {
786
    // Interpret section or function end directive during pass 2
787
    ElfFWC_Sym2 sym;                  // symbol record
788
    zeroAllMembers(sym);              // reset symbol
789
    int32_t symi;
790
    CTextFileBuffer tempBuffer;       // temporary storage of names
791
 
792
    symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
793
    if (symi <= 0) {
794
        errors.reportLine(ERR_UNMATCHED_END);
795
    }
796
    else {
797
        if (symbols[symi].st_type == STT_SECTION) {
798
            if (symbols[symi].st_section == section) {
799
                // current section ends here
800
                section = 0;  sectionFlags = 0;
801
            }
802
            else {
803
                errors.reportLine(ERR_UNMATCHED_END);
804
            }
805
        }
806
        else if (symbols[symi].st_type == STT_FUNC && pass >= 4) {
807
            symbols[symi].st_unitsize = 4;
808
            // to do: insert size!
809
            //symbols[symi].st_unitsize = ?
810
            // support function(){} syntax. prevent nested functions
811
        }
812
    }
813
    lines[linei].type = LINE_ENDDIR;        // line is end directive
814
}
815
 
816
// Interpret line specifying options
817
void CAssembler::interpretOptionsLine() {
818
 
819
    // Expecting a line of the type:
820
    // "options codesize = 0x10000, datasize = 1 << 20"
821
    uint32_t tok;                      // token number
822
    uint32_t state = 0;                // 0: start, 1: after option name, 2: after equal sign, 3: after expression
823
    const char * optionname = 0;
824
    int option = 0;                    // 1: codesize, 2: datasize
825
    SExpression val;                   // value to be assigned
826
    SCode code;                        // instruction code containing options
827
    for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) {
828
 
829
        switch (state) {
830
        case 0:                        // start. expect name "datasize" or "codesize"
831
            if (tokens[tok].type != TOK_NAM) {
832
                errors.report(tokens[tok]);  return;  // unexpected token
833
            }
834
            optionname = (char*)buf()+tokens[tok].pos; // tokens[tok].stringLength;
835
            if (strncasecmp_(optionname, "codesize", 8) == 0) option = 1;
836
            else if (strncasecmp_(optionname, "datasize", 8) == 0) option = 2;
837
            else {
838
                errors.report(tokens[tok]);  return;  // unexpected name
839
            }
840
            state = 1;
841
            break;
842
 
843
        case 1:  // after name, expecting equal sign
844
            if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=') {
845
                state = 2;
846
            }
847
            else {
848
                errors.report(tokens[tok]);  return;  // unexpected token
849
            }
850
            break;
851
 
852
        case 2:  // expect expression
853
            val = expression(tok, tokenB + tokenN - tok, 0);  // evaluate number or expression
854
            tok += val.tokens - 1;
855
            if (val.etype != XPR_INT) {
856
                errors.reportLine(ERR_MUST_BE_CONSTANT);
857
                return;
858
            }
859
            zeroAllMembers(code);                    // reset code structure
860
            switch (option) {
861
            case 1:  // set codesize
862
                if (val.value.u == 0) code_size = cmd.codeSizeOption;
863
                else code_size = val.value.u;
864
                code.value.u = code_size;
865
                break;
866
            case 2:  // set datasize 
867
                if (val.value.u == 0) data_size = cmd.dataSizeOption;
868
                else data_size = val.value.u;
869
                code.value.u = data_size;
870
                break;
871
            }
872
            // This is called only in pass 3. Save this option for pass 4:
873
            code.instruction = II_OPTIONS;
874
            code.section = section;
875
            code.fitNum = option;
876
            code.sizeUnknown = 1;
877
            codeBuffer.push(code);
878
            state = 3;
879
            break;
880
 
881
        case 3:  // expect comma or nothing
882
            if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
883
                state = 0;    // start over after comma
884
            }
885
            else {
886
                errors.report(tokens[tok]);  return;  // unexpected token
887
            }
888
        }
889
    }
890
}
891
 
892
 
893
// Find symbol by index into symbolNameBuffer. The return value is an index into symbols. 
894
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name
895
uint32_t CAssembler::findSymbol(uint32_t namei) {
896
    ElfFWC_Sym2 sym;                                       // temporary symbol record used for searching
897
    sym.st_name = namei;
898
    return symbols.findFirst(sym);                         // find symbol by name
899
}
900
 
901
// Find symbol by name as string. The return value is an index into symbols. 
902
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name
903
uint32_t CAssembler::findSymbol(const char * name, uint32_t len) {
904
    uint32_t saveSize = symbolNameBuffer.dataSize();       // save symbolNameBuffer size for later reset
905
    uint32_t namei = symbolNameBuffer.putStringN(name, len); // put name temporarily into symbolNameBuffer
906
    int32_t symi = findSymbol(namei);                      // find symbol by name index
907
    symbolNameBuffer.setSize(saveSize);                    // remove temporary name from symbolNameBuffer
908
    return symi;                                           // return symbol index
909
}
910
 
911
// Add a symbol to symbols list
912
uint32_t CAssembler::addSymbol(ElfFWC_Sym2 & sym) {
913
    int32_t f = symbols.findFirst(sym);
914
    if (f >= 0) {
915
        // error: symbol already defined
916
        return 0;
917
    }
918
    else {
919
        return symbols.addUnique(sym);
920
    }
921
}
922
 
923
// interpret   name: options {, name: options}
924
void CAssembler::interpretExternDirective() {
925
    uint32_t tok;                     // token number
926
    uint32_t nametok = 0;             // last name token
927
    ElfFWC_Sym2 sym;                  // symbol record
928
    zeroAllMembers(sym);              // reset symbol
929
    sym.st_bind = STB_GLOBAL;
930
 
931
    // Example: extern name1: int32 weak, name2: function, name3, name4: read 
932
    uint32_t state = 0;  // 0: after extern or comma, 
933
                         // 1: after name, 
934
                         // 2: after colon
935
 
936
    // loop through tokens on this line
937
    for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) {
938
        switch (state) {
939
        case 0:  // after extern or comma. expecting name
940
            if (tokens[tok].type == TOK_NAM) {
941
                // name encountered
942
                sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
943
                state = 1;  nametok = tok;
944
            }
945
            else errors.report(tokens[tok]);
946
            break;
947
        case 1: // after name. expecting colon or comma
948
            if (tokens[tok].type == TOK_OPR) {
949
                if (tokens[tok].id == ':') {
950
                    state = 2;
951
                    continue;
952
                }
953
                else if (tokens[tok].id == ',') {
954
                    goto COMMA;
955
                }
956
            }
957
            errors.report(tokens[tok]);
958
            break;
959
        case 2:  // after colon. expecting attribute or comma or end of line
960
            if (tokens[tok].type == TOK_TYP) {
961
                // symbol size given by type token
962
                uint32_t s = tokens[tok].id & 0xF;
963
                if (s > 4) s -= 3;  // float types
964
                sym.st_unitsize = uint32_t(1 << s);
965
                sym.st_unitnum = 1;
966
            }
967
            else if (tokens[tok].type == TOK_ATT || tokens[tok].type == TOK_DIR) {
968
                ATTRIBUTE:
969
                switch (tokens[tok].id) {
970
                case DIR_FUNCTION: case ATT_EXEC: // function or execute
971
                    if (sym.st_type) {
972
                        errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE);
973
                    }
974
                    sym.st_type = STT_FUNC;
975
                    sym.st_other = STV_IP | STV_EXEC;
976
                    break;
977
                case ATT_READ:  // read
978
                    if (sym.st_type == 0) sym.st_other |= STV_READ;
979
                    break;
980
                case ATT_WRITE:  // write
981
                    if (sym.st_type == STT_FUNC) {
982
                        errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE);
983
                    }
984
                    else {
985
                        sym.st_type = STT_OBJECT;
986
                    }
987
                    break;
988
                case ATT_WEAK:   // weak
989
                    sym.st_bind = STB_WEAK;
990
                    break;
991
                case ATT_CONSTANT:  // constant
992
                    sym.st_type = STT_CONSTANT;
993
                    break;
994
                case ATT_REGUSE:
995
                    if (tokens[tok+1].id == '=' && (tokens[tok+2].type == TOK_NUM /*|| tokens[tok+2].type == TOK_OPR)*/)) {
996
                        tok += 2;
997
                        sym.st_reguse1 = expression(tok, 1, 0).value.w;
998
                        sym.st_other |= STV_REGUSE;
999
                        if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) {
1000
                            tok += 2;
1001
                            sym.st_reguse2 = expression(tok, 1, 0).value.w;
1002
                        }
1003
                    }
1004
                    break;
1005
                default:  // error
1006
                    errors.report(tokens[tok]);
1007
                }
1008
            }
1009
            else if (tokens[tok].type == TOK_REG) {
1010
                switch (tokens[tok].id) {
1011
                case REG_IP:
1012
                    sym.st_other |= STV_IP;  break;
1013
                case REG_DATAP:
1014
                    sym.st_other |= STV_DATAP;  break;
1015
                case REG_THREADP:
1016
                    sym.st_other |= STV_THREADP;  break;
1017
                default: errors.report(tokens[tok]);
1018
                }
1019
            }
1020
            else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
1021
                // end of definition. save symbol
1022
            COMMA:
1023
                if (tok < tokenB + tokenN
1024
                    && (tokens[tok + 1].type == TOK_ATT || tokens[tok + 1].type == TOK_DIR)) {
1025
                    tok++; goto ATTRIBUTE;
1026
                }
1027
                uint32_t symi = addSymbol(sym);          // save symbol with function name
1028
                if (symi == 0) {  // symbol already defined
1029
                    errors.report(tokens[nametok].pos, tokens[nametok].stringLength, ERR_SYMBOL_DEFINED);
1030
                }
1031
                sym.st_name = 0;          // clear record for next symbol
1032
                sym.st_type = 0;
1033
                sym.st_other = 0;
1034
                sym.st_unitsize = 0;
1035
                sym.st_unitnum = 0;
1036
                sym.st_bind = STB_GLOBAL;
1037
                state = 0;
1038
            }
1039
            else {
1040
                errors.report(tokens[tok]);
1041
            }
1042
            break;
1043
        }
1044
    }
1045
    if (state) {  // last extern definition does not end with comma. finish it here
1046
        goto COMMA;
1047
    }
1048
    lines[linei].type = LINE_DATADEF;        // line is data definition
1049
}
1050
 
1051
 
1052
void CAssembler::interpretLabel(uint32_t tok) {
    // line begins with a name. interpret label
    // tok is the index of the name token. A symbol with this name is created in the current section.
    // The symbol type (function or object) and the line type (code or data) are guessed from the
    // section flags and from the token two positions after the name.
    // tokens[tok].id is set to the name index of the new symbol.
    // ERR_SYMBOL_DEFINED is reported if addSymbol refuses the symbol because the name already exists.
    // to do: add type if data. not string type
    ElfFWC_Sym2 sym;   // symbol record
    zeroAllMembers(sym); // reset symbol

    // save name
    sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
    sym.st_section = section;
    // determine if code or data from section type
    if (sectionFlags & SHF_EXEC) {
        sym.st_type = STT_FUNC;
        sym.st_other = STV_EXEC | STV_IP;
    }
    else {
        sym.st_type = STT_OBJECT;
        sym.st_other = sectionFlags & STV_SECT_ATTR;
    }

    // look for more exact type information
    if (tokenN > 2) {
        uint32_t t = tok+2;                       // token after "name :"
        if (tokens[t].type == TOK_TYP) {
            // size is given by type token
            uint32_t s = tokens[t].id & 0xF;
            if (s > 4) s -= 3;
            sym.st_unitsize = uint32_t(1 << s);
            sym.st_unitnum = 1;
            if (tokenN > 3) t++;                  // look at the token after the type
        }
        if (tokens[t].type == TOK_NUM || tokens[t].type == TOK_FLT) {
            sym.st_type = STT_OBJECT;
            lines[linei].type = LINE_DATADEF;
        }
        else if (tokens[t].type == TOK_REG || tokens[t].type == TOK_INS || tokens[t].id == '[') {
            lines[linei].type = LINE_CODEDEF;
            sym.st_type = STT_FUNC;
        }
    }
    if (section) { // copy type info from section. This replaces the st_other value set above
        sym.st_other = sectionHeaders[section].sh_flags & STV_SECT_ATTR;
    }

    if (lines[linei].type == 0) {
        // line type not determined above. use section type
        lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF;
    }

    uint32_t symi = addSymbol(sym);     // add symbol to symbols list. 0 if name already defined

    if (section && symi) {
        // symbol address.
        // Skipped when symi == 0, to avoid overwriting the value of symbols[0]
        symbols[symi].st_value = sectionHeaders[section].sh_size;
    }
    tokens[tok].id = symbols[symi].st_name;         // save symbol name index
    if (symi == 0) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);
}
1107
 
1108
 
1109
// interpret assembly style variable definition:
1110
// label: type value1, value2
1111
void CAssembler::interpretVariableDefinition1() {
1112
    int state = 0;      // 0: start
1113
                        // 1: after label
1114
                        // 2: after :
1115
                        // 3: after type or ,
1116
                        // 4: after value
1117
    uint32_t tok;                      // token index
1118
    uint32_t type = 0;                 // data type
1119
    uint32_t dsize = 0;                // data size
1120
    uint32_t dsize1;                   // log2(dsize)
1121
    uint32_t dnum = 0;                 // number of data items
1122
    uint32_t stringlen = 0;            // length of string
1123
    uint32_t symi = 0;                 // symbol index
1124
    ElfFWC_Sym2 sym;                   // symbol record
1125
    zeroAllMembers(sym);               // reset symbol
1126
    SExpression exp1;                  // expression when interpreting numeric expression
1127
 
1128
    if (section == 0) {
1129
        errors.reportLine(ERR_DATA_WO_SECTION);
1130
    }
1131
 
1132
    // loop through tokens on this line
1133
    for (tok = tokenB; tok < tokenB + tokenN; tok++) {
1134
        switch (state) {
1135
        case 0:  // start
1136
            if (tokens[tok].type == TOK_NAM) { // name. make symbol
1137
                sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
1138
                sym.st_type = STT_OBJECT;
1139
                symi = symbols.addUnique(sym);
1140
                tokens[tok].type = TOK_SYM;      // change token type
1141
                tokens[tok].id = symbols[symi].st_name;  // use name offset as unique identifier because symbol index can change
1142
                state = 1;
1143
            }
1144
            else if (tokens[tok].type == TOK_SYM) { // symbol
1145
                symi = findSymbol(tokens[tok].id);
1146
                if (symi > 0) {
1147
                    if (pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED);  // symbol already defined
1148
                }
1149
                state = 1;
1150
            }
1151
            else if (tokens[tok].type == TOK_TYP) {
1152
                goto TYPE_TOKEN;
1153
            }
1154
            else errors.report(tokens[tok]);
1155
            if (symi && section) {
1156
                symbols[symi].st_value = sectionHeaders[section].sh_size;
1157
            }
1158
            break;
1159
        case 1:  // after label. expect colon
1160
            if (tokens[tok].type == TOK_OPR && tokens[tok].id == ':') {
1161
                state = 2;
1162
            }
1163
            else errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_EXPECT_COLON);
1164
            break;
1165
        case 2:  // expect type
1166
            if (tokens[tok].type == TOK_TYP) {
1167
                TYPE_TOKEN:
1168
                type = tokens[tok].id & 0xFF;
1169
                dsize1 = type & 0xF;
1170
                if (type & 0x40) dsize1 -= 3;
1171
                dsize = 1 << dsize1;
1172
                state = 3;
1173
                if (section) {  // align data
1174
                    uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
1175
                    if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1;  // update section alignment
1176
                    if (addr & (dsize - 1)) { // needs to insert zeroes
1177
                        uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize;
1178
                        sectionHeaders[section].sh_size = addr2;           // update address
1179
                        if (symi) symbols[symi].st_value = addr2;          // update symbol address
1180
                        if (pass >= 3) {
1181
                            dataBuffers[section].align((uint32_t)dsize);   // put zeroes in data buffer
1182
                        }
1183
                    }
1184
                }
1185
            }
1186
            else errors.report(tokens[tok]);
1187
            break;
1188
        case 3:  // after type. expect value. evaluate expression
1189
            exp1 = expression(tok, tokenB + tokenN - tok, pass < 3 ? 0x10 : 0); // pass 3: may contain symbols not defined yet
1190
            tok += exp1.tokens - 1;
1191
            if (exp1.etype & XPR_STRING) {  // string expression: get size
1192
                if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE);  // string must use type int8
1193
                stringlen = exp1.sym2; // string length
1194
            }
1195
            else stringlen = 0;
1196
            if (pass < 3) {
1197
                if (section) sectionHeaders[section].sh_size += stringlen ? stringlen : dsize;  // update address
1198
            }
1199
            else {
1200
                if (section) {
1201
                    // save data of desired type
1202
                    if (exp1.etype & XPR_FLT) {
1203
                        // floating point number specified
1204
                        if ((type & 0xF0) == (TYP_INT8 & 0xF0)) {  // float specified, integer expected
1205
                            exp1.value.i = int64_t(exp1.value.d);
1206
                            errors.reportLine(ERR_CONFLICT_TYPE);
1207
                        }
1208
                    }
1209
                    else if (exp1.etype & XPR_INT) {
1210
                        if (type & TYP_FLOAT) {  // integer specified, float expected
1211
                            exp1.value.d = double(exp1.value.i);  // convert to float
1212
                        }
1213
                    }
1214
                    int64_t value = exp1.value.i;  //value of expression
1215
                    if (exp1.sym3) {
1216
                        // calculation of symbol value. add relocation if needed
1217
                        uint32_t size = type & 0xF;
1218
                        if (type & 0x40) size -= 3;
1219
                        size = 1 << size;
1220
                        //value = calculateConstantOperand(exp1, dataBuffers[section].dataSize(), size);                            
1221
                        value = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize);
1222
                        if (exp1.etype & XPR_ERROR) {
1223
                            errors.reportLine((uint32_t)value); // report error
1224
                            break;
1225
                        }
1226
                        // check for overflow
1227
                        bool overflow = false;
1228
                        switch (type & 0xFF) {
1229
                        case TYP_INT8 & 0xFF:
1230
                            overflow = value > 0x7F || value < -0x80;
1231
                            break;
1232
                        case TYP_INT16 & 0xFF:
1233
                            overflow = value > 0x7FFF || value < -0x8000;
1234
                            break;
1235
                        case TYP_INT32 & 0xFF:
1236
                            overflow = value > 0x7FFFFFFF || value < int32_t(0x80000000);
1237
                            break;
1238
                        default:;
1239
                        }
1240
                        if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows
1241
                    }
1242
                    if (sectionHeaders[section].sh_type == SHT_NOBITS) {
1243
                        // uninitialized (BSS) section. check that value is zero, but don't store
1244
                        if (value != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero
1245
                    }
1246
                    else {
1247
                        // save data
1248
                        switch (type & 0xFF) {
1249
                        case TYP_INT8 & 0xFF:
1250
                            if (stringlen) {
1251
                                dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen);
1252
                                break;
1253
                            }
1254
                            dataBuffers[section].push(&value, 1);  break;
1255
                        case TYP_INT16 & 0xFF:
1256
                            dataBuffers[section].push(&value, 2);  break;
1257
                        case TYP_INT32 & 0xFF:
1258
                            dataBuffers[section].push(&value, 4);  break;
1259
                        case TYP_INT64 & 0xFF:
1260
                            dataBuffers[section].push(&value, 8);  break;
1261
                        case TYP_INT128 & 0xFF:
1262
                            dataBuffers[section].push(&value, 8);
1263
                            value = value >> 63;     // sign extend
1264
                            dataBuffers[section].push(&value, 8);
1265
                            break;
1266
                        case TYP_FLOAT16 & 0xFF:  // half precision
1267
                            exp1.value.w = double2half(exp1.value.d);
1268
                            dataBuffers[section].push(&exp1.value.w, 2);  break;
1269
                        case TYP_FLOAT32 & 0xFF: { // single precision
1270
                            float val = float(exp1.value.d);
1271
                            dataBuffers[section].push(&val, 4); }
1272
                            break;
1273
                        case TYP_FLOAT64 & 0xFF:  // double precision
1274
                            dataBuffers[section].push(&exp1.value.d, 8);  break;
1275
                        }
1276
                    }
1277
                    sectionHeaders[section].sh_size += stringlen ? stringlen : dsize;  // update address
1278
                }
1279
            }
1280
            if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_SYM1 | XPR_UNRESOLV)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) errors.report(tokens[tok]);
1281
 
1282
            if (stringlen) dnum += stringlen; else dnum += 1;
1283
            state = 4;
1284
            break;
1285
        case 4:  // after value. expect comma or end of line
1286
            if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
1287
                state = 3;
1288
            }
1289
            else errors.report(tokens[tok]);
1290
            break;
1291
        }
1292
        if (lineError) return;
1293
    }
1294
    if (state != 4 && state != 2) errors.report(tokens[tok-1]);
1295
    if (symi) { // save size
1296
        symbols[symi].st_unitsize = dsize;
1297
        symbols[symi].st_unitnum = dnum;
1298
        symbols[symi].st_section = section;
1299
        if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT;
1300
        if (section) { // copy information from section
1301
            symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR;
1302
        }
1303
    }
1304
}
1305
 
1306
// interpret C style variable definition:
1307
// type name1 = value1, name2[num] = {value, value, ..}
1308
void CAssembler::interpretVariableDefinition2() {
1309
    int state = 0;      // 0: start
1310
                        // 1: after type or comma
1311
                        // 2: after name
1312
                        // 3: after [
1313
                        // 4: after [number
1314
                        // 5: after =
1315
                        // 6: after = number
1316
                        // 7: after {
1317
                        // 8: after {number
1318
 
1319
    uint32_t tok;                           // token index
1320
    uint32_t dsize = 0;                     // data element size
1321
    uint32_t dsize1 = 0;                    // data element size = 1 << dsize1
1322
    uint32_t type = 0;                      // data type
1323
    uint32_t arrayNum1 = 1;                 // number of elements indicated in []
1324
    uint32_t arrayNum2 = 0;                 // number of elements in {} list
1325
    uint32_t stringlen = 0;                 // length of string
1326
    uint32_t symi = 0;                      // symbol index
1327
    ElfFWC_Sym2 sym;                        // symbol record
1328
    zeroAllMembers(sym);                    // reset symbol
1329
    SExpression exp1;                       // expression when interpreting numeric expression
1330
 
1331
    if (section == 0) {
1332
        errors.reportLine(ERR_DATA_WO_SECTION);
1333
    }
1334
 
1335
    // loop through tokens on this line
1336
    for (tok = tokenB; tok < tokenB + tokenN; tok++) {
1337
        switch (state) {
1338
        case 0:  // this is a type token
1339
            type  = tokens[tok].id & 0xFF;
1340
            dsize1 = tokens[tok].id & 0xF;
1341
            if ((type & 0x40) > 3) dsize1 -= 3;
1342
            dsize = 1 << dsize1;
1343
            state = 1;
1344
            if (section) {  // align data
1345
                uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
1346
                if (addr & (dsize - 1)) { // needs to insert zeroes
1347
                    uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize;  // calculate aligned address
1348
                    sectionHeaders[section].sh_size = addr2;           // update address
1349
                    if (pass >= 3) {
1350
                        dataBuffers[section].align(dsize);   // put zeroes in data buffer
1351
                    }
1352
                }
1353
                if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1;  // update section alignment
1354
            }
1355
            break;
1356
        case 1:  // expecting name token. save name
1357
            if (tokens[tok].type == TOK_NAM) { // name. make symbol
1358
                sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
1359
                symi = addSymbol(sym);
1360
                if (symi == 0 && pass == 2) {
1361
                    errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED);  break;
1362
                }
1363
                symbols[symi].st_type = (sectionFlags & SHF_EXEC) ? STT_FUNC : STT_OBJECT;
1364
                tokens[tok].type = TOK_SYM;      // change token type
1365
                tokens[tok].id = symbols[symi].st_name;  // use name offset as unique identifier because symbol index can change
1366
                state = 2;
1367
            }
1368
            else if (tokens[tok].type == TOK_SYM) { // symbol
1369
                symi = findSymbol(tokens[tok].id);
1370
                if (symi > 0 && pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED);  // symbol already defined
1371
                state = 2;
1372
            }
1373
            else {
1374
                errors.report(tokens[tok]);
1375
            }
1376
            //nametok = tok;
1377
            symbols[symi].st_unitsize = dsize;
1378
            symbols[symi].st_unitnum = 0;
1379
 
1380
            if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT;
1381
            if (section) { // copy information from section
1382
                symbols[symi].st_value = sectionHeaders[section].sh_size;
1383
                symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR;
1384
            }
1385
            break;
1386
        case 2:  // after name. expect , = [ eol
1387
            if (tokens[tok].type != TOK_OPR) {
1388
                errors.report(tokens[tok]);  break;
1389
            }
1390
            switch (tokens[tok].id) {
1391
            case ',':  // finish this symbol definition
1392
                COMMA:
1393
                    if (arrayNum2 > arrayNum1) { // check if the two array sizes match
1394
                        if (arrayNum1 > 1) {
1395
                            errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_CONFLICT_ARRAYSZ);
1396
                        }
1397
                        else arrayNum1 = arrayNum2;
1398
                    }
1399
                    symbols[symi].st_unitsize = dsize;
1400
                    symbols[symi].st_unitnum = arrayNum1;
1401
                    symbols[symi].st_reguse1 = linei;
1402
                    symbols[symi].st_section = section;
1403
 
1404
                    if (arrayNum1 > arrayNum2 && section) {
1405
                        // unspecified elements are zero. calculate extra size
1406
                        uint32_t asize = (arrayNum1 - arrayNum2) * dsize;
1407
                        sectionHeaders[section].sh_size += asize;
1408
                        if (pass >= 3 && sectionHeaders[section].sh_type != SHT_NOBITS) {
1409
                            // store any unspecified elements as zero
1410
                            uint64_t zero = 0;
1411
                            while (asize > 8) {
1412
                                dataBuffers[section].push(&zero, 8);  asize -= 8;
1413
                            }
1414
                            while (asize > 0) {
1415
                                dataBuffers[section].push(&zero, 1);  asize -= 1;
1416
                            }
1417
                        }
1418
                    }
1419
 
1420
                    // get ready for next symbol
1421
                    zeroAllMembers(sym);
1422
                    arrayNum1 = 1;  arrayNum2 = 0;
1423
                    if (state == 99) return;       // finished line
1424
                    state = 1;
1425
                    break;
1426
            case '=':
1427
                state = 5;
1428
                break;
1429
            case '[':
1430
                state = 3;
1431
                break;
1432
            default:
1433
                errors.report(tokens[tok]);
1434
            }
1435
            break;
1436
        case 3:  // after [ . expect number or ]
1437
            if (tokens[tok].id == ']') {
1438
                state = 2; break;
1439
            }
1440
            if (arrayNum1 > 1) {
1441
                errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_MULTIDIMENSIONAL);  break;            // error. multidimensional array not supported
1442
            }
1443
            // evaluate numeric expression inside []. 
1444
            // it may contain complex expressions that can only be evaluated later, but
1445
            // this will not generate an error message here
1446
            exp1 = expression(tok, tokenB + tokenN - tok, 0x10);
1447
            if (lineError) return;
1448
            tok += exp1.tokens -1;
1449
            if (exp1.etype == 0) errors.report(tokens[tok]);
1450
            if ((exp1.etype & ~XPR_IMMEDIATE) == 0) {
1451
                arrayNum1 = exp1.value.w;
1452
            }
1453
            state = 4;
1454
            break;
1455
        case 4:  // after [number. expect ]
1456
            if (tokens[tok].id != ']') {
1457
                errors.report(tokens[tok]);  break;
1458
            }
1459
            state = 2;
1460
            break;
1461
        case 5:  // after =. expect number or {numbers}
1462
            if (tokens[tok].id == '{') state = 7;
1463
            else {
1464
                state = 6;
1465
                goto SAVE_VALUE;  // interpret value and save it
1466
            }
1467
            break;
1468
        case 6:  // after = number. expect comma or eol
1469
            if (tokens[tok].id != ',') {
1470
                errors.report(tokens[tok]);  break;
1471
            }
1472
            goto COMMA;
1473
        case 7:  // after {. expect number list
1474
            state = 8;
1475
        SAVE_VALUE:
1476
            arrayNum2++;
1477
            if (pass < 3) {
1478
                // may contain symbols not defined yet. just pass expression and count tokens
1479
                exp1 = expression(tok, tokenB + tokenN - tok, 0x10);
1480
                tok += exp1.tokens - 1;
1481
                if (lineError) return;
1482
            }
1483
            else {
1484
                // pass 5. evaluate expression and save value
1485
                exp1 = expression(tok, tokenB + tokenN - tok, 0);
1486
                tok += exp1.tokens - 1;
1487
                if (lineError) return;
1488
                if ((exp1.etype & XPR_SYM1) && exp1.sym3 && pass > 3) {
1489
                    // calculation of symbol value. add relocation if needed
1490
                    exp1.value.i = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize);
1491
                    if (exp1.etype & XPR_ERROR) {
1492
                        errors.reportLine((uint32_t)(exp1.value.i)); // report error
1493
                        break;
1494
                    }
1495
                    // check for overflow
1496
                    bool overflow = false;
1497
                    switch (type & 0xFF) {
1498
                    case TYP_INT8 & 0xFF:
1499
                        overflow = exp1.value.i > 0x7F || exp1.value.i < -0x80;
1500
                        break;
1501
                    case TYP_INT16 & 0xFF:
1502
                        overflow = exp1.value.i > 0x7FFF || exp1.value.i < -0x8000;
1503
                        break;
1504
                    case TYP_INT32 & 0xFF:
1505
                        overflow = exp1.value.i > 0x7FFFFFFF || exp1.value.i < int32_t(0x80000000);
1506
                        break;
1507
                    default:;
1508
                    }
1509
                    if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows
1510
                }
1511
            }
1512
            if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_UNRESOLV | XPR_SYM1)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) {
1513
                errors.report(tokens[tok]);
1514
            }
1515
            if (section && section < dataBuffers.numEntries() && pass >= 3) {
1516
                // save data of desired type
1517
                if ((exp1.etype & XPR_IMMEDIATE) == XPR_FLT) {
1518
                    // floating point number specified
1519
                    if ((type & 0xF0) == (TYP_INT8 & 0xF0)) {  // float specified, integer expected
1520
                        exp1.value.i = int64_t(exp1.value.d);
1521
                        errors.reportLine(ERR_CONFLICT_TYPE);
1522
                    }
1523
                }
1524
                else if ((exp1.etype & XPR_IMMEDIATE) == XPR_INT) {
1525
                    if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) {  // integer specified, float expected
1526
                        exp1.value.d = double(exp1.value.i);  // convert to float
1527
                    }
1528
                }
1529
                else if (exp1.etype & XPR_STRING) {  // string expression: get size
1530
                    if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE);  // string must use type int8
1531
                    stringlen = exp1.sym2; // string length
1532
                }
1533
                else stringlen = 0;
1534
 
1535
                if (sectionHeaders[section].sh_type == SHT_NOBITS) {
1536
                    // uninitialized (BSS) section. check that value is zero, but don't store
1537
                    if (exp1.value.i != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero
1538
                }
1539
                else {
1540
                    // save data
1541
                    switch (type & 0xFF) {
1542
                    case TYP_INT8 & 0xFF:
1543
                        if (stringlen) {
1544
                            dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen);
1545
                            break;
1546
                        }
1547
                        dataBuffers[section].push(&exp1.value.u, 1);  break;
1548
                    case TYP_INT16 & 0xFF:
1549
                        dataBuffers[section].push(&exp1.value.u, 2);  break;
1550
                    case TYP_INT32 & 0xFF:
1551
                        dataBuffers[section].push(&exp1.value.u, 4);  break;
1552
                    case TYP_INT64 & 0xFF:
1553
                        dataBuffers[section].push(&exp1.value.u, 8);  break;
1554
                    case TYP_INT128 & 0xFF:
1555
                        dataBuffers[section].push(&exp1.value.u, 8);
1556
                        exp1.value.i = exp1.value.i >> 63;     // sign extend
1557
                        dataBuffers[section].push(&exp1.value.u, 8);
1558
                        break;
1559
                    case TYP_FLOAT16 & 0xFF:  // half precision
1560
                        exp1.value.w = double2half(exp1.value.d);
1561
                        dataBuffers[section].push(&exp1.value.w, 2);  break;
1562
                    case TYP_FLOAT32 & 0xFF: { // single precision
1563
                        float val = float(exp1.value.d);
1564
                        dataBuffers[section].push(&val, 4); }
1565
                                             break;
1566
                    case TYP_FLOAT64 & 0xFF:  // double precision
1567
                        dataBuffers[section].push(&exp1.value.d, 8);  break;
1568
                    }
1569
                }
1570
            }
1571
            sectionHeaders[section].sh_size += stringlen ? stringlen : dsize;  // update address
1572
            break;
1573
        case 8:  // after {number. expect comma or }
1574
            if (tokens[tok].id == ',') state = 7;
1575
            else if (tokens[tok].id == '}') state = 6;
1576
            else {
1577
                errors.report(tokens[tok]);  break;
1578
            }
1579
        }
1580
        if (tok + 1 == tokenB + tokenN && (state == 5 || state >= 7) && linei + 1 < lines.numEntries()) {
1581
            // no more tokens. statement with {} can span multiple lines
1582
            if (state == 5) {
1583
                // after '='. expect next line to be '{'
1584
                uint32_t tokNext = lines[linei+1].firstToken;
1585
                if (tokens[tokNext].type != TOK_OPR || tokens[tokNext].id != '{') break; // anything else: break out of loop and get error message
1586
            }
1587
            // append next line
1588
            lines[linei].type = LINE_DATADEF;
1589
            linei++;
1590
            tokenN += lines[linei].numTokens;
1591
        }
1592
 
1593
    }
1594
    // no more tokens
1595
    if (state == 2 || state == 6) {
1596
        // finish this definition
1597
        lines[linei].type = LINE_DATADEF;
1598
        state = 99; goto COMMA;
1599
    }
1600
    errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_UNFINISHED_VAR);
1601
}
1602
 
1603
// check if line is code or data
1604
void CAssembler::determineLineType() {
1605
    uint32_t tok;                           // current token
1606
    uint32_t elements = 0;                  // detect type and constant tokens
1607
 
1608
    if (tokens[tokenB].type == TOK_OPT) {
1609
        lines[linei].type = LINE_OPTIONS;  return;
1610
    }
1611
    // loop through tokens on this line
1612
    for (tok = tokenB; tok < tokenB + tokenN; tok++) {
1613
        if (tokens[tok].type == TOK_REG || tokens[tok].type == TOK_INS || tokens[tok].type == TOK_XPR || tokens[tok].type == TOK_HLL) {
1614
            lines[linei].type = LINE_CODEDEF;  return;     // register or instruction found. must be code
1615
        }
1616
        if (tokens[tok].type == TOK_TYP) elements |= 1;
1617
        if (tokens[tok].type == TOK_NUM || tokens[tok].type == TOK_FLT || tokens[tok].type == TOK_CHA || tokens[tok].type == TOK_STR) elements |= 2;
1618
    }
1619
    if (elements == 3)  lines[linei].type = LINE_DATADEF;
1620
    else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) {  // align directive
1621
        lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF;
1622
    }
1623
    else if (tokens[tokenB].type == TOK_EOF) lines[linei].type = 0;   // end of file
1624
    else if (tokenN == 1 && tokens[tokenB].type == TOK_OPR && linei > 1) {
1625
        // {} bracket. same type as previous line
1626
        lines[linei].type = lines[linei-1].type;
1627
    }
1628
    else if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') {
1629
        // metaprogramming code
1630
        lines[linei].type = LINE_METADEF;
1631
    }
1632
    else if (linei > 1) {
1633
        // undetermined. This may occur in for(;;) clause. Use same type as previous line
1634
        lines[linei].type = lines[linei-1].type;
1635
    }
1636
    else {
1637
        // error. cannot determine
1638
        errors.report(tokens[tokenB]);
1639
        lines[linei].type = LINE_ERROR;
1640
    }
1641
}
1642
 
1643
// interpret data or code alignment directive
1644
void CAssembler::interpretAlign() {
1645
    if (section) {
1646
        uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
1647
        SExpression exp1 = expression(tokenB+1, tokenN - 1, pass < 3 ? 0x10 : 0);
1648
        if (exp1.tokens < tokenN - 1) {errors.report(tokens[tokenB+1+exp1.tokens]); return;}
1649
        if ((exp1.etype & XPR_IMMEDIATE) != XPR_INT || (exp1.etype & (XPR_STRING | XPR_REG | XPR_OP | XPR_MEM | XPR_OPTION))) {
1650
            errors.report(tokens[tokenB+1]);  return;
1651
        }
1652
        uint64_t alignm = exp1.value.u;
1653
        if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) {errors.reportLine(ERR_ALIGNMENT);  return;}
1654
        uint32_t log2ali = bitScanReverse(alignm);
1655
        if (sectionHeaders[section].sh_align < log2ali) {
1656
            sectionHeaders[section].sh_align = log2ali;  // make sure section alignment is not less
1657
        }
1658
        if (addr & ((uint32_t)alignm - 1)) { // needs to insert zeroes
1659
            uint32_t addr2 = (addr + (uint32_t)alignm - 1) & -(int32_t)alignm;
1660
            sectionHeaders[section].sh_size = addr2;           // update address
1661
            if (pass >= 3) {
1662
                dataBuffers[section].align((uint32_t)alignm);  // put zeroes in data buffer
1663
            }
1664
        }
1665
    }
1666
}
1667
 
1668
// Pass 3 does three things. 
1669
// A. Handle metaprogramming directives
1670
// B. Classify lines
1671
// C. Identify symbol names, sections, labels, functions 
1672
// These must be done in parallel because metaprogramming directives can refer to previously 
1673
// defined symbols, and data/code definitions can involve metaprogramming variables and macros
1674
 
1675
void CAssembler::pass2() {
1676
    ElfFWC_Sym2 sym;                  // symbol record
1677
    zeroAllMembers(sym);              // reset symbol
1678
    symbols.push(sym);                // symbol record 0 is empty
1679
    symbolNameBuffer.put((char)0);    // put dummy zero to avoid zero offset at next string
1680
    sectionFlags = 0;
1681
    section = 0;
1682
 
1683
    // lines loop
1684
    for (linei = 1; linei < lines.numEntries(); linei++) {
1685
        lineError = 0;
1686
        tokenB = lines[linei].firstToken;      // first token in line        
1687
        tokenN = lines[linei].numTokens; // number of tokens in line
1688
        if (tokenN == 0) continue;
1689
        replaceKnownNames();                   // replace previously defined names by symbol references
1690
        // check if line begins with '%'
1691
        if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') {
1692
            // metaprogramming code
1693
            lines[linei].type = LINE_METADEF;
1694
            interpretMetaDefinition();
1695
            continue;
1696
        }
1697
        // classify other lines
1698
        lines[linei].sectionType = sectionFlags;                    // line is section directive
1699
        if (sectionFlags & ATT_EXEC) lines[linei].type = LINE_CODEDEF;
1700
        else if (sectionFlags & ((ATT_READ | ATT_WRITE))) lines[linei].type = LINE_DATADEF;
1701
 
1702
        if (tokenN > 1) {
1703
            // search for section, function and symbol definitions
1704
            // lines with a single token cannot legally define a symbol name
1705
            if ((tokens[tokenB].type == TOK_NAM || tokens[tokenB].type == TOK_SYM) && tokens[tokenB+1].type == TOK_DIR) {
1706
                switch (tokens[tokenB + 1].id) {
1707
                case DIR_SECTION:   // section starts here
1708
                    interpretSectionDirective();
1709
                    break;
1710
                case DIR_FUNCTION:   // function starts here
1711
                    interpretFunctionDirective();
1712
                    break;
1713
                case DIR_END:    // section or function end
1714
                    interpretEndDirective();
1715
                    break;
1716
                default:
1717
                    errors.report(tokens[tokenB + 1]);
1718
                }
1719
            }
1720
            else if (tokens[tokenB].id == DIR_EXTERN) {
1721
                // extern symbols
1722
                interpretExternDirective();
1723
            }
1724
            else if (tokens[tokenB].id == DIR_PUBLIC) {
1725
                // the interpretation of public symbol declarations is postponed to pass 4 after all 
1726
                // symbols have been defined and got their final value
1727
                lines[linei].type = LINE_PUBLICDEF;
1728
            }
1729
            else if (tokens[tokenB].type == TOK_NAM && tokens[tokenB+1].id == ':') {
1730
                interpretLabel(tokenB);
1731
                if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1();
1732
            }
1733
            else if (tokens[tokenB].type == TOK_TYP && (tokens[tokenB+1].type == TOK_NAM || tokens[tokenB+1].type == TOK_SYM)) {
1734
                interpretVariableDefinition2();
1735
            }
1736
            else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) {
1737
                interpretAlign();
1738
            }
1739
            else if (tokens[tokenB].type == TOK_SYM && tokens[tokenB+1].id == ':' && pass == 2) {
1740
                errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);  // symbol already defined
1741
            }
1742
            else {
1743
                determineLineType();  // check if code or data
1744
                if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1();
1745
            }
1746
        }
1747
        else {
1748
            determineLineType();  // check if code or data (can only be code)
1749
        }
1750
    }
1751
 
1752
    // loop through lines again to replace names that are forward references to symbols defined during pass 2
1753
    for (linei = 1; linei < lines.numEntries(); linei++) {
1754
        tokenB = lines[linei].firstToken;      // first token in line        
1755
        tokenN = lines[linei].numTokens;      // number of tokens in line
1756
        replaceKnownNames();                   // replace previously defined names by symbol references
1757
    }
1758
}
1759
 
1760
 
1761
// Show all symbols. For debugging only
1762
void CAssembler::showSymbols() {
1763
    uint32_t symi;
1764
    ElfFWC_Sym2 sym;
1765
    printf("\n\nSymbol:    name, section, addr, type, size, binding");
1766
    for (symi = 1; symi < symbols.numEntries(); symi++) {
1767
        sym = symbols[symi];
1768
        printf("\n%3i: %10s, %7i, %4X", symi, symbolNameBuffer.buf() + sym.st_name,
1769
            sym.st_section, (uint32_t)sym.st_value);
1770
        if (sym.st_type == STT_CONSTANT || sym.st_type == STT_VARIABLE) {
1771
            if (sym.st_other & STV_FLOAT) {    // floating point constant
1772
                union { uint64_t i; double d; } val;
1773
                val.i = sym.st_value;
1774
                printf(" = %G", val.d);
1775
            }
1776
            else if (sym.st_other & STV_STRING) {   // string
1777
                printf(" = %s", stringBuffer.getString((uint32_t)sym.st_value));
1778
            }
1779
            else {
1780
                // print 64 bit integer constant
1781
                printf(" = 0x");
1782
                if (uint64_t(sym.st_value) >> 32) {
1783
                    printf("%X%08X", uint32_t(sym.st_value >> 32), uint32_t(sym.st_value));
1784
                }
1785
                else {
1786
                    printf("%X", uint32_t(sym.st_value));
1787
                }
1788
                // this method causes warnings:
1789
                // printf(((sizeof(long int) > 4) ? " = 0x%lx" : " = 0x%llx"), sym.st_value); 
1790
            }
1791
        }
1792
        else {
1793
            printf(" %5X, %X*%X, %7X",  // other type
1794
                sym.st_type, sym.st_unitsize, sym.st_unitnum, sym.st_bind);
1795
        }
1796
    }
1797
}
1798
 
1799
// Show all tokens. For debugging only
1800
void CAssembler::showTokens() {
1801
    SKeyword const tokenNames[] = {
1802
        {"name",        TOK_NAM},   // unidentified name        
1803
        {"direc",       TOK_DIR},   // section or function directive
1804
        {"attrib",      TOK_ATT},   // section or function attribute
1805
        {"label",       TOK_LAB},   // code label or function name
1806
        {"datalb",      TOK_VAR},   // data label
1807
        {"secnm",       TOK_SEC},   // section name
1808
        {"type",        TOK_TYP},   // type name
1809
        {"reg",         TOK_REG},   //  register name
1810
        {"instr",       TOK_INS},   // instruction name 
1811
        {"oper",        TOK_OPR},   // operator
1812
        {"option",      TOK_OPT},   // operator
1813
        {"num",         TOK_NUM},   // number
1814
        {"float",       TOK_FLT},   // floating point number
1815
        {"char",        TOK_CHA},   // character or string in single quotes ' '
1816
        {"string",      TOK_STR},   // string in double quotes " "
1817
        {"symbol",      TOK_SYM},   // symbol
1818
        {"expression",  TOK_XPR},   // expression
1819
        {"eof",         TOK_EOF},   // string in double quotes " "
1820
        {"hll",         TOK_HLL}    // string in double quotes " "
1821
                                    //   {"error", TOK_ERR}   // error. illegal character or unmatched quote
1822
    };
1823
 
1824
    uint32_t line, tok, i;
1825
    for (line = 1; line < lines.numEntries(); line++) {
1826
        if (line < lines.numEntries() && lines[line].numTokens) {
1827
            printf("\nline %2i type %X", lines[line].linenum, lines[line].type);
1828
 
1829
            for (tok = lines[line].firstToken; tok < lines[line].firstToken + lines[line].numTokens; tok++) {
1830
                // find name for token type
1831
                const char * nm = 0;
1832
                for (i = 0; i < TableSize(tokenNames); i++) {
1833
                    if (tokenNames[i].id == tokens[tok].type) nm = tokenNames[i].name;
1834
                }
1835
                if (nm) printf("\n%4X  %8s: ", tok, nm);                               // Token type
1836
                else printf("type %4X", tokens[tok].type);
1837
 
1838
                switch (tokens[tok].type) {
1839
                case TOK_DIR: case TOK_ATT: case TOK_TYP: case TOK_OPT: case TOK_HLL:
1840
                    nm = 0;
1841
                    for (i = 0; i < TableSize(keywordsList); i++) {
1842
                        if (keywordsList[i].id == tokens[tok].id) nm = keywordsList[i].name;
1843
                    }
1844
                    if (nm) printf("%s", nm);
1845
                    else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
1846
                    break;
1847
                case TOK_OPR:
1848
                    nm = 0;
1849
                    for (i = 0; i < TableSize(operatorsList); i++) {
1850
                        if (operatorsList[i].id == tokens[tok].id) nm = operatorsList[i].name;
1851
                    }
1852
                    if (nm) printf("%s", nm);
1853
                    else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
1854
                    break;
1855
                case TOK_REG: //registerNames
1856
                    nm = 0;
1857
                    for (i = 0; i < TableSize(registerNames); i++) {
1858
                        if (registerNames[i].id == tokens[tok].id) nm = registerNames[i].name;
1859
                    }
1860
                    if (nm) printf("%s%i", nm, tokens[tok].id & 0xFF);
1861
                    else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
1862
                    break;
1863
                case TOK_NAM: case TOK_NUM: case TOK_FLT: case TOK_LAB: case TOK_VAR: case TOK_SEC:
1864
                case TOK_CHA: case TOK_STR: case TOK_INS: case TOK_SYM:
1865
                    for (i = 0; i < tokens[tok].stringLength; i++) {
1866
                        printf("%c", buf()[tokens[tok].pos + i]);
1867
                    }
1868
                    printf("  id %X, value %X", tokens[tok].id, tokens[tok].value.w);
1869
                    break;
1870
                case TOK_XPR:
1871
                default:
1872
                    printf("0x%X 0x%X 0x%X %2i", tokens[tok].id, tokens[tok].value.w, tokens[tok].pos, tokens[tok].stringLength);
1873
                    break;
1874
                }
1875
            }
1876
        }
1877
    }
1878
}
1879
 
1880
void CAssembler::initializeWordLists() {
1881
    // Operators list
1882
    operators.pushBig(operatorsList, sizeof(operatorsList));
1883
    operators.sort();
1884
    // Keywords list
1885
    keywords.pushBig(keywordsList,sizeof(keywordsList));
1886
    keywords.sort();
1887
    // Read instruction list from file
1888
    CCSVFile instructionListFile;
1889
    instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH); // Filename of list of instructions
1890
    instructionListFile.parse();                            // Read and interpret instruction list file
1891
    instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container
1892
    instructionlistId.copy(instructionlist);                // copy instruction list
1893
    instructionlistNm.copy(instructionlist);                // copy instruction list
1894
                                                            // sort lists by different criteria, defined by the different operators:
1895
    // operator < (SInstruction const & a, SInstruction const & b)
1896
    // operator < (SInstruction3 const & a, SInstruction3 const & b)
1897
    SInstruction3 nullInstruction;                          // empty record
1898
    zeroAllMembers(nullInstruction);
1899
    instructionlistId.push(nullInstruction);                // Empty record will go to position 0 to avoid an instruction with index 0
1900
    instructionlistNm.sort();                               // Sort instructionlist by name
1901
    instructionlistId.sort();                               // Sort instructionlistId by id
1902
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.