OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [assem4.cpp] - Blame information for rev 44

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 44 Agner
/****************************    assem4.cpp    ********************************
2
* Author:        Agner Fog
3
* Date created:  2017-04-17
4
* Last modified: 2021-07-14
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Module:        assem.cpp
8
* Description:
9
* Module for assembling ForwardCom .as files.
10
* This module contains:
11
* pass3(): Interpretation of code lines.
12
* Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses
13
******************************************************************************/
14
#include "stdafx.h"
15
 
16
 
17
// Interpret lines. Generate code and data
18
void CAssembler::pass3() {
19
    uint16_t last_line_type = 0;       // type of preceding line
20
    makeFormatLists();                 // make formatList3 and formatList4
21
    code_size = cmd.codeSizeOption;    // initialize options
22
    data_size = cmd.dataSizeOption;
23
    section = 0;
24
    iLoop = iIf = iSwitch = 0;         // index of current high level statements
25
 
26
    // lines loop
27
    for (linei = 1; linei < lines.numEntries()-1; linei++) {
28
        tokenB = lines[linei].firstToken;      // first token in line        
29
        tokenN = lines[linei].numTokens; // number of tokens in line
30
        if (tokenN == 0 || lines[linei].type == LINE_ERROR || lines[linei].type == LINE_METADEF) continue;
31
        lineError = false;
32
 
33
        switch (lines[linei].type) {
34
        case LINE_DATADEF:
35
            if (last_line_type == LINE_CODEDEF && (lines[linei].sectionType & SHF_EXEC)) {
36
                /* currently, the assembler cannot mix code and data because they are put in different buffers.
37
                The only way to hard-code instructions is to put them into a separate section. */
38
                errors.reportLine(ERR_MIX_DATA_AND_CODE);   // data definition in code section
39
            }
40
            break;
41
        case LINE_CODEDEF:
42
            interpretCodeLine();
43
            if (last_line_type == LINE_DATADEF && !(lines[linei].sectionType & SHF_EXEC)) {
44
                errors.reportLine(ERR_MIX_DATA_AND_CODE);   // code definition in data section
45
            }
46
            break;
47
        case LINE_METADEF: case LINE_ERROR:
48
            continue;
49
        case LINE_FUNCTION:
50
            interpretFunctionDirective();
51
            break;
52
        case LINE_SECTION:
53
            interpretSectionDirective();
54
            break;
55
        case LINE_ENDDIR:
56
            interpretEndDirective();
57
            break;
58
        case LINE_OPTIONS:
59
            interpretOptionsLine();
60
            break;
61
        }
62
 
63
        last_line_type = lines[linei].type;
64
    }
65
    while (hllBlocks.numEntries()) {
66
        // unfinished block
67
        SBlock block = hllBlocks.pop();
68
        errors.report(tokens[block.startBracket].pos, tokens[block.startBracket].stringLength, ERR_BRACKET_BEGIN);
69
    }
70
}
71
 
72
// extract subsets of formatList (in disasm1.cpp) for multiformat instructions and jump instructions
73
void CAssembler::makeFormatLists() {
74
    uint32_t i;
75
    for (i = 0; i < formatListSize; i++) {
76
        if (formatList[i].category == 3) formatList3.push(formatList[i]);
77
        if (formatList[i].category == 4) formatList4.push(formatList[i]);
78
    }
79
}
80
 
81
// Interpret a line defining code. This covers both assembly style and high level style code
82
void CAssembler::interpretCodeLine() {
83
    uint32_t tok;                                // token index
84
    dataType = 0;                                // data type for current instruction
85
    uint32_t nReg = 0;                           // number of register source operands
86
    uint32_t state = 0;  /* state during interpretation of line. example:
87
        L1: int32 r1 = compare(r2, 5), option = 2   // assembly style
88
        L1: int32 r1 = r2 < 5                       // same in high level style
89
            0:  begin
90
            1:  after label
91
            2:  after label:
92
            3:  after type
93
            4:  after destination
94
            5:  after destination = (expecting expression or instruction)
95
            6:  after expression or instruction()
96
            7:  after instruction
97
            8:  after instruction(
98
            9:  after operand
99
           10:  after instruction(),
100
           11:  after jump instruction
101
    */
102
    SExpression expr;                            // evaluated expression
103
    SCode code;                                  // current instruction code
104
    zeroAllMembers(code);                        // reset code structure
105
 
106
    if (section == 0) {
107
        errors.reportLine(ERR_CODE_WO_SECTION);
108
    }
109
 
110
    // high level instructions with nothing before can be caught already here
111
    if (tokens[tokenB].type == TOK_HLL) {
112
        interpretHighLevelStatement();    // if, else, switch, for, do, while (){} statements
113
        return;
114
    }
115
    if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '}') {
116
        interpretEndBracket();            // end of {} block
117
        return;
118
    }
119
 
120
    // interpret line by state machine looping through tokens
121
    for (tok = tokenB; tok < tokenB + tokenN; tok++) {
122
        SToken token = tokens[tok];
123
        if (token.type == TOK_XPR && expressions[token.value.w].etype & XPR_REG) {
124
            // this is an alias for a register. Translate to register
125
            token.type = TOK_REG;
126
            token.id = expressions[token.value.w].reg1;
127
        }
128
 
129
        if (lineError) break;
130
        code.section = section;
131
 
132
        if (state == 5) {  // after '='
133
            if (token.type == TOK_INS) {  // instruction
134
                if (code.instruction) errors.report(token);  // instruction after += etc.
135
                code.instruction = token.id;
136
                state = 7;
137
            }
138
            else {  // expression after equal sign
139
                // interpret expression representing operands and operator
140
                expr = expression(tok, tokenB + tokenN - tok, 0);
141
                if (lineError) return;
142
                if (code.instruction) {
143
                    // += operator etc. already encountered. combine the operands
144
                    uint32_t op = code.instruction;  code.instruction = 0;
145
                    code.reg1 = code.dest;  // first source operand is same as destination
146
                    code.etype |= XPR_REG1;  code.tokens = 0;
147
                    expr = op2(op, code, expr);  // operation '+' for '+=', etc.
148
                    code.instruction = 0;  code.reg1 = 0;
149
                }
150
                if (code.etype & XPR_ERROR) {
151
                    errors.reportLine(code.value.w); // report error
152
                }
153
                // ordinary '=' goes here
154
                if (lineError) return;
155
                insertAll(code, expr);
156
                tok += expr.tokens - 1;
157
                state = 6;
158
            }
159
        }
160
        else if (state == 11) {
161
            // interpret jump target
162
            expr = expression(tok, tokenB + tokenN - tok, 0);
163
            state = 6;
164
            if (expr.etype & XPR_REG) {
165
                code = code | expr;
166
                tok += expr.tokens - 1;
167
            }
168
            else if (expr.etype & (XPR_INT | XPR_SYM1)) {
169
                code.sym5 = expr.sym3 ? expr.sym3 : expr.sym1;
170
                code.offset_jump = expr.value.w;
171
                if (expr.value.w & 3) errors.report(token.pos, token.stringLength, ERR_JUMP_TARGET_MISALIGN);
172
                tok += expr.tokens - 1;
173
                code.etype |= XPR_JUMPOS | (expr.etype & ~XPR_IMMEDIATE);
174
            }
175
            else {
176
                errors.report(token.pos, token.stringLength, ERR_EXPECT_JUMP_TARGET);
177
                break;
178
            }
179
        }
180
        else if (state == 8 && token.type != TOK_OPT && token.type != TOK_REG) {
181
            // expression in parameter list
182
            if (token.type == TOK_OPR && token.id == ')') {
183
                state = 6; break;  // end of parameter list
184
            }
185
            // interpret any expression, except register or option
186
            expr = expression(tok, tokenB + tokenN - tok, 0);
187
            tok += expr.tokens - 1;
188
            if (code.etype & expr.etype & XPR_INT) {
189
                // multiple immediate integer constants
190
                if (code.etype & XPR_INT2) {
191
                    // three integer operands
192
                    if (code.etype & XPR_OPTIONS) errors.report(token.pos, token.stringLength, ERR_TOO_MANY_OPERANDS);
193
                    code.optionbits = uint8_t(expr.value.w);
194
                    code.etype |= XPR_OPTIONS;
195
                    expr.value.u = 0;
196
                }
197
                else {
198
                    // two integer operands
199
                    if (code.value.u >> 32 != 0) errors.report(token.pos, token.stringLength, ERR_TOO_MANY_OPERANDS);
200
                    code.value.u = code.value.w | expr.value.u << 32;
201
                    code.etype |= XPR_INT2;
202
                    expr.value.u = 0;
203
                }
204
            }
205
            else if (expr.etype & XPR_MEM) {
206
                if (expr.etype & XPR_OFFSET) code.offset_mem += expr.offset_mem;
207
                //else code.offset += expr.value.i;
208
                if (expr.etype & XPR_IMMEDIATE) {  // both memory and immediate operands
209
                    code.value.i = expr.value.i;
210
                }
211
            }
212
            else if (expr.etype & XPR_IMMEDIATE) {
213
                code.value.i = expr.value.i;
214
            }
215
            expr.value.i = 0;
216
            code = code | expr;
217
            state = 9;
218
        }
219
        else {
220
            switch (token.type) {
221
            case TOK_LAB:  case TOK_SYM:
222
                if (state == 0) {
223
                    //code.label = token.value.w;
224
                    code.label = token.id;
225
                    if (code.label) {
226
                        int32_t symi = findSymbol(code.label);
227
                        if (symi > 0) symbols[symi].st_section = section;
228
                    }
229
                    state = 1;
230
                }
231
                else goto ST_ERROR;
232
                break;
233
            case TOK_OPR:
234
                if (token.id == ':' && state == 1) {
235
                    state = 2;
236
                }
237
                else if (token.id == '+' && state == 3) {
238
                    code.dtype |= TYP_PLUS;
239
                }
240
                else if (token.priority == 15 && state == 4) {
241
                    // assignment operator
242
                    state = 5;
243
                    if (token.id & EQ) { // combined operator and assignment: += -= *= etc.
244
                        code.reg1 = code.dest;
245
                        code.etype |= XPR_REG | XPR_REG1;
246
                        code.instruction = token.id & ~EQ;  // temporarily store operator in .instruction
247
                    }
248
                    else if (token.id != '=') errors.report(token);
249
                }
250
                else if (token.id == '=' && state == 11) {
251
                    state = 12;
252
                }
253
                else if (token.id == ',' && state == 6) {
254
                    state = 10;
255
                }
256
                else if (token.id == ',' && state == 9) {
257
                    state = 8;
258
                }
259
                else if (token.id == '(' && state == 7) {
260
                    state = 8;
261
                }
262
                else if (token.id == ')' && (state == 8 || state == 9)) {
263
                    state = 6;
264
                }
265
                else if (token.id == '[' && (state == 0 || state == 2 || state == 3)) {
266
                    // interpret memory destination
267
                    expr = expression(tok, tokenB + tokenN - tok, 0);
268
                    tok += expr.tokens - 1;
269
                    insertMem(code, expr);
270
                    code.dest = 2;
271
                    state = 4;
272
                }
273
                else if (token.id == '[' && state == 7 && code.instruction == II_ADDRESS) {
274
                    // address []. expect memory operand
275
                    expr = expression(tok, tokenB + tokenN - tok, 0);
276
                    tok += expr.tokens - 1;
277
                    insertMem(code, expr);
278
                    state = 6;
279
                }
280
                else if ((token.id == '+' + D2 || token.id == '-' + D2) && (state == 3 || state == 4)) {
281
                    // ++ and -- operators
282
                    code.instruction = (token.id == '+' + D2) ? II_ADD : II_SUB;
283
                    // operand is 1, integer or float
284
                    if (dataType & TYP_FLOAT) {
285
                        code.value.d = 1.0;
286
                        code.etype |= XPR_FLT;
287
                    }
288
                    else {
289
                        code.value.i = 1;
290
                        code.etype |= XPR_INT;
291
                    }
292
                    if (state == 3) { // prefix operator. expect register
293
                        tok++;
294
                        if (token.type != TOK_REG) errors.report(token);
295
                        code.dest = token.id;
296
                    }
297
                    code.reg1 = code.dest;
298
                    code.etype |= XPR_REG1;
299
                    state = 6;
300
                }
301
                else if (token.id == ';') {} // ignore terminating ';'
302
                else goto ST_ERROR;
303
                break;
304
            case TOK_TYP:
305
                if (state == 0 || state == 2) {
306
                    dataType = code.dtype = token.id;
307
                    state = 3;
308
                }
309
                else goto ST_ERROR;
310
                break;
311
            case TOK_REG:
312
                if (state == 0 || state == 2 || state == 3) {
313
                    code.dest = uint8_t(token.id);
314
                    state = 4;
315
                }
316
                else if (state == 8) {
317
                    if (nReg < 3) {
318
                        (&code.reg1)[nReg] = (uint8_t)token.id;  // insert register in expression
319
                        code.etype |= XPR_REG1 << nReg++;
320
                        if ((code.etype & (XPR_INT | XPR_FLT | XPR_MEM)) && code.dest != 2)  errors.report(token.pos, token.stringLength, ERR_OPERANDS_WRONG_ORDER);
321
                    }
322
                    else errors.report(token.pos, token.stringLength, ERR_TOO_MANY_OPERANDS);
323
                    state = 9;
324
                }
325
                else goto ST_ERROR;
326
                break;
327
            case TOK_XPR:
328
                if (token.value.u >= expressions.numEntries())  goto ST_ERROR; // expression not found
329
                if (expressions[token.value.w].etype & XPR_MEM) {  // this is an alias for a memory operand
330
                    insertMem(code, expressions[token.value.w]);
331
                    code.dest = 2;
332
                    state = 4;
333
                }
334
                else goto ST_ERROR;
335
                break;
336
            case TOK_INS:
337
                if (state == 0 || state == 2 || state == 3) {
338
                    // interpret instruction name
339
                    code.instruction = token.id;
340
                    state = 7;                              // expect parenthesis and parameters
341
                    if (code.instruction & II_JUMP_INSTR) {
342
                        // Jump or call instruction. The next may be a jump target, a register or a memory operand
343
                        state = 11;  // expect jump target
344
                        // Check if there is a memory operand
345
                        for (uint32_t tok2 = tok+1; tok2 < tokenB + tokenN; tok2++) {
346
                            if (tokens[tok2].type == TOK_OPR && tokens[tok2].id == '[') {
347
                                // a jump instruction with memory operand is treated as a normal instruction
348
                                state = 7;  break;
349
                            }
350
                        }
351
                    }
352
                }
353
                else if ((state == 6 || state == 10) && (token.id & II_JUMP_INSTR)) {
354
                    // second half of jump instruction
355
                    code.instruction |= token.id;    // combine two partial instruction names
356
                    state = 11;                            // expect jump target
357
                }
358
                else goto ST_ERROR;
359
                break;
360
            case TOK_OPT:  // option keyword
361
                expr = expression(tok, tokenB + tokenN - tok, 4);  // this will read option = value
362
                tok += expr.tokens - 1;
363
                code.etype |= expr.etype;
364
                if (expr.etype & XPR_LIMIT) {
365
                    code.value.i = expr.value.i;
366
                    if (expr.value.u >= 0x100000000U) { // limit too high
367
                        errors.report(tokens[tok - 1].pos, tokens[tok - 1].stringLength, ERR_LIMIT_TOO_HIGH);
368
                    }
369
                }
370
                if (expr.etype & (XPR_LENGTH | XPR_BROADC)) code.length = expr.length;
371
                if (expr.etype & XPR_MASK) code.mask = expr.mask;
372
                if (expr.etype & XPR_FALLBACK) code.fallback = expr.fallback;
373
                if (expr.etype & XPR_OPTIONS) code.optionbits = expr.optionbits;
374
                if (state == 8) state = 9;
375
                else if (state == 6 || state == 10) state = 6;
376
                else goto ST_ERROR;
377
                break;
378
            case TOK_ATT:
379
                if (token.id == ATT_ALIGN && state == 0 && tokenN >= 2) {
380
                    // align n directive
381
                    code.instruction = II_ALIGN;
382
                    expr = expression(tok + 1, tokenB + tokenN - tok - 1, 0);
383
                    tok = tokenB + tokenN;
384
                    code.value.u = expr.value.u;
385
                    code.sizeUnknown = 0x80;
386
                    if ((code.value.u & (code.value.u - 1)) || code.value.u > MAX_ALIGN
387
                    || (expr.etype & XPR_IMMEDIATE) != XPR_INT || (expr.etype & (XPR_REG|XPR_OPTION|XPR_MEM))) {
388
                        errors.reportLine(ERR_ALIGNMENT);
389
                    }
390
                }
391
                else goto ST_ERROR;
392
                break;
393
            case TOK_HLL:  // high level directive: if, else, while, for, etc.
394
                interpretHighLevelStatement();
395
                return;
396
            default:;
397
            ST_ERROR:
398
                errors.report(token);
399
                break;
400
            }
401
        }
402
    }
403
    if (lineError) return;
404
    // check if state machine ends with a finished instruction
405
    if (state != 0 && state != 2 && state != 6 && state != 7) {
406
        errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_UNFINISHED_INSTRUCTION);
407
        return;
408
    }
409
 
410
    // move and store instruction has no operator yet
411
    if (code.instruction == 0 && code.etype) {
412
        if (code.dest == 2) code.instruction = II_STORE;  // store to memory
413
        else {
414
            code.instruction = II_MOVE;                   // move constant to register
415
            if (cmd.optiLevel && (code.etype & XPR_INT) && code.value.i >= 0 && !code.sym3 && (code.dtype & TYP_INT) && (code.dest & REG_R)) {
416
                code.dtype |= TYP_PLUS;                   // optimize to larger type for positive constant because it is zero-extended anyway
417
            }
418
        }
419
    }
420
 
421
    if (code.instruction) { // a code record with no instruction represents a label only
422
        // code record contains instruction
423
        if (code.etype & XPR_JUMPOS) mergeJump(code);
424
 
425
        checkCode1(code);
426
        if (lineError) return;
427
 
428
        // find an instruction variant that fits
429
        fitCode(code);
430
        if (lineError) return;
431
    }
432
 
433
    // save code structure
434
    codeBuffer.push(code);
435
}
436
 
437
 
438
// Check how many bits are needed to contain immediate constant of an instruction.
439
// The result is returned as bit-flags in code.fitNumX.
440
// The return value is nonzero if the size cannot be resolved yet.
441
int CAssembler::fitConstant(SCode & code) {
442
    int64_t value = 0;                           // the constant or address to fit
443
    int64_t valueScaled;                         // value divided by scale factor
444
    double dvalue = 0;                           // floating point value if needed
445
    bool floatType = false;                      // a floating point type is needed
446
    bool floatConst = false;                     // a floating point constant is provided
447
    uint32_t fitNum = 0;                         // return value
448
    uint32_t sym3 = 0, sym4 = 0;                 // symbols
449
    int32_t isym3 = 0, isym4 = 0;                // symbol index
450
    int32_t uncertainty;                         // maximum deviance if the value is uncertain
451
    int  uncertain = 0;                          // return value
452
    int symscale;                                // scaling of difference between symbols
453
 
454
    if (code.instruction == II_ALIGN) return 0;  // not an instruction
455
    if (!(code.etype & (XPR_IMMEDIATE | XPR_SYM1))) return 0; // has no immediate
456
 
457
    value = value0 = code.value.i;               // immediate constant
458
    floatType  = uint8_t(code.dtype) >= uint8_t(TYP_FLOAT16);  // floating point needed
459
    floatConst = (code.etype & XPR_FLT) != 0;    // floating point provided
460
    if (floatType) {
461
        if (floatConst) dvalue = code.value.d;
462
        else {
463
            // Note: We are converting the immediate constant to floating point here in order to find
464
            // the optimal representation. We have not identified the instruction yet so we don't know
465
            // if it actually needs a floating point constant or an integer. We have saved the original
466
            // integer value in value0 so that we can undo the conversion in case an instruction with
467
            // floating point type needs an integer operand.
468
            dvalue = (double)value;  // value as float
469
            if (code.etype & XPR_INT) {
470
                // convert integer constant to float
471
                code.value.d = dvalue;
472
                code.etype = (code.etype & ~XPR_IMMEDIATE) | XPR_FLT;
473
                floatConst = true;
474
            }
475
        }
476
        if ((code.etype & XPR_FLT) && uint8_t(code.dtype) == uint8_t(TYP_FLOAT32)) {
477
            union {            // check for overflow in single precision float
478
                float f;
479
                uint32_t i;
480
            } u;
481
            u.f = float(code.value.d);
482
            if (isinf_f(u.i) && u.f > code.value.d) errors.reportLine(ERR_CONSTANT_TOO_LARGE);
483
        }
484
        if ((code.etype & XPR_FLT) && uint8_t(code.dtype) == uint8_t(TYP_FLOAT16)) {
485
            // check for overflow in half precision float
486
            if (isinf_h(double2half(code.value.d) && !isinf_d(code.value.i))) errors.reportLine(ERR_CONSTANT_TOO_LARGE);
487
        }
488
    }
489
 
490
    // check for symbols
491
    if (code.sym3) {
492
        sym3 = code.sym3; sym4 = code.sym4;
493
        symscale = code.symscale3;
494
        isym3 = findSymbol(sym3);
495
        if (isym3 < 1) {
496
            code.sizeUnknown = 2; return 2;              // should not occur
497
        }
498
    }
499
 
500
    if (code.sym3 && !code.sym4 && int32_t(symbols[isym3].st_section) == SECTION_LOCAL_VAR && symbols[isym3].st_type == STT_CONSTANT) {
501
        // convert local symbol to constant
502
        value = symbols[isym3].st_value;
503
        code.value.i = value;
504
        code.sym3 = 0;
505
        if (cmd.optiLevel && value >= 0 && (code.dtype & TYP_INT) && (code.dest & REG_R)) {
506
            code.dtype |= TYP_PLUS;        // optimize to larger type for positive constant because it is zero-extended anyway
507
        }
508
    }
509
    else if (sym3) {
510
        // there is a symbol
511
        if (symbols[isym3].st_unitsize == 0) uncertain = 2;  // symbol value is not known yet
512
        uint32_t sym3section = symbols[isym3].st_section; // symbol section
513
        // determine necessary relocation size if relocation needed
514
        uint64_t relSize;                       // maximum size of relocated address
515
        if (symbols[isym3].st_type == STT_CONSTANT) {
516
            relSize = 0x10000000;               // there is no command line option for the size of absolute symbols. assume 32 bit
517
            code.etype |= XPR_INT;
518
        }
519
        else if (sym3section && symbols[isym3].st_type != STT_CONSTANT) {   // local symbol with known section 
520
            relSize = (sectionHeaders[sym3section].sh_flags & (SHF_EXEC | SHF_IP)) ? code_size : data_size;
521
        }
522
        else { // external symbol with unknown section. look at symbol attributes
523
            relSize = (symbols[isym3].st_other & (STV_EXEC | STV_IP)) ? code_size : data_size;
524
            if (!(code.etype & (XPR_MEM | XPR_SYM2))) {
525
                errors.reportLine(ERR_CONFLICT_TYPE);  // must be memory operand
526
            }
527
        }
528
        if (sym4) {
529
            // value is (sym3 - sym4) / scale factor
530
            isym4 = findSymbol(sym4);
531
            if (isym4 <= 0) {
532
                code.sizeUnknown = 2; return 2;              // should not occur
533
            }
534
            code.etype |= XPR_INT;                           // symbol difference gives an integer
535
            if (symbols[isym3].st_unitsize == 0) uncertain = 2;  // symbol value is not known yet
536
            if (symbols[isym3].st_section != symbols[isym4].st_section || symbols[isym3].st_bind != STB_LOCAL || symbols[isym4].st_bind != STB_LOCAL) {
537
                // different sections or not local. relocation needed
538
                fitNum = IFIT_RELOC;
539
                if (code.symscale1 > 1) relSize /= code.symscale1;  // value is scaled
540
                if (relSize <= 1 << 7)  fitNum |= IFIT_I8;
541
                if (relSize <= 1 << 15) fitNum |= IFIT_I16;
542
                if (relSize <= (uint64_t)1 << 31) fitNum |= IFIT_I32;
543
                code.fitNum = fitNum;
544
                code.sizeUnknown = uncertain;
545
                return uncertain;
546
            }
547
            // difference between two local symbols
548
            if (pass < 4) {
549
                code.fitNum = IFIT_I8 | IFIT_I16 | IFIT_I32;  // symbol values are not available yet
550
                code.sizeUnknown = 1;
551
                return 1;
552
            }
553
            value += int32_t(uint32_t(symbols[isym3].st_value) - uint32_t(symbols[isym4].st_value));
554
            if (symscale < 1) symscale = 1;
555
            valueScaled = value / symscale + code.offset_mem;
556
            if (valueScaled >= -(1 << 7)  && valueScaled < (1 << 7))  fitNum |= IFIT_I8;
557
            if (valueScaled >= -(1 << 15) && valueScaled < (1 << 15)) fitNum |= IFIT_I16;
558
            if (valueScaled >= -((int64_t)1 << 31) && valueScaled < ((int64_t)1 << 31)) fitNum |= IFIT_I32;
559
            // check if value is certain. uncertainty is stored in high part of st_value
560
            uncertainty = (symbols[isym3].st_value >> 32) - (symbols[isym4].st_value >> 32);
561
            valueScaled = value / symscale + code.offset_mem + uncertainty;
562
            if (symscale > 1) valueScaled /= symscale;  // value is scaled
563
            if ((valueScaled < -(1 << 7)  || valueScaled >= (1 << 7))  && (fitNum & IFIT_I8))  uncertain |= 1;
564
            if ((valueScaled < -(1 << 15) || valueScaled >= (1 << 15)) && (fitNum & IFIT_I16)) uncertain |= 1;
565
            if ((valueScaled < -((int64_t)1 << 31) || valueScaled >= ((int64_t)1 << 31)) && (fitNum & IFIT_I32)) uncertain |= 1;
566
 
567
            if (uncertain && (code.fitNum & IFIT_LARGE)) {
568
                // choose the larger version if optimization process has convergence problems
569
                fitNum  = (fitNum & (fitNum - 1)) | IFIT_I32;  // remove the lowest set bit
570
                uncertain &= ~1;
571
            }
572
            code.fitNum = fitNum;
573
            code.sizeUnknown = uncertain;
574
            return uncertain;
575
        }
576
        // one symbol. must be constant
577
        if (sym3section != 0 && symbols[isym3].st_type != STT_CONSTANT && !(code.etype & XPR_MEM)) {
578
            errors.reportLine(ERR_MEM_WO_BRACKET);
579
            return 1;
580
        }
581
 
582
        if (sym3section && symbols[isym3].st_type != STT_CONSTANT && (sectionHeaders[sym3section].sh_flags & SHF_IP)) {
583
            // relative to instruction pointer
584
            if (sym3section != code.section || symbols[isym3].st_bind != STB_LOCAL) {
585
                // symbol is in different section or not local. relocation needed
586
                fitNum = IFIT_RELOC;
587
                if (relSize <= 1 << 7)  fitNum |= IFIT_I8;   // necessary relocation size
588
                if (relSize <= 1 << 15) fitNum |= IFIT_I16;
589
                if (relSize <= (uint64_t)1 << 31) fitNum |= IFIT_I32;
590
                code.fitNum = fitNum;
591
                code.sizeUnknown = uncertain;
592
                return uncertain;
593
            }
594
            if (pass < 4) {
595
                code.fitNum = IFIT_I8 | IFIT_I16 | IFIT_I32;  // symbol values are not available yet
596
                code.sizeUnknown = 1;
597
                return 1;
598
            }
599
            // self-relative address to local symbol
600
            value = int32_t((uint32_t)symbols[isym3].st_value - (code.address + code.size * 4));
601
            valueScaled = value + code.offset_mem;
602
            if (valueScaled >= -(1 << 7)  && valueScaled < (1 << 7))  fitNum |= IFIT_I8;
603
            if (valueScaled >= -(1 << 15) && valueScaled < (1 << 15)) fitNum |= IFIT_I16;
604
            if (valueScaled >= -((int64_t)1 << 31) && valueScaled < ((int64_t)1 << 31)) fitNum |= IFIT_I32;
605
            code.fitNum = fitNum;
606
            // check if value is certain. uncertainty is stored in high part of st_value and sh_link
607
            uncertainty = int32_t((symbols[isym3].st_value >> 32) - sectionHeaders[code.section].sh_link);
608
            valueScaled += uncertainty;
609
            if ((valueScaled < -(1 << 7)  || valueScaled >= (1 << 7))  && (fitNum & IFIT_I8))  uncertain |= 1;
610
            if ((valueScaled < -(1 << 15) || valueScaled >= (1 << 15)) && (fitNum & IFIT_I16)) uncertain |= 1;
611
            if ((valueScaled < -((int64_t)1 << 31) || valueScaled >= ((int64_t)1 << 31)) && (fitNum & IFIT_I32)) uncertain |= 1;
612
            if (uncertain && (code.fitNum & IFIT_LARGE)) {
613
                // choose the larger version if optimization process has convergence problems
614
                fitNum  = (fitNum & (fitNum - 1)) | IFIT_I32;  // remove the lowest set bit
615
                uncertain &= ~1;
616
            }
617
            code.fitNum = fitNum;
618
            code.sizeUnknown = uncertain;
619
            return uncertain;
620
        }
621
 
622
        // symbol is relative to data pointer or external constant. relocation needed
623
        fitNum = IFIT_RELOC;
624
        if (relSize <= 1 << 7)  fitNum |= IFIT_I8;
625
        if (relSize <= 1 << 15) fitNum |= IFIT_I16;
626
        if (relSize <= (uint64_t)1 << 31) fitNum |= IFIT_I32;
627
        code.fitNum = fitNum;
628
        code.sizeUnknown = uncertain;
629
        return uncertain;
630
    }
631
    // no symbol. only a constant
632
    if (floatType) {
633
        // floating point constant
634
        code.fitNum = fitFloat(dvalue);
635
        if (uint8_t(code.dtype) < uint8_t(TYP_FLOAT64)) code.fitNum |= FFIT_32;
636
        code.sizeUnknown = 0;
637
        return 0;
638
    }
639
    // integer constant
640
    uint32_t low;     // index of lowest set bit
641
    uint32_t high;    // index of highest set bit
642
    fitNum = 0;
643
    int nbits;
644
    if (value == int64_t(0x8000000000000000)) {  // prevent overflow of -value
645
        fitNum = 0;
646
    }
647
    else if (value >= 0) {
648
        low   = bitScanForward((uint64_t)value);    // lowest set bit
649
        high  = bitScanReverse((uint64_t)value);    // highest set bit
650
        //if (value < 8)       fitNum |= IFIT_I4;
651
        //if (value == 8)      fitNum |= IFIT_J4;
652
        //if (value < 0x10)    fitNum |= IFIT_U4;
653
        if (value < 0x80)    fitNum |= IFIT_I8 | IFIT_I8SHIFT;
654
        if (value == 0x80)   fitNum |= IFIT_J8;
655
        if (value <= 0xFF)   fitNum |= IFIT_U8;
656
        if (value < 0x8000)  fitNum |= IFIT_I16 | IFIT_I16SH16;
657
        if (value == 0x8000) fitNum |= IFIT_J16;
658
        if (value <= 0xFFFF) fitNum |= IFIT_U16;
659
        if (high < 31) fitNum |= IFIT_I32;
660
        if (high < 32) fitNum |= IFIT_U32;
661
        if (value == 0x80000000U) fitNum |= IFIT_J32;
662
        nbits = high - low + 1;
663
        if (nbits < 8) fitNum |= IFIT_I8SHIFT;
664
        if (nbits < 16) {
665
            fitNum |= IFIT_I16SHIFT;
666
            if (low >= 16 && high < 31) fitNum |= IFIT_I16SH16;
667
        }
668
        if (nbits < 32) fitNum |= IFIT_I32SHIFT;
669
        if (low >= 32)  fitNum |= IFIT_I32SH32;
670
    }
671
    else {  // x < 0
672
        value = -value;
673
        low   = bitScanForward(value);    // lowest set bit
674
        high  = bitScanReverse(value);    // highest set bit
675
        //if (value <= 8)           fitNum |= IFIT_I4;
676
        if (value <= 0x80)        fitNum |= IFIT_I8 | IFIT_I8SHIFT;
677
        if (value <= 0x8000)      fitNum |= IFIT_I16 |IFIT_I16SH16 ;
678
        if (value <= 0x80000000U) fitNum |= IFIT_I32;
679
        nbits = high - low + 1;
680
        if (nbits < 8) fitNum |= IFIT_I8SHIFT;
681
        if (nbits < 16) {
682
            fitNum |= IFIT_I16SHIFT;
683
            if (low >= 16 && high <= 31) fitNum |= IFIT_I16SH16;
684
        }
685
        if (nbits < 32) fitNum |= IFIT_I32SHIFT;
686
        if (low >= 32)  fitNum |= IFIT_I32SH32;
687
    }
688
    code.fitNum = fitNum;
689
    code.sizeUnknown = 0;
690
    return 0;
691
}
692
 
693
 
694
// Check how many bits are needed to a relative address or jump offset of an instruction.
695
// This result is returned as bit-flags in codefitAddr, code.fitJump, and code.fitNum
696
// The return value is nonzero if the size cannot be resolved yet.
697
int CAssembler::fitAddress(SCode & code) {
698
    int64_t value = 0;                           // the constant or address to fit
699
    int64_t valueScaled;                         // value divided by scale factor
700
    uint32_t fitBits = 0;                        // bit flags indicating fit
701
    int32_t isym1 = 0, isym2 = 0;                // symbol index
702
    int32_t uncertainty;                         // maximum deviance if the value is uncertain
703
    int  uncertain = 0;                          // return value
704
 
705
    if (code.instruction == II_ALIGN) return 0;              // not an instruction
706
    if (!(code.etype & (XPR_OFFSET | XPR_JUMPOS | XPR_MEM))) return 0; // has no address
707
 
708
    // check address of memory operand
709
    if (code.sym1) {
710
        // there is a memory operand symbol
711
        code.etype |= XPR_OFFSET;
712
 
713
        value = code.offset_mem;                                 // memory offset
714
        isym1 = findSymbol(code.sym1);
715
        if (isym1 <= 0) {
716
            code.sizeUnknown = 2; return 2;              // should not occur
717
        }
718
        if (symbols[isym1].st_unitsize == 0) uncertain = 2;  // symbol value is not known yet
719
        uint32_t sym1section = symbols[isym1].st_section; // symbol section
720
        if (sym1section < sectionHeaders.numEntries()) {
721
            // determine necessary relocation size if relocation needed
722
            uint64_t relSize;                       // maximum size of relocated address
723
            if (symbols[isym1].st_type == STT_CONSTANT) {
724
                // assume that constant offset is limited by dataSizeOption
725
                relSize = data_size;       // relocation size for code and constant data                
726
            }
727
            else if (sym1section
728
                && !(sectionHeaders[sym1section].sh_flags & (SHF_WRITE | SHF_DATAP | SHF_THREADP))) {
729
                relSize = code_size;       // relocation size for code and constant data
730
            }
731
            else if (sym1section) {   // local symbol with known section 
732
                relSize = (sectionHeaders[sym1section].sh_flags & (SHF_EXEC | SHF_IP)) ? code_size : data_size;
733
            }
734
            else { // external symbol with unknown section. look at symbol attributes
735
                relSize = (symbols[isym1].st_other & (STV_EXEC | STV_IP)) ? code_size : data_size;
736
            }
737
            if (code.sym2) {
738
                // value is (sym1 - sym2) / scale factor
739
                isym2 = findSymbol(code.sym2);
740
                if (isym2 <= 0) {
741
                    code.sizeUnknown = 2; return 2;              // should not occur
742
                }
743
                if (symbols[isym1].st_unitsize == 0) uncertain = 2;  // symbol value is not known yet
744
                if (symbols[isym1].st_section != symbols[isym2].st_section || symbols[isym1].st_bind != STB_LOCAL || symbols[isym2].st_bind != STB_LOCAL) {
745
                    // different sections or not local. relocation needed
746
                    fitBits = IFIT_RELOC;
747
                    if (code.symscale1 > 1) relSize /= code.symscale1;  // value is scaled
748
                    if (relSize <= 1 << 7)  fitBits |= IFIT_I8;
749
                    if (relSize <= 1 << 15) fitBits |= IFIT_I16;
750
                    //if (relSize <= 1 << 23) fitBits |= IFIT_I24;
751
                    if (relSize <= (uint64_t)1 << 31) fitBits |= IFIT_I32;
752
                    code.fitAddr = fitBits;
753
                    code.sizeUnknown += uncertain;
754
                    //return uncertain;
755
                }
756
                // difference between two local symbols
757
                else if (pass < 4) {
758
                    code.fitAddr = IFIT_I8 | IFIT_I16 | IFIT_I32;  // symbol values are not available yet
759
                    code.sizeUnknown += 1;
760
                    uncertain += 1;
761
                    //return 1;
762
                }
763
                else {
764
                    value += int32_t(uint32_t(symbols[isym1].st_value) - uint32_t(symbols[isym2].st_value));
765
                    int scale = code.symscale1;
766
                    if (scale < 1) scale = 1;
767
                    valueScaled = value / scale + code.offset_mem;
768
                    if (valueScaled >= -(1 << 7) && valueScaled < (1 << 7))  fitBits |= IFIT_I8;
769
                    if (valueScaled >= -(1 << 15) && valueScaled < (1 << 15)) fitBits |= IFIT_I16;
770
                    if (valueScaled >= -((int64_t)1 << 31) && valueScaled < ((int64_t)1 << 31)) fitBits |= IFIT_I32;
771
                    // check if value is certain. uncertainty is stored in high part of st_value
772
                    uncertainty = (symbols[isym1].st_value >> 32) - (symbols[isym2].st_value >> 32);
773
                    valueScaled = value / scale + code.offset_mem + uncertainty;
774
                    if (code.symscale1 > 1) valueScaled /= code.symscale1;  // value is scaled
775
                    if ((valueScaled < -(1 << 7) || valueScaled >= (1 << 7)) && (fitBits & IFIT_I8))  uncertain |= 1;
776
                    if ((valueScaled < -(1 << 15) || valueScaled >= (1 << 15)) && (fitBits & IFIT_I16)) uncertain |= 1;
777
                    if ((valueScaled < -((int64_t)1 << 31) || valueScaled >= ((int64_t)1 << 31)) && (fitBits & IFIT_I32)) uncertain |= 1;
778
                    if (uncertain && (code.fitAddr & IFIT_LARGE)) {
779
                        // choose the larger version if optimization process has convergence problems
780
                        fitBits = (fitBits & (fitBits - 1)) | IFIT_I32;  // remove the lowest set bit
781
                        uncertain &= ~1;
782
                    }
783
                    code.fitAddr = fitBits;
784
                    code.sizeUnknown += uncertain;
785
                    //return uncertain;
786
                }
787
            }
788
            // one symbol
789
            else if (sectionHeaders[sym1section].sh_flags & SHF_IP) {
790
                // relative to instruction pointer
791
                if (sym1section != code.section || symbols[isym1].st_bind != STB_LOCAL) {
792
                    // symbol is in different section or not local. relocation needed
793
                    fitBits = IFIT_RELOC;
794
                    if (code.etype & XPR_JUMPOS) relSize >>= 2;  // value is scaled by 4
795
                    if (relSize <= 1 << 7)  fitBits |= IFIT_I8;   // necessary relocation size
796
                    if (relSize <= 1 << 15) fitBits |= IFIT_I16;
797
                    if (relSize <= 1 << 23) fitBits |= IFIT_I24;
798
                    if (relSize <= (uint64_t)1 << 31) fitBits |= IFIT_I32;
799
                    code.fitAddr = fitBits;
800
                    code.sizeUnknown += uncertain;
801
                    //return uncertain;
802
                }
803
                else if (pass < 4) {
804
                    // code.fitBits = IFIT_I16 | IFIT_I32;  // symbol values are not available yet
805
                    code.fitAddr = IFIT_I16 | IFIT_I24 | IFIT_I32;  // symbol values are not available yet
806
                    code.sizeUnknown += 1;
807
                    uncertain |= 1;
808
                    //return 1;
809
                }
810
                else {  // self-relative address to local symbol
811
                    value = int32_t((uint32_t)symbols[isym1].st_value - (code.address + code.size * 4));
812
                    valueScaled = value;
813
                    valueScaled += code.offset_mem;
814
                    if (valueScaled >= -(1 << 15) && valueScaled < (1 << 15)) fitBits |= IFIT_I16;
815
                    if (valueScaled >= -(1 << 23) && valueScaled < (1 << 23)) fitBits |= IFIT_I24;
816
                    if (valueScaled >= -((int64_t)1 << 31) && valueScaled < ((int64_t)1 << 31)) fitBits |= IFIT_I32;
817
                    code.fitAddr = fitBits;
818
                    // check if value is certain. uncertainty is stored in high part of st_value and sh_link
819
                    uncertainty = int32_t((symbols[isym1].st_value >> 32) - sectionHeaders[code.section].sh_link);
820
                    valueScaled += uncertainty;
821
                    if ((valueScaled < -(1 << 7) || valueScaled >= (1 << 7)) && (fitBits & IFIT_I8))  uncertain |= 1;
822
                    if ((valueScaled < -(1 << 15) || valueScaled >= (1 << 15)) && (fitBits & IFIT_I16)) uncertain |= 1;
823
                    if ((valueScaled < -(1 << 23) || valueScaled >= (1 << 23)) && (fitBits & IFIT_I24)) uncertain |= 1;
824
                    if ((valueScaled < -((int64_t)1 << 31) || valueScaled >= ((int64_t)1 << 31)) && (fitBits & IFIT_I32)) uncertain |= 1;
825
                    if (uncertain && (code.fitAddr & IFIT_LARGE)) {
826
                        // choose the larger version if optimization process has convergence problems
827
                        fitBits = (fitBits & (fitBits - 1)) | IFIT_I32;  // remove the lowest set bit
828
                        uncertain &= ~1;
829
                    }
830
                    code.fitAddr = fitBits;
831
                    code.sizeUnknown += uncertain;
832
                    //return uncertain;
833
                }
834
            }
835
            else {
836
                // symbol is relative to data pointer. relocation needed
837
                fitBits = IFIT_RELOC;
838
                if (relSize <= 1 << 7)  fitBits |= IFIT_I8;
839
                if (relSize <= 1 << 15) fitBits |= IFIT_I16;
840
                if (relSize <= (uint64_t)1 << 31) fitBits |= IFIT_I32;
841
                code.fitAddr = fitBits;
842
                code.sizeUnknown += uncertain;
843
            }
844
        }
845
    }
846
    else {
847
        // no symbol. only a signed integer constant
848
        value = code.offset_mem;
849
        fitBits = 0;
850
        if (value >= -(int64_t)0x80 && value < 0x80) fitBits |= IFIT_I8;
851
        if (value >= -(int64_t)0x8000 && value < 0x8000) fitBits |= IFIT_I16;
852
        if (value >= -(int64_t)0x80000000 && value < 0x80000000) fitBits |= IFIT_I32;
853
        code.fitAddr = fitBits;
854
    }
855
 
856
    // check jump offset symbol
857
    if (code.sym5) {
858
        // there is a jump offset symbol
859
        value = code.offset_jump;                     // jump offset
860
        fitBits = 0;
861
 
862
        isym1 = findSymbol(code.sym5);
863
        if (isym1 <= 0) {
864
            code.sizeUnknown = 2; return 2;              // should not occur
865
        }
866
        // one symbol relative to instruction pointer
867
        if (symbols[isym1].st_unitsize == 0) uncertain = 2;  // symbol value is not known yet
868
        uint32_t sym1section = symbols[isym1].st_section; // symbol section
869
        if (sym1section < sectionHeaders.numEntries()) {
870
            // determine necessary relocation size if relocation needed
871
            uint64_t relSize;                       // maximum size of relocated address
872
            relSize = code_size >> 2;               // relocation size for code and constant data, scaled by 4
873
 
874
            if (sym1section != code.section || symbols[isym1].st_bind != STB_LOCAL) {
875
                // symbol is in different section or not local. relocation needed
876
                fitBits = IFIT_RELOC;
877
                if (relSize <= 1 << 7)  fitBits |= IFIT_I8;   // necessary relocation size
878
                if (relSize <= 1 << 15) fitBits |= IFIT_I16;
879
                if (relSize <= 1 << 23) fitBits |= IFIT_I24;
880
                if (relSize <= (uint64_t)1 << 31) fitBits |= IFIT_I32;
881
                code.fitJump = fitBits;
882
                code.sizeUnknown += uncertain;
883
                //return uncertain;
884
            }
885
            else if (pass < 4) {
886
                code.fitJump = IFIT_I16 | IFIT_I24 | IFIT_I32;  // symbol values are not available yet
887
                code.sizeUnknown += 1;
888
                uncertain = 1;
889
                //return 1;
890
            }
891
            else {
892
                // self-relative address to local symbol
893
                value = int32_t((uint32_t)symbols[isym1].st_value - (code.address + code.size * 4));
894
                valueScaled = value >> 2;  // jump address is scaled
895
                valueScaled += code.offset_jump;
896
                if (valueScaled >= -(1 << 7)  && valueScaled < (1 << 7))  fitBits |= IFIT_I8;
897
                if (valueScaled >= -(1 << 15) && valueScaled < (1 << 15)) fitBits |= IFIT_I16;
898
                if (valueScaled >= -(1 << 23) && valueScaled < (1 << 23)) fitBits |= IFIT_I24;
899
                if (valueScaled >= -((int64_t)1 << 31) && valueScaled < ((int64_t)1 << 31)) fitBits |= IFIT_I32;
900
                code.fitJump = fitBits;
901
                // check if value is certain. uncertainty is stored in high part of st_value and sh_link
902
                uncertainty = int32_t((symbols[isym1].st_value >> 32) - sectionHeaders[code.section].sh_link);
903
                valueScaled += uncertainty;
904
                if ((valueScaled < -(1 << 7)  || valueScaled >= (1 << 7)) && (fitBits & IFIT_I8))   uncertain |= 1;
905
                if ((valueScaled < -(1 << 15) || valueScaled >= (1 << 15)) && (fitBits & IFIT_I16)) uncertain |= 1;
906
                if ((valueScaled < -(1 << 23) || valueScaled >= (1 << 23)) && (fitBits & IFIT_I24)) uncertain |= 1;
907
                if ((valueScaled < -((int64_t)1 << 31) || valueScaled >= ((int64_t)1 << 31)) && (fitBits & IFIT_I32)) uncertain |= 1;
908
                if (uncertain && (code.fitAddr & IFIT_LARGE)) {
909
                    // choose the larger version if optimization process has convergence problems
910
                    fitBits = (fitBits & (fitBits - 1)) | IFIT_I32;  // remove the lowest set bit
911
                    uncertain &= ~1;
912
                    code.fitJump = fitBits;
913
                    //code.sizeUnknown += uncertain;
914
                }
915
                code.sizeUnknown += uncertain;
916
            }
917
        }
918
    }
919
    return uncertain;
920
}
921
 
922
 
923
// find format details in formatList from entry in instructionlist
924
uint32_t findFormat(SInstruction const & listentry, uint32_t imm) {
925
    // listentry: record in instructionlist or instructionlistId
926
    // imm: immediate operand, if any
927
 
928
    // make model instruction for lookupFormat
929
    STemplate instrModel;
930
    instrModel.a.il = listentry.format >> 8;
931
    instrModel.a.mode = (listentry.format >> 4) & 7;
932
    instrModel.a.ot = (listentry.format >> 5) & 4;
933
    if ((listentry.format & ~ 0x12F) == 0x200) {  // format 0x200, 0x220, 0x300, 0x320
934
        instrModel.a.mode2 = listentry.format & 7;
935
    }
936
    else if ((listentry.format & 0xFF0) == 0x270 && listentry.op1 < 8) {
937
        instrModel.a.mode2 = listentry.op1 & 7;
938
    }
939
    else instrModel.a.mode2 = 0;
940
    instrModel.a.op1 = listentry.op1;
941
    instrModel.b[0] = imm & 0xFF;
942
    // look op details for this format (from emulator2.cpp)
943
    return lookupFormat(instrModel.q);
944
}
945
 
946
// find the smallest representation that the floating point operand fits into
947
int fitFloat(double x) {
948
    if (x == 0.) return IFIT_I8 | FFIT_16 | FFIT_32 | FFIT_64;
949
    union {
950
        double d;
951
        struct {
952
            uint64_t mantissa: 52;
953
            uint64_t exponent: 11;
954
            uint64_t sign:      1;
955
        } f;
956
    } u;
957
    u.d = x;
958
    int fit = FFIT_64;
959
    // check if mantissa fits
960
    if ((u.f.mantissa & (((uint64_t)1 << 42) - 1)) == 0) fit |= FFIT_16;
961
    if ((u.f.mantissa & (((uint64_t)1 << 29) - 1)) == 0) fit |= FFIT_32;
962
    // check if exponent fits, except for infinity or nan
963
    if (u.f.exponent != 0x7FF) {
964
        int ex = int(u.f.exponent - 0x3FF);
965
        if (ex < -14 || ex > 15) fit &= ~FFIT_16;
966
        if (ex < -126 || ex > 127) fit &= ~FFIT_32;
967
    }
968
    // check if x fits into a small integer
969
    if (fit & FFIT_16) {
970
        int i = int(x);
971
        if (i == x && i >= -128 && i < 128) {
972
            fit |= IFIT_I8;
973
        }
974
    }
975
    return fit;
976
}
977
 
978
// find an instruction variant that fits the code
979
int CAssembler::fitCode(SCode & code) {
980
    // return value:
981
    // 0: does not fit
982
    // 1: fits
983
    uint32_t bestInstr = 0;                      // best fitting instruction variant, index into instructionlistId
984
    uint32_t bestSize  = 99;                     // size of best fitting instruction variant
985
    SCode    codeTemp;                           // fitted code
986
    SCode    codeBest;                           // best fitted code
987
    uint32_t instrIndex = 0, ii;                 // index into instructionlistId
988
    uint32_t formatIx = 0;                       // index into formatList
989
    uint32_t isize;                              // il bits
990
    codeBest.category = 0;
991
 
992
    // find instruction by id    
993
    SInstruction3 sinstr;                        // make dummy record with instruction id as parameter to findAll
994
    if (code.instruction == II_ALIGN) {
995
        return 1;                                // alignment directive
996
    }
997
    sinstr.id = code.instruction;
998
    int32_t nInstr = instructionlistId.findAll(&instrIndex, sinstr);
999
 
1000
    if (code.etype & (XPR_IMMEDIATE | XPR_OFFSET | XPR_LIMIT | XPR_JUMPOS)) {
1001
        // there is an immediate constant, offset, or limit.
1002
        // generate specific error message if large constant cannot fit
1003
        if ((code.etype & XPR_OFFSET) && !(code.etype & XPR_IMMEDIATE) && !(code.fitAddr & IFIT_I32))  {
1004
            errors.reportLine(ERR_OFFSET_TOO_LARGE);
1005
        }
1006
        //else if ((code.etype & XPR_LIMIT) && !(code.fitBits & (IFIT_U16 | IFIT_U32)))  errors.reportLine(ERR_LIMIT_TOO_LARGE);
1007
        else if ((code.etype & XPR_IMMEDIATE) && !(code.etype & XPR_INT2)) {
1008
            if (!(code.fitNum & (IFIT_I16 | IFIT_I16SHIFT | IFIT_I32 | IFIT_I32SHIFT | FFIT_16 | FFIT_32)) && (code.etype & XPR_OPTIONS) && code.optionbits) {
1009
                errors.reportLine(ERR_IMMEDIATE_TOO_LARGE);
1010
            }
1011
        }
1012
    }
1013
    if (lineError) return 0;
1014
 
1015
    // loop through all instruction definitions with same id
1016
    for (ii = instrIndex; ii < instrIndex + nInstr; ii++) {
1017
        // category
1018
        code.instr1 = ii;
1019
        code.category = instructionlistId[ii].category;
1020
        // get variant bits from instruction list
1021
        variant = instructionlistId[ii].variant;  // instruction-specific variants
1022
 
1023
        switch (instructionlistId[ii].category) {
1024
        case 1:   // single format. find entry in formatList
1025
            formatIx = findFormat(instructionlistId[ii], code.value.w);
1026
            code.formatp = formatList + formatIx;
1027
            if (instructionFits(code, codeTemp, ii)) {
1028
                // check if smaller than previously found.
1029
                isize = codeTemp.size;
1030
                if (isize < bestSize) {
1031
                    bestSize = isize;
1032
                    bestInstr = ii;
1033
                    codeBest = codeTemp;
1034
                }
1035
            }
1036
            break;
1037
 
1038
        case 3:  // multi-format instructions. search all formats for the best one
1039
            for (formatIx = 0; formatIx < formatList3.numEntries(); formatIx++) {
1040
                code.formatp = &formatList3[formatIx];
1041
 
1042
                if (((uint64_t)1 << code.formatp->formatIndex) & instructionlistId[ii].format) {
1043
                    if (instructionFits(code, codeTemp, ii)) {
1044
                        // check if smaller than previously found. category 3 = multiformat preferred
1045
                        isize = codeTemp.size;
1046
                        if (isize < bestSize || (isize == bestSize && codeBest.category != 3)) {
1047
                            bestSize = isize;
1048
                            bestInstr = ii;
1049
                            codeBest = codeTemp;
1050
                        }
1051
                    }
1052
                }
1053
            }
1054
            break;
1055
 
1056
        case 4:  // jump instructions. search all formats for the best one
1057
            for (formatIx = 0; formatIx < formatList4.numEntries(); formatIx++) {
1058
                code.formatp = &formatList4[formatIx];
1059
                if (((uint64_t)1 << code.formatp->formatIndex) & instructionlistId[ii].format) {
1060
                    if (jumpInstructionFits(code, codeTemp, ii)) {
1061
                        // check if smaller than previously found. category 3 = multiformat preferred
1062
                        isize = codeTemp.size;
1063
                        if (isize < bestSize) {
1064
                            bestSize = isize;
1065
                            bestInstr = ii;
1066
                            codeBest = codeTemp;
1067
                        }
1068
                    }
1069
                }
1070
            }
1071
            break;
1072
 
1073
        default:
1074
            return 0;        // error in list
1075
        }
1076
    }
1077
 
1078
    if (bestSize > 4) {
1079
        errors.reportLine(checkCodeE(code));         // find reason why no format fits, and report error
1080
        return 0;
1081
    }
1082
 
1083
    code = codeBest;          // get the best fitting code
1084
    variant = instructionlistId[bestInstr].variant;  // instruction-specific variants
1085
 
1086
    checkCode2(code);         // check if operands are correct
1087
 
1088
    if (lineError) return 0;
1089
    return 1;
1090
}
1091
 
1092
 
1093
// check if instruction fits into specified format
1094
bool CAssembler::instructionFits(SCode const & code, SCode & codeTemp, uint32_t ii) {
1095
    // code: structure defining all operands and options
1096
    // codeTemp: fitted code
1097
    // ii: index into instructionlistId
1098
    // formatIndex: index into formatList
1099
 
1100
    uint32_t shiftCount;                         // shift count for shifted constant
1101
    // copy code structure and add details
1102
    codeTemp = code;
1103
    codeTemp.category = code.formatp->category;
1104
    codeTemp.size = (code.formatp->format2 >> 8) & 3;
1105
    if (codeTemp.size == 0) codeTemp.size = 1;
1106
    codeTemp.instr1 = ii;
1107
 
1108
    if (instructionlistId[ii].opimmediate == OPI_IMPLICIT && !(code.etype & XPR_IMMEDIATE)) {
1109
        // There is no immediate operand. instructionlistId[ii] has an implicit immediate operand.
1110
        // Insert implicit operand and see if it fits
1111
        codeTemp.value.u = instructionlistId[ii].implicit_imm;
1112
        codeTemp.etype |= XPR_INT;
1113
        codeTemp.fitNum = 0xFFFFFFFF;
1114
    }
1115
 
1116
    // check vector use
1117
    bool useVectors = (code.dtype & TYP_FLOAT)
1118
        || (code.dest & 0xE0) == REG_V
1119
        || (code.reg1 & 0xE0) == REG_V
1120
        || (code.reg2 & 0xE0) == REG_V;
1121
 
1122
    if (useVectors) {
1123
        if (!(code.formatp->vect)) return false;  // vectors not supported
1124
    }
1125
    else if (code.formatp->vect & ~0x10) return false;    // vectors provided but not used
1126
 
1127
    // requested operand type
1128
    uint32_t requestOT = code.dtype & 7;
1129
    if (uint8_t(code.dtype) == uint8_t(TYP_FLOAT16)) {
1130
        requestOT = TYP_INT16 & 7;                // replace pseudo-type TYP_FLOAT16 with TYP_INT16
1131
        codeTemp.dtype = TYP_INT16;
1132
    }
1133
 
1134
    // operand type provided by this format
1135
    uint32_t formatOT = code.formatp->ot;
1136
    if (formatOT == 0x32) formatOT = 0x12 + (instructionlistId[ii].op1 & 1);  // int32 for even op1, int64 for odd op1
1137
    if (formatOT == 0x35) formatOT = 0x15 + (instructionlistId[ii].op1 & 1);  // float for even op1, double for odd op1
1138
    if (formatOT == 0) formatOT = requestOT;  // operand type determined by OT field
1139
    formatOT &= 7;
1140
    uint32_t scale2 = formatOT;
1141
    if (scale2 > 4) scale2 -= 3;  // operand size = 1 << scale2
1142
 
1143
    if (variant & (VARIANT_D0 | VARIANT_D2)) {  // no operand type
1144
        if (code.dtype == 0 && code.instruction != II_NOP) codeTemp.dtype = formatOT ? formatOT : 3;
1145
    }
1146
    else {
1147
        // check requested operand type 
1148
        if (formatOT <= 3 && requestOT < formatOT && (code.dtype & TYP_PLUS)) {
1149
            requestOT = formatOT;  // request allows bigger type
1150
            // codeTemp.dtype = formatOT;  // prevents merging with subsequent jump with smaller type than formatOT
1151
        }
1152
        if (requestOT != formatOT && code.dtype) return false;  // requested format type not supported
1153
 
1154
        // check if operand type supported by instruction
1155
        uint32_t optypessupport = useVectors ? (instructionlistId[ii].optypesscalar | instructionlistId[ii].optypesvector) : instructionlistId[ii].optypesgp;
1156
        optypessupport |= optypessupport >> 8;  // include types with optional support
1157
        if (!(optypessupport & (1 << requestOT))) return false;
1158
    }
1159
 
1160
    // check if there are enough register operands in this format
1161
    uint8_t opAvail = code.formatp->opAvail;
1162
    uint8_t numReg = ((opAvail >> 4) & 1) + ((opAvail >> 5) & 1) + ((opAvail >> 6) & 1) + ((opAvail >> 7) & 1); // number of registers available
1163
    uint8_t numReq = instructionlistId[ii].sourceoperands;  // number of registers required for this instruction
1164
    codeTemp.numOp = numReq;
1165
    if ((codeTemp.etype & XPR_IMMEDIATE) && numReq) numReq--;
1166
    if ((codeTemp.etype & XPR_MEM) && numReq) numReq--;
1167
    if ((codeTemp.etype & (XPR_MASK | XPR_FALLBACK)) && ((code.fallback & 0x1F) != (code.reg1 & 0x1F) || (code.reg1 & 0x1F) == 0x1F)) {
1168
        numReq += 2;  // fallback different from reg1, implies reg1 != destination
1169
    }
1170
    else if ((code.etype & XPR_REG1) && code.dest && code.reg1 != code.dest && !(variant & VARIANT_D3)) {
1171
        numReq++;     // reg1 != destination
1172
    }
1173
    if (numReq > numReg) return false;  // not enough registers in this format
1174
 
1175
    // check if mask available
1176
    if ((code.etype & XPR_MASK) && !(code.formatp->tmplate == 0xA || code.formatp->tmplate == 0xE)) return false;
1177
 
1178
    // check option bits 
1179
    if ((code.etype & XPR_OPTIONS) && code.optionbits != 0
1180
        && (code.formatp->tmplate != 0xE || !(code.formatp->imm2 & 2))
1181
        && (variant & VARIANT_On) && instructionlistId[ii].opimmediate != OPI_INT1688) return false; // only template E has option bits
1182
 
1183
    // check memory operand
1184
    if (code.etype & XPR_MEM) {
1185
        if (code.formatp->mem == 0) return false;  // memory operand requested but not supported
1186
        if (code.etype & XPR_SYM1) {  // has data symbol
1187
            if (code.etype & XPR_SYM2) {  // has difference between two symbols
1188
                codeTemp.sizeUnknown = 1;
1189
            }
1190
            //if (!(code.fitNumX & IFIT_I32)) return false;  // assume symbol address requires 32 bits. local symbol difference resolved later when sizeUnknown = 1
1191
        }
1192
        // check index and scale factor
1193
        if (code.etype & XPR_INDEX) {
1194
            if (!(code.formatp->mem & 4)) return false;  // index not supported
1195
            if ((code.formatp->scale & 4) && code.scale != -1) return false;  // scale factor must be -1
1196
            if ((code.formatp->scale & 2) && code.scale != 1 << scale2) return false;  // scale factor must match operand type
1197
            if (!(code.formatp->scale & 6) && code.scale != 1) return false;  // scale factor must be 1
1198
        }
1199
        else {  // no index requested
1200
            if (code.formatp->mem & 4) {
1201
                codeTemp.index = 0x1F;  // RT = 0x1F means no index
1202
                codeTemp.scale = 1 << scale2;
1203
            }
1204
        }
1205
 
1206
        // check address offset size
1207
        if (code.etype & (XPR_OFFSET | XPR_SYM1)) {
1208
            if (!(code.formatp->mem & 0x10)) return false;  // format does not support memory offset
1209
            switch (code.formatp->addrSize) {
1210
            case 1:
1211
                if (code.sym1 && !(code.fitAddr & IFIT_I8)) return false;
1212
                if ((code.base & 0x1F) >= 0x1C && (code.base & 0x1F) != 0x1F) return false; // ip, datap, threadp must have 16 bit offset
1213
                // no relocation. scale factor depends on operand size
1214
                if (code.offset_mem & ((1 << scale2) - 1)) return false;  // offset is not a multiple of the scale factor
1215
                if ((code.offset_mem >> scale2) < -0x80 || (code.offset_mem >> scale2) > 0x7F) return false;
1216
                break;
1217
            case 2:
1218
                if (!(code.fitAddr & IFIT_I16)) return false;
1219
                break;
1220
            case 4:
1221
                if (!(code.fitAddr & IFIT_I32)) return false;
1222
                break;
1223
            default:
1224
                return false;
1225
            }
1226
        }
1227
        else if ((code.formatp->addrSize) < 2 && (code.base & 0x1F) >= 0x1C && (code.base & 0x1F) != 0x1F) return false;
1228
 
1229
        // fail if limit required and not supported, or supported and not required
1230
        if (code.etype & XPR_LIMIT) {
1231
            if (!(code.formatp->mem & 0x20)) return false;     // limit not supported by format
1232
            switch (code.formatp->addrSize) {
1233
            case 1: if (code.value.u >= 0x100) return false;
1234
                break;
1235
            case 2: if (code.value.u >= 0x10000) return false;
1236
                break;
1237
            case 4: if (uint64_t(code.value.u) >= 0x100000000U) return false;
1238
                break;
1239
            }
1240
        }
1241
        else {
1242
            if (code.formatp->mem & 0x20) return false;     // limit provided but not requested
1243
        }
1244
 
1245
        // check length/broadcast/scalar
1246
        if (code.etype & XPR_SCALAR) {                            // scalar operand requested
1247
            if ((code.formatp->vect & 6) != 0) {
1248
                codeTemp.length = 31;                            // disable length or broadcast option
1249
            }
1250
        }
1251
        else if (code.etype & XPR_LENGTH) {              // vector length specified
1252
            if ((code.formatp->vect & 2) == 0) return false;  // vector length not in this format
1253
        }
1254
        else if (code.etype & XPR_BROADC) {              // vector broadcast specified
1255
            if ((code.formatp->vect & 4) == 0) return false;  // vector broadcasst not in this format
1256
        }
1257
    }
1258
    else if (code.formatp->mem) return false;  // memory operand supported by not requested
1259
 
1260
    // check immediate operand
1261
    //bool isFloat = (code.dtype & TYP_FLOAT32 & 0xF0) != 0; // specified type is float or double or float128 
1262
    bool hasImmediate = (code.etype & XPR_IMMEDIATE) != 0; // && !(code.etype & (XPR_OFFSET | XPR_LIMIT)));
1263
 
1264
    /*if ((variant & VARIANT_M1) && code.formatp->mem && code.formatp->tmplate == 0xE) {
1265
        // variant M1: immediate operand is in IM3. No further check needed
1266
        // to do: fail if relocation on immediate
1267
        return hasImmediate;  // succeed if there is an immediate
1268
    } */
1269
 
1270
    if (hasImmediate) {
1271
        if (code.formatp->immSize == 0 && instructionlistId[ii].sourceoperands < 4) return false;  // immediate not supported
1272
 
1273
        // to do: check if relocation
1274
 
1275
        // check if size fits. special cases in instruction list
1276
        switch (instructionlistId[ii].opimmediate) {
1277
        case OPI_IMPLICIT:  // implicit value of immediate operand. Accept explicit value only if same
1278
            if (codeTemp.value.u != instructionlistId[ii].implicit_imm) return false;
1279
            break;
1280
 
1281
        case OPI_INT8SH:  // im2 << im1
1282
            if (code.fitNum & (IFIT_I8 | IFIT_I8SHIFT)) {   // fits im2 << im1
1283
                shiftCount = bitScanForward(codeTemp.value.u);
1284
                codeTemp.value.u = (codeTemp.value.u >> shiftCount << 8) | shiftCount;
1285
                codeTemp.fitNum |= IFIT_I16;  // make it accepted below
1286
                break;
1287
            }
1288
            return false;
1289
        case OPI_INT16SH16: // im12 << 16
1290
            if (code.fitNum & (IFIT_I16 | IFIT_I16SH16)) {   // fits im2 << 16
1291
                codeTemp.value.u = codeTemp.value.u >> 16;
1292
                codeTemp.fitNum |= IFIT_I16;  // make it accepted below
1293
                break;
1294
            }
1295
            return false;
1296
        case OPI_INT32SH32: // im2 << 32
1297
            if (code.fitNum & (IFIT_I32 | IFIT_I32SH32)) {   // fits im2 << 32
1298
                codeTemp.value.u = codeTemp.value.u >> 32;
1299
                codeTemp.fitNum |= IFIT_I32;  // make it accepted below
1300
                break;
1301
            }
1302
            return false;
1303
        case OPI_UINT8: // 8 bit unsigned integer
1304
            if (value0 < 0x100 && value0 > -(int64_t)0x80U) return true;
1305
            return false;
1306
        case OPI_UINT16: // 16 bit unsigned integer
1307
            if (value0 < 0x10000 && value0 > -(int64_t)0x8000U) return true;
1308
            return false;
1309
        case OPI_UINT32: // 32 bit unsigned integer
1310
            //if (code.fitNum & IFIT_U32) return true; // this does not work if a float type is specified
1311
            if (value0 < 0x100000000 && value0 > -(int64_t)0x80000000U) return true;
1312
            return false;
1313
        case OPI_INT886:  // three integers
1314
            codeTemp.value.u = (codeTemp.value.w & 0xFF) | (codeTemp.value.u >> 24);
1315
            return true;
1316
        case OPI_INT1688:  // three integers: 16 + 8 + 8 bits
1317
            codeTemp.value.u = (codeTemp.value.w & 0xFFFF) | (codeTemp.value.u >> 16 & 0xFF0000) | codeTemp.optionbits << 24;
1318
            return true;
1319
        case OPI_OT:  // constant of same type as operand type
1320
            if ((uint8_t(code.dtype) & ~TYP_UNS) <= uint8_t(TYP_INT32) && code.formatp->immSize >= 4) return true;
1321
        }
1322
        // check if size fits. general cases
1323
        switch (code.formatp->immSize) {
1324
        case 1:
1325
            if (codeTemp.fitNum & IFIT_I8) break;  // fits
1326
            if ((variant & VARIANT_U0) && (codeTemp.fitNum & IFIT_U8)) break; // unsigned fits
1327
            if ((codeTemp.dtype & 0x1F) == (TYP_INT8 & 0x1F) && (codeTemp.fitNum & IFIT_U8)) break;  // 8 bit size fits unsigned with no sign extension
1328
            return false;
1329
        case 2:
1330
            if (codeTemp.fitNum & (IFIT_I16 | FFIT_16)) break;  // fits
1331
            if ((variant & VARIANT_U0) && (codeTemp.fitNum & IFIT_U16)) break; // unsigned fits
1332
            if ((codeTemp.dtype & 0x1F) == (TYP_INT16 & 0x1F) && code.formatp->tmplate != 0xC && (codeTemp.fitNum & IFIT_U16)) break;  // 16 bit size fits unsigned with no sign extension
1333
            if ((code.formatp->imm2 & 4) && !(variant & VARIANT_On) && (codeTemp.fitNum & IFIT_I16SHIFT)) {
1334
                // fits with im2 << im3
1335
                shiftCount = bitScanForward(codeTemp.value.u);
1336
                codeTemp.value.u >>= shiftCount;
1337
                codeTemp.optionbits = shiftCount;
1338
                break;
1339
            }
1340
            if (variant & VARIANT_H0) break; // half precision fits
1341
            return false;
1342
        case 4:
1343
            if ((code.dtype & 0xFF) == (TYP_FLOAT32 & 0xFF))  break;  // float32 must be rounded to fit
1344
            if (codeTemp.fitNum & (IFIT_I32 | FFIT_32)) break;  // fits
1345
            if ((codeTemp.fitNum & IFIT_U32) && (code.dtype & 0xFF) == (TYP_INT32 & 0xFF)) break;  // fits
1346
            if ((variant & VARIANT_U0) && (codeTemp.fitNum & IFIT_U32)) break; // unsigned fits
1347
            if (variant & VARIANT_H0) break; // half precision fits
1348
            if ((codeTemp.dtype & 0x1F) == (TYP_INT32 & 0x1F) && (codeTemp.fitNum & IFIT_U32)) break;  // 32 bit size fits unsigned with no sign extension
1349
            if ((code.formatp->imm2 & 8) && (codeTemp.fitNum & IFIT_I32SHIFT)) {
1350
                // fits with im4 << im2
1351
                shiftCount = bitScanForward(codeTemp.value.u);
1352
                codeTemp.value.u = ((codeTemp.value.u >> shiftCount) & 0xFFFFFFFF) | ((uint64_t)shiftCount << 32); // store shift count in upper half
1353
                break;
1354
            }
1355
            return false;
1356
        case 8:
1357
            break;
1358
        default:; // other values should not occur in table
1359
        }
1360
    }
1361
    else if ((code.formatp->immSize != 0) && !(code.etype & (XPR_OFFSET | XPR_LIMIT))
1362
        && instructionlistId[ii].sourceoperands && code.category != 1) {
1363
        return false;  // immediate operand provided but not required
1364
    }
1365
    return true;
1366
}
1367
 
1368
// check if instruction fits into specified format
1369
bool CAssembler::jumpInstructionFits(SCode const & code, SCode & codeTemp, uint32_t ii) {
1370
    // code: structure defining all operands and options
1371
    // codeTemp: fitted code
1372
    // ii: index into instructionlistId
1373
    // formatIndex: index into formatList4
1374
 
1375
    //uint8_t offsetSize = 0;              // number of bytes to use in relative address
1376
    //uint8_t immediateSize = 0;           // number of bytes to use in immediate operand
1377
    bool offsetRelocated = false;        // relative offset needs relocation
1378
    //bool immediateRelocated = false;     // immediate operand needs relocation
1379
 
1380
    codeTemp = code;
1381
    codeTemp.category = code.formatp->category;
1382
    codeTemp.size = (code.formatp->format2 >> 8) & 3;
1383
    codeTemp.instr1 = ii;
1384
 
1385
    // check vector use
1386
    bool useVectors = (code.dtype & TYP_FLOAT) || (code.dest & 0xE0) == REG_V || (code.reg1 & 0xE0) == REG_V;
1387
    if (useVectors) {
1388
        if (!(code.formatp->vect)) return false;  // vectors not supported
1389
    }
1390
 
1391
    // operand type provided by this format
1392
    uint32_t formatOT = code.formatp->ot;
1393
    if (formatOT == 0) formatOT = code.dtype;  // operand type determined by OT field
1394
    formatOT &= 7;
1395
 
1396
    // check requested operand type
1397
    uint32_t requestOT = code.dtype & 7;
1398
    if (formatOT <= 3 && requestOT < formatOT && (code.dtype & TYP_PLUS)) {
1399
        requestOT = formatOT;  // request allows bigger type
1400
        codeTemp.dtype = formatOT;
1401
    }
1402
    if (requestOT != formatOT && code.dtype) return false;  // requested format type not supported
1403
 
1404
    // check if operand type supported by instruction
1405
    uint32_t optypessupport = useVectors ? (instructionlistId[ii].optypesscalar | instructionlistId[ii].optypesvector) : instructionlistId[ii].optypesgp;
1406
    optypessupport |= optypessupport >> 8;  // include types with optional support
1407
    if (!(optypessupport & (1 << requestOT))) return false;
1408
 
1409
    // check if there are enough register operands in this format
1410
    uint8_t opAvail = code.formatp->opAvail;
1411
    uint8_t numReg = ((opAvail >> 4) & 1) + ((opAvail >> 5) & 1) + ((opAvail >> 7) & 1); // number of registers available
1412
    uint8_t numReq = instructionlistId[ii].sourceoperands;  // number of registers required for this instruction
1413
    if ((code.etype & XPR_REG1) && code.dest && code.reg1 != code.dest && numReq > 2) {
1414
        numReq++;     // reg1 != destination, except if no reg2
1415
    }
1416
    if (code.formatp->jumpSize) numReq--;
1417
    if ((code.etype & (XPR_IMMEDIATE | XPR_MEM)) && numReq) numReq--;
1418
    if ((code.etype & XPR_INT2) && numReq) numReq--;
1419
    if (numReq > numReg) return false;  // not enough registers in this format
1420
 
1421
    // check if correct number of registers specified
1422
    uint8_t nReg = 0;
1423
    for (int j = 0; j < 3; j++) nReg += (code.etype & (XPR_REG1 << j)) != 0;
1424
    if (code.dest && code.dest != code.reg1) nReg++;
1425
    if (nReg != numReq) return false;
1426
 
1427
    // check if mask available
1428
    if ((code.etype & XPR_MASK) && !(fInstr->tmplate == 0xA || fInstr->tmplate == 0xE)) return false;
1429
 
1430
    // self-relative jump offset
1431
    if (code.etype & XPR_JUMPOS) {
1432
        if (!(code.formatp->jumpSize)) return false;
1433
        switch (code.formatp->jumpSize) {
1434
        case 0:  // no offset
1435
            if (code.offset_jump || offsetRelocated) return false;
1436
            break;
1437
        case 1:  // 1 byte
1438
            if (!(code.fitJump & IFIT_I8)) return false;
1439
            break;
1440
        case 2:  // 2 bytes
1441
            if (!(code.fitJump & IFIT_I16)) return false;
1442
            break;
1443
        case 3:  // 3 bytes
1444
            if (!(code.fitJump & IFIT_I24)) return false;
1445
            break;
1446
        case 4:  // 4 bytes
1447
            if (!(code.fitJump & IFIT_I32)) return false;
1448
            break;
1449
        }
1450
    }
1451
    else { // no self-relative jump offset
1452
        if (code.formatp->jumpSize) return false;
1453
    }
1454
 
1455
    if (instructionlistId[ii].opimmediate == OPI_IMPLICIT && !(code.etype & XPR_IMMEDIATE)) {
1456
        // There is no immediate operand. instructionlistId[ii] has an implicit immediate operand.
1457
        // Insert implicit operand and see if it fits
1458
        codeTemp.value.u = instructionlistId[ii].implicit_imm;
1459
        codeTemp.etype |= XPR_INT;
1460
        codeTemp.fitNum = 0xFFFFFFFF;
1461
    }
1462
 
1463
    // immediate operand
1464
    if (codeTemp.etype & XPR_IMMEDIATE) {
1465
        if (code.dtype & TYP_FLOAT) {
1466
            if (variant & VARIANT_I2) {
1467
                // immediate should be integer
1468
                codeTemp.etype = (code.etype & ~XPR_FLT) | XPR_INT;
1469
                codeTemp.value.i = (int64_t)code.value.d;
1470
                switch (code.formatp->immSize) {
1471
                case 0:  // no immediate
1472
                    return false;
1473
                case 1:  // 1 byte
1474
                    if (codeTemp.value.i < -0x80 || codeTemp.value.i > 0x7F) return false;
1475
                    break;
1476
                case 2:  // 2 bytes
1477
                    if (codeTemp.value.i < -0x8000 || codeTemp.value.i > 0x7FFF) return false;
1478
                    break;
1479
                case 4:  // 4 bytes
1480
                    if (-codeTemp.value.i > 0x80000000u || codeTemp.value.i > 0x7FFFFFFF) return false;
1481
                    break;
1482
                }
1483
            }
1484
            else {
1485
                // immediate is floating point or small integer converted to floating point
1486
                int fit = code.fitNum;
1487
                if ((code.dtype & 0xFF) <= (TYP_FLOAT32 & 0xFF)) fit |= FFIT_32;
1488
                switch (code.formatp->immSize) {
1489
                case 0:  // no immediate
1490
                    return false;
1491
                case 1:  // 1 byte
1492
                    if (!(fit & IFIT_I8)) return false;
1493
                    break;
1494
                case 2:  // 2 bytes
1495
                    if (!(fit & FFIT_16)) return false;
1496
                    break;
1497
                case 4:  // 4 bytes
1498
                    if (!(fit & FFIT_32)) return false;
1499
                    break;
1500
                case 8:  // 8 bytes., currently not supported
1501
                    ;
1502
                }
1503
            }
1504
        }
1505
        else {
1506
            // immediate integer operand
1507
            switch (code.formatp->immSize) {
1508
            case 0:  // no immediate
1509
                return false;
1510
            case 1:
1511
                if (codeTemp.fitNum & IFIT_I8) break;  // fits
1512
                if ((codeTemp.dtype & 0x1F) == (TYP_INT8 & 0x1F) && (codeTemp.fitNum & IFIT_U8)) break;  // 8 bit size fits unsigned with no sign extension
1513
                return false;
1514
            case 2:  // 2 bytes
1515
                if (instructionlistId[ii].opimmediate == OPI_INT1632) { // 16+32 bits
1516
                    if ((codeTemp.value.u >> 32) <= 0xFFFF) break;
1517
                    return false;
1518
                }
1519
                if (codeTemp.fitNum & IFIT_I16) break;  // fits
1520
                if ((codeTemp.dtype & 0x1F) == (TYP_INT16 & 0x1F) && (codeTemp.fitNum & IFIT_U16)) break;  // 16 bit size fits unsigned with no sign extension
1521
                return false;
1522
            case 4:  // 4 bytes
1523
                if (instructionlistId[ii].opimmediate == OPI_2INT16) { // 16+16 bits
1524
                    if (codeTemp.value.w <= 0xFFFF && (codeTemp.value.u >> 32) <= 0xFFFF) break;
1525
                    return false;
1526
                }
1527
                if (codeTemp.fitNum & IFIT_I32) break;  // fits
1528
                if ((codeTemp.dtype & 0x1F) == (TYP_INT32 & 0x1F) && (codeTemp.fitNum & IFIT_U32)) break;  // 32 bit size fits unsigned with no sign extension
1529
                return false;
1530
            case 8:  // 8 bytes
1531
                break;
1532
            default:  // does not fit other sizes
1533
                return false;
1534
            }
1535
        }
1536
    }
1537
    else {
1538
        // no explicit immediate
1539
        if (code.formatp->immSize && code.instruction != II_JUMP && code.instruction != II_CALL) return false;
1540
    }
1541
 
1542
    // memory operand
1543
    if (code.etype & XPR_MEM) {
1544
        if (code.formatp->mem == 0) return false;  // memory operand requested but not supported
1545
        uint32_t scale2 = formatOT;
1546
        if (scale2 > 4) scale2 -= 3;  // operand size = 1 << scale2
1547
        if (code.etype & XPR_SYM1) {  // has data symbol
1548
            if (code.etype & XPR_SYM2) {  // has difference between two symbols
1549
                codeTemp.sizeUnknown = 1;
1550
            }
1551
            if (!(code.fitAddr & IFIT_I32)) return false;  // assume symbol address requires 32 bits. local symbol difference resolved later when sizeUnknown = 1
1552
        }
1553
        // check index and scale factor
1554
        if (code.etype & XPR_INDEX) {
1555
            if (!(code.formatp->mem & 4)) return false;  // index not supported
1556
        }
1557
        else {  // no index requested
1558
            if (code.formatp->mem & 4) {
1559
                codeTemp.index = 0x1F;  // RT = 0x1F means no index
1560
                codeTemp.scale = 1 << scale2;
1561
            }
1562
        }
1563
 
1564
        // check address offset size
1565
        if (code.etype & XPR_OFFSET) {
1566
            if (!(code.formatp->mem & 0x10)) return false;  // format does not support memory offset
1567
            switch (code.formatp->addrSize) {
1568
            case 1:  // scale factor depends on operand size
1569
                if (code.offset_mem & ((1 << scale2) - 1)) return false;  // offset is not a multiple of the scale factor
1570
                if ((code.offset_mem >> scale2) < -0x80 || (code.offset_mem >> scale2) > 0x7F) return false;
1571
                break;
1572
            case 2:
1573
                if (!(code.fitAddr & IFIT_I16)) return false;
1574
                break;
1575
            case 4:
1576
                if (!(code.fitAddr & IFIT_I32)) return false;
1577
                break;
1578
            default:
1579
                return false;
1580
            }
1581
        }
1582
    }
1583
    else if (code.formatp->mem) return false;  // memory operand supported by not requested
1584
 
1585
    return true;
1586
}
1587
 
1588
 
1589
// Check code for correctness before fitting a format, and fix some code details
1590
void CAssembler::checkCode1(SCode & code) {
1591
 
1592
    // check code for correctness
1593
    if (code.etype & XPR_MEM) {
1594
        // check memory operand
1595
        bool useVectors = (code.dtype & TYP_FLOAT) != 0 || (code.dest & 0xE0) == REG_V || (code.reg1 & 0xE0) == REG_V;
1596
        if (useVectors && code.scale == -1) {
1597
            code.etype |= XPR_LENGTH;  code.length = code.index;  // index register is also length
1598
        }
1599
        int numOpt = ((code.etype & XPR_SCALAR) != 0) + ((code.etype & XPR_LENGTH) != 0) + ((code.etype & XPR_BROADC) != 0);
1600
        if (numOpt > 1) {errors.reportLine(ERR_CONFLICT_OPTIONS);  return;}  // conflicting options
1601
        if (numOpt && !useVectors && !(code.etype & XPR_SCALAR)) {errors.reportLine(ERR_VECTOR_OPTION);  return;}  // vector option on non-vector operands
1602
 
1603
        if (code.etype & XPR_INDEX) {
1604
            // check scale factor
1605
            const int dataSizeTable[8] = {1, 2, 4, 8, 16, 4, 8, 16}; // data size for each operant type
1606
            int8_t scale = code.scale;
1607
            if (scale != 1 && scale != -1 && scale != dataSizeTable[code.dtype & 7]) errors.reportLine(ERR_SCALE_FACTOR);
1608
            if (code.scale == -1 && code.length && code.length != code.index) {
1609
                errors.reportLine(ERR_NEG_INDEX_LENGTH);  return;
1610
            }
1611
        }
1612
        if (!(code.etype & XPR_BASE)) {
1613
            // no base pointer. check if there is a symbol with an implicit base pointer
1614
            int32_t symi1 = 0;
1615
            if (code.etype & XPR_SYM1) symi1 = findSymbol(code.sym1);
1616
            if ((code.etype & XPR_SYM2) || symi1 < 1 || !(symbols[symi1].st_other & STV_SECT_ATTR)) {
1617
                errors.reportLine(ERR_NO_BASE);
1618
            }
1619
        }
1620
    }
1621
    // check mask
1622
    if ((code.etype & XPR_MASK) && (code.mask & 0x1F) > 6) errors.reportLine(ERR_MASK_REGISTER);
1623
 
1624
    // check fallback
1625
    if (code.etype & XPR_MASK) {
1626
        if (code.fallback == 0) code.fallback = code.reg1 ? code.reg1 : 0x1F;  // default fallback is reg1, or 0 if no reg1
1627
        if ((code.fallback & 0xE0) == 0) code.fallback |= code.dest & 0xE0;    // get type of dest if fallback has no type
1628
    }
1629
 
1630
    // details for unsigned variants
1631
    if (code.dtype & TYP_UNS) {  // an unsigned type is specified  
1632
        switch (code.instruction) {
1633
        case II_DIV:  case II_DIV_EX:
1634
        case II_MUL_HI:  case II_MUL_EX:
1635
        case II_REM:  case II_SHIFT_RIGHT_S:
1636
        case II_MIN:  case II_MAX:
1637
            code.instruction |= 1;  // change to unsigned version
1638
            break;
1639
        default:;  // other instructions: do nothing
1640
        }
1641
    }
1642
 
1643
    // handle half precision
1644
    if (uint8_t(code.dtype) == uint8_t(TYP_FLOAT16)) {
1645
        switch (code.instruction) {
1646
        case II_ADD: case II_MUL: case II_DIV: case II_MUL_ADD:
1647
            code.instruction |= II_ADD_H & 0xFF000;    // change to half precision instruction
1648
            break;
1649
        case II_SUB:
1650
            if ((code.etype & XPR_IMMEDIATE) && !(code.etype & (XPR_MEM | XPR_REG2))) {
1651
                code.instruction = II_ADD_H; code.value.d = - code.value.d; // subtract constant changed to add -constant
1652
            }
1653
            else code.instruction = II_SUB_H;
1654
            break;
1655
        case II_SUB_REV:
1656
            if (code.value.i == 0) {   // -x
1657
                code.instruction = II_TOGGLE_BIT;
1658
                code.value.u = 15;
1659
            }
1660
            else errors.reportLine(ERR_WRONG_OPERANDS);
1661
            break;
1662
        case II_MOVE: case II_REPLACE: case II_REPLACE_EVEN: case II_REPLACE_ODD:
1663
            if (code.etype & XPR_INT) {   // convert integer to float16
1664
                if (abs(code.value.i) > 65504) errors.reportLine(ERR_OVERFLOW);
1665
                code.value.u = double2half(double(code.value.i));
1666
            }
1667
            else if (code.etype & XPR_FLT) {  // convert double to float16
1668
                if (code.value.d > 65504. || code.value.d < -65504.) errors.reportLine(ERR_OVERFLOW);
1669
                code.value.u = double2half(code.value.d);
1670
                code.etype = (code.etype & ~ XPR_IMMEDIATE) | XPR_INT;
1671
            }
1672
            if (code.instruction == II_SUB_H && (code.etype & XPR_IMMEDIATE)) {
1673
                code.value.w ^= 0x8000;
1674
                code.instruction &= ~1;  // convert sub_h constant to add_h -constant
1675
            }
1676
            code.dtype = TYP_INT16;
1677
            code.fitNum = IFIT_I16 | IFIT_I32;
1678
            break;
1679
        case II_STORE:
1680
            if (code.etype & XPR_INT) code.value.u = double2half(double(code.value.i));
1681
            else code.value.u = double2half(code.value.d);
1682
            code.dtype = TYP_INT16;
1683
            code.etype = (code.etype & ~ XPR_FLT) | XPR_INT;
1684
            break;
1685
        case II_ADD_H: case II_SUB_H: case II_MUL_H: case II_DIV_H: case II_MUL_ADD_H:
1686
            break;
1687
        default:
1688
            // no other instructions support half precision
1689
            errors.reportLine(ERR_WRONG_OPERANDS);
1690
        }
1691
    }
1692
 
1693
    // special case instructions 
1694
    switch (code.instruction) {
1695
    case II_STORE:
1696
        if ((code.dtype & TYP_FLOAT) && (code.etype & XPR_FLT) && !(code.reg1)) {
1697
            // store float constant
1698
          //  code.dtype = code.dtype + (TYP_INT32 - TYP_FLOAT32) | TYP_UNS;
1699
        }
1700
    }
1701
 
1702
    // check size needed for immediate operand and address
1703
    fitConstant(code);
1704
    fitAddress(code);
1705
 
1706
    if (code.instruction & II_JUMP_INSTR) {
1707
        // jump instruction 
1708
        code.category = 4;
1709
        // check register type
1710
        if (code.dtype && code.reg1) {
1711
            if ((code.dtype & 0xFF) <= (TYP_FLOAT16 & 0xFF)) { // must use g.p. registers
1712
                if (code.reg1 & REG_V) errors.reportLine(ERR_WRONG_REG_TYPE);
1713
            }
1714
            else {  // must use vector registers
1715
                if (code.reg1 & REG_R) errors.reportLine(ERR_WRONG_REG_TYPE);
1716
            }
1717
        }
1718
        // check if immediate operand too big
1719
        if (code.etype & XPR_IMMEDIATE) {
1720
            if (code.dtype & TYP_FLOAT) {
1721
                if ((code.dtype & 0xFF) >= (TYP_FLOAT64 & 0xFF) && !(code.fitNum & FFIT_32)) errors.reportLine(ERR_TOO_LARGE_FOR_JUMP);
1722
            }
1723
            else if (code.dtype & TYP_UNS) {
1724
                if ((code.dtype & 0x1F) >= (TYP_INT64 & 0x1F) && !(code.fitNum & IFIT_U32)) errors.reportLine(ERR_TOO_LARGE_FOR_JUMP);
1725
            }
1726
            else if ((code.dtype & 0x1F) >= (TYP_INT64 & 0x1F) && !(code.fitNum & IFIT_I32)) errors.reportLine(ERR_TOO_LARGE_FOR_JUMP);
1727
        }
1728
    }
1729
 
1730
    // optimize instruction
1731
    if (cmd.optiLevel) optimizeCode(code);
1732
}
1733
 
1734
 
1735
// Check register types etc. after fitting a format, and finish code details
1736
void CAssembler::checkCode2(SCode & code) {
1737
    if (code.instruction >= II_ALIGN) return;  // not an instruction
1738
 
1739
    // check type
1740
    if (code.dtype == 0) {
1741
        if ((code.etype & (XPR_INT | XPR_FLT | XPR_REG | XPR_REG1 | XPR_MEM)) && !(variant & (VARIANT_D0 | VARIANT_D2))) { // type not specified        
1742
            if (code.instruction == II_MOVE && code.category == 3 && !(code.etype & (XPR_IMMEDIATE | XPR_MEM))) {
1743
                // register-to-register move. find appropriate operand type
1744
                code.dtype = TYP_INT64;        // g.p. register. copy whole register ??
1745
                if (code.dest & REG_V) code.dtype = TYP_INT8;  // vector register. length must be divisible by tpe
1746
            }
1747
            else {
1748
                errors.reportLine(ERR_TYPE_MISSING);       // type must be specified
1749
                return;
1750
            }
1751
        }
1752
    }
1753
 
1754
    if (code.etype & XPR_MEM) {
1755
        // check memory operand
1756
        if (variant & VARIANT_M0) { // memory destination
1757
            if (code.etype & XPR_BROADC) {
1758
                errors.reportLine(ERR_DEST_BROADCAST); return;
1759
            }
1760
        }
1761
        if (code.base >= REG_R + 28 && code.base <= REG_R + 30 && (code.formatp->addrSize) > 1 && pass < 4) {
1762
            // cannot use r28 - r30 as base pointer with more than 8 bits offset
1763
            // (we don't get an error message here for a symbol address because the base pointer has not been assigned yet)
1764
            errors.reportLine(ERR_R28_30_BASE);
1765
        }
1766
        // check M1 option
1767
        /*if (variant & VARIANT_M1) {
1768
            if (code.formatp->tmplate == 0xE && (code.etype & XPR_MEM) && (code.etype & XPR_INT)
1769
                && (code.value.i > 63 || code.value.i < -63)) {
1770
                errors.reportLine(ERR_CONSTANT_TOO_LARGE);  return;
1771
            }
1772
            if (code.optionbits && (code.etype & XPR_MEM)) {
1773
                errors.reportLine(ERR_BOTH_MEM_AND_OPTIONS);  return;
1774
            }
1775
        }*/
1776
    }
1777
 
1778
    if (lineError) return;  // skip additional errors
1779
 
1780
    // Make list of operands from available operands. 0=none, 1=immediate, 2=memory, 5=RT, 6=RS, 7=RU, 8=RD
1781
    uint8_t opAvail = code.formatp->opAvail;    // Bit index of available operands
1782
    int j;                                        // loop counter
1783
 
1784
    // check if correct number of registers
1785
    uint32_t numReq = instructionlistId[code.instr1].sourceoperands;  // number of registers required for this instruction
1786
    if (code.category == 4 && (code.instruction & II_JUMP_INSTR) && (code.etype & XPR_JUMPOS) && numReq) numReq--;
1787
    if ((code.etype & XPR_IMMEDIATE) && numReq) numReq--;
1788
    if ((code.etype & XPR_INT2) && numReq) numReq--;
1789
    if ((code.etype & XPR_MEM) && !(variant & VARIANT_M0) && numReq) numReq--;
1790
 
1791
    uint32_t nReg = 0;
1792
    for (j = 0; j < 3; j++) nReg += (code.etype & (XPR_REG1 << j)) != 0;
1793
    if (nReg < numReq && !(variant & VARIANT_D3))
1794
        errors.reportLine(ERR_TOO_FEW_OPERANDS);
1795
    else if (nReg > numReq && instructionlistId[code.instr1].opimmediate != 25) {
1796
        errors.reportLine(ERR_TOO_MANY_OPERANDS);
1797
    }
1798
 
1799
    // count number of available registers in format
1800
    uint32_t regAvail = 0;
1801
    opAvail >>= 4;                            // register operands
1802
    while (opAvail) {
1803
        regAvail += opAvail & 1;
1804
        opAvail >>= 1;
1805
    }
1806
 
1807
    // expected register types
1808
    uint8_t regType = REG_R;
1809
    if ((code.formatp->vect & 1) || ((code.formatp->vect & 0x10) && (code.dtype & 4))) regType = REG_V;
1810
 
1811
    // check each of up to three source registers
1812
    for (j = 0; j < 3; j++) {
1813
        if (code.etype & (XPR_REG1 << j)) {  // register j used
1814
            if (variant & VARIANT_SPECS) {    // must be special register
1815
                if (((&code.reg1)[j] & 0xE0) <= REG_V) errors.reportLine(ERR_WRONG_REG_TYPE);
1816
            }
1817
            else if ((variant & (VARIANT_R1 << j))
1818
                || ((variant & VARIANT_RL) && (j == 2 || (&code.reg1)[j+1] == 0))) {
1819
                if (((&code.reg1)[j] & 0xE0) != REG_R) {  // this operand must be general purpose register
1820
                    errors.reportLine(ERR_WRONG_REG_TYPE);
1821
                }
1822
            }
1823
            else if (((&code.reg1)[j] & 0xE0) != regType) {  // wrong register type
1824
                errors.reportLine(ERR_WRONG_REG_TYPE);
1825
            }
1826
        }
1827
        if (lineError) return;  // skip additional errors
1828
    }
1829
    // check destination register
1830
    if (code.dest) {
1831
        if (variant & VARIANT_SPECD) {    // must be special register
1832
            if ((code.dest & 0xE0) <= REG_V) errors.reportLine(ERR_WRONG_REG_TYPE);
1833
        }
1834
        else if (variant & VARIANT_R0) {
1835
            if ((code.dest & 0xE0) != REG_R) {  // destination must be general purpose register
1836
                errors.reportLine(ERR_WRONG_REG_TYPE);
1837
            }
1838
        }
1839
        else if ((code.dest & 0xE0) != regType && code.dest != 2) {  // wrong register type
1840
            errors.reportLine(ERR_WRONG_REG_TYPE);
1841
        }
1842
        else if ((code.dest == 2) ^ ((variant & VARIANT_M0) != 0)) {  // operands in wrong order
1843
            errors.reportLine(ERR_OPERANDS_WRONG_ORDER);
1844
        }
1845
 
1846
        if (lineError) return;  // skip additional errors
1847
    }
1848
    if ((variant & (VARIANT_D0 | VARIANT_D1 | VARIANT_D2)) != 0 && code.dest != 0) {  // should not have destination        
1849
        errors.reportLine(ERR_NO_DESTINATION);
1850
    }
1851
    if ((variant & (VARIANT_D0 | VARIANT_D1)) == 0 && code.dest == 0) {  // should have destination
1852
        errors.reportLine(ERR_MISSING_DESTINATION);
1853
    }
1854
 
1855
    // check mask register
1856
    if ((code.etype & XPR_FALLBACK) && !(code.etype & XPR_MASK)) {   // fallback but no mask
1857
        code.mask = 7;                                               // no mask
1858
    }
1859
    if ((code.etype & (XPR_MASK | XPR_FALLBACK)) && (code.mask & 7) != 7) {  // mask used
1860
        if ((code.mask & 0xE0) != regType) {  // wrong type for mask register
1861
            errors.reportLine(ERR_WRONG_REG_TYPE);
1862
        }
1863
        else if ((code.fallback & 0xE0) != regType && (code.fallback & 0x1F) != 0x1F) {  // wrong type for fallback registser
1864
            if ((variant & VARIANT_RL) && code.fallback == code.reg1) {
1865
                // fallback has been assigned to reg1 in CAssembler::checkCode1, but reg1 is g.p. register
1866
                code.fallback = 0x5F;
1867
            }
1868
            else errors.reportLine(ERR_WRONG_REG_TYPE);
1869
        }
1870
        if ((code.etype & XPR_FALLBACK) && (variant & VARIANT_F0)) {  // cannot have fallback register
1871
            errors.reportLine(ERR_CANNOT_HAVEFALLBACK1);
1872
        }
1873
        // check if fallback is the right register
1874
        if (code.etype & XPR_FALLBACK) {
1875
            if (code.numOp >= 3 && code.fallback != code.reg1) {
1876
                errors.reportLine(ERR_3OP_AND_FALLBACK);
1877
            }
1878
        }
1879
    }
1880
 
1881
    // check scale factor
1882
    const int dataSizeTable[8] = { 1, 2, 4, 8, 16, 4, 8, 16 }; // data size for each operant type
1883
    int8_t scale = code.scale;
1884
    if (scale == 0) scale = 1;
1885
    if (((code.formatp->scale & 4) && scale != -1)     // scale must be -1
1886
        || (((code.formatp->scale & 6) == 2) && scale != dataSizeTable[code.dtype & 7]) // scale must match operand type
1887
        || (((code.formatp->scale & 6) == 0 && scale != 1 && (code.index & 0x1F) != 0x1F))) {                          // scale must be 1
1888
        errors.reportLine(ERR_SCALE_FACTOR);
1889
    }
1890
    // check vector length
1891
    int numOpt = ((code.etype & XPR_SCALAR) != 0) + ((code.etype & XPR_LENGTH) != 0) + ((code.etype & XPR_BROADC) != 0);
1892
    if (numOpt == 0 && (code.etype & XPR_MEM) && (code.formatp->vect & ~0x10) && !(code.etype & XPR_LIMIT) && !(code.formatp->vect & 0x80)) {
1893
        errors.reportLine(ERR_LENGTH_OPTION_MISS);  return;  // missing length option
1894
    }
1895
 
1896
    // check immediate type
1897
    if ((code.etype & XPR_FLT) && (variant & VARIANT_I2)) {
1898
        // immediate should be integer
1899
        code.etype = (code.etype & ~XPR_FLT) | XPR_INT;
1900
        //code.value.i = (int64_t)code.value.d;
1901
        code.value.i = value0;
1902
    }
1903
    if ((code.etype & XPR_INT) && !(code.etype & (XPR_LIMIT | XPR_INT2))) {
1904
        // check if value fits specified operand type
1905
        int ok = 1;
1906
        switch (code.dtype & 0x1F) {
1907
        case TYP_INT8 & 0x1F:
1908
            ok = code.fitNum & (IFIT_I8 | IFIT_U8);  break;
1909
        case TYP_INT16 & 0x1F:
1910
            ok = code.fitNum & (IFIT_I16 | IFIT_U16);  break;
1911
        case TYP_INT32 & 0x1F:
1912
            ok = code.fitNum & (IFIT_I32 | IFIT_U32);  break;
1913
        }
1914
        if (!ok && (instructionlistId[code.instr1].opimmediate & ~0x10) != OPI_INT32) {
1915
            errors.reportLine(ERR_CONSTANT_TOO_LARGE);
1916
        }
1917
    }
1918
 
1919
    // check options
1920
    if ((code.etype & XPR_OPTIONS) && !(variant & VARIANT_On) && code.formatp->category != 4) {
1921
        errors.reportLine(ERR_CANNOT_HAVE_OPTION);
1922
    }
1923
 
1924
    // details for unsigned variants
1925
    if (code.dtype & TYP_UNS) {  // an unsigned type is specified  
1926
        if ((variant & VARIANT_U3) && code.instruction == II_COMPARE && code.optionbits) code.optionbits |= 8;  // unsigned compare
1927
    }
1928
 
1929
    if (section) code.section = section;  // insert section
1930
}
1931
 
1932
 
1933
// find reason why no format fits, and return error number
1934
uint32_t CAssembler::checkCodeE(SCode & code) {
1935
    // check fallback
1936
    if ((code.etype & XPR_FALLBACK) && code.fallback != code.dest) {
1937
        if (((code.etype & XPR_MEM) && (code.dest & REG_V)) || code.index) return ERR_CANNOT_HAVEFALLBACK2;
1938
        if (instructionlistId[code.instr1].sourceoperands >= 3) return ERR_3OP_AND_FALLBACK;
1939
    }
1940
    // check three-operand instructions
1941
    if (instructionlistId[code.instr1].sourceoperands >= 3 && code.reg1 != code.dest && (code.etype & XPR_MEM) && ((code.dest & REG_V) || code.index)) {
1942
        return ERR_3OP_AND_MEM;
1943
    }
1944
    return ERR_NO_INSTRUCTION_FIT;  // any other reason
1945
}
1946
 
1947
 
1948
// optimize instruction. replace by more efficient instruction if possible
1949
void CAssembler::optimizeCode(SCode & code) {
1950
 
1951
    // is it a vector instruction?
1952
    bool hasVector = ((code.dest | code.reg1) & REG_V) != 0;
1953
 
1954
    // is it a floating point instruction?
1955
    bool isFloat = (code.dtype & TYP_FLOAT) != 0;
1956
 
1957
    if (code.instruction & II_JUMP_INSTR) {
1958
        // jump instruction
1959
        // optimize immediate jump offset operand
1960
        if ((code.instruction & 0xFF) == II_SUB && (code.etype & XPR_IMMEDIATE) == XPR_INT
1961
            && code.value.i >= -0x7F && code.value.i <= 0x80  && cmd.optiLevel
1962
            && ((code.dtype & 0xFF) == (TYP_INT32 & 0xFF) || ((code.dtype & 0xFF) <= (TYP_INT32 & 0xFF) && (code.dtype & TYP_PLUS)))) {
1963
            // subtract with conditional jump with 8-bit immediate and 8-bit address 
1964
            // should be replaced by addition of the negative constant
1965
            int32_t isym = 0;
1966
            if (code.etype & XPR_SYM1) isym = findSymbol(code.sym1);
1967
            if (isym <= 0 || symbols[isym].st_section == section || code_size <= (1 << 9)) {
1968
                // we are not sure yet, but chances are good that the address fits an 8-bit field. Replace sub by add
1969
                code.value.i = -code.value.i;       // change sign of immediate constant
1970
                code.instruction ^= (II_SUB ^ II_ADD);  // replace sub with add
1971
                if ((code.instruction & 0xFFFF00) == II_JUMP_CARRY) code.instruction ^= 0x100;  // carry condition is inverted
1972
            }
1973
        }
1974
        if ((code.fitNum & (IFIT_J16 | IFIT_J32) && (code.etype & XPR_IMMEDIATE) == XPR_INT && (code.instruction & 0xFE) == II_ADD)) {
1975
            // replace add with sub or vice versa
1976
            code.value.i = -code.value.i;       // change sign of immediate constant
1977
            code.instruction ^= (II_SUB ^ II_ADD);
1978
            if ((code.instruction & 0xFFFF00) == II_JUMP_CARRY) code.instruction ^= 0x100;  // carry condition is inverted
1979
            code.fitNum |= (code.fitNum & IFIT_J) >> 1;  // signal that it fits
1980
        }
1981
    }
1982
    else { // other instruction. optimize immediate operand
1983
        if ((code.etype & XPR_INT) /* && !(code.etype & (XPR_OFFSET | XPR_LIMIT | XPR_SYM1))*/ ) {
1984
            if ((code.instruction & 0xFFFFFFFE) == II_ADD && (code.fitNum & IFIT_J8) != 0) {
1985
                // we can make the instruction smaller by changing the sign of the constant and exchange add and sub 
1986
                // (we don't have to do this for 0x8000 and 0x80000000 because the can be fitted as 1 << x)
1987
                code.instruction ^= (II_ADD ^ II_SUB);  // replace add with sub or vice versa
1988
                code.value.i = -code.value.i;       // change sign of immediate constant
1989
                code.fitNum |= (code.fitNum & IFIT_J) >> 1;  // signal that it fits
1990
            }
1991
            else if (code.instruction == II_SUB && (code.fitNum & (IFIT_I16SH16 | IFIT_I16)) && !(code.fitNum & IFIT_I8)
1992
                && code.value.w != 0x80000000U && code.value.w != 0xFFFF8000U && code.dest == code.reg1 && !hasVector
1993
                && (((uint8_t)code.dtype == (uint8_t)TYP_INT32) || (((uint8_t)code.dtype < (uint8_t)TYP_INT32) && (code.dtype & TYP_PLUS)))) {
1994
                code.instruction = II_ADD;          // replace sub with add
1995
                code.value.i = -code.value.i;       // change sign of immediate constant
1996
            }
1997
            else if (code.instruction == II_SUB && (code.fitNum & IFIT_I8SHIFT) && !(code.fitNum & IFIT_I8) && !isFloat
1998
                && code.dest == code.reg1
1999
                && (((uint8_t)code.dtype >= (uint8_t)TYP_INT32) || (code.dtype & TYP_PLUS))) {
2000
                code.instruction = II_ADD;          // replace sub with add
2001
                code.value.i = -code.value.i;       // change sign of immediate constant
2002
                code.fitNum &= ~(IFIT_I16 | IFIT_I16SH16 | IFIT_I32SH32);
2003
            }
2004
            else if (code.instruction == II_SUB && (code.fitNum & IFIT_I32SH32) && !(code.fitNum & (IFIT_I16SHIFT | IFIT_I32))
2005
                && (((uint8_t)code.dtype == (uint8_t)TYP_INT64) || (code.dtype & TYP_PLUS)) && !isFloat) {
2006
                code.instruction = II_ADD;          // replace sub with add
2007
                code.value.i = -code.value.i;       // change sign of immediate constant
2008
            }
2009
            else if ((code.instruction == II_MOVE || code.instruction == II_AND)
2010
                && (code.fitNum & IFIT_U32) && !(code.fitNum & (IFIT_I32 | IFIT_I16SHIFT))
2011
                && ((uint8_t)code.dtype == (uint8_t)TYP_INT64) && !hasVector) {
2012
                code.dtype = TYP_INT32;             // changing type to int32 will zero extend
2013
            }
2014
            /*else if (code.instruction == II_MOVE
2015
                && (code.fitNum & IFIT_U16) && !(code.fitNum & IFIT_I16)
2016
                && ((uint8_t)code.dtype >= (uint8_t)TYP_INT32) && !hasVector
2017
                && !(code.etype & (XPR_REG | XPR_MEM | XPR_OPTION | XPR_SYM1))) {
2018
                    code.instruction = II_MOVE_U;
2019
                    code.dtype = TYP_INT64;
2020
            } */
2021
            else if (code.instruction == II_OR && (code.value.u & (code.value.u-1)) == 0 && !(code.fitNum & IFIT_I8)) {
2022
                code.instruction = II_SET_BIT;                  // OR with a power of 2
2023
                code.value.u = bitScanReverse(code.value.u);
2024
                code.fitNum = IFIT_I8 | IFIT_I16 | IFIT_I32;
2025
            }
2026
            else if (code.instruction == II_AND && (~code.value.u & (~code.value.u-1)) == 0 && !(code.fitNum & IFIT_I8)) {
2027
                code.instruction = II_CLEAR_BIT;                // AND with ~(a power of 2)
2028
                code.value.u = bitScanReverse(~code.value.u);
2029
                code.fitNum = IFIT_I8 | IFIT_I16 | IFIT_I32;
2030
            }
2031
            else if (code.instruction == II_XOR && (code.value.u & (code.value.u-1)) == 0 && !(code.fitNum & IFIT_I8)) {
2032
                code.instruction = II_TOGGLE_BIT;               // XOR with a power of 2
2033
                code.value.u = bitScanReverse(code.value.u);
2034
                code.fitNum = IFIT_I8 | IFIT_I16 | IFIT_I32;
2035
            }
2036
        }
2037
        if ((code.etype & XPR_FLT) && !(code.etype & (XPR_OFFSET | XPR_LIMIT | XPR_SYM1))) {
2038
            if (code.instruction == II_SUB && (code.fitNum & FFIT_16) && (uint8_t)code.dtype >= (uint8_t)TYP_FLOAT16) {
2039
                code.instruction = II_ADD;          // replace sub with add
2040
                code.value.d = -code.value.d;       // change sign of immediate constant
2041
            }
2042
        }
2043
    }
2044
 
2045
    // optimize -float as toggle_bit
2046
    if (code.instruction == II_SUB_REV && (code.etype & XPR_IMMEDIATE) && (code.dtype & TYP_FLOAT)
2047
    && code.value.i == 0 && (code.etype & XPR_REG1) && !(code.etype & XPR_REG2)) {
2048
        // code is -v represented as 0-v. replace by flipping bit
2049
        uint32_t bits = 1 << (code.dtype & 7);  // number of bits in floating point type
2050
        code.instruction = II_TOGGLE_BIT;
2051
        code.value.u = bits - 1;
2052
        code.etype = ((code.etype & ~XPR_IMMEDIATE) | XPR_INT);
2053
    }
2054
 
2055
    // optimize multiply and divide instructions
2056
    if ((code.instruction == II_MUL || code.instruction == II_DIV) && (code.etype & XPR_IMMEDIATE)) {
2057
        if (code.dtype & TYP_INT) { // integer multiplication
2058
            // check if constant is positive and a power of 2
2059
            if (code.value.i <= 0 || (code.value.u & (code.value.u - 1))) return;
2060
            if (code.instruction == II_MUL) {
2061
                // integer multiplication by power of 2. replace by left shift
2062
                code.instruction = II_SHIFT_LEFT;
2063
                code.value.u = bitScanReverse(code.value.u);
2064
            }
2065
            else if (code.dtype & TYP_UNS) {
2066
                // unsigned division by power of 2. replace by right shift
2067
                // We are not optimizing signed division because this requires multiple instructions and registers
2068
                code.instruction = II_SHIFT_RIGHT_U;
2069
                code.value.u = bitScanReverse(code.value.u);
2070
            }
2071
        }
2072
        else if (code.dtype & TYP_FLOAT) {
2073
            // floating point multiplication or division
2074
            // check if constant is a power of 2
2075
            int shiftCount = 0xFFFFFFFF;          // shift count to replace multiplication by power of 2
2076
            if ((code.etype & XPR_INT) && code.value.i > 0 && (code.value.u & (code.value.u-1)) == 0) {
2077
                // positive integer power of 2
2078
                shiftCount = bitScanReverse(code.value.u);
2079
                if (code.instruction == II_DIV) shiftCount = -shiftCount;
2080
            }
2081
            else if ((code.etype & XPR_FLT) && code.value.d != 0.) {
2082
                int32_t exponent = (code.value.u >> 52) & 0x7FF;  // exponent field of double
2083
                if ((code.value.u & ((uint64_t(1) << 52) - 1)) == 0 && exponent != 0 && exponent != 0x7FF) {
2084
                    // value is a power of 2, and not inf, nan, or subnormal
2085
                    shiftCount = exponent - 0x3FF;
2086
                    if (code.instruction == II_DIV) shiftCount = -shiftCount;
2087
                }
2088
            }
2089
            if (shiftCount == (int)0xFFFFFFFF) return;  // not a power of 2. cannot optimize
2090
            if (shiftCount >= 0 || cmd.optiLevel >= 3) {
2091
                // replace by mul_2pow instruction
2092
                // use negative powers of 2 only in optimization level 3, because subnormals are ignored
2093
                code.instruction = II_MUL_2POW;
2094
                code.value.i = shiftCount;
2095
                code.etype = (code.etype & ~XPR_IMMEDIATE) | XPR_INT;
2096
            }
2097
            else if (code.instruction == II_DIV) {
2098
                // replace division by power of 2 to multiplication by the reciprocal
2099
                code.instruction = II_MUL;
2100
                if (code.etype & XPR_FLT) code.value.d = 1. / code.value.d;
2101
                else {
2102
                    code.value.d = 1. / double((uint64_t)1 << (-shiftCount));
2103
                    code.etype = (code.etype & ~XPR_IMMEDIATE) | XPR_FLT;
2104
                }
2105
            }
2106
        }
2107
    }
2108
}
2109
 
2110
 
2111
// Merge the memory operand described by expr into the code structure.
// Flag bits, token count, mask and fallback are accumulated into code;
// the remaining operand fields are overwritten with the values from expr.
void insertMem(SCode & code, SExpression & expr) {
    // a nonzero constant on both sides cannot be combined: flag an error
    if (code.value.i != 0 && expr.value.i != 0) {
        code.etype |= XPR_ERROR;
    }

    // memory offset: use the plain constant unless expr carries an explicit offset
    if (!(expr.etype & XPR_OFFSET)) {
        code.offset_mem = expr.value.w;
    }
    else {
        code.offset_mem = expr.offset_mem;
    }

    code.etype  |= expr.etype;            // accumulate expression type flags
    code.tokens += expr.tokens;           // accumulate token count

    // symbols and address components are taken over from expr
    code.sym1      = expr.sym1;
    code.sym2      = expr.sym2;
    code.base      = expr.base;
    code.index     = expr.index;
    code.length    = expr.length;
    code.scale     = expr.scale;
    code.symscale1 = expr.symscale1;

    // mask and fallback are OR'ed in, so values already present in code survive
    code.mask     |= expr.mask;
    code.fallback |= expr.fallback;
}
2128
 
2129
// OR the contents of expr into code, one 64-bit word at a time.
// Both structures are walked as arrays of uint64_t starting at value.u;
// sizeof(SExpression) / sizeof(uint64_t) words are combined.
// NOTE(review): this presumably relies on SCode beginning with the same
// layout as SExpression, with value.u as the first word - confirm against
// the structure definitions.
void insertAll(SCode & code, SExpression & expr) {
    const uint32_t wordCount = sizeof(SExpression) / sizeof(uint64_t);  // number of 64-bit words to merge
    uint32_t w = 0;
    while (w < wordCount) {
        (&code.value.u)[w] |= (&expr.value.u)[w];
        ++w;
    }
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.