OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [assem.h] - Blame information for rev 163

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 65 Agner
/****************************    assem.h    ***********************************
2
* Author:        Agner Fog
3
* Date created:  2017-04-17
4
* Last modified: 2021-05-26
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Module:        assem.h
8
* Description:
9
* Header file for assembler
10
*
11
* Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses
12
*****************************************************************************/
13
#pragma once
14
 
15
// Define token types:
const int  TOK_EOF = 1;                // end of file
const int  TOK_NAM = 2;                // unidentified name
const int  TOK_LAB = 3;                // code label or function name
const int  TOK_VAR = 4;                // data label
const int  TOK_SEC = 5;                // section name
const int  TOK_INS = 6;                // instruction name
const int  TOK_OPR = 7;                // operator
const int  TOK_NUM = 8;                // integer number
const int  TOK_FLT = (TOK_NUM + 1);    // floating point number
const int  TOK_CHA = 0x0A;             // character or string in single quotes ' '
const int  TOK_STR = 0x0B;             // string in double quotes " "
const int  TOK_DIR = 0x11;             // section or function directive
const int  TOK_ATT = 0x12;             // attribute of sections, functions, or symbols. also some keywords
const int  TOK_TYP = 0x14;             // type identifier
const int  TOK_OPT = 0x15;             // options of instructions and operands
const int  TOK_REG = 0x16;             // register name
const int  TOK_SYM = 0x1A;             // symbol, constant, variable, function. id = symbol index
const int  TOK_XPR = 0x1B;             // expression. id = expression index
const int  TOK_HLL = 0x20;             // high level language block (if, for, etc.)
const int  TOK_ERR = 0x80;             // error. illegal character or unmatched quote
36
 
37
// Define keyword id's:
38
 
39
// Directives
40
const int DIR_SECTION  = ((TOK_DIR << 24) + 1);
41
const int DIR_FUNCTION = ((TOK_DIR << 24) + 2);
42
const int DIR_END      = ((TOK_DIR << 24) + 4);
43
const int DIR_PUBLIC   = ((TOK_DIR << 24) + 8);
44
const int DIR_EXTERN   = ((TOK_DIR << 24) + 0x10);
45
 
46
// Attributes of sections
47
const int ATT_READ     = ((TOK_ATT << 24) + SHF_READ);
48
const int ATT_WRITE    = ((TOK_ATT << 24) + SHF_WRITE);
49
const int ATT_EXEC     = ((TOK_ATT << 24) + SHF_EXEC);
50
const int ATT_ALIGN    = ((TOK_ATT << 24) + 0x10);
51
const int SECTION_LOCAL_VAR = 0xFFFFFFFF;                       // local constant with no section
52
 
53
// Attributes of variables, constants and functions
54
const int ATT_WEAK     = ((TOK_ATT << 24) + 0x20);              // weak public or weak external symbol
55
const int ATT_REGUSE   = ((TOK_ATT << 24) + 0x21);              // register use of function
56
const int ATT_CONSTANT = ((TOK_ATT << 24) + 0x10000);           // used for external constants
57
const int ATT_UNINIT   = ((TOK_ATT << 24) + 0x20000);           // uninitialized section (BSS)
58
const int ATT_COMDAT   = ((TOK_ATT << 24) + 0x40000);           // communal section. duplicates and unreferenced sections are removed
59
const int ATT_EXCEPTION= ((TOK_ATT << 24) + SHF_EXCEPTION_HND); // exception handler info
60
const int ATT_EVENT    = ((TOK_ATT << 24) + SHF_EVENT_HND);     // event handler info
61
const int ATT_DEBUG    = ((TOK_ATT << 24) + SHF_DEBUG_INFO);    // debug info
62
const int ATT_COMMENT  = ((TOK_ATT << 24) + SHF_COMMENT);       // comments
63
 
64
// Type definitions
65
const int TYP_INT8     = ((TOK_TYP << 24) + 0x10);
66
const int TYP_INT16    = ((TOK_TYP << 24) + 0x11);
67
const int TYP_INT32    = ((TOK_TYP << 24) + 0x12);
68
const int TYP_INT64    = ((TOK_TYP << 24) + 0x13);
69
const int TYP_INT128   = ((TOK_TYP << 24) + 0x14);
70
const int TYP_UNS                         = 0x20;   // add this for unsigned integer types
71
const int TYP_PLUS                       = 0x100;   // add this larger type allowed
72
const int TYP_FLOAT16  = ((TOK_TYP << 24) + 0x44);
73
const int TYP_FLOAT32  = ((TOK_TYP << 24) + 0x45);
74
const int TYP_FLOAT64  = ((TOK_TYP << 24) + 0x46);
75
const int TYP_FLOAT128 = ((TOK_TYP << 24) + 0x47);
76
const int TYP_INT      =                    0x10;    // generic test for int types
77
const int TYP_FLOAT    =                    0x40;    // generic test for float types
78
const int TYP_STRING   = ((TOK_TYP << 24) + 0x18);
79
 
80
// Options and attributes of instructions
81
const int OPT_MASK     = ((TOK_OPT << 24) + 1);
82
const int OPT_FALLBACK = ((TOK_OPT << 24) + 2);
83
const int OPT_LENGTH   = ((TOK_OPT << 24) + 3);
84
const int OPT_BROADCAST= ((TOK_OPT << 24) + 4);
85
const int OPT_LIMIT    = ((TOK_OPT << 24) + 5);
86
const int OPT_SCALAR   = ((TOK_OPT << 24) + 6);
87
const int OPT_OPTIONS  = ((TOK_OPT << 24) + 7);
88
 
89
// Register types
const int REG_R        =  0x20;        // general purpose register
const int REG_V        =  0x40;        // vector register
const int REG_SPEC     =  0x60;        // special register, accessed with read_spec and write_spec instructions
const int REG_CAPAB    =  0x80;        // capabilities register, accessed with read_capabilities
const int REG_PERF     =  0xA0;        // performance counter, accessed with read_perf
const int REG_SYS      =  0xC0;        // system register, accessed with read_sys and write_sys
const int REG_OTHER    = 0x100;        // other register, unclassified
97
 
98
// ID for special registers:
99
// bit 0-4   is the id used when reading or writing the register
100
// bit 5-7   indicate the type of register
101
// bit 16-20 is the id when the register is used as base pointer
102
// bit 24-31 is token type
103
const int REG_NUMCONTR = ((TOK_REG << 24) +                REG_SPEC + 0);    // numeric control register, default flag
104
const int REG_THREADP  = ((TOK_REG << 24) + (0x1C << 16) + REG_SPEC + 1);    // thread data pointer
105
const int REG_DATAP    = ((TOK_REG << 24) + (0x1D << 16) + REG_SPEC + 2);    // data section pointer
106
const int REG_IP       = ((TOK_REG << 24) + (0x1E << 16) + REG_OTHER   );    // instruction pointer, changed by jump instructions
107
const int REG_SP       = ((TOK_REG << 24) + (0x1F << 16) + REG_R + 0x1F);    // stack pointer
108
 
109
// high level language directives
110
const int HLL_IF       = ((TOK_HLL << 24) + 1);
111
const int HLL_ELSE     = ((TOK_HLL << 24) + 2);
112
const int HLL_SWITCH   = ((TOK_HLL << 24) + 3);
113
const int HLL_CASE     = ((TOK_HLL << 24) + 4);
114
const int HLL_FOR      = ((TOK_HLL << 24) + 5);
115
const int HLL_IN       = ((TOK_HLL << 24) + 6);
116
const int HLL_NOCHECK  = ((TOK_HLL << 24) + 7);
117
const int HLL_WHILE    = ((TOK_HLL << 24) + 8);
118
const int HLL_DO       = ((TOK_HLL << 24) + 9);
119
const int HLL_BREAK    = ((TOK_HLL << 24) + 10);
120
const int HLL_CONTINUE = ((TOK_HLL << 24) + 11);
121
 
122
const int HLL_FALSE    = ((TOK_HLL << 24) + 20);
123
const int HLL_TRUE     = ((TOK_HLL << 24) + 21);
124
 
125
// push and pop may be replaced by macros later:
126
const int HLL_PUSH     = ((TOK_HLL << 24) + 12);
127
const int HLL_POP      = ((TOK_HLL << 24) + 13);
128
 
129
// line types
const int LINE_DATADEF =    1;                   // data definition
const int LINE_CODEDEF =    2;                   // code instruction
const int LINE_PUBLICDEF =  3;                   // public symbol definition
const int LINE_METADEF =    4;                   // assemble-time definitions and metaprogramming
const int LINE_OPTIONS =    5;                   // option setting
const int LINE_FUNCTION= 0x11;                   // function definition
const int LINE_SECTION = 0x12;                   // section definition
const int LINE_ENDDIR  = 0x10;                   // function or section end
const int LINE_ERROR   = 0xFF;                   // error detected in this line
139
 
140
// Operator id's are equal to the ASCII code with these additions:
const int EQ = 0x100;                  // operator followed by equal sign, e.g. +=
const int D2 = 0x200;                  // operator repeated, e.g. <<
const int D3 = 0x400;                  // operator triple, e.g. >>>
const int OP_UNS = 0x1000;             // unsigned operation
145
 
146
// SExpression types in .etype
// These are bit flags; XPR_IMMEDIATE equals XPR_INT | XPR_INT2 | XPR_FLT
const int XPR_INT           = 0x01;    // contains integer value
const int XPR_INT2          = 0x02;    // contains a second integer constant in the upper half of value
const int XPR_FLT           = 0x04;    // contains floating point value
const int XPR_IMMEDIATE     = 0x07;    // contains immediate constant
const int XPR_STRING        = 0x08;    // contains string (u = string buffer entry, sym2 = length)
const int XPR_REG           = 0x10;    // contains register operand
const int XPR_OP            = 0x20;    // contains instruction or operator
const int XPR_OPTION        = 0x80;    // contains option keyword for memory operand
const int XPR_MEM          = 0x100;    // contains memory operand, or part of it
const int XPR_SYM1         = 0x200;    // contains symbol address
const int XPR_SYM2         = 0x400;    // contains reference symbol address
const int XPR_SYMSCALE     = 0x800;    // contains scale factor on (sym1-sym2)
const int XPR_REG1        = 0x1000;    // contains first register operand
const int XPR_REG2        = 0x2000;    // contains second register operand
const int XPR_REG3        = 0x4000;    // contains third register operand in value.u
const int XPR_BASE        = 0x8000;    // contains base register
const int XPR_INDEX      = 0x10000;    // contains index register and scale factor
const int XPR_OFFSET     = 0x20000;    // contains memory address offset
const int XPR_LIMIT      = 0x40000;    // contains limit for index
const int XPR_SCALAR     = 0x80000;    // contains scalar memory operand without broadcast
const int XPR_LENGTH    = 0x100000;    // contains vector length register
const int XPR_BROADC    = 0x200000;    // contains vector broadcast length register
const int XPR_MASK      = 0x400000;    // contains mask register
const int XPR_FALLBACK  = 0x800000;    // contains fallback register
const int XPR_OPTIONS  = 0x1000000;    // contains options or signbits in IM3
const int XPR_JUMPOS   = 0x2000000;    // contains self-relative jump offset
const int XPR_TYPENAME =0x10000000;    // contains type name in value
const int XPR_UNRESOLV =0x40000000;    // contains unresolved name or value
// The literal 0x80000000 does not fit in a signed int; the cast makes the conversion explicit
const int XPR_ERROR    =(int)0x80000000; // an error occurred during the generation
176
 
177
// Instruction id's
const uint32_t II_NOP            =  0x30000;
const uint32_t II_STORE          =        1;
const uint32_t II_MOVE           =        2;
const uint32_t II_COMPARE        =        7;
const uint32_t II_ADD            =        8;
const uint32_t II_SUB            =        9;
const uint32_t II_SUB_REV        =       10;
const uint32_t II_MUL            =       11;
const uint32_t II_MUL_HI         =       12;
const uint32_t II_MUL_EX         =  0x1201A;
const uint32_t II_DIV            =       14;
const uint32_t II_DIV_U          =       15; // all unsigned variants must be signed variant | 1
const uint32_t II_DIV_REV        =       16;
const uint32_t II_DIV_EX         =  0x12018;
const uint32_t II_REM            =       18;
const uint32_t II_REM_U          =       19;
const uint32_t II_MIN            =       20;
const uint32_t II_MIN_U          =       21;
const uint32_t II_MAX            =       22;
const uint32_t II_MAX_U          =       23;
const uint32_t II_AND            =       26;
const uint32_t II_OR             =       27;
const uint32_t II_XOR            =       28;
const uint32_t II_SHIFT_LEFT     =       32;
const uint32_t II_MUL_2POW       =       32;  // same value as II_SHIFT_LEFT
const uint32_t II_ROTATE         =       33;
const uint32_t II_SHIFT_RIGHT_S  =       34;
const uint32_t II_SHIFT_RIGHT_U  =       35;  // must be = II_SHIFT_RIGHT_S | 1
const uint32_t II_CLEAR_BIT      =       36;
const uint32_t II_SET_BIT        =       37;
const uint32_t II_TOGGLE_BIT     =       38;
const uint32_t II_TEST_BIT       =       39;
const uint32_t II_TEST_BITS_AND  =       40;
const uint32_t II_TEST_BITS_OR   =       41;
const uint32_t II_MUL_ADD        =       49;
const uint32_t II_MUL_ADD2       =       50;
const uint32_t II_ADD_ADD        =       51;
const uint32_t II_SELECT_BITS    =       52;
const uint32_t II_FUNNEL_SHIFT   =       53;
const uint32_t II_SHIFT_U_ADD    =   0x0101;
//const uint32_t II_MOVE_U         =   0x11001;
const uint32_t II_ADD_H          =  0x50008;  // float16
const uint32_t II_SUB_H          =  0x50009;  // float16
const uint32_t II_MUL_H          =  0x5000B;  // float16
const uint32_t II_DIV_H          =  0x50010;  // float16
const uint32_t II_MUL_ADD_H      =  0x50031;  // float16
const uint32_t II_PUSH           =  0x18038;
const uint32_t II_POP            =  0x18039;
const uint32_t II_REPLACE        =  0xA0001;
const uint32_t II_REPLACE_EVEN   =  0x26004;
const uint32_t II_REPLACE_ODD    =  0x26005;
const uint32_t II_ADDRESS        =  0x29020;
230
 
231
// constants for jump and branch instructions. May be combined with II_ADD, II_SUB, II_COMPARE, etc.
const uint32_t II_INCREMENT      =   0x0051;  // increment. combine with II_JUMP_POSITIVE
const uint32_t II_SUB_MAXLEN     =   0x0052;  // subtract max vector length. combine with II_JUMP_POSITIVE
const uint32_t II_FP_CATEGORY    =   0x0054;  // fp_category. combine with II_JUMP_TRUE

const uint32_t II_JUMP           = 0x101000;  // jump codes may be combined with II_ADD etc.
const uint32_t II_JUMP_ZERO      = 0x101200;  // xor with 0x100 for opposite condition
const uint32_t II_JUMP_NOTZERO   = 0x101300;  // not zero or not equal
const uint32_t II_JUMP_NEGATIVE  = 0x101400;  // negative or signed below
const uint32_t II_JUMP_POSITIVE  = 0x101600;  // positive or signed above
const uint32_t II_JUMP_OVERFLOW  = 0x101800;  // signed overflow
const uint32_t II_JUMP_CARRY     = 0x102000;  // carry, borrow, unsigned below, abs below. Reverse condition if 'sub n' replaced by 'add (-n)'
const uint32_t II_JUMP_UBELOW    = 0x102000;  // carry, borrow, unsigned below, abs below. Reverse condition if 'sub n' replaced by 'add (-n)'
const uint32_t II_JUMP_UABOVE    = 0x102200;  // unsigned above, abs above
const uint32_t II_JUMP_TRUE      = 0x102400;  // bit test etc. true
const uint32_t II_JUMP_FALSE     = 0x102500;  // bit test etc. false
const uint32_t II_JUMP_INVERT    =   0x0100;  // flip this bit to invert condition
const uint32_t II_JUMP_UNORDERED =   0x8000;  // flip this bit to jump if unordered
const uint32_t II_JUMP_INSTR     = 0x100000;  // bit to identify direct jump and call instructions
const uint32_t II_INCREMENT_COMPARE_JBELOW = 48; // opj for increment_compare_jump_below
const uint32_t II_CALL           = 0x111000;  // direct call
252
 
253
const uint32_t II_ALIGN        = 0x10000000;  // align directive
const uint32_t II_OPTIONS      = 0x20000000;  // options directive

const int MAX_ALIGN              =     4096;  // maximum allowed alignment  (note: if changed, change also in error.cpp at ERR_ALIGNMENT)
257
 
258
// Bit values generated by fitConstant() and stored in SCode::fitNumX
// Indicates how many bits are needed to contain address offset or immediate constant of an instruction
const int IFIT_I8        =     0x10;  // fits into signed 8-bit integer
const int IFIT_J8        =     0x20;  // (-x) fits into signed 8-bit integer
const int IFIT_U8        =     0x40;  // x fits into unsigned 8-bit integer
const int IFIT_I8SHIFT   =     0x80;  // fits into signed 8-bit integer with left shift
const int IFIT_I16       =    0x100;  // fits into signed 16-bit integer
const int IFIT_J16       =    0x200;  // (-x) fits into signed 16-bit integer
const int IFIT_U16       =    0x400;  // fits into unsigned 16-bit integer
const int IFIT_I16SHIFT  =    0x800;  // fits into signed 16-bit integer with left shift
const int IFIT_I16SH16   =   0x1000;  // fits into signed 16-bit integer shifted left by 16
const int IFIT_I24       =   0x4000;  // fits into signed 24-bit integer
const int IFIT_I32       =  0x10000;  // fits into signed 32-bit integer
const int IFIT_J32       =  0x20000;  // (-x) fits into signed 32-bit integer
const int IFIT_U32       =  0x40000;  // fits into unsigned 32-bit integer
const int IFIT_I32SHIFT  =  0x80000;  // fits into signed 32-bit integer with left shift
const int IFIT_I32SH32   = 0x100000;  // fits into 32-bit integer shifted left by 32
const int IFIT_J         =  (IFIT_J8 | IFIT_J16 | IFIT_J32); // (-x) fits better than x
const int FFIT_16       = 0x1000000;  // fits into normal half precision
const int FFIT_32       = 0x2000000;  // fits into normal single precision
const int FFIT_64       = 0x4000000;  // fits into double precision
const int IFIT_RELOC   = 0x10000000;  // relocation record needed
const int IFIT_LARGE   = 0x20000000;  // choose the larger size if uncertain. This input is used if optimization process has convergence problems
281
 
282
// values for immediate operand types
//const int OPI_INT4            =   1;  // int4
const int OPI_INT8            =   2;  // int8
const int OPI_INT16           =   3;  // int16
const int OPI_INT32           =   4;  // int32
const int OPI_INT64           =   5;  // int64
const int OPI_INT8SH          =   6;  // int8 << i
const int OPI_INT16SH         =   7;  // int16 << i
const int OPI_INT16SH16       =   8;  // int16 << 16
const int OPI_INT32SH32       =   9;  // int32 << 32
const int OPI_UINT8           =  18;  // uint8
const int OPI_UINT16          =  19;  // uint16
const int OPI_UINT32          =  20;  // uint32
const int OPI_UINT64          =  21;  // uint64
const int OPI_2INT8           =  24;  // int8+int8
const int OPI_INT886          =  25;  // int8+int8+int6
const int OPI_2INT16          =  26;  // int16+int16
const int OPI_INT1632         =  27;  // int16+int32
const int OPI_2INT32          =  28;  // int32+int32
const int OPI_INT1688         =  29;  // int16+int8+int8
const int OPI_INT8F           =  34;  // int8 converted to float
const int OPI_INT16F          =  35;  // int16 converted to float
const int OPI_FLOAT16         =  64;  // float16
const int OPI_FLOAT32         =  65;  // float32
const int OPI_FLOAT64         =  66;  // float64
const int OPI_IMPLICIT        =  99;  // implicit immediate operand (usually uint8)
const int OPI_OT              = 100;  // determined by operand type field
309
 
310
 
311
// struct SLine contains information about each line in the input file
struct SLine {
    uint16_t type;                // line type: LINE_DATADEF, etc
    uint16_t sectionType;         // section flags
    uint32_t beginPos;            // position in input file
    uint32_t firstToken;          // index to first token
    uint32_t numTokens;           // number of tokens in line
    uint32_t file;                // file of origin. (1 = source file, 2+ = include files, 0x1000+ = meta-generated lines)
    uint32_t linenum;             // line number in file of origin
};
321
 
322
// struct SToken is used for splitting each line into tokens
struct SToken {
    uint32_t type;                // Token type
    uint32_t id;                  // ID if known name or operator
    uint32_t pos;                 // File offset
    uint32_t stringLength;        // Length of token as string
    uint16_t priority;            // Priority if operator
    uint16_t vartype;             // 0: value not known, 3: int64, 5: double, 8: string
    uint32_t unused;
    union {                       // value if constant or assemble-time variable
        uint64_t u;
        int64_t  i;
        double   d;
        uint32_t w;
    } value;
};
338
 
339
// struct SOperator is used for list of operators
struct SOperator {
    char name[8];                 // name
    uint32_t id;                  // identifier
    uint32_t priority;            // priority if operator
};
345
 
346
// operator < for sorting operator list
347
static inline bool operator < (SOperator const & a, SOperator const & b) {
348
    return strcmp(a.name, b.name) < 0;
349
}
350
 
351
// struct SKeyword is used for list of keywords
struct SKeyword {
    char name[28];                // name
    uint32_t id;                  // identifier
};
356
 
357
// struct SExpression is used during assemble-time evaluation of expressions containing
// any type of operands: integer, float, string, registers, memory operands, options
struct SExpression {
    union {                       // immediate operand value
        int64_t  i;               // as signed
        uint64_t u;               // as unsigned
        double   d;               // as double
        uint32_t w;               // as unsigned 32 bit integer
    } value;
    int32_t  offset_mem;          // offset for memory operand
    int32_t  offset_jump;         // offset for jump
    uint32_t etype;               // flags for elements in expression: XPR_...
    uint32_t tokens;              // number of tokens used
    uint32_t sym1;                // first symbol of memory operand, indexed by namebuffer offset
    uint32_t sym2;                // reference symbol of memory operand, indexed by namebuffer offset
    uint32_t sym3;                // first symbol of immediate operand, indexed by namebuffer offset
    uint32_t sym4;                // reference symbol of immediate operand, indexed by namebuffer offset
    uint32_t sym5;                // symbol for jump target, indexed by namebuffer offset
    uint32_t instruction;         // instruction corresponding to operator
    uint8_t  optionbits;          // option bits or sign bits
    uint8_t  base;                // base register of memory operand
    uint8_t  index;               // index register of memory operand
    uint8_t  length;              // length or broadcast register of memory operand
    int8_t   scale;               // scale factor for index register
    uint8_t  symscale1;           // scale factor for sym1-sym2
    uint8_t  symscale3;           // scale factor for sym3-sym4
    uint8_t  mask;                // mask register
    uint8_t  reg1;                // first register operand
    uint8_t  reg2;                // second register operand
    uint8_t  reg3;                // third register operand
    uint8_t  fallback;            // fallback register
};
389
 
390
 
391
// struct SCode is the result of interpreting a line of code containing an instruction
392
struct SCode : public SExpression {
393
    SFormat  const * formatp;     // instruction format. pointer to record in formatList in disassem1.cpp, or a copy of it
394
    uint32_t line;                // entry into lines buffer
395
    uint32_t section;             // code section
396
    uint32_t address;             // address relative to begin of section in current module
397
    uint32_t label;               // a code or data label, identified by an index into symbolNameBuffer (not an index into 'symbols' because this may change when new symbols are added)
398
    uint32_t dtype;               // data type. (TYP_INT8 etc.)
399
    uint32_t instr1;              // index to instruction in instructionlist
400
    uint32_t fitNum;              // indicates if immediate constant fits a certain representation (from fitInteger or fitFloat function)
401
    uint32_t fitAddr;             // indicates if relative address fits a certain number of bits
402
    uint32_t fitJump;             // indicates if relative jump offset fits a certain number of bits
403
    uint8_t  dest;                // destination register (2 = memory destination)
404
    uint8_t  numOp;               // number of source operands
405
    uint8_t  size;                // size of instruction. minimum size if actual size depends on unresolved cross references
406
    uint8_t  sizeUnknown;         // actual size may be up to this value bigger
407
    uint8_t  category;            // instruction category
408
};
409
 
410
 
411
// struct SBlock is used for tracking {} code blocks
struct SBlock  {
    uint32_t blockType;           // block type. see definitions of HL_FUNC etc. in assem5.cpp
    uint32_t blockNumber;         // sequential number used in label names
    uint32_t startBracket;        // token of start '{'
    uint32_t jumpLabel;           // target label for jump, else, or loop
    uint32_t breakLabel;          // target label for break statement. -1 if break is possible but label not yet defined
    uint32_t continueLabel;       // target label for continue statement. -1 if continue is possible but label not yet defined
    uint32_t codeBuffer2index;    // index of entry in codeBuffer2
    uint32_t codeBuffer2num;      // number of instruction codes in codeBuffer2
};
422
 
423
// combine contents of two expressions
424
static inline SExpression operator | (SExpression const & exp1, SExpression const & exp2) {
425
    SExpression expr;
426
    for (uint32_t i = 0; i < sizeof(SExpression) / sizeof(uint64_t); i++) {
427
        (&expr.value.u)[i] = (&exp1.value.u)[i] | (&exp2.value.u)[i];
428
    }
429
    return expr;
430
}
431
 
432
// combine an instruction code with an expression
// Returns a copy of code1 in which the inherited SExpression part has been OR'ed
// with exp2. The members added by SCode are copied from code1 unchanged.
// The OR is done byte by byte so that every byte of the SExpression part is
// covered even on platforms where sizeof(SExpression) is not a multiple of 8.
static inline SCode operator | (SCode const & code1, SExpression const & exp2) {
    SCode code0 = code1;
    unsigned char * dst = reinterpret_cast<unsigned char *>(static_cast<SExpression *>(&code0));
    unsigned char const * src = reinterpret_cast<unsigned char const *>(&exp2);
    for (uint32_t i = 0; i < sizeof(SExpression); i++) {
        dst[i] = (unsigned char)(dst[i] | src[i]);
    }
    return code0;
}
439
 
440
// find the smallest representation that the floating point operand fits into
441
int fitFloat(double x);
442
 
443
// insert memory operand into code structure
444
void insertMem(SCode & code, SExpression & expr);
445
 
446
// insert everything from expression to code structure, OR'ing all bits
447
void insertAll(SCode & code, SExpression & expr);
448
 
449
// operator < for sorting keyword list
450
static inline bool operator < (SKeyword const & a, SKeyword const & b) {
451
    // case insensitive compare. This function is not standardized. make my own:
452
    return strncasecmp_(a.name, b.name, 1000) < 0;
453
 
454
#if defined (_MSC_VER)
455
    //return _stricmp(a.name, b.name) < 0;    // microsoft
456
#else
457
    //return strcasecmp(a.name, b.name) < 0;  // unix
458
#endif
459
 
460
}
461
 
462
// redefine symbol structure sorted by name
463
struct ElfFWC_Sym2 : public ElfFwcSym {
464
};
465
 
466
 
467
// operator < for sorting symbols by name
// The names are looked up in symbolNameBuffer through st_name and compared with strcmp
static inline bool operator < (ElfFWC_Sym2 const & a, ElfFWC_Sym2 const & b) {
    return strcmp(symbolNameBuffer.getString(a.st_name), symbolNameBuffer.getString(b.st_name)) < 0;
}
470
 
471
// operator == is true if two symbols have identical names
// The names are looked up in symbolNameBuffer through st_name and compared with strcmp
static inline bool operator == (ElfFWC_Sym2 const & a, ElfFWC_Sym2 const & b) {
    return strcmp(symbolNameBuffer.getString(a.st_name), symbolNameBuffer.getString(b.st_name)) == 0;
}
474
 
475
// structure in list of assembly errors
struct SAssemError {
    uint32_t pos;                                // position in input file
    uint32_t stringLength;                       // length of token string
    uint32_t file;                               // File where error was detected
    uint16_t num;                                // Error id
    uint16_t pass;                               // Pass during which error occurred
};
483
 
484
class CAssembler;                                // Forward definition
485
 
486
// class for reporting errors in assembly file
487
class CAssemErrors {
488
public:
489
    CAssemErrors();
490
    void report(uint32_t position, uint32_t stringLength, uint32_t num); // Report an error
491
    void report(SToken const & token);           // Report an error, pointing to a specific token
492
    void reportLine(uint32_t num);               // Report an error in current line
493
    void setOwner(CAssembler * a);               // Give access to CAssembler
494
    uint32_t numErrors();                        // Return number of errors
495
    bool tooMany();                              // true if too many errors
496
    void outputErrors();                         // Write all errors to stderr
497
protected:
498
    CAssembler * owner;
499
    CDynamicArray<SAssemError>list;              // List of errors
500
    uint32_t maxErrors;                          // Maximum number of errors to report
501
};
502
 
503
 
504
// class CAssembler handles assembly of ForwardCom assembly code
505
class CAssembler : public CFileBuffer {
506
public:
507
    CAssembler();                                // Constructor
508
    void go();
509
protected:
510
    friend class CAssemErrors;                   // This class handles error messages
511
    uint32_t iInstr;                             // Position of current instruction relative to section start
512
    uint32_t instrLength;                        // Length of current instruction, in 32-bit words
513
    uint32_t operandType;                        // Operand type of current instruction
514
    uint32_t format;                             // Format of current instruction
515
    uint64_t variant;                            // Template variant and options
516
    int64_t  value0;                             // original value of immediate operand   
517
    uint32_t tokenB;                             // index to first token in current line
518
    uint32_t tokenN;                             // number of tokens in current line
519
    uint32_t dataType;                           // data type for current instruction
520
    uint32_t section;                            // Current section
521
    uint32_t sectionFlags;                       // current section information flags
522
    uint32_t linei;                              // index to current line
523
    uint32_t filei;                              // index to current input file
524
    uint32_t pass;                               // what pass are we in
525
    uint32_t iLoop;                              // index of current loop statement
526
    uint32_t iIf;                                // index of current 'if' statement
527
    uint32_t iSwitch;                            // index of current 'switch' statement    
528
    uint32_t numSwitch;                          // total number of 'switch' statements
529
    bool     lineError;                          // error in current line. stop interpreting
530
    uint64_t code_size;                          // codesize option determines code address sizes
531
    uint64_t data_size;                          // datasize option determines data address sizes
532
    STemplate const * pInstr;                    // Pointer to current instruction code
533
    SInstruction2 const * iRecord;               // Pointer to instruction table entry
534
    SFormat const * fInstr;                      // Format details of current instruction code
535
    CELF outFile;                                // Output file
536
    CDynamicArray<SToken> tokens;                // List of tokens
537
    CDynamicArray<SLine> lines;                  // Information about each line of the input file
538
    CDynamicArray<SInstruction> instructionlist; // List of instruction set, unsorted
539
    CDynamicArray<SInstruction> instructionlistNm;// List of instruction set, sorted by name
540
    CDynamicArray<SInstruction3> instructionlistId; // List of instruction set, sorted by id
541
    CDynamicArray<SOperator> operators;          // List of operators
542
    CDynamicArray<SKeyword> keywords;            // List of keywords
543
    CDynamicArray<ElfFWC_Sym2> symbols;          // List of symbols
544
    CDynamicArray<ElfFwcReloc> relocations;     // List of relocations
545
    CDynamicArray<uint8_t> brackets;             // Stack of nested brackets during evaluation of expression
546
    CDynamicArray<SCode> codeBuffer;             // Coded instructions
547
    CDynamicArray<SCode> codeBuffer2;            // Temporary storage of instructions for loops and switch statements
548
    CDynamicArray<ElfFwcShdr> sectionHeaders;    // Section headers
549
    CDynamicArray<SFormat> formatList3;          // Subset of formatList for multiformat instruction formats
550
    CDynamicArray<SFormat> formatList4;          // Subset of formatList for jump instruction formats
551
    CDynamicArray<SBlock>  hllBlocks;            // Tracking of {} blocks    
552
    CDynamicArray<SExpression> expressions;      // Expressions saved as assemble-time symbols    
553
    CTextFileBuffer stringBuffer;                // Buffer for assemble-time string variables
554
    CMetaBuffer<CMemoryBuffer> dataBuffers;      // databuffer for each section
555
    CAssemErrors errors;                         // Error reporting
556
    void initializeWordLists();                  // Initialize and sort instruction list, operator list, and keyword list
557
    void feedBackText1();                        // write feedback text on stdout
558
    void pass1();                                // Split input file into lines and tokens. Handle preprocessing directives. Find symbol definitions
559
    void interpretSectionDirective();            // Interpret section directive during pass 2 or 3
560
    void interpretFunctionDirective();           // Interpret function directive during pass 2 or 3
561
    void interpretEndDirective();                // Interpret section or function end directive during pass 2 or 3
562
    void interpretOptionsLine();                 // Interpret line specifying options
563
    uint32_t addSymbol(ElfFWC_Sym2 & sym);       // Add a symbol to symbols list
564
    uint32_t findSymbol(uint32_t name);          // Find symbol by index into symbolNameBuffer
565
    uint32_t findSymbol(const char * name, uint32_t len); // Find symbol by name with specified length
566
    void pass2();                                // A. Handle metaprogramming directives
567
                                                 // B. Classify lines
568
                                                 // C. Identify symbol names, sections, labels, functions 
569
    void interpretExternDirective();             // Interpret extern directive during pass 2
570
    void interpretPublicDirective();             // Interpret public directive during pass 2
571
    void interpretLabel(uint32_t tok);           // Interpret code or data label during pass 2
572
    void interpretVariableDefinition1();         // interpret assembly style variable definition
573
    void interpretVariableDefinition2();         // interpret C style variable definition
574
    void determineLineType();                    // check if line is code or data
575
    void interpretAlign();                       // interpret code or data alignment directive
576
    void interpretMetaDefinition();              // Interpret line beginning with '%' containing meta code
577
    void replaceKnownNames();                    // Replace known symbol names with symbol references and meta variables with their value
578
    SExpression expression(uint32_t tok1, uint32_t ntok, uint32_t option); // Interpret and evaluate expression
579
    SExpression symbol2expression(uint32_t symi); // make expression out of symbol
580
    SExpression op1minus(SExpression & exp1);    // Interpret -(A+B), etc.
581
    SExpression op2(uint32_t op, SExpression & exp1, SExpression & exp2); // Interpret dyadic expression with any type of operands
582
    SExpression op2Int(uint32_t op, SExpression const & exp1, SExpression const & exp2); // Interpret dyadic expression with integer operands
583
    SExpression op2Float(uint32_t op, SExpression & exp1, SExpression & exp2); // Interpret dyadic expression with floating point operands
584
    SExpression op2String(uint32_t op, SExpression const & exp1, SExpression const & exp2); // Interpret dyadic expression with string operands
585
    SExpression op2Registers(uint32_t op, SExpression const & exp1, SExpression const & exp2); // Interpret dyadic expression with register operands
586
    SExpression op2Memory(uint32_t op, SExpression & exp1, SExpression & exp2); // Interpret dyadic expression with memory operands
587
    SExpression op3(uint32_t tok1, uint32_t toklow, uint32_t tokcolon, uint32_t maxtok, uint32_t options); // Interpreted triadic expression exp1 ? exp2 : exp3 at the indicated positions
588
    void assignMetaVariable(uint32_t symi, SExpression & expr, uint32_t typetoken); // define or modify assemble-time constant or variable
589
    void pass3();                                // Generate code and data
590
    void makeFormatLists();                      // extract subsets of formatList into formatList3 and formatList4
591
    void interpretCodeLine();                    // Interpret a line defining code
592
    int  fitCode(SCode & code);                  // find an instruction variant that fits the code
593
    bool instructionFits(SCode const & code, SCode & codeTemp, uint32_t ii); // check if instruction fits into specified format
594
    bool jumpInstructionFits(SCode const & code, SCode & codeTemp, uint32_t ii); // check if jump instruction fits into specified format
595
    int  fitConstant(SCode & code);              // check how many bits are needed to contain immediate constant in an instruction.
596
    int  fitAddress(SCode & code);               // check how many bits are needed to contain relative address in an instruction.
597
    void checkCode1(SCode & code);               // eheck code for correctness before fitting a format, and fix some code details
598
    void checkCode2(SCode & code);               // eheck register types etc. after fitting a format, and finish code details
599
    uint32_t checkCodeE(SCode & code);           // find reason why no format fits, and return error number
600
    void optimizeCode(SCode & code);             // optimize instruction. replace by more efficient instruction if possible
601
    void pass4();                                // Resolve symbol addresses and cross references, optimize forward references
602
    void pass5();                                // Make binary file
603
    void copySections();                         // copy sections to outFile
604
    void copySymbols();                          // copy symbols to outFile
605
    //void removePrivateSymbols();               // remove local symbols and adjust relocation records with new symbol indexes
606
    void makeListFile();                         // make output listing
607
    int64_t calculateMemoryOffset(SCode & code); // calculate memory address possibly involving symbol. generate relocation if necessary
608
    int64_t calculateJumpOffset(SCode & code);   // calculate jump offset possibly involving symbol. generate relocation if necessary
609
    int64_t calculateConstantOperand(SExpression & expr, uint64_t address, uint32_t fieldSize); // calculate constant or immediate operand possibly involving symbol. generate relocation if necessary
610
    void makeBinaryCode();                       // make binary data for code sections
611
    void makeBinaryData();                       // make binary data for data sections
612
    void makeBinaryRelocations();                // put relocation records in output file
613
    void showTokens();                           // Show all tokens. For debugging only
614
    void showSymbols();                          // Show all symbols. For debugging only
615
    void interpretHighLevelStatement();          // if, else, switch, for, do, while statements    
616
    void interpretEndBracket();                  // finish {} block
617
    void codeIf();                               // Interpret if statement in assembly code
618
    void codeIf2();                              // Finish if statement at end bracket
619
    void codeWhile();                            // Interpret while loop in assembly code
620
    void codeWhile2();                           // Finish while-loop at end bracket
621
    void codeDo();                               // Interpret do-while loop in assembly code
622
    void codeDo2();                              // Finish do-while loop at end bracket
623
    void codeFor();                              // Interpret for-loop in assembly code
624
    void codeFor2();                             // Finish for-loop at end bracket
625
    void codeForIn();                            // Interpret for-in vector loop in assembly code
626
    void codeForIn2();                           // Finish for-in vector loop in assembly code
627
    void codeSwitch();                           // Interpret switch statement in assembly code
628
    void codeCase();                             // Interpret switch case label in assembly code
629
    void codeSwitch2();                          // Finish switch statement at end bracket
630
    void codeBreak();                            // Interpret break or continue statement in assembly code
631
    uint32_t findBreakTarget(uint32_t k);        // Find or make the target symbol of a break or continue statement
632
    uint32_t makeLabelSymbol(const char * name); // Make a symbol for branch label etc., address not known yet
633
    bool mergeJump(SCode & code2);               // Merge jump instruction with preceding arithmetic instruction
634
    uint32_t hasJump(uint32_t line);             // check if line contains unconditional direct jump
635
    void interpretCondition(SCode & code);       // interpret condition in if(), while(), and for(;;) statements
636
    void codePush();                             // push register on stack. (may be replaced by macros later)
637
    void codePop();                              // pop register from stack. (may be replaced by macros later)
638
};

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.