OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [disassem.h] - Blame information for rev 149

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 69 Agner
/****************************  disassem.h   **********************************
2
* Author:        Agner Fog
3
* Date created:  2017-04-26
4
* Last modified: 2021-04-11
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Module:        disassem.h
8
* Description:
9
* Header file for disassembler
10
*
11
* Copyright 2006-2021 GNU General Public License http://www.gnu.org/licenses
12
*****************************************************************************/
13
 
14
 
15
/* Additional information stored in symbol records during disassembly:
ElfFwcSym::st_other    bit 31 set if symbol has been written out in section listing
ElfFwcSym::st_reguse1  old symbol index before sorting and adding more symbols,
                        needs translation to new index in relocation records
ElfFwcSym::st_size     reference symbol if symbol is a relative pointer, stored in bit 32-63 of st_size
ElfFwcSym::st_reguse2  symbol data type: bit 0-2: operand type, bit 3 = 1
                        0x100 code pointer
                        0x200 data pointer
*/
24
 
25
 
26
// Universal template for all instruction formats.
// All members are overlapping views of the same instruction code bits.
union STemplate {
    uint64_t     q;          // First 64 bits
    struct {
        uint32_t rt:    5;   // Source register RT in formats A and E
        uint32_t mask:  3;   // Mask register in formats A and E
        uint32_t rs:    5;   // Source register RS in formats A, B, and E
        uint32_t ot:    3;   // Operand type and M in formats A, B, and E
        uint32_t rd:    5;   // Destination register RD in formats A, B, C, and E
        uint32_t op1:   6;   // Operation code OP1 in formats A, B, C, and E
        uint32_t mode:  3;   // Mode in all formats
        uint32_t il:    2;   // Instruction length in all formats
        uint32_t im2:  16;   // IM2 in format E
        uint32_t im3:   6;   // IM3 in format E
        uint32_t op2:   2;   // OP2 in format E
        uint32_t ru:    5;   // Source register RU in format E
        uint32_t mode2: 3;   // Mode2 in format E
    } a;
    struct {
        int32_t  im2:  24;   // IM2 in format D (signed)
        uint32_t op1:   3;   // The remaining part of OP1 in format D
        uint32_t mode:  3;   // Mode in format D
        uint32_t il:    2;   // Instruction length in format D
    } d;
    uint8_t      b[12];      // IM1 in format B
    uint16_t     s[4];       // IM1+2 in format C
    uint32_t     i[3];       // IM2 and IM3 in format A2, A3, B2, B3
    float        f[2];       // IM2 as float
};
55
 
56
// Record in nested lookup lists for formats.
// crit = 0 means that index is an index into the format table
// crit > 0 means that index is an offset into the next table. Add something to this offset according to the criterion
struct SFormatIndex {
    uint8_t  crit;           // Criterion for lookup into next table: 0 = format table.
                             // 1: mode2, 2: op1 / 8, 3: op1 % 8, 4: IM1 % 64 / 8, 5: IM1 % 8,
                             // 6: IM12 == 0xFFFF
    uint8_t  index;          // Offset into next table
};
65
 
66
// Record in list of formats (formatList)
struct SFormat {
    uint16_t format2;        // 0x0XYZ, where X = il, Y = mode, Z = subformat (mode2 or OP1) or variant within format
    uint8_t  category;       // Category: 1 = single format,  3 = multi-format, 4 = jump instruction
    uint8_t  tmplate;        // Template: 0xA, 0xB, 0xC, 0xD, 0xE.

    uint8_t  opAvail;        // Operands available: 1 = immediate, 2 = memory,
                             // 0x10 = RT, 0x20 = RS, 0x40 = RU, 0x80 = RD

    uint8_t  ot;             // Operand type. 0: determined by OT field. 0x10 - 0x17: 0-7.
                             // 0x32: int32 for even OP1, int64 for odd OP1
                             // 0x35: float for even OP1, double for odd OP1

    uint8_t  jumpSize;       // Size of jump offset field (bytes)
    uint8_t  jumpPos;        // Position of jump offset field (bytes)
    uint8_t  addrSize;       // Size of address offset field (bytes)
    uint8_t  addrPos;        // Position of address offset field (bytes)
    uint8_t  immSize;        // Size of first immediate operand (bytes)
    uint8_t  immPos;         // Position of first immediate operand (bytes)

    uint16_t imm2;           // Size and position of extra immediate operands:
                             // 1 = IM2 in template C,
                             // 2 = IM3 in template E may contain options,
                             // 4 = IM3 is shift count for IM2 if no options,
                             // 8 = IM2 is shift count for IM4,
                             // 0x10 = IM3 in template A3 or B3,
                             // 0x40 = has fixed values
                             // 0x80 = jump OPJ in IM1
                             // 0x90 = jump OPJ is in high part of IM2 in format A2
                             // 0xC0 = jump with no OPJ
                             // 0x100 = OP2 is used for immediate operand as extension of IM3

    uint8_t  vect;           // 1 = vector registers used, 2 = vector length in RT, 4 = broadcast length in RT
                             // 0x10 = vector registers used if M bit

    uint8_t  mem;            // 1 (unused), 2 = base in RS, 4 = index in RT,
                             // 0x10 = has offset, 0x20 = has limit,

    uint8_t  scale;          // 1 = offset is scaled, 2 = index is scaled by OS, 4 = scale factor is -1
    uint8_t  formatIndex;    // Bit index into format in instruction list
    uint8_t  exeTable;       // table of function pointers used during emulation
};
108
 
109
// tables in emulator2.cpp:
110
extern const SFormat formatList[];            // == FXEND in disasm1.cpp
111
extern uint32_t formatListSize;               // size of formatList 
112
extern const uint32_t dataSizeTable[8];       // = {1, 2, 4, 8, 16, 4, 8, 16};
113
extern const uint32_t dataSizeTableMax8[8];   // = {1, 2, 4, 8, 8, 4, 8, 8}; 
114
extern const uint32_t dataSizeTableLog[8];    // = {0, 1, 2, 3, 4, 2, 3, 4};
115
extern const uint32_t dataSizeTableBits[8];   // = {8, 16, 32, 64, 128, 32, 64, 128};
116
extern const uint64_t dataSizeMask[8];        // = {FF, FFFF, FFFFFFFF, -1, -1, FFFFFFFF, -1, -1};
117
 
118
 
119
// Operator for sorting symbols by address. Used by disassembler
120
static inline bool operator < (ElfFwcSym const & a, ElfFwcSym const & b) {
121
    if (a.st_section != b.st_section) return a.st_section < b.st_section;
122
    return a.st_value < b.st_value;
123
}
124
 
125
// Operatur == tells if symbols have same address
126
static inline bool operator == (ElfFwcSym const & a, ElfFwcSym const & b) {
127
    return a.st_section == b.st_section && a.st_value == b.st_value;
128
}
129
 
130
// Operator for sorting relocations by address. Used by disassembler
131
static inline bool operator < (ElfFwcReloc const & a, ElfFwcReloc const & b) {
132
    if (a.r_section < b.r_section) return true;
133
    if (a.r_section > b.r_section) return false;
134
    return a.r_offset < b.r_offset;
135
}
136
 
137
// Look up format in FormatList (this function is in emulator2.cpp)
uint32_t lookupFormat(uint64_t instruct);

// Check integrity of format lists
void checkFormatListIntegrity();

// Interpret template variants in instruction record.
// The returned value is a combination of VARIANT_... bits
uint64_t interpretTemplateVariants(const char *);
145
// bits returned by interpretTemplateVariants
const int VARIANT_D0 = (1 << 0);                 // no destination, no operand type
const int VARIANT_D1 = (1 << 1);                 // no destination, but operand type specified
const int VARIANT_D2 = (1 << 2);                 // operand type ignored
const int VARIANT_D3 = (1 << 3);                 // register RD used for other purpose
const int VARIANT_M0 = (1 << 4);                 // memory operand destination
//const int VARIANT_M1 = (1 << 5);                 // IM3 used as extra immediate operand in E formats with a memory operand. obsolete
const int VARIANT_R0 = (1 << 8);                 // destination is general purpose register
const int VARIANT_R1B =       9;                 // bit index to VARIANT_R1
const int VARIANT_R1 = (1 << VARIANT_R1B);       // first source operand is general purpose register
const int VARIANT_R2 = (1 << (VARIANT_R1B+1));   // second source operand is general purpose register
const int VARIANT_R3 = (1 << (VARIANT_R1B+2));   // third source operand is general purpose register
const int VARIANT_R123 = (VARIANT_R1|VARIANT_R2|VARIANT_R3);  // source operand is general purpose register
const int VARIANT_D3R0 = VARIANT_D3 | VARIANT_R0; // RD is general purpose register
const int VARIANT_RL = (1 << 12);                // RS is a general purpose register specifying length
const int VARIANT_F0 = (1 << 14);                // can have mask register, but not fallback register
const int VARIANT_F1 = (1 << 15);                // can have fallback register without mask register
const int VARIANT_I2 = (1 << 16);                // immediate operand is integer
const int VARIANT_U0 = (1 << 18);                // integer operands are unsigned
const int VARIANT_U3 = (1 << 19);                // integer operands are unsigned if bit 3 in IM3 (format 2.4.x, 2.8.x) is set.
//const int VARIANT_Kn = (1 << 20);                // integer operand is implicit
const int VARIANT_On = (7 << 24);                // n IM3 bits used for options
const int VARIANT_H0 = (1 << 28);                // half precision floating point operands
const int VARIANT_SPECB = 32;                    // bit index to special register type
const uint64_t VARIANT_SPEC = (uint64_t)0xF << VARIANT_SPECB; // Special register types for operands
const uint64_t VARIANT_SPECS = 0x1000000000;     // Special register type for source
const uint64_t VARIANT_SPECD = 0x2000000000;     // Special register type for destination
172
 
173
 
174
struct SInstruction2;                            // defined below
175
struct SLineRef;                                 // defined below
176
 
177
uint8_t findFallback(SFormat const * fInstr, STemplate const * pInstr, int nOperands); // find fallback register in instruction code
178
 
179
// class CDisassembler handles disassembly of ForwardCom ELF file
180
class CDisassembler : public CELF {
181
public:
182
    CDisassembler();                             // Constructor
183
    void getComponents1();                       // Read instruction list, split ELF file into components
184
    void getComponents2(CELF const & assembler, CMemoryBuffer const & instructList);// Read instruction list, get ELF components for assembler output listing
185
    void go();                                   // Disassemble
186
    void getLineList(CDynamicArray<SLineRef> & list); // transfer lineList to debugger
187
    void getOutFile(CTextFileBuffer & buffer);   // transfer outFile to debugger
188
    uint32_t outputFile;                         // Output file name, as index into cmd.fileNameBuffer
189
    uint8_t  debugMode;                          // produce disassembly for emulator/debugger
190
    uint8_t asmTab0;                             // Column for operand type
191
    uint8_t asmTab1;                             // Column for opcode
192
    uint8_t asmTab2;                             // Column for first operand
193
    uint8_t asmTab3;                             // Column for comment
194
protected:
195
    uint32_t pass;                               // Pass number
196
    uint32_t codeMode;                           // 1 = code, 2 = data in code section, 4 = data section
197
    uint32_t iInstr;                             // Position of current instruction relative to section start
198
    uint32_t instrLength;                        // Length of current instruction, in 32-bit words
199
    uint32_t operandType;                        // Operand type of current instruction
200
    uint32_t format;                             // Format of current instruction
201
    uint32_t nextSymbol;                         // Index to next symbol label to write out
202
    uint32_t section;                            // Current section
203
    uint32_t sectionEnd;                         // Size of section
204
    uint64_t sectionAddress;                     // Start address of section
205
    uint32_t currentFunction;                    // Symbol index of current function
206
    uint32_t currentFunctionEnd;                 // Address of end of current function
207
    uint32_t instructionWarning;                 // Warnings and errors for current instruction
208
    uint32_t relocation;                         // relocation index in current instruction + 1
209
    int8_t * sectionBuffer;                      // Pointer to start of current section
210
    uint64_t variant;                            // Template variant and options
211
    STemplate const * pInstr;                    // Pointer to current instruction code
212
    SInstruction2 const * iRecord;               // Pointer to instruction table entry
213
    SFormat const * fInstr;                      // Format details of current instruction code
214
    CDynamicArray<SInstruction2> instructionlist;// List of instruction set, sorted by category, format, and op1
215
    CDynamicArray<ElfFwcSym> newSymbols;         // List of new symbols added during pass 1
216
    CDynamicArray<SLineRef> lineList;            // Cross reference of code addresses to lines in outFile (used by debugger)
217
    CTextFileBuffer outFile;                     // Output file
218
    bool isExecutable;                           // Disassembling executable file
219
    void feedBackText1();                        // Write feedback text on stdout
220
    void parseInstruction();                     // Parse current instruction
221
    //void CheckInstructionErrors();             // Check if instruction is valid
222
    void writeInstruction();                     // Write current instruction to output file
223
    void writeNormalInstruction();               // Write normal instruction to output file
224
    void writeJumpInstruction();                 // Write jump instruction to output file
225
    void writeOperandType(uint32_t ot);          // Write operand type
226
    void writeMemoryOperand();                   // Write memory operand of current instruction
227
    void writeImmediateOperand();                // Write immediate operand depending on type in instruction list
228
    void writeRegister(uint32_t r, uint32_t ot); // Write name of general purpose or vector register
229
    void writeGPRegister(uint32_t r);            // Write name of general purpose register
230
    void writeVectorRegister(uint32_t v);        // Write name of vector register
231
    void writeSpecialRegister(uint32_t r, uint32_t type); // Write name of other type of register
232
    void pass1();                                // Pass 1 of disassembly. Resolves cross references and adds symbol labels
233
    void pass2();                                // Pass 2 of disassembly. Writes output file
234
    void sortSymbolsAndRelocations();            // Sort symbols and relocations by address
235
    void symbolExeAddress(ElfFwcSym & sym);      // Translate symbol address from section:offset to pointerbase:address
236
    void updateSymbols();                        // Make missing symbols for jump targets and data references
237
    void joinSymbolTables();                     // Join the tables: symbols and newSymbols
238
    void assignSymbolNames();                    // Make names for unnamed symbols
239
    void initializeInstructionList();            // Read instruction list from file and sort it
240
    void updateTracer();                         // Trace registers pointing to jump table (to do)
241
    void followJumpTable(uint32_t symi, uint32_t RelType); // Trace targets of jump table  (to do)
242
    void markCodeAsDubious();                    // Mark data in code section
243
    void writeFileBegin();                       // Write beginning of disassembly file
244
    void writeFileEnd();                         // Write end of disassembly file
245
    void writeSectionBegin();                    // Write beginning of section
246
    void writeSectionEnd();                      // Write end of section
247
    void writeCodeComment();                     // Write comment after instruction
248
    void writeDataItems();                       // Write data to disassembly file
249
    void writeLabels();                          // Find and write any labels at current position
250
    void writeRelocationTarget(uint32_t src, uint32_t size); // Write relocation target for this source position
251
    void writeJumpTarget(uint32_t src, uint32_t size); // Write jump relocation target for this source position
252
    void writeWarning(const char * w);           // Write warning message to output file
253
    void writeError(const char * w);             // Write error message to output file
254
    void finalErrorCheck();                      // Check for wrong entries in symbol table and relocations table
255
    //void checkNamesValid();                      // Check for illegal characters in symbols
256
    void writeSymbolName(uint32_t symi);         // Write name of symbol
257
    void writeSectionName(int32_t SegIndex);     // Write name of section
258
    void writePublicsAndExternals();             // Write list of public and external symbols
259
    void writeAddress();                         // write code address
260
    void setTabStops();                          // set tab stops for output
261
};
262
 
263
 
264
/*****************************************************************************
Structures and classes for reading instruction list from comma-separated file,
sorting, and searching
*****************************************************************************/
268
 
269
const int maxINameLen = 31;            // Maximum length of instruction name
const int numInstructionColumns = 13;  // Number of columns in csv file to read. Additional columns are ignored
271
 
272
// Record structure for instruction definition
273
struct SInstruction {
274
    uint64_t format;                   // Instruction format for single format instructions,
275
                                       // or one bit for each allowed format for multi format instructions. See table in manual.
276
    uint64_t variant;                  // Template variant 
277
    uint32_t id;                       // Instruction id number
278
    uint8_t  category;                 // 1: single format, 3: multiformat, 4: jump
279
    uint8_t  templt;                   // Format template. 0xA - 0xE, 0 for multiple templates
280
    uint8_t  sourceoperands;           // Number of source operands, including register, memory and immediate operands
281
    uint8_t  op1;                      // Operation code
282
    uint8_t  op2;                      // Additional operation code
283
    uint8_t  opimmediate;              // Type of immediate operand for single-format instructions
284
    uint32_t implicit_imm;             // Value of implicit immediate operand
285
    uint32_t optypesgp;                // Operand types supported for general purpose registers
286
    uint32_t optypesscalar;            // Operand types supported for scalars in vector registers
287
    uint32_t optypesvector;            // Operand types supported for vectors
288
    char     name[maxINameLen+1];      // Name of instruction. Lower case
289
};
290
 
291
// Same structure, but sorted by category, format, and operation codes. Used by disassembler
292
struct SInstruction2 : public SInstruction {
293
};
294
 
295
// Same structure, but sorted by id
296
struct SInstruction3 : public SInstruction {
297
};
298
 
299
// Operator for sorting instructions by name. Used by assembler
300
static inline bool operator < (SInstruction const & a, SInstruction const & b) {
301
#ifdef _MSC_VER     // case insensitive compare. name depends on compiler
302
    return _strcmpi(a.name, b.name) < 0;
303
#else
304
    return strcasecmp(a.name, b.name) < 0;
305
#endif
306
}
307
 
308
// Operator for sorting instructions by category, format, and operation codes. Used by disassembler
309
static inline bool operator < (SInstruction2 const & a, SInstruction2 const & b) {
310
    // first sort criterion is category
311
    if (a.category < b.category) return true;
312
    if (a.category > b.category) return false;
313
    // sort by format for single-format instructions
314
    if (a.category == 1) {
315
        if (a.format < b.format) return true;
316
        if (a.format > b.format) return false;
317
    }
318
    // sort by op1
319
    if (a.op1 < b.op1) return true;
320
    if (a.op1 > b.op1) return false;
321
    // last sort criterion is op2
322
    return a.op2 < b.op2;
323
}
324
 
325
// Operator for sorting instructions by id
326
static inline bool operator < (SInstruction3 const & a, SInstruction3 const & b) {
327
    return a.id < b.id;
328
}
329
 
330
// class for reading comma-separated file
331
class CCSVFile : public CFileBuffer {
332
public:
333
    CCSVFile() : CFileBuffer() {}                // Constructor
334
    void parse();                                // Read and parse file
335
    CDynamicArray<SInstruction> instructionlist; // List of records
336
    uint64_t interpretNumber(const char * text); // Interpret number in instruction list
337
};
338
 
339
// Cross reference of code addresses to lines in outFile. Used by debugger
struct SLineRef {
    uint64_t address;                            // code address
    uint32_t domain;                             // 1 = IP, 2 = datap, 4 = threadp
    uint32_t textPos;                            // position of corresponding line in outFile
};
345
 
346
// Operators for sorting SLineRef by address
347
static inline bool operator < (SLineRef const & a, SLineRef const & b) {
348
    if (a.domain != b.domain) return a.domain < b.domain;
349
    return a.address < b.address;
350
};
351
 
352
// Two line references are equal if they have the same domain and address. textPos is not compared
static inline bool operator == (SLineRef const & a, SLineRef const & b) {
    return a.domain == b.domain && a.address == b.address;
}
355
 
356
// Negation of operator == for SLineRef
static inline bool operator != (SLineRef const & a, SLineRef const & b) {
    return !(a == b);
}
359
 
360
// Interpret a string with a decimal, binary, octal, or hexadecimal number
int64_t interpretNumber(const char * s, uint32_t maxLength, uint32_t * error);

// Interpret floating point number from string with indicated length
double  interpretFloat(const char * s, uint32_t length);

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.