1 |
49 |
Agner |
/**************************** disasm1.cpp ********************************
|
2 |
|
|
* Author: Agner Fog
|
3 |
|
|
* Date created: 2017-04-26
|
4 |
|
|
* Last modified: 2021-03-30
|
5 |
|
|
* Version: 1.11
|
6 |
|
|
* Project: Binary tools for ForwardCom instruction set
|
7 |
|
|
* Module: disassem.h
|
8 |
|
|
* Description: Disassembler
|
9 |
|
|
* Disassembler for ForwardCom
|
10 |
|
|
*
|
11 |
|
|
* Copyright 2007-2021 GNU General Public License http://www.gnu.org/licenses
|
12 |
|
|
*****************************************************************************/
|
13 |
|
|
#include "stdafx.h"
|
14 |
|
|
|
15 |
|
|
|
16 |
|
|
uint64_t interpretTemplateVariants(const char * s) {
|
17 |
|
|
// Interpret template variants in instruction record
|
18 |
|
|
// The return value is a combination of bits for each variant option
|
19 |
|
|
// These bits are defined as constants VARIANT_D0, etc., in disassem.h
|
20 |
|
|
uint64_t v = 0;
|
21 |
|
|
for (int i = 0; i < 8; i++) { // Loop through string
|
22 |
|
|
char c = toupper(s[i]), d = toupper(s[i+1]);
|
23 |
|
|
switch (c) {
|
24 |
|
|
case 0:
|
25 |
|
|
return v; // End of string
|
26 |
|
|
case 'D':
|
27 |
|
|
if (d == '0') v |= VARIANT_D0; // D0
|
28 |
|
|
if (d == '1') v |= VARIANT_D1; // D1
|
29 |
|
|
if (d == '2') v |= VARIANT_D2; // D2
|
30 |
|
|
if (d == '3') v |= VARIANT_D3; // D3
|
31 |
|
|
continue;
|
32 |
|
|
case 'F':
|
33 |
|
|
if (d == '0') v |= VARIANT_F0; // F0
|
34 |
|
|
if (d == '1') v |= VARIANT_F1; // F1
|
35 |
|
|
continue;
|
36 |
|
|
case 'M':
|
37 |
|
|
if (d == '0') v |= VARIANT_M0; // M0
|
38 |
|
|
//if (d == '1') v |= VARIANT_M1; // M1. No longer used
|
39 |
|
|
continue;
|
40 |
|
|
case 'R':
|
41 |
|
|
if (d == '0') v |= VARIANT_R0; // R0
|
42 |
|
|
if (d == '1') v |= VARIANT_R1; // R1
|
43 |
|
|
if (d == '2') v |= VARIANT_R2; // R2
|
44 |
|
|
if (d == '3') v |= VARIANT_R3; // R3
|
45 |
|
|
if (d == 'L') v |= VARIANT_RL; // RL
|
46 |
|
|
i++;
|
47 |
|
|
continue;
|
48 |
|
|
case 'I':
|
49 |
|
|
if (d == '2') v |= VARIANT_I2; // I2
|
50 |
|
|
continue;
|
51 |
|
|
|
52 |
|
|
case 'O':
|
53 |
|
|
if (d > '0' && d < '7') v |= (d - '0') << 24; // O1 - O6
|
54 |
|
|
continue;
|
55 |
|
|
case 'U':
|
56 |
|
|
if (d == '0') v |= VARIANT_U0; // U0
|
57 |
|
|
if (d == '3') v |= VARIANT_U3; // U3
|
58 |
|
|
continue;
|
59 |
|
|
case 'H':
|
60 |
|
|
if (d == '0') v |= VARIANT_H0; // H0
|
61 |
|
|
continue;
|
62 |
|
|
case 'X':
|
63 |
|
|
v |= uint64_t(((d-'0') & 0xF) | 0x10) << 32; // X0 - X9
|
64 |
|
|
continue;
|
65 |
|
|
case 'Y':
|
66 |
|
|
v |= uint64_t(((d-'0') & 0xF) | 0x20) << 32; // Y0 - Y9
|
67 |
|
|
continue;
|
68 |
|
|
}
|
69 |
|
|
}
|
70 |
|
|
return v;
|
71 |
|
|
}
|
72 |
|
|
|
73 |
|
|
|
74 |
|
|
void CDisassembler::sortSymbolsAndRelocations() {
|
75 |
|
|
// Sort symbols by address. This is useful when symbol labels are written out
|
76 |
|
|
uint32_t i; // loop counter
|
77 |
|
|
// The values of st_reguse1 and st_reguse2 are no longer needed after these values have been written out.
|
78 |
|
|
// Save old index in st_reguse1.
|
79 |
|
|
// Set st_reguse2 to zero, it is used later for data type
|
80 |
|
|
|
81 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
82 |
|
|
symbols[i].st_reguse1 = i;
|
83 |
|
|
symbols[i].st_reguse2 = 0;
|
84 |
|
|
// symbols are grouped by section in object files, by base pointer in executable files
|
85 |
|
|
if (isExecutable) symbolExeAddress(symbols[i]);
|
86 |
|
|
}
|
87 |
|
|
// Sort symbols by address
|
88 |
|
|
symbols.sort();
|
89 |
|
|
|
90 |
|
|
// Add dummy empty symbol number 0
|
91 |
|
|
ElfFwcSym nulsymbol = {0,0,0,0,0,0,0,0,0};
|
92 |
|
|
symbols.addUnique(nulsymbol);
|
93 |
|
|
|
94 |
|
|
// Update all relocations to the new symbol indexes
|
95 |
|
|
// Translate old to new symbol index in all relocation records
|
96 |
|
|
// Allocate array for translating old to new symbol index
|
97 |
|
|
CDynamicArray<uint32_t> old2newSymbolIndex;
|
98 |
|
|
old2newSymbolIndex.setNum(symbols.numEntries());
|
99 |
|
|
|
100 |
|
|
// Make translation table
|
101 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
102 |
|
|
uint32_t oldindex = symbols[i].st_reguse1;
|
103 |
|
|
if (oldindex < symbols.numEntries()) {
|
104 |
|
|
old2newSymbolIndex[oldindex] = i;
|
105 |
|
|
}
|
106 |
|
|
}
|
107 |
|
|
|
108 |
|
|
// Translate all symbol indices in relocation records
|
109 |
|
|
for (i = 0; i < relocations.numEntries(); i++) {
|
110 |
|
|
if (relocations[i].r_sym < old2newSymbolIndex.numEntries()) {
|
111 |
|
|
relocations[i].r_sym = old2newSymbolIndex[relocations[i].r_sym];
|
112 |
|
|
}
|
113 |
|
|
else relocations[i].r_sym = 0; // index out of range!
|
114 |
|
|
if ((relocations[i].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP) {
|
115 |
|
|
// relocation record has an additional reference point
|
116 |
|
|
// bit 30 indicates relocation used OK
|
117 |
|
|
uint32_t refsym = relocations[i].r_refsym & ~0x40000000;
|
118 |
|
|
if (refsym < old2newSymbolIndex.numEntries()) {
|
119 |
|
|
relocations[i].r_refsym = old2newSymbolIndex[refsym] | (relocations[i].r_refsym & 0x40000000);
|
120 |
|
|
}
|
121 |
|
|
else relocations[i].r_refsym = 0; // index out of range
|
122 |
|
|
}
|
123 |
|
|
}
|
124 |
|
|
|
125 |
|
|
// Sort relocations by address
|
126 |
|
|
relocations.sort();
|
127 |
|
|
}
|
128 |
|
|
|
129 |
|
|
// Translate symbol address from section:offset to pointerbase:address
|
130 |
|
|
void CDisassembler::symbolExeAddress(ElfFwcSym & sym) {
|
131 |
|
|
// use this translation only when disassembling executable files
|
132 |
|
|
if (!isExecutable) return;
|
133 |
|
|
|
134 |
|
|
// section
|
135 |
|
|
uint32_t sec = sym.st_section;
|
136 |
|
|
if (sec && sec < sectionHeaders.numEntries()) {
|
137 |
|
|
uint32_t flags = (uint32_t)sectionHeaders[sec].sh_flags;
|
138 |
|
|
// get base pointer
|
139 |
|
|
switch (flags & SHF_BASEPOINTER) {
|
140 |
|
|
case SHF_IP:
|
141 |
|
|
sym.st_section = 1; break;
|
142 |
|
|
case SHF_DATAP:
|
143 |
|
|
sym.st_section = 2; break;
|
144 |
|
|
case SHF_THREADP:
|
145 |
|
|
sym.st_section = 3; break;
|
146 |
|
|
default:
|
147 |
|
|
sym.st_section = 0; break;
|
148 |
|
|
}
|
149 |
|
|
sym.st_value += sectionHeaders[sec].sh_addr;
|
150 |
|
|
}
|
151 |
|
|
}
|
152 |
|
|
|
153 |
|
|
|
154 |
|
|
// Join the tables: symbols and newSymbols
|
155 |
|
|
void CDisassembler::joinSymbolTables() {
|
156 |
|
|
/* There are two symbol tables: 'symbols' and 'newSymbols'.
|
157 |
|
|
'symbols' contains the symbols that were in the original file. This table is sorted
|
158 |
|
|
by address in sortSymbolsAndRelocations() in order to make it easy to find a symbol
|
159 |
|
|
at a given address.
|
160 |
|
|
'newSymbols' contains new symbols that were created during pass 1. It is not sorted.
|
161 |
|
|
The reason why we have two symbol tables is that the symbol indexes would change if
|
162 |
|
|
we add to the 'symbols' table during pass 1 and keep it sorted. We need to have
|
163 |
|
|
consistent indexes during pass 1 in order to access symbols by their index. Likewise,
|
164 |
|
|
'newSymbols' is not sorted because indexes would change when new symbols are added to it.
|
165 |
|
|
'newSymbols' may contain dublets because it is not sorted so dublets are not detected
|
166 |
|
|
when new symbols are added.
|
167 |
|
|
'joinSymbolTables()' is called after pass 1 when we are finished making new symbols.
|
168 |
|
|
This function joins the two tables together, removes any dublets, updates symbol indexes
|
169 |
|
|
in all relocation records, and tranfers data type information from relocation records
|
170 |
|
|
to symbol records.
|
171 |
|
|
*/
|
172 |
|
|
uint32_t r; // Relocation index
|
173 |
|
|
uint32_t s; // Symbol index
|
174 |
|
|
uint32_t newsymi; // Symbol index in newSymbols
|
175 |
|
|
uint32_t newsymi2; // Index of new symbol after transfer to symbols table
|
176 |
|
|
uint32_t symTempIndex = symbols.numEntries(); // Temporary index of symbol after transfer
|
177 |
|
|
|
178 |
|
|
// Remember index of each symbol before adding new symbols and reordering
|
179 |
|
|
for (s = 0; s < symbols.numEntries(); s++) {
|
180 |
|
|
symbols[s].st_reguse1 = s;
|
181 |
|
|
}
|
182 |
|
|
|
183 |
|
|
// Loop through relocations to find references to new symbols
|
184 |
|
|
for (r = 0; r < relocations.numEntries(); r++) {
|
185 |
|
|
if (relocations[r].r_sym & 0x80000000) { // Refers to newSymbols table
|
186 |
|
|
newsymi = relocations[r].r_sym & ~0x80000000;
|
187 |
|
|
if (newsymi < newSymbols.numEntries()) {
|
188 |
|
|
// Put symbol into old table if no equivalent symbol exists here
|
189 |
|
|
newsymi2 = symbols.addUnique(newSymbols[newsymi]);
|
190 |
|
|
// Give it a temporary index if it doesn't have one
|
191 |
|
|
if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
|
192 |
|
|
// update reference in relocation record to temporary index
|
193 |
|
|
relocations[r].r_sym = symbols[newsymi2].st_reguse1;
|
194 |
|
|
}
|
195 |
|
|
}
|
196 |
|
|
// Do the same with any reference point
|
197 |
|
|
if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && relocations[r].r_refsym & 0x80000000) {
|
198 |
|
|
newsymi = relocations[r].r_refsym & ~0xC0000000;
|
199 |
|
|
if (newsymi < newSymbols.numEntries()) {
|
200 |
|
|
// Put symbol into old table if no equivalent symbol exists here
|
201 |
|
|
newsymi2 = symbols.addUnique(newSymbols[newsymi]);
|
202 |
|
|
// Give it a temporary index if it doesn't have one
|
203 |
|
|
if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
|
204 |
|
|
// update reference in relocation record to temporary index
|
205 |
|
|
relocations[r].r_refsym = symbols[newsymi2].st_reguse1 | (relocations[r].r_refsym & 0x40000000);
|
206 |
|
|
}
|
207 |
|
|
}
|
208 |
|
|
}
|
209 |
|
|
// Make symbol index translation table
|
210 |
|
|
CDynamicArray<uint32_t> old2newSymbolIndex;
|
211 |
|
|
old2newSymbolIndex.setNum(symbols.numEntries());
|
212 |
|
|
for (s = 0; s < symbols.numEntries(); s++) {
|
213 |
|
|
uint32_t oldsymi = symbols[s].st_reguse1;
|
214 |
|
|
if (oldsymi < old2newSymbolIndex.numEntries()) {
|
215 |
|
|
old2newSymbolIndex[oldsymi] = s;
|
216 |
|
|
}
|
217 |
|
|
}
|
218 |
|
|
// Update indexes in relocation records
|
219 |
|
|
for (r = 0; r < relocations.numEntries(); r++) {
|
220 |
|
|
if (relocations[r].r_sym < old2newSymbolIndex.numEntries()) { // Refers to newSymbols table
|
221 |
|
|
relocations[r].r_sym = old2newSymbolIndex[relocations[r].r_sym];
|
222 |
|
|
// Give the symbol a data type from relocation record if it doesn't have one
|
223 |
|
|
if (symbols[relocations[r].r_sym].st_reguse2 == 0) {
|
224 |
|
|
symbols[relocations[r].r_sym].st_reguse2 = relocations[r].r_type >> 8;
|
225 |
|
|
}
|
226 |
|
|
}
|
227 |
|
|
// Do the same with any reference point
|
228 |
|
|
uint32_t refsym = relocations[r].r_refsym & ~0xC0000000;
|
229 |
|
|
if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && refsym < old2newSymbolIndex.numEntries()) {
|
230 |
|
|
relocations[r].r_refsym = old2newSymbolIndex[refsym] | (relocations[r].r_refsym & 0x40000000);
|
231 |
|
|
}
|
232 |
|
|
}
|
233 |
|
|
}
|
234 |
|
|
|
235 |
|
|
|
236 |
|
|
void CDisassembler::assignSymbolNames() {
|
237 |
|
|
// Assign names to symbols that do not have a name
|
238 |
|
|
uint32_t i; // New symbol index
|
239 |
|
|
uint32_t numDigits; // Number of digits in new symbol names
|
240 |
|
|
char name[64]; // sectionBuffer for making symbol name
|
241 |
|
|
static char format[64];
|
242 |
|
|
uint32_t unnamedNum = 0; // Number of unnamed symbols
|
243 |
|
|
//uint32_t addMoreSymbols = 0; // More symbols need to be added
|
244 |
|
|
|
245 |
|
|
// Find necessary number of digits
|
246 |
|
|
numDigits = 3; i = symbols.numEntries();
|
247 |
|
|
while (i >= 1000) {
|
248 |
|
|
i /= 10;
|
249 |
|
|
numDigits++;
|
250 |
|
|
}
|
251 |
|
|
|
252 |
|
|
// format string for symbol names
|
253 |
|
|
sprintf(format, "%s%c0%i%c", "@_", '%', numDigits, 'i');
|
254 |
|
|
|
255 |
|
|
// Loop through symbols
|
256 |
|
|
for (i = 1; i < symbols.numEntries(); i++) {
|
257 |
|
|
if (symbols[i].st_name == 0 ) {
|
258 |
|
|
// Symbol has no name. Make one
|
259 |
|
|
sprintf(name, format, ++unnamedNum);
|
260 |
|
|
// Store new name
|
261 |
|
|
symbols[i].st_name = stringBuffer.pushString(name);
|
262 |
|
|
}
|
263 |
|
|
}
|
264 |
|
|
|
265 |
|
|
#if 0 //!!
|
266 |
|
|
// For debugging: list all symbols
|
267 |
|
|
printf("\n\nSymbols:");
|
268 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
269 |
|
|
printf("\n%3X %3X %s sect %i offset %X type %X size %i Scope %i",
|
270 |
|
|
i, symbols[i].st_name, stringBuffer.buf() + symbols[i].st_name,
|
271 |
|
|
symbols[i].st_section, (uint32_t)symbols[i].st_value, symbols[i].st_type,
|
272 |
|
|
(uint32_t)symbols[i].st_unitsize, symbols[i].st_other);
|
273 |
|
|
if (symbols[i].st_reguse2) printf(" Type %X", symbols[i].st_reguse2);
|
274 |
|
|
}
|
275 |
|
|
#endif
|
276 |
|
|
#if 0
|
277 |
|
|
// For debugging: list all relocations
|
278 |
|
|
printf("\n\nRelocations:");
|
279 |
|
|
for (uint32_t i = 0; i < relocations.numEntries(); i++) {
|
280 |
|
|
printf("\nsect %i, os %X, type %X, sym %i, add %X, refsym %X",
|
281 |
|
|
(uint32_t)(relocations[i].r_section), (uint32_t)relocations[i].r_offset, relocations[i].r_type,
|
282 |
|
|
relocations[i].r_sym, relocations[i].r_addend, relocations[i].r_refsym);
|
283 |
|
|
}
|
284 |
|
|
#endif
|
285 |
|
|
}
|
286 |
|
|
|
287 |
|
|
|
288 |
|
|
|
289 |
|
|
/************************** class CDisassembler *****************************
|
290 |
|
|
Members of class CDisassembler
|
291 |
|
|
Members that relate to file output are in disasm2.cpp
|
292 |
|
|
******************************************************************************/
|
293 |
|
|
|
294 |
|
|
CDisassembler::CDisassembler() {
    // Constructor. Puts the disassembler in its initial state
    outputFile = cmd.outputFile;                 // output file name comes from the command line
    debugMode = 0;
    pass = 0;
    currentFunction = 0;
    currentFunctionEnd = 0;
    nextSymbol = 0;
    // Check code integrity
    checkFormatListIntegrity();
}
|
304 |
|
|
|
305 |
|
|
void CDisassembler::initializeInstructionList() {
|
306 |
|
|
// Read and initialize instruction list and sort it by category, format, and op1
|
307 |
|
|
CCSVFile instructionListFile;
|
308 |
|
|
instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH); // Filename of list of instructions
|
309 |
|
|
instructionListFile.parse(); // Read and interpret instruction list file
|
310 |
|
|
instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container
|
311 |
|
|
instructionlist.sort(); // Sort list, using sort order defined by SInstruction2
|
312 |
|
|
}
|
313 |
|
|
|
314 |
|
|
// Read instruction list, split ELF file into components
|
315 |
|
|
void CDisassembler::getComponents1() {
|
316 |
|
|
// Check code integrity
|
317 |
|
|
checkFormatListIntegrity();
|
318 |
|
|
|
319 |
|
|
// Read instruction list
|
320 |
|
|
initializeInstructionList();
|
321 |
|
|
|
322 |
|
|
// Split ELF file into containers
|
323 |
|
|
split();
|
324 |
|
|
}
|
325 |
|
|
|
326 |
|
|
// Read instruction list, get ELF components for assembler output listing
|
327 |
|
|
void CDisassembler::getComponents2(CELF const & assembler, CMemoryBuffer const & instructList) {
|
328 |
|
|
// This function replaces getComponents1() when making an output listing for the assembler
|
329 |
|
|
// list file name from command line
|
330 |
|
|
|
331 |
|
|
// copy containers from assembler outFile
|
332 |
|
|
sectionHeaders.copy(assembler.getSectionHeaders());
|
333 |
|
|
symbols.copy(assembler.getSymbols());
|
334 |
|
|
relocations.copy(assembler.getRelocations());
|
335 |
|
|
stringBuffer.copy(assembler.getStringBuffer());
|
336 |
|
|
dataBuffer.copy(assembler.getDataBuffer());
|
337 |
|
|
// Copy instruction list from assembler to avoid reading the csv file again.
|
338 |
|
|
// Use the unsorted list to make sure the preferred name for an instuction comes first, in case there are alias names
|
339 |
|
|
instructionlist.copy(instructList);
|
340 |
|
|
instructionlist.sort(); // Sort list, using the sort order needed by the disassembler as defined by SInstruction2
|
341 |
|
|
}
|
342 |
|
|
|
343 |
|
|
|
344 |
|
|
// Do the disassembly
|
345 |
|
|
void CDisassembler::go() {
|
346 |
|
|
// set tabulator stops
|
347 |
|
|
setTabStops();
|
348 |
|
|
|
349 |
|
|
// write feedback to console
|
350 |
|
|
feedBackText1();
|
351 |
|
|
|
352 |
|
|
// is this an executable or object file
|
353 |
|
|
isExecutable = fileHeader.e_type == ET_EXEC;
|
354 |
|
|
|
355 |
|
|
// Begin writing output file
|
356 |
|
|
writeFileBegin();
|
357 |
|
|
|
358 |
|
|
// Sort symbols by address
|
359 |
|
|
sortSymbolsAndRelocations();
|
360 |
|
|
|
361 |
|
|
// pass 1: Find symbols types and unnamed symbols
|
362 |
|
|
pass = 1;
|
363 |
|
|
pass1();
|
364 |
|
|
|
365 |
|
|
if (pass & 0x10) {
|
366 |
|
|
// Repetition of pass 1 requested
|
367 |
|
|
pass = 2;
|
368 |
|
|
pass1();
|
369 |
|
|
}
|
370 |
|
|
|
371 |
|
|
// Join the tables: symbols and newSymbols;
|
372 |
|
|
joinSymbolTables();
|
373 |
|
|
|
374 |
|
|
// put names on unnamed symbols
|
375 |
|
|
assignSymbolNames();
|
376 |
|
|
|
377 |
|
|
// pass 2: Write all sections to output file
|
378 |
|
|
pass = 0x100;
|
379 |
|
|
pass2();
|
380 |
|
|
|
381 |
|
|
// Check for illegal entries in symbol table and relocations table
|
382 |
|
|
finalErrorCheck();
|
383 |
|
|
|
384 |
|
|
// Finish writing output file
|
385 |
|
|
writeFileEnd();
|
386 |
|
|
|
387 |
|
|
// write output file
|
388 |
|
|
if (outputFile && !debugMode) outFile.write(cmd.getFilename(outputFile));
|
389 |
|
|
}
|
390 |
|
|
|
391 |
|
|
// write feedback text on stdout
|
392 |
|
|
void CDisassembler::feedBackText1() {
|
393 |
|
|
if (cmd.verbose && cmd.job == CMDL_JOB_DIS) {
|
394 |
|
|
// Tell what we are doing:
|
395 |
|
|
printf("\nDisassembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(outputFile));
|
396 |
|
|
}
|
397 |
|
|
}
|
398 |
|
|
|
399 |
|
|
|
400 |
|
|
void CDisassembler::pass1() {
|
401 |
|
|
|
402 |
|
|
/* pass 1: does the following jobs:
|
403 |
|
|
--------------------------------
|
404 |
|
|
|
405 |
|
|
* Scans all code sections, instruction by instruction.
|
406 |
|
|
|
407 |
|
|
* Follows all references to data in order to determine data type for
|
408 |
|
|
each data symbol.
|
409 |
|
|
|
410 |
|
|
* Assigns symbol table entries for all jump and call targets that do not
|
411 |
|
|
allready have a name.
|
412 |
|
|
|
413 |
|
|
* Identifies and analyzes tables of jump addresses and call addresses,
|
414 |
|
|
e.g. switch/case tables and virtual function tables. (to do !)
|
415 |
|
|
|
416 |
|
|
* Tries to identify any data in the code section.
|
417 |
|
|
|
418 |
|
|
*/
|
419 |
|
|
//uint32_t sectionType;
|
420 |
|
|
|
421 |
|
|
// Loop through sections, pass 1
|
422 |
|
|
for (section = 1; section < sectionHeaders.numEntries(); section++) {
|
423 |
|
|
|
424 |
|
|
// Get section type
|
425 |
|
|
//sectionType = sectionHeaders[section].sh_type;
|
426 |
|
|
codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
|
427 |
|
|
|
428 |
|
|
sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
|
429 |
|
|
sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
|
430 |
|
|
|
431 |
|
|
if (codeMode < 4) {
|
432 |
|
|
// This is a code section
|
433 |
|
|
|
434 |
|
|
sectionAddress = sectionHeaders[section].sh_addr;
|
435 |
|
|
if (sectionEnd == 0) continue;
|
436 |
|
|
|
437 |
|
|
iInstr = 0;
|
438 |
|
|
|
439 |
|
|
// Loop through instructions
|
440 |
|
|
while (iInstr < sectionEnd) {
|
441 |
|
|
|
442 |
|
|
// Check if code not dubious
|
443 |
|
|
if (codeMode == 1) {
|
444 |
|
|
|
445 |
|
|
parseInstruction(); // Parse instruction
|
446 |
|
|
|
447 |
|
|
updateSymbols(); // Detect symbol types for operands of this instruction
|
448 |
|
|
|
449 |
|
|
updateTracer(); // Trace register values
|
450 |
|
|
|
451 |
|
|
iInstr += instrLength * 4; // Next instruction
|
452 |
|
|
}
|
453 |
|
|
else {
|
454 |
|
|
// iEnd = labelEnd;
|
455 |
|
|
}
|
456 |
|
|
}
|
457 |
|
|
}
|
458 |
|
|
}
|
459 |
|
|
}
|
460 |
|
|
|
461 |
|
|
|
462 |
|
|
void CDisassembler::pass2() {
|
463 |
|
|
|
464 |
|
|
/* pass 2: does the following jobs:
|
465 |
|
|
--------------------------------
|
466 |
|
|
|
467 |
|
|
* Scans through all sections, code and data.
|
468 |
|
|
|
469 |
|
|
* Outputs warnings for suboptimal instruction codes and error messages
|
470 |
|
|
for erroneous code and erroneous relocations.
|
471 |
|
|
|
472 |
|
|
* Outputs disassembly of all instructions, operands and relocations,
|
473 |
|
|
followed by the binary code listing as comment.
|
474 |
|
|
|
475 |
|
|
* Outputs disassembly of all data, followed by alternative representations
|
476 |
|
|
as comment.
|
477 |
|
|
*/
|
478 |
|
|
|
479 |
|
|
//uint32_t sectionType;
|
480 |
|
|
|
481 |
|
|
// Loop through sections, pass 2
|
482 |
|
|
for (section = 1; section < sectionHeaders.numEntries(); section++) {
|
483 |
|
|
|
484 |
|
|
// Get section type
|
485 |
|
|
//sectionType = sectionHeaders[section].sh_type;
|
486 |
|
|
codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
|
487 |
|
|
|
488 |
|
|
// Initialize code parser
|
489 |
|
|
sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
|
490 |
|
|
sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
|
491 |
|
|
sectionAddress = sectionHeaders[section].sh_addr;
|
492 |
|
|
|
493 |
|
|
writeSectionBegin(); // Write segment directive
|
494 |
|
|
|
495 |
|
|
if (codeMode < 4) {
|
496 |
|
|
// This is a code section
|
497 |
|
|
if (sectionEnd == 0) continue;
|
498 |
|
|
iInstr = 0;
|
499 |
|
|
|
500 |
|
|
// Loop through instructions
|
501 |
|
|
while (iInstr < sectionEnd) {
|
502 |
|
|
|
503 |
|
|
if (debugMode) {
|
504 |
|
|
// save cross reference
|
505 |
|
|
SLineRef xref = { iInstr + sectionAddress, 1, outFile.dataSize() };
|
506 |
|
|
lineList.push(xref);
|
507 |
|
|
writeAddress();
|
508 |
|
|
}
|
509 |
|
|
writeLabels(); // Find any label here
|
510 |
|
|
|
511 |
|
|
// Check if code not dubious
|
512 |
|
|
if (codeMode == 1) {
|
513 |
|
|
|
514 |
|
|
parseInstruction(); // Parse instruction
|
515 |
|
|
|
516 |
|
|
writeInstruction(); // Write instruction
|
517 |
|
|
|
518 |
|
|
iInstr += instrLength * 4; // Next instruction
|
519 |
|
|
|
520 |
|
|
}
|
521 |
|
|
else {
|
522 |
|
|
// This is data Skip to next label
|
523 |
|
|
}
|
524 |
|
|
}
|
525 |
|
|
writeSectionEnd(); // Write segment directive
|
526 |
|
|
}
|
527 |
|
|
else {
|
528 |
|
|
// This is a data section
|
529 |
|
|
pInstr = 0; iRecord = 0; fInstr = 0; // Set invalid pointers to zero
|
530 |
|
|
operandType = 2; // Default data type is int32
|
531 |
|
|
instrLength = 4; // Default data size is 4 bytes
|
532 |
|
|
iInstr = 0; // Instruction position
|
533 |
|
|
nextSymbol = 0;
|
534 |
|
|
|
535 |
|
|
writeDataItems(); // Loop through data. Write data
|
536 |
|
|
|
537 |
|
|
writeSectionEnd(); // Write segment directive
|
538 |
|
|
}
|
539 |
|
|
}
|
540 |
|
|
}
|
541 |
|
|
|
542 |
|
|
|
543 |
|
|
|
544 |
|
|
/******************** Explanation of tracer: ***************************
|
545 |
|
|
|
546 |
|
|
This is a machine which can trace the contents of each register in certain
|
547 |
|
|
situations. It is currently used for recognizing pointers to jump tables
|
548 |
|
|
in order to identify jump tables (to do!)
|
549 |
|
|
*/
|
550 |
|
|
void CDisassembler::updateTracer() {
    // Trace register values. See explanation above.
    // Not implemented yet: this function currently does nothing.
    // It is called from pass1() after each instruction has been parsed.
}
|
553 |
|
|
|
554 |
|
|
|
555 |
|
|
void CDisassembler::updateSymbols() {
|
556 |
|
|
// Find unnamed symbols, determine symbol types,
|
557 |
|
|
// update symbol list, call checkJumpTarget if jump/call.
|
558 |
|
|
// This function is called during pass 1 for every instruction
|
559 |
|
|
uint32_t relSource = 0; // Position of relocated field
|
560 |
|
|
|
561 |
|
|
if (fInstr->category == 4 && fInstr->jumpSize) {
|
562 |
|
|
// Self-relative jump instruction. Check OPJ
|
563 |
|
|
// uint32_t opj = (instrLength == 1) ? pInstr->a.op1 : pInstr->b[0]; // Jump instruction opcode
|
564 |
|
|
// Check if there is a relocation here
|
565 |
|
|
relSource = iInstr + (fInstr->jumpPos); // Position of relocated field
|
566 |
|
|
ElfFwcReloc rel;
|
567 |
|
|
rel.r_offset = relSource;
|
568 |
|
|
rel.r_section = section;
|
569 |
|
|
rel.r_addend = 0;
|
570 |
|
|
if (relocations.findFirst(rel) < 0) {
|
571 |
|
|
// There is no relocation. Target must be in the same section. Find target
|
572 |
|
|
int32_t offset = 0;
|
573 |
|
|
switch (fInstr->jumpSize) { // Read offset of correct size
|
574 |
|
|
case 1: // 8 bit
|
575 |
|
|
offset = *(int8_t*)(sectionBuffer + relSource);
|
576 |
|
|
rel.r_type = R_FORW_8 | 0x80000000; // add 0x80000000 to remember that this is not a real relocation
|
577 |
|
|
break;
|
578 |
|
|
case 2: // 16 bit
|
579 |
|
|
offset = *(int16_t*)(sectionBuffer + relSource);
|
580 |
|
|
rel.r_type = R_FORW_16 | 0x80000000;
|
581 |
|
|
break;
|
582 |
|
|
case 3: // 24 bit. Sign extend to 32 bits
|
583 |
|
|
offset = *(int32_t*)(sectionBuffer + relSource) << 8 >> 8;
|
584 |
|
|
rel.r_type = R_FORW_24 | 0x80000000;
|
585 |
|
|
break;
|
586 |
|
|
case 4: // 32 bit
|
587 |
|
|
offset = *(int32_t*)(sectionBuffer + relSource);
|
588 |
|
|
rel.r_type = R_FORW_32 | 0x80000000;
|
589 |
|
|
break;
|
590 |
|
|
}
|
591 |
|
|
// Scale offset by 4 and add offset to end of instruction
|
592 |
|
|
int32_t target = iInstr + instrLength * 4 + offset * 4;
|
593 |
|
|
|
594 |
|
|
// Add a symbol at target address if none exists
|
595 |
|
|
ElfFwcSym sym;
|
596 |
|
|
zeroAllMembers(sym);
|
597 |
|
|
sym.st_bind = STB_LOCAL;
|
598 |
|
|
sym.st_other = STV_EXEC;
|
599 |
|
|
sym.st_section = section;
|
600 |
|
|
sym.st_value = (uint64_t)(int64_t)target;
|
601 |
|
|
symbolExeAddress(sym);
|
602 |
|
|
int32_t symi = symbols.findFirst(sym);
|
603 |
|
|
if (symi < 0) {
|
604 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
605 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
606 |
|
|
}
|
607 |
|
|
// Add a dummy relocation record for this symbol.
|
608 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
609 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
610 |
|
|
rel.r_sym = (uint32_t)symi;
|
611 |
|
|
relocations.addUnique(rel);
|
612 |
|
|
}
|
613 |
|
|
}
|
614 |
|
|
|
615 |
|
|
// Check if instruction has a memory reference relative to IP, DATAP, or THREADP
|
616 |
|
|
uint32_t basePointer = 0;
|
617 |
|
|
if (fInstr->mem & 2) basePointer = pInstr->a.rs;
|
618 |
|
|
relSource = iInstr + fInstr->addrPos; // Position of relocated field
|
619 |
|
|
|
620 |
|
|
if (fInstr->addrSize > 1 && basePointer >= 28 && basePointer <= 30 && !(fInstr->mem & 0x20)) {
|
621 |
|
|
// Memory operand is relative to THREADP, DATAP or IP
|
622 |
|
|
// Check if there is a relocation here
|
623 |
|
|
uint32_t relpos = iInstr + fInstr->addrPos;
|
624 |
|
|
ElfFwcReloc rel;
|
625 |
|
|
rel.r_offset = relpos;
|
626 |
|
|
rel.r_section = section;
|
627 |
|
|
rel.r_type = (operandType | 0x80) << 24;
|
628 |
|
|
uint32_t nrel, irel = 0;
|
629 |
|
|
nrel = relocations.findAll(&irel, rel);
|
630 |
|
|
if (nrel > 1) writeWarning("Overlapping relocations here");
|
631 |
|
|
if (nrel) {
|
632 |
|
|
// Relocation found. Put the data type into the relocation record.
|
633 |
|
|
// The data type will later be transferred to the symbol record in joinSymbolTables()
|
634 |
|
|
if (!(relocations[irel].r_type & 0x80000000)) {
|
635 |
|
|
// Save target data type in upper 8 bits of r_type
|
636 |
|
|
relocations[irel].r_type = (relocations[irel].r_type & 0x00FFFFFF) | (operandType /*| 0x80*/) << 24;
|
637 |
|
|
}
|
638 |
|
|
// Check if the target is a section + offset
|
639 |
|
|
uint32_t symi = relocations[irel].r_sym;
|
640 |
|
|
if (symi < symbols.numEntries() && symbols[symi].st_type == STT_SECTION && relocations[irel].r_addend > 0) {
|
641 |
|
|
// Add a new symbol at this address
|
642 |
|
|
ElfFwcSym sym;
|
643 |
|
|
zeroAllMembers(sym);
|
644 |
|
|
sym.st_bind = STB_LOCAL;
|
645 |
|
|
sym.st_other = STT_OBJECT;
|
646 |
|
|
sym.st_section = symbols[symi].st_section;
|
647 |
|
|
sym.st_value = symbols[symi].st_value + (int64_t)relocations[irel].r_addend;
|
648 |
|
|
symbolExeAddress(sym);
|
649 |
|
|
uint32_t symi2 = newSymbols.push(sym);
|
650 |
|
|
relocations[irel].r_sym = symi2 | 0x80000000; // Upper bit means index refers to newSymbols
|
651 |
|
|
relocations[irel].r_addend = 0;
|
652 |
|
|
}
|
653 |
|
|
}
|
654 |
|
|
else if (basePointer == REG_IP >> 16 && fInstr->addrSize > 1 && !(fInstr->mem & 0x20)) {
|
655 |
|
|
// No relocation found. Insert new relocation and new symbol
|
656 |
|
|
// This fits the address instruction with a local IP target.
|
657 |
|
|
// to do: Make it work for other cases
|
658 |
|
|
|
659 |
|
|
// Add a symbol at target address if none exists
|
660 |
|
|
int32_t target = iInstr + instrLength * 4;
|
661 |
|
|
switch (fInstr->addrSize) { // Read offset of correct size
|
662 |
|
|
/* case 1: // 8 bit. cannot use IP
|
663 |
|
|
target += *(int8_t*)(sectionBuffer + relSource) << (operandType & 7);
|
664 |
|
|
rel.r_type = R_FORW_8 | R_FORW_SELFREL | 0x80000000;
|
665 |
|
|
break;*/
|
666 |
|
|
case 2: // 16 bit
|
667 |
|
|
target += *(int16_t*)(sectionBuffer + relSource);
|
668 |
|
|
rel.r_type = R_FORW_16 | R_FORW_SELFREL | 0x80000000;
|
669 |
|
|
break;
|
670 |
|
|
case 4: // 32 bit
|
671 |
|
|
target += *(int32_t*)(sectionBuffer + relSource);
|
672 |
|
|
rel.r_type = R_FORW_32 | R_FORW_SELFREL | 0x80000000;
|
673 |
|
|
break;
|
674 |
|
|
}
|
675 |
|
|
ElfFwcSym sym;
|
676 |
|
|
zeroAllMembers(sym);
|
677 |
|
|
sym.st_bind = STB_LOCAL;
|
678 |
|
|
sym.st_other = STV_EXEC;
|
679 |
|
|
sym.st_section = section;
|
680 |
|
|
sym.st_value = (uint64_t)(int64_t)target;
|
681 |
|
|
|
682 |
|
|
symbolExeAddress(sym);
|
683 |
|
|
int32_t symi = symbols.findFirst(sym);
|
684 |
|
|
if (symi < 0) {
|
685 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
686 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
687 |
|
|
}
|
688 |
|
|
// Add a dummy relocation record for this symbol.
|
689 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
690 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
691 |
|
|
rel.r_offset = (uint64_t)iInstr + fInstr->addrPos; // Position of relocated field
|
692 |
|
|
rel.r_section = section;
|
693 |
|
|
rel.r_addend = -4;
|
694 |
|
|
rel.r_sym = (uint32_t)symi;
|
695 |
|
|
relocations.addUnique(rel);
|
696 |
|
|
}
|
697 |
|
|
else if ((basePointer == REG_DATAP >> 16 || basePointer == REG_THREADP >> 16)
|
698 |
|
|
&& fInstr->addrSize > 1 && !(fInstr->mem & 0x20) && isExecutable) {
|
699 |
|
|
// No relocation found. Insert new relocation and new symbol. datap or threadp based
|
700 |
|
|
|
701 |
|
|
// Add a symbol at target address if none exists
|
702 |
|
|
int64_t target = fileHeader.e_datap_base;
|
703 |
|
|
rel.r_type = R_FORW_DATAP;
|
704 |
|
|
uint32_t dom = 2;
|
705 |
|
|
uint32_t st_other = STV_DATAP;
|
706 |
|
|
if (basePointer == REG_THREADP >> 16) {
|
707 |
|
|
target = fileHeader.e_threadp_base;
|
708 |
|
|
rel.r_type = R_FORW_THREADP;
|
709 |
|
|
dom = 3;
|
710 |
|
|
st_other = STV_THREADP;
|
711 |
|
|
}
|
712 |
|
|
switch (fInstr->addrSize) { // Read offset of correct size
|
713 |
|
|
case 1: // 8 bit
|
714 |
|
|
target += *(int8_t*)(sectionBuffer + relSource);
|
715 |
|
|
rel.r_type |= R_FORW_8 | 0x80000000;
|
716 |
|
|
break;
|
717 |
|
|
case 2: // 16 bit
|
718 |
|
|
target += *(int16_t*)(sectionBuffer + relSource);
|
719 |
|
|
rel.r_type |= R_FORW_16 | 0x80000000;
|
720 |
|
|
break;
|
721 |
|
|
case 4: // 32 bit
|
722 |
|
|
target += *(int32_t*)(sectionBuffer + relSource);
|
723 |
|
|
rel.r_type |= R_FORW_32 | 0x80000000;
|
724 |
|
|
break;
|
725 |
|
|
}
|
726 |
|
|
ElfFwcSym sym;
|
727 |
|
|
zeroAllMembers(sym);
|
728 |
|
|
sym.st_type = STT_OBJECT;
|
729 |
|
|
sym.st_bind = STB_WEAK;
|
730 |
|
|
sym.st_other = st_other;
|
731 |
|
|
sym.st_section = dom;
|
732 |
|
|
sym.st_value = (uint64_t)target;
|
733 |
|
|
|
734 |
|
|
int32_t symi = symbols.findFirst(sym);
|
735 |
|
|
if (symi < 0) {
|
736 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
737 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
738 |
|
|
}
|
739 |
|
|
// Add a dummy relocation record for this symbol.
|
740 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
741 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
742 |
|
|
rel.r_offset = iInstr + fInstr->addrPos; // Position of relocated field
|
743 |
|
|
rel.r_section = section;
|
744 |
|
|
rel.r_addend = 0;
|
745 |
|
|
rel.r_sym = (uint32_t)symi;
|
746 |
|
|
relocations.addUnique(rel);
|
747 |
|
|
}
|
748 |
|
|
}
|
749 |
|
|
}
|
750 |
|
|
|
751 |
|
|
|
752 |
|
|
void CDisassembler::followJumpTable(uint32_t symi, uint32_t RelType) {
    // Placeholder for checking a jump/call table and its targets.
    // Not implemented yet (to do): the function currently does nothing.
    (void)symi;                                  // unused until the check is implemented
    (void)RelType;                               // unused until the check is implemented
}
|
756 |
|
|
|
757 |
|
|
|
758 |
|
|
void CDisassembler::markCodeAsDubious() {
    // Intended purpose: remember that this may be data in a code segment.
    // The body is empty, so calling this function currently has no effect.
}
|
761 |
|
|
|
762 |
|
|
|
763 |
|
|
// Table of instruction lengths, used in parseInstruction.
// The index is the upper three bits of the first instruction word
// (parseInstruction indexes it with pInstr->i[0] >> 29).
static const uint8_t lengthList[8] = {
    1, 1, 1, 1,                                  // index 0-3: length 1
    2, 2,                                        // index 4-5: length 2
    3,                                           // index 6:   length 3
    4                                            // index 7:   length 4
};
|
765 |
|
|
|
766 |
|
|
|
767 |
|
|
void CDisassembler::parseInstruction() {
|
768 |
|
|
// Parse one opcode at position iInstr
|
769 |
|
|
instructionWarning = 0;
|
770 |
|
|
|
771 |
|
|
// Get instruction
|
772 |
|
|
pInstr = (STemplate*)(sectionBuffer + iInstr);
|
773 |
|
|
|
774 |
|
|
// Get op1
|
775 |
|
|
uint8_t op = pInstr->a.op1;
|
776 |
|
|
|
777 |
|
|
// Get format
|
778 |
|
|
format = (pInstr->a.il << 8) + (pInstr->a.mode << 4); // Construct format = (il,mode,submode)
|
779 |
|
|
|
780 |
|
|
// Get submode
|
781 |
|
|
switch (format) {
|
782 |
|
|
case 0x200: case 0x220: case 0x300: case 0x320: // submode in mode2
|
783 |
|
|
format += pInstr->a.mode2;
|
784 |
|
|
break;
|
785 |
|
|
case 0x250: case 0x310: // Submode for jump instructions etc.
|
786 |
|
|
if (op < 8) {
|
787 |
|
|
format += op; op = pInstr->b[0] & 0x3F;
|
788 |
|
|
}
|
789 |
|
|
else {
|
790 |
|
|
format += 8;
|
791 |
|
|
}
|
792 |
|
|
break;
|
793 |
|
|
}
|
794 |
|
|
|
795 |
|
|
// Look up format details
|
796 |
|
|
static SFormat form;
|
797 |
|
|
fInstr = &formatList[lookupFormat(pInstr->q)]; // lookupFormat is in emulator2.cpp
|
798 |
|
|
format = fInstr->format2; // Include subformat depending on op1
|
799 |
|
|
if (fInstr->tmplate == 0xE && pInstr->a.op2 && !(fInstr->imm2 & 0x100)) {
|
800 |
|
|
// Single format instruction if op2 != 0 and op2 not used as immediate operand
|
801 |
|
|
form = *fInstr;
|
802 |
|
|
form.category = 1;
|
803 |
|
|
fInstr = &form;
|
804 |
|
|
}
|
805 |
|
|
|
806 |
|
|
// Get operand type
|
807 |
|
|
if (fInstr->ot == 0) { // Operand type determined by OT field
|
808 |
|
|
operandType = pInstr->a.ot; // Operand type
|
809 |
|
|
if (!(pInstr->a.mode & 6) && !(fInstr->vect & 0x11)) {
|
810 |
|
|
// Check use of M bit
|
811 |
|
|
format |= (operandType & 4) << 5; // Add M bit to format
|
812 |
|
|
operandType &= ~4; // Remove M bit from operand type
|
813 |
|
|
}
|
814 |
|
|
}
|
815 |
|
|
else if ((fInstr->ot & 0xF0) == 0x10) { // Operand type fixed. Value in formatList
|
816 |
|
|
operandType = fInstr->ot & 7;
|
817 |
|
|
}
|
818 |
|
|
else if (fInstr->ot == 0x32) { // int32 for even op1, int64 for odd op1
|
819 |
|
|
operandType = 2 + (pInstr->a.op1 & 1);
|
820 |
|
|
}
|
821 |
|
|
else if (fInstr->ot == 0x35) { // Float for even op1, double for odd op1
|
822 |
|
|
operandType = 5 + (pInstr->a.op1 & 1);
|
823 |
|
|
}
|
824 |
|
|
else {
|
825 |
|
|
operandType = 0; // Error in formatList. Should not occur
|
826 |
|
|
}
|
827 |
|
|
|
828 |
|
|
// Find instruction length
|
829 |
|
|
instrLength = lengthList[pInstr->i[0] >> 29]; // Length up to 3 determined by il. Length 4 by upper bit of mode
|
830 |
|
|
|
831 |
|
|
// Find any reasons for warnings
|
832 |
|
|
//findWarnings(p);
|
833 |
|
|
|
834 |
|
|
// Find any errors
|
835 |
|
|
//findErrors(p);
|
836 |
|
|
}
|
837 |
|
|
|
838 |
|
|
|
839 |
|
|
|
840 |
|
|
/*****************************************************************************
|
841 |
|
|
Functions for reading instruction list from comma-separated file,
|
842 |
|
|
sorting, and searching
|
843 |
|
|
*****************************************************************************/
|
844 |
|
|
|
845 |
|
|
// Members of class CCSVFile for reading comma-separated file
|
846 |
|
|
|
847 |
|
|
// Read and parse file
|
848 |
|
|
void CCSVFile::parse() {
|
849 |
|
|
// Sorry for the ugly code!
|
850 |
|
|
|
851 |
|
|
const char * fields[numInstructionColumns]; // pointer to each field in line
|
852 |
|
|
int fi = 0; // field index
|
853 |
|
|
uint32_t i, j; // loop counters
|
854 |
|
|
char * s, * t = 0; // point to begin and end of field
|
855 |
|
|
char c;
|
856 |
|
|
char separator = 0; // separator character, preferably comma
|
857 |
|
|
int line = 1; // line number
|
858 |
|
|
SInstruction record; // record constructed from line
|
859 |
|
|
zeroAllMembers(fields);
|
860 |
|
|
|
861 |
|
|
if (data_size==0) read(cmd.getFilename(cmd.instructionListFile), 2); // read file if it has not already been read
|
862 |
|
|
if (err.number()) return;
|
863 |
|
|
|
864 |
|
|
// loop through file
|
865 |
|
|
for (i = 0; i < data_size; i++) {
|
866 |
|
|
// find begin of field, quoted or not
|
867 |
|
|
s = (char*)buf() + i;
|
868 |
|
|
c = *s;
|
869 |
|
|
if (c == ' ') continue; // skip leading spaces
|
870 |
|
|
|
871 |
|
|
if (c == '"' || c == 0x27) { // single or double quote
|
872 |
|
|
fields[fi] = s+1; // begin of quoted string
|
873 |
|
|
for (i++; i < data_size; i++) { // search for matching end quote
|
874 |
|
|
t = (char*)buf() + i;
|
875 |
|
|
if (*t == c) {
|
876 |
|
|
*t = 0; i++; // End quote found. Put end of string here
|
877 |
|
|
goto SEARCHFORCOMMA;
|
878 |
|
|
}
|
879 |
|
|
if (*t == '\n') break; // end of line found before end quote
|
880 |
|
|
}
|
881 |
|
|
// end quote not found
|
882 |
|
|
err.submit(ERR_INSTRUCTION_LIST_QUOTE, line);
|
883 |
|
|
return;
|
884 |
|
|
}
|
885 |
|
|
if (c == '\r' || c == '\n')
|
886 |
|
|
goto NEXTLINE; // end of line found
|
887 |
|
|
if (c == separator || c == ',') {
|
888 |
|
|
// empty field
|
889 |
|
|
fields[fi] = "";
|
890 |
|
|
goto SEARCHFORCOMMA;
|
891 |
|
|
}
|
892 |
|
|
|
893 |
|
|
// Anything else: begin of unquoted string
|
894 |
|
|
fields[fi] = s;
|
895 |
|
|
// search for end of field
|
896 |
|
|
|
897 |
|
|
SEARCHFORCOMMA:
|
898 |
|
|
for (; i < data_size; i++) { // search for comma after field
|
899 |
|
|
t = (char*)buf() + i;
|
900 |
|
|
if (*t == separator || (separator == 0 && (*t == ',' || *t == ';' || *t == '\t'))) {
|
901 |
|
|
separator = *t; // separator set to the first comma, semicolon or tabulator
|
902 |
|
|
*t = 0; // put end of string here
|
903 |
|
|
goto NEXTFIELD;
|
904 |
|
|
}
|
905 |
|
|
if (*t == '\n') break; // end of line found before comma
|
906 |
|
|
}
|
907 |
|
|
fi++;
|
908 |
|
|
goto NEXTLINE;
|
909 |
|
|
|
910 |
|
|
NEXTFIELD:
|
911 |
|
|
// next field
|
912 |
|
|
fi++;
|
913 |
|
|
if (fi != numInstructionColumns) continue;
|
914 |
|
|
// end of last field
|
915 |
|
|
|
916 |
|
|
NEXTLINE:
|
917 |
|
|
for (; i < data_size; i++) { // search for end. of line
|
918 |
|
|
t = (char*)buf() + i;
|
919 |
|
|
// accept newlines as "\r", "\n", or "\r\n"
|
920 |
|
|
if (*t == '\r' || *t == '\n') break;
|
921 |
|
|
}
|
922 |
|
|
if (*t == '\r' && *(t+1) == '\n') i++; // end of line is two characters
|
923 |
|
|
*t = 0; // terminate line
|
924 |
|
|
|
925 |
|
|
// make any remaining fields blank
|
926 |
|
|
for (; fi < numInstructionColumns; fi++) {
|
927 |
|
|
fields[fi] = "";
|
928 |
|
|
}
|
929 |
|
|
// Begin next line
|
930 |
|
|
line++;
|
931 |
|
|
fi = 0;
|
932 |
|
|
|
933 |
|
|
// Check if blank or heading record
|
934 |
|
|
if (fields[2][0] < '0' || fields[2][0] > '9') continue;
|
935 |
|
|
|
936 |
|
|
// save values to record
|
937 |
|
|
// most fields are decimal or hexadecimal numbers
|
938 |
|
|
record.id = (uint32_t)interpretNumber(fields[1]);
|
939 |
|
|
record.category = (uint32_t)interpretNumber(fields[2]);
|
940 |
|
|
record.format = interpretNumber(fields[3]);
|
941 |
|
|
record.templt = (uint32_t)interpretNumber(fields[4]);
|
942 |
|
|
record.sourceoperands = (uint32_t)interpretNumber(fields[6]);
|
943 |
|
|
record.op1 = (uint32_t)interpretNumber(fields[7]);
|
944 |
|
|
record.op2 = (uint32_t)interpretNumber(fields[8]);
|
945 |
|
|
record.optypesgp = (uint32_t)interpretNumber(fields[9]);
|
946 |
|
|
record.optypesscalar = (uint32_t)interpretNumber(fields[10]);
|
947 |
|
|
record.optypesvector = (uint32_t)interpretNumber(fields[11]);
|
948 |
|
|
// interpret immediate operand
|
949 |
|
|
if (tolower(fields[12][0]) == 'i') {
|
950 |
|
|
// implicit immediate operand. value is prefixed by 'i'. Get value
|
951 |
|
|
record.implicit_imm = (uint32_t)interpretNumber(fields[12]+1);
|
952 |
|
|
record.opimmediate = OPI_IMPLICIT;
|
953 |
|
|
}
|
954 |
|
|
else {
|
955 |
|
|
// immediate operand type
|
956 |
|
|
record.opimmediate = (uint8_t)interpretNumber(fields[12]);
|
957 |
|
|
}
|
958 |
|
|
// interpret template variant
|
959 |
|
|
record.variant = interpretTemplateVariants(fields[5]);
|
960 |
|
|
// copy instruction name
|
961 |
|
|
for (j = 0; j < sizeof(record.name)-1; j++) {
|
962 |
|
|
c = fields[0][j];
|
963 |
|
|
if (c == 0) break;
|
964 |
|
|
record.name[j] = tolower(c);
|
965 |
|
|
}
|
966 |
|
|
record.name[j] = 0;
|
967 |
|
|
|
968 |
|
|
// add record to list
|
969 |
|
|
instructionlist.push(record);
|
970 |
|
|
}
|
971 |
|
|
}
|
972 |
|
|
|
973 |
|
|
// Interpret number in instruction list
|
974 |
|
|
uint64_t CCSVFile::interpretNumber(const char * text) {
|
975 |
|
|
uint32_t error = 0;
|
976 |
|
|
uint64_t result = uint64_t(::interpretNumber(text, 64, &error));
|
977 |
|
|
if (error) err.submit(ERR_INSTRUCTION_LIST_SYNTAX, text);
|
978 |
|
|
return result;
|
979 |
|
|
}
|
980 |
|
|
|
981 |
|
|
|
982 |
|
|
// Interpret a string with a decimal, binary, octal, or hexadecimal number
// text:      zero-terminated string. Accepted forms: decimal digits, 0x... (hexadecimal),
//            0b... (binary), 0o... (octal). Prefix letters and hexadecimal digits are
//            case-insensitive. Blanks and '+' are ignored before the number and end it
//            when they come after it. '-' before the number changes the sign.
// maxLength: maximum number of characters to read from text
// error:     receives 0 if no error, 1 for a syntax error or if text is null.
//            If a comma is met, reading stops and error is set to (position of comma | 0x1000).
//            May be null if the caller does not need the error code.
// Return value: the number, or 0 in case of syntax error.
// An empty string gives 0 without error. Overflow beyond 64 bits is not detected.
int64_t interpretNumber(const char * text, uint32_t maxLength, uint32_t * error) {
    int state = 0;           // 0: begin, 1: after 0,
                             // 2: after 0x, 3: after 0b, 4: after 0o
                             // 5: after decimal digit, 6: trailing space
    uint64_t number = 0;     // magnitude accumulated so far
    uint8_t c, clower, digit;
    bool sign = false;       // true if the result must be negated
    uint32_t i;
    uint32_t unusedError;    // receives the error code if the caller passed a null pointer
    if (error == 0) error = &unusedError;
    *error = 0;
    if (text == 0) {
        *error = 1; return 0;
    }

    for (i = 0; i < maxLength; i++) {
        c = text[i];                    // read character
        clower = c | 0x20;              // convert to lower case
        if (clower == 'x') {
            // hexadecimal prefix, only allowed immediately after a leading 0
            if (state != 1) {
                *error = 1;  return 0;
            }
            state = 2;
        }
        else if (clower == 'o') {
            // octal prefix, only allowed immediately after a leading 0
            if (state != 1) {
                *error = 1;  return 0;
            }
            state = 4;
        }
        else if (clower == 'b' && state == 1) {
            // binary prefix. A 'b' in any other state is treated as a hexadecimal digit below
            state = 3;
        }
        else if (c >= '0' && c <= '9') {
            // digit 0 - 9
            digit = c - '0';
            switch (state) {
            case 0:
                // first digit. A leading 0 may be followed by a prefix letter
                state = (digit == 0) ? 1 : 5;
                number = digit;
                break;
            case 1:
                // digit after leading 0: the number is decimal
                state = 5;
                // fall through
            case 5:
                // decimal
                number = number * 10 + digit;
                break;
            case 2:
                // hexadecimal
                number = number * 16 + digit;
                break;
            case 3:
                // binary
                if (digit > 1) {
                    *error = 1;  return 0;
                }
                number = number * 2 + digit;
                break;
            case 4:
                // octal
                if (digit > 7) {
                    *error = 1;  return 0;
                }
                number = number * 8 + digit;
                break;
            default:
                // digit after trailing space
                *error = 1;
                return 0;
            }
        }
        else if (clower >= 'a' && clower <= 'f') {
            // hexadecimal digit, only allowed after 0x
            digit = clower - ('a' - 10);
            if (state != 2) {
                *error = 1;  return 0;
            }
            number = number * 16 + digit;
        }
        else if (c == ' ' || c == '+') {
            // ignore leading or trailing blank or plus
            if (state > 0) state = 6;
        }
        else if (c == '-') {
            // change sign. Only allowed before the number
            if (state != 0) {
                *error = 1;  return 0;
            }
            sign = !sign;
        }
        else if (c == 0) break;         // end of string
        else if (c == ',') {
            *error = i | 0x1000;        // end with comma. return position in error
            break;
        }
        else {
            // illegal character
            *error = 1;  return 0;
        }
    }
    // Negate in unsigned arithmetic. This wraps modulo 2^64 and avoids signed overflow
    // when the magnitude is 2^63 or more
    if (sign) number = 0 - number;
    return (int64_t)number;
}
|
1084 |
|
|
|
1085 |
|
|
void CDisassembler::getLineList(CDynamicArray<SLineRef> & list) {
    // Hand over the disassembler's lineList to the list supplied by the caller (the debugger),
    // using the << operator of the container
    list << lineList;
}
|
1089 |
|
|
|
1090 |
|
|
void CDisassembler::getOutFile(CTextFileBuffer & buffer) {
    // Hand over the disassembler's output file to the buffer supplied by the caller (the debugger),
    // using the copy function of the buffer
    buffer.copy(outFile);
}
|
1094 |
|
|
|