| 1 |
49 |
Agner |
/**************************** disasm1.cpp ********************************
|
| 2 |
|
|
* Author: Agner Fog
|
| 3 |
|
|
* Date created: 2017-04-26
|
| 4 |
|
|
* Last modified: 2021-03-30
|
| 5 |
|
|
* Version: 1.11
|
| 6 |
|
|
* Project: Binary tools for ForwardCom instruction set
|
| 7 |
|
|
* Module: disassem.h
|
| 8 |
|
|
* Description: Disassembler
|
| 9 |
|
|
* Disassembler for ForwardCom
|
| 10 |
|
|
*
|
| 11 |
|
|
* Copyright 2007-2021 GNU General Public License http://www.gnu.org/licenses
|
| 12 |
|
|
*****************************************************************************/
|
| 13 |
|
|
#include "stdafx.h"
|
| 14 |
|
|
|
| 15 |
|
|
|
| 16 |
|
|
uint64_t interpretTemplateVariants(const char * s) {
|
| 17 |
|
|
// Interpret template variants in instruction record
|
| 18 |
|
|
// The return value is a combination of bits for each variant option
|
| 19 |
|
|
// These bits are defined as constants VARIANT_D0, etc., in disassem.h
|
| 20 |
|
|
uint64_t v = 0;
|
| 21 |
|
|
for (int i = 0; i < 8; i++) { // Loop through string
|
| 22 |
|
|
char c = toupper(s[i]), d = toupper(s[i+1]);
|
| 23 |
|
|
switch (c) {
|
| 24 |
|
|
case 0:
|
| 25 |
|
|
return v; // End of string
|
| 26 |
|
|
case 'D':
|
| 27 |
|
|
if (d == '0') v |= VARIANT_D0; // D0
|
| 28 |
|
|
if (d == '1') v |= VARIANT_D1; // D1
|
| 29 |
|
|
if (d == '2') v |= VARIANT_D2; // D2
|
| 30 |
|
|
if (d == '3') v |= VARIANT_D3; // D3
|
| 31 |
|
|
continue;
|
| 32 |
|
|
case 'F':
|
| 33 |
|
|
if (d == '0') v |= VARIANT_F0; // F0
|
| 34 |
|
|
if (d == '1') v |= VARIANT_F1; // F1
|
| 35 |
|
|
continue;
|
| 36 |
|
|
case 'M':
|
| 37 |
|
|
if (d == '0') v |= VARIANT_M0; // M0
|
| 38 |
|
|
//if (d == '1') v |= VARIANT_M1; // M1. No longer used
|
| 39 |
|
|
continue;
|
| 40 |
|
|
case 'R':
|
| 41 |
|
|
if (d == '0') v |= VARIANT_R0; // R0
|
| 42 |
|
|
if (d == '1') v |= VARIANT_R1; // R1
|
| 43 |
|
|
if (d == '2') v |= VARIANT_R2; // R2
|
| 44 |
|
|
if (d == '3') v |= VARIANT_R3; // R3
|
| 45 |
|
|
if (d == 'L') v |= VARIANT_RL; // RL
|
| 46 |
|
|
i++;
|
| 47 |
|
|
continue;
|
| 48 |
|
|
case 'I':
|
| 49 |
|
|
if (d == '2') v |= VARIANT_I2; // I2
|
| 50 |
|
|
continue;
|
| 51 |
|
|
|
| 52 |
|
|
case 'O':
|
| 53 |
|
|
if (d > '0' && d < '7') v |= (d - '0') << 24; // O1 - O6
|
| 54 |
|
|
continue;
|
| 55 |
|
|
case 'U':
|
| 56 |
|
|
if (d == '0') v |= VARIANT_U0; // U0
|
| 57 |
|
|
if (d == '3') v |= VARIANT_U3; // U3
|
| 58 |
|
|
continue;
|
| 59 |
|
|
case 'H':
|
| 60 |
|
|
if (d == '0') v |= VARIANT_H0; // H0
|
| 61 |
|
|
continue;
|
| 62 |
|
|
case 'X':
|
| 63 |
|
|
v |= uint64_t(((d-'0') & 0xF) | 0x10) << 32; // X0 - X9
|
| 64 |
|
|
continue;
|
| 65 |
|
|
case 'Y':
|
| 66 |
|
|
v |= uint64_t(((d-'0') & 0xF) | 0x20) << 32; // Y0 - Y9
|
| 67 |
|
|
continue;
|
| 68 |
|
|
}
|
| 69 |
|
|
}
|
| 70 |
|
|
return v;
|
| 71 |
|
|
}
|
| 72 |
|
|
|
| 73 |
|
|
|
| 74 |
|
|
void CDisassembler::sortSymbolsAndRelocations() {
|
| 75 |
|
|
// Sort symbols by address. This is useful when symbol labels are written out
|
| 76 |
|
|
uint32_t i; // loop counter
|
| 77 |
|
|
// The values of st_reguse1 and st_reguse2 are no longer needed after these values have been written out.
|
| 78 |
|
|
// Save old index in st_reguse1.
|
| 79 |
|
|
// Set st_reguse2 to zero, it is used later for data type
|
| 80 |
|
|
|
| 81 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
| 82 |
|
|
symbols[i].st_reguse1 = i;
|
| 83 |
|
|
symbols[i].st_reguse2 = 0;
|
| 84 |
|
|
// symbols are grouped by section in object files, by base pointer in executable files
|
| 85 |
|
|
if (isExecutable) symbolExeAddress(symbols[i]);
|
| 86 |
|
|
}
|
| 87 |
|
|
// Sort symbols by address
|
| 88 |
|
|
symbols.sort();
|
| 89 |
|
|
|
| 90 |
|
|
// Add dummy empty symbol number 0
|
| 91 |
|
|
ElfFwcSym nulsymbol = {0,0,0,0,0,0,0,0,0};
|
| 92 |
|
|
symbols.addUnique(nulsymbol);
|
| 93 |
|
|
|
| 94 |
|
|
// Update all relocations to the new symbol indexes
|
| 95 |
|
|
// Translate old to new symbol index in all relocation records
|
| 96 |
|
|
// Allocate array for translating old to new symbol index
|
| 97 |
|
|
CDynamicArray<uint32_t> old2newSymbolIndex;
|
| 98 |
|
|
old2newSymbolIndex.setNum(symbols.numEntries());
|
| 99 |
|
|
|
| 100 |
|
|
// Make translation table
|
| 101 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
| 102 |
|
|
uint32_t oldindex = symbols[i].st_reguse1;
|
| 103 |
|
|
if (oldindex < symbols.numEntries()) {
|
| 104 |
|
|
old2newSymbolIndex[oldindex] = i;
|
| 105 |
|
|
}
|
| 106 |
|
|
}
|
| 107 |
|
|
|
| 108 |
|
|
// Translate all symbol indices in relocation records
|
| 109 |
|
|
for (i = 0; i < relocations.numEntries(); i++) {
|
| 110 |
|
|
if (relocations[i].r_sym < old2newSymbolIndex.numEntries()) {
|
| 111 |
|
|
relocations[i].r_sym = old2newSymbolIndex[relocations[i].r_sym];
|
| 112 |
|
|
}
|
| 113 |
|
|
else relocations[i].r_sym = 0; // index out of range!
|
| 114 |
|
|
if ((relocations[i].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP) {
|
| 115 |
|
|
// relocation record has an additional reference point
|
| 116 |
|
|
// bit 30 indicates relocation used OK
|
| 117 |
|
|
uint32_t refsym = relocations[i].r_refsym & ~0x40000000;
|
| 118 |
|
|
if (refsym < old2newSymbolIndex.numEntries()) {
|
| 119 |
|
|
relocations[i].r_refsym = old2newSymbolIndex[refsym] | (relocations[i].r_refsym & 0x40000000);
|
| 120 |
|
|
}
|
| 121 |
|
|
else relocations[i].r_refsym = 0; // index out of range
|
| 122 |
|
|
}
|
| 123 |
|
|
}
|
| 124 |
|
|
|
| 125 |
|
|
// Sort relocations by address
|
| 126 |
|
|
relocations.sort();
|
| 127 |
|
|
}
|
| 128 |
|
|
|
| 129 |
|
|
// Translate symbol address from section:offset to pointerbase:address
|
| 130 |
|
|
void CDisassembler::symbolExeAddress(ElfFwcSym & sym) {
    // Translate a symbol address from section:offset to pointerbase:address.
    // On return, sym.st_section holds a number for the base pointer of the section
    // (1 for SHF_IP, 2 for SHF_DATAP, 3 for SHF_THREADP, 0 for anything else)
    // and the section address has been added to sym.st_value.
    // Nothing is changed unless an executable file is being disassembled and
    // sym.st_section is a valid, nonzero section index.
    if (!isExecutable) return;

    const uint32_t sec = sym.st_section;    // section index before translation
    if (sec == 0) return;
    if (!(sec < sectionHeaders.numEntries())) return;

    // Find the base pointer from the section flags
    const uint32_t flags = (uint32_t)sectionHeaders[sec].sh_flags;
    uint32_t baseNumber = 0;
    switch (flags & SHF_BASEPOINTER) {
    case SHF_IP:
        baseNumber = 1;
        break;
    case SHF_DATAP:
        baseNumber = 2;
        break;
    case SHF_THREADP:
        baseNumber = 3;
        break;
    }
    sym.st_section = baseNumber;
    sym.st_value += sectionHeaders[sec].sh_addr;
}
|
| 152 |
|
|
|
| 153 |
|
|
|
| 154 |
|
|
// Join the tables: symbols and newSymbols
|
| 155 |
|
|
void CDisassembler::joinSymbolTables() {
|
| 156 |
|
|
/* There are two symbol tables: 'symbols' and 'newSymbols'.
|
| 157 |
|
|
'symbols' contains the symbols that were in the original file. This table is sorted
|
| 158 |
|
|
by address in sortSymbolsAndRelocations() in order to make it easy to find a symbol
|
| 159 |
|
|
at a given address.
|
| 160 |
|
|
'newSymbols' contains new symbols that were created during pass 1. It is not sorted.
|
| 161 |
|
|
The reason why we have two symbol tables is that the symbol indexes would change if
|
| 162 |
|
|
we add to the 'symbols' table during pass 1 and keep it sorted. We need to have
|
| 163 |
|
|
consistent indexes during pass 1 in order to access symbols by their index. Likewise,
|
| 164 |
|
|
'newSymbols' is not sorted because indexes would change when new symbols are added to it.
|
| 165 |
|
|
'newSymbols' may contain dublets because it is not sorted so dublets are not detected
|
| 166 |
|
|
when new symbols are added.
|
| 167 |
|
|
'joinSymbolTables()' is called after pass 1 when we are finished making new symbols.
|
| 168 |
|
|
This function joins the two tables together, removes any dublets, updates symbol indexes
|
| 169 |
|
|
in all relocation records, and tranfers data type information from relocation records
|
| 170 |
|
|
to symbol records.
|
| 171 |
|
|
*/
|
| 172 |
|
|
uint32_t r; // Relocation index
|
| 173 |
|
|
uint32_t s; // Symbol index
|
| 174 |
|
|
uint32_t newsymi; // Symbol index in newSymbols
|
| 175 |
|
|
uint32_t newsymi2; // Index of new symbol after transfer to symbols table
|
| 176 |
|
|
uint32_t symTempIndex = symbols.numEntries(); // Temporary index of symbol after transfer
|
| 177 |
|
|
|
| 178 |
|
|
// Remember index of each symbol before adding new symbols and reordering
|
| 179 |
|
|
for (s = 0; s < symbols.numEntries(); s++) {
|
| 180 |
|
|
symbols[s].st_reguse1 = s;
|
| 181 |
|
|
}
|
| 182 |
|
|
|
| 183 |
|
|
// Loop through relocations to find references to new symbols
|
| 184 |
|
|
for (r = 0; r < relocations.numEntries(); r++) {
|
| 185 |
|
|
if (relocations[r].r_sym & 0x80000000) { // Refers to newSymbols table
|
| 186 |
|
|
newsymi = relocations[r].r_sym & ~0x80000000;
|
| 187 |
|
|
if (newsymi < newSymbols.numEntries()) {
|
| 188 |
|
|
// Put symbol into old table if no equivalent symbol exists here
|
| 189 |
|
|
newsymi2 = symbols.addUnique(newSymbols[newsymi]);
|
| 190 |
|
|
// Give it a temporary index if it doesn't have one
|
| 191 |
|
|
if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
|
| 192 |
|
|
// update reference in relocation record to temporary index
|
| 193 |
|
|
relocations[r].r_sym = symbols[newsymi2].st_reguse1;
|
| 194 |
|
|
}
|
| 195 |
|
|
}
|
| 196 |
|
|
// Do the same with any reference point
|
| 197 |
|
|
if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && relocations[r].r_refsym & 0x80000000) {
|
| 198 |
|
|
newsymi = relocations[r].r_refsym & ~0xC0000000;
|
| 199 |
|
|
if (newsymi < newSymbols.numEntries()) {
|
| 200 |
|
|
// Put symbol into old table if no equivalent symbol exists here
|
| 201 |
|
|
newsymi2 = symbols.addUnique(newSymbols[newsymi]);
|
| 202 |
|
|
// Give it a temporary index if it doesn't have one
|
| 203 |
|
|
if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
|
| 204 |
|
|
// update reference in relocation record to temporary index
|
| 205 |
|
|
relocations[r].r_refsym = symbols[newsymi2].st_reguse1 | (relocations[r].r_refsym & 0x40000000);
|
| 206 |
|
|
}
|
| 207 |
|
|
}
|
| 208 |
|
|
}
|
| 209 |
|
|
// Make symbol index translation table
|
| 210 |
|
|
CDynamicArray<uint32_t> old2newSymbolIndex;
|
| 211 |
|
|
old2newSymbolIndex.setNum(symbols.numEntries());
|
| 212 |
|
|
for (s = 0; s < symbols.numEntries(); s++) {
|
| 213 |
|
|
uint32_t oldsymi = symbols[s].st_reguse1;
|
| 214 |
|
|
if (oldsymi < old2newSymbolIndex.numEntries()) {
|
| 215 |
|
|
old2newSymbolIndex[oldsymi] = s;
|
| 216 |
|
|
}
|
| 217 |
|
|
}
|
| 218 |
|
|
// Update indexes in relocation records
|
| 219 |
|
|
for (r = 0; r < relocations.numEntries(); r++) {
|
| 220 |
|
|
if (relocations[r].r_sym < old2newSymbolIndex.numEntries()) { // Refers to newSymbols table
|
| 221 |
|
|
relocations[r].r_sym = old2newSymbolIndex[relocations[r].r_sym];
|
| 222 |
|
|
// Give the symbol a data type from relocation record if it doesn't have one
|
| 223 |
|
|
if (symbols[relocations[r].r_sym].st_reguse2 == 0) {
|
| 224 |
|
|
symbols[relocations[r].r_sym].st_reguse2 = relocations[r].r_type >> 8;
|
| 225 |
|
|
}
|
| 226 |
|
|
}
|
| 227 |
|
|
// Do the same with any reference point
|
| 228 |
|
|
uint32_t refsym = relocations[r].r_refsym & ~0xC0000000;
|
| 229 |
|
|
if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && refsym < old2newSymbolIndex.numEntries()) {
|
| 230 |
|
|
relocations[r].r_refsym = old2newSymbolIndex[refsym] | (relocations[r].r_refsym & 0x40000000);
|
| 231 |
|
|
}
|
| 232 |
|
|
}
|
| 233 |
|
|
}
|
| 234 |
|
|
|
| 235 |
|
|
|
| 236 |
|
|
void CDisassembler::assignSymbolNames() {
|
| 237 |
|
|
// Assign names to symbols that do not have a name
|
| 238 |
|
|
uint32_t i; // New symbol index
|
| 239 |
|
|
uint32_t numDigits; // Number of digits in new symbol names
|
| 240 |
|
|
char name[64]; // sectionBuffer for making symbol name
|
| 241 |
|
|
static char format[64];
|
| 242 |
|
|
uint32_t unnamedNum = 0; // Number of unnamed symbols
|
| 243 |
|
|
//uint32_t addMoreSymbols = 0; // More symbols need to be added
|
| 244 |
|
|
|
| 245 |
|
|
// Find necessary number of digits
|
| 246 |
|
|
numDigits = 3; i = symbols.numEntries();
|
| 247 |
|
|
while (i >= 1000) {
|
| 248 |
|
|
i /= 10;
|
| 249 |
|
|
numDigits++;
|
| 250 |
|
|
}
|
| 251 |
|
|
|
| 252 |
|
|
// format string for symbol names
|
| 253 |
|
|
sprintf(format, "%s%c0%i%c", "@_", '%', numDigits, 'i');
|
| 254 |
|
|
|
| 255 |
|
|
// Loop through symbols
|
| 256 |
|
|
for (i = 1; i < symbols.numEntries(); i++) {
|
| 257 |
|
|
if (symbols[i].st_name == 0 ) {
|
| 258 |
|
|
// Symbol has no name. Make one
|
| 259 |
|
|
sprintf(name, format, ++unnamedNum);
|
| 260 |
|
|
// Store new name
|
| 261 |
|
|
symbols[i].st_name = stringBuffer.pushString(name);
|
| 262 |
|
|
}
|
| 263 |
|
|
}
|
| 264 |
|
|
|
| 265 |
|
|
#if 0 //!!
|
| 266 |
|
|
// For debugging: list all symbols
|
| 267 |
|
|
printf("\n\nSymbols:");
|
| 268 |
|
|
for (i = 0; i < symbols.numEntries(); i++) {
|
| 269 |
|
|
printf("\n%3X %3X %s sect %i offset %X type %X size %i Scope %i",
|
| 270 |
|
|
i, symbols[i].st_name, stringBuffer.buf() + symbols[i].st_name,
|
| 271 |
|
|
symbols[i].st_section, (uint32_t)symbols[i].st_value, symbols[i].st_type,
|
| 272 |
|
|
(uint32_t)symbols[i].st_unitsize, symbols[i].st_other);
|
| 273 |
|
|
if (symbols[i].st_reguse2) printf(" Type %X", symbols[i].st_reguse2);
|
| 274 |
|
|
}
|
| 275 |
|
|
#endif
|
| 276 |
|
|
#if 0
|
| 277 |
|
|
// For debugging: list all relocations
|
| 278 |
|
|
printf("\n\nRelocations:");
|
| 279 |
|
|
for (uint32_t i = 0; i < relocations.numEntries(); i++) {
|
| 280 |
|
|
printf("\nsect %i, os %X, type %X, sym %i, add %X, refsym %X",
|
| 281 |
|
|
(uint32_t)(relocations[i].r_section), (uint32_t)relocations[i].r_offset, relocations[i].r_type,
|
| 282 |
|
|
relocations[i].r_sym, relocations[i].r_addend, relocations[i].r_refsym);
|
| 283 |
|
|
}
|
| 284 |
|
|
#endif
|
| 285 |
|
|
}
|
| 286 |
|
|
|
| 287 |
|
|
|
| 288 |
|
|
|
| 289 |
|
|
/************************** class CDisassembler *****************************
|
| 290 |
|
|
Members of class CDisassembler
|
| 291 |
|
|
Members that relate to file output are in disasm2.cpp
|
| 292 |
|
|
******************************************************************************/
|
| 293 |
|
|
|
| 294 |
|
|
CDisassembler::CDisassembler() {
    // Constructor.
    // Takes the output file from the command line object, resets the pass number,
    // the debug mode and the symbol/function bookkeeping, and then checks the
    // integrity of the format list.
    outputFile = cmd.outputFile;
    pass = 0;
    debugMode = 0;
    nextSymbol = 0;
    currentFunction = 0;
    currentFunctionEnd = 0;
    checkFormatListIntegrity();
}
|
| 304 |
|
|
|
| 305 |
|
|
void CDisassembler::initializeInstructionList() {
    // Load the instruction list from the csv file named on the command line,
    // move it into this object's own container, and sort it there.
    CCSVFile instructionListFile;

    // Read the file. The filename comes from the command line object
    instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH);

    // Interpret the contents of the file
    instructionListFile.parse();

    // Transfer instruction list to my own container
    instructionlist << instructionListFile.instructionlist;

    // Sort list, using sort order defined by SInstruction2
    instructionlist.sort();
}
|
| 313 |
|
|
|
| 314 |
|
|
// Read instruction list, split ELF file into components
|
| 315 |
|
|
void CDisassembler::getComponents1() {
    // Prepare for disassembling a file:
    // 1. check code integrity,
    // 2. read the instruction list,
    // 3. split the ELF file into containers.
    checkFormatListIntegrity();
    initializeInstructionList();
    split();
}
|
| 325 |
|
|
|
| 326 |
|
|
// Read instruction list, get ELF components for assembler output listing
|
| 327 |
|
|
void CDisassembler::getComponents2(CELF const & assembler, CMemoryBuffer const & instructList) {
    // Get ELF components and instruction list for an assembler output listing.
    // This function replaces getComponents1() when making an output listing for the assembler.
    // assembler:    object to copy the ELF containers from
    // instructList: instruction list already read by the assembler

    // Copy containers from assembler
    sectionHeaders.copy(assembler.getSectionHeaders());
    symbols.copy(assembler.getSymbols());
    relocations.copy(assembler.getRelocations());
    stringBuffer.copy(assembler.getStringBuffer());
    dataBuffer.copy(assembler.getDataBuffer());

    // Copy the instruction list from the assembler to avoid reading the csv file again.
    // The unsorted list is used to make sure the preferred name for an instruction
    // comes first, in case there are alias names
    instructionlist.copy(instructList);

    // Sort list, using the sort order needed by the disassembler as defined by SInstruction2
    instructionlist.sort();
}
|
| 342 |
|
|
|
| 343 |
|
|
|
| 344 |
|
|
// Do the disassembly
|
| 345 |
|
|
void CDisassembler::go() {
|
| 346 |
|
|
// set tabulator stops
|
| 347 |
|
|
setTabStops();
|
| 348 |
|
|
|
| 349 |
|
|
// write feedback to console
|
| 350 |
|
|
feedBackText1();
|
| 351 |
|
|
|
| 352 |
|
|
// is this an executable or object file
|
| 353 |
|
|
isExecutable = fileHeader.e_type == ET_EXEC;
|
| 354 |
|
|
|
| 355 |
|
|
// Begin writing output file
|
| 356 |
|
|
writeFileBegin();
|
| 357 |
|
|
|
| 358 |
|
|
// Sort symbols by address
|
| 359 |
|
|
sortSymbolsAndRelocations();
|
| 360 |
|
|
|
| 361 |
|
|
// pass 1: Find symbols types and unnamed symbols
|
| 362 |
|
|
pass = 1;
|
| 363 |
|
|
pass1();
|
| 364 |
|
|
|
| 365 |
|
|
if (pass & 0x10) {
|
| 366 |
|
|
// Repetition of pass 1 requested
|
| 367 |
|
|
pass = 2;
|
| 368 |
|
|
pass1();
|
| 369 |
|
|
}
|
| 370 |
|
|
|
| 371 |
|
|
// Join the tables: symbols and newSymbols;
|
| 372 |
|
|
joinSymbolTables();
|
| 373 |
|
|
|
| 374 |
|
|
// put names on unnamed symbols
|
| 375 |
|
|
assignSymbolNames();
|
| 376 |
|
|
|
| 377 |
|
|
// pass 2: Write all sections to output file
|
| 378 |
|
|
pass = 0x100;
|
| 379 |
|
|
pass2();
|
| 380 |
|
|
|
| 381 |
|
|
// Check for illegal entries in symbol table and relocations table
|
| 382 |
|
|
finalErrorCheck();
|
| 383 |
|
|
|
| 384 |
|
|
// Finish writing output file
|
| 385 |
|
|
writeFileEnd();
|
| 386 |
|
|
|
| 387 |
|
|
// write output file
|
| 388 |
|
|
if (outputFile && !debugMode) outFile.write(cmd.getFilename(outputFile));
|
| 389 |
|
|
}
|
| 390 |
|
|
|
| 391 |
|
|
// write feedback text on stdout
|
| 392 |
|
|
void CDisassembler::feedBackText1() {
|
| 393 |
|
|
if (cmd.verbose && cmd.job == CMDL_JOB_DIS) {
|
| 394 |
|
|
// Tell what we are doing:
|
| 395 |
|
|
printf("\nDisassembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(outputFile));
|
| 396 |
|
|
}
|
| 397 |
|
|
}
|
| 398 |
|
|
|
| 399 |
|
|
|
| 400 |
|
|
void CDisassembler::pass1() {
|
| 401 |
|
|
|
| 402 |
|
|
/* pass 1: does the following jobs:
|
| 403 |
|
|
--------------------------------
|
| 404 |
|
|
|
| 405 |
|
|
* Scans all code sections, instruction by instruction.
|
| 406 |
|
|
|
| 407 |
|
|
* Follows all references to data in order to determine data type for
|
| 408 |
|
|
each data symbol.
|
| 409 |
|
|
|
| 410 |
|
|
* Assigns symbol table entries for all jump and call targets that do not
|
| 411 |
|
|
allready have a name.
|
| 412 |
|
|
|
| 413 |
|
|
* Identifies and analyzes tables of jump addresses and call addresses,
|
| 414 |
|
|
e.g. switch/case tables and virtual function tables. (to do !)
|
| 415 |
|
|
|
| 416 |
|
|
* Tries to identify any data in the code section.
|
| 417 |
|
|
|
| 418 |
|
|
*/
|
| 419 |
|
|
//uint32_t sectionType;
|
| 420 |
|
|
|
| 421 |
|
|
// Loop through sections, pass 1
|
| 422 |
|
|
for (section = 1; section < sectionHeaders.numEntries(); section++) {
|
| 423 |
|
|
|
| 424 |
|
|
// Get section type
|
| 425 |
|
|
//sectionType = sectionHeaders[section].sh_type;
|
| 426 |
|
|
codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
|
| 427 |
|
|
|
| 428 |
|
|
sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
|
| 429 |
|
|
sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
|
| 430 |
|
|
|
| 431 |
|
|
if (codeMode < 4) {
|
| 432 |
|
|
// This is a code section
|
| 433 |
|
|
|
| 434 |
|
|
sectionAddress = sectionHeaders[section].sh_addr;
|
| 435 |
|
|
if (sectionEnd == 0) continue;
|
| 436 |
|
|
|
| 437 |
|
|
iInstr = 0;
|
| 438 |
|
|
|
| 439 |
|
|
// Loop through instructions
|
| 440 |
|
|
while (iInstr < sectionEnd) {
|
| 441 |
|
|
|
| 442 |
|
|
// Check if code not dubious
|
| 443 |
|
|
if (codeMode == 1) {
|
| 444 |
|
|
|
| 445 |
|
|
parseInstruction(); // Parse instruction
|
| 446 |
|
|
|
| 447 |
|
|
updateSymbols(); // Detect symbol types for operands of this instruction
|
| 448 |
|
|
|
| 449 |
|
|
updateTracer(); // Trace register values
|
| 450 |
|
|
|
| 451 |
|
|
iInstr += instrLength * 4; // Next instruction
|
| 452 |
|
|
}
|
| 453 |
|
|
else {
|
| 454 |
|
|
// iEnd = labelEnd;
|
| 455 |
|
|
}
|
| 456 |
|
|
}
|
| 457 |
|
|
}
|
| 458 |
|
|
}
|
| 459 |
|
|
}
|
| 460 |
|
|
|
| 461 |
|
|
|
| 462 |
|
|
void CDisassembler::pass2() {
|
| 463 |
|
|
|
| 464 |
|
|
/* pass 2: does the following jobs:
|
| 465 |
|
|
--------------------------------
|
| 466 |
|
|
|
| 467 |
|
|
* Scans through all sections, code and data.
|
| 468 |
|
|
|
| 469 |
|
|
* Outputs warnings for suboptimal instruction codes and error messages
|
| 470 |
|
|
for erroneous code and erroneous relocations.
|
| 471 |
|
|
|
| 472 |
|
|
* Outputs disassembly of all instructions, operands and relocations,
|
| 473 |
|
|
followed by the binary code listing as comment.
|
| 474 |
|
|
|
| 475 |
|
|
* Outputs disassembly of all data, followed by alternative representations
|
| 476 |
|
|
as comment.
|
| 477 |
|
|
*/
|
| 478 |
|
|
|
| 479 |
|
|
//uint32_t sectionType;
|
| 480 |
|
|
|
| 481 |
|
|
// Loop through sections, pass 2
|
| 482 |
|
|
for (section = 1; section < sectionHeaders.numEntries(); section++) {
|
| 483 |
|
|
|
| 484 |
|
|
// Get section type
|
| 485 |
|
|
//sectionType = sectionHeaders[section].sh_type;
|
| 486 |
|
|
codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
|
| 487 |
|
|
|
| 488 |
|
|
// Initialize code parser
|
| 489 |
|
|
sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
|
| 490 |
|
|
sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
|
| 491 |
|
|
sectionAddress = sectionHeaders[section].sh_addr;
|
| 492 |
|
|
|
| 493 |
|
|
writeSectionBegin(); // Write segment directive
|
| 494 |
|
|
|
| 495 |
|
|
if (codeMode < 4) {
|
| 496 |
|
|
// This is a code section
|
| 497 |
|
|
if (sectionEnd == 0) continue;
|
| 498 |
|
|
iInstr = 0;
|
| 499 |
|
|
|
| 500 |
|
|
// Loop through instructions
|
| 501 |
|
|
while (iInstr < sectionEnd) {
|
| 502 |
|
|
|
| 503 |
|
|
if (debugMode) {
|
| 504 |
|
|
// save cross reference
|
| 505 |
|
|
SLineRef xref = { iInstr + sectionAddress, 1, outFile.dataSize() };
|
| 506 |
|
|
lineList.push(xref);
|
| 507 |
|
|
writeAddress();
|
| 508 |
|
|
}
|
| 509 |
|
|
writeLabels(); // Find any label here
|
| 510 |
|
|
|
| 511 |
|
|
// Check if code not dubious
|
| 512 |
|
|
if (codeMode == 1) {
|
| 513 |
|
|
|
| 514 |
|
|
parseInstruction(); // Parse instruction
|
| 515 |
|
|
|
| 516 |
|
|
writeInstruction(); // Write instruction
|
| 517 |
|
|
|
| 518 |
|
|
iInstr += instrLength * 4; // Next instruction
|
| 519 |
|
|
|
| 520 |
|
|
}
|
| 521 |
|
|
else {
|
| 522 |
|
|
// This is data Skip to next label
|
| 523 |
|
|
}
|
| 524 |
|
|
}
|
| 525 |
|
|
writeSectionEnd(); // Write segment directive
|
| 526 |
|
|
}
|
| 527 |
|
|
else {
|
| 528 |
|
|
// This is a data section
|
| 529 |
|
|
pInstr = 0; iRecord = 0; fInstr = 0; // Set invalid pointers to zero
|
| 530 |
|
|
operandType = 2; // Default data type is int32
|
| 531 |
|
|
instrLength = 4; // Default data size is 4 bytes
|
| 532 |
|
|
iInstr = 0; // Instruction position
|
| 533 |
|
|
nextSymbol = 0;
|
| 534 |
|
|
|
| 535 |
|
|
writeDataItems(); // Loop through data. Write data
|
| 536 |
|
|
|
| 537 |
|
|
writeSectionEnd(); // Write segment directive
|
| 538 |
|
|
}
|
| 539 |
|
|
}
|
| 540 |
|
|
}
|
| 541 |
|
|
|
| 542 |
|
|
|
| 543 |
|
|
|
| 544 |
|
|
/******************** Explanation of tracer: ***************************
|
| 545 |
|
|
|
| 546 |
|
|
This is a machine which can trace the contents of each register in certain
|
| 547 |
|
|
situations. It is currently used for recognizing pointers to jump tables
|
| 548 |
|
|
in order to identify jump tables (to do!)
|
| 549 |
|
|
*/
|
| 550 |
|
|
void CDisassembler::updateTracer() {
    // Trace register values, as described in the explanation of the tracer above.
    // This function is called from pass1() after each parsed instruction.
    // It is currently empty: no tracing is done.
}
|
| 553 |
|
|
|
| 554 |
|
|
|
| 555 |
|
|
void CDisassembler::updateSymbols() {
|
| 556 |
|
|
// Find unnamed symbols, determine symbol types,
|
| 557 |
|
|
// update symbol list, call checkJumpTarget if jump/call.
|
| 558 |
|
|
// This function is called during pass 1 for every instruction
|
| 559 |
|
|
uint32_t relSource = 0; // Position of relocated field
|
| 560 |
|
|
|
| 561 |
|
|
if (fInstr->category == 4 && fInstr->jumpSize) {
|
| 562 |
|
|
// Self-relative jump instruction. Check OPJ
|
| 563 |
|
|
// uint32_t opj = (instrLength == 1) ? pInstr->a.op1 : pInstr->b[0]; // Jump instruction opcode
|
| 564 |
|
|
// Check if there is a relocation here
|
| 565 |
|
|
relSource = iInstr + (fInstr->jumpPos); // Position of relocated field
|
| 566 |
|
|
ElfFwcReloc rel;
|
| 567 |
|
|
rel.r_offset = relSource;
|
| 568 |
|
|
rel.r_section = section;
|
| 569 |
|
|
rel.r_addend = 0;
|
| 570 |
|
|
if (relocations.findFirst(rel) < 0) {
|
| 571 |
|
|
// There is no relocation. Target must be in the same section. Find target
|
| 572 |
|
|
int32_t offset = 0;
|
| 573 |
|
|
switch (fInstr->jumpSize) { // Read offset of correct size
|
| 574 |
|
|
case 1: // 8 bit
|
| 575 |
|
|
offset = *(int8_t*)(sectionBuffer + relSource);
|
| 576 |
|
|
rel.r_type = R_FORW_8 | 0x80000000; // add 0x80000000 to remember that this is not a real relocation
|
| 577 |
|
|
break;
|
| 578 |
|
|
case 2: // 16 bit
|
| 579 |
|
|
offset = *(int16_t*)(sectionBuffer + relSource);
|
| 580 |
|
|
rel.r_type = R_FORW_16 | 0x80000000;
|
| 581 |
|
|
break;
|
| 582 |
|
|
case 3: // 24 bit. Sign extend to 32 bits
|
| 583 |
|
|
offset = *(int32_t*)(sectionBuffer + relSource) << 8 >> 8;
|
| 584 |
|
|
rel.r_type = R_FORW_24 | 0x80000000;
|
| 585 |
|
|
break;
|
| 586 |
|
|
case 4: // 32 bit
|
| 587 |
|
|
offset = *(int32_t*)(sectionBuffer + relSource);
|
| 588 |
|
|
rel.r_type = R_FORW_32 | 0x80000000;
|
| 589 |
|
|
break;
|
| 590 |
|
|
}
|
| 591 |
|
|
// Scale offset by 4 and add offset to end of instruction
|
| 592 |
|
|
int32_t target = iInstr + instrLength * 4 + offset * 4;
|
| 593 |
|
|
|
| 594 |
|
|
// Add a symbol at target address if none exists
|
| 595 |
|
|
ElfFwcSym sym;
|
| 596 |
|
|
zeroAllMembers(sym);
|
| 597 |
|
|
sym.st_bind = STB_LOCAL;
|
| 598 |
|
|
sym.st_other = STV_EXEC;
|
| 599 |
|
|
sym.st_section = section;
|
| 600 |
|
|
sym.st_value = (uint64_t)(int64_t)target;
|
| 601 |
|
|
symbolExeAddress(sym);
|
| 602 |
|
|
int32_t symi = symbols.findFirst(sym);
|
| 603 |
|
|
if (symi < 0) {
|
| 604 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
| 605 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
| 606 |
|
|
}
|
| 607 |
|
|
// Add a dummy relocation record for this symbol.
|
| 608 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
| 609 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
| 610 |
|
|
rel.r_sym = (uint32_t)symi;
|
| 611 |
|
|
relocations.addUnique(rel);
|
| 612 |
|
|
}
|
| 613 |
|
|
}
|
| 614 |
|
|
|
| 615 |
|
|
// Check if instruction has a memory reference relative to IP, DATAP, or THREADP
|
| 616 |
|
|
uint32_t basePointer = 0;
|
| 617 |
|
|
if (fInstr->mem & 2) basePointer = pInstr->a.rs;
|
| 618 |
|
|
relSource = iInstr + fInstr->addrPos; // Position of relocated field
|
| 619 |
|
|
|
| 620 |
|
|
if (fInstr->addrSize > 1 && basePointer >= 28 && basePointer <= 30 && !(fInstr->mem & 0x20)) {
|
| 621 |
|
|
// Memory operand is relative to THREADP, DATAP or IP
|
| 622 |
|
|
// Check if there is a relocation here
|
| 623 |
|
|
uint32_t relpos = iInstr + fInstr->addrPos;
|
| 624 |
|
|
ElfFwcReloc rel;
|
| 625 |
|
|
rel.r_offset = relpos;
|
| 626 |
|
|
rel.r_section = section;
|
| 627 |
|
|
rel.r_type = (operandType | 0x80) << 24;
|
| 628 |
|
|
uint32_t nrel, irel = 0;
|
| 629 |
|
|
nrel = relocations.findAll(&irel, rel);
|
| 630 |
|
|
if (nrel > 1) writeWarning("Overlapping relocations here");
|
| 631 |
|
|
if (nrel) {
|
| 632 |
|
|
// Relocation found. Put the data type into the relocation record.
|
| 633 |
|
|
// The data type will later be transferred to the symbol record in joinSymbolTables()
|
| 634 |
|
|
if (!(relocations[irel].r_type & 0x80000000)) {
|
| 635 |
|
|
// Save target data type in upper 8 bits of r_type
|
| 636 |
|
|
relocations[irel].r_type = (relocations[irel].r_type & 0x00FFFFFF) | (operandType /*| 0x80*/) << 24;
|
| 637 |
|
|
}
|
| 638 |
|
|
// Check if the target is a section + offset
|
| 639 |
|
|
uint32_t symi = relocations[irel].r_sym;
|
| 640 |
|
|
if (symi < symbols.numEntries() && symbols[symi].st_type == STT_SECTION && relocations[irel].r_addend > 0) {
|
| 641 |
|
|
// Add a new symbol at this address
|
| 642 |
|
|
ElfFwcSym sym;
|
| 643 |
|
|
zeroAllMembers(sym);
|
| 644 |
|
|
sym.st_bind = STB_LOCAL;
|
| 645 |
|
|
sym.st_other = STT_OBJECT;
|
| 646 |
|
|
sym.st_section = symbols[symi].st_section;
|
| 647 |
|
|
sym.st_value = symbols[symi].st_value + (int64_t)relocations[irel].r_addend;
|
| 648 |
|
|
symbolExeAddress(sym);
|
| 649 |
|
|
uint32_t symi2 = newSymbols.push(sym);
|
| 650 |
|
|
relocations[irel].r_sym = symi2 | 0x80000000; // Upper bit means index refers to newSymbols
|
| 651 |
|
|
relocations[irel].r_addend = 0;
|
| 652 |
|
|
}
|
| 653 |
|
|
}
|
| 654 |
|
|
else if (basePointer == REG_IP >> 16 && fInstr->addrSize > 1 && !(fInstr->mem & 0x20)) {
|
| 655 |
|
|
// No relocation found. Insert new relocation and new symbol
|
| 656 |
|
|
// This fits the address instruction with a local IP target.
|
| 657 |
|
|
// to do: Make it work for other cases
|
| 658 |
|
|
|
| 659 |
|
|
// Add a symbol at target address if none exists
|
| 660 |
|
|
int32_t target = iInstr + instrLength * 4;
|
| 661 |
|
|
switch (fInstr->addrSize) { // Read offset of correct size
|
| 662 |
|
|
/* case 1: // 8 bit. cannot use IP
|
| 663 |
|
|
target += *(int8_t*)(sectionBuffer + relSource) << (operandType & 7);
|
| 664 |
|
|
rel.r_type = R_FORW_8 | R_FORW_SELFREL | 0x80000000;
|
| 665 |
|
|
break;*/
|
| 666 |
|
|
case 2: // 16 bit
|
| 667 |
|
|
target += *(int16_t*)(sectionBuffer + relSource);
|
| 668 |
|
|
rel.r_type = R_FORW_16 | R_FORW_SELFREL | 0x80000000;
|
| 669 |
|
|
break;
|
| 670 |
|
|
case 4: // 32 bit
|
| 671 |
|
|
target += *(int32_t*)(sectionBuffer + relSource);
|
| 672 |
|
|
rel.r_type = R_FORW_32 | R_FORW_SELFREL | 0x80000000;
|
| 673 |
|
|
break;
|
| 674 |
|
|
}
|
| 675 |
|
|
ElfFwcSym sym;
|
| 676 |
|
|
zeroAllMembers(sym);
|
| 677 |
|
|
sym.st_bind = STB_LOCAL;
|
| 678 |
|
|
sym.st_other = STV_EXEC;
|
| 679 |
|
|
sym.st_section = section;
|
| 680 |
|
|
sym.st_value = (uint64_t)(int64_t)target;
|
| 681 |
|
|
|
| 682 |
|
|
symbolExeAddress(sym);
|
| 683 |
|
|
int32_t symi = symbols.findFirst(sym);
|
| 684 |
|
|
if (symi < 0) {
|
| 685 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
| 686 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
| 687 |
|
|
}
|
| 688 |
|
|
// Add a dummy relocation record for this symbol.
|
| 689 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
| 690 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
| 691 |
|
|
rel.r_offset = (uint64_t)iInstr + fInstr->addrPos; // Position of relocated field
|
| 692 |
|
|
rel.r_section = section;
|
| 693 |
|
|
rel.r_addend = -4;
|
| 694 |
|
|
rel.r_sym = (uint32_t)symi;
|
| 695 |
|
|
relocations.addUnique(rel);
|
| 696 |
|
|
}
|
| 697 |
|
|
else if ((basePointer == REG_DATAP >> 16 || basePointer == REG_THREADP >> 16)
|
| 698 |
|
|
&& fInstr->addrSize > 1 && !(fInstr->mem & 0x20) && isExecutable) {
|
| 699 |
|
|
// No relocation found. Insert new relocation and new symbol. datap or threadp based
|
| 700 |
|
|
|
| 701 |
|
|
// Add a symbol at target address if none exists
|
| 702 |
|
|
int64_t target = fileHeader.e_datap_base;
|
| 703 |
|
|
rel.r_type = R_FORW_DATAP;
|
| 704 |
|
|
uint32_t dom = 2;
|
| 705 |
|
|
uint32_t st_other = STV_DATAP;
|
| 706 |
|
|
if (basePointer == REG_THREADP >> 16) {
|
| 707 |
|
|
target = fileHeader.e_threadp_base;
|
| 708 |
|
|
rel.r_type = R_FORW_THREADP;
|
| 709 |
|
|
dom = 3;
|
| 710 |
|
|
st_other = STV_THREADP;
|
| 711 |
|
|
}
|
| 712 |
|
|
switch (fInstr->addrSize) { // Read offset of correct size
|
| 713 |
|
|
case 1: // 8 bit
|
| 714 |
|
|
target += *(int8_t*)(sectionBuffer + relSource);
|
| 715 |
|
|
rel.r_type |= R_FORW_8 | 0x80000000;
|
| 716 |
|
|
break;
|
| 717 |
|
|
case 2: // 16 bit
|
| 718 |
|
|
target += *(int16_t*)(sectionBuffer + relSource);
|
| 719 |
|
|
rel.r_type |= R_FORW_16 | 0x80000000;
|
| 720 |
|
|
break;
|
| 721 |
|
|
case 4: // 32 bit
|
| 722 |
|
|
target += *(int32_t*)(sectionBuffer + relSource);
|
| 723 |
|
|
rel.r_type |= R_FORW_32 | 0x80000000;
|
| 724 |
|
|
break;
|
| 725 |
|
|
}
|
| 726 |
|
|
ElfFwcSym sym;
|
| 727 |
|
|
zeroAllMembers(sym);
|
| 728 |
|
|
sym.st_type = STT_OBJECT;
|
| 729 |
|
|
sym.st_bind = STB_WEAK;
|
| 730 |
|
|
sym.st_other = st_other;
|
| 731 |
|
|
sym.st_section = dom;
|
| 732 |
|
|
sym.st_value = (uint64_t)target;
|
| 733 |
|
|
|
| 734 |
|
|
int32_t symi = symbols.findFirst(sym);
|
| 735 |
|
|
if (symi < 0) {
|
| 736 |
|
|
symi = newSymbols.push(sym); // Add symbol to new symbols table
|
| 737 |
|
|
symi |= 0x80000000; // Upper bit means index refers to newSymbols
|
| 738 |
|
|
}
|
| 739 |
|
|
// Add a dummy relocation record for this symbol.
|
| 740 |
|
|
// This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
|
| 741 |
|
|
// It does have a size, though, because this is checked later in writeRelocationTarget()
|
| 742 |
|
|
rel.r_offset = iInstr + fInstr->addrPos; // Position of relocated field
|
| 743 |
|
|
rel.r_section = section;
|
| 744 |
|
|
rel.r_addend = 0;
|
| 745 |
|
|
rel.r_sym = (uint32_t)symi;
|
| 746 |
|
|
relocations.addUnique(rel);
|
| 747 |
|
|
}
|
| 748 |
|
|
}
|
| 749 |
|
|
}
|
| 750 |
|
|
|
| 751 |
|
|
|
| 752 |
|
|
void CDisassembler::followJumpTable(uint32_t symi, uint32_t RelType) {
    // Placeholder for checking a jump/call table and its targets.
    // Not implemented yet: both arguments are accepted but ignored.
    (void)symi;
    (void)RelType;
}
|
| 756 |
|
|
|
| 757 |
|
|
|
| 758 |
|
|
void CDisassembler::markCodeAsDubious() {
    // Placeholder: meant to remember that the current position may hold
    // data inside a code segment. Currently has no effect.
}
|
| 761 |
|
|
|
| 762 |
|
|
|
| 763 |
|
|
// List of instruction lengths, used in parseInstruction
|
| 764 |
|
|
// Instruction length lookup table. parseInstruction indexes it with the
// upper three bits of the first instruction word.
static const uint8_t lengthList[8] = {
    1, 1, 1, 1,     // index 0 - 3
    2, 2,           // index 4 - 5
    3,              // index 6
    4               // index 7
};
|
| 765 |
|
|
|
| 766 |
|
|
|
| 767 |
|
|
void CDisassembler::parseInstruction() {
|
| 768 |
|
|
// Parse one opcode at position iInstr
|
| 769 |
|
|
instructionWarning = 0;
|
| 770 |
|
|
|
| 771 |
|
|
// Get instruction
|
| 772 |
|
|
pInstr = (STemplate*)(sectionBuffer + iInstr);
|
| 773 |
|
|
|
| 774 |
|
|
// Get op1
|
| 775 |
|
|
uint8_t op = pInstr->a.op1;
|
| 776 |
|
|
|
| 777 |
|
|
// Get format
|
| 778 |
|
|
format = (pInstr->a.il << 8) + (pInstr->a.mode << 4); // Construct format = (il,mode,submode)
|
| 779 |
|
|
|
| 780 |
|
|
// Get submode
|
| 781 |
|
|
switch (format) {
|
| 782 |
|
|
case 0x200: case 0x220: case 0x300: case 0x320: // submode in mode2
|
| 783 |
|
|
format += pInstr->a.mode2;
|
| 784 |
|
|
break;
|
| 785 |
|
|
case 0x250: case 0x310: // Submode for jump instructions etc.
|
| 786 |
|
|
if (op < 8) {
|
| 787 |
|
|
format += op; op = pInstr->b[0] & 0x3F;
|
| 788 |
|
|
}
|
| 789 |
|
|
else {
|
| 790 |
|
|
format += 8;
|
| 791 |
|
|
}
|
| 792 |
|
|
break;
|
| 793 |
|
|
}
|
| 794 |
|
|
|
| 795 |
|
|
// Look up format details
|
| 796 |
|
|
static SFormat form;
|
| 797 |
|
|
fInstr = &formatList[lookupFormat(pInstr->q)]; // lookupFormat is in emulator2.cpp
|
| 798 |
|
|
format = fInstr->format2; // Include subformat depending on op1
|
| 799 |
|
|
if (fInstr->tmplate == 0xE && pInstr->a.op2 && !(fInstr->imm2 & 0x100)) {
|
| 800 |
|
|
// Single format instruction if op2 != 0 and op2 not used as immediate operand
|
| 801 |
|
|
form = *fInstr;
|
| 802 |
|
|
form.category = 1;
|
| 803 |
|
|
fInstr = &form;
|
| 804 |
|
|
}
|
| 805 |
|
|
|
| 806 |
|
|
// Get operand type
|
| 807 |
|
|
if (fInstr->ot == 0) { // Operand type determined by OT field
|
| 808 |
|
|
operandType = pInstr->a.ot; // Operand type
|
| 809 |
|
|
if (!(pInstr->a.mode & 6) && !(fInstr->vect & 0x11)) {
|
| 810 |
|
|
// Check use of M bit
|
| 811 |
|
|
format |= (operandType & 4) << 5; // Add M bit to format
|
| 812 |
|
|
operandType &= ~4; // Remove M bit from operand type
|
| 813 |
|
|
}
|
| 814 |
|
|
}
|
| 815 |
|
|
else if ((fInstr->ot & 0xF0) == 0x10) { // Operand type fixed. Value in formatList
|
| 816 |
|
|
operandType = fInstr->ot & 7;
|
| 817 |
|
|
}
|
| 818 |
|
|
else if (fInstr->ot == 0x32) { // int32 for even op1, int64 for odd op1
|
| 819 |
|
|
operandType = 2 + (pInstr->a.op1 & 1);
|
| 820 |
|
|
}
|
| 821 |
|
|
else if (fInstr->ot == 0x35) { // Float for even op1, double for odd op1
|
| 822 |
|
|
operandType = 5 + (pInstr->a.op1 & 1);
|
| 823 |
|
|
}
|
| 824 |
|
|
else {
|
| 825 |
|
|
operandType = 0; // Error in formatList. Should not occur
|
| 826 |
|
|
}
|
| 827 |
|
|
|
| 828 |
|
|
// Find instruction length
|
| 829 |
|
|
instrLength = lengthList[pInstr->i[0] >> 29]; // Length up to 3 determined by il. Length 4 by upper bit of mode
|
| 830 |
|
|
|
| 831 |
|
|
// Find any reasons for warnings
|
| 832 |
|
|
//findWarnings(p);
|
| 833 |
|
|
|
| 834 |
|
|
// Find any errors
|
| 835 |
|
|
//findErrors(p);
|
| 836 |
|
|
}
|
| 837 |
|
|
|
| 838 |
|
|
|
| 839 |
|
|
|
| 840 |
|
|
/*****************************************************************************
|
| 841 |
|
|
Functions for reading instruction list from comma-separated file,
|
| 842 |
|
|
sorting, and searching
|
| 843 |
|
|
*****************************************************************************/
|
| 844 |
|
|
|
| 845 |
|
|
// Members of class CCSVFile for reading comma-separated file
|
| 846 |
|
|
|
| 847 |
|
|
// Read and parse file
|
| 848 |
|
|
void CCSVFile::parse() {
|
| 849 |
|
|
// Sorry for the ugly code!
|
| 850 |
|
|
|
| 851 |
|
|
const char * fields[numInstructionColumns]; // pointer to each field in line
|
| 852 |
|
|
int fi = 0; // field index
|
| 853 |
|
|
uint32_t i, j; // loop counters
|
| 854 |
|
|
char * s, * t = 0; // point to begin and end of field
|
| 855 |
|
|
char c;
|
| 856 |
|
|
char separator = 0; // separator character, preferably comma
|
| 857 |
|
|
int line = 1; // line number
|
| 858 |
|
|
SInstruction record; // record constructed from line
|
| 859 |
|
|
zeroAllMembers(fields);
|
| 860 |
|
|
|
| 861 |
|
|
if (data_size==0) read(cmd.getFilename(cmd.instructionListFile), 2); // read file if it has not already been read
|
| 862 |
|
|
if (err.number()) return;
|
| 863 |
|
|
|
| 864 |
|
|
// loop through file
|
| 865 |
|
|
for (i = 0; i < data_size; i++) {
|
| 866 |
|
|
// find begin of field, quoted or not
|
| 867 |
|
|
s = (char*)buf() + i;
|
| 868 |
|
|
c = *s;
|
| 869 |
|
|
if (c == ' ') continue; // skip leading spaces
|
| 870 |
|
|
|
| 871 |
|
|
if (c == '"' || c == 0x27) { // single or double quote
|
| 872 |
|
|
fields[fi] = s+1; // begin of quoted string
|
| 873 |
|
|
for (i++; i < data_size; i++) { // search for matching end quote
|
| 874 |
|
|
t = (char*)buf() + i;
|
| 875 |
|
|
if (*t == c) {
|
| 876 |
|
|
*t = 0; i++; // End quote found. Put end of string here
|
| 877 |
|
|
goto SEARCHFORCOMMA;
|
| 878 |
|
|
}
|
| 879 |
|
|
if (*t == '\n') break; // end of line found before end quote
|
| 880 |
|
|
}
|
| 881 |
|
|
// end quote not found
|
| 882 |
|
|
err.submit(ERR_INSTRUCTION_LIST_QUOTE, line);
|
| 883 |
|
|
return;
|
| 884 |
|
|
}
|
| 885 |
|
|
if (c == '\r' || c == '\n')
|
| 886 |
|
|
goto NEXTLINE; // end of line found
|
| 887 |
|
|
if (c == separator || c == ',') {
|
| 888 |
|
|
// empty field
|
| 889 |
|
|
fields[fi] = "";
|
| 890 |
|
|
goto SEARCHFORCOMMA;
|
| 891 |
|
|
}
|
| 892 |
|
|
|
| 893 |
|
|
// Anything else: begin of unquoted string
|
| 894 |
|
|
fields[fi] = s;
|
| 895 |
|
|
// search for end of field
|
| 896 |
|
|
|
| 897 |
|
|
SEARCHFORCOMMA:
|
| 898 |
|
|
for (; i < data_size; i++) { // search for comma after field
|
| 899 |
|
|
t = (char*)buf() + i;
|
| 900 |
|
|
if (*t == separator || (separator == 0 && (*t == ',' || *t == ';' || *t == '\t'))) {
|
| 901 |
|
|
separator = *t; // separator set to the first comma, semicolon or tabulator
|
| 902 |
|
|
*t = 0; // put end of string here
|
| 903 |
|
|
goto NEXTFIELD;
|
| 904 |
|
|
}
|
| 905 |
|
|
if (*t == '\n') break; // end of line found before comma
|
| 906 |
|
|
}
|
| 907 |
|
|
fi++;
|
| 908 |
|
|
goto NEXTLINE;
|
| 909 |
|
|
|
| 910 |
|
|
NEXTFIELD:
|
| 911 |
|
|
// next field
|
| 912 |
|
|
fi++;
|
| 913 |
|
|
if (fi != numInstructionColumns) continue;
|
| 914 |
|
|
// end of last field
|
| 915 |
|
|
|
| 916 |
|
|
NEXTLINE:
|
| 917 |
|
|
for (; i < data_size; i++) { // search for end. of line
|
| 918 |
|
|
t = (char*)buf() + i;
|
| 919 |
|
|
// accept newlines as "\r", "\n", or "\r\n"
|
| 920 |
|
|
if (*t == '\r' || *t == '\n') break;
|
| 921 |
|
|
}
|
| 922 |
|
|
if (*t == '\r' && *(t+1) == '\n') i++; // end of line is two characters
|
| 923 |
|
|
*t = 0; // terminate line
|
| 924 |
|
|
|
| 925 |
|
|
// make any remaining fields blank
|
| 926 |
|
|
for (; fi < numInstructionColumns; fi++) {
|
| 927 |
|
|
fields[fi] = "";
|
| 928 |
|
|
}
|
| 929 |
|
|
// Begin next line
|
| 930 |
|
|
line++;
|
| 931 |
|
|
fi = 0;
|
| 932 |
|
|
|
| 933 |
|
|
// Check if blank or heading record
|
| 934 |
|
|
if (fields[2][0] < '0' || fields[2][0] > '9') continue;
|
| 935 |
|
|
|
| 936 |
|
|
// save values to record
|
| 937 |
|
|
// most fields are decimal or hexadecimal numbers
|
| 938 |
|
|
record.id = (uint32_t)interpretNumber(fields[1]);
|
| 939 |
|
|
record.category = (uint32_t)interpretNumber(fields[2]);
|
| 940 |
|
|
record.format = interpretNumber(fields[3]);
|
| 941 |
|
|
record.templt = (uint32_t)interpretNumber(fields[4]);
|
| 942 |
|
|
record.sourceoperands = (uint32_t)interpretNumber(fields[6]);
|
| 943 |
|
|
record.op1 = (uint32_t)interpretNumber(fields[7]);
|
| 944 |
|
|
record.op2 = (uint32_t)interpretNumber(fields[8]);
|
| 945 |
|
|
record.optypesgp = (uint32_t)interpretNumber(fields[9]);
|
| 946 |
|
|
record.optypesscalar = (uint32_t)interpretNumber(fields[10]);
|
| 947 |
|
|
record.optypesvector = (uint32_t)interpretNumber(fields[11]);
|
| 948 |
|
|
// interpret immediate operand
|
| 949 |
|
|
if (tolower(fields[12][0]) == 'i') {
|
| 950 |
|
|
// implicit immediate operand. value is prefixed by 'i'. Get value
|
| 951 |
|
|
record.implicit_imm = (uint32_t)interpretNumber(fields[12]+1);
|
| 952 |
|
|
record.opimmediate = OPI_IMPLICIT;
|
| 953 |
|
|
}
|
| 954 |
|
|
else {
|
| 955 |
|
|
// immediate operand type
|
| 956 |
|
|
record.opimmediate = (uint8_t)interpretNumber(fields[12]);
|
| 957 |
|
|
}
|
| 958 |
|
|
// interpret template variant
|
| 959 |
|
|
record.variant = interpretTemplateVariants(fields[5]);
|
| 960 |
|
|
// copy instruction name
|
| 961 |
|
|
for (j = 0; j < sizeof(record.name)-1; j++) {
|
| 962 |
|
|
c = fields[0][j];
|
| 963 |
|
|
if (c == 0) break;
|
| 964 |
|
|
record.name[j] = tolower(c);
|
| 965 |
|
|
}
|
| 966 |
|
|
record.name[j] = 0;
|
| 967 |
|
|
|
| 968 |
|
|
// add record to list
|
| 969 |
|
|
instructionlist.push(record);
|
| 970 |
|
|
}
|
| 971 |
|
|
}
|
| 972 |
|
|
|
| 973 |
|
|
// Interpret number in instruction list
|
| 974 |
|
|
uint64_t CCSVFile::interpretNumber(const char * text) {
|
| 975 |
|
|
uint32_t error = 0;
|
| 976 |
|
|
uint64_t result = uint64_t(::interpretNumber(text, 64, &error));
|
| 977 |
|
|
if (error) err.submit(ERR_INSTRUCTION_LIST_SYNTAX, text);
|
| 978 |
|
|
return result;
|
| 979 |
|
|
}
|
| 980 |
|
|
|
| 981 |
|
|
|
| 982 |
|
|
// Interpret a string with a decimal, binary, octal, or hexadecimal number
//
// Accepted syntax: optional leading spaces, '+' and '-' signs, then either
// decimal digits or one of the prefixes 0x (hexadecimal), 0b (binary),
// 0o (octal) followed by digits, then optional trailing spaces.
// Prefixes and hexadecimal digits are case insensitive.
//
// text:      string to interpret. Reading stops at a terminating zero, at a
//            comma, or after maxLength characters, whichever comes first
// maxLength: maximum number of characters to read
// error:     receives 0 on success, 1 on syntax error or null text, or
//            (position | 0x1000) if the number was ended by a comma.
//            Must not be null
// Return value: the number, or 0 on syntax error. An empty or all-blank
//            string gives 0 without error. Overflow beyond 64 bits is not
//            detected; the value wraps around
int64_t interpretNumber(const char * text, uint32_t maxLength, uint32_t * error) {
    enum {
        ST_BEGIN    = 0,           // nothing but blanks and signs seen yet
        ST_ZERO     = 1,           // after a leading 0
        ST_HEX      = 2,           // after 0x
        ST_BIN      = 3,           // after 0b
        ST_OCT      = 4,           // after 0o
        ST_DEC      = 5,           // after decimal digit
        ST_TRAILING = 6            // trailing space
    };
    int state = ST_BEGIN;
    uint64_t number = 0;           // magnitude accumulated so far
    uint8_t c, clower, digit;
    bool sign = false;             // true if the result must be negated
    uint32_t i;
    *error = 0;
    if (text == 0) {
        *error = 1; return number;
    }

    for (i = 0; i < maxLength; i++) {
        c = text[i];               // read character
        clower = c | 0x20;         // convert to lower case
        if (clower == 'x') {
            // hexadecimal prefix, only allowed right after a leading 0
            if (state != ST_ZERO) {
                *error = 1; return 0;
            }
            state = ST_HEX;
        }
        else if (clower == 'o') {
            // octal prefix, only allowed right after a leading 0
            if (state != ST_ZERO) {
                *error = 1; return 0;
            }
            state = ST_OCT;
        }
        else if (clower == 'b' && state == ST_ZERO) {
            // binary prefix. In any other state 'b' is tried as a hexadecimal digit below
            state = ST_BIN;
        }
        else if (c >= '0' && c <= '9') {
            // digit 0 - 9
            digit = c - '0';
            switch (state) {
            case ST_BEGIN:
                state = (digit == 0) ? ST_ZERO : ST_DEC;
                number = digit;
                break;
            case ST_ZERO:
                state = ST_DEC;
                // continue in case ST_DEC:
            case ST_DEC:
                // decimal
                number = number * 10 + digit;
                break;
            case ST_HEX:
                // hexadecimal
                number = number * 16 + digit;
                break;
            case ST_BIN:
                // binary
                if (digit > 1) {
                    *error = 1; return 0;
                }
                number = number * 2 + digit;
                break;
            case ST_OCT:
                // octal
                if (digit > 7) {
                    *error = 1; return 0;
                }
                number = number * 8 + digit;
                break;
            default:
                // digit after trailing space
                *error = 1;
                return 0;
            }
        }
        else if (clower >= 'a' && clower <= 'f') {
            // hexadecimal digit
            digit = clower - ('a' - 10);
            if (state != ST_HEX) {
                *error = 1; return 0;
            }
            number = number * 16 + digit;
        }
        else if (c == ' ' || c == '+') {
            // ignore leading or trailing blank or plus
            if (state != ST_BEGIN) state = ST_TRAILING;
        }
        else if (c == '-') {
            // change sign. Only allowed before the first digit
            if (state != ST_BEGIN) {
                *error = 1; return 0;
            }
            sign = ! sign;
        }
        else if (c == 0) break;    // end of string
        else if (c == ',') {
            *error = i | 0x1000;   // end with comma. return position in error
            break;
        }
        else {
            // illegal character
            *error = 1; return 0;
        }
    }
    // Negate in unsigned arithmetic. Negating as int64_t would be undefined
    // behavior for a magnitude of 2^63, i.e. the most negative value
    if (sign) number = 0 - number;
    return (int64_t)number;
}
|
| 1084 |
|
|
|
| 1085 |
|
|
void CDisassembler::getLineList(CDynamicArray<SLineRef> & list) {
    // Hand the disassembler's lineList over to the caller's array
    // (used for transferring the line list to the debugger)
    list << lineList;
}
|
| 1089 |
|
|
|
| 1090 |
|
|
void CDisassembler::getOutFile(CTextFileBuffer & buffer) {
    // Copy the disassembly output text (outFile) into the caller's buffer
    // (used for transferring the output file to the debugger)
    buffer.copy(outFile);
}
|
| 1094 |
|
|
|