OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [disasm1.cpp] - Blame information for rev 164

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 49 Agner
/****************************  disasm1.cpp   ********************************
2
* Author:        Agner Fog
3
* Date created:  2017-04-26
4
* Last modified: 2021-03-30
5
* Version:       1.11
6
* Project:       Binary tools for ForwardCom instruction set
7
* Module:        disassem.h
8
* Description:   Disassembler
9
* Disassembler for ForwardCom
10
*
11
* Copyright 2007-2021 GNU General Public License http://www.gnu.org/licenses
12
*****************************************************************************/
13
#include "stdafx.h"
14
 
15
 
16
uint64_t interpretTemplateVariants(const char * s) {
17
    // Interpret template variants in instruction record
18
    // The return value is a combination of bits for each variant option
19
    // These bits are defined as constants VARIANT_D0, etc., in disassem.h
20
    uint64_t v = 0;
21
    for (int i = 0; i < 8; i++) {          // Loop through string
22
        char c = toupper(s[i]), d = toupper(s[i+1]);
23
        switch (c) {
24
        case 0:
25
            return v;                      // End of string
26
        case 'D':
27
            if (d == '0') v |= VARIANT_D0; // D0
28
            if (d == '1') v |= VARIANT_D1; // D1
29
            if (d == '2') v |= VARIANT_D2; // D2
30
            if (d == '3') v |= VARIANT_D3; // D3
31
            continue;
32
        case 'F':
33
            if (d == '0') v |= VARIANT_F0; // F0
34
            if (d == '1') v |= VARIANT_F1; // F1
35
            continue;
36
        case 'M':
37
            if (d == '0') v |= VARIANT_M0; // M0
38
            //if (d == '1') v |= VARIANT_M1; // M1. No longer used
39
            continue;
40
        case 'R':
41
            if (d == '0') v |= VARIANT_R0; // R0
42
            if (d == '1') v |= VARIANT_R1; // R1
43
            if (d == '2') v |= VARIANT_R2; // R2
44
            if (d == '3') v |= VARIANT_R3; // R3
45
            if (d == 'L') v |= VARIANT_RL; // RL
46
            i++;
47
            continue;
48
        case 'I':
49
            if (d == '2') v |= VARIANT_I2; // I2
50
            continue;
51
 
52
        case 'O':
53
            if (d > '0' && d < '7') v |= (d - '0') << 24;    // O1 - O6
54
            continue;
55
        case 'U':
56
            if (d == '0') v |= VARIANT_U0; // U0
57
            if (d == '3') v |= VARIANT_U3; // U3
58
            continue;
59
        case 'H':
60
            if (d == '0') v |= VARIANT_H0; // H0
61
            continue;
62
        case 'X':
63
            v |= uint64_t(((d-'0') & 0xF) | 0x10) << 32; // X0 - X9
64
            continue;
65
        case 'Y':
66
            v |= uint64_t(((d-'0') & 0xF) | 0x20) << 32; // Y0 - Y9
67
            continue;
68
        }
69
    }
70
    return v;
71
}
72
 
73
 
74
void CDisassembler::sortSymbolsAndRelocations() {
75
    // Sort symbols by address. This is useful when symbol labels are written out
76
    uint32_t i;                                            // loop counter
77
    // The values of st_reguse1 and st_reguse2 are no longer needed after these values have been written out.
78
    // Save old index in st_reguse1. 
79
    // Set st_reguse2 to zero, it is used later for data type
80
 
81
    for (i = 0; i < symbols.numEntries(); i++) {
82
        symbols[i].st_reguse1 = i;
83
        symbols[i].st_reguse2 = 0;
84
        // symbols are grouped by section in object files, by base pointer in executable files
85
        if (isExecutable) symbolExeAddress(symbols[i]);
86
    }
87
    // Sort symbols by address
88
    symbols.sort();
89
 
90
    // Add dummy empty symbol number 0
91
    ElfFwcSym nulsymbol = {0,0,0,0,0,0,0,0,0};
92
    symbols.addUnique(nulsymbol);
93
 
94
    // Update all relocations to the new symbol indexes
95
    // Translate old to new symbol index in all relocation records
96
    // Allocate array for translating  old to new symbol index
97
    CDynamicArray<uint32_t> old2newSymbolIndex;
98
    old2newSymbolIndex.setNum(symbols.numEntries());
99
 
100
    // Make translation table
101
    for (i = 0; i < symbols.numEntries(); i++) {
102
        uint32_t oldindex = symbols[i].st_reguse1;
103
        if (oldindex < symbols.numEntries()) {
104
            old2newSymbolIndex[oldindex] = i;
105
        }
106
    }
107
 
108
    // Translate all symbol indices in relocation records
109
    for (i = 0; i < relocations.numEntries(); i++) {
110
        if (relocations[i].r_sym < old2newSymbolIndex.numEntries()) {
111
            relocations[i].r_sym = old2newSymbolIndex[relocations[i].r_sym];
112
        }
113
        else relocations[i].r_sym = 0; // index out of range!
114
        if ((relocations[i].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP) {
115
            // relocation record has an additional reference point
116
            // bit 30 indicates relocation used OK
117
            uint32_t refsym = relocations[i].r_refsym & ~0x40000000;
118
            if (refsym < old2newSymbolIndex.numEntries()) {
119
                relocations[i].r_refsym = old2newSymbolIndex[refsym] | (relocations[i].r_refsym & 0x40000000);
120
            }
121
            else relocations[i].r_refsym = 0; // index out of range
122
        }
123
    }
124
 
125
    // Sort relocations by address
126
    relocations.sort();
127
}
128
 
129
// Translate symbol address from section:offset to pointerbase:address
130
void CDisassembler::symbolExeAddress(ElfFwcSym & sym) {
131
    // use this translation only when disassembling executable files
132
    if (!isExecutable) return;
133
 
134
    // section
135
    uint32_t sec =  sym.st_section;
136
    if (sec && sec < sectionHeaders.numEntries()) {
137
        uint32_t flags = (uint32_t)sectionHeaders[sec].sh_flags;
138
        // get base pointer
139
        switch (flags & SHF_BASEPOINTER) {
140
        case SHF_IP:
141
            sym.st_section = 1;  break;
142
        case SHF_DATAP:
143
            sym.st_section = 2;  break;
144
        case SHF_THREADP:
145
            sym.st_section = 3;  break;
146
        default:
147
            sym.st_section = 0;  break;
148
        }
149
        sym.st_value += sectionHeaders[sec].sh_addr;
150
    }
151
}
152
 
153
 
154
// Join the tables: symbols and newSymbols
155
void CDisassembler::joinSymbolTables() {
156
    /* There are two symbol tables: 'symbols' and 'newSymbols'.
157
    'symbols' contains the symbols that were in the original file. This table is sorted
158
    by address in sortSymbolsAndRelocations() in order to make it easy to find a symbol
159
    at a given address.
160
    'newSymbols' contains new symbols that were created during pass 1. It is not sorted.
161
    The reason why we have two symbol tables is that the symbol indexes would change if
162
    we add to the 'symbols' table during pass 1 and keep it sorted. We need to have
163
    consistent indexes during pass 1 in order to access symbols by their index. Likewise,
164
    'newSymbols' is not sorted because indexes would change when new symbols are added to it.
165
    'newSymbols' may contain dublets because it is not sorted so dublets are not detected
166
    when new symbols are added.
167
    'joinSymbolTables()' is called after pass 1 when we are finished making new symbols.
168
    This function joins the two tables together, removes any dublets, updates symbol indexes
169
    in all relocation records, and tranfers data type information from relocation records
170
    to symbol records.
171
    */
172
    uint32_t r;                                            // Relocation index
173
    uint32_t s;                                            // Symbol index
174
    uint32_t newsymi;                                      // Symbol index in newSymbols
175
    uint32_t newsymi2;                                     // Index of new symbol after transfer to symbols table
176
    uint32_t symTempIndex = symbols.numEntries();       // Temporary index of symbol after transfer
177
 
178
    // Remember index of each symbol before adding new symbols and reordering
179
    for (s = 0; s < symbols.numEntries(); s++) {
180
        symbols[s].st_reguse1 = s;
181
    }
182
 
183
    // Loop through relocations to find references to new symbols
184
    for (r = 0; r < relocations.numEntries(); r++) {
185
        if (relocations[r].r_sym & 0x80000000) {           // Refers to newSymbols table
186
            newsymi = relocations[r].r_sym & ~0x80000000;
187
            if (newsymi < newSymbols.numEntries()) {
188
                // Put symbol into old table if no equivalent symbol exists here
189
                newsymi2 = symbols.addUnique(newSymbols[newsymi]);
190
                // Give it a temporary index if it doesn't have one
191
                if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
192
                // update reference in relocation record to temporary index
193
                relocations[r].r_sym = symbols[newsymi2].st_reguse1;
194
            }
195
        }
196
        // Do the same with any reference point
197
        if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && relocations[r].r_refsym & 0x80000000) {
198
            newsymi = relocations[r].r_refsym & ~0xC0000000;
199
            if (newsymi < newSymbols.numEntries()) {
200
                // Put symbol into old table if no equivalent symbol exists here
201
                newsymi2 = symbols.addUnique(newSymbols[newsymi]);
202
                // Give it a temporary index if it doesn't have one
203
                if (symbols[newsymi2].st_reguse1 == 0) symbols[newsymi2].st_reguse1 = symTempIndex++;
204
                // update reference in relocation record to temporary index
205
                relocations[r].r_refsym = symbols[newsymi2].st_reguse1 | (relocations[r].r_refsym & 0x40000000);
206
            }
207
        }
208
    }
209
    // Make symbol index translation table
210
    CDynamicArray<uint32_t> old2newSymbolIndex;
211
    old2newSymbolIndex.setNum(symbols.numEntries());
212
    for (s = 0; s < symbols.numEntries(); s++) {
213
        uint32_t oldsymi = symbols[s].st_reguse1;
214
        if (oldsymi < old2newSymbolIndex.numEntries()) {
215
            old2newSymbolIndex[oldsymi] = s;
216
        }
217
    }
218
    // Update indexes in relocation records
219
    for (r = 0; r < relocations.numEntries(); r++) {
220
        if (relocations[r].r_sym < old2newSymbolIndex.numEntries()) { // Refers to newSymbols table
221
            relocations[r].r_sym = old2newSymbolIndex[relocations[r].r_sym];
222
            // Give the symbol a data type from relocation record if it doesn't have one
223
            if (symbols[relocations[r].r_sym].st_reguse2 == 0) {
224
                symbols[relocations[r].r_sym].st_reguse2 = relocations[r].r_type >> 8;
225
            }
226
        }
227
        // Do the same with any reference point
228
        uint32_t refsym = relocations[r].r_refsym & ~0xC0000000;
229
        if ((relocations[r].r_type & R_FORW_RELTYPEMASK) == R_FORW_REFP && refsym < old2newSymbolIndex.numEntries()) {
230
            relocations[r].r_refsym = old2newSymbolIndex[refsym] | (relocations[r].r_refsym & 0x40000000);
231
        }
232
    }
233
}
234
 
235
 
236
void CDisassembler::assignSymbolNames() {
237
    // Assign names to symbols that do not have a name
238
    uint32_t i;                                            // New symbol index
239
    uint32_t numDigits;                                    // Number of digits in new symbol names
240
    char name[64];                                         // sectionBuffer for making symbol name
241
    static char format[64];
242
    uint32_t unnamedNum = 0;                               // Number of unnamed symbols
243
    //uint32_t addMoreSymbols = 0;                           // More symbols need to be added
244
 
245
    // Find necessary number of digits
246
    numDigits = 3; i = symbols.numEntries();
247
    while (i >= 1000) {
248
        i /= 10;
249
        numDigits++;
250
    }
251
 
252
    // format string for symbol names
253
    sprintf(format, "%s%c0%i%c", "@_", '%', numDigits, 'i');
254
 
255
    // Loop through symbols
256
    for (i = 1; i < symbols.numEntries(); i++) {
257
        if (symbols[i].st_name == 0 ) {
258
            // Symbol has no name. Make one
259
            sprintf(name, format, ++unnamedNum);
260
            // Store new name
261
            symbols[i].st_name = stringBuffer.pushString(name);
262
        }
263
    }
264
 
265
#if 0  //!!
266
    // For debugging: list all symbols
267
    printf("\n\nSymbols:");
268
    for (i = 0; i < symbols.numEntries(); i++) {
269
        printf("\n%3X %3X %s sect %i offset %X type %X size %i Scope %i",
270
            i, symbols[i].st_name, stringBuffer.buf() +  symbols[i].st_name,
271
            symbols[i].st_section, (uint32_t)symbols[i].st_value, symbols[i].st_type,
272
            (uint32_t)symbols[i].st_unitsize, symbols[i].st_other);
273
        if (symbols[i].st_reguse2) printf(" Type %X", symbols[i].st_reguse2);
274
    }
275
#endif
276
#if 0
277
    // For debugging: list all relocations
278
    printf("\n\nRelocations:");
279
    for (uint32_t i = 0; i < relocations.numEntries(); i++) {
280
        printf("\nsect %i, os %X, type %X, sym %i, add %X, refsym %X",
281
            (uint32_t)(relocations[i].r_section), (uint32_t)relocations[i].r_offset, relocations[i].r_type,
282
            relocations[i].r_sym, relocations[i].r_addend, relocations[i].r_refsym);
283
    }
284
#endif
285
}
286
 
287
 
288
 
289
/**************************  class CDisassembler  *****************************
290
Members of class CDisassembler
291
Members that relate to file output are in disasm2.cpp
292
******************************************************************************/
293
 
294
CDisassembler::CDisassembler() {
    // Constructor. Bring the disassembler into its start condition

    // Output file as specified in cmd
    outputFile = cmd.outputFile;

    // No pass is running yet and debug mode is off
    pass = 0;
    debugMode = 0;

    // No symbol and no function is known yet
    nextSymbol = 0;
    currentFunction = 0;
    currentFunctionEnd = 0;

    // Check the format list before it is used
    checkFormatListIntegrity();
}
304
 
305
void CDisassembler::initializeInstructionList() {
306
    // Read and initialize instruction list and sort it by category, format, and op1
307
    CCSVFile instructionListFile;
308
    instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH);  // Filename of list of instructions
309
    instructionListFile.parse();                 // Read and interpret instruction list file
310
    instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container
311
    instructionlist.sort();                      // Sort list, using sort order defined by SInstruction2
312
}
313
 
314
// Read instruction list, split ELF file into components
315
void CDisassembler::getComponents1() {
316
    // Check code integrity
317
    checkFormatListIntegrity();
318
 
319
    // Read instruction list
320
    initializeInstructionList();
321
 
322
    // Split ELF file into containers
323
    split();
324
}
325
 
326
// Read instruction list, get ELF components for assembler output listing
327
void CDisassembler::getComponents2(CELF const & assembler, CMemoryBuffer const & instructList) {
328
    // This function replaces getComponents1() when making an output listing for the assembler
329
    // list file name from command line
330
 
331
    // copy containers from assembler outFile
332
    sectionHeaders.copy(assembler.getSectionHeaders());
333
    symbols.copy(assembler.getSymbols());
334
    relocations.copy(assembler.getRelocations());
335
    stringBuffer.copy(assembler.getStringBuffer());
336
    dataBuffer.copy(assembler.getDataBuffer());
337
    // Copy instruction list from assembler to avoid reading the csv file again.
338
    // Use the unsorted list to make sure the preferred name for an instuction comes first, in case there are alias names
339
    instructionlist.copy(instructList);
340
    instructionlist.sort();  // Sort list, using the sort order needed by the disassembler as defined by SInstruction2
341
}
342
 
343
 
344
// Do the disassembly
345
void CDisassembler::go() {
346
    // set tabulator stops
347
    setTabStops();
348
 
349
    // write feedback to console
350
    feedBackText1();
351
 
352
    // is this an executable or object file
353
    isExecutable = fileHeader.e_type == ET_EXEC;
354
 
355
    // Begin writing output file
356
    writeFileBegin();
357
 
358
    // Sort symbols by address
359
    sortSymbolsAndRelocations();
360
 
361
    // pass 1: Find symbols types and unnamed symbols
362
    pass = 1;
363
    pass1();
364
 
365
    if (pass & 0x10) {
366
        // Repetition of pass 1 requested
367
        pass = 2;
368
        pass1();
369
    }
370
 
371
    // Join the tables: symbols and newSymbols;
372
    joinSymbolTables();
373
 
374
    // put names on unnamed symbols
375
    assignSymbolNames();
376
 
377
    // pass 2: Write all sections to output file
378
    pass = 0x100;
379
    pass2();
380
 
381
    // Check for illegal entries in symbol table and relocations table
382
    finalErrorCheck();
383
 
384
    // Finish writing output file
385
    writeFileEnd();
386
 
387
    // write output file
388
    if (outputFile && !debugMode) outFile.write(cmd.getFilename(outputFile));
389
}
390
 
391
// write feedback text on stdout
392
void CDisassembler::feedBackText1() {
393
    if (cmd.verbose && cmd.job == CMDL_JOB_DIS) {
394
        // Tell what we are doing:
395
        printf("\nDisassembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(outputFile));
396
    }
397
}
398
 
399
 
400
void CDisassembler::pass1() {
401
 
402
    /*             pass 1: does the following jobs:
403
    --------------------------------
404
 
405
    * Scans all code sections, instruction by instruction.
406
 
407
    * Follows all references to data in order to determine data type for
408
    each data symbol.
409
 
410
    * Assigns symbol table entries for all jump and call targets that do not
411
    allready have a name.
412
 
413
    * Identifies and analyzes tables of jump addresses and call addresses,
414
    e.g. switch/case tables and virtual function tables. (to do !)
415
 
416
    * Tries to identify any data in the code section.
417
 
418
    */
419
    //uint32_t sectionType;
420
 
421
    // Loop through sections, pass 1
422
    for (section = 1; section < sectionHeaders.numEntries(); section++) {
423
 
424
        // Get section type
425
        //sectionType = sectionHeaders[section].sh_type;
426
        codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
427
 
428
        sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
429
        sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
430
 
431
        if (codeMode < 4) {
432
            // This is a code section
433
 
434
            sectionAddress = sectionHeaders[section].sh_addr;
435
            if (sectionEnd == 0) continue;
436
 
437
            iInstr = 0;
438
 
439
            // Loop through instructions
440
            while (iInstr < sectionEnd) {
441
 
442
                // Check if code not dubious
443
                if (codeMode == 1) {
444
 
445
                    parseInstruction();                    // Parse instruction
446
 
447
                    updateSymbols();                       // Detect symbol types for operands of this instruction
448
 
449
                    updateTracer();                        // Trace register values
450
 
451
                    iInstr += instrLength * 4;             // Next instruction
452
                }
453
                else {
454
                  //  iEnd = labelEnd;
455
                }
456
            }
457
        }
458
    }
459
}
460
 
461
 
462
void CDisassembler::pass2() {
463
 
464
    /*             pass 2: does the following jobs:
465
    --------------------------------
466
 
467
    * Scans through all sections, code and data.
468
 
469
    * Outputs warnings for suboptimal instruction codes and error messages
470
    for erroneous code and erroneous relocations.
471
 
472
    * Outputs disassembly of all instructions, operands and relocations,
473
    followed by the binary code listing as comment.
474
 
475
    * Outputs disassembly of all data, followed by alternative representations
476
    as comment.
477
    */
478
 
479
    //uint32_t sectionType;
480
 
481
    // Loop through sections, pass 2
482
    for (section = 1; section < sectionHeaders.numEntries(); section++) {
483
 
484
        // Get section type
485
        //sectionType = sectionHeaders[section].sh_type;
486
        codeMode = (sectionHeaders[section].sh_flags & SHF_EXEC) ? 1 : 4;
487
 
488
        // Initialize code parser
489
        sectionBuffer = dataBuffer.buf() + sectionHeaders[section].sh_offset;
490
        sectionEnd = (uint32_t)sectionHeaders[section].sh_size;
491
        sectionAddress = sectionHeaders[section].sh_addr;
492
 
493
        writeSectionBegin();                               // Write segment directive
494
 
495
        if (codeMode < 4) {
496
            // This is a code section
497
            if (sectionEnd == 0) continue;
498
            iInstr = 0;
499
 
500
            // Loop through instructions
501
            while (iInstr < sectionEnd) {
502
 
503
                if (debugMode) {
504
                    // save cross reference
505
                    SLineRef xref = { iInstr + sectionAddress, 1, outFile.dataSize() };
506
                    lineList.push(xref);
507
                    writeAddress();
508
                }
509
                writeLabels();                             // Find any label here
510
 
511
                // Check if code not dubious
512
                if (codeMode == 1) {
513
 
514
                    parseInstruction();                    // Parse instruction
515
 
516
                    writeInstruction();                    // Write instruction
517
 
518
                    iInstr += instrLength * 4;             // Next instruction
519
 
520
                }
521
                else {
522
                    // This is data Skip to next label                                            
523
                }
524
            }
525
            writeSectionEnd();                             // Write segment directive
526
        }
527
        else {
528
            // This is a data section
529
            pInstr = 0; iRecord = 0; fInstr = 0;           // Set invalid pointers to zero
530
            operandType = 2;                               // Default data type is int32
531
            instrLength = 4;                               // Default data size is 4 bytes
532
            iInstr = 0;                                    // Instruction position
533
            nextSymbol = 0;
534
 
535
            writeDataItems();                              // Loop through data. Write data
536
 
537
            writeSectionEnd();                             // Write segment directive
538
        }
539
    }
540
}
541
 
542
 
543
 
544
/********************  Explanation of tracer:  ***************************
545
 
546
This is a machine which can trace the contents of each register in certain
547
situations. It is currently used for recognizing pointers to jump tables
548
in order to identify jump tables (to do!)
549
*/
550
void CDisassembler::updateTracer() {
    // Placeholder for tracing of register values. Nothing is traced at present.
    // pass1() calls this function once for every parsed instruction.
}
553
 
554
 
555
void CDisassembler::updateSymbols() {
556
    // Find unnamed symbols, determine symbol types,
557
    // update symbol list, call checkJumpTarget if jump/call.
558
    // This function is called during pass 1 for every instruction
559
    uint32_t relSource = 0; // Position of relocated field
560
 
561
    if (fInstr->category == 4 && fInstr->jumpSize) {
562
        // Self-relative jump instruction. Check OPJ
563
        // uint32_t opj = (instrLength == 1) ? pInstr->a.op1 : pInstr->b[0]; // Jump instruction opcode
564
        // Check if there is a relocation here
565
        relSource = iInstr + (fInstr->jumpPos); // Position of relocated field
566
        ElfFwcReloc rel;
567
        rel.r_offset = relSource;
568
        rel.r_section = section;
569
        rel.r_addend = 0;
570
        if (relocations.findFirst(rel) < 0) {
571
            // There is no relocation. Target must be in the same section. Find target
572
            int32_t offset = 0;
573
            switch (fInstr->jumpSize) {                // Read offset of correct size
574
            case 1:      // 8 bit
575
                offset = *(int8_t*)(sectionBuffer + relSource);
576
                rel.r_type = R_FORW_8 | 0x80000000;  //  add 0x80000000 to remember that this is not a real relocation
577
                break;
578
            case 2:      // 16 bit
579
                offset = *(int16_t*)(sectionBuffer + relSource);
580
                rel.r_type = R_FORW_16 | 0x80000000;
581
                break;
582
            case 3:      // 24 bit. Sign extend to 32 bits
583
                offset = *(int32_t*)(sectionBuffer + relSource) << 8 >> 8;
584
                rel.r_type = R_FORW_24 | 0x80000000;
585
                break;
586
            case 4:      // 32 bit
587
                offset = *(int32_t*)(sectionBuffer + relSource);
588
                rel.r_type = R_FORW_32 | 0x80000000;
589
                break;
590
            }
591
            // Scale offset by 4 and add offset to end of instruction
592
            int32_t target = iInstr + instrLength * 4 + offset * 4;
593
 
594
            // Add a symbol at target address if none exists
595
            ElfFwcSym sym;
596
            zeroAllMembers(sym);
597
            sym.st_bind = STB_LOCAL;
598
            sym.st_other = STV_EXEC;
599
            sym.st_section = section;
600
            sym.st_value = (uint64_t)(int64_t)target;
601
            symbolExeAddress(sym);
602
            int32_t symi = symbols.findFirst(sym);
603
            if (symi < 0) {
604
                symi = newSymbols.push(sym);           // Add symbol to new symbols table
605
                symi |= 0x80000000;                    // Upper bit means index refers to newSymbols
606
            }
607
            // Add a dummy relocation record for this symbol. 
608
            // This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
609
            // It does have a size, though, because this is checked later in writeRelocationTarget()
610
            rel.r_sym = (uint32_t)symi;
611
            relocations.addUnique(rel);
612
        }
613
    }
614
 
615
    // Check if instruction has a memory reference relative to IP, DATAP, or THREADP
616
    uint32_t basePointer = 0;
617
    if (fInstr->mem & 2) basePointer = pInstr->a.rs;
618
    relSource = iInstr + fInstr->addrPos; // Position of relocated field
619
 
620
    if (fInstr->addrSize > 1 && basePointer >= 28 && basePointer <= 30 && !(fInstr->mem & 0x20)) {
621
        // Memory operand is relative to THREADP, DATAP or IP
622
        // Check if there is a relocation here
623
        uint32_t relpos = iInstr + fInstr->addrPos;
624
        ElfFwcReloc rel;
625
        rel.r_offset = relpos;
626
        rel.r_section = section;
627
        rel.r_type = (operandType | 0x80) << 24;
628
        uint32_t nrel, irel = 0;
629
        nrel = relocations.findAll(&irel, rel);
630
        if (nrel > 1) writeWarning("Overlapping relocations here");
631
        if (nrel) {
632
            // Relocation found. Put the data type into the relocation record. 
633
            // The data type will later be transferred to the symbol record in joinSymbolTables()
634
            if (!(relocations[irel].r_type & 0x80000000)) {
635
                // Save target data type in upper 8 bits of r_type
636
                relocations[irel].r_type = (relocations[irel].r_type & 0x00FFFFFF) | (operandType /*| 0x80*/) << 24;
637
            }
638
            // Check if the target is a section + offset
639
            uint32_t symi = relocations[irel].r_sym;
640
            if (symi < symbols.numEntries() && symbols[symi].st_type == STT_SECTION && relocations[irel].r_addend > 0) {
641
                // Add a new symbol at this address
642
                ElfFwcSym sym;
643
                zeroAllMembers(sym);
644
                sym.st_bind = STB_LOCAL;
645
                sym.st_other = STT_OBJECT;
646
                sym.st_section = symbols[symi].st_section;
647
                sym.st_value = symbols[symi].st_value + (int64_t)relocations[irel].r_addend;
648
                symbolExeAddress(sym);
649
                uint32_t symi2 = newSymbols.push(sym);
650
                relocations[irel].r_sym = symi2 | 0x80000000;  // Upper bit means index refers to newSymbols
651
                relocations[irel].r_addend = 0;
652
            }
653
        }
654
        else if (basePointer == REG_IP >> 16 && fInstr->addrSize > 1 && !(fInstr->mem & 0x20)) {
655
            // No relocation found. Insert new relocation and new symbol
656
            // This fits the address instruction with a local IP target.
657
            // to do: Make it work for other cases
658
 
659
            // Add a symbol at target address if none exists
660
            int32_t target = iInstr + instrLength * 4;
661
            switch (fInstr->addrSize) {                // Read offset of correct size
662
            /* case 1:      // 8 bit. cannot use IP
663
                target += *(int8_t*)(sectionBuffer + relSource) << (operandType & 7);
664
                rel.r_type = R_FORW_8 | R_FORW_SELFREL | 0x80000000;
665
                break;*/
666
            case 2:      // 16 bit
667
                target += *(int16_t*)(sectionBuffer + relSource);
668
                rel.r_type = R_FORW_16 | R_FORW_SELFREL | 0x80000000;
669
                break;
670
            case 4:      // 32 bit
671
                target += *(int32_t*)(sectionBuffer + relSource);
672
                rel.r_type = R_FORW_32 | R_FORW_SELFREL | 0x80000000;
673
                break;
674
            }
675
            ElfFwcSym sym;
676
            zeroAllMembers(sym);
677
            sym.st_bind = STB_LOCAL;
678
            sym.st_other = STV_EXEC;
679
            sym.st_section = section;
680
            sym.st_value = (uint64_t)(int64_t)target;
681
 
682
            symbolExeAddress(sym);
683
            int32_t symi = symbols.findFirst(sym);
684
            if (symi < 0) {
685
                symi = newSymbols.push(sym);           // Add symbol to new symbols table
686
                symi |= 0x80000000;                    // Upper bit means index refers to newSymbols
687
            }
688
            // Add a dummy relocation record for this symbol. 
689
            // This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
690
            // It does have a size, though, because this is checked later in writeRelocationTarget()
691
            rel.r_offset = (uint64_t)iInstr + fInstr->addrPos; // Position of relocated field
692
            rel.r_section = section;
693
            rel.r_addend = -4;
694
            rel.r_sym = (uint32_t)symi;
695
            relocations.addUnique(rel);
696
        }
697
        else if ((basePointer == REG_DATAP >> 16 || basePointer == REG_THREADP >> 16)
698
            && fInstr->addrSize > 1 && !(fInstr->mem & 0x20) && isExecutable) {
699
            // No relocation found. Insert new relocation and new symbol. datap or threadp based
700
 
701
            // Add a symbol at target address if none exists
702
            int64_t target = fileHeader.e_datap_base;
703
            rel.r_type = R_FORW_DATAP;
704
            uint32_t dom = 2;
705
            uint32_t st_other = STV_DATAP;
706
            if (basePointer == REG_THREADP >> 16) {
707
                target = fileHeader.e_threadp_base;
708
                rel.r_type = R_FORW_THREADP;
709
                dom = 3;
710
                st_other = STV_THREADP;
711
            }
712
            switch (fInstr->addrSize) {                // Read offset of correct size
713
            case 1:      // 8 bit
714
                target += *(int8_t*)(sectionBuffer + relSource);
715
                rel.r_type |= R_FORW_8 | 0x80000000;
716
                break;
717
            case 2:      // 16 bit
718
                target += *(int16_t*)(sectionBuffer + relSource);
719
                rel.r_type |= R_FORW_16 | 0x80000000;
720
                break;
721
            case 4:      // 32 bit
722
                target += *(int32_t*)(sectionBuffer + relSource);
723
                rel.r_type |= R_FORW_32 | 0x80000000;
724
                break;
725
            }
726
            ElfFwcSym sym;
727
            zeroAllMembers(sym);
728
            sym.st_type = STT_OBJECT;
729
            sym.st_bind = STB_WEAK;
730
            sym.st_other = st_other;
731
            sym.st_section = dom;
732
            sym.st_value = (uint64_t)target;
733
 
734
            int32_t symi = symbols.findFirst(sym);
735
            if (symi < 0) {
736
                symi = newSymbols.push(sym);           // Add symbol to new symbols table
737
                symi |= 0x80000000;                    // Upper bit means index refers to newSymbols
738
            }
739
            // Add a dummy relocation record for this symbol. 
740
            // This relocation does not need type, scale, or addend because the only purpose is to identify the symbol.
741
            // It does have a size, though, because this is checked later in writeRelocationTarget()
742
            rel.r_offset = iInstr + fInstr->addrPos; // Position of relocated field
743
            rel.r_section = section;
744
            rel.r_addend = 0;
745
            rel.r_sym = (uint32_t)symi;
746
            relocations.addUnique(rel);
747
        }
748
    }
749
}
750
 
751
 
752
void CDisassembler::followJumpTable(uint32_t symi, uint32_t RelType) {
    // Placeholder for checking a jump/call table and its targets.
    // Not implemented yet: both arguments are ignored.
    (void)symi;
    (void)RelType;
}
756
 
757
 
758
void CDisassembler::markCodeAsDubious() {
    // Placeholder: meant to remember that this may be data in a code segment.
    // Currently has no effect.
    return;
}
761
 
762
 
763
// List of instructionlengths, used in parseInstruction
764
static const uint8_t lengthList[8] = {1,1,1,1,2,2,3,4};
765
 
766
 
767
void CDisassembler::parseInstruction() {
768
    // Parse one opcode at position iInstr
769
    instructionWarning = 0;
770
 
771
    // Get instruction
772
    pInstr = (STemplate*)(sectionBuffer + iInstr);
773
 
774
    // Get op1
775
    uint8_t op = pInstr->a.op1;
776
 
777
    // Get format
778
    format = (pInstr->a.il << 8) + (pInstr->a.mode << 4); // Construct format = (il,mode,submode)
779
 
780
    // Get submode
781
    switch (format) {
782
    case 0x200: case 0x220: case 0x300: case 0x320:  // submode in mode2
783
        format += pInstr->a.mode2;
784
        break;
785
    case 0x250: case 0x310: // Submode for jump instructions etc.
786
        if (op < 8) {
787
            format += op;  op = pInstr->b[0] & 0x3F;
788
        }
789
        else {
790
            format += 8;
791
        }
792
        break;
793
    }
794
 
795
    // Look up format details
796
    static SFormat form;
797
    fInstr = &formatList[lookupFormat(pInstr->q)];     // lookupFormat is in emulator2.cpp
798
    format = fInstr->format2;                          // Include subformat depending on op1
799
    if (fInstr->tmplate == 0xE && pInstr->a.op2 && !(fInstr->imm2 & 0x100)) {
800
        // Single format instruction if op2 != 0 and op2 not used as immediate operand
801
        form = *fInstr;
802
        form.category = 1;
803
        fInstr = &form;
804
    }
805
 
806
    // Get operand type
807
    if (fInstr->ot == 0) {                                 // Operand type determined by OT field
808
        operandType = pInstr->a.ot;                        // Operand type
809
        if (!(pInstr->a.mode & 6) && !(fInstr->vect & 0x11)) {
810
            // Check use of M bit
811
            format |= (operandType & 4) << 5;              // Add M bit to format
812
            operandType &= ~4;                             // Remove M bit from operand type
813
        }
814
    }
815
    else if ((fInstr->ot & 0xF0) == 0x10) {                // Operand type fixed. Value in formatList
816
        operandType = fInstr->ot & 7;
817
    }
818
    else if (fInstr->ot == 0x32) {                         // int32 for even op1, int64 for odd op1
819
        operandType = 2 + (pInstr->a.op1 & 1);
820
    }
821
    else if (fInstr->ot == 0x35) {                         // Float for even op1, double for odd op1
822
        operandType = 5 + (pInstr->a.op1 & 1);
823
    }
824
    else {
825
        operandType = 0;                                   // Error in formatList. Should not occur
826
    }
827
 
828
    // Find instruction length
829
    instrLength = lengthList[pInstr->i[0] >> 29];           // Length up to 3 determined by il. Length 4 by upper bit of mode
830
 
831
    // Find any reasons for warnings
832
    //findWarnings(p);
833
 
834
    // Find any errors
835
    //findErrors(p);
836
}
837
 
838
 
839
 
840
/*****************************************************************************
841
Functions for reading instruction list from comma-separated file,
842
sorting, and searching
843
*****************************************************************************/
844
 
845
// Members of class CCSVFile for reading comma-separated file
846
 
847
// Read and parse file
848
void CCSVFile::parse() {
849
    // Sorry for the ugly code!
850
 
851
    const char * fields[numInstructionColumns];  // pointer to each field in line
852
    int fi = 0;                                  // field index
853
    uint32_t i, j;                               // loop counters
854
    char * s, * t = 0;                           // point to begin and end of field
855
    char c;
856
    char separator = 0;                          // separator character, preferably comma
857
    int line = 1;                                // line number
858
    SInstruction record;                         // record constructed from line
859
    zeroAllMembers(fields);
860
 
861
    if (data_size==0) read(cmd.getFilename(cmd.instructionListFile), 2);                    // read file if it has not already been read
862
    if (err.number()) return;
863
 
864
    // loop through file
865
    for (i = 0; i < data_size; i++) {
866
        // find begin of field, quoted or not
867
        s = (char*)buf() + i;
868
        c = *s;
869
        if (c == ' ') continue;                  // skip leading spaces
870
 
871
        if (c == '"' || c == 0x27) {             // single or double quote
872
            fields[fi] = s+1;                    // begin of quoted string
873
            for (i++; i < data_size; i++) {      // search for matching end quote
874
                t = (char*)buf() + i;
875
                if (*t == c) {
876
                    *t = 0; i++;                 // End quote found. Put end of string here
877
                    goto SEARCHFORCOMMA;
878
                }
879
                if (*t == '\n') break;           // end of line found before end quote
880
            }
881
            // end quote not found
882
            err.submit(ERR_INSTRUCTION_LIST_QUOTE, line);
883
            return;
884
        }
885
        if (c == '\r' || c == '\n')
886
            goto NEXTLINE;  // end of line found
887
        if (c == separator || c == ',') {
888
            // empty field
889
            fields[fi] = "";
890
            goto SEARCHFORCOMMA;
891
        }
892
 
893
        // Anything else: begin of unquoted string
894
        fields[fi] = s;
895
        // search for end of field
896
 
897
    SEARCHFORCOMMA:
898
        for (; i < data_size; i++) {  // search for comma after field
899
            t = (char*)buf() + i;
900
            if (*t == separator || (separator == 0 && (*t == ',' || *t == ';' || *t == '\t'))) {
901
                separator = *t;               // separator set to the first comma, semicolon or tabulator
902
                *t = 0;                       // put end of string here
903
                goto NEXTFIELD;
904
            }
905
            if (*t == '\n') break;        // end of line found before comma
906
        }
907
        fi++;
908
        goto NEXTLINE;
909
 
910
    NEXTFIELD:
911
        // next field
912
        fi++;
913
        if (fi != numInstructionColumns) continue;
914
        // end of last field
915
 
916
    NEXTLINE:
917
        for (; i < data_size; i++) {  // search for end. of line
918
            t = (char*)buf() + i;
919
            // accept newlines as "\r", "\n", or "\r\n"
920
            if (*t == '\r' || *t == '\n') break;
921
        }
922
        if (*t == '\r' && *(t+1) == '\n') i++;  // end of line is two characters
923
        *t = 0;  // terminate line
924
 
925
        // make any remaining fields blank
926
        for (; fi < numInstructionColumns; fi++) {
927
            fields[fi] = "";
928
        }
929
        // Begin next line
930
        line++;
931
        fi = 0;
932
 
933
        // Check if blank or heading record
934
        if (fields[2][0] < '0' || fields[2][0] > '9') continue;
935
 
936
        // save values to record
937
        // most fields are decimal or hexadecimal numbers
938
        record.id = (uint32_t)interpretNumber(fields[1]);
939
        record.category = (uint32_t)interpretNumber(fields[2]);
940
        record.format = interpretNumber(fields[3]);
941
        record.templt = (uint32_t)interpretNumber(fields[4]);
942
        record.sourceoperands = (uint32_t)interpretNumber(fields[6]);
943
        record.op1 = (uint32_t)interpretNumber(fields[7]);
944
        record.op2 = (uint32_t)interpretNumber(fields[8]);
945
        record.optypesgp = (uint32_t)interpretNumber(fields[9]);
946
        record.optypesscalar = (uint32_t)interpretNumber(fields[10]);
947
        record.optypesvector = (uint32_t)interpretNumber(fields[11]);
948
        // interpret immediate operand
949
        if (tolower(fields[12][0]) == 'i') {
950
            // implicit immediate operand. value is prefixed by 'i'. Get value
951
            record.implicit_imm = (uint32_t)interpretNumber(fields[12]+1);
952
            record.opimmediate = OPI_IMPLICIT;
953
        }
954
        else {
955
            // immediate operand type
956
            record.opimmediate = (uint8_t)interpretNumber(fields[12]);
957
        }
958
        // interpret template variant
959
        record.variant = interpretTemplateVariants(fields[5]);
960
        // copy instruction name
961
        for (j = 0; j < sizeof(record.name)-1; j++) {
962
            c = fields[0][j];
963
            if (c == 0) break;
964
            record.name[j] = tolower(c);
965
        }
966
        record.name[j] = 0;
967
 
968
        // add record to list
969
        instructionlist.push(record);
970
    }
971
}
972
 
973
// Interpret number in instruction list
974
uint64_t CCSVFile::interpretNumber(const char * text) {
975
    uint32_t error = 0;
976
    uint64_t result = uint64_t(::interpretNumber(text, 64, &error));
977
    if (error)  err.submit(ERR_INSTRUCTION_LIST_SYNTAX, text);
978
    return result;
979
}
980
 
981
 
982
// Helper for interpretNumber: append one digit in the given base to number.
// Returns false, leaving number unchanged, if the result does not fit in 64 bits.
static bool appendDigitChecked(uint64_t & number, uint32_t base, uint32_t digit) {
    const uint64_t maxValue = ~uint64_t(0);
    if (number > (maxValue - digit) / base) return false;
    number = number * base + digit;
    return true;
}

// Interpret a string with a decimal, binary, octal, or hexadecimal number.
//
// Accepted syntax: optional blanks, '+' and '-' signs, then either decimal
// digits or one of the prefixes 0x (hexadecimal), 0b (binary), 0o (octal)
// followed by digits, then optional trailing blanks. Letters are not case
// sensitive. Each '-' toggles the sign.
//
// text:      string to interpret. Reading stops at a zero byte, at a comma,
//            or after maxLength characters, whichever comes first.
// maxLength: maximum number of characters to read.
// error:     receives 0 on success, 1 on syntax error (also if text is null,
//            if a prefix is not followed by any digit, or if the value does
//            not fit in 64 bits), or (position | 0x1000) if the number was
//            ended by a comma at that position.
// Return value: the number, negated if the sign is negative. 0 on error.
//            An empty or blank string gives 0 without error.
int64_t interpretNumber(const char * text, uint32_t maxLength, uint32_t * error) {
    int state = 0;           // 0: begin, 1: after 0,
                             // 2: after 0x, 3: after 0b, 4: after 0o
                             // 5: after decimal digit, 6: trailing space
    uint64_t number = 0;     // magnitude accumulated so far
    bool sign = false;       // true if negative
    bool digitRequired = false; // true after a prefix until the first digit is seen
    *error = 0;
    if (text == 0) {
        *error = 1;  return 0;
    }

    for (uint32_t i = 0; i < maxLength; i++) {
        const uint8_t c = (uint8_t)text[i];      // read character
        const uint8_t clower = c | 0x20;         // convert letters to lower case
        if (clower == 'x' || clower == 'o') {
            // prefix letter is only allowed immediately after a leading 0
            if (state != 1) {
                *error = 1;  return 0;
            }
            state = (clower == 'x') ? 2 : 4;
            digitRequired = true;
        }
        else if (clower == 'b' && state == 1) {
            // binary prefix. In any other state, 'b' is handled as a hexadecimal digit below
            state = 3;
            digitRequired = true;
        }
        else if (c >= '0' && c <= '9') {
            // digit 0 - 9
            const uint32_t digit = c - '0';
            if (state == 0) {
                // first digit. A leading 0 may begin a prefix
                state = (digit == 0) ? 1 : 5;
                number = digit;
            }
            else {
                uint32_t base;
                switch (state) {
                case 1:                          // digit after leading 0: decimal
                    state = 5;  base = 10;  break;
                case 5:
                    base = 10;  break;           // decimal
                case 2:
                    base = 16;  break;           // hexadecimal
                case 3:
                    base = 2;   break;           // binary
                case 4:
                    base = 8;   break;           // octal
                default:
                    // digit after trailing space
                    *error = 1;  return 0;
                }
                if (digit >= base || !appendDigitChecked(number, base, digit)) {
                    // digit not valid in this base, or number too big
                    *error = 1;  return 0;
                }
                digitRequired = false;
            }
        }
        else if (clower >= 'a' && clower <= 'f') {
            // hexadecimal digit. Only allowed after 0x
            if (state != 2 || !appendDigitChecked(number, 16, clower - ('a' - 10))) {
                *error = 1;  return 0;
            }
            digitRequired = false;
        }
        else if (c == ' ' || c == '+') {
            // ignore leading or trailing blank or plus
            if (state > 0) state = 6;
        }
        else if (c == '-') {
            // change sign. Only allowed before the first digit
            if (state != 0) {
                *error = 1;  return 0;
            }
            sign = ! sign;
        }
        else if (c == 0) break;                  // end of string
        else if (c == ',') {
            *error = i | 0x1000;                 // end with comma. return position in error
            break;
        }
        else {
            // illegal character
            *error = 1;  return 0;
        }
    }
    if (digitRequired) {
        // prefix 0x, 0b or 0o without any digit
        *error = 1;  return 0;
    }
    if (sign) number = uint64_t(0) - number;     // two's complement negation
    return (int64_t)number;
}
1084
 
1085
void CDisassembler::getLineList(CDynamicArray<SLineRef> & list) {
    // Hand lineList over to the list supplied by the caller (the debugger),
    // using the << operator of CDynamicArray
    list << lineList;
}
1089
 
1090
void CDisassembler::getOutFile(CTextFileBuffer & buffer) {
    // Give the debugger the disassembly output:
    // outFile is passed to the copy function of the caller's buffer
    buffer.copy(outFile);
}
1094
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.