OpenCores
URL https://opencores.org/ocsvn/forwardcom/forwardcom/trunk

Subversion Repositories forwardcom

[/] [forwardcom/] [bintools/] [disassem.h] - Rev 160

Go to most recent revision | Compare with Previous | Blame | View Log

/****************************  disassem.h   **********************************
* Author:        Agner Fog
* Date created:  2017-04-26
* Last modified: 2021-04-11
* Version:       1.11
* Project:       Binary tools for ForwardCom instruction set
* Module:        disassem.h
* Description:
* Header file for disassembler
*
* Copyright 2006-2021 GNU General Public License http://www.gnu.org/licenses
*****************************************************************************/
 
 
/* Additional information stored in symbol records during disassembly:
ElfFwcSym::st_other    bit 31 set if symbol has been written out in section listing
ElfFwcSym::st_reguse1  old symbol index before sorting and adding more symbols, 
                        needs translation to new index in relocation records
ElfFwcSym::st_size     reference symbol if symbol is a relative pointer, stored in bit 32-63 of st_size
ElfFwcSym::st_reguse2  symbol data type: bit 0-2: operand type, bit 3 = 1
                        0x100 code pointer
                        0x200 data pointer
*/
 
 
// Universal template for all instruction formats.
// All members of the union overlay the same instruction bytes, so a code word
// can be read through whichever view matches its format.
// The bit-fields in 'a' add up to 64 bits and those in 'd' add up to 32 bits.
// NOTE(review): the allocation order of bit-fields is implementation-defined in C++;
// this layout presumably relies on low-bit-first allocation - confirm for the target compilers.
union STemplate {
    uint64_t     q;          // First 64 bits
    struct {
        uint32_t rt:    5;   // Source register RT in formats A and E
        uint32_t mask:  3;   // Mask register in formats A and E
        uint32_t rs:    5;   // Source register RS in formats A, B, and E
        uint32_t ot:    3;   // Operand type and M in formats A, B, and E
        uint32_t rd:    5;   // Destination register RD in formats A, B, C, and E
        uint32_t op1:   6;   // Operation code OP1
        uint32_t mode:  3;   // Mode in all formats
        uint32_t il:    2;   // Instruction length in all formats
        uint32_t im2:  16;   // IM2 in format E
        uint32_t im3:   6;   // IM3 in format E
        uint32_t op2:   2;   // OP2 in format E
        uint32_t ru:    5;   // Source register RU in format E
        uint32_t mode2: 3;   // Mode2 in format E
    } a;
    struct {
        int32_t  im2:  24;   // IM2 in format D (signed field)
        uint32_t op1:   3;   // The remaining part of OP1 in format D
        uint32_t mode:  3;   // Mode in format D
        uint32_t il:    2;   // Instruction length in format D
    } d;
    uint8_t      b[12];      // Byte view (12 bytes). IM1 in format B
    uint16_t     s[4];       // 16-bit view. IM1+2 in format C
    uint32_t     i[3];       // 32-bit word view. IM2 and IM3 in format A2, A3, B2, B3
    float        f[2];       // IM2 as float
};
 
// Record in nested lookup lists for formats.
// crit = 0 means that index is an index into the format table
// crit > 0 means that index is an offset into the next table. Add something to this offset according to the criterion
struct SFormatIndex {
    uint8_t  crit;           // Criterion for lookup into next table: 0 = format table. 
                             // 1: mode2, 2: op1 / 8, 3: op1 % 8, 4: IM1 % 64 / 8, 5: IM1 % 8, 
                             // 6: IM12 == 0xFFFF
    uint8_t  index;          // crit = 0: index into the format table. crit > 0: offset into next table
}; 
 
// Record in list of formats (formatList).
// Each record describes one instruction format: which template it uses, which
// operands are available, and the size and position of its jump offset, address
// offset, and immediate fields. Most members are bit flags or small codes; the
// meaning of each value is listed next to the member.
struct SFormat { 
    uint16_t format2;        // 0x0XYZ, where X = il, Y = mode, Z = subformat (mode2 or OP1) or variant within format
    uint8_t  category;       // Category: 1 = single format,  3 = multi-format, 4 = jump instruction
    uint8_t  tmplate;        // Template: 0xA, 0xB, 0xC, 0xD, 0xE.

    uint8_t  opAvail;        // Operands available: 1 = immediate, 2 = memory,
                             // 0x10 = RT, 0x20 = RS, 0x40 = RU, 0x80 = RD

    uint8_t  ot;             // Operand type. 0: determined by OT field. 0x10 - 0x17: 0-7. 
                             // 0x32: int32 for even OP1, int64 for odd OP1
                             // 0x35: float for even OP1, double for odd OP1

    uint8_t  jumpSize;       // Size of jump offset field (bytes)
    uint8_t  jumpPos;        // Position of jump offset field (bytes)
    uint8_t  addrSize;       // Size of address offset field (bytes)
    uint8_t  addrPos;        // Position of address offset field (bytes)
    uint8_t  immSize;        // Size of first immediate operand (bytes)
    uint8_t  immPos;         // Position of first immediate operand (bytes)

    uint16_t imm2;           // Size and position of extra immediate operands:
                             // 1 = IM2 in template C, 
                             // 2 = IM3 in template E may contain options, 
                             // 4 = IM3 is shift count for IM2 if no options, 
                             // 8 = IM2 is shift count for IM4,
                             // 0x10 = IM3 in template A3 or B3, 
                             // 0x40 = has fixed values
                             // 0x80 = jump OPJ in IM1
                             // 0x90 = jump OPJ is in high part of IM2 in format A2
                             // 0xC0 = jump with no OPJ
                             // 0x100 = OP2 is used for immediate operand as extension of IM3

    uint8_t  vect;           // 1 = vector registers used, 2 = vector length in RT, 4 = broadcast length in RT
                             // 0x10 = vector registers used if M bit

    uint8_t  mem;            // 1 (unused), 2 = base in RS, 4 = index in RT, 
                             // 0x10 = has offset, 0x20 = has limit

    uint8_t  scale;          // 1 = offset is scaled, 2 = index is scaled by OS, 4 = scale factor is -1
    uint8_t  formatIndex;    // Bit index into format in instruction list
    uint8_t  exeTable;       // table of function pointers used during emulation
};
 
// Tables defined in emulator2.cpp.
// The dataSize* tables have 8 entries each; the value lists in the comments show the
// contents. NOTE(review): they are presumably indexed by operand type (0-7) - confirm
// against the definitions.
extern const SFormat formatList[];            // list of instruction formats (== FXEND in disasm1.cpp)
extern uint32_t formatListSize;               // size of formatList (NOTE(review): entries or bytes - confirm)
extern const uint32_t dataSizeTable[8];       // = {1, 2, 4, 8, 16, 4, 8, 16};
extern const uint32_t dataSizeTableMax8[8];   // = {1, 2, 4, 8, 8, 4, 8, 8}; 
extern const uint32_t dataSizeTableLog[8];    // = {0, 1, 2, 3, 4, 2, 3, 4};
extern const uint32_t dataSizeTableBits[8];   // = {8, 16, 32, 64, 128, 32, 64, 128};
extern const uint64_t dataSizeMask[8];        // = {FF, FFFF, FFFFFFFF, -1, -1, FFFFFFFF, -1, -1};
 
 
// Ordering of symbols by address. Used by disassembler.
// Symbols are ordered by section first; within the same section the
// symbol with the lower st_value comes first.
static inline bool operator < (ElfFwcSym const & a, ElfFwcSym const & b) {
    const bool sameSection = (a.st_section == b.st_section);
    return sameSection ? (a.st_value < b.st_value)
                       : (a.st_section < b.st_section);
}
 
// Operator == tells if two symbols have the same address,
// i.e. the same section and the same st_value
static inline bool operator == (ElfFwcSym const & a, ElfFwcSym const & b) {
    if (a.st_section != b.st_section) return false;  // different sections never match
    return a.st_value == b.st_value;
}
 
// Ordering of relocations by address. Used by disassembler.
// The section is the primary key and r_offset is the secondary key.
static inline bool operator < (ElfFwcReloc const & a, ElfFwcReloc const & b) {
    if (a.r_section != b.r_section) return a.r_section < b.r_section;
    return a.r_offset < b.r_offset;
}
 
// Look up format in FormatList (this function is in emulator2.cpp).
// Takes an instruction code word.
// NOTE(review): the return value is presumably an index into formatList - confirm in emulator2.cpp
uint32_t lookupFormat(uint64_t instruct);
 
// Check integrity of format lists
void checkFormatListIntegrity();
 
// Interpret template variants in instruction record.
// Takes a text string and returns a combination of the VARIANT_xx bits defined in this header
uint64_t interpretTemplateVariants(const char *);
// Bit flags returned by interpretTemplateVariants.
// Bits 0-28 fit in an int; the special register flags start at bit VARIANT_SPECB
// and therefore need a 64-bit type.
const int VARIANT_D0 = 1 << 0;                   // no destination, no operand type
const int VARIANT_D1 = 1 << 1;                   // no destination, but operand type specified
const int VARIANT_D2 = 1 << 2;                   // operand type ignored
const int VARIANT_D3 = 1 << 3;                   // register RD used for other purpose
const int VARIANT_M0 = 1 << 4;                   // memory operand destination
//const int VARIANT_M1 = 1 << 5;                 // IM3 used as extra immediate operand in E formats with a memory operand. obsolete
const int VARIANT_R0 = 1 << 8;                   // destination is general purpose register
const int VARIANT_R1B = 9;                       // bit index to VARIANT_R1
const int VARIANT_R1 = 1 << VARIANT_R1B;         // first source operand is general purpose register
const int VARIANT_R2 = VARIANT_R1 << 1;          // second source operand is general purpose register
const int VARIANT_R3 = VARIANT_R1 << 2;          // third source operand is general purpose register
const int VARIANT_R123 = VARIANT_R1 | VARIANT_R2 | VARIANT_R3; // mask of all three source operand register bits
const int VARIANT_D3R0 = VARIANT_D3 | VARIANT_R0; // RD is general purpose register
const int VARIANT_RL = 1 << 12;                  // RS is a general purpose register specifying length
const int VARIANT_F0 = 1 << 14;                  // can have mask register, but not fallback register
const int VARIANT_F1 = 1 << 15;                  // can have fallback register without mask register
const int VARIANT_I2 = 1 << 16;                  // immediate operand is integer
const int VARIANT_U0 = 1 << 18;                  // integer operands are unsigned
const int VARIANT_U3 = 1 << 19;                  // integer operands are unsigned if bit 3 in IM3 (format 2.4.x, 2.8.x) is set.
//const int VARIANT_Kn = 1 << 20;                // integer operand is implicit
const int VARIANT_On = 7 << 24;                  // n IM3 bits used for options (3-bit field)
const int VARIANT_H0 = 1 << 28;                  // half precision floating point operands
const int VARIANT_SPECB = 32;                    // bit index to special register type
const uint64_t VARIANT_SPEC = (uint64_t)0xF << VARIANT_SPECB;       // Special register types for operands (4-bit field)
const uint64_t VARIANT_SPECS = (uint64_t)1 << (VARIANT_SPECB + 4);  // Special register type for source
const uint64_t VARIANT_SPECD = (uint64_t)1 << (VARIANT_SPECB + 5);  // Special register type for destination
 
 
// Forward declarations. They allow the pointers and template arguments in
// CDisassembler to refer to these types before their full definitions.
struct SInstruction2;                            // defined below
struct SLineRef;                                 // defined below
 
// Find fallback register in instruction code.
// fInstr points to the format record and pInstr to the instruction code.
// NOTE(review): nOperands is presumably the number of source operands, and the
// return value the register number - confirm in the implementation.
uint8_t findFallback(SFormat const * fInstr, STemplate const * pInstr, int nOperands); // find fallback register in instruction code
 
// class CDisassembler handles disassembly of ForwardCom ELF file.
// It derives from CELF. The work is split into pass1, which resolves cross
// references and adds symbol labels, and pass2, which writes the output file.
// The public data members are output settings; the protected data members hold
// the state of the section and instruction currently being processed.
class CDisassembler : public CELF {
public:
    CDisassembler();                             // Constructor
    void getComponents1();                       // Read instruction list, split ELF file into components
    void getComponents2(CELF const & assembler, CMemoryBuffer const & instructList);// Read instruction list, get ELF components for assembler output listing
    void go();                                   // Disassemble
    void getLineList(CDynamicArray<SLineRef> & list); // transfer lineList to debugger
    void getOutFile(CTextFileBuffer & buffer);   // transfer outFile to debugger
    // Output settings
    uint32_t outputFile;                         // Output file name, as index into cmd.fileNameBuffer
    uint8_t  debugMode;                          // produce disassembly for emulator/debugger
    uint8_t asmTab0;                             // Column for operand type
    uint8_t asmTab1;                             // Column for opcode
    uint8_t asmTab2;                             // Column for first operand
    uint8_t asmTab3;                             // Column for comment
protected:
    // State of the current pass, section, and instruction
    uint32_t pass;                               // Pass number
    uint32_t codeMode;                           // 1 = code, 2 = data in code section, 4 = data section
    uint32_t iInstr;                             // Position of current instruction relative to section start
    uint32_t instrLength;                        // Length of current instruction, in 32-bit words
    uint32_t operandType;                        // Operand type of current instruction
    uint32_t format;                             // Format of current instruction
    uint32_t nextSymbol;                         // Index to next symbol label to write out
    uint32_t section;                            // Current section
    uint32_t sectionEnd;                         // Size of section
    uint64_t sectionAddress;                     // Start address of section
    uint32_t currentFunction;                    // Symbol index of current function
    uint32_t currentFunctionEnd;                 // Address of end of current function
    uint32_t instructionWarning;                 // Warnings and errors for current instruction
    uint32_t relocation;                         // relocation index in current instruction + 1
    int8_t * sectionBuffer;                      // Pointer to start of current section
    uint64_t variant;                            // Template variant and options (VARIANT_xx bits)
    STemplate const * pInstr;                    // Pointer to current instruction code
    SInstruction2 const * iRecord;               // Pointer to instruction table entry
    SFormat const * fInstr;                      // Format details of current instruction code
    // Tables and output buffer
    CDynamicArray<SInstruction2> instructionlist;// List of instruction set, sorted by category, format, and op1
    CDynamicArray<ElfFwcSym> newSymbols;         // List of new symbols added during pass 1
    CDynamicArray<SLineRef> lineList;            // Cross reference of code addresses to lines in outFile (used by debugger)
    CTextFileBuffer outFile;                     // Output file
    bool isExecutable;                           // Disassembling executable file
    // Helper functions
    void feedBackText1();                        // Write feedback text on stdout
    void parseInstruction();                     // Parse current instruction
    //void CheckInstructionErrors();             // Check if instruction is valid
    void writeInstruction();                     // Write current instruction to output file
    void writeNormalInstruction();               // Write normal instruction to output file
    void writeJumpInstruction();                 // Write jump instruction to output file
    void writeOperandType(uint32_t ot);          // Write operand type
    void writeMemoryOperand();                   // Write memory operand of current instruction
    void writeImmediateOperand();                // Write immediate operand depending on type in instruction list
    void writeRegister(uint32_t r, uint32_t ot); // Write name of general purpose or vector register
    void writeGPRegister(uint32_t r);            // Write name of general purpose register
    void writeVectorRegister(uint32_t v);        // Write name of vector register
    void writeSpecialRegister(uint32_t r, uint32_t type); // Write name of other type of register
    void pass1();                                // Pass 1 of disassembly. Resolves cross references and adds symbol labels
    void pass2();                                // Pass 2 of disassembly. Writes output file
    void sortSymbolsAndRelocations();            // Sort symbols and relocations by address
    void symbolExeAddress(ElfFwcSym & sym);      // Translate symbol address from section:offset to pointerbase:address
    void updateSymbols();                        // Make missing symbols for jump targets and data references
    void joinSymbolTables();                     // Join the tables: symbols and newSymbols
    void assignSymbolNames();                    // Make names for unnamed symbols
    void initializeInstructionList();            // Read instruction list from file and sort it
    void updateTracer();                         // Trace registers pointing to jump table (to do)
    void followJumpTable(uint32_t symi, uint32_t RelType); // Trace targets of jump table  (to do)
    void markCodeAsDubious();                    // Mark data in code section
    void writeFileBegin();                       // Write beginning of disassembly file
    void writeFileEnd();                         // Write end of disassembly file
    void writeSectionBegin();                    // Write beginning of section
    void writeSectionEnd();                      // Write end of section
    void writeCodeComment();                     // Write comment after instruction
    void writeDataItems();                       // Write data to disassembly file
    void writeLabels();                          // Find and write any labels at current position
    void writeRelocationTarget(uint32_t src, uint32_t size); // Write relocation target for this source position
    void writeJumpTarget(uint32_t src, uint32_t size); // Write jump relocation target for this source position
    void writeWarning(const char * w);           // Write warning message to output file
    void writeError(const char * w);             // Write error message to output file
    void finalErrorCheck();                      // Check for wrong entries in symbol table and relocations table
    //void checkNamesValid();                      // Check for illegal characters in symbols
    void writeSymbolName(uint32_t symi);         // Write name of symbol
    void writeSectionName(int32_t SegIndex);     // Write name of section
    void writePublicsAndExternals();             // Write list of public and external symbols
    void writeAddress();                         // write code address
    void setTabStops();                          // set tab stops for output
};
 
 
/*****************************************************************************
Structures and classes for reading instruction list from comma-separated file,
sorting, and searching
*****************************************************************************/
 
const int maxINameLen = 31;            // Maximum length of instruction name, not counting the extra byte in SInstruction::name
const int numInstructionColumns = 13;  // Number of columns in csv file to read. Additional columns are ignored
 
// Record structure for instruction definition.
// One record describes one instruction in the instruction list. The derived
// types SInstruction2 and SInstruction3 have the same members but are sorted
// by different keys.
struct SInstruction {
    uint64_t format;                   // Instruction format for single format instructions,
                                       // or one bit for each allowed format for multi format instructions. See table in manual.
    uint64_t variant;                  // Template variant 
    uint32_t id;                       // Instruction id number
    uint8_t  category;                 // 1: single format, 3: multiformat, 4: jump
    uint8_t  templt;                   // Format template. 0xA - 0xE, 0 for multiple templates
    uint8_t  sourceoperands;           // Number of source operands, including register, memory and immediate operands
    uint8_t  op1;                      // Operation code
    uint8_t  op2;                      // Additional operation code
    uint8_t  opimmediate;              // Type of immediate operand for single-format instructions
    uint32_t implicit_imm;             // Value of implicit immediate operand
    uint32_t optypesgp;                // Operand types supported for general purpose registers
    uint32_t optypesscalar;            // Operand types supported for scalars in vector registers
    uint32_t optypesvector;            // Operand types supported for vectors
    char     name[maxINameLen+1];      // Name of instruction. Lower case. Compared as a zero-terminated string
};
 
// Same structure, but sorted by category, format, and operation codes. Used by disassembler.
// The type adds no members; it exists so that a different operator < applies to it.
struct SInstruction2 : public SInstruction {
};    
 
// Same structure, but sorted by id.
// The type adds no members; it exists so that a different operator < applies to it.
struct SInstruction3 : public SInstruction {
};    
 
// Ordering of instructions by name, ignoring case. Used by assembler.
// The name of the case-insensitive string compare function depends on the compiler.
static inline bool operator < (SInstruction const & a, SInstruction const & b) {
#ifdef _MSC_VER
    const int order = _strcmpi(a.name, b.name);
#else
    const int order = strcasecmp(a.name, b.name);
#endif
    return order < 0;    // negative result: a.name sorts before b.name
}
 
// Ordering of instructions by category, format, and operation codes. Used by disassembler.
// Sort keys in order of priority: category, format (single-format instructions only), op1, op2.
static inline bool operator < (SInstruction2 const & a, SInstruction2 const & b) {
    // first key: category
    if (a.category != b.category) return a.category < b.category;
    // second key: format. The categories are equal here, so testing a.category
    // covers both records. Format is only a sort key for single-format instructions
    if (a.category == 1 && a.format != b.format) return a.format < b.format;
    // third key: op1
    if (a.op1 != b.op1) return a.op1 < b.op1;
    // last key: op2
    return a.op2 < b.op2;
}
 
// Ordering of instructions by id number
static inline bool operator < (SInstruction3 const & a, SInstruction3 const & b) {
    const uint32_t leftId  = a.id;
    const uint32_t rightId = b.id;
    return leftId < rightId;
}
 
// class for reading comma-separated file.
// Derives from CFileBuffer. parse() reads and parses the file; the resulting
// records are stored in instructionlist.
class CCSVFile : public CFileBuffer {
public:
    CCSVFile() : CFileBuffer() {}                // Constructor
    void parse();                                // Read and parse file
    CDynamicArray<SInstruction> instructionlist; // List of records
    uint64_t interpretNumber(const char * text); // Interpret number in instruction list.
                                                 // This is a member function, distinct from the global interpretNumber declared in this header
};
 
// Cross reference of code addresses to lines in outFile. Used by debugger
struct SLineRef {
    uint64_t address;                            // code address
    uint32_t domain;                             // 1 = IP, 2 = datap, 4 = threadp
    uint32_t textPos;                            // position of corresponding line in outFile
};

// Operators for sorting and comparing SLineRef by address.
// Only domain and address take part in the comparisons; textPos is ignored.
// (The function bodies are not followed by a semicolon: a semicolon there is an
// empty declaration that pedantic compilers warn about.)

// Ordering: domain is the primary key, address is the secondary key
static inline bool operator < (SLineRef const & a, SLineRef const & b) {
    if (a.domain != b.domain) return a.domain < b.domain;
    return a.address < b.address;
}

// Two references are equal if they have the same domain and the same address
static inline bool operator == (SLineRef const & a, SLineRef const & b) {
    return a.domain == b.domain && a.address == b.address;
}

// Negation of operator ==
static inline bool operator != (SLineRef const & a, SLineRef const & b) {
    return !(a == b);
}
 
// Interpret a string with a decimal, binary, octal, or hexadecimal number.
// s is the text and maxLength limits how much of it is read.
// NOTE(review): *error is presumably set to nonzero if the text is not a valid number - confirm in the implementation
int64_t interpretNumber(const char * s, uint32_t maxLength, uint32_t * error);
 
double  interpretFloat(const char * s, uint32_t length);    // interpret floating point number from string with indicated length
 

Go to most recent revision | Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.