URL
https://opencores.org/ocsvn/forwardcom/forwardcom/trunk
Subversion Repositories forwardcom
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 40 to Rev 41
- ↔ Reverse comparison
Rev 40 → Rev 41
/forwardcom/bintools/assem1.cpp
0,0 → 1,1902
/**************************** assem1.cpp ******************************** |
* Author: Agner Fog |
* Date created: 2017-04-17 |
* Last modified: 2021-07-10 |
* Version: 1.11 |
* Project: Binary tools for ForwardCom instruction set |
* Module: assem1.cpp |
* Description: |
* Module for assembling ForwardCom .as files. Contains: |
* pass1(): Split input file into lines and tokens. Remove comments. Find symbol definitions |
* pass2(): Handle meta code. Classify lines. Identify symbol names, sections, functions |
* |
* Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses |
******************************************************************************/ |
#include "stdafx.h" |
|
// Extra characters accepted in symbol names in addition to letters and digits.
// Characters that are used as operators must not be listed here.
const char * allowedInNames = "_$@";

// When true, bytes with the high bit set (UTF-8 sequences) are accepted in symbol names
const bool allowUTF8 = true;

// When true, block comments may be nested: /* /* */ */
const bool allowNestedComments = true;
|
// Table of operators recognized by the tokenizer.
// Each record is {name, id, priority}. Most multi-character operators get their id
// by adding D2, D3 and/or EQ to the character code of their first character;
// the comment delimiters use the ids 'c' and 'd'.
SOperator operatorsList[] = {
    // name,   id,          priority
    {"(",      '(',         1},
    {")",      ')',         1},
    {"[",      '[',         1},
    {"]",      ']',         1},
    {"{",      '{',         1},
    {"}",      '}',         1},
    {"'",      '\'',        1},     // single quote, character code 39
    {"\"",     '"',         1},     // "
    {"/*",     'c',         1},     // comment begin
    {"*/",     'd',         1},     // comment end
    {".",      '.',         2},
    {"!",      '!',         3},
    {"~",      '~',         3},
    {"++",     '+'+D2,      3},
    {"--",     '-'+D2,      3},
    {"*",      '*',         4},
    {"/",      '/',         4},
    {"%",      '%',         4},
    {"+",      '+',         5},
    {"-",      '-',         5},
    {"<<",     '<'+D2,      6},
    {">>",     '>'+D2,      6},     // signed shift right
    {">>>",    '>'+D3,      6},     // unsigned shift right
    {"<",      '<',         7},
    {"<=",     '<'+EQ,      7},
    {">",      '>',         7},
    {">=",     '>'+EQ,      7},
    {"==",     '='+D2,      8},
    {"!=",     '!'+EQ,      8},
    {"&",      '&',         9},
    {"^",      '^',         10},
    {"|",      '|',         11},
    {"&&",     '&'+D2,      12},
    {"||",     '|'+D2,      13},
    {"^^",     '^'+D2,      13},    // boolean XOR. non-standard operator
    {"?",      '?',         14},
    {":",      ':',         14},
    {"=",      '=',         15},
    {"+=",     '+'+EQ,      15},
    {"-=",     '-'+EQ,      15},
    {"*=",     '*'+EQ,      15},
    {"/=",     '/'+EQ,      15},
    {"%=",     '%'+EQ,      15},
    {"<<=",    '<'+D2+EQ,   15},
    {">>=",    '>'+D2+EQ,   15},    // signed shift right
    {">>>=",   '>'+D3+EQ,   15},    // unsigned shift right
    {"&=",     '&'+EQ,      15},
    {"^=",     '^'+EQ,      15},
    {"|=",     '|'+EQ,      15},
    {",",      ',',         16},
    {"//",     '/'+D2,      20},    // comment, end of line
    {";",      ';',         20}     // comment, end of line
};
|
|
// Table of reserved words recognized by the tokenizer.
// Each record is {name, id}. pass1() derives the token type from the id (id >> 24).
SKeyword keywordsList[] = {
    // ---- TOK_DIR: section and function directives ----
    {"section",         DIR_SECTION},
    {"function",        DIR_FUNCTION},
    {"end",             DIR_END},
    {"public",          DIR_PUBLIC},
    {"extern",          DIR_EXTERN},

    // ---- TOK_ATT: attributes of sections, functions and symbols ----
    {"read",            ATT_READ},          // readable section
    {"write",           ATT_WRITE},         // writeable section
    {"execute",         ATT_EXEC},          // executable section
    {"align",           ATT_ALIGN},         // align section, data, or code
    {"weak",            ATT_WEAK},          // weak linking
    {"reguse",          ATT_REGUSE},        // register use
    {"constant",        ATT_CONSTANT},      // external constant
    {"uninitialized",   ATT_UNINIT},        // uninitialized section (BSS)
    {"communal",        ATT_COMDAT},        // communal section. duplicates and unreferenced sections are removed
    {"exception_hand",  ATT_EXCEPTION},     // exception handler and stack unroll information
    {"event_hand",      ATT_EVENT},         // event handler list, including constructors and destructors
    {"debug_info",      ATT_DEBUG},         // debug information
    {"comment_info",    ATT_COMMENT},       // comments, including copyright and required libraries

    // ---- TOK_TYP: type names ----
    {"int8",            TYP_INT8},
    {"uint8",           TYP_INT8+TYP_UNS},
    {"int16",           TYP_INT16},
    {"uint16",          TYP_INT16+TYP_UNS},
    {"int32",           TYP_INT32},
    {"uint32",          TYP_INT32+TYP_UNS},
    {"int64",           TYP_INT64},
    {"uint64",          TYP_INT64+TYP_UNS},
    {"int128",          TYP_INT128},
    {"uint128",         TYP_INT128+TYP_UNS},
    {"int",             TYP_INT32},
    {"uint",            TYP_INT32+TYP_UNS},
    {"float",           TYP_FLOAT32},
    {"double",          TYP_FLOAT64},
    {"float16",         TYP_FLOAT16},
    {"float32",         TYP_FLOAT32},
    {"float64",         TYP_FLOAT64},
    {"float128",        TYP_FLOAT128},
    {"string",          TYP_STRING},

    // ---- TOK_OPT: options of instructions and operands ----
    {"mask",            OPT_MASK},
    {"fallback",        OPT_FALLBACK},
    {"length",          OPT_LENGTH},
    {"broadcast",       OPT_BROADCAST},
    {"limit",           OPT_LIMIT},
    {"scalar",          OPT_SCALAR},
    {"options",         OPT_OPTIONS},
    {"option",          OPT_OPTIONS},       // alias

    // ---- TOK_REG: register names ----
    {"numcontr",        REG_NUMCONTR},
    {"threadp",         REG_THREADP},
    {"datap",           REG_DATAP},
    {"ip",              REG_IP},
    {"sp",              REG_SP},

    // ---- TOK_HLL: high level language keywords ----
    {"if",              HLL_IF},
    {"else",            HLL_ELSE},
    {"switch",          HLL_SWITCH},        // switch (r1, scratch registers) { case 0: break; ...}
    {"case",            HLL_CASE},
    {"for",             HLL_FOR},           // for (r1 = 1; r1 <= r2; r1++) {}
    {"in",              HLL_IN},            // for (float v1 in [r1-r2], nocheck) // (r2 counts down)
    {"while",           HLL_WHILE},         // while (r1 > 0) {}
    {"do",              HLL_DO},            // do {} while ()
    {"break",           HLL_BREAK},         // break out of switch or loop
    {"continue",        HLL_CONTINUE},      // continue loop
    {"true",            HLL_TRUE},          // constant = 1
    {"false",           HLL_FALSE},         // constant = 0

    // ---- temporary additions. will be replaced by macros later ----
    {"push",            HLL_PUSH},          // push registers
    {"pop",             HLL_POP},           // pop registers

};
|
// Prefixes of numbered register names, e.g. "r" for r0, r1, ...
// Each record is {prefix, id}. isRegister() adds the register number to the id.
SKeyword registerNames[] = {
    // prefix,  id
    {"r",       REG_R},
    {"v",       REG_V},
    {"spec",    REG_SPEC},
    {"capab",   REG_CAPAB},
    {"perf",    REG_PERF},
    {"sys",     REG_SYS}
};
|
|
// Constructor.
// Sizes the line and token lists from a rough estimate based on the current data size,
// prepares the error reporter and the sorted word lists, and stores an all-zero
// section header as entry 0 of sectionHeaders.
CAssembler::CAssembler() {
    // Rough guesses used only for sizing the lists
    const int bytesPerLineGuess = 16;
    const int tokensPerLineGuess = 10;
    int lineCountGuess = dataSize() / bytesPerLineGuess;
    int tokenCountGuess = lineCountGuess * tokensPerLineGuess;

    lines.setNum(lineCountGuess);
    tokens.setNum(tokenCountGuess);

    // Error reporter needs to know which assembler object it belongs to
    errors.setOwner(this);

    // Initialize and sort lists
    initializeWordLists();

    // The first section header is empty
    ElfFwcShdr emptyHeader;
    zeroAllMembers(emptyHeader);
    sectionHeaders.push(emptyHeader);
}
|
void CAssembler::go() { |
|
// Write feedback text to console |
feedBackText1(); |
|
// Set default options |
if (cmd.codeSizeOption == 0) cmd.codeSizeOption = 1 << 24; |
if (cmd.dataSizeOption == 0) cmd.dataSizeOption = 1 << 15; |
// initialize options |
code_size = cmd.codeSizeOption; |
data_size = cmd.dataSizeOption; |
|
do { // This loop is repeated only once. Just convenient to break out of in case of errors |
pass = 1; |
// Split input file into lines and tokens. Find symbol definitions |
pass1(); |
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} |
|
pass = 2; |
// A. Handle metaprogramming directives |
// B. Classify lines |
// C. Identify symbol names, sections, labels, functions |
pass2(); |
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} |
|
//showTokens(); //!! for debugging only |
//showSymbols(); //!! for debugging only |
|
pass = 3; |
// Interpret lines. Generate code and data |
pass3(); |
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} |
|
pass = 4; |
// Resolve internal cross references, optimize forward references |
pass4(); |
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} |
|
pass = 5; |
// Make binary file |
pass5(); |
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} |
|
} while (false); |
|
// output any error messages |
errors.outputErrors(); |
if (errors.numErrors()) cmd.mainReturnValue = 1; // make sure makefile process stops on error |
|
// output object file |
outFile.write(cmd.getFilename(cmd.outputFile)); |
} |
|
|
// Character can be the start of a symbol name |
inline bool nameChar1(char c) { |
return ((c | 0x20) >= 'a' && (c | 0x20) <= 'z') || ((c & 0x80) && allowUTF8) || strchr(allowedInNames, c); |
} |
|
// Character can be the part of a symbol name |
inline bool nameChar2(char c) { |
return nameChar1(c) || (c >= '0' && c <= '9'); |
} |
|
// Check if the text at s begins with a number. The number can be decimal, binary (0b),
// octal (0o), hexadecimal (0x), or floating point.
// Parameters:
//   s        pointer to the text. Not necessarily zero-terminated
//   maxlen   number of characters available at s. No character at or beyond s[maxlen] is read
//   isFloat  optional output, may be null. Receives true if the number contains a decimal
//            point or an exponent, false otherwise (also when no number is found)
// Returns the length of the part of the string that belongs to the number, or 0 if the
// text does not begin with a number.
uint32_t isNumber(const char * s, int maxlen, bool * isFloat) {
    bool is_float = false;
    if (isFloat) *isFloat = false;               // defined result also when returning early
    if (maxlen <= 0) return 0;                   // nothing to read
    char c = s[0];
    if (c < '0' || c > '9') {
        // not a digit. The only other possible start is '.' followed by a digit,
        // and that digit must lie inside the available text
        if (c != '.' || maxlen < 2 || s[1] < '0' || s[1] > '9') return 0;
    }
    int i = 0;
    int state = 0;
    // 0: begin
    // 1: after 0
    // 2: after digits 0-9
    // 3: after 0x
    // 4: after 0b or 0o
    // 5: after .
    // 6: after E
    // 7: after E09
    // 8: after E+-
    for (i = 0; i < maxlen; i++) {
        c = s[i];
        char cl = c | 0x20;                      // lower case letter
        if (c == '0' && state == 0) {state = 1; continue;}
        if (cl == 'x' && state == 1) {state = 3; continue;}
        if ((cl == 'b' || cl == 'o') && state == 1) {state = 4; continue;}
        if (c == '.' && state <= 2) {state = 5; is_float = true; continue;}
        if (cl == 'e' && (state <= 2 || state == 5)) {state = 6; is_float = true; continue;}
        if ((c == '+' || c == '-') && state == 6) {state = 8; continue;}
        if (c >= '0' && c <= '9') {
            if (state < 2) state = 2;
            if (state == 6) state = 7;
            continue;
        }
        if (cl >= 'a' && cl <= 'f' && state == 3) continue;   // hexadecimal digit
        // Anything else: stop here
        break;
    }
    if (isFloat) *isFloat = is_float;            // return isFloat
    return i;                                    // return length
}
|
// Check if the text s of length len is a numbered register name: one of the
// prefixes in registerNames followed by a one or two digit number below 32.
// The prefix is compared case-insensitively.
// Returns the register number plus the id of the matching prefix, or 0 if
// the text is not a register name.
uint32_t isRegister(const char * s, uint32_t len) {
    for (uint32_t r = 0; r < TableSize(registerNames); r++) {
        const char * prefix = registerNames[r].name;
        // quick reject on the first character, lower case
        if ((s[0] | 0x20) != prefix[0]) continue;

        // the prefix must be followed by exactly one or two characters
        const uint32_t prefixLen = (uint32_t)strlen(prefix);
        if (len != prefixLen + 1 && len != prefixLen + 2) continue;

        // compare the whole prefix, lower case
        bool prefixMatches = true;
        for (uint32_t k = 0; k < prefixLen; k++) {
            if ((s[k] | 0x20) != prefix[k]) {
                prefixMatches = false;
                break;
            }
        }
        if (!prefixMatches) continue;

        // first digit
        const char digit1 = s[prefixLen];
        if (digit1 < '0' || digit1 > '9') continue;
        uint32_t regNumber = digit1 - '0';

        // optional second digit
        if (len == prefixLen + 2) {
            const char digit2 = s[prefixLen + 1];
            if (digit2 < '0' || digit2 > '9') continue;
            regNumber = regNumber * 10 + (digit2 - '0');
        }
        if (regNumber >= 32) continue;                    // number too high

        return regNumber + registerNames[r].id;           // everything matches
    }
    return 0;                                             // not found
}
|
// write feedback text on stdout |
void CAssembler::feedBackText1() { |
if (cmd.verbose) { |
// Tell what we are doing: |
printf("\nAssembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(cmd.outputFile)); |
} |
} |
|
|
// Split input file into lines and tokens. Handle preprocessing directives. Find symbol definitions |
void CAssembler::pass1() { |
uint32_t n = 0; // offset into assembly file |
uint32_t m; // end of current token |
int32_t i, f; // temporary |
int32_t comment = 0; // 0: normal, 1: inside comment to end of line, 2: inside /* */ comment |
uint32_t commentStart = 0; // start position of multiline comment |
uint32_t commentStartColumn = 0;// start column of multiline comment |
char c; // current character or byte |
SToken token = {0}; // current token |
SKeyword keywSearch; // record to search for keyword |
SOperator opSearch; // record to search for operator |
SInstruction instructSearch; // record to search for instruction |
SLine line = {0,0,0,0,0,0,0}; // line record |
lines.push(line); // empty records for line 0 |
linei = 1; // start at line 1 |
numSwitch = 0; // count switch statements |
tokens.push(token); // unused token 0 |
|
if (dataSize() >= 3 && (get<uint32_t>(0) & 0xFFFFFF) == 0xBFBBEF) { |
n += 3; // skip UTF-8 byte order mark |
} |
|
line.beginPos = n; // start of line 1 |
line.firstToken = tokens.numEntries(); |
line.file = filei; |
|
// loop through file |
while (n < dataSize()) { |
c = get<char>(n); // get character |
|
// is it space or a control character? |
if (uint8_t(c) <= 0x20) { |
if (c == ' ' || c == '\t') { // skip space and tab |
n++; |
continue; |
} |
if (c == '\r' || c == '\n') { // newline |
n++; |
if (c == '\r' && get<char>(n) == '\n') n++; // "\r\n" windows newline |
if (comment == 1) comment = 0; // end comment |
if (n <= dataSize()) { |
// finish current line |
line.numTokens = tokens.numEntries() - line.firstToken; |
line.linenum = linei++; |
if (line.numTokens) { // save line if not empty |
lines.push(line); |
} |
// start next line |
line.type = 0; |
line.file = filei; |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
} |
continue; |
} |
// illegal control character |
token.type = TOK_ERR; |
line.type = LINE_ERROR; |
comment = 1; // ignore rest of line |
m = tokens.push(token); // save error token |
errors.report(n, 1, ERR_CONTROL_CHAR); |
} |
// prepare token of any type |
token.pos = n; |
token.stringLength = 1; |
token.id = 0; |
//token.column = n - line.beginPos; |
|
// is it a name? |
if (!comment && nameChar1(c)) { |
// start of a name |
m = n+1; |
while (m < dataSize() && nameChar2(get<char>(m))) m++; |
// name goes from position n to m-1. make token |
token.type = TOK_NAM; |
token.pos = n; |
token.stringLength = m - n; |
|
// is it a register name |
f = isRegister((char*)buf()+n, token.stringLength); |
if (f) { |
token.type = TOK_REG; |
token.id = f; |
} |
// is it a keyword? |
if (token.type == TOK_NAM && m-n < sizeof(keywSearch.name)) { |
memcpy(keywSearch.name, buf()+n, m-n); |
keywSearch.name[m-n] = 0; |
f = keywords.findFirst(keywSearch); |
if (f >= 0) { // keyword found |
token.id = keywords[f].id; |
token.type = keywords[f].id >> 24; |
if (token.id == HLL_SWITCH) numSwitch++; |
} |
} |
// is it an instruction? |
if (token.type == TOK_NAM && m-n < sizeof(instructSearch.name)) { |
memcpy(instructSearch.name, buf()+n, m-n); |
instructSearch.name[m-n] = 0; |
f = instructionlistNm.findFirst(instructSearch); |
if (f >= 0) { // instruction name found |
token.type = TOK_INS; |
token.id = instructionlistNm[f].id; |
} |
} |
n = m; |
tokens.push(token); // save token |
continue; |
} |
|
// Is it a number? |
if (!comment) { |
bool isFloat; |
f = isNumber((char*)buf() + n, dataSize() - n, &isFloat); |
if (f) { |
token.type = TOK_NUM + isFloat; |
token.id = n; // save number as string. The value is extracted later |
token.stringLength = f; |
n += f; |
tokens.push(token); // save token |
continue; |
} |
} |
|
// is it an operator? |
opSearch.name[0] = c; |
opSearch.name[1] = 0; |
f = operators.findFirst(opSearch); |
if (f >= 0) { |
// found single-character operator |
// make a greedy search for multi-character operators |
i = f; |
for (i = f+1; (uint32_t)i < operators.numEntries(); i++) { |
if (operators[i].name[0] != c) break; |
if (memcmp((char*)buf()+n, operators[i].name, strlen(operators[i].name)) == 0) f = i; |
} |
token.type = TOK_OPR; |
token.id = operators[f].id; |
token.priority = operators[f].priority; |
token.stringLength = (uint32_t)strlen(operators[f].name); |
|
// search for operators that need consideration here |
switch (token.id) { |
|
case 39: case '"': // quoted string in single or double quotes |
if (comment) break; |
// search for end of string |
token.type = token.id == 39 ? TOK_CHA : TOK_STR; |
token.pos = n + 1; |
m = n; |
while (true) { |
if (get<char>(m+1) == '\r' || get<char>(m+1) == '\n' || m == dataSize()) { |
// end of line without matching end quote. multi-line quotes not allowed |
token.type = TOK_ERR; |
errors.report(token.pos-1, 1, ERR_QUOTE_BEGIN); |
comment = 1; // skip rest of line |
break; |
} |
if (get<char>(m+1) == c && get<char>(m) != '\\') { // matching end quote not preceded by escape backslash |
token.stringLength = m - n; |
n += 2; |
break; |
} |
m++; |
} |
break; |
|
case '/'+D2: // "//". comment to end of line |
if (comment == 0) { |
comment = 1; |
} |
break; |
case 'c': // "/*" start of comment |
if (comment == 1) { |
n += token.stringLength; // skip and don't save token |
continue; |
} |
if (comment == 2) { // nested comment |
if (allowNestedComments) { |
comment++; |
} |
else { |
token.type = TOK_ERR; |
errors.report(n, 2, ERR_COMMENT_BEGIN); |
} |
break; |
} |
comment = 2; |
commentStart = n; commentStartColumn = n - line.beginPos; |
break; |
case 'd': // "*/" end of comment |
if (comment == 1) { |
n += token.stringLength; // skip and don't save token |
continue; |
} |
if (comment == 2) { |
comment = 0; |
n += token.stringLength; // skip and don't save token |
continue; |
} |
else if (comment > 2 && allowNestedComments) { |
comment--; |
n += token.stringLength; // skip and don't save token |
continue; |
} |
else { |
token.type = TOK_ERR; // unmatched end comment |
errors.report(n, 2, ERR_COMMENT_END); |
comment = 1; |
} |
break; |
case ';': |
// semicolon starts a new pseudo-line |
if (comment) break; |
// finish current line |
tokens.push(token); // the ';' token is used only in for(;;) loops. should be ignored at the end of the line otherwise |
n += token.stringLength; |
line.numTokens = tokens.numEntries() - line.firstToken; |
line.linenum = linei; |
if (line.numTokens) { // save line if not empty |
lines.push(line); |
} |
// start next line |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
continue; // don't save ';' token twice |
case '{': case '}': |
if (comment) break; |
// put each bracket in a separate pseudo-line to ease high level language parsing |
// finish current line |
line.numTokens = tokens.numEntries() - line.firstToken; |
line.linenum = linei; |
if (line.numTokens) { // save line if not empty |
lines.push(line); |
} |
// start line with bracket only |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
tokens.push(token); // save token |
n += token.stringLength; |
line.numTokens = 1; |
lines.push(line); |
// start line after bracket |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
continue; |
} |
if (comment == 0 && token.type != TOK_ERR) { |
// save token unless we are inside a comment or an error has occurred |
tokens.push(token); // save token |
} |
n += token.stringLength; |
continue; |
} |
|
if (comment) { |
// we are inside a comment. Continue search only for end of line or end of comment |
n++; |
continue; |
} |
|
// none of the above. Make token for illegal character |
token.type = TOK_ERR; |
line.type = LINE_ERROR; |
errors.report(n, 1, ERR_ILLEGAL_CHAR); |
comment = 1; // ignore rest of line |
n++; |
} |
// finish last line |
// tokens.push(token); |
line.numTokens = tokens.numEntries() - line.firstToken; |
lines.push(line); |
// start pseudo line |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
line.type = 0; |
|
// check for unmatched comment |
if (comment >= 2) { |
token.type = TOK_ERR; |
errors.report(commentStart, commentStartColumn, ERR_COMMENT_BEGIN); |
} |
// make EOF token in the end |
line.type = 0; |
line.beginPos = n; |
line.firstToken = tokens.numEntries(); |
line.numTokens = 1; |
lines.push(line); |
token.pos = n; |
token.stringLength = 0; |
token.type = TOK_EOF; // end of file |
tokens.push(token); // save eof token |
} |
|
|
void CAssembler::interpretSectionDirective() { |
// Interpret section directive during pass 2 or 3 |
// pass 2: identify section name and type, and give it a number |
// pass 3: make section header |
|
// to do: nested sections |
|
uint32_t tok; // token number |
ElfFWC_Sym2 sym; // symbol record |
int32_t sectionsym = 0; // index to symbol record defining current section name |
uint32_t state = 0; // 1: after align, 2: after '=' |
ElfFwcShdr sectionHeader; // section header |
zeroAllMembers(sym); // reset symbol |
zeroAllMembers(sectionHeader); // reset section header |
sectionHeader.sh_type = SHT_PROGBITS; // default section type |
|
sectionFlags = 0; |
for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get section attributes |
if (tokens[tok].type == TOK_ATT) { |
if (tokens[tok].id == ATT_UNINIT && state != 2) { |
sectionHeader.sh_type = SHT_NOBITS; // uninitialized section (BSS) |
sectionFlags |= SHF_READ | SHF_WRITE; |
} |
else if (tokens[tok].id == ATT_COMDAT && state != 2) { |
sectionHeader.sh_type = SHT_COMDAT; // communal section. duplicates and unreferenced sections are removed |
} |
else if (tokens[tok].id != ATT_ALIGN && state == 0) { |
sectionFlags |= tokens[tok].id & 0xFFFFFF; |
if (sectionFlags & SHF_EXEC) sectionFlags |= SHF_IP; // executable section must be IP based |
} |
else if (tokens[tok].id == ATT_ALIGN && state == 0) { |
state = 1; |
} |
else { |
errors.report(tokens[tok]); break; |
} |
} |
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_IP && state == 0) sectionFlags |= SHF_IP; |
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_DATAP && state == 0) sectionFlags |= SHF_DATAP; |
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_THREADP && state == 0) sectionFlags |= SHF_THREADP; |
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=' && state == 1) state = 2; |
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',' && state != 2) ; // comma, ignore |
else if (tokens[tok].type == TOK_NUM && state == 2) { |
if (pass >= 3) { // alignment value |
uint32_t alignm = expression(tok, 1, 0).value.w; |
if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) errors.reportLine(ERR_ALIGNMENT); |
else { |
sectionHeader.sh_align = bitScanReverse(alignm); |
} |
} |
state = 0; |
} |
else { |
errors.report(tokens[tok]); break; |
} |
} |
// find or define symbol with section name |
sectionsym = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); |
if (sectionsym <= 0) { |
// symbol not previously defined. Define it now |
sym.st_type = STT_SECTION; |
sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); |
sym.st_bind = sectionFlags; |
sectionsym = addSymbol(sym); // save symbol with section name |
} |
else { |
// symbol already defined. check that it is a section name |
if (symbols[sectionsym].st_type != STT_SECTION) { |
errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); |
} |
} |
sectionFlags |= SHF_ALLOC; |
lines[linei].type = LINE_SECTION; // line is section directive |
lines[linei].sectionType = sectionFlags; |
if (symbols[sectionsym].st_section == 0) { |
// new section. make section header |
sectionHeader.sh_name = symbols[sectionsym].st_name; |
if (sectionFlags & SHF_EXEC) { |
sectionHeader.sh_entsize = 4; |
if (sectionHeader.sh_align < 2) sectionHeader.sh_align = 2; |
sectionFlags |= SHF_IP; |
} |
else { // data section |
if (!(sectionFlags & (SHF_READ | SHF_WRITE))) sectionFlags |= SHF_READ | SHF_WRITE; // read or write attributes not specified, default is both |
if (!(sectionFlags & (SHF_IP | SHF_DATAP | SHF_THREADP))) { // address reference not specified. assume datap if writeable, ip if readonly |
if (sectionFlags & SHF_WRITE) sectionFlags |= SHF_DATAP; |
else sectionFlags |= SHF_IP; |
} |
} |
sectionHeader.sh_flags = sectionFlags; |
section = sectionHeaders.push(sectionHeader); |
symbols[sectionsym].st_section = section; |
} |
else { // this section is seen before |
section = symbols[sectionsym].st_section; |
if (sectionHeaders[section].sh_align < sectionHeader.sh_align) sectionHeaders[section].sh_align = sectionHeader.sh_align; |
if (sectionFlags && (sectionFlags & ~sectionHeaders[section].sh_flags)) errors.reportLine(ERR_SECTION_DIFFERENT_TYPE); |
sectionFlags = (uint32_t)sectionHeaders[section].sh_flags; |
if (sectionHeader.sh_align > 2) { |
// insert alignment code |
SCode code; |
zeroAllMembers(code); |
code.instruction = II_ALIGN; |
code.value.u = (int64_t)1 << sectionHeader.sh_align; |
code.sizeUnknown = 0x80; |
code.section = section; |
codeBuffer.push(code); |
} |
} |
} |
|
void CAssembler::interpretFunctionDirective() { |
// Interpret function directive during pass 2 |
uint32_t tok; // token number |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
int32_t symi; |
|
symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); |
if (symi > 0) { |
if (pass == 2) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined |
} |
else { |
// define symbol |
sym.st_type = STT_FUNC; |
sym.st_other = STV_IP; |
sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); |
sym.st_bind = 0; |
sym.st_section = section; |
for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get function attributes |
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') continue; |
if (tokens[tok].id == ATT_WEAK) sym.st_bind |= STB_WEAK; |
if (tokens[tok].id == ATT_REGUSE) { |
if (tokens[tok+1].id == '=' && tokens[tok+2].type == TOK_NUM) { |
tok += 2; |
sym.st_reguse1 = expression(tok, 1, 0).value.w; |
sym.st_other |= STV_REGUSE; |
if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) { |
tok += 2; |
sym.st_reguse2 = expression(tok, 1, 0).value.w; |
} |
} |
} |
else if (tokens[tok].type == TOK_DIR && tokens[tok].id == DIR_PUBLIC) sym.st_bind |= STB_GLOBAL; |
else { |
errors.report(tokens[tok]); // unexpected token |
} |
} |
symi = addSymbol(sym); // save symbol with function name |
} |
lines[linei].type = LINE_FUNCTION; // line is function directive |
|
if (pass == 3 && symi) { |
// make a label here. The final address will be calculated in pass 4 |
SCode code; // current instruction code |
zeroAllMembers(code); // reset code structure |
code.label = symbols[symi].st_name; |
code.section = section; |
codeBuffer.push(code); |
} |
} |
|
void CAssembler::interpretEndDirective() { |
// Interpret section or function end directive during pass 2 |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
int32_t symi; |
CTextFileBuffer tempBuffer; // temporary storage of names |
|
symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); |
if (symi <= 0) { |
errors.reportLine(ERR_UNMATCHED_END); |
} |
else { |
if (symbols[symi].st_type == STT_SECTION) { |
if (symbols[symi].st_section == section) { |
// current section ends here |
section = 0; sectionFlags = 0; |
} |
else { |
errors.reportLine(ERR_UNMATCHED_END); |
} |
} |
else if (symbols[symi].st_type == STT_FUNC && pass >= 4) { |
symbols[symi].st_unitsize = 4; |
// to do: insert size! |
//symbols[symi].st_unitsize = ? |
// support function(){} syntax. prevent nested functions |
} |
} |
lines[linei].type = LINE_ENDDIR; // line is end directive |
} |
|
// Interpret line specifying options |
void CAssembler::interpretOptionsLine() { |
|
// Expecting a line of the type: |
// "options codesize = 0x10000, datasize = 1 << 20" |
uint32_t tok; // token number |
uint32_t state = 0; // 0: start, 1: after option name, 2: after equal sign, 3: after expression |
const char * optionname = 0; |
int option = 0; // 1: codesize, 2: datasize |
SExpression val; // value to be assigned |
SCode code; // instruction code containing options |
for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) { |
|
switch (state) { |
case 0: // start. expect name "datasize" or "codesize" |
if (tokens[tok].type != TOK_NAM) { |
errors.report(tokens[tok]); return; // unexpected token |
} |
optionname = (char*)buf()+tokens[tok].pos; // tokens[tok].stringLength; |
if (strncasecmp_(optionname, "codesize", 8) == 0) option = 1; |
else if (strncasecmp_(optionname, "datasize", 8) == 0) option = 2; |
else { |
errors.report(tokens[tok]); return; // unexpected name |
} |
state = 1; |
break; |
|
case 1: // after name, expecting equal sign |
if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=') { |
state = 2; |
} |
else { |
errors.report(tokens[tok]); return; // unexpected token |
} |
break; |
|
case 2: // expect expression |
val = expression(tok, tokenB + tokenN - tok, 0); // evaluate number or expression |
tok += val.tokens - 1; |
if (val.etype != XPR_INT) { |
errors.reportLine(ERR_MUST_BE_CONSTANT); |
return; |
} |
zeroAllMembers(code); // reset code structure |
switch (option) { |
case 1: // set codesize |
if (val.value.u == 0) code_size = cmd.codeSizeOption; |
else code_size = val.value.u; |
code.value.u = code_size; |
break; |
case 2: // set datasize |
if (val.value.u == 0) data_size = cmd.dataSizeOption; |
else data_size = val.value.u; |
code.value.u = data_size; |
break; |
} |
// This is called only in pass 3. Save this option for pass 4: |
code.instruction = II_OPTIONS; |
code.section = section; |
code.fitNum = option; |
code.sizeUnknown = 1; |
codeBuffer.push(code); |
state = 3; |
break; |
|
case 3: // expect comma or nothing |
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { |
state = 0; // start over after comma |
} |
else { |
errors.report(tokens[tok]); return; // unexpected token |
} |
} |
} |
} |
|
|
// Find symbol by index into symbolNameBuffer. The return value is an index into symbols. |
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name |
uint32_t CAssembler::findSymbol(uint32_t namei) { |
ElfFWC_Sym2 sym; // temporary symbol record used for searching |
sym.st_name = namei; |
return symbols.findFirst(sym); // find symbol by name |
} |
|
// Find symbol by name given as a string of length len, not necessarily zero-terminated.
// The return value is an index into symbols.
// The name is stored in symbolNameBuffer only for the duration of the search.
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name
uint32_t CAssembler::findSymbol(const char * name, uint32_t len) {
    // remember the buffer size so that the temporary name can be removed again
    const uint32_t originalSize = symbolNameBuffer.dataSize();
    const uint32_t tempNameIndex = symbolNameBuffer.putStringN(name, len);

    // search by name index
    const int32_t found = findSymbol(tempNameIndex);

    // remove temporary name from symbolNameBuffer
    symbolNameBuffer.setSize(originalSize);
    return found;
}
|
// Add a symbol to the symbols list.
// Returns the value returned by symbols.addUnique, or 0 if a matching symbol
// is already in the list. The symbol is not added in the latter case.
uint32_t CAssembler::addSymbol(ElfFWC_Sym2 & sym) {
    const int32_t existing = symbols.findFirst(sym);
    if (existing >= 0) return 0;                 // error: symbol already defined
    return symbols.addUnique(sym);
}
|
// interpret name: options {, name: options} |
void CAssembler::interpretExternDirective() { |
uint32_t tok; // token number |
uint32_t nametok = 0; // last name token |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
sym.st_bind = STB_GLOBAL; |
|
// Example: extern name1: int32 weak, name2: function, name3, name4: read |
uint32_t state = 0; // 0: after extern or comma, |
// 1: after name, |
// 2: after colon |
|
// loop through tokens on this line |
for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) { |
switch (state) { |
case 0: // after extern or comma. expecting name |
if (tokens[tok].type == TOK_NAM) { |
// name encountered |
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); |
state = 1; nametok = tok; |
} |
else errors.report(tokens[tok]); |
break; |
case 1: // after name. expecting colon or comma |
if (tokens[tok].type == TOK_OPR) { |
if (tokens[tok].id == ':') { |
state = 2; |
continue; |
} |
else if (tokens[tok].id == ',') { |
goto COMMA; |
} |
} |
errors.report(tokens[tok]); |
break; |
case 2: // after colon. expecting attribute or comma or end of line |
if (tokens[tok].type == TOK_TYP) { |
// symbol size given by type token |
uint32_t s = tokens[tok].id & 0xF; |
if (s > 4) s -= 3; // float types |
sym.st_unitsize = uint32_t(1 << s); |
sym.st_unitnum = 1; |
} |
else if (tokens[tok].type == TOK_ATT || tokens[tok].type == TOK_DIR) { |
ATTRIBUTE: |
switch (tokens[tok].id) { |
case DIR_FUNCTION: case ATT_EXEC: // function or execute |
if (sym.st_type) { |
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE); |
} |
sym.st_type = STT_FUNC; |
sym.st_other = STV_IP | STV_EXEC; |
break; |
case ATT_READ: // read |
if (sym.st_type == 0) sym.st_other |= STV_READ; |
break; |
case ATT_WRITE: // write |
if (sym.st_type == STT_FUNC) { |
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE); |
} |
else { |
sym.st_type = STT_OBJECT; |
} |
break; |
case ATT_WEAK: // weak |
sym.st_bind = STB_WEAK; |
break; |
case ATT_CONSTANT: // constant |
sym.st_type = STT_CONSTANT; |
break; |
case ATT_REGUSE: |
if (tokens[tok+1].id == '=' && (tokens[tok+2].type == TOK_NUM /*|| tokens[tok+2].type == TOK_OPR)*/)) { |
tok += 2; |
sym.st_reguse1 = expression(tok, 1, 0).value.w; |
sym.st_other |= STV_REGUSE; |
if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) { |
tok += 2; |
sym.st_reguse2 = expression(tok, 1, 0).value.w; |
} |
} |
break; |
default: // error |
errors.report(tokens[tok]); |
} |
} |
else if (tokens[tok].type == TOK_REG) { |
switch (tokens[tok].id) { |
case REG_IP: |
sym.st_other |= STV_IP; break; |
case REG_DATAP: |
sym.st_other |= STV_DATAP; break; |
case REG_THREADP: |
sym.st_other |= STV_THREADP; break; |
default: errors.report(tokens[tok]); |
} |
} |
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { |
// end of definition. save symbol |
COMMA: |
if (tok < tokenB + tokenN |
&& (tokens[tok + 1].type == TOK_ATT || tokens[tok + 1].type == TOK_DIR)) { |
tok++; goto ATTRIBUTE; |
} |
uint32_t symi = addSymbol(sym); // save symbol with function name |
if (symi == 0) { // symbol already defined |
errors.report(tokens[nametok].pos, tokens[nametok].stringLength, ERR_SYMBOL_DEFINED); |
} |
sym.st_name = 0; // clear record for next symbol |
sym.st_type = 0; |
sym.st_other = 0; |
sym.st_unitsize = 0; |
sym.st_unitnum = 0; |
sym.st_bind = STB_GLOBAL; |
state = 0; |
} |
else { |
errors.report(tokens[tok]); |
} |
break; |
} |
} |
if (state) { // last extern definition does not end with comma. finish it here |
goto COMMA; |
} |
lines[linei].type = LINE_DATADEF; // line is data definition |
} |
|
|
void CAssembler::interpretLabel(uint32_t tok) { |
// line begins with a name. interpret label |
// to do: add type if data. not string type |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
|
// save name |
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); |
sym.st_section = section; |
// determine if code or data from section type |
if (sectionFlags & SHF_EXEC) { |
sym.st_type = STT_FUNC; |
sym.st_other = STV_EXEC | STV_IP; |
} |
else { |
sym.st_type = STT_OBJECT; |
sym.st_other = sectionFlags & STV_SECT_ATTR; |
} |
|
// look for more exact type information |
if (tokenN > 2) { |
uint32_t t = tok+2; |
if (tokens[t].type == TOK_TYP) { |
uint32_t s = tokens[t].id & 0xF; |
if (s > 4) s -= 3; |
sym.st_unitsize = uint32_t(1 << s); |
sym.st_unitnum = 1; |
if (tokenN > 3) t++; |
} |
if (tokens[t].type == TOK_NUM || tokens[t].type == TOK_FLT) { |
sym.st_type = STT_OBJECT; |
lines[linei].type = LINE_DATADEF; |
} |
else if (tokens[t].type == TOK_REG || tokens[t].type == TOK_INS || tokens[t].id == '[') { |
lines[linei].type = LINE_CODEDEF; |
sym.st_type = STT_FUNC; |
} |
} |
if (section) { // copy type info from section |
sym.st_other = sectionHeaders[section].sh_flags & STV_SECT_ATTR; |
} |
|
if (lines[linei].type == 0) { |
lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF; |
} |
|
uint32_t symi = addSymbol(sym); // add symbol to symbols list |
|
if (section) { |
// symbol address |
symbols[symi].st_value = sectionHeaders[section].sh_size; |
} |
tokens[tok].id = symbols[symi].st_name; // save symbol name index |
if (symi == 0) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); |
} |
|
|
// interpret assembly style variable definition: |
// label: type value1, value2 |
void CAssembler::interpretVariableDefinition1() { |
int state = 0; // 0: start |
// 1: after label |
// 2: after : |
// 3: after type or , |
// 4: after value |
uint32_t tok; // token index |
uint32_t type = 0; // data type |
uint32_t dsize = 0; // data size |
uint32_t dsize1; // log2(dsize) |
uint32_t dnum = 0; // number of data items |
uint32_t stringlen = 0; // length of string |
uint32_t symi = 0; // symbol index |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
SExpression exp1; // expression when interpreting numeric expression |
|
if (section == 0) { |
errors.reportLine(ERR_DATA_WO_SECTION); |
} |
|
// loop through tokens on this line |
for (tok = tokenB; tok < tokenB + tokenN; tok++) { |
switch (state) { |
case 0: // start |
if (tokens[tok].type == TOK_NAM) { // name. make symbol |
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); |
sym.st_type = STT_OBJECT; |
symi = symbols.addUnique(sym); |
tokens[tok].type = TOK_SYM; // change token type |
tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change |
state = 1; |
} |
else if (tokens[tok].type == TOK_SYM) { // symbol |
symi = findSymbol(tokens[tok].id); |
if (symi > 0) { |
if (pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined |
} |
state = 1; |
} |
else if (tokens[tok].type == TOK_TYP) { |
goto TYPE_TOKEN; |
} |
else errors.report(tokens[tok]); |
if (symi && section) { |
symbols[symi].st_value = sectionHeaders[section].sh_size; |
} |
break; |
case 1: // after label. expect colon |
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ':') { |
state = 2; |
} |
else errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_EXPECT_COLON); |
break; |
case 2: // expect type |
if (tokens[tok].type == TOK_TYP) { |
TYPE_TOKEN: |
type = tokens[tok].id & 0xFF; |
dsize1 = type & 0xF; |
if (type & 0x40) dsize1 -= 3; |
dsize = 1 << dsize1; |
state = 3; |
if (section) { // align data |
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; |
if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment |
if (addr & (dsize - 1)) { // needs to insert zeroes |
uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize; |
sectionHeaders[section].sh_size = addr2; // update address |
if (symi) symbols[symi].st_value = addr2; // update symbol address |
if (pass >= 3) { |
dataBuffers[section].align((uint32_t)dsize); // put zeroes in data buffer |
} |
} |
} |
} |
else errors.report(tokens[tok]); |
break; |
case 3: // after type. expect value. evaluate expression |
exp1 = expression(tok, tokenB + tokenN - tok, pass < 3 ? 0x10 : 0); // pass 3: may contain symbols not defined yet |
tok += exp1.tokens - 1; |
if (exp1.etype & XPR_STRING) { // string expression: get size |
if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8 |
stringlen = exp1.sym2; // string length |
} |
else stringlen = 0; |
if (pass < 3) { |
if (section) sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address |
} |
else { |
if (section) { |
// save data of desired type |
if (exp1.etype & XPR_FLT) { |
// floating point number specified |
if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected |
exp1.value.i = int64_t(exp1.value.d); |
errors.reportLine(ERR_CONFLICT_TYPE); |
} |
} |
else if (exp1.etype & XPR_INT) { |
if (type & TYP_FLOAT) { // integer specified, float expected |
exp1.value.d = double(exp1.value.i); // convert to float |
} |
} |
int64_t value = exp1.value.i; //value of expression |
if (exp1.sym3) { |
// calculation of symbol value. add relocation if needed |
uint32_t size = type & 0xF; |
if (type & 0x40) size -= 3; |
size = 1 << size; |
//value = calculateConstantOperand(exp1, dataBuffers[section].dataSize(), size); |
value = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize); |
if (exp1.etype & XPR_ERROR) { |
errors.reportLine((uint32_t)value); // report error |
break; |
} |
// check for overflow |
bool overflow = false; |
switch (type & 0xFF) { |
case TYP_INT8 & 0xFF: |
overflow = value > 0x7F || value < -0x80; |
break; |
case TYP_INT16 & 0xFF: |
overflow = value > 0x7FFF || value < -0x8000; |
break; |
case TYP_INT32 & 0xFF: |
overflow = value > 0x7FFFFFFF || value < int32_t(0x80000000); |
break; |
default:; |
} |
if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows |
} |
if (sectionHeaders[section].sh_type == SHT_NOBITS) { |
// uninitialized (BSS) section. check that value is zero, but don't store |
if (value != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero |
} |
else { |
// save data |
switch (type & 0xFF) { |
case TYP_INT8 & 0xFF: |
if (stringlen) { |
dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen); |
break; |
} |
dataBuffers[section].push(&value, 1); break; |
case TYP_INT16 & 0xFF: |
dataBuffers[section].push(&value, 2); break; |
case TYP_INT32 & 0xFF: |
dataBuffers[section].push(&value, 4); break; |
case TYP_INT64 & 0xFF: |
dataBuffers[section].push(&value, 8); break; |
case TYP_INT128 & 0xFF: |
dataBuffers[section].push(&value, 8); |
value = value >> 63; // sign extend |
dataBuffers[section].push(&value, 8); |
break; |
case TYP_FLOAT16 & 0xFF: // half precision |
exp1.value.w = double2half(exp1.value.d); |
dataBuffers[section].push(&exp1.value.w, 2); break; |
case TYP_FLOAT32 & 0xFF: { // single precision |
float val = float(exp1.value.d); |
dataBuffers[section].push(&val, 4); } |
break; |
case TYP_FLOAT64 & 0xFF: // double precision |
dataBuffers[section].push(&exp1.value.d, 8); break; |
} |
} |
sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address |
} |
} |
if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_SYM1 | XPR_UNRESOLV)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) errors.report(tokens[tok]); |
|
if (stringlen) dnum += stringlen; else dnum += 1; |
state = 4; |
break; |
case 4: // after value. expect comma or end of line |
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { |
state = 3; |
} |
else errors.report(tokens[tok]); |
break; |
} |
if (lineError) return; |
} |
if (state != 4 && state != 2) errors.report(tokens[tok-1]); |
if (symi) { // save size |
symbols[symi].st_unitsize = dsize; |
symbols[symi].st_unitnum = dnum; |
symbols[symi].st_section = section; |
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT; |
if (section) { // copy information from section |
symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR; |
} |
} |
} |
|
// interpret C style variable definition: |
// type name1 = value1, name2[num] = {value, value, ..} |
void CAssembler::interpretVariableDefinition2() { |
int state = 0; // 0: start |
// 1: after type or comma |
// 2: after name |
// 3: after [ |
// 4: after [number |
// 5: after = |
// 6: after = number |
// 7: after { |
// 8: after {number |
|
uint32_t tok; // token index |
uint32_t dsize = 0; // data element size |
uint32_t dsize1 = 0; // data element size = 1 << dsize1 |
uint32_t type = 0; // data type |
uint32_t arrayNum1 = 1; // number of elements indicated in [] |
uint32_t arrayNum2 = 0; // number of elements in {} list |
uint32_t stringlen = 0; // length of string |
uint32_t symi = 0; // symbol index |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
SExpression exp1; // expression when interpreting numeric expression |
|
if (section == 0) { |
errors.reportLine(ERR_DATA_WO_SECTION); |
} |
|
// loop through tokens on this line |
for (tok = tokenB; tok < tokenB + tokenN; tok++) { |
switch (state) { |
case 0: // this is a type token |
type = tokens[tok].id & 0xFF; |
dsize1 = tokens[tok].id & 0xF; |
if ((type & 0x40) > 3) dsize1 -= 3; |
dsize = 1 << dsize1; |
state = 1; |
if (section) { // align data |
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; |
if (addr & (dsize - 1)) { // needs to insert zeroes |
uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize; // calculate aligned address |
sectionHeaders[section].sh_size = addr2; // update address |
if (pass >= 3) { |
dataBuffers[section].align(dsize); // put zeroes in data buffer |
} |
} |
if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment |
} |
break; |
case 1: // expecting name token. save name |
if (tokens[tok].type == TOK_NAM) { // name. make symbol |
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); |
symi = addSymbol(sym); |
if (symi == 0 && pass == 2) { |
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); break; |
} |
symbols[symi].st_type = (sectionFlags & SHF_EXEC) ? STT_FUNC : STT_OBJECT; |
tokens[tok].type = TOK_SYM; // change token type |
tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change |
state = 2; |
} |
else if (tokens[tok].type == TOK_SYM) { // symbol |
symi = findSymbol(tokens[tok].id); |
if (symi > 0 && pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined |
state = 2; |
} |
else { |
errors.report(tokens[tok]); |
} |
//nametok = tok; |
symbols[symi].st_unitsize = dsize; |
symbols[symi].st_unitnum = 0; |
|
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT; |
if (section) { // copy information from section |
symbols[symi].st_value = sectionHeaders[section].sh_size; |
symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR; |
} |
break; |
case 2: // after name. expect , = [ eol |
if (tokens[tok].type != TOK_OPR) { |
errors.report(tokens[tok]); break; |
} |
switch (tokens[tok].id) { |
case ',': // finish this symbol definition |
COMMA: |
if (arrayNum2 > arrayNum1) { // check if the two array sizes match |
if (arrayNum1 > 1) { |
errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_CONFLICT_ARRAYSZ); |
} |
else arrayNum1 = arrayNum2; |
} |
symbols[symi].st_unitsize = dsize; |
symbols[symi].st_unitnum = arrayNum1; |
symbols[symi].st_reguse1 = linei; |
symbols[symi].st_section = section; |
|
if (arrayNum1 > arrayNum2 && section) { |
// unspecified elements are zero. calculate extra size |
uint32_t asize = (arrayNum1 - arrayNum2) * dsize; |
sectionHeaders[section].sh_size += asize; |
if (pass >= 3 && sectionHeaders[section].sh_type != SHT_NOBITS) { |
// store any unspecified elements as zero |
uint64_t zero = 0; |
while (asize > 8) { |
dataBuffers[section].push(&zero, 8); asize -= 8; |
} |
while (asize > 0) { |
dataBuffers[section].push(&zero, 1); asize -= 1; |
} |
} |
} |
|
// get ready for next symbol |
zeroAllMembers(sym); |
arrayNum1 = 1; arrayNum2 = 0; |
if (state == 99) return; // finished line |
state = 1; |
break; |
case '=': |
state = 5; |
break; |
case '[': |
state = 3; |
break; |
default: |
errors.report(tokens[tok]); |
} |
break; |
case 3: // after [ . expect number or ] |
if (tokens[tok].id == ']') { |
state = 2; break; |
} |
if (arrayNum1 > 1) { |
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_MULTIDIMENSIONAL); break; // error. multidimensional array not supported |
} |
// evaluate numeric expression inside []. |
// it may contain complex expressions that can only be evaluated later, but |
// this will not generate an error message here |
exp1 = expression(tok, tokenB + tokenN - tok, 0x10); |
if (lineError) return; |
tok += exp1.tokens -1; |
if (exp1.etype == 0) errors.report(tokens[tok]); |
if ((exp1.etype & ~XPR_IMMEDIATE) == 0) { |
arrayNum1 = exp1.value.w; |
} |
state = 4; |
break; |
case 4: // after [number. expect ] |
if (tokens[tok].id != ']') { |
errors.report(tokens[tok]); break; |
} |
state = 2; |
break; |
case 5: // after =. expect number or {numbers} |
if (tokens[tok].id == '{') state = 7; |
else { |
state = 6; |
goto SAVE_VALUE; // interpret value and save it |
} |
break; |
case 6: // after = number. expect comma or eol |
if (tokens[tok].id != ',') { |
errors.report(tokens[tok]); break; |
} |
goto COMMA; |
case 7: // after {. expect number list |
state = 8; |
SAVE_VALUE: |
arrayNum2++; |
if (pass < 3) { |
// may contain symbols not defined yet. just pass expression and count tokens |
exp1 = expression(tok, tokenB + tokenN - tok, 0x10); |
tok += exp1.tokens - 1; |
if (lineError) return; |
} |
else { |
// pass 5. evaluate expression and save value |
exp1 = expression(tok, tokenB + tokenN - tok, 0); |
tok += exp1.tokens - 1; |
if (lineError) return; |
if ((exp1.etype & XPR_SYM1) && exp1.sym3 && pass > 3) { |
// calculation of symbol value. add relocation if needed |
exp1.value.i = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize); |
if (exp1.etype & XPR_ERROR) { |
errors.reportLine((uint32_t)(exp1.value.i)); // report error |
break; |
} |
// check for overflow |
bool overflow = false; |
switch (type & 0xFF) { |
case TYP_INT8 & 0xFF: |
overflow = exp1.value.i > 0x7F || exp1.value.i < -0x80; |
break; |
case TYP_INT16 & 0xFF: |
overflow = exp1.value.i > 0x7FFF || exp1.value.i < -0x8000; |
break; |
case TYP_INT32 & 0xFF: |
overflow = exp1.value.i > 0x7FFFFFFF || exp1.value.i < int32_t(0x80000000); |
break; |
default:; |
} |
if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows |
} |
} |
if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_UNRESOLV | XPR_SYM1)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) { |
errors.report(tokens[tok]); |
} |
if (section && section < dataBuffers.numEntries() && pass >= 3) { |
// save data of desired type |
if ((exp1.etype & XPR_IMMEDIATE) == XPR_FLT) { |
// floating point number specified |
if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected |
exp1.value.i = int64_t(exp1.value.d); |
errors.reportLine(ERR_CONFLICT_TYPE); |
} |
} |
else if ((exp1.etype & XPR_IMMEDIATE) == XPR_INT) { |
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) { // integer specified, float expected |
exp1.value.d = double(exp1.value.i); // convert to float |
} |
} |
else if (exp1.etype & XPR_STRING) { // string expression: get size |
if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8 |
stringlen = exp1.sym2; // string length |
} |
else stringlen = 0; |
|
if (sectionHeaders[section].sh_type == SHT_NOBITS) { |
// uninitialized (BSS) section. check that value is zero, but don't store |
if (exp1.value.i != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero |
} |
else { |
// save data |
switch (type & 0xFF) { |
case TYP_INT8 & 0xFF: |
if (stringlen) { |
dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen); |
break; |
} |
dataBuffers[section].push(&exp1.value.u, 1); break; |
case TYP_INT16 & 0xFF: |
dataBuffers[section].push(&exp1.value.u, 2); break; |
case TYP_INT32 & 0xFF: |
dataBuffers[section].push(&exp1.value.u, 4); break; |
case TYP_INT64 & 0xFF: |
dataBuffers[section].push(&exp1.value.u, 8); break; |
case TYP_INT128 & 0xFF: |
dataBuffers[section].push(&exp1.value.u, 8); |
exp1.value.i = exp1.value.i >> 63; // sign extend |
dataBuffers[section].push(&exp1.value.u, 8); |
break; |
case TYP_FLOAT16 & 0xFF: // half precision |
exp1.value.w = double2half(exp1.value.d); |
dataBuffers[section].push(&exp1.value.w, 2); break; |
case TYP_FLOAT32 & 0xFF: { // single precision |
float val = float(exp1.value.d); |
dataBuffers[section].push(&val, 4); } |
break; |
case TYP_FLOAT64 & 0xFF: // double precision |
dataBuffers[section].push(&exp1.value.d, 8); break; |
} |
} |
} |
sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address |
break; |
case 8: // after {number. expect comma or } |
if (tokens[tok].id == ',') state = 7; |
else if (tokens[tok].id == '}') state = 6; |
else { |
errors.report(tokens[tok]); break; |
} |
} |
if (tok + 1 == tokenB + tokenN && (state == 5 || state >= 7) && linei + 1 < lines.numEntries()) { |
// no more tokens. statement with {} can span multiple lines |
if (state == 5) { |
// after '='. expect next line to be '{' |
uint32_t tokNext = lines[linei+1].firstToken; |
if (tokens[tokNext].type != TOK_OPR || tokens[tokNext].id != '{') break; // anything else: break out of loop and get error message |
} |
// append next line |
lines[linei].type = LINE_DATADEF; |
linei++; |
tokenN += lines[linei].numTokens; |
} |
|
} |
// no more tokens |
if (state == 2 || state == 6) { |
// finish this definition |
lines[linei].type = LINE_DATADEF; |
state = 99; goto COMMA; |
} |
errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_UNFINISHED_VAR); |
} |
|
// check if line is code or data |
void CAssembler::determineLineType() { |
uint32_t tok; // current token |
uint32_t elements = 0; // detect type and constant tokens |
|
if (tokens[tokenB].type == TOK_OPT) { |
lines[linei].type = LINE_OPTIONS; return; |
} |
// loop through tokens on this line |
for (tok = tokenB; tok < tokenB + tokenN; tok++) { |
if (tokens[tok].type == TOK_REG || tokens[tok].type == TOK_INS || tokens[tok].type == TOK_XPR || tokens[tok].type == TOK_HLL) { |
lines[linei].type = LINE_CODEDEF; return; // register or instruction found. must be code |
} |
if (tokens[tok].type == TOK_TYP) elements |= 1; |
if (tokens[tok].type == TOK_NUM || tokens[tok].type == TOK_FLT || tokens[tok].type == TOK_CHA || tokens[tok].type == TOK_STR) elements |= 2; |
} |
if (elements == 3) lines[linei].type = LINE_DATADEF; |
else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) { // align directive |
lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF; |
} |
else if (tokens[tokenB].type == TOK_EOF) lines[linei].type = 0; // end of file |
else if (tokenN == 1 && tokens[tokenB].type == TOK_OPR && linei > 1) { |
// {} bracket. same type as previous line |
lines[linei].type = lines[linei-1].type; |
} |
else if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') { |
// metaprogramming code |
lines[linei].type = LINE_METADEF; |
} |
else if (linei > 1) { |
// undetermined. This may occur in for(;;) clause. Use same type as previous line |
lines[linei].type = lines[linei-1].type; |
} |
else { |
// error. cannot determine |
errors.report(tokens[tokenB]); |
lines[linei].type = LINE_ERROR; |
} |
} |
|
// interpret data or code alignment directive |
void CAssembler::interpretAlign() { |
if (section) { |
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; |
SExpression exp1 = expression(tokenB+1, tokenN - 1, pass < 3 ? 0x10 : 0); |
if (exp1.tokens < tokenN - 1) {errors.report(tokens[tokenB+1+exp1.tokens]); return;} |
if ((exp1.etype & XPR_IMMEDIATE) != XPR_INT || (exp1.etype & (XPR_STRING | XPR_REG | XPR_OP | XPR_MEM | XPR_OPTION))) { |
errors.report(tokens[tokenB+1]); return; |
} |
uint64_t alignm = exp1.value.u; |
if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) {errors.reportLine(ERR_ALIGNMENT); return;} |
uint32_t log2ali = bitScanReverse(alignm); |
if (sectionHeaders[section].sh_align < log2ali) { |
sectionHeaders[section].sh_align = log2ali; // make sure section alignment is not less |
} |
if (addr & ((uint32_t)alignm - 1)) { // needs to insert zeroes |
uint32_t addr2 = (addr + (uint32_t)alignm - 1) & -(int32_t)alignm; |
sectionHeaders[section].sh_size = addr2; // update address |
if (pass >= 3) { |
dataBuffers[section].align((uint32_t)alignm); // put zeroes in data buffer |
} |
} |
} |
} |
|
// Pass 3 does three things. |
// A. Handle metaprogramming directives |
// B. Classify lines |
// C. Identify symbol names, sections, labels, functions |
// These must be done in parallel because metaprogramming directives can refer to previously |
// defined symbols, and data/code definitions can involve metaprogramming variables and macros |
|
void CAssembler::pass2() { |
ElfFWC_Sym2 sym; // symbol record |
zeroAllMembers(sym); // reset symbol |
symbols.push(sym); // symbol record 0 is empty |
symbolNameBuffer.put((char)0); // put dummy zero to avoid zero offset at next string |
sectionFlags = 0; |
section = 0; |
|
// lines loop |
for (linei = 1; linei < lines.numEntries(); linei++) { |
lineError = 0; |
tokenB = lines[linei].firstToken; // first token in line |
tokenN = lines[linei].numTokens; // number of tokens in line |
if (tokenN == 0) continue; |
replaceKnownNames(); // replace previously defined names by symbol references |
// check if line begins with '%' |
if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') { |
// metaprogramming code |
lines[linei].type = LINE_METADEF; |
interpretMetaDefinition(); |
continue; |
} |
// classify other lines |
lines[linei].sectionType = sectionFlags; // line is section directive |
if (sectionFlags & ATT_EXEC) lines[linei].type = LINE_CODEDEF; |
else if (sectionFlags & ((ATT_READ | ATT_WRITE))) lines[linei].type = LINE_DATADEF; |
|
if (tokenN > 1) { |
// search for section, function and symbol definitions |
// lines with a single token cannot legally define a symbol name |
if ((tokens[tokenB].type == TOK_NAM || tokens[tokenB].type == TOK_SYM) && tokens[tokenB+1].type == TOK_DIR) { |
switch (tokens[tokenB + 1].id) { |
case DIR_SECTION: // section starts here |
interpretSectionDirective(); |
break; |
case DIR_FUNCTION: // function starts here |
interpretFunctionDirective(); |
break; |
case DIR_END: // section or function end |
interpretEndDirective(); |
break; |
default: |
errors.report(tokens[tokenB + 1]); |
} |
} |
else if (tokens[tokenB].id == DIR_EXTERN) { |
// extern symbols |
interpretExternDirective(); |
} |
else if (tokens[tokenB].id == DIR_PUBLIC) { |
// the interpretation of public symbol declarations is postponed to pass 4 after all |
// symbols have been defined and got their final value |
lines[linei].type = LINE_PUBLICDEF; |
} |
else if (tokens[tokenB].type == TOK_NAM && tokens[tokenB+1].id == ':') { |
interpretLabel(tokenB); |
if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1(); |
} |
else if (tokens[tokenB].type == TOK_TYP && (tokens[tokenB+1].type == TOK_NAM || tokens[tokenB+1].type == TOK_SYM)) { |
interpretVariableDefinition2(); |
} |
else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) { |
interpretAlign(); |
} |
else if (tokens[tokenB].type == TOK_SYM && tokens[tokenB+1].id == ':' && pass == 2) { |
errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined |
} |
else { |
determineLineType(); // check if code or data |
if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1(); |
} |
} |
else { |
determineLineType(); // check if code or data (can only be code) |
} |
} |
|
// loop through lines again to replace names that are forward references to symbols defined during pass 2 |
for (linei = 1; linei < lines.numEntries(); linei++) { |
tokenB = lines[linei].firstToken; // first token in line |
tokenN = lines[linei].numTokens; // number of tokens in line |
replaceKnownNames(); // replace previously defined names by symbol references |
} |
} |
|
|
// Show all symbols. For debugging only |
void CAssembler::showSymbols() { |
uint32_t symi; |
ElfFWC_Sym2 sym; |
printf("\n\nSymbol: name, section, addr, type, size, binding"); |
for (symi = 1; symi < symbols.numEntries(); symi++) { |
sym = symbols[symi]; |
printf("\n%3i: %10s, %7i, %4X", symi, symbolNameBuffer.buf() + sym.st_name, |
sym.st_section, (uint32_t)sym.st_value); |
if (sym.st_type == STT_CONSTANT || sym.st_type == STT_VARIABLE) { |
if (sym.st_other & STV_FLOAT) { // floating point constant |
union { uint64_t i; double d; } val; |
val.i = sym.st_value; |
printf(" = %G", val.d); |
} |
else if (sym.st_other & STV_STRING) { // string |
printf(" = %s", stringBuffer.getString((uint32_t)sym.st_value)); |
} |
else { |
// print 64 bit integer constant |
printf(" = 0x"); |
if (uint64_t(sym.st_value) >> 32) { |
printf("%X%08X", uint32_t(sym.st_value >> 32), uint32_t(sym.st_value)); |
} |
else { |
printf("%X", uint32_t(sym.st_value)); |
} |
// this method causes warnings: |
// printf(((sizeof(long int) > 4) ? " = 0x%lx" : " = 0x%llx"), sym.st_value); |
} |
} |
else { |
printf(" %5X, %X*%X, %7X", // other type |
sym.st_type, sym.st_unitsize, sym.st_unitnum, sym.st_bind); |
} |
} |
} |
|
// Show all tokens. For debugging only |
void CAssembler::showTokens() { |
SKeyword const tokenNames[] = { |
{"name", TOK_NAM}, // unidentified name |
{"direc", TOK_DIR}, // section or function directive |
{"attrib", TOK_ATT}, // section or function attribute |
{"label", TOK_LAB}, // code label or function name |
{"datalb", TOK_VAR}, // data label |
{"secnm", TOK_SEC}, // section name |
{"type", TOK_TYP}, // type name |
{"reg", TOK_REG}, // register name |
{"instr", TOK_INS}, // instruction name |
{"oper", TOK_OPR}, // operator |
{"option", TOK_OPT}, // operator |
{"num", TOK_NUM}, // number |
{"float", TOK_FLT}, // floating point number |
{"char", TOK_CHA}, // character or string in single quotes ' ' |
{"string", TOK_STR}, // string in double quotes " " |
{"symbol", TOK_SYM}, // symbol |
{"expression", TOK_XPR}, // expression |
{"eof", TOK_EOF}, // string in double quotes " " |
{"hll", TOK_HLL} // string in double quotes " " |
// {"error", TOK_ERR} // error. illegal character or unmatched quote |
}; |
|
uint32_t line, tok, i; |
for (line = 1; line < lines.numEntries(); line++) { |
if (line < lines.numEntries() && lines[line].numTokens) { |
printf("\nline %2i type %X", lines[line].linenum, lines[line].type); |
|
for (tok = lines[line].firstToken; tok < lines[line].firstToken + lines[line].numTokens; tok++) { |
// find name for token type |
const char * nm = 0; |
for (i = 0; i < TableSize(tokenNames); i++) { |
if (tokenNames[i].id == tokens[tok].type) nm = tokenNames[i].name; |
} |
if (nm) printf("\n%4X %8s: ", tok, nm); // Token type |
else printf("type %4X", tokens[tok].type); |
|
switch (tokens[tok].type) { |
case TOK_DIR: case TOK_ATT: case TOK_TYP: case TOK_OPT: case TOK_HLL: |
nm = 0; |
for (i = 0; i < TableSize(keywordsList); i++) { |
if (keywordsList[i].id == tokens[tok].id) nm = keywordsList[i].name; |
} |
if (nm) printf("%s", nm); |
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); |
break; |
case TOK_OPR: |
nm = 0; |
for (i = 0; i < TableSize(operatorsList); i++) { |
if (operatorsList[i].id == tokens[tok].id) nm = operatorsList[i].name; |
} |
if (nm) printf("%s", nm); |
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); |
break; |
case TOK_REG: //registerNames |
nm = 0; |
for (i = 0; i < TableSize(registerNames); i++) { |
if (registerNames[i].id == tokens[tok].id) nm = registerNames[i].name; |
} |
if (nm) printf("%s%i", nm, tokens[tok].id & 0xFF); |
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); |
break; |
case TOK_NAM: case TOK_NUM: case TOK_FLT: case TOK_LAB: case TOK_VAR: case TOK_SEC: |
case TOK_CHA: case TOK_STR: case TOK_INS: case TOK_SYM: |
for (i = 0; i < tokens[tok].stringLength; i++) { |
printf("%c", buf()[tokens[tok].pos + i]); |
} |
printf(" id %X, value %X", tokens[tok].id, tokens[tok].value.w); |
break; |
case TOK_XPR: |
default: |
printf("0x%X 0x%X 0x%X %2i", tokens[tok].id, tokens[tok].value.w, tokens[tok].pos, tokens[tok].stringLength); |
break; |
} |
} |
} |
} |
} |
|
void CAssembler::initializeWordLists() { |
// Operators list |
operators.pushBig(operatorsList, sizeof(operatorsList)); |
operators.sort(); |
// Keywords list |
keywords.pushBig(keywordsList,sizeof(keywordsList)); |
keywords.sort(); |
// Read instruction list from file |
CCSVFile instructionListFile; |
instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH); // Filename of list of instructions |
instructionListFile.parse(); // Read and interpret instruction list file |
instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container |
instructionlistId.copy(instructionlist); // copy instruction list |
instructionlistNm.copy(instructionlist); // copy instruction list |
// sort lists by different criteria, defined by the different operators: |
// operator < (SInstruction const & a, SInstruction const & b) |
// operator < (SInstruction3 const & a, SInstruction3 const & b) |
SInstruction3 nullInstruction; // empty record |
zeroAllMembers(nullInstruction); |
instructionlistId.push(nullInstruction); // Empty record will go to position 0 to avoid an instruction with index 0 |
instructionlistNm.sort(); // Sort instructionlist by name |
instructionlistId.sort(); // Sort instructionlistId by id |
} |