URL
https://opencores.org/ocsvn/forwardcom/forwardcom/trunk
Subversion Repositories forwardcom
[/] [forwardcom/] [bintools/] [assem1.cpp] - Rev 77
Go to most recent revision | Compare with Previous | Blame | View Log
/**************************** assem1.cpp ******************************** * Author: Agner Fog * Date created: 2017-04-17 * Last modified: 2021-07-10 * Version: 1.11 * Project: Binary tools for ForwardCom instruction set * Module: assem.cpp * Description: * Module for assembling ForwardCom .as files. Contains: * pass1(): Split input file into lines and tokens. Remove comments. Find symbol definitions * pass2(): Handle meta code. Classify lines. Identify symbol names, sections, functions * * Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses ******************************************************************************/ #include "stdafx.h" const char * allowedInNames = "_$@"; // characters allowed in symbol names (don't allow characters that are used as operators) const bool allowUTF8 = true; // UTF-8 characters allowed in symbol names const bool allowNestedComments = true; // allow nested comments: /* /* */ */ // Operator for sorting symbols by name. Used by assembler // List of operators SOperator operatorsList[] = { // name, id, priority {"(", '(', 1}, {")", ')', 1}, {"[", '[', 1}, {"]", ']', 1}, {"{", '{', 1}, {"}", '}', 1}, {"'", 39, 1}, {"\"", '"', 1}, // " {"/*", 'c', 1}, // comment begin {"*/", 'd', 1}, // comment end {".", '.', 2}, {"!", '!', 3}, {"~", '~', 3}, {"++", '+'+D2, 3}, {"--", '-'+D2, 3}, {"*", '*', 4}, {"/", '/', 4}, {"%", '%', 4}, {"+", '+', 5}, {"-", '-', 5}, {"<<", '<'+D2, 6}, {">>", '>'+D2, 6}, // signed shift right {">>>", '>'+D3, 6}, // unsigned shift right {"<", '<', 7}, {"<=", '<'+EQ, 7}, {">", '>', 7}, {">=", '>'+EQ, 7}, {"==", '='+D2, 8}, {"!=", '!'+EQ, 8}, {"&", '&', 9}, {"^", '^', 10}, {"|", '|', 11}, {"&&", '&'+D2, 12}, {"||", '|'+D2, 13}, {"^^", '^'+D2, 13}, // boolean XOR. non-standard operator {"?", '?', 14}, {":", ':', 14}, {"=", '=', 15}, {"+=", '+'+EQ, 15}, {"-=", '-'+EQ, 15}, {"*=", '*'+EQ, 15}, {"/=", '/'+EQ, 15}, {"%=", '%'+EQ, 15}, {"<<=", '<'+D2+EQ, 15}, {">>=", '>'+D2+EQ, 15}, // signed shift right {">>>=", '>'+D3+EQ, 15}, // unsigned shift right {"&=", '&'+EQ, 15}, {"^=", '^'+EQ, 15}, {"|=", '|'+EQ, 15}, {",", ',', 16}, {"//", '/'+D2, 20}, // comment, end of line {";", ';', 20} // comment, end of line }; // List of keywords SKeyword keywordsList[] = { // name, id {"section", DIR_SECTION}, // TOK_DIR: section, functions directives {"function", DIR_FUNCTION}, {"end", DIR_END}, {"public", DIR_PUBLIC}, {"extern", DIR_EXTERN}, // TOK_ATT: attributes of sections, functions and symbols {"read", ATT_READ}, // readable section {"write", ATT_WRITE}, // writeable section {"execute", ATT_EXEC}, // executable section {"align", ATT_ALIGN}, // align section, data, or code {"weak", ATT_WEAK}, // weak linking {"reguse", ATT_REGUSE}, // register use {"constant", ATT_CONSTANT}, // external constant {"uninitialized", ATT_UNINIT}, // uninitialized section (BSS) {"communal", ATT_COMDAT}, // communal section. duplicates and unreferenced sections are removed {"exception_hand", ATT_EXCEPTION}, // exception handler and stack unroll information {"event_hand", ATT_EVENT}, // event handler list, including constructors and destructors {"debug_info", ATT_DEBUG}, // debug information {"comment_info", ATT_COMMENT}, // comments, including copyright and required libraries // TOK_TYP: type names {"int8", TYP_INT8}, {"uint8", TYP_INT8+TYP_UNS}, {"int16", TYP_INT16}, {"uint16", TYP_INT16+TYP_UNS}, {"int32", TYP_INT32}, {"uint32", TYP_INT32+TYP_UNS}, {"int64", TYP_INT64}, {"uint64", TYP_INT64+TYP_UNS}, {"int128", TYP_INT128}, {"uint128", TYP_INT128+TYP_UNS}, {"int", TYP_INT32}, {"uint", TYP_INT32+TYP_UNS}, {"float", TYP_FLOAT32}, {"double", TYP_FLOAT64}, {"float16", TYP_FLOAT16}, {"float32", TYP_FLOAT32}, {"float64", TYP_FLOAT64}, {"float128", TYP_FLOAT128}, {"string", TYP_STRING}, // TOK_OPT: options of instructions and operands {"mask", OPT_MASK}, {"fallback", OPT_FALLBACK}, {"length", OPT_LENGTH}, {"broadcast", OPT_BROADCAST}, {"limit", OPT_LIMIT}, {"scalar", OPT_SCALAR}, {"options", OPT_OPTIONS}, {"option", OPT_OPTIONS}, // alias // TOK_REG: register names {"numcontr", REG_NUMCONTR}, {"threadp", REG_THREADP}, {"datap", REG_DATAP}, {"ip", REG_IP}, {"sp", REG_SP}, // TOK_HLL: high level language keywords {"if", HLL_IF}, {"else", HLL_ELSE}, {"switch", HLL_SWITCH}, // switch (r1, scratch registers) { case 0: break; ...} {"case", HLL_CASE}, {"for", HLL_FOR}, // for (r1 = 1; r1 <= r2; r1++) {} {"in", HLL_IN}, // for (float v1 in [r1-r2], nocheck) // (r2 counts down) {"while", HLL_WHILE}, // while (r1 > 0) {} {"do", HLL_DO}, // do {} while () {"break", HLL_BREAK}, // break out of switch or loop {"continue", HLL_CONTINUE}, // continue loop {"true", HLL_TRUE}, // constant = 1 {"false", HLL_FALSE}, // constant = 0 // temporary additions. will be replaced by macros later: {"push", HLL_PUSH}, // push registers {"pop", HLL_POP}, // pop registers }; // List of register name prefixes SKeyword registerNames[] = { // name, id {"r", REG_R}, {"v", REG_V}, {"spec", REG_SPEC}, {"capab", REG_CAPAB}, {"perf", REG_PERF}, {"sys", REG_SYS} }; CAssembler::CAssembler() { // Constructor // Reserve size for buffers const int estimatedLineLength = 16; const int estimatedTokensPerLine = 10; int estimatedNumLines = dataSize() / estimatedLineLength; lines.setNum(estimatedNumLines); tokens.setNum(estimatedNumLines * estimatedTokensPerLine); errors.setOwner(this); // Initialize and sort lists initializeWordLists(); ElfFwcShdr nullHeader; // make first section header empty zeroAllMembers(nullHeader); sectionHeaders.push(nullHeader); } void CAssembler::go() { // Write feedback text to console feedBackText1(); // Set default options if (cmd.codeSizeOption == 0) cmd.codeSizeOption = 1 << 24; if (cmd.dataSizeOption == 0) cmd.dataSizeOption = 1 << 15; // initialize options code_size = cmd.codeSizeOption; data_size = cmd.dataSizeOption; do { // This loop is repeated only once. Just convenient to break out of in case of errors pass = 1; // Split input file into lines and tokens. Find symbol definitions pass1(); if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} pass = 2; // A. Handle metaprogramming directives // B. Classify lines // C. Identify symbol names, sections, labels, functions pass2(); if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} //showTokens(); //!! for debugging only //showSymbols(); //!! for debugging only pass = 3; // Interpret lines. Generate code and data pass3(); if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} pass = 4; // Resolve internal cross references, optimize forward references pass4(); if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} pass = 5; // Make binary file pass5(); if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;} } while (false); // output any error messages errors.outputErrors(); if (errors.numErrors()) cmd.mainReturnValue = 1; // make sure makefile process stops on error // output object file outFile.write(cmd.getFilename(cmd.outputFile)); } // Character can be the start of a symbol name inline bool nameChar1(char c) { return ((c | 0x20) >= 'a' && (c | 0x20) <= 'z') || ((c & 0x80) && allowUTF8) || strchr(allowedInNames, c); } // Character can be the part of a symbol name inline bool nameChar2(char c) { return nameChar1(c) || (c >= '0' && c <= '9'); } // check if string is a number. Can be decimal, binary, octal, hexadecimal, or floating point // Returns the length of the part of the string that belongs to the number uint32_t isNumber(const char * s, int maxlen, bool * isFloat) { bool is_float = false; char c = s[0]; if ((c < '0' || c > '9') && (c != '.' || s[1] < '0' || s[1] > '9')) return 0; int i = 0; int state = 0; // 0: begin // 1: after 0 // 2: after digits 0-9 // 3: after 0x // 4: after 0b or 0o // 5: after . // 6: after E // 7: after E09 // 8: after E+- for (i = 0; i < maxlen; i++) { c = s[i]; char cl = c | 0x20; // upper case letter if (c == '0' && state == 0) {state = 1; continue;} if (cl == 'x' && state == 1) {state = 3; continue;} if ((cl == 'b' || cl == 'o') && state == 1) {state = 4; continue;} if (c == '.' && state <= 2) {state = 5; is_float = true; continue;} if (cl == 'e' && (state <= 2 || state == 5)) {state = 6; is_float = true; continue;} if ((c == '+' || c == '-') && state == 6) {state = 8; continue;} if (c >= '0' && c <= '9') { if (state < 2) state = 2; if (state == 6) state = 7; continue; } if (cl >= 'a' && cl <= 'f' && state == 3) continue; // Anything else: stop here break; } if (isFloat) *isFloat = is_float; // return isFloat return i; // return length } // Check if string is a register name uint32_t isRegister(const char * s, uint32_t len) { uint32_t i, j, nl, num; for (i = 0; i < TableSize(registerNames); i++) { if ((s[0] | 0x20) == registerNames[i].name[0]) { // first character match, lower case nl = (uint32_t)strlen(registerNames[i].name); // length of register name prefix if (len < nl + 1 || len > nl + 2) continue; // continue search if length wrong for (j = 0; j < nl; j++) { // check if each character matches if ((s[j] | 0x20) != registerNames[i].name[j]) { // lower case compare j = 0xFFFFFFFF; break; } } if (j == 0xFFFFFFFF) continue; // no match if (s[j] < '0' || s[j] > '9') continue; // not a number num = s[j] - '0'; // get number, first digit if (len == nl + 2) { // two digit number if (s[j+1] < '0' || s[j+1] > '9') continue;// second digit not a number num = num * 10 + (s[j+1] - '0'); } if (num >= 32) continue; // number too high return num + registerNames[i].id; // everyting matches } } return 0; // not found. return 0 } // write feedback text on stdout void CAssembler::feedBackText1() { if (cmd.verbose) { // Tell what we are doing: printf("\nAssembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(cmd.outputFile)); } } // Split input file into lines and tokens. Handle preprocessing directives. Find symbol definitions void CAssembler::pass1() { uint32_t n = 0; // offset into assembly file uint32_t m; // end of current token int32_t i, f; // temporary int32_t comment = 0; // 0: normal, 1: inside comment to end of line, 2: inside /* */ comment uint32_t commentStart = 0; // start position of multiline comment uint32_t commentStartColumn = 0;// start column of multiline comment char c; // current character or byte SToken token = {0}; // current token SKeyword keywSearch; // record to search for keyword SOperator opSearch; // record to search for operator SInstruction instructSearch; // record to search for instruction SLine line = {0,0,0,0,0,0,0}; // line record lines.push(line); // empty records for line 0 linei = 1; // start at line 1 numSwitch = 0; // count switch statements tokens.push(token); // unused token 0 if (dataSize() >= 3 && (get<uint32_t>(0) & 0xFFFFFF) == 0xBFBBEF) { n += 3; // skip UTF-8 byte order mark } line.beginPos = n; // start of line 1 line.firstToken = tokens.numEntries(); line.file = filei; // loop through file while (n < dataSize()) { c = get<char>(n); // get character // is it space or a control character? if (uint8_t(c) <= 0x20) { if (c == ' ' || c == '\t') { // skip space and tab n++; continue; } if (c == '\r' || c == '\n') { // newline n++; if (c == '\r' && get<char>(n) == '\n') n++; // "\r\n" windows newline if (comment == 1) comment = 0; // end comment if (n <= dataSize()) { // finish current line line.numTokens = tokens.numEntries() - line.firstToken; line.linenum = linei++; if (line.numTokens) { // save line if not empty lines.push(line); } // start next line line.type = 0; line.file = filei; line.beginPos = n; line.firstToken = tokens.numEntries(); } continue; } // illegal control character token.type = TOK_ERR; line.type = LINE_ERROR; comment = 1; // ignore rest of line m = tokens.push(token); // save error token errors.report(n, 1, ERR_CONTROL_CHAR); } // prepare token of any type token.pos = n; token.stringLength = 1; token.id = 0; //token.column = n - line.beginPos; // is it a name? if (!comment && nameChar1(c)) { // start of a name m = n+1; while (m < dataSize() && nameChar2(get<char>(m))) m++; // name goes from position n to m-1. make token token.type = TOK_NAM; token.pos = n; token.stringLength = m - n; // is it a register name f = isRegister((char*)buf()+n, token.stringLength); if (f) { token.type = TOK_REG; token.id = f; } // is it a keyword? if (token.type == TOK_NAM && m-n < sizeof(keywSearch.name)) { memcpy(keywSearch.name, buf()+n, m-n); keywSearch.name[m-n] = 0; f = keywords.findFirst(keywSearch); if (f >= 0) { // keyword found token.id = keywords[f].id; token.type = keywords[f].id >> 24; if (token.id == HLL_SWITCH) numSwitch++; } } // is it an instruction? if (token.type == TOK_NAM && m-n < sizeof(instructSearch.name)) { memcpy(instructSearch.name, buf()+n, m-n); instructSearch.name[m-n] = 0; f = instructionlistNm.findFirst(instructSearch); if (f >= 0) { // instruction name found token.type = TOK_INS; token.id = instructionlistNm[f].id; } } n = m; tokens.push(token); // save token continue; } // Is it a number? if (!comment) { bool isFloat; f = isNumber((char*)buf() + n, dataSize() - n, &isFloat); if (f) { token.type = TOK_NUM + isFloat; token.id = n; // save number as string. The value is extracted later token.stringLength = f; n += f; tokens.push(token); // save token continue; } } // is it an operator? opSearch.name[0] = c; opSearch.name[1] = 0; f = operators.findFirst(opSearch); if (f >= 0) { // found single-character operator // make a greedy search for multi-character operators i = f; for (i = f+1; (uint32_t)i < operators.numEntries(); i++) { if (operators[i].name[0] != c) break; if (memcmp((char*)buf()+n, operators[i].name, strlen(operators[i].name)) == 0) f = i; } token.type = TOK_OPR; token.id = operators[f].id; token.priority = operators[f].priority; token.stringLength = (uint32_t)strlen(operators[f].name); // search for operators that need consideration here switch (token.id) { case 39: case '"': // quoted string in single or double quotes if (comment) break; // search for end of string token.type = token.id == 39 ? TOK_CHA : TOK_STR; token.pos = n + 1; m = n; while (true) { if (get<char>(m+1) == '\r' || get<char>(m+1) == '\n' || m == dataSize()) { // end of line without matching end quote. multi-line quotes not allowed token.type = TOK_ERR; errors.report(token.pos-1, 1, ERR_QUOTE_BEGIN); comment = 1; // skip rest of line break; } if (get<char>(m+1) == c && get<char>(m) != '\\') { // matching end quote not preceded by escape backslash token.stringLength = m - n; n += 2; break; } m++; } break; case '/'+D2: // "//". comment to end of line if (comment == 0) { comment = 1; } break; case 'c': // "/*" start of comment if (comment == 1) { n += token.stringLength; // skip and don't save token continue; } if (comment == 2) { // nested comment if (allowNestedComments) { comment++; } else { token.type = TOK_ERR; errors.report(n, 2, ERR_COMMENT_BEGIN); } break; } comment = 2; commentStart = n; commentStartColumn = n - line.beginPos; break; case 'd': // "*/" end of comment if (comment == 1) { n += token.stringLength; // skip and don't save token continue; } if (comment == 2) { comment = 0; n += token.stringLength; // skip and don't save token continue; } else if (comment > 2 && allowNestedComments) { comment--; n += token.stringLength; // skip and don't save token continue; } else { token.type = TOK_ERR; // unmatched end comment errors.report(n, 2, ERR_COMMENT_END); comment = 1; } break; case ';': // semicolon starts a new pseudo-line if (comment) break; // finish current line tokens.push(token); // the ';' token is used only in for(;;) loops. should be ignored at the end of the line otherwise n += token.stringLength; line.numTokens = tokens.numEntries() - line.firstToken; line.linenum = linei; if (line.numTokens) { // save line if not empty lines.push(line); } // start next line line.beginPos = n; line.firstToken = tokens.numEntries(); continue; // don't save ';' token twice case '{': case '}': if (comment) break; // put each bracket in a separate pseudo-line to ease high level language parsing // finish current line line.numTokens = tokens.numEntries() - line.firstToken; line.linenum = linei; if (line.numTokens) { // save line if not empty lines.push(line); } // start line with bracket only line.beginPos = n; line.firstToken = tokens.numEntries(); tokens.push(token); // save token n += token.stringLength; line.numTokens = 1; lines.push(line); // start line after bracket line.beginPos = n; line.firstToken = tokens.numEntries(); continue; } if (comment == 0 && token.type != TOK_ERR) { // save token unless we are inside a comment or an error has occurred tokens.push(token); // save token } n += token.stringLength; continue; } if (comment) { // we are inside a comment. Continue search only for end of line or end of comment n++; continue; } // none of the above. Make token for illegal character token.type = TOK_ERR; line.type = LINE_ERROR; errors.report(n, 1, ERR_ILLEGAL_CHAR); comment = 1; // ignore rest of line n++; } // finish last line // tokens.push(token); line.numTokens = tokens.numEntries() - line.firstToken; lines.push(line); // start pseudo line line.beginPos = n; line.firstToken = tokens.numEntries(); line.type = 0; // check for unmatched comment if (comment >= 2) { token.type = TOK_ERR; errors.report(commentStart, commentStartColumn, ERR_COMMENT_BEGIN); } // make EOF token in the end line.type = 0; line.beginPos = n; line.firstToken = tokens.numEntries(); line.numTokens = 1; lines.push(line); token.pos = n; token.stringLength = 0; token.type = TOK_EOF; // end of file tokens.push(token); // save eof token } void CAssembler::interpretSectionDirective() { // Interpret section directive during pass 2 or 3 // pass 2: identify section name and type, and give it a number // pass 3: make section header // to do: nested sections uint32_t tok; // token number ElfFWC_Sym2 sym; // symbol record int32_t sectionsym = 0; // index to symbol record defining current section name uint32_t state = 0; // 1: after align, 2: after '=' ElfFwcShdr sectionHeader; // section header zeroAllMembers(sym); // reset symbol zeroAllMembers(sectionHeader); // reset section header sectionHeader.sh_type = SHT_PROGBITS; // default section type sectionFlags = 0; for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get section attributes if (tokens[tok].type == TOK_ATT) { if (tokens[tok].id == ATT_UNINIT && state != 2) { sectionHeader.sh_type = SHT_NOBITS; // uninitialized section (BSS) sectionFlags |= SHF_READ | SHF_WRITE; } else if (tokens[tok].id == ATT_COMDAT && state != 2) { sectionHeader.sh_type = SHT_COMDAT; // communal section. duplicates and unreferenced sections are removed } else if (tokens[tok].id != ATT_ALIGN && state == 0) { sectionFlags |= tokens[tok].id & 0xFFFFFF; if (sectionFlags & SHF_EXEC) sectionFlags |= SHF_IP; // executable section must be IP based } else if (tokens[tok].id == ATT_ALIGN && state == 0) { state = 1; } else { errors.report(tokens[tok]); break; } } else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_IP && state == 0) sectionFlags |= SHF_IP; else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_DATAP && state == 0) sectionFlags |= SHF_DATAP; else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_THREADP && state == 0) sectionFlags |= SHF_THREADP; else if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=' && state == 1) state = 2; else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',' && state != 2) ; // comma, ignore else if (tokens[tok].type == TOK_NUM && state == 2) { if (pass >= 3) { // alignment value uint32_t alignm = expression(tok, 1, 0).value.w; if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) errors.reportLine(ERR_ALIGNMENT); else { sectionHeader.sh_align = bitScanReverse(alignm); } } state = 0; } else { errors.report(tokens[tok]); break; } } // find or define symbol with section name sectionsym = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); if (sectionsym <= 0) { // symbol not previously defined. Define it now sym.st_type = STT_SECTION; sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); sym.st_bind = sectionFlags; sectionsym = addSymbol(sym); // save symbol with section name } else { // symbol already defined. check that it is a section name if (symbols[sectionsym].st_type != STT_SECTION) { errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); } } sectionFlags |= SHF_ALLOC; lines[linei].type = LINE_SECTION; // line is section directive lines[linei].sectionType = sectionFlags; if (symbols[sectionsym].st_section == 0) { // new section. make section header sectionHeader.sh_name = symbols[sectionsym].st_name; if (sectionFlags & SHF_EXEC) { sectionHeader.sh_entsize = 4; if (sectionHeader.sh_align < 2) sectionHeader.sh_align = 2; sectionFlags |= SHF_IP; } else { // data section if (!(sectionFlags & (SHF_READ | SHF_WRITE))) sectionFlags |= SHF_READ | SHF_WRITE; // read or write attributes not specified, default is both if (!(sectionFlags & (SHF_IP | SHF_DATAP | SHF_THREADP))) { // address reference not specified. assume datap if writeable, ip if readonly if (sectionFlags & SHF_WRITE) sectionFlags |= SHF_DATAP; else sectionFlags |= SHF_IP; } } sectionHeader.sh_flags = sectionFlags; section = sectionHeaders.push(sectionHeader); symbols[sectionsym].st_section = section; } else { // this section is seen before section = symbols[sectionsym].st_section; if (sectionHeaders[section].sh_align < sectionHeader.sh_align) sectionHeaders[section].sh_align = sectionHeader.sh_align; if (sectionFlags && (sectionFlags & ~sectionHeaders[section].sh_flags)) errors.reportLine(ERR_SECTION_DIFFERENT_TYPE); sectionFlags = (uint32_t)sectionHeaders[section].sh_flags; if (sectionHeader.sh_align > 2) { // insert alignment code SCode code; zeroAllMembers(code); code.instruction = II_ALIGN; code.value.u = (int64_t)1 << sectionHeader.sh_align; code.sizeUnknown = 0x80; code.section = section; codeBuffer.push(code); } } } void CAssembler::interpretFunctionDirective() { // Interpret function directive during pass 2 uint32_t tok; // token number ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol int32_t symi; symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); if (symi > 0) { if (pass == 2) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined } else { // define symbol sym.st_type = STT_FUNC; sym.st_other = STV_IP; sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); sym.st_bind = 0; sym.st_section = section; for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get function attributes if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') continue; if (tokens[tok].id == ATT_WEAK) sym.st_bind |= STB_WEAK; if (tokens[tok].id == ATT_REGUSE) { if (tokens[tok+1].id == '=' && tokens[tok+2].type == TOK_NUM) { tok += 2; sym.st_reguse1 = expression(tok, 1, 0).value.w; sym.st_other |= STV_REGUSE; if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) { tok += 2; sym.st_reguse2 = expression(tok, 1, 0).value.w; } } } else if (tokens[tok].type == TOK_DIR && tokens[tok].id == DIR_PUBLIC) sym.st_bind |= STB_GLOBAL; else { errors.report(tokens[tok]); // unexpected token } } symi = addSymbol(sym); // save symbol with function name } lines[linei].type = LINE_FUNCTION; // line is function directive if (pass == 3 && symi) { // make a label here. The final address will be calculated in pass 4 SCode code; // current instruction code zeroAllMembers(code); // reset code structure code.label = symbols[symi].st_name; code.section = section; codeBuffer.push(code); } } void CAssembler::interpretEndDirective() { // Interpret section or function end directive during pass 2 ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol int32_t symi; CTextFileBuffer tempBuffer; // temporary storage of names symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength); if (symi <= 0) { errors.reportLine(ERR_UNMATCHED_END); } else { if (symbols[symi].st_type == STT_SECTION) { if (symbols[symi].st_section == section) { // current section ends here section = 0; sectionFlags = 0; } else { errors.reportLine(ERR_UNMATCHED_END); } } else if (symbols[symi].st_type == STT_FUNC && pass >= 4) { symbols[symi].st_unitsize = 4; // to do: insert size! //symbols[symi].st_unitsize = ? // support function(){} syntax. prevent nested functions } } lines[linei].type = LINE_ENDDIR; // line is end directive } // Interpret line specifying options void CAssembler::interpretOptionsLine() { // Expecting a line of the type: // "options codesize = 0x10000, datasize = 1 << 20" uint32_t tok; // token number uint32_t state = 0; // 0: start, 1: after option name, 2: after equal sign, 3: after expression const char * optionname = 0; int option = 0; // 1: codesize, 2: datasize SExpression val; // value to be assigned SCode code; // instruction code containing options for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) { switch (state) { case 0: // start. expect name "datasize" or "codesize" if (tokens[tok].type != TOK_NAM) { errors.report(tokens[tok]); return; // unexpected token } optionname = (char*)buf()+tokens[tok].pos; // tokens[tok].stringLength; if (strncasecmp_(optionname, "codesize", 8) == 0) option = 1; else if (strncasecmp_(optionname, "datasize", 8) == 0) option = 2; else { errors.report(tokens[tok]); return; // unexpected name } state = 1; break; case 1: // after name, expecting equal sign if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=') { state = 2; } else { errors.report(tokens[tok]); return; // unexpected token } break; case 2: // expect expression val = expression(tok, tokenB + tokenN - tok, 0); // evaluate number or expression tok += val.tokens - 1; if (val.etype != XPR_INT) { errors.reportLine(ERR_MUST_BE_CONSTANT); return; } zeroAllMembers(code); // reset code structure switch (option) { case 1: // set codesize if (val.value.u == 0) code_size = cmd.codeSizeOption; else code_size = val.value.u; code.value.u = code_size; break; case 2: // set datasize if (val.value.u == 0) data_size = cmd.dataSizeOption; else data_size = val.value.u; code.value.u = data_size; break; } // This is called only in pass 3. Save this option for pass 4: code.instruction = II_OPTIONS; code.section = section; code.fitNum = option; code.sizeUnknown = 1; codeBuffer.push(code); state = 3; break; case 3: // expect comma or nothing if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { state = 0; // start over after comma } else { errors.report(tokens[tok]); return; // unexpected token } } } } // Find symbol by index into symbolNameBuffer. The return value is an index into symbols. // Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name uint32_t CAssembler::findSymbol(uint32_t namei) { ElfFWC_Sym2 sym; // temporary symbol record used for searching sym.st_name = namei; return symbols.findFirst(sym); // find symbol by name } // Find symbol by name as string. The return value is an index into symbols. // Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name uint32_t CAssembler::findSymbol(const char * name, uint32_t len) { uint32_t saveSize = symbolNameBuffer.dataSize(); // save symbolNameBuffer size for later reset uint32_t namei = symbolNameBuffer.putStringN(name, len); // put name temporarily into symbolNameBuffer int32_t symi = findSymbol(namei); // find symbol by name index symbolNameBuffer.setSize(saveSize); // remove temporary name from symbolNameBuffer return symi; // return symbol index } // Add a symbol to symbols list uint32_t CAssembler::addSymbol(ElfFWC_Sym2 & sym) { int32_t f = symbols.findFirst(sym); if (f >= 0) { // error: symbol already defined return 0; } else { return symbols.addUnique(sym); } } // interpret name: options {, name: options} void CAssembler::interpretExternDirective() { uint32_t tok; // token number uint32_t nametok = 0; // last name token ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol sym.st_bind = STB_GLOBAL; // Example: extern name1: int32 weak, name2: function, name3, name4: read uint32_t state = 0; // 0: after extern or comma, // 1: after name, // 2: after colon // loop through tokens on this line for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) { switch (state) { case 0: // after extern or comma. expecting name if (tokens[tok].type == TOK_NAM) { // name encountered sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); state = 1; nametok = tok; } else errors.report(tokens[tok]); break; case 1: // after name. expecting colon or comma if (tokens[tok].type == TOK_OPR) { if (tokens[tok].id == ':') { state = 2; continue; } else if (tokens[tok].id == ',') { goto COMMA; } } errors.report(tokens[tok]); break; case 2: // after colon. expecting attribute or comma or end of line if (tokens[tok].type == TOK_TYP) { // symbol size given by type token uint32_t s = tokens[tok].id & 0xF; if (s > 4) s -= 3; // float types sym.st_unitsize = uint32_t(1 << s); sym.st_unitnum = 1; } else if (tokens[tok].type == TOK_ATT || tokens[tok].type == TOK_DIR) { ATTRIBUTE: switch (tokens[tok].id) { case DIR_FUNCTION: case ATT_EXEC: // function or execute if (sym.st_type) { errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE); } sym.st_type = STT_FUNC; sym.st_other = STV_IP | STV_EXEC; break; case ATT_READ: // read if (sym.st_type == 0) sym.st_other |= STV_READ; break; case ATT_WRITE: // write if (sym.st_type == STT_FUNC) { errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE); } else { sym.st_type = STT_OBJECT; } break; case ATT_WEAK: // weak sym.st_bind = STB_WEAK; break; case ATT_CONSTANT: // constant sym.st_type = STT_CONSTANT; break; case ATT_REGUSE: if (tokens[tok+1].id == '=' && (tokens[tok+2].type == TOK_NUM /*|| tokens[tok+2].type == TOK_OPR)*/)) { tok += 2; sym.st_reguse1 = expression(tok, 1, 0).value.w; sym.st_other |= STV_REGUSE; if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) { tok += 2; sym.st_reguse2 = expression(tok, 1, 0).value.w; } } break; default: // error errors.report(tokens[tok]); } } else if (tokens[tok].type == TOK_REG) { switch (tokens[tok].id) { case REG_IP: sym.st_other |= STV_IP; break; case REG_DATAP: sym.st_other |= STV_DATAP; break; case REG_THREADP: sym.st_other |= STV_THREADP; break; default: errors.report(tokens[tok]); } } else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { // end of definition. save symbol COMMA: if (tok < tokenB + tokenN && (tokens[tok + 1].type == TOK_ATT || tokens[tok + 1].type == TOK_DIR)) { tok++; goto ATTRIBUTE; } uint32_t symi = addSymbol(sym); // save symbol with function name if (symi == 0) { // symbol already defined errors.report(tokens[nametok].pos, tokens[nametok].stringLength, ERR_SYMBOL_DEFINED); } sym.st_name = 0; // clear record for next symbol sym.st_type = 0; sym.st_other = 0; sym.st_unitsize = 0; sym.st_unitnum = 0; sym.st_bind = STB_GLOBAL; state = 0; } else { errors.report(tokens[tok]); } break; } } if (state) { // last extern definition does not end with comma. finish it here goto COMMA; } lines[linei].type = LINE_DATADEF; // line is data definition } void CAssembler::interpretLabel(uint32_t tok) { // line begins with a name. interpret label // to do: add type if data. not string type ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol // save name sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); sym.st_section = section; // determine if code or data from section type if (sectionFlags & SHF_EXEC) { sym.st_type = STT_FUNC; sym.st_other = STV_EXEC | STV_IP; } else { sym.st_type = STT_OBJECT; sym.st_other = sectionFlags & STV_SECT_ATTR; } // look for more exact type information if (tokenN > 2) { uint32_t t = tok+2; if (tokens[t].type == TOK_TYP) { uint32_t s = tokens[t].id & 0xF; if (s > 4) s -= 3; sym.st_unitsize = uint32_t(1 << s); sym.st_unitnum = 1; if (tokenN > 3) t++; } if (tokens[t].type == TOK_NUM || tokens[t].type == TOK_FLT) { sym.st_type = STT_OBJECT; lines[linei].type = LINE_DATADEF; } else if (tokens[t].type == TOK_REG || tokens[t].type == TOK_INS || tokens[t].id == '[') { lines[linei].type = LINE_CODEDEF; sym.st_type = STT_FUNC; } } if (section) { // copy type info from section sym.st_other = sectionHeaders[section].sh_flags & STV_SECT_ATTR; } if (lines[linei].type == 0) { lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF; } uint32_t symi = addSymbol(sym); // add symbol to symbols list if (section) { // symbol address symbols[symi].st_value = sectionHeaders[section].sh_size; } tokens[tok].id = symbols[symi].st_name; // save symbol name index if (symi == 0) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); } // interpret assembly style variable definition: // label: type value1, value2 void CAssembler::interpretVariableDefinition1() { int state = 0; // 0: start // 1: after label // 2: after : // 3: after type or , // 4: after value uint32_t tok; // token index uint32_t type = 0; // data type uint32_t dsize = 0; // data size uint32_t dsize1; // log2(dsize) uint32_t dnum = 0; // number of data items uint32_t stringlen = 0; // length of string uint32_t symi = 0; // symbol index ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol SExpression exp1; // expression when interpreting numeric expression if (section == 0) { errors.reportLine(ERR_DATA_WO_SECTION); } // loop through tokens on this line for (tok = tokenB; tok < tokenB + tokenN; tok++) { switch (state) { case 0: // start if (tokens[tok].type == TOK_NAM) { // name. make symbol sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); sym.st_type = STT_OBJECT; symi = symbols.addUnique(sym); tokens[tok].type = TOK_SYM; // change token type tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change state = 1; } else if (tokens[tok].type == TOK_SYM) { // symbol symi = findSymbol(tokens[tok].id); if (symi > 0) { if (pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined } state = 1; } else if (tokens[tok].type == TOK_TYP) { goto TYPE_TOKEN; } else errors.report(tokens[tok]); if (symi && section) { symbols[symi].st_value = sectionHeaders[section].sh_size; } break; case 1: // after label. expect colon if (tokens[tok].type == TOK_OPR && tokens[tok].id == ':') { state = 2; } else errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_EXPECT_COLON); break; case 2: // expect type if (tokens[tok].type == TOK_TYP) { TYPE_TOKEN: type = tokens[tok].id & 0xFF; dsize1 = type & 0xF; if (type & 0x40) dsize1 -= 3; dsize = 1 << dsize1; state = 3; if (section) { // align data uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment if (addr & (dsize - 1)) { // needs to insert zeroes uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize; sectionHeaders[section].sh_size = addr2; // update address if (symi) symbols[symi].st_value = addr2; // update symbol address if (pass >= 3) { dataBuffers[section].align((uint32_t)dsize); // put zeroes in data buffer } } } } else errors.report(tokens[tok]); break; case 3: // after type. expect value. evaluate expression exp1 = expression(tok, tokenB + tokenN - tok, pass < 3 ? 0x10 : 0); // pass 3: may contain symbols not defined yet tok += exp1.tokens - 1; if (exp1.etype & XPR_STRING) { // string expression: get size if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8 stringlen = exp1.sym2; // string length } else stringlen = 0; if (pass < 3) { if (section) sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address } else { if (section) { // save data of desired type if (exp1.etype & XPR_FLT) { // floating point number specified if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected exp1.value.i = int64_t(exp1.value.d); errors.reportLine(ERR_CONFLICT_TYPE); } } else if (exp1.etype & XPR_INT) { if (type & TYP_FLOAT) { // integer specified, float expected exp1.value.d = double(exp1.value.i); // convert to float } } int64_t value = exp1.value.i; //value of expression if (exp1.sym3) { // calculation of symbol value. add relocation if needed uint32_t size = type & 0xF; if (type & 0x40) size -= 3; size = 1 << size; //value = calculateConstantOperand(exp1, dataBuffers[section].dataSize(), size); value = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize); if (exp1.etype & XPR_ERROR) { errors.reportLine((uint32_t)value); // report error break; } // check for overflow bool overflow = false; switch (type & 0xFF) { case TYP_INT8 & 0xFF: overflow = value > 0x7F || value < -0x80; break; case TYP_INT16 & 0xFF: overflow = value > 0x7FFF || value < -0x8000; break; case TYP_INT32 & 0xFF: overflow = value > 0x7FFFFFFF || value < int32_t(0x80000000); break; default:; } if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows } if (sectionHeaders[section].sh_type == SHT_NOBITS) { // uninitialized (BSS) section. check that value is zero, but don't store if (value != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero } else { // save data switch (type & 0xFF) { case TYP_INT8 & 0xFF: if (stringlen) { dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen); break; } dataBuffers[section].push(&value, 1); break; case TYP_INT16 & 0xFF: dataBuffers[section].push(&value, 2); break; case TYP_INT32 & 0xFF: dataBuffers[section].push(&value, 4); break; case TYP_INT64 & 0xFF: dataBuffers[section].push(&value, 8); break; case TYP_INT128 & 0xFF: dataBuffers[section].push(&value, 8); value = value >> 63; // sign extend dataBuffers[section].push(&value, 8); break; case TYP_FLOAT16 & 0xFF: // half precision exp1.value.w = double2half(exp1.value.d); dataBuffers[section].push(&exp1.value.w, 2); break; case TYP_FLOAT32 & 0xFF: { // single precision float val = float(exp1.value.d); dataBuffers[section].push(&val, 4); } break; case TYP_FLOAT64 & 0xFF: // double precision dataBuffers[section].push(&exp1.value.d, 8); break; } } sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address } } if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_SYM1 | XPR_UNRESOLV)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) errors.report(tokens[tok]); if (stringlen) dnum += stringlen; else dnum += 1; state = 4; break; case 4: // after value. expect comma or end of line if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') { state = 3; } else errors.report(tokens[tok]); break; } if (lineError) return; } if (state != 4 && state != 2) errors.report(tokens[tok-1]); if (symi) { // save size symbols[symi].st_unitsize = dsize; symbols[symi].st_unitnum = dnum; symbols[symi].st_section = section; if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT; if (section) { // copy information from section symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR; } } } // interpret C style variable definition: // type name1 = value1, name2[num] = {value, value, ..} void CAssembler::interpretVariableDefinition2() { int state = 0; // 0: start // 1: after type or comma // 2: after name // 3: after [ // 4: after [number // 5: after = // 6: after = number // 7: after { // 8: after {number uint32_t tok; // token index uint32_t dsize = 0; // data element size uint32_t dsize1 = 0; // data element size = 1 << dsize1 uint32_t type = 0; // data type uint32_t arrayNum1 = 1; // number of elements indicated in [] uint32_t arrayNum2 = 0; // number of elements in {} list uint32_t stringlen = 0; // length of string uint32_t symi = 0; // symbol index ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol SExpression exp1; // expression when interpreting numeric expression if (section == 0) { errors.reportLine(ERR_DATA_WO_SECTION); } // loop through tokens on this line for (tok = tokenB; tok < tokenB + tokenN; tok++) { switch (state) { case 0: // this is a type token type = tokens[tok].id & 0xFF; dsize1 = tokens[tok].id & 0xF; if ((type & 0x40) > 3) dsize1 -= 3; dsize = 1 << dsize1; state = 1; if (section) { // align data uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; if (addr & (dsize - 1)) { // needs to insert zeroes uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize; // calculate aligned address sectionHeaders[section].sh_size = addr2; // update address if (pass >= 3) { dataBuffers[section].align(dsize); // put zeroes in data buffer } } if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment } break; case 1: // expecting name token. save name if (tokens[tok].type == TOK_NAM) { // name. make symbol sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength); symi = addSymbol(sym); if (symi == 0 && pass == 2) { errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); break; } symbols[symi].st_type = (sectionFlags & SHF_EXEC) ? STT_FUNC : STT_OBJECT; tokens[tok].type = TOK_SYM; // change token type tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change state = 2; } else if (tokens[tok].type == TOK_SYM) { // symbol symi = findSymbol(tokens[tok].id); if (symi > 0 && pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined state = 2; } else { errors.report(tokens[tok]); } //nametok = tok; symbols[symi].st_unitsize = dsize; symbols[symi].st_unitnum = 0; if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT; if (section) { // copy information from section symbols[symi].st_value = sectionHeaders[section].sh_size; symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR; } break; case 2: // after name. expect , = [ eol if (tokens[tok].type != TOK_OPR) { errors.report(tokens[tok]); break; } switch (tokens[tok].id) { case ',': // finish this symbol definition COMMA: if (arrayNum2 > arrayNum1) { // check if the two array sizes match if (arrayNum1 > 1) { errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_CONFLICT_ARRAYSZ); } else arrayNum1 = arrayNum2; } symbols[symi].st_unitsize = dsize; symbols[symi].st_unitnum = arrayNum1; symbols[symi].st_reguse1 = linei; symbols[symi].st_section = section; if (arrayNum1 > arrayNum2 && section) { // unspecified elements are zero. calculate extra size uint32_t asize = (arrayNum1 - arrayNum2) * dsize; sectionHeaders[section].sh_size += asize; if (pass >= 3 && sectionHeaders[section].sh_type != SHT_NOBITS) { // store any unspecified elements as zero uint64_t zero = 0; while (asize > 8) { dataBuffers[section].push(&zero, 8); asize -= 8; } while (asize > 0) { dataBuffers[section].push(&zero, 1); asize -= 1; } } } // get ready for next symbol zeroAllMembers(sym); arrayNum1 = 1; arrayNum2 = 0; if (state == 99) return; // finished line state = 1; break; case '=': state = 5; break; case '[': state = 3; break; default: errors.report(tokens[tok]); } break; case 3: // after [ . expect number or ] if (tokens[tok].id == ']') { state = 2; break; } if (arrayNum1 > 1) { errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_MULTIDIMENSIONAL); break; // error. multidimensional array not supported } // evaluate numeric expression inside []. // it may contain complex expressions that can only be evaluated later, but // this will not generate an error message here exp1 = expression(tok, tokenB + tokenN - tok, 0x10); if (lineError) return; tok += exp1.tokens -1; if (exp1.etype == 0) errors.report(tokens[tok]); if ((exp1.etype & ~XPR_IMMEDIATE) == 0) { arrayNum1 = exp1.value.w; } state = 4; break; case 4: // after [number. expect ] if (tokens[tok].id != ']') { errors.report(tokens[tok]); break; } state = 2; break; case 5: // after =. expect number or {numbers} if (tokens[tok].id == '{') state = 7; else { state = 6; goto SAVE_VALUE; // interpret value and save it } break; case 6: // after = number. expect comma or eol if (tokens[tok].id != ',') { errors.report(tokens[tok]); break; } goto COMMA; case 7: // after {. expect number list state = 8; SAVE_VALUE: arrayNum2++; if (pass < 3) { // may contain symbols not defined yet. just pass expression and count tokens exp1 = expression(tok, tokenB + tokenN - tok, 0x10); tok += exp1.tokens - 1; if (lineError) return; } else { // pass 5. evaluate expression and save value exp1 = expression(tok, tokenB + tokenN - tok, 0); tok += exp1.tokens - 1; if (lineError) return; if ((exp1.etype & XPR_SYM1) && exp1.sym3 && pass > 3) { // calculation of symbol value. add relocation if needed exp1.value.i = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize); if (exp1.etype & XPR_ERROR) { errors.reportLine((uint32_t)(exp1.value.i)); // report error break; } // check for overflow bool overflow = false; switch (type & 0xFF) { case TYP_INT8 & 0xFF: overflow = exp1.value.i > 0x7F || exp1.value.i < -0x80; break; case TYP_INT16 & 0xFF: overflow = exp1.value.i > 0x7FFF || exp1.value.i < -0x8000; break; case TYP_INT32 & 0xFF: overflow = exp1.value.i > 0x7FFFFFFF || exp1.value.i < int32_t(0x80000000); break; default:; } if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows } } if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_UNRESOLV | XPR_SYM1)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) { errors.report(tokens[tok]); } if (section && section < dataBuffers.numEntries() && pass >= 3) { // save data of desired type if ((exp1.etype & XPR_IMMEDIATE) == XPR_FLT) { // floating point number specified if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected exp1.value.i = int64_t(exp1.value.d); errors.reportLine(ERR_CONFLICT_TYPE); } } else if ((exp1.etype & XPR_IMMEDIATE) == XPR_INT) { if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) { // integer specified, float expected exp1.value.d = double(exp1.value.i); // convert to float } } else if (exp1.etype & XPR_STRING) { // string expression: get size if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8 stringlen = exp1.sym2; // string length } else stringlen = 0; if (sectionHeaders[section].sh_type == SHT_NOBITS) { // uninitialized (BSS) section. check that value is zero, but don't store if (exp1.value.i != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero } else { // save data switch (type & 0xFF) { case TYP_INT8 & 0xFF: if (stringlen) { dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen); break; } dataBuffers[section].push(&exp1.value.u, 1); break; case TYP_INT16 & 0xFF: dataBuffers[section].push(&exp1.value.u, 2); break; case TYP_INT32 & 0xFF: dataBuffers[section].push(&exp1.value.u, 4); break; case TYP_INT64 & 0xFF: dataBuffers[section].push(&exp1.value.u, 8); break; case TYP_INT128 & 0xFF: dataBuffers[section].push(&exp1.value.u, 8); exp1.value.i = exp1.value.i >> 63; // sign extend dataBuffers[section].push(&exp1.value.u, 8); break; case TYP_FLOAT16 & 0xFF: // half precision exp1.value.w = double2half(exp1.value.d); dataBuffers[section].push(&exp1.value.w, 2); break; case TYP_FLOAT32 & 0xFF: { // single precision float val = float(exp1.value.d); dataBuffers[section].push(&val, 4); } break; case TYP_FLOAT64 & 0xFF: // double precision dataBuffers[section].push(&exp1.value.d, 8); break; } } } sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address break; case 8: // after {number. expect comma or } if (tokens[tok].id == ',') state = 7; else if (tokens[tok].id == '}') state = 6; else { errors.report(tokens[tok]); break; } } if (tok + 1 == tokenB + tokenN && (state == 5 || state >= 7) && linei + 1 < lines.numEntries()) { // no more tokens. statement with {} can span multiple lines if (state == 5) { // after '='. expect next line to be '{' uint32_t tokNext = lines[linei+1].firstToken; if (tokens[tokNext].type != TOK_OPR || tokens[tokNext].id != '{') break; // anything else: break out of loop and get error message } // append next line lines[linei].type = LINE_DATADEF; linei++; tokenN += lines[linei].numTokens; } } // no more tokens if (state == 2 || state == 6) { // finish this definition lines[linei].type = LINE_DATADEF; state = 99; goto COMMA; } errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_UNFINISHED_VAR); } // check if line is code or data void CAssembler::determineLineType() { uint32_t tok; // current token uint32_t elements = 0; // detect type and constant tokens if (tokens[tokenB].type == TOK_OPT) { lines[linei].type = LINE_OPTIONS; return; } // loop through tokens on this line for (tok = tokenB; tok < tokenB + tokenN; tok++) { if (tokens[tok].type == TOK_REG || tokens[tok].type == TOK_INS || tokens[tok].type == TOK_XPR || tokens[tok].type == TOK_HLL) { lines[linei].type = LINE_CODEDEF; return; // register or instruction found. must be code } if (tokens[tok].type == TOK_TYP) elements |= 1; if (tokens[tok].type == TOK_NUM || tokens[tok].type == TOK_FLT || tokens[tok].type == TOK_CHA || tokens[tok].type == TOK_STR) elements |= 2; } if (elements == 3) lines[linei].type = LINE_DATADEF; else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) { // align directive lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF; } else if (tokens[tokenB].type == TOK_EOF) lines[linei].type = 0; // end of file else if (tokenN == 1 && tokens[tokenB].type == TOK_OPR && linei > 1) { // {} bracket. same type as previous line lines[linei].type = lines[linei-1].type; } else if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') { // metaprogramming code lines[linei].type = LINE_METADEF; } else if (linei > 1) { // undetermined. This may occur in for(;;) clause. Use same type as previous line lines[linei].type = lines[linei-1].type; } else { // error. cannot determine errors.report(tokens[tokenB]); lines[linei].type = LINE_ERROR; } } // interpret data or code alignment directive void CAssembler::interpretAlign() { if (section) { uint32_t addr = (uint32_t)sectionHeaders[section].sh_size; SExpression exp1 = expression(tokenB+1, tokenN - 1, pass < 3 ? 0x10 : 0); if (exp1.tokens < tokenN - 1) {errors.report(tokens[tokenB+1+exp1.tokens]); return;} if ((exp1.etype & XPR_IMMEDIATE) != XPR_INT || (exp1.etype & (XPR_STRING | XPR_REG | XPR_OP | XPR_MEM | XPR_OPTION))) { errors.report(tokens[tokenB+1]); return; } uint64_t alignm = exp1.value.u; if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) {errors.reportLine(ERR_ALIGNMENT); return;} uint32_t log2ali = bitScanReverse(alignm); if (sectionHeaders[section].sh_align < log2ali) { sectionHeaders[section].sh_align = log2ali; // make sure section alignment is not less } if (addr & ((uint32_t)alignm - 1)) { // needs to insert zeroes uint32_t addr2 = (addr + (uint32_t)alignm - 1) & -(int32_t)alignm; sectionHeaders[section].sh_size = addr2; // update address if (pass >= 3) { dataBuffers[section].align((uint32_t)alignm); // put zeroes in data buffer } } } } // Pass 3 does three things. // A. Handle metaprogramming directives // B. Classify lines // C. Identify symbol names, sections, labels, functions // These must be done in parallel because metaprogramming directives can refer to previously // defined symbols, and data/code definitions can involve metaprogramming variables and macros void CAssembler::pass2() { ElfFWC_Sym2 sym; // symbol record zeroAllMembers(sym); // reset symbol symbols.push(sym); // symbol record 0 is empty symbolNameBuffer.put((char)0); // put dummy zero to avoid zero offset at next string sectionFlags = 0; section = 0; // lines loop for (linei = 1; linei < lines.numEntries(); linei++) { lineError = 0; tokenB = lines[linei].firstToken; // first token in line tokenN = lines[linei].numTokens; // number of tokens in line if (tokenN == 0) continue; replaceKnownNames(); // replace previously defined names by symbol references // check if line begins with '%' if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') { // metaprogramming code lines[linei].type = LINE_METADEF; interpretMetaDefinition(); continue; } // classify other lines lines[linei].sectionType = sectionFlags; // line is section directive if (sectionFlags & ATT_EXEC) lines[linei].type = LINE_CODEDEF; else if (sectionFlags & ((ATT_READ | ATT_WRITE))) lines[linei].type = LINE_DATADEF; if (tokenN > 1) { // search for section, function and symbol definitions // lines with a single token cannot legally define a symbol name if ((tokens[tokenB].type == TOK_NAM || tokens[tokenB].type == TOK_SYM) && tokens[tokenB+1].type == TOK_DIR) { switch (tokens[tokenB + 1].id) { case DIR_SECTION: // section starts here interpretSectionDirective(); break; case DIR_FUNCTION: // function starts here interpretFunctionDirective(); break; case DIR_END: // section or function end interpretEndDirective(); break; default: errors.report(tokens[tokenB + 1]); } } else if (tokens[tokenB].id == DIR_EXTERN) { // extern symbols interpretExternDirective(); } else if (tokens[tokenB].id == DIR_PUBLIC) { // the interpretation of public symbol declarations is postponed to pass 4 after all // symbols have been defined and got their final value lines[linei].type = LINE_PUBLICDEF; } else if (tokens[tokenB].type == TOK_NAM && tokens[tokenB+1].id == ':') { interpretLabel(tokenB); if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1(); } else if (tokens[tokenB].type == TOK_TYP && (tokens[tokenB+1].type == TOK_NAM || tokens[tokenB+1].type == TOK_SYM)) { interpretVariableDefinition2(); } else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) { interpretAlign(); } else if (tokens[tokenB].type == TOK_SYM && tokens[tokenB+1].id == ':' && pass == 2) { errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined } else { determineLineType(); // check if code or data if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1(); } } else { determineLineType(); // check if code or data (can only be code) } } // loop through lines again to replace names that are forward references to symbols defined during pass 2 for (linei = 1; linei < lines.numEntries(); linei++) { tokenB = lines[linei].firstToken; // first token in line tokenN = lines[linei].numTokens; // number of tokens in line replaceKnownNames(); // replace previously defined names by symbol references } } // Show all symbols. For debugging only void CAssembler::showSymbols() { uint32_t symi; ElfFWC_Sym2 sym; printf("\n\nSymbol: name, section, addr, type, size, binding"); for (symi = 1; symi < symbols.numEntries(); symi++) { sym = symbols[symi]; printf("\n%3i: %10s, %7i, %4X", symi, symbolNameBuffer.buf() + sym.st_name, sym.st_section, (uint32_t)sym.st_value); if (sym.st_type == STT_CONSTANT || sym.st_type == STT_VARIABLE) { if (sym.st_other & STV_FLOAT) { // floating point constant union { uint64_t i; double d; } val; val.i = sym.st_value; printf(" = %G", val.d); } else if (sym.st_other & STV_STRING) { // string printf(" = %s", stringBuffer.getString((uint32_t)sym.st_value)); } else { // print 64 bit integer constant printf(" = 0x"); if (uint64_t(sym.st_value) >> 32) { printf("%X%08X", uint32_t(sym.st_value >> 32), uint32_t(sym.st_value)); } else { printf("%X", uint32_t(sym.st_value)); } // this method causes warnings: // printf(((sizeof(long int) > 4) ? " = 0x%lx" : " = 0x%llx"), sym.st_value); } } else { printf(" %5X, %X*%X, %7X", // other type sym.st_type, sym.st_unitsize, sym.st_unitnum, sym.st_bind); } } } // Show all tokens. For debugging only void CAssembler::showTokens() { SKeyword const tokenNames[] = { {"name", TOK_NAM}, // unidentified name {"direc", TOK_DIR}, // section or function directive {"attrib", TOK_ATT}, // section or function attribute {"label", TOK_LAB}, // code label or function name {"datalb", TOK_VAR}, // data label {"secnm", TOK_SEC}, // section name {"type", TOK_TYP}, // type name {"reg", TOK_REG}, // register name {"instr", TOK_INS}, // instruction name {"oper", TOK_OPR}, // operator {"option", TOK_OPT}, // operator {"num", TOK_NUM}, // number {"float", TOK_FLT}, // floating point number {"char", TOK_CHA}, // character or string in single quotes ' ' {"string", TOK_STR}, // string in double quotes " " {"symbol", TOK_SYM}, // symbol {"expression", TOK_XPR}, // expression {"eof", TOK_EOF}, // string in double quotes " " {"hll", TOK_HLL} // string in double quotes " " // {"error", TOK_ERR} // error. illegal character or unmatched quote }; uint32_t line, tok, i; for (line = 1; line < lines.numEntries(); line++) { if (line < lines.numEntries() && lines[line].numTokens) { printf("\nline %2i type %X", lines[line].linenum, lines[line].type); for (tok = lines[line].firstToken; tok < lines[line].firstToken + lines[line].numTokens; tok++) { // find name for token type const char * nm = 0; for (i = 0; i < TableSize(tokenNames); i++) { if (tokenNames[i].id == tokens[tok].type) nm = tokenNames[i].name; } if (nm) printf("\n%4X %8s: ", tok, nm); // Token type else printf("type %4X", tokens[tok].type); switch (tokens[tok].type) { case TOK_DIR: case TOK_ATT: case TOK_TYP: case TOK_OPT: case TOK_HLL: nm = 0; for (i = 0; i < TableSize(keywordsList); i++) { if (keywordsList[i].id == tokens[tok].id) nm = keywordsList[i].name; } if (nm) printf("%s", nm); else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); break; case TOK_OPR: nm = 0; for (i = 0; i < TableSize(operatorsList); i++) { if (operatorsList[i].id == tokens[tok].id) nm = operatorsList[i].name; } if (nm) printf("%s", nm); else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); break; case TOK_REG: //registerNames nm = 0; for (i = 0; i < TableSize(registerNames); i++) { if (registerNames[i].id == tokens[tok].id) nm = registerNames[i].name; } if (nm) printf("%s%i", nm, tokens[tok].id & 0xFF); else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength); break; case TOK_NAM: case TOK_NUM: case TOK_FLT: case TOK_LAB: case TOK_VAR: case TOK_SEC: case TOK_CHA: case TOK_STR: case TOK_INS: case TOK_SYM: for (i = 0; i < tokens[tok].stringLength; i++) { printf("%c", buf()[tokens[tok].pos + i]); } printf(" id %X, value %X", tokens[tok].id, tokens[tok].value.w); break; case TOK_XPR: default: printf("0x%X 0x%X 0x%X %2i", tokens[tok].id, tokens[tok].value.w, tokens[tok].pos, tokens[tok].stringLength); break; } } } } } void CAssembler::initializeWordLists() { // Operators list operators.pushBig(operatorsList, sizeof(operatorsList)); operators.sort(); // Keywords list keywords.pushBig(keywordsList,sizeof(keywordsList)); keywords.sort(); // Read instruction list from file CCSVFile instructionListFile; instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH); // Filename of list of instructions instructionListFile.parse(); // Read and interpret instruction list file instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container instructionlistId.copy(instructionlist); // copy instruction list instructionlistNm.copy(instructionlist); // copy instruction list // sort lists by different criteria, defined by the different operators: // operator < (SInstruction const & a, SInstruction const & b) // operator < (SInstruction3 const & a, SInstruction3 const & b) SInstruction3 nullInstruction; // empty record zeroAllMembers(nullInstruction); instructionlistId.push(nullInstruction); // Empty record will go to position 0 to avoid an instruction with index 0 instructionlistNm.sort(); // Sort instructionlist by name instructionlistId.sort(); // Sort instructionlistId by id }
Go to most recent revision | Compare with Previous | Blame | View Log