1 |
41 |
Agner |
/**************************** assem1.cpp ********************************
|
2 |
|
|
* Author: Agner Fog
|
3 |
|
|
* Date created: 2017-04-17
|
4 |
|
|
* Last modified: 2021-07-10
|
5 |
|
|
* Version: 1.11
|
6 |
|
|
* Project: Binary tools for ForwardCom instruction set
|
7 |
|
|
* Module:        assem1.cpp
|
8 |
|
|
* Description:
|
9 |
|
|
* Module for assembling ForwardCom .as files. Contains:
|
10 |
|
|
* pass1(): Split input file into lines and tokens. Remove comments. Find symbol definitions
|
11 |
|
|
* pass2(): Handle meta code. Classify lines. Identify symbol names, sections, functions
|
12 |
|
|
*
|
13 |
|
|
* Copyright 2017-2021 GNU General Public License http://www.gnu.org/licenses
|
14 |
|
|
******************************************************************************/
|
15 |
|
|
#include "stdafx.h"
|
16 |
|
|
|
17 |
|
|
// Lexer configuration.

// Non-alphanumeric characters accepted in symbol names.
// Characters that are used as operators must not be listed here.
const char * allowedInNames = "_$@";

// Accept bytes with the high bit set (UTF-8 sequences) in symbol names
const bool allowUTF8 = true;

// Accept nested block comments: /* /* */ */
const bool allowNestedComments = true;
|
20 |
|
|
|
21 |
|
|
// Operator for sorting symbols by name. Used by assembler
|
22 |
|
|
// List of operators
|
23 |
|
|
SOperator operatorsList[] = {
|
24 |
|
|
// name, id, priority
|
25 |
|
|
{"(", '(', 1},
|
26 |
|
|
{")", ')', 1},
|
27 |
|
|
{"[", '[', 1},
|
28 |
|
|
{"]", ']', 1},
|
29 |
|
|
{"{", '{', 1},
|
30 |
|
|
{"}", '}', 1},
|
31 |
|
|
{"'", 39, 1},
|
32 |
|
|
{"\"", '"', 1}, // "
|
33 |
|
|
{"/*", 'c', 1}, // comment begin
|
34 |
|
|
{"*/", 'd', 1}, // comment end
|
35 |
|
|
{".", '.', 2},
|
36 |
|
|
{"!", '!', 3},
|
37 |
|
|
{"~", '~', 3},
|
38 |
|
|
{"++", '+'+D2, 3},
|
39 |
|
|
{"--", '-'+D2, 3},
|
40 |
|
|
{"*", '*', 4},
|
41 |
|
|
{"/", '/', 4},
|
42 |
|
|
{"%", '%', 4},
|
43 |
|
|
{"+", '+', 5},
|
44 |
|
|
{"-", '-', 5},
|
45 |
|
|
{"<<", '<'+D2, 6},
|
46 |
|
|
{">>", '>'+D2, 6}, // signed shift right
|
47 |
|
|
{">>>", '>'+D3, 6}, // unsigned shift right
|
48 |
|
|
{"<", '<', 7},
|
49 |
|
|
{"<=", '<'+EQ, 7},
|
50 |
|
|
{">", '>', 7},
|
51 |
|
|
{">=", '>'+EQ, 7},
|
52 |
|
|
{"==", '='+D2, 8},
|
53 |
|
|
{"!=", '!'+EQ, 8},
|
54 |
|
|
{"&", '&', 9},
|
55 |
|
|
{"^", '^', 10},
|
56 |
|
|
{"|", '|', 11},
|
57 |
|
|
{"&&", '&'+D2, 12},
|
58 |
|
|
{"||", '|'+D2, 13},
|
59 |
|
|
{"^^", '^'+D2, 13}, // boolean XOR. non-standard operator
|
60 |
|
|
{"?", '?', 14},
|
61 |
|
|
{":", ':', 14},
|
62 |
|
|
{"=", '=', 15},
|
63 |
|
|
{"+=", '+'+EQ, 15},
|
64 |
|
|
{"-=", '-'+EQ, 15},
|
65 |
|
|
{"*=", '*'+EQ, 15},
|
66 |
|
|
{"/=", '/'+EQ, 15},
|
67 |
|
|
{"%=", '%'+EQ, 15},
|
68 |
|
|
{"<<=", '<'+D2+EQ, 15},
|
69 |
|
|
{">>=", '>'+D2+EQ, 15}, // signed shift right
|
70 |
|
|
{">>>=", '>'+D3+EQ, 15}, // unsigned shift right
|
71 |
|
|
{"&=", '&'+EQ, 15},
|
72 |
|
|
{"^=", '^'+EQ, 15},
|
73 |
|
|
{"|=", '|'+EQ, 15},
|
74 |
|
|
{",", ',', 16},
|
75 |
|
|
{"//", '/'+D2, 20}, // comment, end of line
|
76 |
|
|
{";", ';', 20} // comment, end of line
|
77 |
|
|
};
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
// List of keywords
|
81 |
|
|
SKeyword keywordsList[] = {
|
82 |
|
|
// name, id
|
83 |
|
|
{"section", DIR_SECTION}, // TOK_DIR: section, functions directives
|
84 |
|
|
{"function", DIR_FUNCTION},
|
85 |
|
|
{"end", DIR_END},
|
86 |
|
|
{"public", DIR_PUBLIC},
|
87 |
|
|
{"extern", DIR_EXTERN},
|
88 |
|
|
|
89 |
|
|
// TOK_ATT: attributes of sections, functions and symbols
|
90 |
|
|
{"read", ATT_READ}, // readable section
|
91 |
|
|
{"write", ATT_WRITE}, // writeable section
|
92 |
|
|
{"execute", ATT_EXEC}, // executable section
|
93 |
|
|
{"align", ATT_ALIGN}, // align section, data, or code
|
94 |
|
|
{"weak", ATT_WEAK}, // weak linking
|
95 |
|
|
{"reguse", ATT_REGUSE}, // register use
|
96 |
|
|
{"constant", ATT_CONSTANT}, // external constant
|
97 |
|
|
{"uninitialized", ATT_UNINIT}, // uninitialized section (BSS)
|
98 |
|
|
{"communal", ATT_COMDAT}, // communal section. duplicates and unreferenced sections are removed
|
99 |
|
|
{"exception_hand", ATT_EXCEPTION}, // exception handler and stack unroll information
|
100 |
|
|
{"event_hand", ATT_EVENT}, // event handler list, including constructors and destructors
|
101 |
|
|
{"debug_info", ATT_DEBUG}, // debug information
|
102 |
|
|
{"comment_info", ATT_COMMENT}, // comments, including copyright and required libraries
|
103 |
|
|
|
104 |
|
|
// TOK_TYP: type names
|
105 |
|
|
{"int8", TYP_INT8},
|
106 |
|
|
{"uint8", TYP_INT8+TYP_UNS},
|
107 |
|
|
{"int16", TYP_INT16},
|
108 |
|
|
{"uint16", TYP_INT16+TYP_UNS},
|
109 |
|
|
{"int32", TYP_INT32},
|
110 |
|
|
{"uint32", TYP_INT32+TYP_UNS},
|
111 |
|
|
{"int64", TYP_INT64},
|
112 |
|
|
{"uint64", TYP_INT64+TYP_UNS},
|
113 |
|
|
{"int128", TYP_INT128},
|
114 |
|
|
{"uint128", TYP_INT128+TYP_UNS},
|
115 |
|
|
{"int", TYP_INT32},
|
116 |
|
|
{"uint", TYP_INT32+TYP_UNS},
|
117 |
|
|
{"float", TYP_FLOAT32},
|
118 |
|
|
{"double", TYP_FLOAT64},
|
119 |
|
|
{"float16", TYP_FLOAT16},
|
120 |
|
|
{"float32", TYP_FLOAT32},
|
121 |
|
|
{"float64", TYP_FLOAT64},
|
122 |
|
|
{"float128", TYP_FLOAT128},
|
123 |
|
|
{"string", TYP_STRING},
|
124 |
|
|
|
125 |
|
|
// TOK_OPT: options of instructions and operands
|
126 |
|
|
{"mask", OPT_MASK},
|
127 |
|
|
{"fallback", OPT_FALLBACK},
|
128 |
|
|
{"length", OPT_LENGTH},
|
129 |
|
|
{"broadcast", OPT_BROADCAST},
|
130 |
|
|
{"limit", OPT_LIMIT},
|
131 |
|
|
{"scalar", OPT_SCALAR},
|
132 |
|
|
{"options", OPT_OPTIONS},
|
133 |
|
|
{"option", OPT_OPTIONS}, // alias
|
134 |
|
|
|
135 |
|
|
// TOK_REG: register names
|
136 |
|
|
{"numcontr", REG_NUMCONTR},
|
137 |
|
|
{"threadp", REG_THREADP},
|
138 |
|
|
{"datap", REG_DATAP},
|
139 |
|
|
{"ip", REG_IP},
|
140 |
|
|
{"sp", REG_SP},
|
141 |
|
|
|
142 |
|
|
// TOK_HLL: high level language keywords
|
143 |
|
|
{"if", HLL_IF},
|
144 |
|
|
{"else", HLL_ELSE},
|
145 |
|
|
{"switch", HLL_SWITCH}, // switch (r1, scratch registers) { case 0: break; ...}
|
146 |
|
|
{"case", HLL_CASE},
|
147 |
|
|
{"for", HLL_FOR}, // for (r1 = 1; r1 <= r2; r1++) {}
|
148 |
|
|
{"in", HLL_IN}, // for (float v1 in [r1-r2], nocheck) // (r2 counts down)
|
149 |
|
|
{"while", HLL_WHILE}, // while (r1 > 0) {}
|
150 |
|
|
{"do", HLL_DO}, // do {} while ()
|
151 |
|
|
{"break", HLL_BREAK}, // break out of switch or loop
|
152 |
|
|
{"continue", HLL_CONTINUE}, // continue loop
|
153 |
|
|
{"true", HLL_TRUE}, // constant = 1
|
154 |
|
|
{"false", HLL_FALSE}, // constant = 0
|
155 |
|
|
|
156 |
|
|
// temporary additions. will be replaced by macros later:
|
157 |
|
|
{"push", HLL_PUSH}, // push registers
|
158 |
|
|
{"pop", HLL_POP}, // pop registers
|
159 |
|
|
|
160 |
|
|
};
|
161 |
|
|
|
162 |
|
|
// List of register name prefixes
|
163 |
|
|
SKeyword registerNames[] = {
|
164 |
|
|
// name, id
|
165 |
|
|
{"r", REG_R},
|
166 |
|
|
{"v", REG_V},
|
167 |
|
|
{"spec", REG_SPEC},
|
168 |
|
|
{"capab", REG_CAPAB},
|
169 |
|
|
{"perf", REG_PERF},
|
170 |
|
|
{"sys", REG_SYS}
|
171 |
|
|
};
|
172 |
|
|
|
173 |
|
|
|
174 |
|
|
// Constructor: size the line and token buffers, set up the error reporter,
// build the sorted word lists, and reserve section header 0 as an empty record.
CAssembler::CAssembler() {
    // Rough guesses used only for sizing the buffers
    const int bytesPerLineGuess  = 16;
    const int tokensPerLineGuess = 10;
    int lineCountGuess = dataSize() / bytesPerLineGuess;

    lines.setNum(lineCountGuess);
    tokens.setNum(lineCountGuess * tokensPerLineGuess);
    errors.setOwner(this);

    // Initialize and sort lists
    initializeWordLists();

    // The first section header is an all-zero placeholder
    ElfFwcShdr emptyHeader;
    zeroAllMembers(emptyHeader);
    sectionHeaders.push(emptyHeader);
}
|
188 |
|
|
|
189 |
|
|
void CAssembler::go() {
|
190 |
|
|
|
191 |
|
|
// Write feedback text to console
|
192 |
|
|
feedBackText1();
|
193 |
|
|
|
194 |
|
|
// Set default options
|
195 |
|
|
if (cmd.codeSizeOption == 0) cmd.codeSizeOption = 1 << 24;
|
196 |
|
|
if (cmd.dataSizeOption == 0) cmd.dataSizeOption = 1 << 15;
|
197 |
|
|
// initialize options
|
198 |
|
|
code_size = cmd.codeSizeOption;
|
199 |
|
|
data_size = cmd.dataSizeOption;
|
200 |
|
|
|
201 |
|
|
do { // This loop is repeated only once. Just convenient to break out of in case of errors
|
202 |
|
|
pass = 1;
|
203 |
|
|
// Split input file into lines and tokens. Find symbol definitions
|
204 |
|
|
pass1();
|
205 |
|
|
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;}
|
206 |
|
|
|
207 |
|
|
pass = 2;
|
208 |
|
|
// A. Handle metaprogramming directives
|
209 |
|
|
// B. Classify lines
|
210 |
|
|
// C. Identify symbol names, sections, labels, functions
|
211 |
|
|
pass2();
|
212 |
|
|
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;}
|
213 |
|
|
|
214 |
|
|
//showTokens(); //!! for debugging only
|
215 |
|
|
//showSymbols(); //!! for debugging only
|
216 |
|
|
|
217 |
|
|
pass = 3;
|
218 |
|
|
// Interpret lines. Generate code and data
|
219 |
|
|
pass3();
|
220 |
|
|
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;}
|
221 |
|
|
|
222 |
|
|
pass = 4;
|
223 |
|
|
// Resolve internal cross references, optimize forward references
|
224 |
|
|
pass4();
|
225 |
|
|
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;}
|
226 |
|
|
|
227 |
|
|
pass = 5;
|
228 |
|
|
// Make binary file
|
229 |
|
|
pass5();
|
230 |
|
|
if (errors.tooMany()) {err.submit(ERR_TOO_MANY_ERRORS); break;}
|
231 |
|
|
|
232 |
|
|
} while (false);
|
233 |
|
|
|
234 |
|
|
// output any error messages
|
235 |
|
|
errors.outputErrors();
|
236 |
|
|
if (errors.numErrors()) cmd.mainReturnValue = 1; // make sure makefile process stops on error
|
237 |
|
|
|
238 |
|
|
// output object file
|
239 |
|
|
outFile.write(cmd.getFilename(cmd.outputFile));
|
240 |
|
|
}
|
241 |
|
|
|
242 |
|
|
|
243 |
|
|
// Character can be the start of a symbol name
|
244 |
|
|
inline bool nameChar1(char c) {
|
245 |
|
|
return ((c | 0x20) >= 'a' && (c | 0x20) <= 'z') || ((c & 0x80) && allowUTF8) || strchr(allowedInNames, c);
|
246 |
|
|
}
|
247 |
|
|
|
248 |
|
|
// Character can be the part of a symbol name
|
249 |
|
|
inline bool nameChar2(char c) {
|
250 |
|
|
return nameChar1(c) || (c >= '0' && c <= '9');
|
251 |
|
|
}
|
252 |
|
|
|
253 |
|
|
// Check if a string begins with a number. The number can be decimal, binary
// (0b), octal (0o), hexadecimal (0x), or floating point.
// Parameters:
//   s       text to examine. At most maxlen characters are read
//   maxlen  number of characters available in s
//   isFloat optional output, may be null. Set to true if the number contains
//           a decimal point or an exponent, otherwise false
// Returns the length of the part of the string that belongs to the number,
// or 0 if the string does not begin with a number.
// Only the extent of the number is determined here. The digits are not
// checked against the radix.
uint32_t isNumber(const char * s, int maxlen, bool * isFloat) {
    if (isFloat) *isFloat = false;                   // make sure the output is always defined
    if (maxlen <= 0) return 0;                       // nothing to read

    char c = s[0];
    if (c < '0' || c > '9') {
        // The only other possible start is a decimal point followed by a digit.
        // The digit must be inside the part of the string we are allowed to read
        if (c != '.' || maxlen < 2 || s[1] < '0' || s[1] > '9') return 0;
    }

    bool is_float = false;
    int i = 0;
    int state = 0;
    // 0: begin
    // 1: after 0
    // 2: after digits 0-9
    // 3: after 0x
    // 4: after 0b or 0o
    // 5: after .
    // 6: after E
    // 7: after E09
    // 8: after E+-
    for (i = 0; i < maxlen; i++) {
        c = s[i];
        char cl = c | 0x20;                          // lower case letter
        if (c == '0' && state == 0) {state = 1; continue;}
        if (cl == 'x' && state == 1) {state = 3; continue;}
        if ((cl == 'b' || cl == 'o') && state == 1) {state = 4; continue;}
        if (c == '.' && state <= 2) {state = 5; is_float = true; continue;}
        if (cl == 'e' && (state <= 2 || state == 5)) {state = 6; is_float = true; continue;}
        if ((c == '+' || c == '-') && state == 6) {state = 8; continue;}
        if (c >= '0' && c <= '9') {
            if (state < 2) state = 2;
            if (state == 6) state = 7;
            continue;
        }
        if (cl >= 'a' && cl <= 'f' && state == 3) continue;   // hexadecimal digit
        // Anything else: stop here
        break;
    }
    if (isFloat) *isFloat = is_float;                // return isFloat
    return i;                                        // return length
}
|
291 |
|
|
|
292 |
|
|
// Check if string is a register name
|
293 |
|
|
uint32_t isRegister(const char * s, uint32_t len) {
|
294 |
|
|
uint32_t i, j, nl, num;
|
295 |
|
|
for (i = 0; i < TableSize(registerNames); i++) {
|
296 |
|
|
if ((s[0] | 0x20) == registerNames[i].name[0]) { // first character match, lower case
|
297 |
|
|
nl = (uint32_t)strlen(registerNames[i].name); // length of register name prefix
|
298 |
|
|
if (len < nl + 1 || len > nl + 2) continue; // continue search if length wrong
|
299 |
|
|
for (j = 0; j < nl; j++) { // check if each character matches
|
300 |
|
|
if ((s[j] | 0x20) != registerNames[i].name[j]) { // lower case compare
|
301 |
|
|
j = 0xFFFFFFFF; break;
|
302 |
|
|
}
|
303 |
|
|
}
|
304 |
|
|
if (j == 0xFFFFFFFF) continue; // no match
|
305 |
|
|
if (s[j] < '0' || s[j] > '9') continue; // not a number
|
306 |
|
|
num = s[j] - '0'; // get number, first digit
|
307 |
|
|
if (len == nl + 2) { // two digit number
|
308 |
|
|
if (s[j+1] < '0' || s[j+1] > '9') continue;// second digit not a number
|
309 |
|
|
num = num * 10 + (s[j+1] - '0');
|
310 |
|
|
}
|
311 |
|
|
if (num >= 32) continue; // number too high
|
312 |
|
|
return num + registerNames[i].id; // everyting matches
|
313 |
|
|
}
|
314 |
|
|
}
|
315 |
|
|
return 0; // not found. return 0
|
316 |
|
|
}
|
317 |
|
|
|
318 |
|
|
// write feedback text on stdout
|
319 |
|
|
void CAssembler::feedBackText1() {
|
320 |
|
|
if (cmd.verbose) {
|
321 |
|
|
// Tell what we are doing:
|
322 |
|
|
printf("\nAssembling %s to %s", cmd.getFilename(cmd.inputFile), cmd.getFilename(cmd.outputFile));
|
323 |
|
|
}
|
324 |
|
|
}
|
325 |
|
|
|
326 |
|
|
|
327 |
|
|
// Split input file into lines and tokens. Handle preprocessing directives. Find symbol definitions
|
328 |
|
|
void CAssembler::pass1() {
|
329 |
|
|
uint32_t n = 0; // offset into assembly file
|
330 |
|
|
uint32_t m; // end of current token
|
331 |
|
|
int32_t i, f; // temporary
|
332 |
|
|
int32_t comment = 0; // 0: normal, 1: inside comment to end of line, 2: inside /* */ comment
|
333 |
|
|
uint32_t commentStart = 0; // start position of multiline comment
|
334 |
|
|
uint32_t commentStartColumn = 0;// start column of multiline comment
|
335 |
|
|
char c; // current character or byte
|
336 |
|
|
SToken token = {0}; // current token
|
337 |
|
|
SKeyword keywSearch; // record to search for keyword
|
338 |
|
|
SOperator opSearch; // record to search for operator
|
339 |
|
|
SInstruction instructSearch; // record to search for instruction
|
340 |
|
|
SLine line = {0,0,0,0,0,0,0}; // line record
|
341 |
|
|
lines.push(line); // empty records for line 0
|
342 |
|
|
linei = 1; // start at line 1
|
343 |
|
|
numSwitch = 0; // count switch statements
|
344 |
|
|
tokens.push(token); // unused token 0
|
345 |
|
|
|
346 |
|
|
if (dataSize() >= 3 && (get<uint32_t>(0) & 0xFFFFFF) == 0xBFBBEF) {
|
347 |
|
|
n += 3; // skip UTF-8 byte order mark
|
348 |
|
|
}
|
349 |
|
|
|
350 |
|
|
line.beginPos = n; // start of line 1
|
351 |
|
|
line.firstToken = tokens.numEntries();
|
352 |
|
|
line.file = filei;
|
353 |
|
|
|
354 |
|
|
// loop through file
|
355 |
|
|
while (n < dataSize()) {
|
356 |
|
|
c = get<char>(n); // get character
|
357 |
|
|
|
358 |
|
|
// is it space or a control character?
|
359 |
|
|
if (uint8_t(c) <= 0x20) {
|
360 |
|
|
if (c == ' ' || c == '\t') { // skip space and tab
|
361 |
|
|
n++;
|
362 |
|
|
continue;
|
363 |
|
|
}
|
364 |
|
|
if (c == '\r' || c == '\n') { // newline
|
365 |
|
|
n++;
|
366 |
|
|
if (c == '\r' && get<char>(n) == '\n') n++; // "\r\n" windows newline
|
367 |
|
|
if (comment == 1) comment = 0; // end comment
|
368 |
|
|
if (n <= dataSize()) {
|
369 |
|
|
// finish current line
|
370 |
|
|
line.numTokens = tokens.numEntries() - line.firstToken;
|
371 |
|
|
line.linenum = linei++;
|
372 |
|
|
if (line.numTokens) { // save line if not empty
|
373 |
|
|
lines.push(line);
|
374 |
|
|
}
|
375 |
|
|
// start next line
|
376 |
|
|
line.type = 0;
|
377 |
|
|
line.file = filei;
|
378 |
|
|
line.beginPos = n;
|
379 |
|
|
line.firstToken = tokens.numEntries();
|
380 |
|
|
}
|
381 |
|
|
continue;
|
382 |
|
|
}
|
383 |
|
|
// illegal control character
|
384 |
|
|
token.type = TOK_ERR;
|
385 |
|
|
line.type = LINE_ERROR;
|
386 |
|
|
comment = 1; // ignore rest of line
|
387 |
|
|
m = tokens.push(token); // save error token
|
388 |
|
|
errors.report(n, 1, ERR_CONTROL_CHAR);
|
389 |
|
|
}
|
390 |
|
|
// prepare token of any type
|
391 |
|
|
token.pos = n;
|
392 |
|
|
token.stringLength = 1;
|
393 |
|
|
token.id = 0;
|
394 |
|
|
//token.column = n - line.beginPos;
|
395 |
|
|
|
396 |
|
|
// is it a name?
|
397 |
|
|
if (!comment && nameChar1(c)) {
|
398 |
|
|
// start of a name
|
399 |
|
|
m = n+1;
|
400 |
|
|
while (m < dataSize() && nameChar2(get<char>(m))) m++;
|
401 |
|
|
// name goes from position n to m-1. make token
|
402 |
|
|
token.type = TOK_NAM;
|
403 |
|
|
token.pos = n;
|
404 |
|
|
token.stringLength = m - n;
|
405 |
|
|
|
406 |
|
|
// is it a register name
|
407 |
|
|
f = isRegister((char*)buf()+n, token.stringLength);
|
408 |
|
|
if (f) {
|
409 |
|
|
token.type = TOK_REG;
|
410 |
|
|
token.id = f;
|
411 |
|
|
}
|
412 |
|
|
// is it a keyword?
|
413 |
|
|
if (token.type == TOK_NAM && m-n < sizeof(keywSearch.name)) {
|
414 |
|
|
memcpy(keywSearch.name, buf()+n, m-n);
|
415 |
|
|
keywSearch.name[m-n] = 0;
|
416 |
|
|
f = keywords.findFirst(keywSearch);
|
417 |
|
|
if (f >= 0) { // keyword found
|
418 |
|
|
token.id = keywords[f].id;
|
419 |
|
|
token.type = keywords[f].id >> 24;
|
420 |
|
|
if (token.id == HLL_SWITCH) numSwitch++;
|
421 |
|
|
}
|
422 |
|
|
}
|
423 |
|
|
// is it an instruction?
|
424 |
|
|
if (token.type == TOK_NAM && m-n < sizeof(instructSearch.name)) {
|
425 |
|
|
memcpy(instructSearch.name, buf()+n, m-n);
|
426 |
|
|
instructSearch.name[m-n] = 0;
|
427 |
|
|
f = instructionlistNm.findFirst(instructSearch);
|
428 |
|
|
if (f >= 0) { // instruction name found
|
429 |
|
|
token.type = TOK_INS;
|
430 |
|
|
token.id = instructionlistNm[f].id;
|
431 |
|
|
}
|
432 |
|
|
}
|
433 |
|
|
n = m;
|
434 |
|
|
tokens.push(token); // save token
|
435 |
|
|
continue;
|
436 |
|
|
}
|
437 |
|
|
|
438 |
|
|
// Is it a number?
|
439 |
|
|
if (!comment) {
|
440 |
|
|
bool isFloat;
|
441 |
|
|
f = isNumber((char*)buf() + n, dataSize() - n, &isFloat);
|
442 |
|
|
if (f) {
|
443 |
|
|
token.type = TOK_NUM + isFloat;
|
444 |
|
|
token.id = n; // save number as string. The value is extracted later
|
445 |
|
|
token.stringLength = f;
|
446 |
|
|
n += f;
|
447 |
|
|
tokens.push(token); // save token
|
448 |
|
|
continue;
|
449 |
|
|
}
|
450 |
|
|
}
|
451 |
|
|
|
452 |
|
|
// is it an operator?
|
453 |
|
|
opSearch.name[0] = c;
|
454 |
|
|
opSearch.name[1] = 0;
|
455 |
|
|
f = operators.findFirst(opSearch);
|
456 |
|
|
if (f >= 0) {
|
457 |
|
|
// found single-character operator
|
458 |
|
|
// make a greedy search for multi-character operators
|
459 |
|
|
i = f;
|
460 |
|
|
for (i = f+1; (uint32_t)i < operators.numEntries(); i++) {
|
461 |
|
|
if (operators[i].name[0] != c) break;
|
462 |
|
|
if (memcmp((char*)buf()+n, operators[i].name, strlen(operators[i].name)) == 0) f = i;
|
463 |
|
|
}
|
464 |
|
|
token.type = TOK_OPR;
|
465 |
|
|
token.id = operators[f].id;
|
466 |
|
|
token.priority = operators[f].priority;
|
467 |
|
|
token.stringLength = (uint32_t)strlen(operators[f].name);
|
468 |
|
|
|
469 |
|
|
// search for operators that need consideration here
|
470 |
|
|
switch (token.id) {
|
471 |
|
|
|
472 |
|
|
case 39: case '"': // quoted string in single or double quotes
|
473 |
|
|
if (comment) break;
|
474 |
|
|
// search for end of string
|
475 |
|
|
token.type = token.id == 39 ? TOK_CHA : TOK_STR;
|
476 |
|
|
token.pos = n + 1;
|
477 |
|
|
m = n;
|
478 |
|
|
while (true) {
|
479 |
|
|
if (get<char>(m+1) == '\r' || get<char>(m+1) == '\n' || m == dataSize()) {
|
480 |
|
|
// end of line without matching end quote. multi-line quotes not allowed
|
481 |
|
|
token.type = TOK_ERR;
|
482 |
|
|
errors.report(token.pos-1, 1, ERR_QUOTE_BEGIN);
|
483 |
|
|
comment = 1; // skip rest of line
|
484 |
|
|
break;
|
485 |
|
|
}
|
486 |
|
|
if (get<char>(m+1) == c && get<char>(m) != '\\') { // matching end quote not preceded by escape backslash
|
487 |
|
|
token.stringLength = m - n;
|
488 |
|
|
n += 2;
|
489 |
|
|
break;
|
490 |
|
|
}
|
491 |
|
|
m++;
|
492 |
|
|
}
|
493 |
|
|
break;
|
494 |
|
|
|
495 |
|
|
case '/'+D2: // "//". comment to end of line
|
496 |
|
|
if (comment == 0) {
|
497 |
|
|
comment = 1;
|
498 |
|
|
}
|
499 |
|
|
break;
|
500 |
|
|
case 'c': // "/*" start of comment
|
501 |
|
|
if (comment == 1) {
|
502 |
|
|
n += token.stringLength; // skip and don't save token
|
503 |
|
|
continue;
|
504 |
|
|
}
|
505 |
|
|
if (comment == 2) { // nested comment
|
506 |
|
|
if (allowNestedComments) {
|
507 |
|
|
comment++;
|
508 |
|
|
}
|
509 |
|
|
else {
|
510 |
|
|
token.type = TOK_ERR;
|
511 |
|
|
errors.report(n, 2, ERR_COMMENT_BEGIN);
|
512 |
|
|
}
|
513 |
|
|
break;
|
514 |
|
|
}
|
515 |
|
|
comment = 2;
|
516 |
|
|
commentStart = n; commentStartColumn = n - line.beginPos;
|
517 |
|
|
break;
|
518 |
|
|
case 'd': // "*/" end of comment
|
519 |
|
|
if (comment == 1) {
|
520 |
|
|
n += token.stringLength; // skip and don't save token
|
521 |
|
|
continue;
|
522 |
|
|
}
|
523 |
|
|
if (comment == 2) {
|
524 |
|
|
comment = 0;
|
525 |
|
|
n += token.stringLength; // skip and don't save token
|
526 |
|
|
continue;
|
527 |
|
|
}
|
528 |
|
|
else if (comment > 2 && allowNestedComments) {
|
529 |
|
|
comment--;
|
530 |
|
|
n += token.stringLength; // skip and don't save token
|
531 |
|
|
continue;
|
532 |
|
|
}
|
533 |
|
|
else {
|
534 |
|
|
token.type = TOK_ERR; // unmatched end comment
|
535 |
|
|
errors.report(n, 2, ERR_COMMENT_END);
|
536 |
|
|
comment = 1;
|
537 |
|
|
}
|
538 |
|
|
break;
|
539 |
|
|
case ';':
|
540 |
|
|
// semicolon starts a new pseudo-line
|
541 |
|
|
if (comment) break;
|
542 |
|
|
// finish current line
|
543 |
|
|
tokens.push(token); // the ';' token is used only in for(;;) loops. should be ignored at the end of the line otherwise
|
544 |
|
|
n += token.stringLength;
|
545 |
|
|
line.numTokens = tokens.numEntries() - line.firstToken;
|
546 |
|
|
line.linenum = linei;
|
547 |
|
|
if (line.numTokens) { // save line if not empty
|
548 |
|
|
lines.push(line);
|
549 |
|
|
}
|
550 |
|
|
// start next line
|
551 |
|
|
line.beginPos = n;
|
552 |
|
|
line.firstToken = tokens.numEntries();
|
553 |
|
|
continue; // don't save ';' token twice
|
554 |
|
|
case '{': case '}':
|
555 |
|
|
if (comment) break;
|
556 |
|
|
// put each bracket in a separate pseudo-line to ease high level language parsing
|
557 |
|
|
// finish current line
|
558 |
|
|
line.numTokens = tokens.numEntries() - line.firstToken;
|
559 |
|
|
line.linenum = linei;
|
560 |
|
|
if (line.numTokens) { // save line if not empty
|
561 |
|
|
lines.push(line);
|
562 |
|
|
}
|
563 |
|
|
// start line with bracket only
|
564 |
|
|
line.beginPos = n;
|
565 |
|
|
line.firstToken = tokens.numEntries();
|
566 |
|
|
tokens.push(token); // save token
|
567 |
|
|
n += token.stringLength;
|
568 |
|
|
line.numTokens = 1;
|
569 |
|
|
lines.push(line);
|
570 |
|
|
// start line after bracket
|
571 |
|
|
line.beginPos = n;
|
572 |
|
|
line.firstToken = tokens.numEntries();
|
573 |
|
|
continue;
|
574 |
|
|
}
|
575 |
|
|
if (comment == 0 && token.type != TOK_ERR) {
|
576 |
|
|
// save token unless we are inside a comment or an error has occurred
|
577 |
|
|
tokens.push(token); // save token
|
578 |
|
|
}
|
579 |
|
|
n += token.stringLength;
|
580 |
|
|
continue;
|
581 |
|
|
}
|
582 |
|
|
|
583 |
|
|
if (comment) {
|
584 |
|
|
// we are inside a comment. Continue search only for end of line or end of comment
|
585 |
|
|
n++;
|
586 |
|
|
continue;
|
587 |
|
|
}
|
588 |
|
|
|
589 |
|
|
// none of the above. Make token for illegal character
|
590 |
|
|
token.type = TOK_ERR;
|
591 |
|
|
line.type = LINE_ERROR;
|
592 |
|
|
errors.report(n, 1, ERR_ILLEGAL_CHAR);
|
593 |
|
|
comment = 1; // ignore rest of line
|
594 |
|
|
n++;
|
595 |
|
|
}
|
596 |
|
|
// finish last line
|
597 |
|
|
// tokens.push(token);
|
598 |
|
|
line.numTokens = tokens.numEntries() - line.firstToken;
|
599 |
|
|
lines.push(line);
|
600 |
|
|
// start pseudo line
|
601 |
|
|
line.beginPos = n;
|
602 |
|
|
line.firstToken = tokens.numEntries();
|
603 |
|
|
line.type = 0;
|
604 |
|
|
|
605 |
|
|
// check for unmatched comment
|
606 |
|
|
if (comment >= 2) {
|
607 |
|
|
token.type = TOK_ERR;
|
608 |
|
|
errors.report(commentStart, commentStartColumn, ERR_COMMENT_BEGIN);
|
609 |
|
|
}
|
610 |
|
|
// make EOF token in the end
|
611 |
|
|
line.type = 0;
|
612 |
|
|
line.beginPos = n;
|
613 |
|
|
line.firstToken = tokens.numEntries();
|
614 |
|
|
line.numTokens = 1;
|
615 |
|
|
lines.push(line);
|
616 |
|
|
token.pos = n;
|
617 |
|
|
token.stringLength = 0;
|
618 |
|
|
token.type = TOK_EOF; // end of file
|
619 |
|
|
tokens.push(token); // save eof token
|
620 |
|
|
}
|
621 |
|
|
|
622 |
|
|
|
623 |
|
|
void CAssembler::interpretSectionDirective() {
|
624 |
|
|
// Interpret section directive during pass 2 or 3
|
625 |
|
|
// pass 2: identify section name and type, and give it a number
|
626 |
|
|
// pass 3: make section header
|
627 |
|
|
|
628 |
|
|
// to do: nested sections
|
629 |
|
|
|
630 |
|
|
uint32_t tok; // token number
|
631 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
632 |
|
|
int32_t sectionsym = 0; // index to symbol record defining current section name
|
633 |
|
|
uint32_t state = 0; // 1: after align, 2: after '='
|
634 |
|
|
ElfFwcShdr sectionHeader; // section header
|
635 |
|
|
zeroAllMembers(sym); // reset symbol
|
636 |
|
|
zeroAllMembers(sectionHeader); // reset section header
|
637 |
|
|
sectionHeader.sh_type = SHT_PROGBITS; // default section type
|
638 |
|
|
|
639 |
|
|
sectionFlags = 0;
|
640 |
|
|
for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get section attributes
|
641 |
|
|
if (tokens[tok].type == TOK_ATT) {
|
642 |
|
|
if (tokens[tok].id == ATT_UNINIT && state != 2) {
|
643 |
|
|
sectionHeader.sh_type = SHT_NOBITS; // uninitialized section (BSS)
|
644 |
|
|
sectionFlags |= SHF_READ | SHF_WRITE;
|
645 |
|
|
}
|
646 |
|
|
else if (tokens[tok].id == ATT_COMDAT && state != 2) {
|
647 |
|
|
sectionHeader.sh_type = SHT_COMDAT; // communal section. duplicates and unreferenced sections are removed
|
648 |
|
|
}
|
649 |
|
|
else if (tokens[tok].id != ATT_ALIGN && state == 0) {
|
650 |
|
|
sectionFlags |= tokens[tok].id & 0xFFFFFF;
|
651 |
|
|
if (sectionFlags & SHF_EXEC) sectionFlags |= SHF_IP; // executable section must be IP based
|
652 |
|
|
}
|
653 |
|
|
else if (tokens[tok].id == ATT_ALIGN && state == 0) {
|
654 |
|
|
state = 1;
|
655 |
|
|
}
|
656 |
|
|
else {
|
657 |
|
|
errors.report(tokens[tok]); break;
|
658 |
|
|
}
|
659 |
|
|
}
|
660 |
|
|
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_IP && state == 0) sectionFlags |= SHF_IP;
|
661 |
|
|
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_DATAP && state == 0) sectionFlags |= SHF_DATAP;
|
662 |
|
|
else if (tokens[tok].type == TOK_REG && tokens[tok].id == REG_THREADP && state == 0) sectionFlags |= SHF_THREADP;
|
663 |
|
|
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=' && state == 1) state = 2;
|
664 |
|
|
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',' && state != 2) ; // comma, ignore
|
665 |
|
|
else if (tokens[tok].type == TOK_NUM && state == 2) {
|
666 |
|
|
if (pass >= 3) { // alignment value
|
667 |
|
|
uint32_t alignm = expression(tok, 1, 0).value.w;
|
668 |
|
|
if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) errors.reportLine(ERR_ALIGNMENT);
|
669 |
|
|
else {
|
670 |
|
|
sectionHeader.sh_align = bitScanReverse(alignm);
|
671 |
|
|
}
|
672 |
|
|
}
|
673 |
|
|
state = 0;
|
674 |
|
|
}
|
675 |
|
|
else {
|
676 |
|
|
errors.report(tokens[tok]); break;
|
677 |
|
|
}
|
678 |
|
|
}
|
679 |
|
|
// find or define symbol with section name
|
680 |
|
|
sectionsym = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
|
681 |
|
|
if (sectionsym <= 0) {
|
682 |
|
|
// symbol not previously defined. Define it now
|
683 |
|
|
sym.st_type = STT_SECTION;
|
684 |
|
|
sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
|
685 |
|
|
sym.st_bind = sectionFlags;
|
686 |
|
|
sectionsym = addSymbol(sym); // save symbol with section name
|
687 |
|
|
}
|
688 |
|
|
else {
|
689 |
|
|
// symbol already defined. check that it is a section name
|
690 |
|
|
if (symbols[sectionsym].st_type != STT_SECTION) {
|
691 |
|
|
errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);
|
692 |
|
|
}
|
693 |
|
|
}
|
694 |
|
|
sectionFlags |= SHF_ALLOC;
|
695 |
|
|
lines[linei].type = LINE_SECTION; // line is section directive
|
696 |
|
|
lines[linei].sectionType = sectionFlags;
|
697 |
|
|
if (symbols[sectionsym].st_section == 0) {
|
698 |
|
|
// new section. make section header
|
699 |
|
|
sectionHeader.sh_name = symbols[sectionsym].st_name;
|
700 |
|
|
if (sectionFlags & SHF_EXEC) {
|
701 |
|
|
sectionHeader.sh_entsize = 4;
|
702 |
|
|
if (sectionHeader.sh_align < 2) sectionHeader.sh_align = 2;
|
703 |
|
|
sectionFlags |= SHF_IP;
|
704 |
|
|
}
|
705 |
|
|
else { // data section
|
706 |
|
|
if (!(sectionFlags & (SHF_READ | SHF_WRITE))) sectionFlags |= SHF_READ | SHF_WRITE; // read or write attributes not specified, default is both
|
707 |
|
|
if (!(sectionFlags & (SHF_IP | SHF_DATAP | SHF_THREADP))) { // address reference not specified. assume datap if writeable, ip if readonly
|
708 |
|
|
if (sectionFlags & SHF_WRITE) sectionFlags |= SHF_DATAP;
|
709 |
|
|
else sectionFlags |= SHF_IP;
|
710 |
|
|
}
|
711 |
|
|
}
|
712 |
|
|
sectionHeader.sh_flags = sectionFlags;
|
713 |
|
|
section = sectionHeaders.push(sectionHeader);
|
714 |
|
|
symbols[sectionsym].st_section = section;
|
715 |
|
|
}
|
716 |
|
|
else { // this section is seen before
|
717 |
|
|
section = symbols[sectionsym].st_section;
|
718 |
|
|
if (sectionHeaders[section].sh_align < sectionHeader.sh_align) sectionHeaders[section].sh_align = sectionHeader.sh_align;
|
719 |
|
|
if (sectionFlags && (sectionFlags & ~sectionHeaders[section].sh_flags)) errors.reportLine(ERR_SECTION_DIFFERENT_TYPE);
|
720 |
|
|
sectionFlags = (uint32_t)sectionHeaders[section].sh_flags;
|
721 |
|
|
if (sectionHeader.sh_align > 2) {
|
722 |
|
|
// insert alignment code
|
723 |
|
|
SCode code;
|
724 |
|
|
zeroAllMembers(code);
|
725 |
|
|
code.instruction = II_ALIGN;
|
726 |
|
|
code.value.u = (int64_t)1 << sectionHeader.sh_align;
|
727 |
|
|
code.sizeUnknown = 0x80;
|
728 |
|
|
code.section = section;
|
729 |
|
|
codeBuffer.push(code);
|
730 |
|
|
}
|
731 |
|
|
}
|
732 |
|
|
}
|
733 |
|
|
|
734 |
|
|
void CAssembler::interpretFunctionDirective() {
|
735 |
|
|
// Interpret function directive during pass 2
|
736 |
|
|
uint32_t tok; // token number
|
737 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
738 |
|
|
zeroAllMembers(sym); // reset symbol
|
739 |
|
|
int32_t symi;
|
740 |
|
|
|
741 |
|
|
symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
|
742 |
|
|
if (symi > 0) {
|
743 |
|
|
if (pass == 2) errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined
|
744 |
|
|
}
|
745 |
|
|
else {
|
746 |
|
|
// define symbol
|
747 |
|
|
sym.st_type = STT_FUNC;
|
748 |
|
|
sym.st_other = STV_IP;
|
749 |
|
|
sym.st_name = symbolNameBuffer.putStringN((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
|
750 |
|
|
sym.st_bind = 0;
|
751 |
|
|
sym.st_section = section;
|
752 |
|
|
for (tok = tokenB + 2; tok < tokenB + tokenN; tok++) { // get function attributes
|
753 |
|
|
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') continue;
|
754 |
|
|
if (tokens[tok].id == ATT_WEAK) sym.st_bind |= STB_WEAK;
|
755 |
|
|
if (tokens[tok].id == ATT_REGUSE) {
|
756 |
|
|
if (tokens[tok+1].id == '=' && tokens[tok+2].type == TOK_NUM) {
|
757 |
|
|
tok += 2;
|
758 |
|
|
sym.st_reguse1 = expression(tok, 1, 0).value.w;
|
759 |
|
|
sym.st_other |= STV_REGUSE;
|
760 |
|
|
if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) {
|
761 |
|
|
tok += 2;
|
762 |
|
|
sym.st_reguse2 = expression(tok, 1, 0).value.w;
|
763 |
|
|
}
|
764 |
|
|
}
|
765 |
|
|
}
|
766 |
|
|
else if (tokens[tok].type == TOK_DIR && tokens[tok].id == DIR_PUBLIC) sym.st_bind |= STB_GLOBAL;
|
767 |
|
|
else {
|
768 |
|
|
errors.report(tokens[tok]); // unexpected token
|
769 |
|
|
}
|
770 |
|
|
}
|
771 |
|
|
symi = addSymbol(sym); // save symbol with function name
|
772 |
|
|
}
|
773 |
|
|
lines[linei].type = LINE_FUNCTION; // line is function directive
|
774 |
|
|
|
775 |
|
|
if (pass == 3 && symi) {
|
776 |
|
|
// make a label here. The final address will be calculated in pass 4
|
777 |
|
|
SCode code; // current instruction code
|
778 |
|
|
zeroAllMembers(code); // reset code structure
|
779 |
|
|
code.label = symbols[symi].st_name;
|
780 |
|
|
code.section = section;
|
781 |
|
|
codeBuffer.push(code);
|
782 |
|
|
}
|
783 |
|
|
}
|
784 |
|
|
|
785 |
|
|
void CAssembler::interpretEndDirective() {
|
786 |
|
|
// Interpret section or function end directive during pass 2
|
787 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
788 |
|
|
zeroAllMembers(sym); // reset symbol
|
789 |
|
|
int32_t symi;
|
790 |
|
|
CTextFileBuffer tempBuffer; // temporary storage of names
|
791 |
|
|
|
792 |
|
|
symi = findSymbol((char*)buf() + tokens[tokenB].pos, tokens[tokenB].stringLength);
|
793 |
|
|
if (symi <= 0) {
|
794 |
|
|
errors.reportLine(ERR_UNMATCHED_END);
|
795 |
|
|
}
|
796 |
|
|
else {
|
797 |
|
|
if (symbols[symi].st_type == STT_SECTION) {
|
798 |
|
|
if (symbols[symi].st_section == section) {
|
799 |
|
|
// current section ends here
|
800 |
|
|
section = 0; sectionFlags = 0;
|
801 |
|
|
}
|
802 |
|
|
else {
|
803 |
|
|
errors.reportLine(ERR_UNMATCHED_END);
|
804 |
|
|
}
|
805 |
|
|
}
|
806 |
|
|
else if (symbols[symi].st_type == STT_FUNC && pass >= 4) {
|
807 |
|
|
symbols[symi].st_unitsize = 4;
|
808 |
|
|
// to do: insert size!
|
809 |
|
|
//symbols[symi].st_unitsize = ?
|
810 |
|
|
// support function(){} syntax. prevent nested functions
|
811 |
|
|
}
|
812 |
|
|
}
|
813 |
|
|
lines[linei].type = LINE_ENDDIR; // line is end directive
|
814 |
|
|
}
|
815 |
|
|
|
816 |
|
|
// Interpret line specifying options
|
817 |
|
|
void CAssembler::interpretOptionsLine() {
|
818 |
|
|
|
819 |
|
|
// Expecting a line of the type:
|
820 |
|
|
// "options codesize = 0x10000, datasize = 1 << 20"
|
821 |
|
|
uint32_t tok; // token number
|
822 |
|
|
uint32_t state = 0; // 0: start, 1: after option name, 2: after equal sign, 3: after expression
|
823 |
|
|
const char * optionname = 0;
|
824 |
|
|
int option = 0; // 1: codesize, 2: datasize
|
825 |
|
|
SExpression val; // value to be assigned
|
826 |
|
|
SCode code; // instruction code containing options
|
827 |
|
|
for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) {
|
828 |
|
|
|
829 |
|
|
switch (state) {
|
830 |
|
|
case 0: // start. expect name "datasize" or "codesize"
|
831 |
|
|
if (tokens[tok].type != TOK_NAM) {
|
832 |
|
|
errors.report(tokens[tok]); return; // unexpected token
|
833 |
|
|
}
|
834 |
|
|
optionname = (char*)buf()+tokens[tok].pos; // tokens[tok].stringLength;
|
835 |
|
|
if (strncasecmp_(optionname, "codesize", 8) == 0) option = 1;
|
836 |
|
|
else if (strncasecmp_(optionname, "datasize", 8) == 0) option = 2;
|
837 |
|
|
else {
|
838 |
|
|
errors.report(tokens[tok]); return; // unexpected name
|
839 |
|
|
}
|
840 |
|
|
state = 1;
|
841 |
|
|
break;
|
842 |
|
|
|
843 |
|
|
case 1: // after name, expecting equal sign
|
844 |
|
|
if (tokens[tok].type == TOK_OPR && tokens[tok].id == '=') {
|
845 |
|
|
state = 2;
|
846 |
|
|
}
|
847 |
|
|
else {
|
848 |
|
|
errors.report(tokens[tok]); return; // unexpected token
|
849 |
|
|
}
|
850 |
|
|
break;
|
851 |
|
|
|
852 |
|
|
case 2: // expect expression
|
853 |
|
|
val = expression(tok, tokenB + tokenN - tok, 0); // evaluate number or expression
|
854 |
|
|
tok += val.tokens - 1;
|
855 |
|
|
if (val.etype != XPR_INT) {
|
856 |
|
|
errors.reportLine(ERR_MUST_BE_CONSTANT);
|
857 |
|
|
return;
|
858 |
|
|
}
|
859 |
|
|
zeroAllMembers(code); // reset code structure
|
860 |
|
|
switch (option) {
|
861 |
|
|
case 1: // set codesize
|
862 |
|
|
if (val.value.u == 0) code_size = cmd.codeSizeOption;
|
863 |
|
|
else code_size = val.value.u;
|
864 |
|
|
code.value.u = code_size;
|
865 |
|
|
break;
|
866 |
|
|
case 2: // set datasize
|
867 |
|
|
if (val.value.u == 0) data_size = cmd.dataSizeOption;
|
868 |
|
|
else data_size = val.value.u;
|
869 |
|
|
code.value.u = data_size;
|
870 |
|
|
break;
|
871 |
|
|
}
|
872 |
|
|
// This is called only in pass 3. Save this option for pass 4:
|
873 |
|
|
code.instruction = II_OPTIONS;
|
874 |
|
|
code.section = section;
|
875 |
|
|
code.fitNum = option;
|
876 |
|
|
code.sizeUnknown = 1;
|
877 |
|
|
codeBuffer.push(code);
|
878 |
|
|
state = 3;
|
879 |
|
|
break;
|
880 |
|
|
|
881 |
|
|
case 3: // expect comma or nothing
|
882 |
|
|
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
|
883 |
|
|
state = 0; // start over after comma
|
884 |
|
|
}
|
885 |
|
|
else {
|
886 |
|
|
errors.report(tokens[tok]); return; // unexpected token
|
887 |
|
|
}
|
888 |
|
|
}
|
889 |
|
|
}
|
890 |
|
|
}
|
891 |
|
|
|
892 |
|
|
|
893 |
|
|
// Find symbol by index into symbolNameBuffer. The return value is an index into symbols.
|
894 |
|
|
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name
|
895 |
|
|
uint32_t CAssembler::findSymbol(uint32_t namei) {
|
896 |
|
|
ElfFWC_Sym2 sym; // temporary symbol record used for searching
|
897 |
|
|
sym.st_name = namei;
|
898 |
|
|
return symbols.findFirst(sym); // find symbol by name
|
899 |
|
|
}
|
900 |
|
|
|
901 |
|
|
// Find symbol by name as string. The return value is an index into symbols.
|
902 |
|
|
// Symbol indexes may change when new symbols are added to the symbols list, which is sorted by name
|
903 |
|
|
uint32_t CAssembler::findSymbol(const char * name, uint32_t len) {
|
904 |
|
|
uint32_t saveSize = symbolNameBuffer.dataSize(); // save symbolNameBuffer size for later reset
|
905 |
|
|
uint32_t namei = symbolNameBuffer.putStringN(name, len); // put name temporarily into symbolNameBuffer
|
906 |
|
|
int32_t symi = findSymbol(namei); // find symbol by name index
|
907 |
|
|
symbolNameBuffer.setSize(saveSize); // remove temporary name from symbolNameBuffer
|
908 |
|
|
return symi; // return symbol index
|
909 |
|
|
}
|
910 |
|
|
|
911 |
|
|
// Add a symbol to symbols list
|
912 |
|
|
uint32_t CAssembler::addSymbol(ElfFWC_Sym2 & sym) {
|
913 |
|
|
int32_t f = symbols.findFirst(sym);
|
914 |
|
|
if (f >= 0) {
|
915 |
|
|
// error: symbol already defined
|
916 |
|
|
return 0;
|
917 |
|
|
}
|
918 |
|
|
else {
|
919 |
|
|
return symbols.addUnique(sym);
|
920 |
|
|
}
|
921 |
|
|
}
|
922 |
|
|
|
923 |
|
|
// interpret name: options {, name: options}
|
924 |
|
|
void CAssembler::interpretExternDirective() {
|
925 |
|
|
uint32_t tok; // token number
|
926 |
|
|
uint32_t nametok = 0; // last name token
|
927 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
928 |
|
|
zeroAllMembers(sym); // reset symbol
|
929 |
|
|
sym.st_bind = STB_GLOBAL;
|
930 |
|
|
|
931 |
|
|
// Example: extern name1: int32 weak, name2: function, name3, name4: read
|
932 |
|
|
uint32_t state = 0; // 0: after extern or comma,
|
933 |
|
|
// 1: after name,
|
934 |
|
|
// 2: after colon
|
935 |
|
|
|
936 |
|
|
// loop through tokens on this line
|
937 |
|
|
for (tok = tokenB + 1; tok < tokenB + tokenN; tok++) {
|
938 |
|
|
switch (state) {
|
939 |
|
|
case 0: // after extern or comma. expecting name
|
940 |
|
|
if (tokens[tok].type == TOK_NAM) {
|
941 |
|
|
// name encountered
|
942 |
|
|
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
|
943 |
|
|
state = 1; nametok = tok;
|
944 |
|
|
}
|
945 |
|
|
else errors.report(tokens[tok]);
|
946 |
|
|
break;
|
947 |
|
|
case 1: // after name. expecting colon or comma
|
948 |
|
|
if (tokens[tok].type == TOK_OPR) {
|
949 |
|
|
if (tokens[tok].id == ':') {
|
950 |
|
|
state = 2;
|
951 |
|
|
continue;
|
952 |
|
|
}
|
953 |
|
|
else if (tokens[tok].id == ',') {
|
954 |
|
|
goto COMMA;
|
955 |
|
|
}
|
956 |
|
|
}
|
957 |
|
|
errors.report(tokens[tok]);
|
958 |
|
|
break;
|
959 |
|
|
case 2: // after colon. expecting attribute or comma or end of line
|
960 |
|
|
if (tokens[tok].type == TOK_TYP) {
|
961 |
|
|
// symbol size given by type token
|
962 |
|
|
uint32_t s = tokens[tok].id & 0xF;
|
963 |
|
|
if (s > 4) s -= 3; // float types
|
964 |
|
|
sym.st_unitsize = uint32_t(1 << s);
|
965 |
|
|
sym.st_unitnum = 1;
|
966 |
|
|
}
|
967 |
|
|
else if (tokens[tok].type == TOK_ATT || tokens[tok].type == TOK_DIR) {
|
968 |
|
|
ATTRIBUTE:
|
969 |
|
|
switch (tokens[tok].id) {
|
970 |
|
|
case DIR_FUNCTION: case ATT_EXEC: // function or execute
|
971 |
|
|
if (sym.st_type) {
|
972 |
|
|
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE);
|
973 |
|
|
}
|
974 |
|
|
sym.st_type = STT_FUNC;
|
975 |
|
|
sym.st_other = STV_IP | STV_EXEC;
|
976 |
|
|
break;
|
977 |
|
|
case ATT_READ: // read
|
978 |
|
|
if (sym.st_type == 0) sym.st_other |= STV_READ;
|
979 |
|
|
break;
|
980 |
|
|
case ATT_WRITE: // write
|
981 |
|
|
if (sym.st_type == STT_FUNC) {
|
982 |
|
|
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_CONFLICT_TYPE);
|
983 |
|
|
}
|
984 |
|
|
else {
|
985 |
|
|
sym.st_type = STT_OBJECT;
|
986 |
|
|
}
|
987 |
|
|
break;
|
988 |
|
|
case ATT_WEAK: // weak
|
989 |
|
|
sym.st_bind = STB_WEAK;
|
990 |
|
|
break;
|
991 |
|
|
case ATT_CONSTANT: // constant
|
992 |
|
|
sym.st_type = STT_CONSTANT;
|
993 |
|
|
break;
|
994 |
|
|
case ATT_REGUSE:
|
995 |
|
|
if (tokens[tok+1].id == '=' && (tokens[tok+2].type == TOK_NUM /*|| tokens[tok+2].type == TOK_OPR)*/)) {
|
996 |
|
|
tok += 2;
|
997 |
|
|
sym.st_reguse1 = expression(tok, 1, 0).value.w;
|
998 |
|
|
sym.st_other |= STV_REGUSE;
|
999 |
|
|
if (tokens[tok+1].id == ',' && tokens[tok+2].type == TOK_NUM) {
|
1000 |
|
|
tok += 2;
|
1001 |
|
|
sym.st_reguse2 = expression(tok, 1, 0).value.w;
|
1002 |
|
|
}
|
1003 |
|
|
}
|
1004 |
|
|
break;
|
1005 |
|
|
default: // error
|
1006 |
|
|
errors.report(tokens[tok]);
|
1007 |
|
|
}
|
1008 |
|
|
}
|
1009 |
|
|
else if (tokens[tok].type == TOK_REG) {
|
1010 |
|
|
switch (tokens[tok].id) {
|
1011 |
|
|
case REG_IP:
|
1012 |
|
|
sym.st_other |= STV_IP; break;
|
1013 |
|
|
case REG_DATAP:
|
1014 |
|
|
sym.st_other |= STV_DATAP; break;
|
1015 |
|
|
case REG_THREADP:
|
1016 |
|
|
sym.st_other |= STV_THREADP; break;
|
1017 |
|
|
default: errors.report(tokens[tok]);
|
1018 |
|
|
}
|
1019 |
|
|
}
|
1020 |
|
|
else if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
|
1021 |
|
|
// end of definition. save symbol
|
1022 |
|
|
COMMA:
|
1023 |
|
|
if (tok < tokenB + tokenN
|
1024 |
|
|
&& (tokens[tok + 1].type == TOK_ATT || tokens[tok + 1].type == TOK_DIR)) {
|
1025 |
|
|
tok++; goto ATTRIBUTE;
|
1026 |
|
|
}
|
1027 |
|
|
uint32_t symi = addSymbol(sym); // save symbol with function name
|
1028 |
|
|
if (symi == 0) { // symbol already defined
|
1029 |
|
|
errors.report(tokens[nametok].pos, tokens[nametok].stringLength, ERR_SYMBOL_DEFINED);
|
1030 |
|
|
}
|
1031 |
|
|
sym.st_name = 0; // clear record for next symbol
|
1032 |
|
|
sym.st_type = 0;
|
1033 |
|
|
sym.st_other = 0;
|
1034 |
|
|
sym.st_unitsize = 0;
|
1035 |
|
|
sym.st_unitnum = 0;
|
1036 |
|
|
sym.st_bind = STB_GLOBAL;
|
1037 |
|
|
state = 0;
|
1038 |
|
|
}
|
1039 |
|
|
else {
|
1040 |
|
|
errors.report(tokens[tok]);
|
1041 |
|
|
}
|
1042 |
|
|
break;
|
1043 |
|
|
}
|
1044 |
|
|
}
|
1045 |
|
|
if (state) { // last extern definition does not end with comma. finish it here
|
1046 |
|
|
goto COMMA;
|
1047 |
|
|
}
|
1048 |
|
|
lines[linei].type = LINE_DATADEF; // line is data definition
|
1049 |
|
|
}
|
1050 |
|
|
|
1051 |
|
|
|
1052 |
|
|
void CAssembler::interpretLabel(uint32_t tok) {
    // Line begins with a name. Interpret it as a label.
    // tok is the index of the name token. A symbol with this name is added to the
    // symbols list, its type is guessed from the current section and from the token
    // two places after the name, and the line is classified as code or data
    // if it has not been classified already.
    // to do: add type if data. not string type
    ElfFWC_Sym2 sym;                             // symbol record
    zeroAllMembers(sym);                         // reset symbol

    // save name
    sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
    sym.st_section = section;
    // determine if code or data from section type
    if (sectionFlags & SHF_EXEC) {
        sym.st_type = STT_FUNC;
        sym.st_other = STV_EXEC | STV_IP;
    }
    else {
        sym.st_type = STT_OBJECT;
        sym.st_other = sectionFlags & STV_SECT_ATTR;
    }

    // look for more exact type information
    if (tokenN > 2) {
        uint32_t t = tok+2;                      // token after the name and the token that follows it
        if (tokens[t].type == TOK_TYP) {
            // element size given by type token
            uint32_t s = tokens[t].id & 0xF;
            if (s > 4) s -= 3;
            sym.st_unitsize = uint32_t(1 << s);
            sym.st_unitnum = 1;
            if (tokenN > 3) t++;                 // look at the token after the type
        }
        if (tokens[t].type == TOK_NUM || tokens[t].type == TOK_FLT) {
            // a number indicates a data definition
            sym.st_type = STT_OBJECT;
            lines[linei].type = LINE_DATADEF;
        }
        else if (tokens[t].type == TOK_REG || tokens[t].type == TOK_INS || tokens[t].id == '[') {
            // a register, instruction, or '[' indicates code
            lines[linei].type = LINE_CODEDEF;
            sym.st_type = STT_FUNC;
        }
    }
    if (section) {  // copy type info from section
        sym.st_other = sectionHeaders[section].sh_flags & STV_SECT_ATTR;
    }

    if (lines[linei].type == 0) {
        // line not classified yet. use section type
        lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF;
    }

    uint32_t symi = addSymbol(sym);              // add symbol to symbols list

    if (symi == 0) {
        // addSymbol returns 0 when the symbol is already defined. Entry 0 is not the
        // record of this label, so no address may be written to it
        errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED);
    }
    else if (section) {
        // symbol address
        symbols[symi].st_value = sectionHeaders[section].sh_size;
    }
    tokens[tok].id = symbols[symi].st_name;      // save symbol name index
}
|
1107 |
|
|
|
1108 |
|
|
|
1109 |
|
|
// interpret assembly style variable definition:
|
1110 |
|
|
// label: type value1, value2
|
1111 |
|
|
void CAssembler::interpretVariableDefinition1() {
|
1112 |
|
|
int state = 0; // 0: start
|
1113 |
|
|
// 1: after label
|
1114 |
|
|
// 2: after :
|
1115 |
|
|
// 3: after type or ,
|
1116 |
|
|
// 4: after value
|
1117 |
|
|
uint32_t tok; // token index
|
1118 |
|
|
uint32_t type = 0; // data type
|
1119 |
|
|
uint32_t dsize = 0; // data size
|
1120 |
|
|
uint32_t dsize1; // log2(dsize)
|
1121 |
|
|
uint32_t dnum = 0; // number of data items
|
1122 |
|
|
uint32_t stringlen = 0; // length of string
|
1123 |
|
|
uint32_t symi = 0; // symbol index
|
1124 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
1125 |
|
|
zeroAllMembers(sym); // reset symbol
|
1126 |
|
|
SExpression exp1; // expression when interpreting numeric expression
|
1127 |
|
|
|
1128 |
|
|
if (section == 0) {
|
1129 |
|
|
errors.reportLine(ERR_DATA_WO_SECTION);
|
1130 |
|
|
}
|
1131 |
|
|
|
1132 |
|
|
// loop through tokens on this line
|
1133 |
|
|
for (tok = tokenB; tok < tokenB + tokenN; tok++) {
|
1134 |
|
|
switch (state) {
|
1135 |
|
|
case 0: // start
|
1136 |
|
|
if (tokens[tok].type == TOK_NAM) { // name. make symbol
|
1137 |
|
|
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
|
1138 |
|
|
sym.st_type = STT_OBJECT;
|
1139 |
|
|
symi = symbols.addUnique(sym);
|
1140 |
|
|
tokens[tok].type = TOK_SYM; // change token type
|
1141 |
|
|
tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change
|
1142 |
|
|
state = 1;
|
1143 |
|
|
}
|
1144 |
|
|
else if (tokens[tok].type == TOK_SYM) { // symbol
|
1145 |
|
|
symi = findSymbol(tokens[tok].id);
|
1146 |
|
|
if (symi > 0) {
|
1147 |
|
|
if (pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined
|
1148 |
|
|
}
|
1149 |
|
|
state = 1;
|
1150 |
|
|
}
|
1151 |
|
|
else if (tokens[tok].type == TOK_TYP) {
|
1152 |
|
|
goto TYPE_TOKEN;
|
1153 |
|
|
}
|
1154 |
|
|
else errors.report(tokens[tok]);
|
1155 |
|
|
if (symi && section) {
|
1156 |
|
|
symbols[symi].st_value = sectionHeaders[section].sh_size;
|
1157 |
|
|
}
|
1158 |
|
|
break;
|
1159 |
|
|
case 1: // after label. expect colon
|
1160 |
|
|
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ':') {
|
1161 |
|
|
state = 2;
|
1162 |
|
|
}
|
1163 |
|
|
else errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_EXPECT_COLON);
|
1164 |
|
|
break;
|
1165 |
|
|
case 2: // expect type
|
1166 |
|
|
if (tokens[tok].type == TOK_TYP) {
|
1167 |
|
|
TYPE_TOKEN:
|
1168 |
|
|
type = tokens[tok].id & 0xFF;
|
1169 |
|
|
dsize1 = type & 0xF;
|
1170 |
|
|
if (type & 0x40) dsize1 -= 3;
|
1171 |
|
|
dsize = 1 << dsize1;
|
1172 |
|
|
state = 3;
|
1173 |
|
|
if (section) { // align data
|
1174 |
|
|
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
|
1175 |
|
|
if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment
|
1176 |
|
|
if (addr & (dsize - 1)) { // needs to insert zeroes
|
1177 |
|
|
uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize;
|
1178 |
|
|
sectionHeaders[section].sh_size = addr2; // update address
|
1179 |
|
|
if (symi) symbols[symi].st_value = addr2; // update symbol address
|
1180 |
|
|
if (pass >= 3) {
|
1181 |
|
|
dataBuffers[section].align((uint32_t)dsize); // put zeroes in data buffer
|
1182 |
|
|
}
|
1183 |
|
|
}
|
1184 |
|
|
}
|
1185 |
|
|
}
|
1186 |
|
|
else errors.report(tokens[tok]);
|
1187 |
|
|
break;
|
1188 |
|
|
case 3: // after type. expect value. evaluate expression
|
1189 |
|
|
exp1 = expression(tok, tokenB + tokenN - tok, pass < 3 ? 0x10 : 0); // pass 3: may contain symbols not defined yet
|
1190 |
|
|
tok += exp1.tokens - 1;
|
1191 |
|
|
if (exp1.etype & XPR_STRING) { // string expression: get size
|
1192 |
|
|
if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8
|
1193 |
|
|
stringlen = exp1.sym2; // string length
|
1194 |
|
|
}
|
1195 |
|
|
else stringlen = 0;
|
1196 |
|
|
if (pass < 3) {
|
1197 |
|
|
if (section) sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address
|
1198 |
|
|
}
|
1199 |
|
|
else {
|
1200 |
|
|
if (section) {
|
1201 |
|
|
// save data of desired type
|
1202 |
|
|
if (exp1.etype & XPR_FLT) {
|
1203 |
|
|
// floating point number specified
|
1204 |
|
|
if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected
|
1205 |
|
|
exp1.value.i = int64_t(exp1.value.d);
|
1206 |
|
|
errors.reportLine(ERR_CONFLICT_TYPE);
|
1207 |
|
|
}
|
1208 |
|
|
}
|
1209 |
|
|
else if (exp1.etype & XPR_INT) {
|
1210 |
|
|
if (type & TYP_FLOAT) { // integer specified, float expected
|
1211 |
|
|
exp1.value.d = double(exp1.value.i); // convert to float
|
1212 |
|
|
}
|
1213 |
|
|
}
|
1214 |
|
|
int64_t value = exp1.value.i; //value of expression
|
1215 |
|
|
if (exp1.sym3) {
|
1216 |
|
|
// calculation of symbol value. add relocation if needed
|
1217 |
|
|
uint32_t size = type & 0xF;
|
1218 |
|
|
if (type & 0x40) size -= 3;
|
1219 |
|
|
size = 1 << size;
|
1220 |
|
|
//value = calculateConstantOperand(exp1, dataBuffers[section].dataSize(), size);
|
1221 |
|
|
value = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize);
|
1222 |
|
|
if (exp1.etype & XPR_ERROR) {
|
1223 |
|
|
errors.reportLine((uint32_t)value); // report error
|
1224 |
|
|
break;
|
1225 |
|
|
}
|
1226 |
|
|
// check for overflow
|
1227 |
|
|
bool overflow = false;
|
1228 |
|
|
switch (type & 0xFF) {
|
1229 |
|
|
case TYP_INT8 & 0xFF:
|
1230 |
|
|
overflow = value > 0x7F || value < -0x80;
|
1231 |
|
|
break;
|
1232 |
|
|
case TYP_INT16 & 0xFF:
|
1233 |
|
|
overflow = value > 0x7FFF || value < -0x8000;
|
1234 |
|
|
break;
|
1235 |
|
|
case TYP_INT32 & 0xFF:
|
1236 |
|
|
overflow = value > 0x7FFFFFFF || value < int32_t(0x80000000);
|
1237 |
|
|
break;
|
1238 |
|
|
default:;
|
1239 |
|
|
}
|
1240 |
|
|
if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows
|
1241 |
|
|
}
|
1242 |
|
|
if (sectionHeaders[section].sh_type == SHT_NOBITS) {
|
1243 |
|
|
// uninitialized (BSS) section. check that value is zero, but don't store
|
1244 |
|
|
if (value != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero
|
1245 |
|
|
}
|
1246 |
|
|
else {
|
1247 |
|
|
// save data
|
1248 |
|
|
switch (type & 0xFF) {
|
1249 |
|
|
case TYP_INT8 & 0xFF:
|
1250 |
|
|
if (stringlen) {
|
1251 |
|
|
dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen);
|
1252 |
|
|
break;
|
1253 |
|
|
}
|
1254 |
|
|
dataBuffers[section].push(&value, 1); break;
|
1255 |
|
|
case TYP_INT16 & 0xFF:
|
1256 |
|
|
dataBuffers[section].push(&value, 2); break;
|
1257 |
|
|
case TYP_INT32 & 0xFF:
|
1258 |
|
|
dataBuffers[section].push(&value, 4); break;
|
1259 |
|
|
case TYP_INT64 & 0xFF:
|
1260 |
|
|
dataBuffers[section].push(&value, 8); break;
|
1261 |
|
|
case TYP_INT128 & 0xFF:
|
1262 |
|
|
dataBuffers[section].push(&value, 8);
|
1263 |
|
|
value = value >> 63; // sign extend
|
1264 |
|
|
dataBuffers[section].push(&value, 8);
|
1265 |
|
|
break;
|
1266 |
|
|
case TYP_FLOAT16 & 0xFF: // half precision
|
1267 |
|
|
exp1.value.w = double2half(exp1.value.d);
|
1268 |
|
|
dataBuffers[section].push(&exp1.value.w, 2); break;
|
1269 |
|
|
case TYP_FLOAT32 & 0xFF: { // single precision
|
1270 |
|
|
float val = float(exp1.value.d);
|
1271 |
|
|
dataBuffers[section].push(&val, 4); }
|
1272 |
|
|
break;
|
1273 |
|
|
case TYP_FLOAT64 & 0xFF: // double precision
|
1274 |
|
|
dataBuffers[section].push(&exp1.value.d, 8); break;
|
1275 |
|
|
}
|
1276 |
|
|
}
|
1277 |
|
|
sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address
|
1278 |
|
|
}
|
1279 |
|
|
}
|
1280 |
|
|
if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_SYM1 | XPR_UNRESOLV)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) errors.report(tokens[tok]);
|
1281 |
|
|
|
1282 |
|
|
if (stringlen) dnum += stringlen; else dnum += 1;
|
1283 |
|
|
state = 4;
|
1284 |
|
|
break;
|
1285 |
|
|
case 4: // after value. expect comma or end of line
|
1286 |
|
|
if (tokens[tok].type == TOK_OPR && tokens[tok].id == ',') {
|
1287 |
|
|
state = 3;
|
1288 |
|
|
}
|
1289 |
|
|
else errors.report(tokens[tok]);
|
1290 |
|
|
break;
|
1291 |
|
|
}
|
1292 |
|
|
if (lineError) return;
|
1293 |
|
|
}
|
1294 |
|
|
if (state != 4 && state != 2) errors.report(tokens[tok-1]);
|
1295 |
|
|
if (symi) { // save size
|
1296 |
|
|
symbols[symi].st_unitsize = dsize;
|
1297 |
|
|
symbols[symi].st_unitnum = dnum;
|
1298 |
|
|
symbols[symi].st_section = section;
|
1299 |
|
|
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT;
|
1300 |
|
|
if (section) { // copy information from section
|
1301 |
|
|
symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR;
|
1302 |
|
|
}
|
1303 |
|
|
}
|
1304 |
|
|
}
|
1305 |
|
|
|
1306 |
|
|
// interpret C style variable definition:
|
1307 |
|
|
// type name1 = value1, name2[num] = {value, value, ..}
|
1308 |
|
|
void CAssembler::interpretVariableDefinition2() {
|
1309 |
|
|
int state = 0; // 0: start
|
1310 |
|
|
// 1: after type or comma
|
1311 |
|
|
// 2: after name
|
1312 |
|
|
// 3: after [
|
1313 |
|
|
// 4: after [number
|
1314 |
|
|
// 5: after =
|
1315 |
|
|
// 6: after = number
|
1316 |
|
|
// 7: after {
|
1317 |
|
|
// 8: after {number
|
1318 |
|
|
|
1319 |
|
|
uint32_t tok; // token index
|
1320 |
|
|
uint32_t dsize = 0; // data element size
|
1321 |
|
|
uint32_t dsize1 = 0; // data element size = 1 << dsize1
|
1322 |
|
|
uint32_t type = 0; // data type
|
1323 |
|
|
uint32_t arrayNum1 = 1; // number of elements indicated in []
|
1324 |
|
|
uint32_t arrayNum2 = 0; // number of elements in {} list
|
1325 |
|
|
uint32_t stringlen = 0; // length of string
|
1326 |
|
|
uint32_t symi = 0; // symbol index
|
1327 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
1328 |
|
|
zeroAllMembers(sym); // reset symbol
|
1329 |
|
|
SExpression exp1; // expression when interpreting numeric expression
|
1330 |
|
|
|
1331 |
|
|
if (section == 0) {
|
1332 |
|
|
errors.reportLine(ERR_DATA_WO_SECTION);
|
1333 |
|
|
}
|
1334 |
|
|
|
1335 |
|
|
// loop through tokens on this line
|
1336 |
|
|
for (tok = tokenB; tok < tokenB + tokenN; tok++) {
|
1337 |
|
|
switch (state) {
|
1338 |
|
|
case 0: // this is a type token
|
1339 |
|
|
type = tokens[tok].id & 0xFF;
|
1340 |
|
|
dsize1 = tokens[tok].id & 0xF;
|
1341 |
|
|
if ((type & 0x40) > 3) dsize1 -= 3;
|
1342 |
|
|
dsize = 1 << dsize1;
|
1343 |
|
|
state = 1;
|
1344 |
|
|
if (section) { // align data
|
1345 |
|
|
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
|
1346 |
|
|
if (addr & (dsize - 1)) { // needs to insert zeroes
|
1347 |
|
|
uint32_t addr2 = (addr + dsize - 1) & -(int32_t)dsize; // calculate aligned address
|
1348 |
|
|
sectionHeaders[section].sh_size = addr2; // update address
|
1349 |
|
|
if (pass >= 3) {
|
1350 |
|
|
dataBuffers[section].align(dsize); // put zeroes in data buffer
|
1351 |
|
|
}
|
1352 |
|
|
}
|
1353 |
|
|
if (sectionHeaders[section].sh_align < dsize1) sectionHeaders[section].sh_align = dsize1; // update section alignment
|
1354 |
|
|
}
|
1355 |
|
|
break;
|
1356 |
|
|
case 1: // expecting name token. save name
|
1357 |
|
|
if (tokens[tok].type == TOK_NAM) { // name. make symbol
|
1358 |
|
|
sym.st_name = symbolNameBuffer.putStringN((char*)buf()+tokens[tok].pos, tokens[tok].stringLength);
|
1359 |
|
|
symi = addSymbol(sym);
|
1360 |
|
|
if (symi == 0 && pass == 2) {
|
1361 |
|
|
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); break;
|
1362 |
|
|
}
|
1363 |
|
|
symbols[symi].st_type = (sectionFlags & SHF_EXEC) ? STT_FUNC : STT_OBJECT;
|
1364 |
|
|
tokens[tok].type = TOK_SYM; // change token type
|
1365 |
|
|
tokens[tok].id = symbols[symi].st_name; // use name offset as unique identifier because symbol index can change
|
1366 |
|
|
state = 2;
|
1367 |
|
|
}
|
1368 |
|
|
else if (tokens[tok].type == TOK_SYM) { // symbol
|
1369 |
|
|
symi = findSymbol(tokens[tok].id);
|
1370 |
|
|
if (symi > 0 && pass == 2) errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined
|
1371 |
|
|
state = 2;
|
1372 |
|
|
}
|
1373 |
|
|
else {
|
1374 |
|
|
errors.report(tokens[tok]);
|
1375 |
|
|
}
|
1376 |
|
|
//nametok = tok;
|
1377 |
|
|
symbols[symi].st_unitsize = dsize;
|
1378 |
|
|
symbols[symi].st_unitnum = 0;
|
1379 |
|
|
|
1380 |
|
|
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) symbols[symi].st_other |= STV_FLOAT;
|
1381 |
|
|
if (section) { // copy information from section
|
1382 |
|
|
symbols[symi].st_value = sectionHeaders[section].sh_size;
|
1383 |
|
|
symbols[symi].st_other |= sectionHeaders[section].sh_flags & STV_SECT_ATTR;
|
1384 |
|
|
}
|
1385 |
|
|
break;
|
1386 |
|
|
case 2: // after name. expect , = [ eol
|
1387 |
|
|
if (tokens[tok].type != TOK_OPR) {
|
1388 |
|
|
errors.report(tokens[tok]); break;
|
1389 |
|
|
}
|
1390 |
|
|
switch (tokens[tok].id) {
|
1391 |
|
|
case ',': // finish this symbol definition
|
1392 |
|
|
COMMA:
|
1393 |
|
|
if (arrayNum2 > arrayNum1) { // check if the two array sizes match
|
1394 |
|
|
if (arrayNum1 > 1) {
|
1395 |
|
|
errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_CONFLICT_ARRAYSZ);
|
1396 |
|
|
}
|
1397 |
|
|
else arrayNum1 = arrayNum2;
|
1398 |
|
|
}
|
1399 |
|
|
symbols[symi].st_unitsize = dsize;
|
1400 |
|
|
symbols[symi].st_unitnum = arrayNum1;
|
1401 |
|
|
symbols[symi].st_reguse1 = linei;
|
1402 |
|
|
symbols[symi].st_section = section;
|
1403 |
|
|
|
1404 |
|
|
if (arrayNum1 > arrayNum2 && section) {
|
1405 |
|
|
// unspecified elements are zero. calculate extra size
|
1406 |
|
|
uint32_t asize = (arrayNum1 - arrayNum2) * dsize;
|
1407 |
|
|
sectionHeaders[section].sh_size += asize;
|
1408 |
|
|
if (pass >= 3 && sectionHeaders[section].sh_type != SHT_NOBITS) {
|
1409 |
|
|
// store any unspecified elements as zero
|
1410 |
|
|
uint64_t zero = 0;
|
1411 |
|
|
while (asize > 8) {
|
1412 |
|
|
dataBuffers[section].push(&zero, 8); asize -= 8;
|
1413 |
|
|
}
|
1414 |
|
|
while (asize > 0) {
|
1415 |
|
|
dataBuffers[section].push(&zero, 1); asize -= 1;
|
1416 |
|
|
}
|
1417 |
|
|
}
|
1418 |
|
|
}
|
1419 |
|
|
|
1420 |
|
|
// get ready for next symbol
|
1421 |
|
|
zeroAllMembers(sym);
|
1422 |
|
|
arrayNum1 = 1; arrayNum2 = 0;
|
1423 |
|
|
if (state == 99) return; // finished line
|
1424 |
|
|
state = 1;
|
1425 |
|
|
break;
|
1426 |
|
|
case '=':
|
1427 |
|
|
state = 5;
|
1428 |
|
|
break;
|
1429 |
|
|
case '[':
|
1430 |
|
|
state = 3;
|
1431 |
|
|
break;
|
1432 |
|
|
default:
|
1433 |
|
|
errors.report(tokens[tok]);
|
1434 |
|
|
}
|
1435 |
|
|
break;
|
1436 |
|
|
case 3: // after [ . expect number or ]
|
1437 |
|
|
if (tokens[tok].id == ']') {
|
1438 |
|
|
state = 2; break;
|
1439 |
|
|
}
|
1440 |
|
|
if (arrayNum1 > 1) {
|
1441 |
|
|
errors.report(tokens[tok].pos, tokens[tok].stringLength, ERR_MULTIDIMENSIONAL); break; // error. multidimensional array not supported
|
1442 |
|
|
}
|
1443 |
|
|
// evaluate numeric expression inside [].
|
1444 |
|
|
// it may contain complex expressions that can only be evaluated later, but
|
1445 |
|
|
// this will not generate an error message here
|
1446 |
|
|
exp1 = expression(tok, tokenB + tokenN - tok, 0x10);
|
1447 |
|
|
if (lineError) return;
|
1448 |
|
|
tok += exp1.tokens -1;
|
1449 |
|
|
if (exp1.etype == 0) errors.report(tokens[tok]);
|
1450 |
|
|
if ((exp1.etype & ~XPR_IMMEDIATE) == 0) {
|
1451 |
|
|
arrayNum1 = exp1.value.w;
|
1452 |
|
|
}
|
1453 |
|
|
state = 4;
|
1454 |
|
|
break;
|
1455 |
|
|
case 4: // after [number. expect ]
|
1456 |
|
|
if (tokens[tok].id != ']') {
|
1457 |
|
|
errors.report(tokens[tok]); break;
|
1458 |
|
|
}
|
1459 |
|
|
state = 2;
|
1460 |
|
|
break;
|
1461 |
|
|
case 5: // after =. expect number or {numbers}
|
1462 |
|
|
if (tokens[tok].id == '{') state = 7;
|
1463 |
|
|
else {
|
1464 |
|
|
state = 6;
|
1465 |
|
|
goto SAVE_VALUE; // interpret value and save it
|
1466 |
|
|
}
|
1467 |
|
|
break;
|
1468 |
|
|
case 6: // after = number. expect comma or eol
|
1469 |
|
|
if (tokens[tok].id != ',') {
|
1470 |
|
|
errors.report(tokens[tok]); break;
|
1471 |
|
|
}
|
1472 |
|
|
goto COMMA;
|
1473 |
|
|
case 7: // after {. expect number list
|
1474 |
|
|
state = 8;
|
1475 |
|
|
SAVE_VALUE:
|
1476 |
|
|
arrayNum2++;
|
1477 |
|
|
if (pass < 3) {
|
1478 |
|
|
// may contain symbols not defined yet. just pass expression and count tokens
|
1479 |
|
|
exp1 = expression(tok, tokenB + tokenN - tok, 0x10);
|
1480 |
|
|
tok += exp1.tokens - 1;
|
1481 |
|
|
if (lineError) return;
|
1482 |
|
|
}
|
1483 |
|
|
else {
|
1484 |
|
|
// pass 5. evaluate expression and save value
|
1485 |
|
|
exp1 = expression(tok, tokenB + tokenN - tok, 0);
|
1486 |
|
|
tok += exp1.tokens - 1;
|
1487 |
|
|
if (lineError) return;
|
1488 |
|
|
if ((exp1.etype & XPR_SYM1) && exp1.sym3 && pass > 3) {
|
1489 |
|
|
// calculation of symbol value. add relocation if needed
|
1490 |
|
|
exp1.value.i = calculateConstantOperand(exp1, sectionHeaders[section].sh_size, dsize);
|
1491 |
|
|
if (exp1.etype & XPR_ERROR) {
|
1492 |
|
|
errors.reportLine((uint32_t)(exp1.value.i)); // report error
|
1493 |
|
|
break;
|
1494 |
|
|
}
|
1495 |
|
|
// check for overflow
|
1496 |
|
|
bool overflow = false;
|
1497 |
|
|
switch (type & 0xFF) {
|
1498 |
|
|
case TYP_INT8 & 0xFF:
|
1499 |
|
|
overflow = exp1.value.i > 0x7F || exp1.value.i < -0x80;
|
1500 |
|
|
break;
|
1501 |
|
|
case TYP_INT16 & 0xFF:
|
1502 |
|
|
overflow = exp1.value.i > 0x7FFF || exp1.value.i < -0x8000;
|
1503 |
|
|
break;
|
1504 |
|
|
case TYP_INT32 & 0xFF:
|
1505 |
|
|
overflow = exp1.value.i > 0x7FFFFFFF || exp1.value.i < int32_t(0x80000000);
|
1506 |
|
|
break;
|
1507 |
|
|
default:;
|
1508 |
|
|
}
|
1509 |
|
|
if (overflow) errors.reportLine(ERR_OVERFLOW); // (symbol1 - symbol2) overflows
|
1510 |
|
|
}
|
1511 |
|
|
}
|
1512 |
|
|
if (!(exp1.etype & (XPR_IMMEDIATE | XPR_STRING | XPR_UNRESOLV | XPR_SYM1)) || (exp1.etype & (XPR_REG|XPR_OPTION|XPR_MEM|XPR_ERROR))) {
|
1513 |
|
|
errors.report(tokens[tok]);
|
1514 |
|
|
}
|
1515 |
|
|
if (section && section < dataBuffers.numEntries() && pass >= 3) {
|
1516 |
|
|
// save data of desired type
|
1517 |
|
|
if ((exp1.etype & XPR_IMMEDIATE) == XPR_FLT) {
|
1518 |
|
|
// floating point number specified
|
1519 |
|
|
if ((type & 0xF0) == (TYP_INT8 & 0xF0)) { // float specified, integer expected
|
1520 |
|
|
exp1.value.i = int64_t(exp1.value.d);
|
1521 |
|
|
errors.reportLine(ERR_CONFLICT_TYPE);
|
1522 |
|
|
}
|
1523 |
|
|
}
|
1524 |
|
|
else if ((exp1.etype & XPR_IMMEDIATE) == XPR_INT) {
|
1525 |
|
|
if ((type & 0xF0) == (TYP_FLOAT32 & 0xF0)) { // integer specified, float expected
|
1526 |
|
|
exp1.value.d = double(exp1.value.i); // convert to float
|
1527 |
|
|
}
|
1528 |
|
|
}
|
1529 |
|
|
else if (exp1.etype & XPR_STRING) { // string expression: get size
|
1530 |
|
|
if ((type & 0x1F) != (TYP_INT8 & 0x1F)) errors.reportLine(ERR_STRING_TYPE); // string must use type int8
|
1531 |
|
|
stringlen = exp1.sym2; // string length
|
1532 |
|
|
}
|
1533 |
|
|
else stringlen = 0;
|
1534 |
|
|
|
1535 |
|
|
if (sectionHeaders[section].sh_type == SHT_NOBITS) {
|
1536 |
|
|
// uninitialized (BSS) section. check that value is zero, but don't store
|
1537 |
|
|
if (exp1.value.i != 0) errors.reportLine(ERR_NONZERO_IN_BSS); // not zero
|
1538 |
|
|
}
|
1539 |
|
|
else {
|
1540 |
|
|
// save data
|
1541 |
|
|
switch (type & 0xFF) {
|
1542 |
|
|
case TYP_INT8 & 0xFF:
|
1543 |
|
|
if (stringlen) {
|
1544 |
|
|
dataBuffers[section].push(stringBuffer.buf() + exp1.value.w, stringlen);
|
1545 |
|
|
break;
|
1546 |
|
|
}
|
1547 |
|
|
dataBuffers[section].push(&exp1.value.u, 1); break;
|
1548 |
|
|
case TYP_INT16 & 0xFF:
|
1549 |
|
|
dataBuffers[section].push(&exp1.value.u, 2); break;
|
1550 |
|
|
case TYP_INT32 & 0xFF:
|
1551 |
|
|
dataBuffers[section].push(&exp1.value.u, 4); break;
|
1552 |
|
|
case TYP_INT64 & 0xFF:
|
1553 |
|
|
dataBuffers[section].push(&exp1.value.u, 8); break;
|
1554 |
|
|
case TYP_INT128 & 0xFF:
|
1555 |
|
|
dataBuffers[section].push(&exp1.value.u, 8);
|
1556 |
|
|
exp1.value.i = exp1.value.i >> 63; // sign extend
|
1557 |
|
|
dataBuffers[section].push(&exp1.value.u, 8);
|
1558 |
|
|
break;
|
1559 |
|
|
case TYP_FLOAT16 & 0xFF: // half precision
|
1560 |
|
|
exp1.value.w = double2half(exp1.value.d);
|
1561 |
|
|
dataBuffers[section].push(&exp1.value.w, 2); break;
|
1562 |
|
|
case TYP_FLOAT32 & 0xFF: { // single precision
|
1563 |
|
|
float val = float(exp1.value.d);
|
1564 |
|
|
dataBuffers[section].push(&val, 4); }
|
1565 |
|
|
break;
|
1566 |
|
|
case TYP_FLOAT64 & 0xFF: // double precision
|
1567 |
|
|
dataBuffers[section].push(&exp1.value.d, 8); break;
|
1568 |
|
|
}
|
1569 |
|
|
}
|
1570 |
|
|
}
|
1571 |
|
|
sectionHeaders[section].sh_size += stringlen ? stringlen : dsize; // update address
|
1572 |
|
|
break;
|
1573 |
|
|
case 8: // after {number. expect comma or }
|
1574 |
|
|
if (tokens[tok].id == ',') state = 7;
|
1575 |
|
|
else if (tokens[tok].id == '}') state = 6;
|
1576 |
|
|
else {
|
1577 |
|
|
errors.report(tokens[tok]); break;
|
1578 |
|
|
}
|
1579 |
|
|
}
|
1580 |
|
|
if (tok + 1 == tokenB + tokenN && (state == 5 || state >= 7) && linei + 1 < lines.numEntries()) {
|
1581 |
|
|
// no more tokens. statement with {} can span multiple lines
|
1582 |
|
|
if (state == 5) {
|
1583 |
|
|
// after '='. expect next line to be '{'
|
1584 |
|
|
uint32_t tokNext = lines[linei+1].firstToken;
|
1585 |
|
|
if (tokens[tokNext].type != TOK_OPR || tokens[tokNext].id != '{') break; // anything else: break out of loop and get error message
|
1586 |
|
|
}
|
1587 |
|
|
// append next line
|
1588 |
|
|
lines[linei].type = LINE_DATADEF;
|
1589 |
|
|
linei++;
|
1590 |
|
|
tokenN += lines[linei].numTokens;
|
1591 |
|
|
}
|
1592 |
|
|
|
1593 |
|
|
}
|
1594 |
|
|
// no more tokens
|
1595 |
|
|
if (state == 2 || state == 6) {
|
1596 |
|
|
// finish this definition
|
1597 |
|
|
lines[linei].type = LINE_DATADEF;
|
1598 |
|
|
state = 99; goto COMMA;
|
1599 |
|
|
}
|
1600 |
|
|
errors.report(tokens[tok-1].pos, tokens[tok-1].stringLength, ERR_UNFINISHED_VAR);
|
1601 |
|
|
}
|
1602 |
|
|
|
1603 |
|
|
// check if line is code or data
|
1604 |
|
|
void CAssembler::determineLineType() {
|
1605 |
|
|
uint32_t tok; // current token
|
1606 |
|
|
uint32_t elements = 0; // detect type and constant tokens
|
1607 |
|
|
|
1608 |
|
|
if (tokens[tokenB].type == TOK_OPT) {
|
1609 |
|
|
lines[linei].type = LINE_OPTIONS; return;
|
1610 |
|
|
}
|
1611 |
|
|
// loop through tokens on this line
|
1612 |
|
|
for (tok = tokenB; tok < tokenB + tokenN; tok++) {
|
1613 |
|
|
if (tokens[tok].type == TOK_REG || tokens[tok].type == TOK_INS || tokens[tok].type == TOK_XPR || tokens[tok].type == TOK_HLL) {
|
1614 |
|
|
lines[linei].type = LINE_CODEDEF; return; // register or instruction found. must be code
|
1615 |
|
|
}
|
1616 |
|
|
if (tokens[tok].type == TOK_TYP) elements |= 1;
|
1617 |
|
|
if (tokens[tok].type == TOK_NUM || tokens[tok].type == TOK_FLT || tokens[tok].type == TOK_CHA || tokens[tok].type == TOK_STR) elements |= 2;
|
1618 |
|
|
}
|
1619 |
|
|
if (elements == 3) lines[linei].type = LINE_DATADEF;
|
1620 |
|
|
else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) { // align directive
|
1621 |
|
|
lines[linei].type = (sectionFlags & SHF_EXEC) ? LINE_CODEDEF : LINE_DATADEF;
|
1622 |
|
|
}
|
1623 |
|
|
else if (tokens[tokenB].type == TOK_EOF) lines[linei].type = 0; // end of file
|
1624 |
|
|
else if (tokenN == 1 && tokens[tokenB].type == TOK_OPR && linei > 1) {
|
1625 |
|
|
// {} bracket. same type as previous line
|
1626 |
|
|
lines[linei].type = lines[linei-1].type;
|
1627 |
|
|
}
|
1628 |
|
|
else if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') {
|
1629 |
|
|
// metaprogramming code
|
1630 |
|
|
lines[linei].type = LINE_METADEF;
|
1631 |
|
|
}
|
1632 |
|
|
else if (linei > 1) {
|
1633 |
|
|
// undetermined. This may occur in for(;;) clause. Use same type as previous line
|
1634 |
|
|
lines[linei].type = lines[linei-1].type;
|
1635 |
|
|
}
|
1636 |
|
|
else {
|
1637 |
|
|
// error. cannot determine
|
1638 |
|
|
errors.report(tokens[tokenB]);
|
1639 |
|
|
lines[linei].type = LINE_ERROR;
|
1640 |
|
|
}
|
1641 |
|
|
}
|
1642 |
|
|
|
1643 |
|
|
// interpret data or code alignment directive
|
1644 |
|
|
void CAssembler::interpretAlign() {
|
1645 |
|
|
if (section) {
|
1646 |
|
|
uint32_t addr = (uint32_t)sectionHeaders[section].sh_size;
|
1647 |
|
|
SExpression exp1 = expression(tokenB+1, tokenN - 1, pass < 3 ? 0x10 : 0);
|
1648 |
|
|
if (exp1.tokens < tokenN - 1) {errors.report(tokens[tokenB+1+exp1.tokens]); return;}
|
1649 |
|
|
if ((exp1.etype & XPR_IMMEDIATE) != XPR_INT || (exp1.etype & (XPR_STRING | XPR_REG | XPR_OP | XPR_MEM | XPR_OPTION))) {
|
1650 |
|
|
errors.report(tokens[tokenB+1]); return;
|
1651 |
|
|
}
|
1652 |
|
|
uint64_t alignm = exp1.value.u;
|
1653 |
|
|
if ((alignm & (alignm - 1)) || alignm > MAX_ALIGN) {errors.reportLine(ERR_ALIGNMENT); return;}
|
1654 |
|
|
uint32_t log2ali = bitScanReverse(alignm);
|
1655 |
|
|
if (sectionHeaders[section].sh_align < log2ali) {
|
1656 |
|
|
sectionHeaders[section].sh_align = log2ali; // make sure section alignment is not less
|
1657 |
|
|
}
|
1658 |
|
|
if (addr & ((uint32_t)alignm - 1)) { // needs to insert zeroes
|
1659 |
|
|
uint32_t addr2 = (addr + (uint32_t)alignm - 1) & -(int32_t)alignm;
|
1660 |
|
|
sectionHeaders[section].sh_size = addr2; // update address
|
1661 |
|
|
if (pass >= 3) {
|
1662 |
|
|
dataBuffers[section].align((uint32_t)alignm); // put zeroes in data buffer
|
1663 |
|
|
}
|
1664 |
|
|
}
|
1665 |
|
|
}
|
1666 |
|
|
}
|
1667 |
|
|
|
1668 |
|
|
// Pass 2 does three things.
|
1669 |
|
|
// A. Handle metaprogramming directives
|
1670 |
|
|
// B. Classify lines
|
1671 |
|
|
// C. Identify symbol names, sections, labels, functions
|
1672 |
|
|
// These must be done in parallel because metaprogramming directives can refer to previously
|
1673 |
|
|
// defined symbols, and data/code definitions can involve metaprogramming variables and macros
|
1674 |
|
|
|
1675 |
|
|
void CAssembler::pass2() {
|
1676 |
|
|
ElfFWC_Sym2 sym; // symbol record
|
1677 |
|
|
zeroAllMembers(sym); // reset symbol
|
1678 |
|
|
symbols.push(sym); // symbol record 0 is empty
|
1679 |
|
|
symbolNameBuffer.put((char)0); // put dummy zero to avoid zero offset at next string
|
1680 |
|
|
sectionFlags = 0;
|
1681 |
|
|
section = 0;
|
1682 |
|
|
|
1683 |
|
|
// lines loop
|
1684 |
|
|
for (linei = 1; linei < lines.numEntries(); linei++) {
|
1685 |
|
|
lineError = 0;
|
1686 |
|
|
tokenB = lines[linei].firstToken; // first token in line
|
1687 |
|
|
tokenN = lines[linei].numTokens; // number of tokens in line
|
1688 |
|
|
if (tokenN == 0) continue;
|
1689 |
|
|
replaceKnownNames(); // replace previously defined names by symbol references
|
1690 |
|
|
// check if line begins with '%'
|
1691 |
|
|
if (tokens[tokenB].type == TOK_OPR && tokens[tokenB].id == '%') {
|
1692 |
|
|
// metaprogramming code
|
1693 |
|
|
lines[linei].type = LINE_METADEF;
|
1694 |
|
|
interpretMetaDefinition();
|
1695 |
|
|
continue;
|
1696 |
|
|
}
|
1697 |
|
|
// classify other lines
|
1698 |
|
|
lines[linei].sectionType = sectionFlags; // line is section directive
|
1699 |
|
|
if (sectionFlags & ATT_EXEC) lines[linei].type = LINE_CODEDEF;
|
1700 |
|
|
else if (sectionFlags & ((ATT_READ | ATT_WRITE))) lines[linei].type = LINE_DATADEF;
|
1701 |
|
|
|
1702 |
|
|
if (tokenN > 1) {
|
1703 |
|
|
// search for section, function and symbol definitions
|
1704 |
|
|
// lines with a single token cannot legally define a symbol name
|
1705 |
|
|
if ((tokens[tokenB].type == TOK_NAM || tokens[tokenB].type == TOK_SYM) && tokens[tokenB+1].type == TOK_DIR) {
|
1706 |
|
|
switch (tokens[tokenB + 1].id) {
|
1707 |
|
|
case DIR_SECTION: // section starts here
|
1708 |
|
|
interpretSectionDirective();
|
1709 |
|
|
break;
|
1710 |
|
|
case DIR_FUNCTION: // function starts here
|
1711 |
|
|
interpretFunctionDirective();
|
1712 |
|
|
break;
|
1713 |
|
|
case DIR_END: // section or function end
|
1714 |
|
|
interpretEndDirective();
|
1715 |
|
|
break;
|
1716 |
|
|
default:
|
1717 |
|
|
errors.report(tokens[tokenB + 1]);
|
1718 |
|
|
}
|
1719 |
|
|
}
|
1720 |
|
|
else if (tokens[tokenB].id == DIR_EXTERN) {
|
1721 |
|
|
// extern symbols
|
1722 |
|
|
interpretExternDirective();
|
1723 |
|
|
}
|
1724 |
|
|
else if (tokens[tokenB].id == DIR_PUBLIC) {
|
1725 |
|
|
// the interpretation of public symbol declarations is postponed to pass 4 after all
|
1726 |
|
|
// symbols have been defined and got their final value
|
1727 |
|
|
lines[linei].type = LINE_PUBLICDEF;
|
1728 |
|
|
}
|
1729 |
|
|
else if (tokens[tokenB].type == TOK_NAM && tokens[tokenB+1].id == ':') {
|
1730 |
|
|
interpretLabel(tokenB);
|
1731 |
|
|
if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1();
|
1732 |
|
|
}
|
1733 |
|
|
else if (tokens[tokenB].type == TOK_TYP && (tokens[tokenB+1].type == TOK_NAM || tokens[tokenB+1].type == TOK_SYM)) {
|
1734 |
|
|
interpretVariableDefinition2();
|
1735 |
|
|
}
|
1736 |
|
|
else if (tokens[tokenB].type == TOK_ATT && tokens[tokenB].id == ATT_ALIGN) {
|
1737 |
|
|
interpretAlign();
|
1738 |
|
|
}
|
1739 |
|
|
else if (tokens[tokenB].type == TOK_SYM && tokens[tokenB+1].id == ':' && pass == 2) {
|
1740 |
|
|
errors.report(tokens[tokenB].pos, tokens[tokenB].stringLength, ERR_SYMBOL_DEFINED); // symbol already defined
|
1741 |
|
|
}
|
1742 |
|
|
else {
|
1743 |
|
|
determineLineType(); // check if code or data
|
1744 |
|
|
if (lines[linei].type == LINE_DATADEF) interpretVariableDefinition1();
|
1745 |
|
|
}
|
1746 |
|
|
}
|
1747 |
|
|
else {
|
1748 |
|
|
determineLineType(); // check if code or data (can only be code)
|
1749 |
|
|
}
|
1750 |
|
|
}
|
1751 |
|
|
|
1752 |
|
|
// loop through lines again to replace names that are forward references to symbols defined during pass 2
|
1753 |
|
|
for (linei = 1; linei < lines.numEntries(); linei++) {
|
1754 |
|
|
tokenB = lines[linei].firstToken; // first token in line
|
1755 |
|
|
tokenN = lines[linei].numTokens; // number of tokens in line
|
1756 |
|
|
replaceKnownNames(); // replace previously defined names by symbol references
|
1757 |
|
|
}
|
1758 |
|
|
}
|
1759 |
|
|
|
1760 |
|
|
|
1761 |
|
|
// Show all symbols. For debugging only
|
1762 |
|
|
void CAssembler::showSymbols() {
|
1763 |
|
|
uint32_t symi;
|
1764 |
|
|
ElfFWC_Sym2 sym;
|
1765 |
|
|
printf("\n\nSymbol: name, section, addr, type, size, binding");
|
1766 |
|
|
for (symi = 1; symi < symbols.numEntries(); symi++) {
|
1767 |
|
|
sym = symbols[symi];
|
1768 |
|
|
printf("\n%3i: %10s, %7i, %4X", symi, symbolNameBuffer.buf() + sym.st_name,
|
1769 |
|
|
sym.st_section, (uint32_t)sym.st_value);
|
1770 |
|
|
if (sym.st_type == STT_CONSTANT || sym.st_type == STT_VARIABLE) {
|
1771 |
|
|
if (sym.st_other & STV_FLOAT) { // floating point constant
|
1772 |
|
|
union { uint64_t i; double d; } val;
|
1773 |
|
|
val.i = sym.st_value;
|
1774 |
|
|
printf(" = %G", val.d);
|
1775 |
|
|
}
|
1776 |
|
|
else if (sym.st_other & STV_STRING) { // string
|
1777 |
|
|
printf(" = %s", stringBuffer.getString((uint32_t)sym.st_value));
|
1778 |
|
|
}
|
1779 |
|
|
else {
|
1780 |
|
|
// print 64 bit integer constant
|
1781 |
|
|
printf(" = 0x");
|
1782 |
|
|
if (uint64_t(sym.st_value) >> 32) {
|
1783 |
|
|
printf("%X%08X", uint32_t(sym.st_value >> 32), uint32_t(sym.st_value));
|
1784 |
|
|
}
|
1785 |
|
|
else {
|
1786 |
|
|
printf("%X", uint32_t(sym.st_value));
|
1787 |
|
|
}
|
1788 |
|
|
// this method causes warnings:
|
1789 |
|
|
// printf(((sizeof(long int) > 4) ? " = 0x%lx" : " = 0x%llx"), sym.st_value);
|
1790 |
|
|
}
|
1791 |
|
|
}
|
1792 |
|
|
else {
|
1793 |
|
|
printf(" %5X, %X*%X, %7X", // other type
|
1794 |
|
|
sym.st_type, sym.st_unitsize, sym.st_unitnum, sym.st_bind);
|
1795 |
|
|
}
|
1796 |
|
|
}
|
1797 |
|
|
}
|
1798 |
|
|
|
1799 |
|
|
// Show all tokens. For debugging only
|
1800 |
|
|
void CAssembler::showTokens() {
|
1801 |
|
|
SKeyword const tokenNames[] = {
|
1802 |
|
|
{"name", TOK_NAM}, // unidentified name
|
1803 |
|
|
{"direc", TOK_DIR}, // section or function directive
|
1804 |
|
|
{"attrib", TOK_ATT}, // section or function attribute
|
1805 |
|
|
{"label", TOK_LAB}, // code label or function name
|
1806 |
|
|
{"datalb", TOK_VAR}, // data label
|
1807 |
|
|
{"secnm", TOK_SEC}, // section name
|
1808 |
|
|
{"type", TOK_TYP}, // type name
|
1809 |
|
|
{"reg", TOK_REG}, // register name
|
1810 |
|
|
{"instr", TOK_INS}, // instruction name
|
1811 |
|
|
{"oper", TOK_OPR}, // operator
|
1812 |
|
|
{"option", TOK_OPT}, // operator
|
1813 |
|
|
{"num", TOK_NUM}, // number
|
1814 |
|
|
{"float", TOK_FLT}, // floating point number
|
1815 |
|
|
{"char", TOK_CHA}, // character or string in single quotes ' '
|
1816 |
|
|
{"string", TOK_STR}, // string in double quotes " "
|
1817 |
|
|
{"symbol", TOK_SYM}, // symbol
|
1818 |
|
|
{"expression", TOK_XPR}, // expression
|
1819 |
|
|
{"eof", TOK_EOF}, // string in double quotes " "
|
1820 |
|
|
{"hll", TOK_HLL} // string in double quotes " "
|
1821 |
|
|
// {"error", TOK_ERR} // error. illegal character or unmatched quote
|
1822 |
|
|
};
|
1823 |
|
|
|
1824 |
|
|
uint32_t line, tok, i;
|
1825 |
|
|
for (line = 1; line < lines.numEntries(); line++) {
|
1826 |
|
|
if (line < lines.numEntries() && lines[line].numTokens) {
|
1827 |
|
|
printf("\nline %2i type %X", lines[line].linenum, lines[line].type);
|
1828 |
|
|
|
1829 |
|
|
for (tok = lines[line].firstToken; tok < lines[line].firstToken + lines[line].numTokens; tok++) {
|
1830 |
|
|
// find name for token type
|
1831 |
|
|
const char * nm = 0;
|
1832 |
|
|
for (i = 0; i < TableSize(tokenNames); i++) {
|
1833 |
|
|
if (tokenNames[i].id == tokens[tok].type) nm = tokenNames[i].name;
|
1834 |
|
|
}
|
1835 |
|
|
if (nm) printf("\n%4X %8s: ", tok, nm); // Token type
|
1836 |
|
|
else printf("type %4X", tokens[tok].type);
|
1837 |
|
|
|
1838 |
|
|
switch (tokens[tok].type) {
|
1839 |
|
|
case TOK_DIR: case TOK_ATT: case TOK_TYP: case TOK_OPT: case TOK_HLL:
|
1840 |
|
|
nm = 0;
|
1841 |
|
|
for (i = 0; i < TableSize(keywordsList); i++) {
|
1842 |
|
|
if (keywordsList[i].id == tokens[tok].id) nm = keywordsList[i].name;
|
1843 |
|
|
}
|
1844 |
|
|
if (nm) printf("%s", nm);
|
1845 |
|
|
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
|
1846 |
|
|
break;
|
1847 |
|
|
case TOK_OPR:
|
1848 |
|
|
nm = 0;
|
1849 |
|
|
for (i = 0; i < TableSize(operatorsList); i++) {
|
1850 |
|
|
if (operatorsList[i].id == tokens[tok].id) nm = operatorsList[i].name;
|
1851 |
|
|
}
|
1852 |
|
|
if (nm) printf("%s", nm);
|
1853 |
|
|
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
|
1854 |
|
|
break;
|
1855 |
|
|
case TOK_REG: //registerNames
|
1856 |
|
|
nm = 0;
|
1857 |
|
|
for (i = 0; i < TableSize(registerNames); i++) {
|
1858 |
|
|
if (registerNames[i].id == tokens[tok].id) nm = registerNames[i].name;
|
1859 |
|
|
}
|
1860 |
|
|
if (nm) printf("%s%i", nm, tokens[tok].id & 0xFF);
|
1861 |
|
|
else printf("%4X %2i", tokens[tok].pos, tokens[tok].stringLength);
|
1862 |
|
|
break;
|
1863 |
|
|
case TOK_NAM: case TOK_NUM: case TOK_FLT: case TOK_LAB: case TOK_VAR: case TOK_SEC:
|
1864 |
|
|
case TOK_CHA: case TOK_STR: case TOK_INS: case TOK_SYM:
|
1865 |
|
|
for (i = 0; i < tokens[tok].stringLength; i++) {
|
1866 |
|
|
printf("%c", buf()[tokens[tok].pos + i]);
|
1867 |
|
|
}
|
1868 |
|
|
printf(" id %X, value %X", tokens[tok].id, tokens[tok].value.w);
|
1869 |
|
|
break;
|
1870 |
|
|
case TOK_XPR:
|
1871 |
|
|
default:
|
1872 |
|
|
printf("0x%X 0x%X 0x%X %2i", tokens[tok].id, tokens[tok].value.w, tokens[tok].pos, tokens[tok].stringLength);
|
1873 |
|
|
break;
|
1874 |
|
|
}
|
1875 |
|
|
}
|
1876 |
|
|
}
|
1877 |
|
|
}
|
1878 |
|
|
}
|
1879 |
|
|
|
1880 |
|
|
void CAssembler::initializeWordLists() {
|
1881 |
|
|
// Operators list
|
1882 |
|
|
operators.pushBig(operatorsList, sizeof(operatorsList));
|
1883 |
|
|
operators.sort();
|
1884 |
|
|
// Keywords list
|
1885 |
|
|
keywords.pushBig(keywordsList,sizeof(keywordsList));
|
1886 |
|
|
keywords.sort();
|
1887 |
|
|
// Read instruction list from file
|
1888 |
|
|
CCSVFile instructionListFile;
|
1889 |
|
|
instructionListFile.read(cmd.getFilename(cmd.instructionListFile), CMDL_FILE_SEARCH_PATH); // Filename of list of instructions
|
1890 |
|
|
instructionListFile.parse(); // Read and interpret instruction list file
|
1891 |
|
|
instructionlist << instructionListFile.instructionlist; // Transfer instruction list to my own container
|
1892 |
|
|
instructionlistId.copy(instructionlist); // copy instruction list
|
1893 |
|
|
instructionlistNm.copy(instructionlist); // copy instruction list
|
1894 |
|
|
// sort lists by different criteria, defined by the different operators:
|
1895 |
|
|
// operator < (SInstruction const & a, SInstruction const & b)
|
1896 |
|
|
// operator < (SInstruction3 const & a, SInstruction3 const & b)
|
1897 |
|
|
SInstruction3 nullInstruction; // empty record
|
1898 |
|
|
zeroAllMembers(nullInstruction);
|
1899 |
|
|
instructionlistId.push(nullInstruction); // Empty record will go to position 0 to avoid an instruction with index 0
|
1900 |
|
|
instructionlistNm.sort(); // Sort instructionlist by name
|
1901 |
|
|
instructionlistId.sort(); // Sort instructionlistId by id
|
1902 |
|
|
}
|