OpenCores
URL https://opencores.org/ocsvn/ssbcc/ssbcc/trunk

Subversion Repositories ssbcc

[/] [ssbcc/] [trunk/] [core/] [9x8/] [asmDef.py] - Blame information for rev 3

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 sinclairrf
################################################################################
#
# Copyright 2012, Sinclair R.F., Inc.
#
# Collection of utilities for the assembler.
#
################################################################################
8
 
9
import os
10
import re
11
 
12
class AsmException(Exception):
  """
  Exception class for the assembler.

  This allows the top-level module to capture error messages other than internal
  errors and program bugs so that users see a single-line error relevant to
  their code rather than the usual Python mess.

  The message is stored in the "msg" attribute and is what str() returns.
  """

  def __init__(self,message):
    # Initialize the base class explicitly so that "args" is populated
    # regardless of how the instance is constructed.
    Exception.__init__(self,message)
    self.msg = message

  def __str__(self):
    # __str__ must return a string even if a non-string message was supplied.
    return str(self.msg)
23
 
24
class FileBodyIterator:
  """
  Iterator for files that returns bodies of lines of the file.

  A directive must be the first non-white-space text on a line.

  Each item produced by the iterator is a list whose first element is the name
  of the file, whose second element is the line number of the first line of the
  body, and whose remaining elements are the lines of the body.  A line number
  of 0 marks the start of a file and a line number of -1 marks its end; those
  two marker lists carry no lines.

  The iterator handles the ".include" directive:  the caller registers the file
  to include through Include() and the iterator opens it on a later call.
  """

  def __init__(self, fps, ad):
    """
    Initialize the iterator.

    fps         list of file pointers from the argument line
    ad          asmDef_9x8 object (required to identify the directives)

    Raises AsmException if the same file name is listed more than once.
    """
    # Do sanity check on arguments.
    if ad.IsDirective(".include"):
      raise Exception('Program Bug:  The ".include" directive is defined by FileBodyIterator')
    # Initialize the raw processing states
    self.ixConstants = 0
    self.fpPending = list(fps)
    self.ad = ad
    self.current = list()
    self.pending = list()
    # Initialize the include search paths
    self.searchPaths = ['.']
    # Prepare the file parsing
    self.included = list()
    for fp in self.fpPending:
      if fp.name in self.included:
        raise AsmException('Input file %s listed more than once' % fp.name)
      self.included.append(fp.name)
    # Each stack entry is a dict with the open file ("fp"), the number of lines
    # read so far ("line"), and the optional "started"/"closed" markers.
    self.fpStack = list()
    if self.fpPending:
      self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
    self.pendingInclude = None

  def __iter__(self):
    """
    Required function for an iterable object.
    """
    return self

  def next(self):
    """
    Return the next directive body from the iterator.

    The body is a list with the following content:
      the name of the file
      the line number for the first line of the body
      the body consisting of lines from the source file

    The body contains comment lines preceding the directive, the line with the
    directive, and optional lines following the directive up to the optional
    comments preceding the next directive.

    Raises StopIteration when all files have been processed and AsmException
    when a queued include file cannot be found on the search paths.
    """
    # Discard the body emitted by the previous call.
    self.current = self.pending
    self.pending = list()
    # If the current body is an include directive, then process it immediately.
    if self.current and re.match(r'\s*\.include\b',self.current[-1]):
      return self.current
    # Loop until all of the files have been processed
    while self.fpStack or self.fpPending or self.pendingInclude:
      # Get the next file to process if fpStack is empty.  This must happen
      # before the top of the stack is examined; otherwise the second and later
      # input files are never reached.
      if not self.fpStack and self.fpPending:
        self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
      if self.fpStack:
        top = self.fpStack[-1]
        # Indicate when a new file is started.
        if 'started' not in top:
          if not self.current:
            top['started'] = True
            self.current.append(top['fp'].name)
            self.current.append(0)
          return self.current
        # Ensure the bodies in closed files are all emitted before continuing to
        # the next/enclosing file.
        if 'closed' in top:
          # Provide end-of-file indication if there is not a pending body fragment.
          if not self.current:
            self.current.append(top['fp'].name)
            self.current.append(-1)
            self.fpStack.pop()
          return self.current
      # Handle a queued ".include" directive.
      if self.pendingInclude:
        # Don't open the include file until all previous content has been emitted.
        if self.current:
          return self.current
        self.included.append(self.pendingInclude)
        for path in self.searchPaths:
          fullInclude = os.path.join(path,self.pendingInclude)
          if os.path.exists(fullInclude):
            # Open exactly the path that was tested for existence.
            fp_pending = open(fullInclude,'r')
            break
        else:
          raise AsmException('%s not found' % self.pendingInclude)
        self.fpStack.append(dict(fp=fp_pending, line=0))
        self.pendingInclude = None
        # Provide start-of-file indication.
        self.fpStack[-1]['started'] = True
        self.current.append(fp_pending.name)
        self.current.append(0)
        return self.current
      # Process/continue processing the top file.
      fp = self.fpStack[-1]
      for line in fp['fp']:
        fp['line'] += 1
        # Handle single-line directives.
        if re.match(r'\s*\.(IFDEF|IFNDEF|ELSE|ENDIF|include)\b',line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          if not self.current:
            self.current = self.pending
            self.pending = list()
          return self.current
        # Append empty and comment lines to the pending block.
        if re.match(r'\s*(;|$)', line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          continue
        # See if the line starts with a directive.
        tokens = re.findall(r'\s*(\S+)',line)
        if self.ad.IsDirective(tokens[0]):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          # Emit the preceding body first; the new directive stays in "pending"
          # and becomes the current body on the next call.
          if self.current:
            return self.current
          self.current = self.pending
          self.pending = list()
          continue
        # Otherwise, this line belongs to the body of the preceding directive.
        if not self.current:
          self.current += self.pending[0:2]
        self.current += self.pending[2:]
        self.current.append(line)
        self.pending = list()
      # Past the last line of the current file -- close it.
      self.fpStack[-1]['fp'].close()
      self.fpStack[-1]['closed'] = True
      # Prepare to emit pending bodies if any.
      if not self.current:
        self.current = self.pending
        self.pending = list()
    raise StopIteration

  # Python 3 spelling of the iterator protocol; "next" is kept for Python 2.
  __next__ = next

  def AddSearchPath(self,path):
    """
    Used by the top level assembler to add search paths for opening included
    files.
    """
    self.searchPaths.append(path)

  def Include(self,filename):
    """
    Queue "filename" to be opened as an include file by a subsequent call to
    next().
    """
    self.pendingInclude = filename
184
 
185
################################################################################
#
# Parse strings into the desired types.
#
################################################################################
190
 
191
def ParseNumber(inString):
  """
  Test for recognized integer values and return the value if recognized,
  otherwise return None.

  Recognized forms are the single digit "0", signed decimal values that do not
  start with a zero, octal values with a leading "0", and hexadecimal values
  with a leading "0x".
  """
  # look for single-digit 0
  if inString == '0':
    return 0
  # Try the decimal, octal, and hex forms in that order.
  numberForms = (
    (r'[+\-]?[1-9]\d*$', 10),
    (r'0[0-7]+$',         8),
    (r'0x[0-9A-Fa-f]+$', 16),
  )
  for (pattern,base) in numberForms:
    a = re.match(pattern,inString)
    if a:
      return int(a.group(0),base)
  # Everything else is an error
  return None
213
 
214
def ParseChar(inchar):
  """
  Parse single characters including escaped characters.  Return the character
  value and the number of characters in the input string matched.

  Only the start of "inchar" is examined.  Recognized escapes are one to three
  octal digits, "x" or "X" followed by one or two hex digits, and the named
  escapes a, b, f, n, r, and t.  Any other escaped character stands for itself.
  """
  # Octal escape:  the longest run of up to three octal digits is used.
  a = re.match(r'\\([0-7]{1,3})',inchar)
  if a:
    return (int(a.group(1),8),1+len(a.group(1)),)
  # Hex escape:  the longest run of up to two hex digits is used.
  a = re.match(r'\\[xX]([0-9A-Fa-f]{1,2})',inchar)
  if a:
    return (int(a.group(1),16),2+len(a.group(1)),)
  # Single-character escapes.
  if re.match(r'\\.',inchar):
    namedEscapes = {
      'a' :  7,         # bell ==> control-G
      'b' :  8,         # backspace ==> control-H
      'f' : 12,         # form feed ==> control-L
      'n' : 10,         # line feed ==> control-J
      'r' : 13,         # carriage return ==> control-M
      't' :  9,         # horizontal tab ==> control-I
    }
    # unrecognized escaped character ==> return that character
    return (namedEscapes.get(inchar[1],ord(inchar[1])),2,)
  # Unescaped character.
  return (ord(inchar[0]),1,)
246
 
247
def ParseString(inString):
  """
  Parse strings recognized by the assembler.

  A string consists of the following:
    an optional count/termination character -- one of CNc
    a starting double-quote character
    the body of the string including escape sequences
    a terminating double-quote character

  Returns the list of integer character values of the body with
    the number of characters inserted at the front for a "C" prefix,
    a nul character appended for an "N" prefix,
    the number of characters minus one inserted at the front for a "c" prefix.

  Raises Exception if the enclosing double quotes are missing, which indicates
  that the caller did not validate the token.
  """
  # Detect optional count/termination character.
  prefix = inString[0] if inString[:1] in ('C','N','c',) else ''
  ix = len(prefix)
  # Ensure the required start and end double quotes are present.  The length
  # test prevents a lone double quote from counting as both of them.
  if len(inString) < ix+2 or inString[ix] != '"' or inString[-1] != '"':
    raise Exception('Program Bug -- missing one or more double quotes around string')
  body = inString[ix+1:-1]
  # Convert the characters and escape sequences in the string to a list of their
  # integer values.
  outString = list()
  ixBody = 0
  while ixBody < len(body):
    (thisChar,thisLen,) = ParseChar(body[ixBody:])
    outString.append(thisChar)
    ixBody += thisLen
  # Insert the optional character count or append the optional nul terminating
  # character.
  if prefix == 'C':
    outString.insert(0,len(outString))
  elif prefix == 'N':
    outString.append(0)
  elif prefix == 'c':
    outString.insert(0,len(outString)-1)
  # That's all.
  return outString
281
 
282
def ParseToken(ad,fl_loc,col,raw,allowed):
  """
  Examine the raw token and convert it into a dictionary object consisting of
  the following:
    type        the type of token
    value       the value of the token
                this can be the name of a symbol, a numeric value, a string body, ...
    loc         start location of the token
                this is required by subsequent stages of the assembler for
                error messages
    argument    optional entry required for macro arguments
    range       optional entry required when a range is provided for a parameter

  ad            assembler definition object used to recognize instructions,
                directives, macros, and constants
  fl_loc        "file:line" location of the line containing the token
  col           zero-based column at which the token starts
  raw           text of the token
  allowed       collection of the token kinds allowed at this location

  The token type is compared against the allowed tokens.

  Syntax errors are reported by raising AsmException with a message consisting
  of the error and the location within the file where the error occurs.

  NOTE:  Computed values "${...}" are evaluated with eval() using the symbol
  dictionary from "ad", so assembly source is executed as Python code.  Only
  assemble source from a trusted origin.
  """
  flc_loc = fl_loc + ':' + str(col+1)
  # look for instructions
  # Note:  Do this before anything else because instructions can be a
  #        strange mix of symbols.
  if ad.IsInstruction(raw):
    if 'instruction' not in allowed:
      raise AsmException('instruction "%s" not allowed at %s' % (raw,flc_loc))
    return dict(type='instruction', value=raw, loc=flc_loc)
  # look for computation
  a = re.match(r'\${\S+}$',raw)
  if a:
    if 'singlevalue' not in allowed:
      raise AsmException('Computed value not allowed at %s' % flc_loc)
    try:
      tParseNumber = eval(raw[2:-1],ad.SymbolDict())
    except Exception:
      raise AsmException('Malformed computed value at %s: "%s"' % (flc_loc,raw,))
    if type(tParseNumber) != int:
      raise AsmException('Malformed single-byte value at %s' % flc_loc)
    return dict(type='value', value=tParseNumber, loc=flc_loc)
  # look for a repeated single-byte numeric value (N*M where M is the repeat count)
  matchString=r'(0|[+\-]?[1-9]\d*|0[0-7]+|0x[0-9A-Fa-f]{1,2})\*([1-9]\d*|C_\w+|\$\{\S+\})$'
  a = re.match(matchString,raw)
  if a:
    if 'multivalue' not in allowed:
      raise AsmException('Multi-byte value not allowed at %s' % flc_loc)
    (rawValue,rawCount,) = a.groups()
    tParseNumber = ParseNumber(rawValue)
    if tParseNumber is None:
      raise AsmException('Malformed multi-byte value at %s' % (fl_loc + ':' + str(col+1)))
    # Location of the repeat count, i.e., just past the "*".
    fl_loc2 = fl_loc+':'+str(col+1+len(rawValue)+1)
    if re.match(r'[1-9]',rawCount):
      repeatCount = int(rawCount)
    elif re.match(r'C_',rawCount):
      if not ad.IsConstant(rawCount):
        raise AsmException('Unrecognized symbol "%s" at %s' % (rawCount,fl_loc2,))
      ix = ad.symbols['list'].index(rawCount)
      if len(ad.symbols['body'][ix]) != 1:
        raise AsmException('constant can only be one byte at %s' % fl_loc2)
      repeatCount = ad.symbols['body'][ix][0]
    elif re.match(r'\$',rawCount):
      try:
        repeatCount = eval(rawCount[2:-1],ad.SymbolDict())
      except Exception:
        raise AsmException('Malformed computed value at %s: "%s"' % (fl_loc2,rawCount,))
      if type(repeatCount) != int:
        raise AsmException('Malformed repeat count at %s' % fl_loc2)
    else:
      raise Exception('Program Bug -- unrecognized repeat count')
    if repeatCount <= 0:
      raise AsmException('Repeat count must be positive at %s' % fl_loc2)
    return dict(type='value', value=[tParseNumber]*repeatCount, loc=flc_loc)
  # look for a single-byte numeric value
  a = re.match(r'(0|[+\-]?[1-9]\d*|0[0-7]+|0x[0-9A-Fa-f]+)$',raw)
  if a:
    if 'singlevalue' not in allowed:
      raise AsmException('Value not allowed at %s' % flc_loc)
    tParseNumber = ParseNumber(raw)
    if tParseNumber is None:
      raise AsmException('Malformed single-byte value at %s' % flc_loc)
    return dict(type='value', value=tParseNumber, loc=flc_loc)
  # capture double-quoted strings
  if re.match(r'[CNc]?"',raw):
    if 'string' not in allowed:
      raise AsmException('String not allowed at %s' % flc_loc)
    parsedString = ParseString(raw)
    if type(parsedString) == int:
      raise AsmException('Malformed string at %s' % (fl_loc + ':' + str(col+parsedString)))
    return dict(type='value', value=parsedString, loc=flc_loc)
  # capture single-quoted character
  if raw[:1] == "'":
    if 'singlevalue' not in allowed:
      raise AsmException('Character not allowed at %s' % flc_loc)
    # Require a body and a closing quote before parsing the body; tokens that
    # arrive as macro arguments have not been checked for this.
    if len(raw) < 3 or raw[-1] != "'":
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    (thisChar,thisLen,) = ParseChar(raw[1:-1])
    if len(raw) != thisLen+2:
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    return dict(type='value', value=thisChar, loc=flc_loc)
  # look for directives
  if ad.IsDirective(raw):
    if 'directive' not in allowed:
      raise AsmException('Directive not allowed at %s' % flc_loc)
    return dict(type='directive', value=raw, loc=flc_loc)
  # look for macros
  a = re.match(r'\.[A-Za-z]\S*(\(\S+(,\S+|,\${\S+})*\))?$',raw)
  if a:
    macroName = re.match(r'\.[^(]+',raw).group(0)
    if not ad.IsMacro(macroName):
      raise AsmException('Unrecognized directive or macro at %s:%d' % (fl_loc,col+1))
    if ('macro' not in allowed) and not ('singlemacro' in allowed and ad.IsSingleMacro(macroName)):
      raise AsmException('Macro "%s" not allowed at %s:%d' % (macroName,fl_loc,col+1,))
    macroArgs = re.findall(r'([^,]+)',raw[len(macroName)+1:-1])
    nArgs = ad.MacroNumberArgs(macroName)
    if len(macroArgs) not in nArgs:
      raise AsmException('Wrong number of arguments to macro "%s" at %s:%d' % (macroName,fl_loc,col+1))
    # Fill in the defaults for the arguments that were not provided.
    while len(macroArgs) < nArgs[-1]:
      macroArgs.append(ad.MacroDefault(macroName,len(macroArgs)))
    # Parse the arguments, tracking the column of each one separately so that
    # the location reported for the macro itself remains its start.
    outArgs = list()
    argCol = col + len(macroName)+1
    for (ixArg,thisArg,) in enumerate(macroArgs):
      outArgs.append(ParseToken(ad,fl_loc,argCol,thisArg,ad.MacroArgTypes(macroName,ixArg)))
      argCol = argCol + len(thisArg) + 1
    return dict(type='macro', value=macroName, loc=flc_loc, argument=outArgs)
  # look for a label definition
  a = re.match(r':[A-Za-z]\w*$',raw)
  if a:
    if 'label' not in allowed:
      raise AsmException('Label not allowed at %s' % flc_loc)
    return dict(type='label', value=raw[1:], loc=flc_loc)
  # look for parameters with range specification
  a = re.match(r'[LG]_\w+\[\d+\+?:\d+\]$',raw)
  if a:
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    (symbolName,symbolRange,) = re.match(r'([LG]_\w+)(\[.*)',raw).groups()
    return dict(type='symbol', value=symbolName, loc=flc_loc, range=symbolRange)
  # look for symbols
  # Note:  This should be the last check performed as every other kind of
  #        token should be recognizable
  a = re.match(r'[A-Za-z]\w*$',raw)
  if a:
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    return dict(type='symbol', value=a.group(0), loc=flc_loc)
  # anything else is an error
  raise AsmException('Malformed entry at %s:  "%s"' % (flc_loc,raw,))
426
 
427
################################################################################
#
# Extract the tokens from a block of code.
#
# These blocks of code should be generated by FileBodyIterator.
#
################################################################################
434
 
435
def RawTokens(ad,filename,startLineNumber,lines):
  """
  Extract the list of tokens from the provided list of lines.
  Convert the directive body into a list of individual tokens.

  Tokens are directive names, symbol names, values, strings, labels, etc.

  ad                    assembler definition object passed to ParseToken and
                        used to test symbols for ".ifdef" and ".ifndef"
  filename              name of the file, used in error locations
  startLineNumber       line number of the first entry in "lines"
  lines                 the lines of the directive body

  The return is a list of the tokens in the sequence they are encountered.  Each
  of these tokens is a dictionary object constructed by ParseToken.

  The conditional constructs ".ifdef(symbol)", ".ifndef(symbol)", ".else", and
  ".endif" are consumed here and tokens inside an excluded region are skipped.

  Raises AsmException for malformed tokens, missing spaces between tokens, and
  unmatched conditionals.
  """
  allowed = [
              'instruction',
              'label',
              'macro',
              'multivalue',
              'singlevalue',
              'string',
              'symbol'
            ]
  # One entry per open conditional:  True if its active branch is included.
  ifstack = list()
  tokens = list()
  lineNumber = startLineNumber - 1
  for line in lines:
    lineNumber = lineNumber + 1
    fl_loc = '%s:%d' % (filename,lineNumber)
    col = 0
    spaceFound = True
    while col < len(line):
      flc_loc = fl_loc + ':' + str(col+1)
      rest = line[col:]
      # Identify and then ignore white-space characters.
      if re.match(r'\s',rest):
        spaceFound = True
        col = col + 1
        continue
      # Ensure tokens start on new lines or are separated by spaces.
      if not spaceFound:
        raise AsmException('Missing space in %s:%d' % (fl_loc,col+1))
      spaceFound = False
      # Ignore comments.
      if line[col] == ';':
        break
      # Catch N"" string.
      if re.match(r'N""',rest):
        a = re.match(r'N""',rest)
      # Catch strings.
      elif re.match(r'[CNc]?"',rest):
        a = re.match(r'[CNc]?"([^\\"]|\\.)+"',rest)
        if not a:
          raise AsmException('Malformed string at %s' % flc_loc)
      # Catch single-quoted characters
      elif re.match(r'\'',rest):
        a = re.match(r'\'(.|\\.|\\[xX][0-9A-Fa-f]{1,2}|\\[0-7]{1,3})\'',rest)
        if not a:
          raise AsmException('Malformed \'.\' at %s' % flc_loc)
      else:
        # Everything else is a white-space delimited token.
        a = re.match(r'\S+',rest)
      # Get the candidate token.
      candToken = a.group(0)
      # Catch conditional code inclusion constructs before parsing the token
      if candToken == '.else':
        if not ifstack:
          raise AsmException('Unmatched ".else" at %s' % flc_loc)
        ifstack[-1] = not ifstack[-1]
        col += len(candToken)
        continue
      if candToken == '.endif':
        if not ifstack:
          raise AsmException('Unmatched ".endif" at %s' % flc_loc)
        ifstack.pop()
        col += len(candToken)
        continue
      a = re.match(r'\.(ifdef|ifndef)\(',candToken)
      if a:
        keyword = a.group(1)
        a = re.match(r'\.' + keyword + r'\((\w+)\)$',candToken)
        if not a:
          raise AsmException('Malformed ".%s" at %s' % (keyword,flc_loc,))
        isDefined = ad.IsSymbol(a.group(1))
        ifstack.append(isDefined if keyword == 'ifdef' else not isDefined)
        col += len(candToken)
        continue
      # Skip the token if any enclosing conditional excludes it.  Testing only
      # the innermost conditional would emit code from an included inner
      # conditional nested inside an excluded outer one.
      if not all(ifstack):
        col += len(candToken)
        continue
      # Determine which kinds of tokens are allowed at this location in the
      # directive body.
      if not tokens:
        selAllowed = ['directive']
      else:
        selAllowed = allowed
      # Append the parsed token to the list of tokens.
      tokens.append(ParseToken(ad,fl_loc,col,candToken,selAllowed))
      col += len(candToken)
  if ifstack:
    raise AsmException('%d unmatched conditionals at line %d' % (len(ifstack),lineNumber,))
  return tokens

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.