OpenCores
URL https://opencores.org/ocsvn/ssbcc/ssbcc/trunk

Subversion Repositories ssbcc

[/] [ssbcc/] [trunk/] [core/] [9x8/] [asmDef.py] - Blame information for rev 2

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 sinclairrf
################################################################################
2
#
3
# Copyright 2012, Sinclair R.F., Inc.
4
#
5
# Collection of utilities for the assembler.
6
#
7
################################################################################
8
 
9
import os
10
import re
11
 
12
class AsmException(Exception):
  """
  Exception class for the assembler.

  This allows the top-level module to capture error messages other than internal
  errors and program bugs so that users see a single-line error relevant to
  their code rather than the usual Python mess.

  The message is available through the "msg" attribute and through str().
  """
  def __init__(self,message):
    # Initialize the base class explicitly so the exception is fully formed.
    Exception.__init__(self,message)
    self.msg = message
  def __str__(self):
    # __str__ must return a string even if a non-string message was supplied.
    return str(self.msg)
23
 
24
class FileBodyIterator:
  """
  Iterator for files that returns bodies of lines of the file.

  A directive must be the first non-white-space text on its line.

  Every item produced by the iterator is a list of the form
    [ file name, line number, line, line, ... ]
  with the following special cases:
    [ file name, 0 ]    emitted once when a file is started
    [ file name, -1 ]   emitted once when a file has been exhausted

  The iterator recognizes the ".include" directive itself:  the line holding
  the directive is emitted as its own body and the named file is opened on the
  next call after the caller has passed the file name to Include().
  """

  def __init__(self, fps, ad):
    """
    Initialize the iterator.

    fps         list of file pointers from the argument line
    ad          asmDef_9x8 object (required to identify the directives)

    Raises Exception if "ad" itself defines ".include" and AsmException if the
    same file name is listed more than once.
    """
    # Do sanity check on arguments.
    if ad.IsDirective(".include"):
      raise Exception('Program Bug:  The ".include" directive is defined by FileBodyIterator')
    # Initialize the raw processing states
    self.ixConstants = 0                # not used within this class
    self.fpPending = list(fps)
    self.ad = ad
    self.current = list()               # body being assembled for emission
    self.pending = list()               # lines read but not yet assigned to a body
    # Initialize the include search paths
    self.searchPaths = ['.']
    # Prepare the file parsing
    self.included = list()
    for fp in self.fpPending:
      if fp.name in self.included:
        raise AsmException('Input file %s listed more than once' % fp.name)
      self.included.append(fp.name)
    # Each stack entry is a dict with the keys "fp" and "line" plus the optional
    # markers "started" and "closed".
    self.fpStack = list()
    # An empty file list is tolerated:  the iterator is simply exhausted.
    if self.fpPending:
      self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
    self.pendingInclude = None

  def __iter__(self):
    """
    Required function for an iterable object.
    """
    return self

  def next(self):
    """
    Return the next directive body from the iterator.

    The body is a list with the following content:
      the name of the file
      the line number for the first line of the body
      the body consisting of lines from the source file

    The body contains comment lines preceding the directive, the line with the
    directive, and optional lines following the directive up to the optional
    comments preceding the next directive.

    Raises StopIteration when every file has been emitted and AsmException if
    an included file was already included or cannot be found.
    """
    # Discard the body emitted by the previous call.
    self.current = self.pending
    self.pending = list()
    # If the current body is an include directive, then process it immediately.
    if self.current and re.match(r'\s*\.include\b',self.current[-1]):
      return self.current
    # Loop until all of the files have been processed
    while self.fpStack or self.fpPending or self.pendingInclude:
      # Get the next file to process if fpStack is empty.  This must be done
      # before the top of the stack is examined below.
      if not self.fpStack and self.fpPending:
        self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
      if self.fpStack:
        top = self.fpStack[-1]
        # Indicate when a new file is started.
        if 'started' not in top:
          if not self.current:
            top['started'] = True
            self.current.append(top['fp'].name)
            self.current.append(0)
          return self.current
        # Ensure the bodies in closed files are all emitted before continuing to
        # the next/enclosing file.
        if 'closed' in top:
          # Provide end-of-file indication if there is not a pending body fragment.
          if not self.current:
            self.current.append(top['fp'].name)
            self.current.append(-1)
            self.fpStack.pop()
          return self.current
      # Handle a queued ".include" directive.
      if self.pendingInclude:
        # Don't open the include file until all previous content has been emitted.
        if self.current:
          return self.current
        if self.pendingInclude in self.included:
          raise AsmException('File "%s" already included' % self.pendingInclude)
        self.included.append(self.pendingInclude)
        for path in self.searchPaths:
          fullInclude = os.path.join(path,self.pendingInclude)
          if os.path.exists(fullInclude):
            # Open exactly the path that was tested for existence.
            fp_pending = open(fullInclude,'r')
            break
        else:
          raise AsmException('%s not found' % self.pendingInclude)
        self.fpStack.append(dict(fp=fp_pending, line=0))
        self.pendingInclude = None
        # Provide start-of-file indication.
        self.fpStack[-1]['started'] = True
        self.current.append(fp_pending.name)
        self.current.append(0)
        return self.current
      # Process/continue processing the top file.
      fp = self.fpStack[-1]
      for line in fp['fp']:
        fp['line'] += 1
        # Handle single-line directives.
        if re.match(r'\s*\.(IFDEF|IFNDEF|ELSE|ENDIF|include)\b',line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          if not self.current:
            self.current = self.pending
            self.pending = list()
          return self.current
        # Append empty and comment lines to the pending block.
        if re.match(r'\s*(;|$)', line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          continue
        # See if the line starts with a directive.
        tokens = re.findall(r'\s*(\S+)',line)
        if self.ad.IsDirective(tokens[0]):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          # Emit the preceding body first; the new directive stays in "pending"
          # and becomes the current body on the next call.
          if self.current:
            return self.current
          self.current = self.pending
          self.pending = list()
          continue
        # Otherwise, this line belongs to the body of the preceding directive.
        if not self.current:
          # Every emitted body must start with the file name and line number,
          # even when no directive or comment preceded this line.
          if self.pending:
            self.current += self.pending[0:2]
          else:
            self.current += [fp['fp'].name, fp['line']]
        self.current += self.pending[2:]
        self.current.append(line)
        self.pending = list()
      # Past the last line of the current file -- close it.
      self.fpStack[-1]['fp'].close()
      self.fpStack[-1]['closed'] = True
      # Prepare to emit pending bodies if any.
      if not self.current:
        self.current = self.pending
        self.pending = list()
    raise StopIteration

  # The iterator protocol uses the name "__next__" in Python 3.
  __next__ = next

  def AddSearchPath(self,path):
    """
    Use by the top level assembler to add search paths for opening included
    files.
    """
    self.searchPaths.append(path)

  def Include(self,filename):
    """
    Queue "filename" to be opened as an included file by a subsequent call to
    next().
    """
    self.pendingInclude = filename
186
 
187
################################################################################
188
#
189
# Parse strings into the desired types.
190
#
191
################################################################################
192
 
193
def ParseNumber(inString):
  """
  Convert the text of an integer literal to its value.

  The recognized forms are the single digit 0, an optionally signed decimal
  value that does not start with 0, an octal value with a leading 0, and a
  hexadecimal value with a leading 0x.

  Returns the integer value, or None if the text has none of these forms.
  """
  # The lone zero is neither a decimal nor an octal literal as defined below.
  if inString == '0':
    return 0
  # Remaining forms as (pattern, radix) pairs:  decimal, octal, hexadecimal.
  recognizedForms = (
    (r'[+\-]?[1-9]\d*$',10),
    (r'0[0-7]+$',8),
    (r'0x[0-9A-Fa-f]+$',16),
  )
  for (pattern,radix,) in recognizedForms:
    found = re.match(pattern,inString)
    if found:
      return int(found.group(0),radix)
  # Everything else is an error
  return None
215
 
216
def ParseChar(inchar):
  """
  Decode the character or escape sequence at the start of "inchar".

  Returns a 2-tuple holding the integer value of the character and the number
  of characters of "inchar" that were consumed.

  One to three octal digits or an "x"/"X" followed by one or two hexadecimal
  digits may follow the backslash, as may one of the letters a, b, f, n, r, or
  t.  Any other escaped character stands for itself.
  """
  # Numeric escapes listed longest first as
  #   (pattern, index of first digit, index past last digit, radix).
  # The index past the last digit is also the number of characters consumed.
  numericEscapes = (
    (r'\\[0-7]{3}',1,4,8,),
    (r'\\[0-7]{2}',1,3,8,),
    (r'\\[0-7]{1}',1,2,8,),
    (r'\\[xX][0-9A-Fa-f]{2}',2,4,16,),
    (r'\\[xX][0-9A-Fa-f]{1}',2,3,16,),
  )
  for (pattern,first,last,radix,) in numericEscapes:
    if re.match(pattern,inchar):
      return (int(inchar[first:last],radix),last,)
  # Not an escape sequence:  the first character stands for itself.
  if not re.match(r'\\.',inchar):
    return (ord(inchar[0]),1,)
  # Single-letter escapes and their control-character values.
  controlCodes = {
    'a' : 7,    # bell ==> control-G
    'b' : 8,    # backspace ==> control-H
    'f' : 12,   # form feed ==> control-L
    'n' : 10,   # line feed ==> control-J
    'r' : 13,   # carriage return ==> control-M
    't' : 9,    # horizontal tab ==> control-I
  }
  escaped = inchar[1]
  if escaped in controlCodes:
    return (controlCodes[escaped],2,)
  # unrecognized escaped character ==> return that character
  return (ord(escaped),2,)
248
 
249
def ParseString(inString):
  """
  Parse strings recognized by the assembler.

  A string consists of the following:
    an optional count/termination character -- one of CNc
    a starting double-quote character
    the body of the string including escape sequences
    a terminating double-quote character

  Returns the list of integer character values of the body, modified as
  follows by the optional leading character:
    C   the number of characters is inserted at the front of the list
    N   a terminating 0 is appended to the list
    c   the number of characters minus one is inserted at the front of the list

  Raises Exception if the string is not enclosed by two distinct double-quote
  characters.
  """
  # Detect optional count/termination character.
  ix = 1 if inString[:1] in ('C','N','c',) else 0
  # Ensure the required start and end double quotes are present.  The length
  # test keeps a lone double quote from serving as both of them.
  if len(inString) < ix+2 or inString[ix] != '"' or inString[-1] != '"':
    raise Exception('Program Bug -- missing one or more double quotes around string')
  body = inString[ix+1:-1]
  # Convert the characters and escape sequences in the string to a list of their
  # integer values.
  outString = list()
  ixBody = 0
  while ixBody < len(body):
    (thisChar,thisLen,) = ParseChar(body[ixBody:])
    outString.append(thisChar)
    ixBody += thisLen
  # Insert the optional character count or append the optional nul terminating
  # character.
  if inString[0] == 'C':
    outString.insert(0,len(outString))
  elif inString[0] == 'N':
    outString.append(0)
  elif inString[0] == 'c':
    outString.insert(0,len(outString)-1)
  # That's all.
  return outString
283
 
284
def ParseToken(ad,fl_loc,col,raw,allowed):
  """
  Examine the raw tokens and convert them into dictionary objects consisting of
  the following:
    type        the type of token
    value       the value of the token
                this can be the name of a symbol, a numeric value, a string body, ...
    loc         start location of the token
                this is required by subsequent stages of the assembler for
                error messages
    argument    optional entry required for macros arguments
    range       optional entry required when a range is provided for a parameter

  Arguments:
    ad          assembler definition object used to identify instructions,
                directives, macros, constants, and symbols
    fl_loc      location of the line holding the token; the column is appended
                to it after a colon
    col         zero-based column at which the token starts
    raw         text of the token
    allowed     collection of the token types allowed at this location

  The token type is compared against the allowed tokens.

  Detect syntax errors and raise AsmException with a message consisting of the
  error and the location within the file where the error occurs.
  """
  flc_loc = fl_loc + ':' + str(col+1)
  # look for instructions
  # Note:  Do this before anything else because instructions can be a
  #        strange mix of symbols.
  if ad.IsInstruction(raw):
    if 'instruction' not in allowed:
      raise AsmException('instruction "%s" not allowed at %s' % (raw,flc_loc))
    return dict(type='instruction', value=raw, loc=flc_loc)
  # look for computation
  if re.match(r'\$\{\S+\}$',raw):
    if 'singlevalue' not in allowed:
      raise AsmException('Computed value not allowed at %s' % flc_loc)
    # NOTE(review): eval() executes arbitrary expressions taken from the
    # assembly source -- only assemble trusted source files.
    try:
      tParseNumber = eval(raw[2:-1],ad.SymbolDict())
    except Exception:
      raise AsmException('Malformed computed value at %s: "%s"' % (flc_loc,raw,))
    if type(tParseNumber) != int:
      raise AsmException('Malformed single-byte value at %s' % flc_loc)
    return dict(type='value', value=tParseNumber, loc=flc_loc)
  # look for a repeated single-byte numeric value (N*M where M is the repeat count)
  matchString = r'(0|[+\-]?[1-9]\d*|0[0-7]+|0x[0-9A-Fa-f]{1,2})\*([1-9]\d*|C_\w+|\$\{\S+\})$'
  a = re.match(matchString,raw)
  if a:
    if 'multivalue' not in allowed:
      raise AsmException('Multi-byte value not allowed at %s' % flc_loc)
    (valueText,countText,) = a.groups()
    tParseNumber = ParseNumber(valueText)
    # Location of the repeat count, i.e., just past the "*".
    fl_loc2 = fl_loc+':'+str(col+1+len(valueText)+1)
    if re.match(r'[1-9]',countText):
      repeatCount = int(countText)
    elif re.match(r'C_',countText):
      if not ad.IsConstant(countText):
        raise AsmException('Unrecognized symbol "%s" at %s' % (countText,fl_loc2,))
      ix = ad.symbols['list'].index(countText)
      if len(ad.symbols['body'][ix]) != 1:
        raise AsmException('constant can only be one byte at %s' % fl_loc2)
      repeatCount = ad.symbols['body'][ix][0]
    elif re.match(r'\$',countText):
      # NOTE(review): same eval() caveat as for computed values above.
      try:
        repeatCount = eval(countText[2:-1],ad.SymbolDict())
      except Exception:
        raise AsmException('Malformed computed value at %s: "%s"' % (fl_loc2,countText,))
      if type(repeatCount) != int:
        raise AsmException('Malformed repeat count at %s' % fl_loc2)
    else:
      raise Exception('Program Bug -- unrecognized repeat count')
    if repeatCount <= 0:
      raise AsmException('Repeat count must be positive at %s' % fl_loc2)
    return dict(type='value', value=[tParseNumber]*repeatCount, loc=flc_loc)
  # look for a single-byte numeric value
  if re.match(r'(0|[+\-]?[1-9]\d*|0[0-7]+|0x[0-9A-Fa-f]{1,2})$',raw):
    if 'singlevalue' not in allowed:
      raise AsmException('Value not allowed at %s' % flc_loc)
    return dict(type='value', value=ParseNumber(raw), loc=flc_loc)
  # capture double-quoted strings
  if re.match(r'[CNc]?"',raw):
    if 'string' not in allowed:
      raise AsmException('String not allowed at %s' % flc_loc)
    parsedString = ParseString(raw)
    # An integer result is treated as the location of an error in the string.
    if type(parsedString) == int:
      raise AsmException('Malformed string at %s' % (fl_loc + ':' + str(col+parsedString)))
    return dict(type='value', value=parsedString, loc=flc_loc)
  # capture single-quoted character
  if raw[:1] == "'":
    if 'singlevalue' not in allowed:
      raise AsmException('Character not allowed at %s' % flc_loc)
    # There must be something between the two quotes for ParseChar to examine.
    if len(raw) < 3:
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    (thisChar,thisLen,) = ParseChar(raw[1:-1])
    if len(raw) != thisLen+2:
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    return dict(type='value', value=thisChar, loc=flc_loc)
  # look for directives
  if ad.IsDirective(raw):
    if 'directive' not in allowed:
      raise AsmException('Directive not allowed at %s' % flc_loc)
    return dict(type='directive', value=raw, loc=flc_loc)
  # look for macros
  if re.match(r'\.[A-Za-z]\S*(\(\S+(,\S+|,\${\S+})*\))?$',raw):
    macroName = re.match(r'\.[^(]+',raw).group(0)
    if not ad.IsMacro(macroName):
      raise AsmException('Unrecognized directive or macro at %s:%d' % (fl_loc,col+1))
    if ('macro' not in allowed) and not ('singlemacro' in allowed and ad.IsSingleMacro(macroName)):
      raise AsmException('Macro "%s" not allowed at %s:%d' % (macroName,fl_loc,col+1,))
    macroArgs = re.findall(r'([^,]+)',raw[len(macroName)+1:-1])
    nArgs = ad.MacroNumberArgs(macroName)
    if len(macroArgs) not in nArgs:
      raise AsmException('Wrong number of arguments to macro "%s" at %s:%d' % (macroName,fl_loc,col+1))
    # Fill in the defaults for the arguments that were not provided.
    while len(macroArgs) < nArgs[-1]:
      macroArgs.append(ad.MacroDefault(macroName,len(macroArgs)))
    # Parse the arguments, tracking the column of each one separately so that
    # the location reported for the macro remains the start of the macro.
    outArgs = list()
    argCol = col + len(macroName) + 1
    for (ixArg,macroArg,) in enumerate(macroArgs):
      outArgs.append(ParseToken(ad,fl_loc,argCol,macroArg,ad.MacroArgTypes(macroName,ixArg)))
      argCol = argCol + len(macroArg) + 1
    return dict(type='macro', value=macroName, loc=flc_loc, argument=outArgs)
  # look for a label definition
  if re.match(r':[A-Za-z]\w*$',raw):
    if 'label' not in allowed:
      raise AsmException('Label not allowed at %s' % flc_loc)
    return dict(type='label', value=raw[1:], loc=flc_loc)
  # look for parameters with range specification
  if re.match(r'[LG]_\w+\[\d+\+?:\d+\]$',raw):
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    (symbolName,symbolRange,) = re.findall(r'([LG]_\w+)(\[.*)',raw)[0]
    return dict(type='symbol', value=symbolName, loc=flc_loc, range=symbolRange)
  # look for symbols
  # Note:  This should be the last check performed as every other kind of
  #        token should be recognizable
  a = re.match(r'[A-Za-z]\w*$',raw)
  if a:
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    return dict(type='symbol', value=a.group(0), loc=flc_loc)
  # anything else is an error
  raise AsmException('Malformed entry at %s:  "%s"' % (flc_loc,raw,))
428
 
429
################################################################################
430
#
431
# Extract the tokens from a block of code.
432
#
433
# These blocks of code should be generated by FileBodyIterator.
434
#
435
################################################################################
436
 
437
def RawTokens(ad,filename,startLineNumber,lines):
  """
  Extract the list of tokens from the provided list of lines.
  Convert the directive body into a list of individual tokens.

  Tokens are directive names, symbol names, values, strings, labels, etc.

  Arguments:
    ad                  assembler definition object (used to test whether
                        symbols are defined and passed on to ParseToken)
    filename            name of the file for error messages and locations
    startLineNumber     line number of the first entry in "lines"
    lines               the lines of the directive body

  The conditional constructs ".ifdef(name)", ".ifndef(name)", ".else", and
  ".endif" are processed here and do not produce tokens.  Conditionals may be
  nested:  a token is only kept when every enclosing condition is true.

  The return is a list of the tokens in the sequence they are encountered.  Each
  of these tokens is a dictionary object constructed by ParseToken.

  Raises AsmException for tokens that are not separated by spaces, malformed
  strings or characters, and malformed or unmatched conditionals.
  """
  allowed = [
              'instruction',
              'label',
              'macro',
              'multivalue',
              'singlevalue',
              'string',
              'symbol'
            ]
  # The first token of a body must be the directive itself.
  allowedFirst = ['directive']
  # One boolean per open conditional, innermost last.
  ifstack = list()
  tokens = list()
  lineNumber = startLineNumber - 1
  for line in lines:
    lineNumber = lineNumber + 1
    fl_loc = '%s:%d' % (filename,lineNumber)
    col = 0
    spaceFound = True
    while col < len(line):
      flc_loc = fl_loc + ':' + str(col+1)
      # Identify and then ignore white-space characters.
      if line[col].isspace():
        spaceFound = True
        col = col + 1
        continue
      # Ensure tokens start on new lines or are separated by spaces.
      if not spaceFound:
        raise AsmException('Missing space in %s:%d' % (fl_loc,col+1))
      spaceFound = False
      # Ignore comments.
      if line[col] == ';':
        break
      rest = line[col:]
      # Catch N"" string.
      if rest.startswith('N""'):
        candToken = 'N""'
      # Catch strings.
      elif re.match(r'[CNc]?"',rest):
        a = re.match(r'[CNc]?"([^\\"]|\\.)+"',rest)
        if not a:
          raise AsmException('Malformed string at %s' % flc_loc)
        candToken = a.group(0)
      # Catch single-quoted characters
      elif rest.startswith("'"):
        a = re.match(r'\'(.|\\.|\\[xX][0-9A-Fa-f]{1,2}|\\[0-7]{1,3})\'',rest)
        if not a:
          raise AsmException('Malformed \'.\' at %s' % flc_loc)
        candToken = a.group(0)
      else:
        # Everything else is a white-space delimited token.
        candToken = re.match(r'\S+',rest).group(0)
      # Catch conditional code inclusion constructs before parsing the token
      if candToken == '.else':
        if not ifstack:
          raise AsmException('Unmatched ".else" at %s' % flc_loc)
        ifstack[-1] = not ifstack[-1]
        col += len(candToken)
        continue
      if candToken == '.endif':
        if not ifstack:
          raise AsmException('Unmatched ".endif" at %s' % flc_loc)
        ifstack.pop()
        col += len(candToken)
        continue
      if re.match(r'\.ifdef\(',candToken):
        a = re.findall(r'\.ifdef\((\w+)\)$',candToken)
        if not a:
          raise AsmException('Malformed ".ifdef" at %s' % flc_loc)
        ifstack.append(ad.IsSymbol(a[0]))
        col += len(candToken)
        continue
      if re.match(r'\.ifndef\(',candToken):
        a = re.findall(r'\.ifndef\((\w+)\)$',candToken)
        if not a:
          raise AsmException('Malformed ".ifndef" at %s' % flc_loc)
        ifstack.append(not ad.IsSymbol(a[0]))
        col += len(candToken)
        continue
      # Skip the token unless every enclosing conditional is true.  Testing
      # only the innermost conditional would wrongly keep code nested inside
      # an excluded region.
      if not all(ifstack):
        col += len(candToken)
        continue
      # Determine which kinds of tokens are allowed at this location in the
      # directive body.
      selAllowed = allowed if tokens else allowedFirst
      # Append the parsed token to the list of tokens.
      tokens.append(ParseToken(ad,fl_loc,col,candToken,selAllowed))
      col += len(candToken)
  if ifstack:
    raise AsmException('%d unmatched conditionals at line %d' % (len(ifstack),lineNumber,))
  return tokens

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.