OpenCores
URL https://opencores.org/ocsvn/ssbcc/ssbcc/trunk

Subversion Repositories ssbcc

[/] [ssbcc/] [trunk/] [core/] [9x8/] [asmDef.py] - Blame information for rev 7

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 sinclairrf
################################################################################
2
#
3
# Copyright 2012, Sinclair R.F., Inc.
4
#
5
# Collection of utilities for the assembler.
6
#
7
################################################################################
8
 
9
import os
10
import re
11
 
12
class AsmException(Exception):
  """
  Exception class for the assembler.\n
  This allows the top-level module to capture error messages other than internal
  errors and program bugs so that users see a single-line error relevant to
  their code rather than the usual Python mess.
  """

  def __init__(self, message):
    """
    Record the error message.\n
    message     text of the error to report to the user
    """
    # Initialize the base class too so that "args", repr(), and pickling carry
    # the message instead of an empty tuple.
    Exception.__init__(self, message)
    self.msg = message

  def __str__(self):
    """
    Return the message exactly as it was provided to the constructor.
    """
    return self.msg
23
 
24
class FileBodyIterator:
  """
  Iterator for files that returns bodies of lines of the file.\n
  The directive must be the first non-white spaces on a line.\n
  The iterator outputs a list whose first element is the name of the file,
  whose second element is the line number for the first line of the block, and
  whose subsequent elements are the lines with the content of the block.\n
  A line number of 0 marks the start of a file and a line number of -1 marks
  the end of a file; these two markers carry no content lines.\n
  Lines starting with ".include" (and the other single-line directives) are
  emitted as a body of their own.  The included file is opened after the
  caller passes its name to Include.
  """

  def __init__(self, fps, ad):
    """
    Initialize the iterator.\n
    fps         list of file pointers from the argument line
    ad          asmDef_9x8 object (required to identify the directives)\n
    Raises AsmException if the same file name is listed more than once.
    """
    # Do sanity check on arguments.
    if ad.IsDirective(".include"):
      raise Exception('Program Bug:  The ".include" directive is defined by FileBodyIterator')
    # Initialize the raw processing states
    self.ixConstants = 0
    self.fpPending = list(fps)
    self.ad = ad
    self.current = list()
    self.pending = list()
    # Initialize the include search paths
    self.searchPaths = list()
    self.searchPaths.append('.')
    # Prepare the file parsing
    self.included = list()
    for fp in self.fpPending:
      if fp.name in self.included:
        raise AsmException('Input file %s listed more than once' % fp.name)
      self.included.append(fp.name)
    # fpStack holds one dict per open file; the top entry is the file being
    # read.  An empty file list simply produces an empty iteration.
    self.fpStack = list()
    if self.fpPending:
      self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
    self.pendingInclude = None

  def __iter__(self):
    """
    Required function for an iterable object.
    """
    return self

  def next(self):
    """
    Return the next directive body from the iterator.\n
    The body is a list with the following content:
      the name of the file
      the line number for the first line of the body
      the body consisting of lines from the source file\n
    The body contains comment lines preceding the directive, the line with the
    directive, and optional lines following the directive up to the optional
    comments preceding the next directive.\n
    Raises StopIteration once every file has been emitted and AsmException if
    a file queued through Include cannot be found on the search paths.
    """
    # Discard the body emitted by the previous call.
    self.current = self.pending
    self.pending = list()
    # If the current body is an include directive, then process it immediately.
    if self.current and re.match(r'\s*\.include\b',self.current[-1]):
      return self.current
    # Loop until all of the files have been processed
    while self.fpStack or self.fpPending or self.pendingInclude:
      # Get the next file to process if fpStack is empty.  This must be done
      # before the top of the stack is examined, otherwise the second and
      # subsequent files from the argument line cause an IndexError.
      if not self.fpStack and self.fpPending:
        self.fpStack.append(dict(fp=self.fpPending.pop(0), line=0))
      if self.fpStack:
        top = self.fpStack[-1]
        # Indicate when a new file is started.
        if 'started' not in top:
          if not self.current:
            top['started'] = True
            self.current.append(top['fp'].name)
            self.current.append(0)
          return self.current
        # Ensure the bodies in closed files are all emitted before continuing
        # to the next/enclosing file.
        if 'closed' in top:
          # Provide end-of-file indication if there is not a pending body fragment.
          if not self.current:
            self.current.append(top['fp'].name)
            self.current.append(-1)
            self.fpStack.pop()
          return self.current
      # Handle a queued ".include" directive.
      if self.pendingInclude:
        # Don't open the include file until all previous content has been emitted.
        if self.current:
          return self.current
        self.included.append(self.pendingInclude)
        fp_pending = None
        for path in self.searchPaths:
          fullInclude = os.path.join(path,self.pendingInclude)
          if os.path.exists(fullInclude):
            # Open the same path that was just found to exist.
            fp_pending = open(fullInclude,'r')
            break
        else:
          # None of the search paths has the file.
          raise AsmException('%s not found' % self.pendingInclude)
        self.fpStack.append(dict(fp=fp_pending, line=0))
        self.pendingInclude = None
        # Provide start-of-file indication.
        self.fpStack[-1]['started'] = True
        self.current.append(fp_pending.name)
        self.current.append(0)
        return self.current
      # Process/continue processing the top file.
      fp = self.fpStack[-1]
      for line in fp['fp']:
        fp['line'] += 1
        # Handle single-line directives.
        if re.match(r'\s*\.(IFDEF|IFNDEF|ELSE|ENDIF|include)\b',line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          if not self.current:
            self.current = self.pending
            self.pending = list()
          return self.current
        # Append empty and comment lines to the pending block.
        if re.match(r'\s*(;|$)', line):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          continue
        # See if the line starts with a directive.
        tokens = re.findall(r'\s*(\S+)',line)
        if self.ad.IsDirective(tokens[0]):
          if not self.pending:
            self.pending.append(fp['fp'].name)
            self.pending.append(fp['line'])
          self.pending.append(line)
          # A new directive terminates the body being accumulated:  emit that
          # body now and keep the new directive pending for the next call.
          if self.current:
            return self.current
          self.current = self.pending
          self.pending = list()
          continue
        # Otherwise, this line belongs to the body of the preceding directive.
        if not self.pending:
          self.pending.append(fp['fp'].name)
          self.pending.append(fp['line'])
        if not self.current:
          self.current += self.pending[0:2]
        self.current += self.pending[2:]
        self.current.append(line)
        self.pending = list()
      # Past the last line of the current file -- close it.
      self.fpStack[-1]['fp'].close()
      self.fpStack[-1]['closed'] = True
      # Prepare to emit pending bodies if any.
      if not self.current:
        self.current = self.pending
        self.pending = list()
    raise StopIteration

  # Python 3 spelling of the iterator protocol.
  __next__ = next

  def AddSearchPath(self,path):
    """
    Use by the top level assembler to add search paths for opening included
    files.
    """
    self.searchPaths.append(path)

  def Include(self,filename):
    """
    Queue the named file for inclusion.\n
    The file is located on the search paths and opened by a subsequent call to
    next, after all previously read content has been emitted.
    """
    self.pendingInclude = filename
187
 
188
################################################################################
189
#
190
# Parse strings into the desired types.
191
#
192
################################################################################
193
 
194
def ParseNumber(inString):
  """
  Test for recognized integer values and return the value if recognized,
  otherwise return None.\n
  The recognized forms are the single digit "0", binary values ("0b..."),
  octal values ("0..."), decimal values with an optional sign, and hex values
  ("0x...").  Underscores accepted by these forms are treated as digit
  separators and are ignored.  A string that has no digits left once the
  underscores are removed is not a number and None is returned.
  """
  # look for single-digit 0
  if inString == '0':
    return 0
  # Each form is the pattern to match, the number of prefix characters to
  # discard ahead of the digits, and the radix of the digits.
  forms = (
    (r'0b[01_]+$',          2,  2,),    # binary
    (r'0[0-7_]+$',          1,  8,),    # octal
    (r'[+\-]?[1-9_]\d*$',   0, 10,),    # decimal
    (r'0x[0-9A-Fa-f_]+$',   2, 16,),    # hex
  )
  for (pattern,nPrefix,radix,) in forms:
    a = re.match(pattern,inString)
    if not a:
      continue
    # int() does not reliably accept underscores, so remove them first.
    digits = a.group(0)[nPrefix:].replace('_','')
    if digits in ('','+','-',):
      return None
    return int(digits,radix)
  # Everything else is an error
  return None
221
 
222
def ParseChar(inchar):
  """
  Convert the character or escape sequence at the start of the input string
  to its integer value.\n
  The return is a tuple consisting of the integer value followed by the number
  of characters of the input string that were consumed.\n
  The escape sequences are up to three octal digits, "x" or "X" followed by up
  to two hex digits, and the single-letter control codes a, b, f, n, r, and t.
  Any other escaped character evaluates to that character.
  """
  # Numeric escape sequences, longest first.  Each entry is the pattern, the
  # radix, the index of the first digit, and the number of characters consumed.
  numericEscapes = (
    (r'\\[0-7]{3}',             8, 1, 4,),
    (r'\\[0-7]{2}',             8, 1, 3,),
    (r'\\[0-7]{1}',             8, 1, 2,),
    (r'\\[xX][0-9A-Fa-f]{2}',  16, 2, 4,),
    (r'\\[xX][0-9A-Fa-f]{1}',  16, 2, 3,),
  )
  for (pattern,radix,ixDigits,nConsumed,) in numericEscapes:
    if re.match(pattern,inchar):
      return (int(inchar[ixDigits:nConsumed],radix),nConsumed,)
  # Not an escape sequence ==> return the character itself.
  if not re.match(r'\\.',inchar):
    return (ord(inchar[0]),1,)
  # Single-letter control codes.
  controlCodes = {
    'a' :  7,   # bell ==> control-G
    'b' :  8,   # backspace ==> control-H
    'f' : 12,   # form feed ==> control-L
    'n' : 10,   # line feed ==> control-J
    'r' : 13,   # carriage return ==> control-M
    't' :  9,   # horizontal tab ==> control-I
  }
  escaped = inchar[1]
  if escaped in controlCodes:
    return (controlCodes[escaped],2,)
  # unrecognized escaped character ==> return that character
  return (ord(escaped),2,)
254
 
255
def ParseString(inString):
  """
  Parse strings recognized by the assembler.\n
  A string consists of the following:
    an optional count/termination character -- one of CNc
    a starting double-quote character
    the body of the string including escape sequences
    a terminating double-quote character\n
  The return is the list of the integer values of the characters in the body
  of the string, modified as follows by the optional leading character:
    C   the number of characters is inserted at the start of the list
    N   a nul (zero) is appended to the list
    c   the number of characters minus one is inserted at the start of the list\n
  A generic Exception is raised if either of the double quotes is missing.
  """
  # Detect optional count/termination character.
  prefix = inString[:1] if inString[:1] in ('C','N','c',) else ''
  ix = len(prefix)
  # Ensure the required start and end double quotes are present.  The length
  # test keeps a lone double quote from serving as both of them.
  if len(inString) < ix+2 or inString[ix] != '"' or inString[-1] != '"':
    raise Exception('Program Bug -- missing one or more double quotes around string')
  ix = ix + 1
  # Convert the characters and escape sequences in the string to a list of their
  # integer values.
  outString = list()
  while ix < len(inString)-1:
    (thisChar,thisLen,) = ParseChar(inString[ix:-1])
    outString.append(thisChar)
    ix += thisLen
  # Insert the optional character count or append the optional nul terminating
  # character.
  if prefix == 'C':
    outString.insert(0,len(outString))
  elif prefix == 'N':
    outString.append(0)
  elif prefix == 'c':
    outString.insert(0,len(outString)-1)
  # That's all.
  return outString
289
 
290
def ParseToken(ad,fl_loc,col,raw,allowed):
  """
  Examine the raw tokens and convert them into dictionary objects consisting of
  the following:
    type        the type of token
    value       the value of the token
                this can be the name of a symbol, a numeric value, a string body, ...
    loc         start location of the token
                this is required by subsequent stages of the assembler for
                error messages
    argument    optional entry required for macros arguments
    range       optional entry required when a range is provided for a parameter\n
  The arguments are as follows:
    ad          assembler definition object used to identify instructions,
                directives, macros, and constants
    fl_loc      location prefix for messages (the column is appended to it)
    col         zero-based column of the token within its line
    raw         text of the token
    allowed     collection of the token types allowed at this location\n
  The token type is compared against the allowed tokens.\n
  Detect syntax errors and raise an AsmException consisting of the error and
  the location within the file where the error occurs.
  """
  flc_loc = fl_loc + ':' + str(col+1)
  # look for instructions
  # Note:  Do this before anything else because instructions can be a
  #        strange mix of symbols.
  if ad.IsInstruction(raw):
    if 'instruction' not in allowed:
      raise AsmException('instruction "%s" not allowed at %s' % (raw,flc_loc))
    return dict(type='instruction', value=raw, loc=flc_loc)
  # look for computation
  a = re.match(r'\$\{\S+\}$',raw)
  if a:
    if 'singlevalue' not in allowed:
      raise AsmException('Computed value not allowed at %s' % flc_loc)
    try:
      # NOTE(security):  eval runs text taken from the assembly source as
      # Python.  Do not assemble source from an untrusted origin.
      tParseNumber = eval(raw[2:-1],ad.SymbolDict())
    except Exception:
      raise AsmException('Malformed computed value at %s: "%s"' % (flc_loc,raw,))
    if type(tParseNumber) != int:
      raise AsmException('Malformed single-byte value at %s' % flc_loc)
    return dict(type='value', value=tParseNumber, loc=flc_loc)
  # look for a repeated single-byte numeric value (N*M where M is the repeat count)
  matchString = r'(0|0b[01_]+|0[0-7]+|[+\-]?[1-9]\d*|0x[0-9A-Fa-f]{1,2})\*([1-9]\d*|C_\w+|\$\{\S+\})$'
  a = re.match(matchString,raw)
  if a:
    if 'multivalue' not in allowed:
      raise AsmException('Multi-byte value not allowed at %s' % flc_loc)
    (valueText,countText,) = a.groups()
    try:
      tParseNumber = ParseNumber(valueText)
    except Exception:
      tParseNumber = None
    # ParseNumber reports unrecognized values by returning None.
    if tParseNumber is None:
      raise AsmException('Malformed multi-byte value at %s' % flc_loc)
    # Location of the repeat count (just past the "*").
    fl_loc2 = fl_loc+':'+str(col+1+len(valueText)+1)
    if re.match(r'[1-9]',countText):
      repeatCount = int(countText)
    elif re.match(r'C_',countText):
      if not ad.IsConstant(countText):
        raise AsmException('Unrecognized symbol "%s" at %s' % (countText,fl_loc2,))
      ix = ad.symbols['list'].index(countText)
      if len(ad.symbols['body'][ix]) != 1:
        raise AsmException('constant can only be one byte at %s' % fl_loc2)
      repeatCount = ad.symbols['body'][ix][0]
    elif re.match(r'\$',countText):
      try:
        # NOTE(security):  see the note on the computed value above.
        repeatCount = eval(countText[2:-1],ad.SymbolDict())
      except Exception:
        raise AsmException('Malformed computed value at %s: "%s"' % (fl_loc2,countText,))
      if type(repeatCount) != int:
        raise AsmException('Repeat count must be an integer at %s' % fl_loc2)
    else:
      raise Exception('Program Bug -- unrecognized repeat count')
    if repeatCount <= 0:
      raise AsmException('Repeat count must be positive at %s' % fl_loc2)
    tValue = [tParseNumber] * repeatCount
    return dict(type='value', value=tValue, loc=flc_loc)
  # look for a single-byte numeric value
  a = re.match(r'(0|0b[01_]+|0[0-7]+|[+\-]?[1-9]\d*|0x[0-9A-Fa-f]+)$',raw)
  if a:
    if 'singlevalue' not in allowed:
      raise AsmException('Value not allowed at %s' % flc_loc)
    try:
      tParseNumber = ParseNumber(raw)
    except Exception:
      tParseNumber = None
    if tParseNumber is None:
      raise AsmException('Malformed single-byte value at %s' % flc_loc)
    return dict(type='value', value=tParseNumber, loc=flc_loc)
  # capture double-quoted strings
  if re.match(r'[CNc]?"',raw):
    if 'string' not in allowed:
      raise AsmException('String not allowed at %s' % flc_loc)
    # Report a missing terminating double quote as an error in the user's code
    # rather than as a program bug in ParseString.
    nPrefix = 0 if raw[0] == '"' else 1
    if len(raw) < nPrefix+2 or raw[-1] != '"':
      raise AsmException('Malformed string at %s' % flc_loc)
    parsedString = ParseString(raw)
    # Defensive:  an integer return is taken to be the location of an error
    # within the string.
    if type(parsedString) == int:
      raise AsmException('Malformed string at %s' % (fl_loc + ':' + str(col+parsedString)))
    return dict(type='value', value=parsedString, loc=flc_loc)
  # capture single-quoted character
  # Note:  The slice avoids an IndexError on an empty token (for example, an
  #        empty macro argument).
  if raw[:1] == "'":
    if 'singlevalue' not in allowed:
      raise AsmException('Character not allowed at %s' % flc_loc)
    # Both quotes and at least one character between them are required.
    if len(raw) < 3 or raw[-1] != "'":
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    (thisChar,thisLen,) = ParseChar(raw[1:-1])
    if len(raw) != thisLen+2:
      raise AsmException('Malformed \'.\' in %s' % flc_loc)
    return dict(type='value', value=thisChar, loc=flc_loc)
  # look for directives
  if ad.IsDirective(raw):
    if 'directive' not in allowed:
      raise AsmException('Directive not allowed at %s' % flc_loc)
    return dict(type='directive', value=raw, loc=flc_loc)
  # look for macros
  # Note:  Macro arguments can contain a single layer of macros.
  a = re.match(r'\.[A-Za-z]',raw)
  if a:
    macroName = re.match(r'\.[^(]+',raw).group(0)
    if not ad.IsMacro(macroName):
      raise AsmException('Unrecognized directive or macro at %s' % flc_loc)
    if ('macro' not in allowed) and not ('singlemacro' in allowed and ad.IsSingleMacro(macroName)):
      raise AsmException('Macro "%s" not allowed at %s' % (macroName,flc_loc,))
    macroArgs = list()
    if len(macroName) == len(raw):
      # The macro was invoked without an argument list.
      pass
    elif (raw[len(macroName)] != '(') or (raw[-1] != ')'):
      raise AsmException('Malformed macro invocation "%s" at %s' % (raw,flc_loc,))
    else:
      # Split the comma-separated arguments between the parentheses.  An
      # argument may itself have one parenthesized argument list.
      tcol = len(macroName)+1
      while tcol < len(raw):
        c = re.match(r'[^,(]*(\([^)]*\))?',raw[tcol:-1])
        macroArgs.append(c.group(0))
        tcol += len(c.group(0))+1
    nArgs = ad.MacroNumberArgs(macroName)
    if len(macroArgs) not in nArgs:
      raise AsmException('Wrong number of arguments to macro "%s" at %s' % (macroName,flc_loc,))
    # Fill in the arguments that were not provided with their default values.
    while len(macroArgs) < nArgs[-1]:
      macroArgs.append(ad.MacroDefault(macroName,len(macroArgs)))
    outArgs = list()
    tcol = col + len(macroName) + 1
    for (ixArg,thisArg,) in enumerate(macroArgs):
      outArgs.append(ParseToken(ad,fl_loc,tcol,thisArg,ad.MacroArgTypes(macroName,ixArg)))
      tcol += len(thisArg) + 1
    return dict(type='macro', value=macroName, loc=flc_loc, argument=outArgs)
  # look for a label definition
  a = re.match(r':[A-Za-z]\w*$',raw)
  if a:
    if 'label' not in allowed:
      raise AsmException('Label not allowed at %s' % flc_loc)
    return dict(type='label', value=raw[1:], loc=flc_loc)
  # look for parameters with range specification
  a = re.match(r'[LG]_\w+\[\d+\+?:\d+\]$',raw)
  if a:
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    a = re.findall(r'([LG]_\w+)(\[.*)',raw)[0]
    return dict(type='symbol', value=a[0], loc=flc_loc, range=a[1])
  # look for symbols
  # Note:  This should be the last check performed as every other kind of
  #        token should be recognizable
  a = re.match(r'[A-Za-z]\w*$',raw)
  if a:
    if 'symbol' not in allowed:
      raise AsmException('Symbol not allowed at %s' % flc_loc)
    return dict(type='symbol', value=a.group(0), loc=flc_loc)
  # anything else is an error
  raise AsmException('Malformed entry at %s:  "%s"' % (flc_loc,raw,))
445
 
446
################################################################################
447
#
448
# Extract the tokens from a block of code.
449
#
450
# These blocks of code should be generated by FileBodyIterator.
451
#
452
################################################################################
453
 
454
def RawTokens(ad,filename,startLineNumber,lines):
  """
  Extract the list of tokens from the provided list of lines.
  Convert the directive body into a list of individual tokens.\n
  Tokens are directive names, symbol names, values, strings, labels, etc.\n
  The arguments are as follows:
    ad                  assembler definition object used to identify symbols
                        and to parse the tokens
    filename            name of the file for error messages
    startLineNumber     line number of the first of the lines
    lines               the lines comprising the directive body\n
  The return is a list of the tokens in the sequence they are encountered.  Each
  of these tokens is a dictionary object constructed by ParseToken.\n
  The ".ifdef(symbol)", ".ifndef(symbol)", ".else", and ".endif" constructs
  are processed here and are not included in the returned tokens.  Tokens are
  discarded when any of the enclosing conditionals is false.\n
  An AsmException is raised for malformed content and for unmatched
  conditionals.
  """
  allowed = [
              'instruction',
              'label',
              'macro',
              'multivalue',
              'singlevalue',
              'string',
              'symbol'
            ]
  # The first token of the body must be the directive.
  firstAllowed = ['directive']
  # One boolean per open conditional, innermost last.
  ifstack = list()
  tokens = list()
  lineNumber = startLineNumber - 1
  for line in lines:
    lineNumber = lineNumber + 1
    fl_loc = '%s:%d' % (filename,lineNumber)
    col = 0
    spaceFound = True
    while col < len(line):
      flc_loc = fl_loc + ':' + str(col+1)
      rest = line[col:]
      # Identify and then ignore white-space characters.
      if re.match(r'\s',rest):
        spaceFound = True
        col = col + 1
        continue
      # Ensure tokens start on new lines or are separated by spaces.
      if not spaceFound:
        raise AsmException('Missing space in %s:%d' % (fl_loc,col+1))
      spaceFound = False
      # Ignore comments.
      if line[col] == ';':
        break
      # Catch N"" string.
      if re.match(r'N""',rest):
        a = re.match(r'N""',rest)
      # Catch strings.
      elif re.match(r'[CNc]?"',rest):
        a = re.match(r'[CNc]?"([^\\"]|\\.)+"',rest)
        if not a:
          raise AsmException('Malformed string at %s' % flc_loc)
      # Catch single-quoted characters
      elif re.match(r'\'',rest):
        a = re.match(r'\'(.|\\.|\\[xX][0-9A-Fa-f]{1,2}|\\[0-7]{1,3})\'',rest)
        if not a:
          raise AsmException('Malformed \'.\' at %s' % flc_loc)
      else:
        # Everything else is a white-space delimited token.
        a = re.match(r'\S+',rest)
      # Get the candidate token.
      candToken = a.group(0)
      # Catch conditional code inclusion constructs before parsing the token
      if candToken == '.else':
        if not ifstack:
          raise AsmException('Unmatched ".else" at %s' % flc_loc)
        ifstack[-1] = not ifstack[-1]
        col += len(candToken)
        continue
      if candToken == '.endif':
        if not ifstack:
          raise AsmException('Unmatched ".endif" at %s' % flc_loc)
        ifstack.pop()
        col += len(candToken)
        continue
      if re.match(r'\.ifdef\(',candToken):
        a = re.findall(r'\.ifdef\((\w+)\)$',candToken)
        if not a:
          raise AsmException('Malformed ".ifdef" at %s' % flc_loc)
        ifstack.append(ad.IsSymbol(a[0]))
        col += len(candToken)
        continue
      if re.match(r'\.ifndef\(',candToken):
        a = re.findall(r'\.ifndef\((\w+)\)$',candToken)
        if not a:
          raise AsmException('Malformed ".ifndef" at %s' % flc_loc)
        ifstack.append(not ad.IsSymbol(a[0]))
        col += len(candToken)
        continue
      # Discard the token if any enclosing conditional is false.  Testing only
      # the innermost conditional would wrongly include code nested within a
      # false outer conditional.
      if not all(ifstack):
        col += len(candToken)
        continue
      # Determine which kinds of tokens are allowed at this location in the
      # directive body.
      if not tokens:
        selAllowed = firstAllowed
      else:
        selAllowed = allowed
      # Append the parsed token to the list of tokens.
      tokens.append(ParseToken(ad,fl_loc,col,candToken,selAllowed))
      col += len(candToken)
  if ifstack:
    raise AsmException('%d unmatched conditionals at line %d' % (len(ifstack),lineNumber,))
  return tokens

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.