OpenCores
URL https://opencores.org/ocsvn/open8_urisc/open8_urisc/trunk

Subversion Repositories open8_urisc

[/] [open8_urisc/] [trunk/] [gnu/] [binutils/] [gas/] [app.c] - Blame information for rev 158

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 147 khays
/* This is the Assembler Pre-Processor
2
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3
   1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010
4
   Free Software Foundation, Inc.
5
 
6
   This file is part of GAS, the GNU Assembler.
7
 
8
   GAS is free software; you can redistribute it and/or modify
9
   it under the terms of the GNU General Public License as published by
10
   the Free Software Foundation; either version 3, or (at your option)
11
   any later version.
12
 
13
   GAS is distributed in the hope that it will be useful, but WITHOUT
14
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16
   License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with GAS; see the file COPYING.  If not, write to the Free
20
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21
   02110-1301, USA.  */
22
 
23
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
24
/* App, the assembler pre-processor.  This pre-processor strips out
25
   excess spaces, turns single-quoted characters into a decimal
26
   constant, and turns the # in # <number> <filename> <garbage> into a
27
   .linefile.  This needs better error-handling.  */
28
 
29
#include "as.h"
30
 
31
/* Compilers that do not claim ISO C conformance may lack the `const'
   keyword; make it vanish unless something already defined it.  */
#if (__STDC__ != 1) && !defined (const)
#define const  /* empty */
#endif

#if defined (H_TICK_HEX)
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#if !defined (TC_M68K)
/* Without m68k support the scrubber is never in MRI mode.  */
#define scrub_m68k_mri 0
#else
/* Whether the scrubber itself is in m68k MRI mode.  Kept apart from
   flag_m68k_mri because the .mri pseudo-op affects the two flags at
   different times.  */
static int scrub_m68k_mri;

/* Text of the pseudo-op that switches MRI mode on and off; see the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#endif

#if defined (TC_ARM) && defined (OBJ_ELF)
/* The pseudo-op whose `@' characters need special treatment; see the
   comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* How much of symver_pseudo has been recognized so far, or NULL when
   no match is in progress.  */
static const char *symver_state;
#endif
60
 
61
/* Lexical class of every possible input byte, indexed by the byte's
   value as an unsigned char.  do_scrub_begin fills it in; an entry
   that is still zero has had no class assigned.  */
static char lex[256];

/* Characters that do_scrub_begin always marks as symbol components.  */
static const char symbol_chars[] =
  "$._"
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789";

/* Lexical classes stored in lex[].  The value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT    1
#define LEX_IS_WHITESPACE          2
#define LEX_IS_LINE_SEPARATOR      3
#define LEX_IS_COMMENT_START       4
#define LEX_IS_LINE_COMMENT_START  5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_STRINGQUOTE         8
#define LEX_IS_COLON               9
#define LEX_IS_NEWLINE             10
#define LEX_IS_ONECHAR_QUOTE       11

#if defined (TC_V850)
#define LEX_IS_DOUBLEDASH_1ST      12
#endif

/* The M32R always uses "||" as a parallel-instruction marker.  */
#if defined (TC_M32R)
#define DOUBLEBAR_PARALLEL
#endif

#if defined (DOUBLEBAR_PARALLEL)
#define LEX_IS_DOUBLEBAR_1ST       13
#endif

#define LEX_IS_PARALLEL_SEPARATOR  14

#if defined (H_TICK_HEX)
#define LEX_IS_H                   15
#endif

/* Predicates testing the class recorded in lex[] for character C.  */
#define IS_SYMBOL_COMPONENT(c)   (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)         (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)     (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)            (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)       (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)            (lex[c] == LEX_IS_NEWLINE)

/* Defined further down in this file.  */
static int process_escape (int ch);
97
 
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
 
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
{
105
  const char *p;
106
  int c;
107
 
108
  lex[' '] = LEX_IS_WHITESPACE;
109
  lex['\t'] = LEX_IS_WHITESPACE;
110
  lex['\r'] = LEX_IS_WHITESPACE;
111
  lex['\n'] = LEX_IS_NEWLINE;
112
  lex[':'] = LEX_IS_COLON;
113
 
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
 
117
  if (! m68k_mri)
118
#endif
119
    {
120
      lex['"'] = LEX_IS_STRINGQUOTE;
121
 
122
#if ! defined (TC_HPPA) && ! defined (TC_I370)
123
      /* I370 uses single-quotes to delimit integer, float constants.  */
124
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
125
#endif
126
 
127
#ifdef SINGLE_QUOTE_STRINGS
128
      lex['\''] = LEX_IS_STRINGQUOTE;
129
#endif
130
    }
131
 
132
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133
     in state 5 of do_scrub_chars must be changed.  */
134
 
135
  /* Note that these override the previous defaults, e.g. if ';' is a
136
     comment char, then it isn't a line separator.  */
137
  for (p = symbol_chars; *p; ++p)
138
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139
 
140
  for (c = 128; c < 256; ++c)
141
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
 
143
#ifdef tc_symbol_chars
144
  /* This macro permits the processor to specify all characters which
145
     may appears in an operand.  This will prevent the scrubber from
146
     discarding meaningful whitespace in certain cases.  The i386
147
     backend uses this to support prefixes, which can confuse the
148
     scrubber as to whether it is parsing operands or opcodes.  */
149
  for (p = tc_symbol_chars; *p; ++p)
150
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151
#endif
152
 
153
  /* The m68k backend wants to be able to change comment_chars.  */
154
#ifndef tc_comment_chars
155
#define tc_comment_chars comment_chars
156
#endif
157
  for (p = tc_comment_chars; *p; p++)
158
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159
 
160
  for (p = line_comment_chars; *p; p++)
161
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162
 
163
  for (p = line_separator_chars; *p; p++)
164
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
165
 
166
#ifdef tc_parallel_separator_chars
167
  /* This macro permits the processor to specify all characters which
168
     separate parallel insns on the same line.  */
169
  for (p = tc_parallel_separator_chars; *p; p++)
170
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
171
#endif
172
 
173
  /* Only allow slash-star comments if slash is not in use.
174
     FIXME: This isn't right.  We should always permit them.  */
175
  if (lex['/'] == 0)
176
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
177
 
178
#ifdef TC_M68K
179
  if (m68k_mri)
180
    {
181
      lex['\''] = LEX_IS_STRINGQUOTE;
182
      lex[';'] = LEX_IS_COMMENT_START;
183
      lex['*'] = LEX_IS_LINE_COMMENT_START;
184
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
185
         then it can't be used in an expression.  */
186
      lex['!'] = LEX_IS_LINE_COMMENT_START;
187
    }
188
#endif
189
 
190
#ifdef TC_V850
191
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
192
#endif
193
#ifdef DOUBLEBAR_PARALLEL
194
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
195
#endif
196
#ifdef TC_D30V
197
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
198
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
199
#endif
200
 
201
#ifdef H_TICK_HEX
202
  if (enable_h_tick_hex)
203
    {
204
      lex['h'] = LEX_IS_H;
205
      lex['H'] = LEX_IS_H;
206
    }
207
#endif
208
}
209
 
210
/* Saved state of the scrubber.  */
211
/* State machine position of do_scrub_chars; app_push resets it to 0.  */
static int state;

/* State that do_scrub_chars returns to when it leaves one of its
   temporary states (string output, comment skipping, string body).  */
static int old_state;

/* Next character to be emitted while in state -1.  */
static char *out_string;

/* Small scratch buffer, saved and restored together with out_string
   (presumably its backing store -- the code that fills it is not in
   this part of the file).  */
static char out_buf[20];

/* Incremented each time a newline is swallowed inside a continued
   string or a slash-star comment.  */
static int add_newlines;

/* Input carried over from the previous call of do_scrub_chars, and its
   length in bytes.  saved_input is NULL when nothing is carried over.  */
static char *saved_input;
static int saved_input_len;

/* Buffer handed to the GET callback of do_scrub_chars to be filled
   with raw input.  */
static char input_buffer[32 * 1024];

/* Progress through mri_pseudo while recognizing a .mri pseudo-op, or
   NULL when no match is in progress; only used when TC_M68K is
   defined.  */
static const char *mri_state;

/* Most recent character matched against mri_pseudo.  */
static char mri_last_ch;
221
 
222
/* Data structure for saving the state of app across #include's.  Note that
223
   app is called asynchronously to the parsing of the .include's, so our
224
   state at the time .include is interpreted is completely unrelated.
225
   That's why we have to save it all.  */
226
 
227
struct app_save
228
{
229
  int          state;
230
  int          old_state;
231
  char *       out_string;
232
  char         out_buf[sizeof (out_buf)];
233
  int          add_newlines;
234
  char *       saved_input;
235
  int          saved_input_len;
236
#ifdef TC_M68K
237
  int          scrub_m68k_mri;
238
#endif
239
  const char * mri_state;
240
  char         mri_last_ch;
241
#if defined TC_ARM && defined OBJ_ELF
242
  const char * symver_state;
243
#endif
244
};
245
 
246
char *
247
app_push (void)
248
{
249
  register struct app_save *saved;
250
 
251
  saved = (struct app_save *) xmalloc (sizeof (*saved));
252
  saved->state = state;
253
  saved->old_state = old_state;
254
  saved->out_string = out_string;
255
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
256
  saved->add_newlines = add_newlines;
257
  if (saved_input == NULL)
258
    saved->saved_input = NULL;
259
  else
260
    {
261
      saved->saved_input = (char *) xmalloc (saved_input_len);
262
      memcpy (saved->saved_input, saved_input, saved_input_len);
263
      saved->saved_input_len = saved_input_len;
264
    }
265
#ifdef TC_M68K
266
  saved->scrub_m68k_mri = scrub_m68k_mri;
267
#endif
268
  saved->mri_state = mri_state;
269
  saved->mri_last_ch = mri_last_ch;
270
#if defined TC_ARM && defined OBJ_ELF
271
  saved->symver_state = symver_state;
272
#endif
273
 
274
  /* do_scrub_begin() is not useful, just wastes time.  */
275
 
276
  state = 0;
277
  saved_input = NULL;
278
 
279
  return (char *) saved;
280
}
281
 
282
void
283
app_pop (char *arg)
284
{
285
  register struct app_save *saved = (struct app_save *) arg;
286
 
287
  /* There is no do_scrub_end ().  */
288
  state = saved->state;
289
  old_state = saved->old_state;
290
  out_string = saved->out_string;
291
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
292
  add_newlines = saved->add_newlines;
293
  if (saved->saved_input == NULL)
294
    saved_input = NULL;
295
  else
296
    {
297
      gas_assert (saved->saved_input_len <= (int) (sizeof input_buffer));
298
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
299
      saved_input = input_buffer;
300
      saved_input_len = saved->saved_input_len;
301
      free (saved->saved_input);
302
    }
303
#ifdef TC_M68K
304
  scrub_m68k_mri = saved->scrub_m68k_mri;
305
#endif
306
  mri_state = saved->mri_state;
307
  mri_last_ch = saved->mri_last_ch;
308
#if defined TC_ARM && defined OBJ_ELF
309
  symver_state = saved->symver_state;
310
#endif
311
 
312
  free (arg);
313
}
314
 
315
/* @@ This assumes that \n &c are the same on host and target.  This is not
316
   necessarily true.  */
317
 
318
/* Return the character denoted by the escape letter CH, i.e. the
   character that followed a backslash.  The letters b, f, n, r and t
   yield the corresponding control character; any other value,
   including the two quote characters, is returned unchanged.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;
    char value;
  }
  controls[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (controls) / sizeof (controls[0]); i++)
    if (ch == controls[i].letter)
      return controls[i].value;

  return ch;
}
341
 
342
/* This function is called to process input characters.  The GET
343
   parameter is used to retrieve more input characters.  GET should
344
   set its parameter to point to a buffer, and return the length of
345
   the buffer; it should return 0 at end of file.  The scrubbed output
346
   characters are put into the buffer starting at TOSTART; the TOSTART
347
   buffer is TOLEN bytes in length.  The function returns the number
348
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
349
   end of file was seen.  This function is arranged as a state
350
   machine, and saves its state so that it may return at any point.
351
   This is the way the old code used to work.  */
352
 
353
int
354
do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
355
{
356
  char *to = tostart;
357
  char *toend = tostart + tolen;
358
  char *from;
359
  char *fromend;
360
  int fromlen;
361
  register int ch, ch2 = 0;
362
  /* Character that started the string we're working on.  */
363
  static char quotechar;
364
 
365
  /*State 0: beginning of normal line
366
          1: After first whitespace on line (flush more white)
367
          2: After first non-white (opcode) on line (keep 1white)
368
          3: after second white on line (into operands) (flush white)
369
          4: after putting out a .linefile, put out digits
370
          5: parsing a string, then go to old-state
371
          6: putting out \ escape in a "d string.
372
          7: no longer used
373
          8: no longer used
374
          9: After seeing symbol char in state 3 (keep 1white after symchar)
375
         10: After seeing whitespace in state 9 (keep white before symchar)
376
         11: After seeing a symbol character in state 0 (eg a label definition)
377
         -1: output string in out_string and go to the state in old_state
378
         -2: flush text until a '*' '/' is seen, then go to state old_state
379
#ifdef TC_V850
380
         12: After seeing a dash, looking for a second dash as a start
381
             of comment.
382
#endif
383
#ifdef DOUBLEBAR_PARALLEL
384
         13: After seeing a vertical bar, looking for a second
385
             vertical bar as a parallel expression separator.
386
#endif
387
#ifdef TC_PREDICATE_START_CHAR
388
         14: After seeing a predicate start character at state 0, looking
389
             for a predicate end character as predicate.
390
         15: After seeing a predicate start character at state 1, looking
391
             for a predicate end character as predicate.
392
#endif
393
#ifdef TC_Z80
394
         16: After seeing an 'a' or an 'A' at the start of a symbol
395
         17: After seeing an 'f' or an 'F' in state 16
396
#endif
397
          */
398
 
399
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
400
     constructs like ``.loc 1 20''.  This was turning into ``.loc
401
     120''.  States 9 and 10 ensure that a space is never dropped in
402
     between characters which could appear in an identifier.  Ian
403
     Taylor, ian@cygnus.com.
404
 
405
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
406
     correctly on the PA (and any other target where colons are optional).
407
     Jeff Law, law@cs.utah.edu.
408
 
409
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
410
     get squashed into "cmp r1,r2||trap#1", with the all important space
411
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
412
 
413
  /* This macro gets the next input character.  */
414
 
415
#define GET()                                                   \
416
  (from < fromend                                               \
417
   ? * (unsigned char *) (from++)                               \
418
   : (saved_input = NULL,                                       \
419
      fromlen = (*get) (input_buffer, sizeof input_buffer),     \
420
      from = input_buffer,                                      \
421
      fromend = from + fromlen,                                 \
422
      (fromlen == 0                                              \
423
       ? EOF                                                    \
424
       : * (unsigned char *) (from++))))
425
 
426
  /* This macro pushes a character back on the input stream.  */
427
 
428
#define UNGET(uch) (*--from = (uch))
429
 
430
  /* This macro puts a character into the output buffer.  If this
431
     character fills the output buffer, this macro jumps to the label
432
     TOFULL.  We use this rather ugly approach because we need to
433
     handle two different termination conditions: EOF on the input
434
     stream, and a full output buffer.  It would be simpler if we
435
     always read in the entire input stream before processing it, but
436
     I don't want to make such a significant change to the assembler's
437
     memory usage.  */
438
 
439
#define PUT(pch)                                \
440
  do                                            \
441
    {                                           \
442
      *to++ = (pch);                            \
443
      if (to >= toend)                          \
444
        goto tofull;                            \
445
    }                                           \
446
  while (0)
447
 
448
  if (saved_input != NULL)
449
    {
450
      from = saved_input;
451
      fromend = from + saved_input_len;
452
    }
453
  else
454
    {
455
      fromlen = (*get) (input_buffer, sizeof input_buffer);
456
      if (fromlen == 0)
457
        return 0;
458
      from = input_buffer;
459
      fromend = from + fromlen;
460
    }
461
 
462
  while (1)
463
    {
464
      /* The cases in this switch end with continue, in order to
465
         branch back to the top of this while loop and generate the
466
         next output character in the appropriate state.  */
467
      switch (state)
468
        {
469
        case -1:
470
          ch = *out_string++;
471
          if (*out_string == '\0')
472
            {
473
              state = old_state;
474
              old_state = 3;
475
            }
476
          PUT (ch);
477
          continue;
478
 
479
        case -2:
480
          for (;;)
481
            {
482
              do
483
                {
484
                  ch = GET ();
485
 
486
                  if (ch == EOF)
487
                    {
488
                      as_warn (_("end of file in comment"));
489
                      goto fromeof;
490
                    }
491
 
492
                  if (ch == '\n')
493
                    PUT ('\n');
494
                }
495
              while (ch != '*');
496
 
497
              while ((ch = GET ()) == '*')
498
                ;
499
 
500
              if (ch == EOF)
501
                {
502
                  as_warn (_("end of file in comment"));
503
                  goto fromeof;
504
                }
505
 
506
              if (ch == '/')
507
                break;
508
 
509
              UNGET (ch);
510
            }
511
 
512
          state = old_state;
513
          UNGET (' ');
514
          continue;
515
 
516
        case 4:
517
          ch = GET ();
518
          if (ch == EOF)
519
            goto fromeof;
520
          else if (ch >= '0' && ch <= '9')
521
            PUT (ch);
522
          else
523
            {
524
              while (ch != EOF && IS_WHITESPACE (ch))
525
                ch = GET ();
526
              if (ch == '"')
527
                {
528
                  quotechar = ch;
529
                  state = 5;
530
                  old_state = 3;
531
                  PUT (ch);
532
                }
533
              else
534
                {
535
                  while (ch != EOF && ch != '\n')
536
                    ch = GET ();
537
                  state = 0;
538
                  PUT (ch);
539
                }
540
            }
541
          continue;
542
 
543
        case 5:
544
          /* We are going to copy everything up to a quote character,
545
             with special handling for a backslash.  We try to
546
             optimize the copying in the simple case without using the
547
             GET and PUT macros.  */
548
          {
549
            char *s;
550
            int len;
551
 
552
            for (s = from; s < fromend; s++)
553
              {
554
                ch = *s;
555
                if (ch == '\\'
556
                    || ch == quotechar
557
                    || ch == '\n')
558
                  break;
559
              }
560
            len = s - from;
561
            if (len > toend - to)
562
              len = toend - to;
563
            if (len > 0)
564
              {
565
                memcpy (to, from, len);
566
                to += len;
567
                from += len;
568
                if (to >= toend)
569
                  goto tofull;
570
              }
571
          }
572
 
573
          ch = GET ();
574
          if (ch == EOF)
575
            {
576
              /* This buffer is here specifically so
577
                 that the UNGET below will work.  */
578
              static char one_char_buf[1];
579
 
580
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
581
              state = old_state;
582
              from = fromend = one_char_buf + 1;
583
              fromlen = 1;
584
              UNGET ('\n');
585
              PUT (quotechar);
586
            }
587
          else if (ch == quotechar)
588
            {
589
              state = old_state;
590
              PUT (ch);
591
            }
592
#ifndef NO_STRING_ESCAPES
593
          else if (ch == '\\')
594
            {
595
              state = 6;
596
              PUT (ch);
597
            }
598
#endif
599
          else if (scrub_m68k_mri && ch == '\n')
600
            {
601
              /* Just quietly terminate the string.  This permits lines like
602
                   bne  label   loop if we haven't reach end yet.  */
603
              state = old_state;
604
              UNGET (ch);
605
              PUT ('\'');
606
            }
607
          else
608
            {
609
              PUT (ch);
610
            }
611
          continue;
612
 
613
        case 6:
614
          state = 5;
615
          ch = GET ();
616
          switch (ch)
617
            {
618
              /* Handle strings broken across lines, by turning '\n' into
619
                 '\\' and 'n'.  */
620
            case '\n':
621
              UNGET ('n');
622
              add_newlines++;
623
              PUT ('\\');
624
              continue;
625
 
626
            case EOF:
627
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
628
              PUT (quotechar);
629
              continue;
630
 
631
            case '"':
632
            case '\\':
633
            case 'b':
634
            case 'f':
635
            case 'n':
636
            case 'r':
637
            case 't':
638
            case 'v':
639
            case 'x':
640
            case 'X':
641
            case '0':
642
            case '1':
643
            case '2':
644
            case '3':
645
            case '4':
646
            case '5':
647
            case '6':
648
            case '7':
649
              break;
650
 
651
            default:
652
#ifdef ONLY_STANDARD_ESCAPES
653
              as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
654
#endif
655
              break;
656
            }
657
          PUT (ch);
658
          continue;
659
 
660
#ifdef DOUBLEBAR_PARALLEL
661
        case 13:
662
          ch = GET ();
663
          if (ch != '|')
664
            abort ();
665
 
666
          /* Reset back to state 1 and pretend that we are parsing a
667
             line from just after the first white space.  */
668
          state = 1;
669
          PUT ('|');
670
#ifdef TC_TIC6X
671
          /* "||^" is used for SPMASKed instructions.  */
672
          ch = GET ();
673
          if (ch == EOF)
674
            goto fromeof;
675
          else if (ch == '^')
676
            PUT ('^');
677
          else
678
            UNGET (ch);
679
#endif
680
          continue;
681
#endif
682
#ifdef TC_Z80
683
        case 16:
684
          /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
685
          ch = GET ();
686
          if (ch == 'f' || ch == 'F')
687
            {
688
              state = 17;
689
              PUT (ch);
690
            }
691
          else
692
            {
693
              state = 9;
694
              break;
695
            }
696
        case 17:
697
          /* We have seen "af" at the start of a symbol,
698
             a ' here is a part of that symbol.  */
699
          ch = GET ();
700
          state = 9;
701
          if (ch == '\'')
702
            /* Change to avoid warning about unclosed string.  */
703
            PUT ('`');
704
          else if (ch != EOF)
705
            UNGET (ch);
706
          break;
707
#endif
708
        }
709
 
710
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
711
 
712
      /* flushchar: */
713
      ch = GET ();
714
 
715
#ifdef TC_PREDICATE_START_CHAR
716
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
717
        {
718
          state += 14;
719
          PUT (ch);
720
          continue;
721
        }
722
      else if (state == 14 || state == 15)
723
        {
724
          if (ch == TC_PREDICATE_END_CHAR)
725
            {
726
              state -= 14;
727
              PUT (ch);
728
              ch = GET ();
729
            }
730
          else
731
            {
732
              PUT (ch);
733
              continue;
734
            }
735
        }
736
#endif
737
 
738
    recycle:
739
 
740
#if defined TC_ARM && defined OBJ_ELF
741
      /* We need to watch out for .symver directives.  See the comment later
742
         in this function.  */
743
      if (symver_state == NULL)
744
        {
745
          if ((state == 0 || state == 1) && ch == symver_pseudo[0])
746
            symver_state = symver_pseudo + 1;
747
        }
748
      else
749
        {
750
          /* We advance to the next state if we find the right
751
             character.  */
752
          if (ch != '\0' && (*symver_state == ch))
753
            ++symver_state;
754
          else if (*symver_state != '\0')
755
            /* We did not get the expected character, or we didn't
756
               get a valid terminating character after seeing the
757
               entire pseudo-op, so we must go back to the beginning.  */
758
            symver_state = NULL;
759
          else
760
            {
761
              /* We've read the entire pseudo-op.  If this is the end
762
                 of the line, go back to the beginning.  */
763
              if (IS_NEWLINE (ch))
764
                symver_state = NULL;
765
            }
766
        }
767
#endif /* TC_ARM && OBJ_ELF */
768
 
769
#ifdef TC_M68K
770
      /* We want to have pseudo-ops which control whether we are in
771
         MRI mode or not.  Unfortunately, since m68k MRI mode affects
772
         the scrubber, that means that we need a special purpose
773
         recognizer here.  */
774
      if (mri_state == NULL)
775
        {
776
          if ((state == 0 || state == 1)
777
              && ch == mri_pseudo[0])
778
            mri_state = mri_pseudo + 1;
779
        }
780
      else
781
        {
782
          /* We advance to the next state if we find the right
783
             character, or if we need a space character and we get any
784
             whitespace character, or if we need a '0' and we get a
785
             '1' (this is so that we only need one state to handle
786
             ``.mri 0'' and ``.mri 1'').  */
787
          if (ch != '\0'
788
              && (*mri_state == ch
789
                  || (*mri_state == ' '
790
                      && lex[ch] == LEX_IS_WHITESPACE)
791
                  || (*mri_state == '0'
792
                      && ch == '1')))
793
            {
794
              mri_last_ch = ch;
795
              ++mri_state;
796
            }
797
          else if (*mri_state != '\0'
798
                   || (lex[ch] != LEX_IS_WHITESPACE
799
                       && lex[ch] != LEX_IS_NEWLINE))
800
            {
801
              /* We did not get the expected character, or we didn't
802
                 get a valid terminating character after seeing the
803
                 entire pseudo-op, so we must go back to the
804
                 beginning.  */
805
              mri_state = NULL;
806
            }
807
          else
808
            {
809
              /* We've read the entire pseudo-op.  mri_last_ch is
810
                 either '0' or '1' indicating whether to enter or
811
                 leave MRI mode.  */
812
              do_scrub_begin (mri_last_ch == '1');
813
              mri_state = NULL;
814
 
815
              /* We continue handling the character as usual.  The
816
                 main gas reader must also handle the .mri pseudo-op
817
                 to control expression parsing and the like.  */
818
            }
819
        }
820
#endif
821
 
822
      if (ch == EOF)
823
        {
824
          if (state != 0)
825
            {
826
              as_warn (_("end of file not at end of a line; newline inserted"));
827
              state = 0;
828
              PUT ('\n');
829
            }
830
          goto fromeof;
831
        }
832
 
833
      switch (lex[ch])
834
        {
835
        case LEX_IS_WHITESPACE:
836
          do
837
            {
838
              ch = GET ();
839
            }
840
          while (ch != EOF && IS_WHITESPACE (ch));
841
          if (ch == EOF)
842
            goto fromeof;
843
 
844
          if (state == 0)
845
            {
846
              /* Preserve a single whitespace character at the
847
                 beginning of a line.  */
848
              state = 1;
849
              UNGET (ch);
850
              PUT (' ');
851
              break;
852
            }
853
 
854
#ifdef KEEP_WHITE_AROUND_COLON
855
          if (lex[ch] == LEX_IS_COLON)
856
            {
857
              /* Only keep this white if there's no white *after* the
858
                 colon.  */
859
              ch2 = GET ();
860
              if (ch2 != EOF)
861
                UNGET (ch2);
862
              if (!IS_WHITESPACE (ch2))
863
                {
864
                  state = 9;
865
                  UNGET (ch);
866
                  PUT (' ');
867
                  break;
868
                }
869
            }
870
#endif
871
          if (IS_COMMENT (ch)
872
              || ch == '/'
873
              || IS_LINE_SEPARATOR (ch)
874
              || IS_PARALLEL_SEPARATOR (ch))
875
            {
876
              if (scrub_m68k_mri)
877
                {
878
                  /* In MRI mode, we keep these spaces.  */
879
                  UNGET (ch);
880
                  PUT (' ');
881
                  break;
882
                }
883
              goto recycle;
884
            }
885
 
886
          /* If we're in state 2 or 11, we've seen a non-white
887
             character followed by whitespace.  If the next character
888
             is ':', this is whitespace after a label name which we
889
             normally must ignore.  In MRI mode, though, spaces are
890
             not permitted between the label and the colon.  */
891
          if ((state == 2 || state == 11)
892
              && lex[ch] == LEX_IS_COLON
893
              && ! scrub_m68k_mri)
894
            {
895
              state = 1;
896
              PUT (ch);
897
              break;
898
            }
899
 
900
          switch (state)
901
            {
902
            case 1:
903
              /* We can arrive here if we leave a leading whitespace
904
                 character at the beginning of a line.  */
905
              goto recycle;
906
            case 2:
907
              state = 3;
908
              if (to + 1 < toend)
909
                {
910
                  /* Optimize common case by skipping UNGET/GET.  */
911
                  PUT (' ');    /* Sp after opco */
912
                  goto recycle;
913
                }
914
              UNGET (ch);
915
              PUT (' ');
916
              break;
917
            case 3:
918
#ifndef TC_KEEP_OPERAND_SPACES
919
              /* For TI C6X, we keep these spaces as they may separate
920
                 functional unit specifiers from operands.  */
921
              if (scrub_m68k_mri)
922
#endif
923
                {
924
                  /* In MRI mode, we keep these spaces.  */
925
                  UNGET (ch);
926
                  PUT (' ');
927
                  break;
928
                }
929
              goto recycle;     /* Sp in operands */
930
            case 9:
931
            case 10:
932
#ifndef TC_KEEP_OPERAND_SPACES
933
              if (scrub_m68k_mri)
934
#endif
935
                {
936
                  /* In MRI mode, we keep these spaces.  */
937
                  state = 3;
938
                  UNGET (ch);
939
                  PUT (' ');
940
                  break;
941
                }
942
              state = 10;       /* Sp after symbol char */
943
              goto recycle;
944
            case 11:
945
              if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
946
                state = 1;
947
              else
948
                {
949
                  /* We know that ch is not ':', since we tested that
950
                     case above.  Therefore this is not a label, so it
951
                     must be the opcode, and we've just seen the
952
                     whitespace after it.  */
953
                  state = 3;
954
                }
955
              UNGET (ch);
956
              PUT (' ');        /* Sp after label definition.  */
957
              break;
958
            default:
959
              BAD_CASE (state);
960
            }
961
          break;
962
 
963
        case LEX_IS_TWOCHAR_COMMENT_1ST:
964
          ch2 = GET ();
965
          if (ch2 == '*')
966
            {
967
              for (;;)
968
                {
969
                  do
970
                    {
971
                      ch2 = GET ();
972
                      if (ch2 != EOF && IS_NEWLINE (ch2))
973
                        add_newlines++;
974
                    }
975
                  while (ch2 != EOF && ch2 != '*');
976
 
977
                  while (ch2 == '*')
978
                    ch2 = GET ();
979
 
980
                  if (ch2 == EOF || ch2 == '/')
981
                    break;
982
 
983
                  /* This UNGET will ensure that we count newlines
984
                     correctly.  */
985
                  UNGET (ch2);
986
                }
987
 
988
              if (ch2 == EOF)
989
                as_warn (_("end of file in multiline comment"));
990
 
991
              ch = ' ';
992
              goto recycle;
993
            }
994
#ifdef DOUBLESLASH_LINE_COMMENTS
995
          else if (ch2 == '/')
996
            {
997
              do
998
                {
999
                  ch = GET ();
1000
                }
1001
              while (ch != EOF && !IS_NEWLINE (ch));
1002
              if (ch == EOF)
1003
                as_warn ("end of file in comment; newline inserted");
1004
              state = 0;
1005
              PUT ('\n');
1006
              break;
1007
            }
1008
#endif
1009
          else
1010
            {
1011
              if (ch2 != EOF)
1012
                UNGET (ch2);
1013
              if (state == 9 || state == 10)
1014
                state = 3;
1015
              PUT (ch);
1016
            }
1017
          break;
1018
 
1019
        case LEX_IS_STRINGQUOTE:
1020
          quotechar = ch;
1021
          if (state == 10)
1022
            {
1023
              /* Preserve the whitespace in foo "bar".  */
1024
              UNGET (ch);
1025
              state = 3;
1026
              PUT (' ');
1027
 
1028
              /* PUT didn't jump out.  We could just break, but we
1029
                 know what will happen, so optimize a bit.  */
1030
              ch = GET ();
1031
              old_state = 3;
1032
            }
1033
          else if (state == 9)
1034
            old_state = 3;
1035
          else
1036
            old_state = state;
1037
          state = 5;
1038
          PUT (ch);
1039
          break;
1040
 
1041
#ifndef IEEE_STYLE
1042
        case LEX_IS_ONECHAR_QUOTE:
1043
#ifdef H_TICK_HEX
1044
          if (state == 9 && enable_h_tick_hex)
1045
            {
1046
              char c;
1047
 
1048
              c = GET ();
1049
              as_warn ("'%c found after symbol", c);
1050
              UNGET (c);
1051
            }
1052
#endif
1053
          if (state == 10)
1054
            {
1055
              /* Preserve the whitespace in foo 'b'.  */
1056
              UNGET (ch);
1057
              state = 3;
1058
              PUT (' ');
1059
              break;
1060
            }
1061
          ch = GET ();
1062
          if (ch == EOF)
1063
            {
1064
              as_warn (_("end of file after a one-character quote; \\0 inserted"));
1065
              ch = 0;
1066
            }
1067
          if (ch == '\\')
1068
            {
1069
              ch = GET ();
1070
              if (ch == EOF)
1071
                {
1072
                  as_warn (_("end of file in escape character"));
1073
                  ch = '\\';
1074
                }
1075
              else
1076
                ch = process_escape (ch);
1077
            }
1078
          sprintf (out_buf, "%d", (int) (unsigned char) ch);
1079
 
1080
          /* None of these 'x constants for us.  We want 'x'.  */
1081
          if ((ch = GET ()) != '\'')
1082
            {
1083
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1084
              as_warn (_("missing close quote; (assumed)"));
1085
#else
1086
              if (ch != EOF)
1087
                UNGET (ch);
1088
#endif
1089
            }
1090
          if (strlen (out_buf) == 1)
1091
            {
1092
              PUT (out_buf[0]);
1093
              break;
1094
            }
1095
          if (state == 9)
1096
            old_state = 3;
1097
          else
1098
            old_state = state;
1099
          state = -1;
1100
          out_string = out_buf;
1101
          PUT (*out_string++);
1102
          break;
1103
#endif
1104
 
1105
        case LEX_IS_COLON:
1106
#ifdef KEEP_WHITE_AROUND_COLON
1107
          state = 9;
1108
#else
1109
          if (state == 9 || state == 10)
1110
            state = 3;
1111
          else if (state != 3)
1112
            state = 1;
1113
#endif
1114
          PUT (ch);
1115
          break;
1116
 
1117
        case LEX_IS_NEWLINE:
1118
          /* Roll out a bunch of newlines from inside comments, etc.  */
1119
          if (add_newlines)
1120
            {
1121
              --add_newlines;
1122
              UNGET (ch);
1123
            }
1124
          /* Fall through.  */
1125
 
1126
        case LEX_IS_LINE_SEPARATOR:
1127
          state = 0;
1128
          PUT (ch);
1129
          break;
1130
 
1131
        case LEX_IS_PARALLEL_SEPARATOR:
1132
          state = 1;
1133
          PUT (ch);
1134
          break;
1135
 
1136
#ifdef TC_V850
1137
        case LEX_IS_DOUBLEDASH_1ST:
1138
          ch2 = GET ();
1139
          if (ch2 != '-')
1140
            {
1141
              if (ch2 != EOF)
1142
                UNGET (ch2);
1143
              goto de_fault;
1144
            }
1145
          /* Read and skip to end of line.  */
1146
          do
1147
            {
1148
              ch = GET ();
1149
            }
1150
          while (ch != EOF && ch != '\n');
1151
 
1152
          if (ch == EOF)
1153
            as_warn (_("end of file in comment; newline inserted"));
1154
 
1155
          state = 0;
1156
          PUT ('\n');
1157
          break;
1158
#endif
1159
#ifdef DOUBLEBAR_PARALLEL
1160
        case LEX_IS_DOUBLEBAR_1ST:
1161
          ch2 = GET ();
1162
          if (ch2 != EOF)
1163
            UNGET (ch2);
1164
          if (ch2 != '|')
1165
            goto de_fault;
1166
 
1167
          /* Handle '||' in two states as invoking PUT twice might
1168
             result in the first one jumping out of this loop.  We'd
1169
             then lose track of the state and one '|' char.  */
1170
          state = 13;
1171
          PUT ('|');
1172
          break;
1173
#endif
1174
        case LEX_IS_LINE_COMMENT_START:
1175
          /* FIXME-someday: The two character comment stuff was badly
1176
             thought out.  On i386, we want '/' as line comment start
1177
             AND we want C style comments.  hence this hack.  The
1178
             whole lexical process should be reworked.  xoxorich.  */
1179
          if (ch == '/')
1180
            {
1181
              ch2 = GET ();
1182
              if (ch2 == '*')
1183
                {
1184
                  old_state = 3;
1185
                  state = -2;
1186
                  break;
1187
                }
1188
              else
1189
                {
1190
                  UNGET (ch2);
1191
                }
1192
            }
1193
 
1194
          if (state == 0 || state == 1)  /* Only comment at start of line.  */
1195
            {
1196
              int startch;
1197
 
1198
              startch = ch;
1199
 
1200
              do
1201
                {
1202
                  ch = GET ();
1203
                }
1204
              while (ch != EOF && IS_WHITESPACE (ch));
1205
 
1206
              if (ch == EOF)
1207
                {
1208
                  as_warn (_("end of file in comment; newline inserted"));
1209
                  PUT ('\n');
1210
                  break;
1211
                }
1212
 
1213
              if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1214
                {
1215
                  /* Not a cpp line.  */
1216
                  while (ch != EOF && !IS_NEWLINE (ch))
1217
                    ch = GET ();
1218
                  if (ch == EOF)
1219
                    as_warn (_("end of file in comment; newline inserted"));
1220
                  state = 0;
1221
                  PUT ('\n');
1222
                  break;
1223
                }
1224
              /* Looks like `# 123 "filename"' from cpp.  */
1225
              UNGET (ch);
1226
              old_state = 4;
1227
              state = -1;
1228
              if (scrub_m68k_mri)
1229
                out_string = "\tlinefile ";
1230
              else
1231
                out_string = "\t.linefile ";
1232
              PUT (*out_string++);
1233
              break;
1234
            }
1235
 
1236
#ifdef TC_D10V
1237
          /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1238
             Trap is the only short insn that has a first operand that is
1239
             neither register nor label.
1240
             We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1241
             We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1242
             already LEX_IS_LINE_COMMENT_START.  However, it is the
1243
             only character in line_comment_chars for d10v, hence we
1244
             can recognize it as such.  */
1245
          /* An alternative approach would be to reset the state to 1 when
1246
             we see '||', '<-' or '->', but that seems to be overkill.  */
1247
          if (state == 10)
1248
            PUT (' ');
1249
#endif
1250
          /* We have a line comment character which is not at the
1251
             start of a line.  If this is also a normal comment
1252
             character, fall through.  Otherwise treat it as a default
1253
             character.  */
1254
          if (strchr (tc_comment_chars, ch) == NULL
1255
              && (! scrub_m68k_mri
1256
                  || (ch != '!' && ch != '*')))
1257
            goto de_fault;
1258
          if (scrub_m68k_mri
1259
              && (ch == '!' || ch == '*' || ch == '#')
1260
              && state != 1
1261
              && state != 10)
1262
            goto de_fault;
1263
          /* Fall through.  */
1264
        case LEX_IS_COMMENT_START:
1265
#if defined TC_ARM && defined OBJ_ELF
1266
          /* On the ARM, `@' is the comment character.
1267
             Unfortunately this is also a special character in ELF .symver
1268
             directives (and .type, though we deal with those another way).
1269
             So we check if this line is such a directive, and treat
1270
             the character as default if so.  This is a hack.  */
1271
          if ((symver_state != NULL) && (*symver_state == 0))
1272
            goto de_fault;
1273
#endif
1274
 
1275
#ifdef TC_ARM
1276
          /* For the ARM, care is needed not to damage occurrences of \@
1277
             by stripping the @ onwards.  Yuck.  */
1278
          if (to > tostart && *(to - 1) == '\\')
1279
            /* Do not treat the @ as a start-of-comment.  */
1280
            goto de_fault;
1281
#endif
1282
 
1283
#ifdef WARN_COMMENTS
1284
          if (!found_comment)
1285
            as_where (&found_comment_file, &found_comment);
1286
#endif
1287
          do
1288
            {
1289
              ch = GET ();
1290
            }
1291
          while (ch != EOF && !IS_NEWLINE (ch));
1292
          if (ch == EOF)
1293
            as_warn (_("end of file in comment; newline inserted"));
1294
          state = 0;
1295
          PUT ('\n');
1296
          break;
1297
 
1298
#ifdef H_TICK_HEX
1299
        case LEX_IS_H:
1300
          /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1301
             the H' with 0x to make them gas-style hex characters.  */
1302
          if (enable_h_tick_hex)
1303
            {
1304
              char quot;
1305
 
1306
              quot = GET ();
1307
              if (quot == '\'')
1308
                {
1309
                  UNGET ('x');
1310
                  ch = '0';
1311
                }
1312
              else
1313
                UNGET (quot);
1314
            }
1315
          /* FALL THROUGH */
1316
#endif
1317
 
1318
        case LEX_IS_SYMBOL_COMPONENT:
1319
          if (state == 10)
1320
            {
1321
              /* This is a symbol character following another symbol
1322
                 character, with whitespace in between.  We skipped
1323
                 the whitespace earlier, so output it now.  */
1324
              UNGET (ch);
1325
              state = 3;
1326
              PUT (' ');
1327
              break;
1328
            }
1329
 
1330
#ifdef TC_Z80
1331
          /* "af'" is a symbol containing '\''.  */
1332
          if (state == 3 && (ch == 'a' || ch == 'A'))
1333
            {
1334
              state = 16;
1335
              PUT (ch);
1336
              ch = GET ();
1337
              if (ch == 'f' || ch == 'F')
1338
                {
1339
                  state = 17;
1340
                  PUT (ch);
1341
                  break;
1342
                }
1343
              else
1344
                {
1345
                  state = 9;
1346
                  if (!IS_SYMBOL_COMPONENT (ch))
1347
                    {
1348
                      if (ch != EOF)
1349
                        UNGET (ch);
1350
                      break;
1351
                    }
1352
                }
1353
            }
1354
#endif
1355
          if (state == 3)
1356
            state = 9;
1357
 
1358
          /* This is a common case.  Quickly copy CH and all the
1359
             following symbol component or normal characters.  */
1360
          if (to + 1 < toend
1361
              && mri_state == NULL
1362
#if defined TC_ARM && defined OBJ_ELF
1363
              && symver_state == NULL
1364
#endif
1365
              )
1366
            {
1367
              char *s;
1368
              int len;
1369
 
1370
              for (s = from; s < fromend; s++)
1371
                {
1372
                  int type;
1373
 
1374
                  ch2 = *(unsigned char *) s;
1375
                  type = lex[ch2];
1376
                  if (type != 0
1377
                      && type != LEX_IS_SYMBOL_COMPONENT)
1378
                    break;
1379
                }
1380
 
1381
              if (s > from)
1382
                /* Handle the last character normally, for
1383
                   simplicity.  */
1384
                --s;
1385
 
1386
              len = s - from;
1387
 
1388
              if (len > (toend - to) - 1)
1389
                len = (toend - to) - 1;
1390
 
1391
              if (len > 0)
1392
                {
1393
                  PUT (ch);
1394
                  memcpy (to, from, len);
1395
                  to += len;
1396
                  from += len;
1397
                  if (to >= toend)
1398
                    goto tofull;
1399
                  ch = GET ();
1400
                }
1401
            }
1402
 
1403
          /* Fall through.  */
1404
        default:
1405
        de_fault:
1406
          /* Some relatively `normal' character.  */
1407
          if (state == 0)
1408
            {
1409
              state = 11;       /* Now seeing label definition.  */
1410
            }
1411
          else if (state == 1)
1412
            {
1413
              state = 2;        /* Ditto.  */
1414
            }
1415
          else if (state == 9)
1416
            {
1417
              if (!IS_SYMBOL_COMPONENT (ch))
1418
                state = 3;
1419
            }
1420
          else if (state == 10)
1421
            {
1422
              if (ch == '\\')
1423
                {
1424
                  /* Special handling for backslash: a backslash may
1425
                     be the beginning of a formal parameter (of a
1426
                     macro) following another symbol character, with
1427
                     whitespace in between.  If that is the case, we
1428
                     output a space before the parameter.  Strictly
1429
                     speaking, correct handling depends upon what the
1430
                     macro parameter expands into; if the parameter
1431
                     expands into something which does not start with
1432
                     an operand character, then we don't want to keep
1433
                     the space.  We don't have enough information to
1434
                     make the right choice, so here we are making the
1435
                     choice which is more likely to be correct.  */
1436
                  if (to + 1 >= toend)
1437
                    {
1438
                      /* If we're near the end of the buffer, save the
1439
                         character for the next time round.  Otherwise
1440
                         we'll lose our state.  */
1441
                      UNGET (ch);
1442
                      goto tofull;
1443
                    }
1444
                  *to++ = ' ';
1445
                }
1446
 
1447
              state = 3;
1448
            }
1449
          PUT (ch);
1450
          break;
1451
        }
1452
    }
1453
 
1454
  /*NOTREACHED*/
1455
 
1456
 fromeof:
1457
  /* We have reached the end of the input.  */
1458
  return to - tostart;
1459
 
1460
 tofull:
1461
  /* The output buffer is full.  Save any input we have not yet
1462
     processed.  */
1463
  if (fromend > from)
1464
    {
1465
      saved_input = from;
1466
      saved_input_len = fromend - from;
1467
    }
1468
  else
1469
    saved_input = NULL;
1470
 
1471
  return to - tostart;
1472
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.