OpenCores
URL https://opencores.org/ocsvn/open8_urisc/open8_urisc/trunk

Subversion Repositories open8_urisc

[/] [open8_urisc/] [trunk/] [gnu/] [binutils/] [gas/] [app.c] - Blame information for rev 257

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 147 khays
/* This is the Assembler Pre-Processor
2
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3
   1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010
4
   Free Software Foundation, Inc.
5
 
6
   This file is part of GAS, the GNU Assembler.
7
 
8
   GAS is free software; you can redistribute it and/or modify
9
   it under the terms of the GNU General Public License as published by
10
   the Free Software Foundation; either version 3, or (at your option)
11
   any later version.
12
 
13
   GAS is distributed in the hope that it will be useful, but WITHOUT
14
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16
   License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with GAS; see the file COPYING.  If not, write to the Free
20
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21
   02110-1301, USA.  */
22
 
23
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
24
/* App, the assembler pre-processor.  This pre-processor strips out
25
   excess spaces, turns single-quoted characters into a decimal
26
   constant, and turns the # in # <number> <filename> <garbage> into a
27
   .linefile.  This needs better error-handling.  */
28
 
29
#include "as.h"
30
 
31
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without TC_M68K, MRI mode is a compile-time constant "off".  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Character-class table, indexed by character value (0..255).  Each
   entry holds one of the LEX_IS_* codes below; an entry of 0 means
   the character has not been given a class.  Filled in by
   do_scrub_begin.  */
static char lex[256];

/* Characters that are classified as LEX_IS_SYMBOL_COMPONENT by
   do_scrub_begin (characters 128..255 are added there as well).  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  The value 7 is not assigned.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
#define LEX_IS_PARALLEL_SEPARATOR       14
#ifdef H_TICK_HEX
#define LEX_IS_H                        15
#endif

/* Class predicates.  C is used directly as an index into lex[], so
   it must be in the range 0..255 (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)        (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
97
 
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
 
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
{
105
  const char *p;
106
  int c;
107
 
108
  lex[' '] = LEX_IS_WHITESPACE;
109
  lex['\t'] = LEX_IS_WHITESPACE;
110
  lex['\r'] = LEX_IS_WHITESPACE;
111
  lex['\n'] = LEX_IS_NEWLINE;
112
  lex[':'] = LEX_IS_COLON;
113
 
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
 
117
  if (! m68k_mri)
118
#endif
119
    {
120
      lex['"'] = LEX_IS_STRINGQUOTE;
121
 
122
#if ! defined (TC_HPPA) && ! defined (TC_I370)
123
      /* I370 uses single-quotes to delimit integer, float constants.  */
124
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
125
#endif
126
 
127
#ifdef SINGLE_QUOTE_STRINGS
128
      lex['\''] = LEX_IS_STRINGQUOTE;
129
#endif
130
    }
131
 
132
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133
     in state 5 of do_scrub_chars must be changed.  */
134
 
135
  /* Note that these override the previous defaults, e.g. if ';' is a
136
     comment char, then it isn't a line separator.  */
137
  for (p = symbol_chars; *p; ++p)
138
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139
 
140
  for (c = 128; c < 256; ++c)
141
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
 
143
#ifdef tc_symbol_chars
144
  /* This macro permits the processor to specify all characters which
145
     may appears in an operand.  This will prevent the scrubber from
146
     discarding meaningful whitespace in certain cases.  The i386
147
     backend uses this to support prefixes, which can confuse the
148
     scrubber as to whether it is parsing operands or opcodes.  */
149
  for (p = tc_symbol_chars; *p; ++p)
150
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151
#endif
152
 
153
  /* The m68k backend wants to be able to change comment_chars.  */
154
#ifndef tc_comment_chars
155
#define tc_comment_chars comment_chars
156
#endif
157
  for (p = tc_comment_chars; *p; p++)
158
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159
 
160
  for (p = line_comment_chars; *p; p++)
161
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162
 
163
  for (p = line_separator_chars; *p; p++)
164
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
165
 
166
#ifdef tc_parallel_separator_chars
167
  /* This macro permits the processor to specify all characters which
168
     separate parallel insns on the same line.  */
169
  for (p = tc_parallel_separator_chars; *p; p++)
170
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
171
#endif
172
 
173
  /* Only allow slash-star comments if slash is not in use.
174
     FIXME: This isn't right.  We should always permit them.  */
175
  if (lex['/'] == 0)
176
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
177
 
178
#ifdef TC_M68K
179
  if (m68k_mri)
180
    {
181
      lex['\''] = LEX_IS_STRINGQUOTE;
182
      lex[';'] = LEX_IS_COMMENT_START;
183
      lex['*'] = LEX_IS_LINE_COMMENT_START;
184
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
185
         then it can't be used in an expression.  */
186
      lex['!'] = LEX_IS_LINE_COMMENT_START;
187
    }
188
#endif
189
 
190
#ifdef TC_V850
191
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
192
#endif
193
#ifdef DOUBLEBAR_PARALLEL
194
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
195
#endif
196
#ifdef TC_D30V
197
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
198
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
199
#endif
200
 
201
#ifdef H_TICK_HEX
202
  if (enable_h_tick_hex)
203
    {
204
      lex['h'] = LEX_IS_H;
205
      lex['H'] = LEX_IS_H;
206
    }
207
#endif
208
}
209
 
210
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_chars and are what app_push/app_pop save and restore.  */

/* Current state of the do_scrub_chars state machine, and the state
   to return to once a temporary state (-1, -2, 5, ...) finishes.  */
static int state;
static int old_state;

/* Pending text emitted one character at a time in state -1.  */
static char *out_string;

/* Small scratch buffer saved and restored together with out_string
   (presumably out_string may point into it -- the code that fills it
   is not in this part of the file).  */
static char out_buf[20];

/* Count of newlines that were consumed from the input (inside
   comments or continued strings) and not yet written out.  */
static int add_newlines;

/* Unconsumed input carried over from the previous do_scrub_chars
   call, and its length; NULL when there is none.  */
static char *saved_input;
static int saved_input_len;

/* Buffer handed to the GET callback to receive raw input.  */
static char input_buffer[32 * 1024];

/* Progress of the ".mri 0" / ".mri 1" recognizer: points at the next
   expected character of mri_pseudo, or NULL when not matching.
   mri_last_ch is the most recent character accepted by it.  */
static const char *mri_state;
static char mri_last_ch;
221
 
222
/* Data structure for saving the state of app across #include's.  Note that
223
   app is called asynchronously to the parsing of the .include's, so our
224
   state at the time .include is interpreted is completely unrelated.
225
   That's why we have to save it all.  */
226
 
227
struct app_save
228
{
229
  int          state;
230
  int          old_state;
231
  char *       out_string;
232
  char         out_buf[sizeof (out_buf)];
233
  int          add_newlines;
234
  char *       saved_input;
235
  int          saved_input_len;
236
#ifdef TC_M68K
237
  int          scrub_m68k_mri;
238
#endif
239
  const char * mri_state;
240
  char         mri_last_ch;
241
#if defined TC_ARM && defined OBJ_ELF
242
  const char * symver_state;
243
#endif
244
};
245
 
246
char *
247
app_push (void)
248
{
249
  register struct app_save *saved;
250
 
251
  saved = (struct app_save *) xmalloc (sizeof (*saved));
252
  saved->state = state;
253
  saved->old_state = old_state;
254
  saved->out_string = out_string;
255
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
256
  saved->add_newlines = add_newlines;
257
  if (saved_input == NULL)
258
    saved->saved_input = NULL;
259
  else
260
    {
261
      saved->saved_input = (char *) xmalloc (saved_input_len);
262
      memcpy (saved->saved_input, saved_input, saved_input_len);
263
      saved->saved_input_len = saved_input_len;
264
    }
265
#ifdef TC_M68K
266
  saved->scrub_m68k_mri = scrub_m68k_mri;
267
#endif
268
  saved->mri_state = mri_state;
269
  saved->mri_last_ch = mri_last_ch;
270
#if defined TC_ARM && defined OBJ_ELF
271
  saved->symver_state = symver_state;
272
#endif
273
 
274
  /* do_scrub_begin() is not useful, just wastes time.  */
275
 
276
  state = 0;
277
  saved_input = NULL;
278 166 khays
  add_newlines = 0;
279 147 khays
 
280
  return (char *) saved;
281
}
282
 
283
void
284
app_pop (char *arg)
285
{
286
  register struct app_save *saved = (struct app_save *) arg;
287
 
288
  /* There is no do_scrub_end ().  */
289
  state = saved->state;
290
  old_state = saved->old_state;
291
  out_string = saved->out_string;
292
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
293
  add_newlines = saved->add_newlines;
294
  if (saved->saved_input == NULL)
295
    saved_input = NULL;
296
  else
297
    {
298
      gas_assert (saved->saved_input_len <= (int) (sizeof input_buffer));
299
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
300
      saved_input = input_buffer;
301
      saved_input_len = saved->saved_input_len;
302
      free (saved->saved_input);
303
    }
304
#ifdef TC_M68K
305
  scrub_m68k_mri = saved->scrub_m68k_mri;
306
#endif
307
  mri_state = saved->mri_state;
308
  mri_last_ch = saved->mri_last_ch;
309
#if defined TC_ARM && defined OBJ_ELF
310
  symver_state = saved->symver_state;
311
#endif
312
 
313
  free (arg);
314
}
315
 
316
/* Translate CH, the character following a backslash, into the
   character the escape sequence stands for.  The letters b, f, n, r
   and t map to the corresponding control characters; a single or
   double quote maps to itself; any other character is returned
   unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
342
 
343
/* This function is called to process input characters.  The GET
   parameter is used to retrieve more input characters.  GET is
   called with a buffer and the size of that buffer; it should fill
   the buffer and return the number of characters stored, or 0 at end
   of file.  The scrubbed output characters are put into the buffer
   starting at TOSTART; the TOSTART buffer is TOLEN bytes in length.
   The function returns the number of scrubbed characters put into
   TOSTART.  This will be TOLEN unless end of file was seen.  This
   function is arranged as a state machine, and saves its state so
   that it may return at any point.  This is the way the old code
   used to work.  */
353
 
354
int
355
do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
356
{
357
  char *to = tostart;
358
  char *toend = tostart + tolen;
359
  char *from;
360
  char *fromend;
361
  int fromlen;
362
  register int ch, ch2 = 0;
363
  /* Character that started the string we're working on.  */
364
  static char quotechar;
365
 
366
  /*State 0: beginning of normal line
367
          1: After first whitespace on line (flush more white)
368
          2: After first non-white (opcode) on line (keep 1white)
369
          3: after second white on line (into operands) (flush white)
370
          4: after putting out a .linefile, put out digits
371
          5: parsing a string, then go to old-state
372
          6: putting out \ escape in a "d string.
373
          7: no longer used
374
          8: no longer used
375
          9: After seeing symbol char in state 3 (keep 1white after symchar)
376
         10: After seeing whitespace in state 9 (keep white before symchar)
377
         11: After seeing a symbol character in state 0 (eg a label definition)
378
         -1: output string in out_string and go to the state in old_state
379
         -2: flush text until a '*' '/' is seen, then go to state old_state
380
#ifdef TC_V850
381
         12: After seeing a dash, looking for a second dash as a start
382
             of comment.
383
#endif
384
#ifdef DOUBLEBAR_PARALLEL
385
         13: After seeing a vertical bar, looking for a second
386
             vertical bar as a parallel expression separator.
387
#endif
388
#ifdef TC_PREDICATE_START_CHAR
389
         14: After seeing a predicate start character at state 0, looking
390
             for a predicate end character as predicate.
391
         15: After seeing a predicate start character at state 1, looking
392
             for a predicate end character as predicate.
393
#endif
394
#ifdef TC_Z80
395
         16: After seeing an 'a' or an 'A' at the start of a symbol
396
         17: After seeing an 'f' or an 'F' in state 16
397
#endif
398
          */
399
 
400
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
401
     constructs like ``.loc 1 20''.  This was turning into ``.loc
402
     120''.  States 9 and 10 ensure that a space is never dropped in
403
     between characters which could appear in an identifier.  Ian
404
     Taylor, ian@cygnus.com.
405
 
406
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
407
     correctly on the PA (and any other target where colons are optional).
408
     Jeff Law, law@cs.utah.edu.
409
 
410
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
411
     get squashed into "cmp r1,r2||trap#1", with the all important space
412
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
413
 
414
  /* This macro gets the next input character.  */
415
 
416
#define GET()                                                   \
417
  (from < fromend                                               \
418
   ? * (unsigned char *) (from++)                               \
419
   : (saved_input = NULL,                                       \
420
      fromlen = (*get) (input_buffer, sizeof input_buffer),     \
421
      from = input_buffer,                                      \
422
      fromend = from + fromlen,                                 \
423
      (fromlen == 0                                              \
424
       ? EOF                                                    \
425
       : * (unsigned char *) (from++))))
426
 
427
  /* This macro pushes a character back on the input stream.  */
428
 
429
#define UNGET(uch) (*--from = (uch))
430
 
431
  /* This macro puts a character into the output buffer.  If this
432
     character fills the output buffer, this macro jumps to the label
433
     TOFULL.  We use this rather ugly approach because we need to
434
     handle two different termination conditions: EOF on the input
435
     stream, and a full output buffer.  It would be simpler if we
436
     always read in the entire input stream before processing it, but
437
     I don't want to make such a significant change to the assembler's
438
     memory usage.  */
439
 
440
#define PUT(pch)                                \
441
  do                                            \
442
    {                                           \
443
      *to++ = (pch);                            \
444
      if (to >= toend)                          \
445
        goto tofull;                            \
446
    }                                           \
447
  while (0)
448
 
449
  if (saved_input != NULL)
450
    {
451
      from = saved_input;
452
      fromend = from + saved_input_len;
453
    }
454
  else
455
    {
456
      fromlen = (*get) (input_buffer, sizeof input_buffer);
457
      if (fromlen == 0)
458
        return 0;
459
      from = input_buffer;
460
      fromend = from + fromlen;
461
    }
462
 
463
  while (1)
464
    {
465
      /* The cases in this switch end with continue, in order to
466
         branch back to the top of this while loop and generate the
467
         next output character in the appropriate state.  */
468
      switch (state)
469
        {
470
        case -1:
471
          ch = *out_string++;
472
          if (*out_string == '\0')
473
            {
474
              state = old_state;
475
              old_state = 3;
476
            }
477
          PUT (ch);
478
          continue;
479
 
480
        case -2:
481
          for (;;)
482
            {
483
              do
484
                {
485
                  ch = GET ();
486
 
487
                  if (ch == EOF)
488
                    {
489
                      as_warn (_("end of file in comment"));
490
                      goto fromeof;
491
                    }
492
 
493
                  if (ch == '\n')
494
                    PUT ('\n');
495
                }
496
              while (ch != '*');
497
 
498
              while ((ch = GET ()) == '*')
499
                ;
500
 
501
              if (ch == EOF)
502
                {
503
                  as_warn (_("end of file in comment"));
504
                  goto fromeof;
505
                }
506
 
507
              if (ch == '/')
508
                break;
509
 
510
              UNGET (ch);
511
            }
512
 
513
          state = old_state;
514
          UNGET (' ');
515
          continue;
516
 
517
        case 4:
518
          ch = GET ();
519
          if (ch == EOF)
520
            goto fromeof;
521
          else if (ch >= '0' && ch <= '9')
522
            PUT (ch);
523
          else
524
            {
525
              while (ch != EOF && IS_WHITESPACE (ch))
526
                ch = GET ();
527
              if (ch == '"')
528
                {
529
                  quotechar = ch;
530
                  state = 5;
531
                  old_state = 3;
532
                  PUT (ch);
533
                }
534
              else
535
                {
536
                  while (ch != EOF && ch != '\n')
537
                    ch = GET ();
538
                  state = 0;
539
                  PUT (ch);
540
                }
541
            }
542
          continue;
543
 
544
        case 5:
545
          /* We are going to copy everything up to a quote character,
546
             with special handling for a backslash.  We try to
547
             optimize the copying in the simple case without using the
548
             GET and PUT macros.  */
549
          {
550
            char *s;
551
            int len;
552
 
553
            for (s = from; s < fromend; s++)
554
              {
555
                ch = *s;
556
                if (ch == '\\'
557
                    || ch == quotechar
558
                    || ch == '\n')
559
                  break;
560
              }
561
            len = s - from;
562
            if (len > toend - to)
563
              len = toend - to;
564
            if (len > 0)
565
              {
566
                memcpy (to, from, len);
567
                to += len;
568
                from += len;
569
                if (to >= toend)
570
                  goto tofull;
571
              }
572
          }
573
 
574
          ch = GET ();
575
          if (ch == EOF)
576
            {
577
              /* This buffer is here specifically so
578
                 that the UNGET below will work.  */
579
              static char one_char_buf[1];
580
 
581
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
582
              state = old_state;
583
              from = fromend = one_char_buf + 1;
584
              fromlen = 1;
585
              UNGET ('\n');
586
              PUT (quotechar);
587
            }
588
          else if (ch == quotechar)
589
            {
590
              state = old_state;
591
              PUT (ch);
592
            }
593
#ifndef NO_STRING_ESCAPES
594
          else if (ch == '\\')
595
            {
596
              state = 6;
597
              PUT (ch);
598
            }
599
#endif
600
          else if (scrub_m68k_mri && ch == '\n')
601
            {
602
              /* Just quietly terminate the string.  This permits lines like
603
                   bne  label   loop if we haven't reach end yet.  */
604
              state = old_state;
605
              UNGET (ch);
606
              PUT ('\'');
607
            }
608
          else
609
            {
610
              PUT (ch);
611
            }
612
          continue;
613
 
614
        case 6:
615
          state = 5;
616
          ch = GET ();
617
          switch (ch)
618
            {
619
              /* Handle strings broken across lines, by turning '\n' into
620
                 '\\' and 'n'.  */
621
            case '\n':
622
              UNGET ('n');
623
              add_newlines++;
624
              PUT ('\\');
625
              continue;
626
 
627
            case EOF:
628
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
629
              PUT (quotechar);
630
              continue;
631
 
632
            case '"':
633
            case '\\':
634
            case 'b':
635
            case 'f':
636
            case 'n':
637
            case 'r':
638
            case 't':
639
            case 'v':
640
            case 'x':
641
            case 'X':
642
            case '0':
643
            case '1':
644
            case '2':
645
            case '3':
646
            case '4':
647
            case '5':
648
            case '6':
649
            case '7':
650
              break;
651
 
652
            default:
653
#ifdef ONLY_STANDARD_ESCAPES
654
              as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
655
#endif
656
              break;
657
            }
658
          PUT (ch);
659
          continue;
660
 
661
#ifdef DOUBLEBAR_PARALLEL
662
        case 13:
663
          ch = GET ();
664
          if (ch != '|')
665
            abort ();
666
 
667
          /* Reset back to state 1 and pretend that we are parsing a
668
             line from just after the first white space.  */
669
          state = 1;
670
          PUT ('|');
671
#ifdef TC_TIC6X
672
          /* "||^" is used for SPMASKed instructions.  */
673
          ch = GET ();
674
          if (ch == EOF)
675
            goto fromeof;
676
          else if (ch == '^')
677
            PUT ('^');
678
          else
679
            UNGET (ch);
680
#endif
681
          continue;
682
#endif
683
#ifdef TC_Z80
684
        case 16:
685
          /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
686
          ch = GET ();
687
          if (ch == 'f' || ch == 'F')
688
            {
689
              state = 17;
690
              PUT (ch);
691
            }
692
          else
693
            {
694
              state = 9;
695
              break;
696
            }
697
        case 17:
698
          /* We have seen "af" at the start of a symbol,
699
             a ' here is a part of that symbol.  */
700
          ch = GET ();
701
          state = 9;
702
          if (ch == '\'')
703
            /* Change to avoid warning about unclosed string.  */
704
            PUT ('`');
705
          else if (ch != EOF)
706
            UNGET (ch);
707
          break;
708
#endif
709
        }
710
 
711
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
712
 
713
      /* flushchar: */
714
      ch = GET ();
715
 
716
#ifdef TC_PREDICATE_START_CHAR
717
      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
718
        {
719
          state += 14;
720
          PUT (ch);
721
          continue;
722
        }
723
      else if (state == 14 || state == 15)
724
        {
725
          if (ch == TC_PREDICATE_END_CHAR)
726
            {
727
              state -= 14;
728
              PUT (ch);
729
              ch = GET ();
730
            }
731
          else
732
            {
733
              PUT (ch);
734
              continue;
735
            }
736
        }
737
#endif
738
 
739
    recycle:
740
 
741
#if defined TC_ARM && defined OBJ_ELF
742
      /* We need to watch out for .symver directives.  See the comment later
743
         in this function.  */
744
      if (symver_state == NULL)
745
        {
746
          if ((state == 0 || state == 1) && ch == symver_pseudo[0])
747
            symver_state = symver_pseudo + 1;
748
        }
749
      else
750
        {
751
          /* We advance to the next state if we find the right
752
             character.  */
753
          if (ch != '\0' && (*symver_state == ch))
754
            ++symver_state;
755
          else if (*symver_state != '\0')
756
            /* We did not get the expected character, or we didn't
757
               get a valid terminating character after seeing the
758
               entire pseudo-op, so we must go back to the beginning.  */
759
            symver_state = NULL;
760
          else
761
            {
762
              /* We've read the entire pseudo-op.  If this is the end
763
                 of the line, go back to the beginning.  */
764
              if (IS_NEWLINE (ch))
765
                symver_state = NULL;
766
            }
767
        }
768
#endif /* TC_ARM && OBJ_ELF */
769
 
770
#ifdef TC_M68K
771
      /* We want to have pseudo-ops which control whether we are in
772
         MRI mode or not.  Unfortunately, since m68k MRI mode affects
773
         the scrubber, that means that we need a special purpose
774
         recognizer here.  */
775
      if (mri_state == NULL)
776
        {
777
          if ((state == 0 || state == 1)
778
              && ch == mri_pseudo[0])
779
            mri_state = mri_pseudo + 1;
780
        }
781
      else
782
        {
783
          /* We advance to the next state if we find the right
784
             character, or if we need a space character and we get any
785
             whitespace character, or if we need a '0' and we get a
786
             '1' (this is so that we only need one state to handle
787
             ``.mri 0'' and ``.mri 1'').  */
788
          if (ch != '\0'
789
              && (*mri_state == ch
790
                  || (*mri_state == ' '
791
                      && lex[ch] == LEX_IS_WHITESPACE)
792
                  || (*mri_state == '0'
793
                      && ch == '1')))
794
            {
795
              mri_last_ch = ch;
796
              ++mri_state;
797
            }
798
          else if (*mri_state != '\0'
799
                   || (lex[ch] != LEX_IS_WHITESPACE
800
                       && lex[ch] != LEX_IS_NEWLINE))
801
            {
802
              /* We did not get the expected character, or we didn't
803
                 get a valid terminating character after seeing the
804
                 entire pseudo-op, so we must go back to the
805
                 beginning.  */
806
              mri_state = NULL;
807
            }
808
          else
809
            {
810
              /* We've read the entire pseudo-op.  mips_last_ch is
811
                 either '0' or '1' indicating whether to enter or
812
                 leave MRI mode.  */
813
              do_scrub_begin (mri_last_ch == '1');
814
              mri_state = NULL;
815
 
816
              /* We continue handling the character as usual.  The
817
                 main gas reader must also handle the .mri pseudo-op
818
                 to control expression parsing and the like.  */
819
            }
820
        }
821
#endif
822
 
823
      if (ch == EOF)
824
        {
825
          if (state != 0)
826
            {
827
              as_warn (_("end of file not at end of a line; newline inserted"));
828
              state = 0;
829
              PUT ('\n');
830
            }
831
          goto fromeof;
832
        }
833
 
834
      switch (lex[ch])
835
        {
836
        case LEX_IS_WHITESPACE:
837
          do
838
            {
839
              ch = GET ();
840
            }
841
          while (ch != EOF && IS_WHITESPACE (ch));
842
          if (ch == EOF)
843
            goto fromeof;
844
 
845
          if (state == 0)
846
            {
847
              /* Preserve a single whitespace character at the
848
                 beginning of a line.  */
849
              state = 1;
850
              UNGET (ch);
851
              PUT (' ');
852
              break;
853
            }
854
 
855
#ifdef KEEP_WHITE_AROUND_COLON
856
          if (lex[ch] == LEX_IS_COLON)
857
            {
858
              /* Only keep this white if there's no white *after* the
859
                 colon.  */
860
              ch2 = GET ();
861
              if (ch2 != EOF)
862
                UNGET (ch2);
863
              if (!IS_WHITESPACE (ch2))
864
                {
865
                  state = 9;
866
                  UNGET (ch);
867
                  PUT (' ');
868
                  break;
869
                }
870
            }
871
#endif
872
          if (IS_COMMENT (ch)
873
              || ch == '/'
874
              || IS_LINE_SEPARATOR (ch)
875
              || IS_PARALLEL_SEPARATOR (ch))
876
            {
877
              if (scrub_m68k_mri)
878
                {
879
                  /* In MRI mode, we keep these spaces.  */
880
                  UNGET (ch);
881
                  PUT (' ');
882
                  break;
883
                }
884
              goto recycle;
885
            }
886
 
887
          /* If we're in state 2 or 11, we've seen a non-white
888
             character followed by whitespace.  If the next character
889
             is ':', this is whitespace after a label name which we
890
             normally must ignore.  In MRI mode, though, spaces are
891
             not permitted between the label and the colon.  */
892
          if ((state == 2 || state == 11)
893
              && lex[ch] == LEX_IS_COLON
894
              && ! scrub_m68k_mri)
895
            {
896
              state = 1;
897
              PUT (ch);
898
              break;
899
            }
900
 
901
          switch (state)
902
            {
903
            case 1:
904
              /* We can arrive here if we leave a leading whitespace
905
                 character at the beginning of a line.  */
906
              goto recycle;
907
            case 2:
908
              state = 3;
909
              if (to + 1 < toend)
910
                {
911
                  /* Optimize common case by skipping UNGET/GET.  */
912
                  PUT (' ');    /* Sp after opco */
913
                  goto recycle;
914
                }
915
              UNGET (ch);
916
              PUT (' ');
917
              break;
918
            case 3:
919
#ifndef TC_KEEP_OPERAND_SPACES
920
              /* For TI C6X, we keep these spaces as they may separate
921
                 functional unit specifiers from operands.  */
922
              if (scrub_m68k_mri)
923
#endif
924
                {
925
                  /* In MRI mode, we keep these spaces.  */
926
                  UNGET (ch);
927
                  PUT (' ');
928
                  break;
929
                }
930
              goto recycle;     /* Sp in operands */
931
            case 9:
932
            case 10:
933
#ifndef TC_KEEP_OPERAND_SPACES
934
              if (scrub_m68k_mri)
935
#endif
936
                {
937
                  /* In MRI mode, we keep these spaces.  */
938
                  state = 3;
939
                  UNGET (ch);
940
                  PUT (' ');
941
                  break;
942
                }
943
              state = 10;       /* Sp after symbol char */
944
              goto recycle;
945
            case 11:
946
              if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
947
                state = 1;
948
              else
949
                {
950
                  /* We know that ch is not ':', since we tested that
951
                     case above.  Therefore this is not a label, so it
952
                     must be the opcode, and we've just seen the
953
                     whitespace after it.  */
954
                  state = 3;
955
                }
956
              UNGET (ch);
957
              PUT (' ');        /* Sp after label definition.  */
958
              break;
959
            default:
960
              BAD_CASE (state);
961
            }
962
          break;
963
 
964
        case LEX_IS_TWOCHAR_COMMENT_1ST:
965
          ch2 = GET ();
966
          if (ch2 == '*')
967
            {
968
              for (;;)
969
                {
970
                  do
971
                    {
972
                      ch2 = GET ();
973
                      if (ch2 != EOF && IS_NEWLINE (ch2))
974
                        add_newlines++;
975
                    }
976
                  while (ch2 != EOF && ch2 != '*');
977
 
978
                  while (ch2 == '*')
979
                    ch2 = GET ();
980
 
981
                  if (ch2 == EOF || ch2 == '/')
982
                    break;
983
 
984
                  /* This UNGET will ensure that we count newlines
985
                     correctly.  */
986
                  UNGET (ch2);
987
                }
988
 
989
              if (ch2 == EOF)
990
                as_warn (_("end of file in multiline comment"));
991
 
992
              ch = ' ';
993
              goto recycle;
994
            }
995
#ifdef DOUBLESLASH_LINE_COMMENTS
996
          else if (ch2 == '/')
997
            {
998
              do
999
                {
1000
                  ch = GET ();
1001
                }
1002
              while (ch != EOF && !IS_NEWLINE (ch));
1003
              if (ch == EOF)
1004
                as_warn ("end of file in comment; newline inserted");
1005
              state = 0;
1006
              PUT ('\n');
1007
              break;
1008
            }
1009
#endif
1010
          else
1011
            {
1012
              if (ch2 != EOF)
1013
                UNGET (ch2);
1014
              if (state == 9 || state == 10)
1015
                state = 3;
1016
              PUT (ch);
1017
            }
1018
          break;
1019
 
1020
        case LEX_IS_STRINGQUOTE:
1021
          quotechar = ch;
1022
          if (state == 10)
1023
            {
1024
              /* Preserve the whitespace in foo "bar".  */
1025
              UNGET (ch);
1026
              state = 3;
1027
              PUT (' ');
1028
 
1029
              /* PUT didn't jump out.  We could just break, but we
1030
                 know what will happen, so optimize a bit.  */
1031
              ch = GET ();
1032
              old_state = 3;
1033
            }
1034
          else if (state == 9)
1035
            old_state = 3;
1036
          else
1037
            old_state = state;
1038
          state = 5;
1039
          PUT (ch);
1040
          break;
1041
 
1042
#ifndef IEEE_STYLE
1043
        case LEX_IS_ONECHAR_QUOTE:
1044
#ifdef H_TICK_HEX
1045
          if (state == 9 && enable_h_tick_hex)
1046
            {
1047
              char c;
1048
 
1049
              c = GET ();
1050
              as_warn ("'%c found after symbol", c);
1051
              UNGET (c);
1052
            }
1053
#endif
1054
          if (state == 10)
1055
            {
1056
              /* Preserve the whitespace in foo 'b'.  */
1057
              UNGET (ch);
1058
              state = 3;
1059
              PUT (' ');
1060
              break;
1061
            }
1062
          ch = GET ();
1063
          if (ch == EOF)
1064
            {
1065
              as_warn (_("end of file after a one-character quote; \\0 inserted"));
1066
              ch = 0;
1067
            }
1068
          if (ch == '\\')
1069
            {
1070
              ch = GET ();
1071
              if (ch == EOF)
1072
                {
1073
                  as_warn (_("end of file in escape character"));
1074
                  ch = '\\';
1075
                }
1076
              else
1077
                ch = process_escape (ch);
1078
            }
1079
          sprintf (out_buf, "%d", (int) (unsigned char) ch);
1080
 
1081
          /* None of these 'x constants for us.  We want 'x'.  */
1082
          if ((ch = GET ()) != '\'')
1083
            {
1084
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1085
              as_warn (_("missing close quote; (assumed)"));
1086
#else
1087
              if (ch != EOF)
1088
                UNGET (ch);
1089
#endif
1090
            }
1091
          if (strlen (out_buf) == 1)
1092
            {
1093
              PUT (out_buf[0]);
1094
              break;
1095
            }
1096
          if (state == 9)
1097
            old_state = 3;
1098
          else
1099
            old_state = state;
1100
          state = -1;
1101
          out_string = out_buf;
1102
          PUT (*out_string++);
1103
          break;
1104
#endif
1105
 
1106
        case LEX_IS_COLON:
1107
#ifdef KEEP_WHITE_AROUND_COLON
1108
          state = 9;
1109
#else
1110
          if (state == 9 || state == 10)
1111
            state = 3;
1112
          else if (state != 3)
1113
            state = 1;
1114
#endif
1115
          PUT (ch);
1116
          break;
1117
 
1118
        case LEX_IS_NEWLINE:
1119
          /* Roll out a bunch of newlines from inside comments, etc.  */
1120
          if (add_newlines)
1121
            {
1122
              --add_newlines;
1123
              UNGET (ch);
1124
            }
1125
          /* Fall through.  */
1126
 
1127
        case LEX_IS_LINE_SEPARATOR:
1128
          state = 0;
1129
          PUT (ch);
1130
          break;
1131
 
1132
        case LEX_IS_PARALLEL_SEPARATOR:
1133
          state = 1;
1134
          PUT (ch);
1135
          break;
1136
 
1137
#ifdef TC_V850
1138
        case LEX_IS_DOUBLEDASH_1ST:
1139
          ch2 = GET ();
1140
          if (ch2 != '-')
1141
            {
1142
              if (ch2 != EOF)
1143
                UNGET (ch2);
1144
              goto de_fault;
1145
            }
1146
          /* Read and skip to end of line.  */
1147
          do
1148
            {
1149
              ch = GET ();
1150
            }
1151
          while (ch != EOF && ch != '\n');
1152
 
1153
          if (ch == EOF)
1154
            as_warn (_("end of file in comment; newline inserted"));
1155
 
1156
          state = 0;
1157
          PUT ('\n');
1158
          break;
1159
#endif
1160
#ifdef DOUBLEBAR_PARALLEL
1161
        case LEX_IS_DOUBLEBAR_1ST:
1162
          ch2 = GET ();
1163
          if (ch2 != EOF)
1164
            UNGET (ch2);
1165
          if (ch2 != '|')
1166
            goto de_fault;
1167
 
1168
          /* Handle '||' in two states as invoking PUT twice might
1169
             result in the first one jumping out of this loop.  We'd
1170
             then lose track of the state and one '|' char.  */
1171
          state = 13;
1172
          PUT ('|');
1173
          break;
1174
#endif
1175
        case LEX_IS_LINE_COMMENT_START:
1176
          /* FIXME-someday: The two character comment stuff was badly
1177
             thought out.  On i386, we want '/' as line comment start
1178
             AND we want C style comments.  hence this hack.  The
1179
             whole lexical process should be reworked.  xoxorich.  */
1180
          if (ch == '/')
1181
            {
1182
              ch2 = GET ();
1183
              if (ch2 == '*')
1184
                {
1185
                  old_state = 3;
1186
                  state = -2;
1187
                  break;
1188
                }
1189
              else
1190
                {
1191
                  UNGET (ch2);
1192
                }
1193
            }
1194
 
1195
          if (state == 0 || state == 1)  /* Only comment at start of line.  */
1196
            {
1197
              int startch;
1198
 
1199
              startch = ch;
1200
 
1201
              do
1202
                {
1203
                  ch = GET ();
1204
                }
1205
              while (ch != EOF && IS_WHITESPACE (ch));
1206
 
1207
              if (ch == EOF)
1208
                {
1209
                  as_warn (_("end of file in comment; newline inserted"));
1210
                  PUT ('\n');
1211
                  break;
1212
                }
1213
 
1214
              if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1215
                {
1216
                  /* Not a cpp line.  */
1217
                  while (ch != EOF && !IS_NEWLINE (ch))
1218
                    ch = GET ();
1219
                  if (ch == EOF)
1220
                    as_warn (_("end of file in comment; newline inserted"));
1221
                  state = 0;
1222
                  PUT ('\n');
1223
                  break;
1224
                }
1225
              /* Looks like `# 123 "filename"' from cpp.  */
1226
              UNGET (ch);
1227
              old_state = 4;
1228
              state = -1;
1229
              if (scrub_m68k_mri)
1230
                out_string = "\tlinefile ";
1231
              else
1232
                out_string = "\t.linefile ";
1233
              PUT (*out_string++);
1234
              break;
1235
            }
1236
 
1237
#ifdef TC_D10V
1238
          /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1239
             Trap is the only short insn that has a first operand that is
1240
             neither register nor label.
1241
             We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
1242
             We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1243
             already LEX_IS_LINE_COMMENT_START.  However, it is the
1244
             only character in line_comment_chars for d10v, hence we
1245
             can recognize it as such.  */
1246
          /* An alternative approach would be to reset the state to 1 when
1247
             we see '||', '<-' or '->', but that seems to be overkill.  */
1248
          if (state == 10)
1249
            PUT (' ');
1250
#endif
1251
          /* We have a line comment character which is not at the
1252
             start of a line.  If this is also a normal comment
1253
             character, fall through.  Otherwise treat it as a default
1254
             character.  */
1255
          if (strchr (tc_comment_chars, ch) == NULL
1256
              && (! scrub_m68k_mri
1257
                  || (ch != '!' && ch != '*')))
1258
            goto de_fault;
1259
          if (scrub_m68k_mri
1260
              && (ch == '!' || ch == '*' || ch == '#')
1261
              && state != 1
1262
              && state != 10)
1263
            goto de_fault;
1264
          /* Fall through.  */
1265
        case LEX_IS_COMMENT_START:
1266
#if defined TC_ARM && defined OBJ_ELF
1267
          /* On the ARM, `@' is the comment character.
1268
             Unfortunately this is also a special character in ELF .symver
1269
             directives (and .type, though we deal with those another way).
1270
             So we check if this line is such a directive, and treat
1271
             the character as default if so.  This is a hack.  */
1272
          if ((symver_state != NULL) && (*symver_state == 0))
1273
            goto de_fault;
1274
#endif
1275
 
1276
#ifdef TC_ARM
1277
          /* For the ARM, care is needed not to damage occurrences of \@
1278
             by stripping the @ onwards.  Yuck.  */
1279
          if (to > tostart && *(to - 1) == '\\')
1280
            /* Do not treat the @ as a start-of-comment.  */
1281
            goto de_fault;
1282
#endif
1283
 
1284
#ifdef WARN_COMMENTS
1285
          if (!found_comment)
1286
            as_where (&found_comment_file, &found_comment);
1287
#endif
1288
          do
1289
            {
1290
              ch = GET ();
1291
            }
1292
          while (ch != EOF && !IS_NEWLINE (ch));
1293
          if (ch == EOF)
1294
            as_warn (_("end of file in comment; newline inserted"));
1295
          state = 0;
1296
          PUT ('\n');
1297
          break;
1298
 
1299
#ifdef H_TICK_HEX
1300
        case LEX_IS_H:
1301
          /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1302
             the H' with 0x to make them gas-style hex characters.  */
1303
          if (enable_h_tick_hex)
1304
            {
1305
              char quot;
1306
 
1307
              quot = GET ();
1308
              if (quot == '\'')
1309
                {
1310
                  UNGET ('x');
1311
                  ch = '0';
1312
                }
1313
              else
1314
                UNGET (quot);
1315
            }
1316
          /* FALL THROUGH */
1317
#endif
1318
 
1319
        case LEX_IS_SYMBOL_COMPONENT:
1320
          if (state == 10)
1321
            {
1322
              /* This is a symbol character following another symbol
1323
                 character, with whitespace in between.  We skipped
1324
                 the whitespace earlier, so output it now.  */
1325
              UNGET (ch);
1326
              state = 3;
1327
              PUT (' ');
1328
              break;
1329
            }
1330
 
1331
#ifdef TC_Z80
1332
          /* "af'" is a symbol containing '\''.  */
1333
          if (state == 3 && (ch == 'a' || ch == 'A'))
1334
            {
1335
              state = 16;
1336
              PUT (ch);
1337
              ch = GET ();
1338
              if (ch == 'f' || ch == 'F')
1339
                {
1340
                  state = 17;
1341
                  PUT (ch);
1342
                  break;
1343
                }
1344
              else
1345
                {
1346
                  state = 9;
1347
                  if (!IS_SYMBOL_COMPONENT (ch))
1348
                    {
1349
                      if (ch != EOF)
1350
                        UNGET (ch);
1351
                      break;
1352
                    }
1353
                }
1354
            }
1355
#endif
1356
          if (state == 3)
1357
            state = 9;
1358
 
1359
          /* This is a common case.  Quickly copy CH and all the
1360
             following symbol component or normal characters.  */
1361
          if (to + 1 < toend
1362
              && mri_state == NULL
1363
#if defined TC_ARM && defined OBJ_ELF
1364
              && symver_state == NULL
1365
#endif
1366
              )
1367
            {
1368
              char *s;
1369
              int len;
1370
 
1371
              for (s = from; s < fromend; s++)
1372
                {
1373
                  int type;
1374
 
1375
                  ch2 = *(unsigned char *) s;
1376
                  type = lex[ch2];
1377
                  if (type != 0
1378
                      && type != LEX_IS_SYMBOL_COMPONENT)
1379
                    break;
1380
                }
1381
 
1382
              if (s > from)
1383
                /* Handle the last character normally, for
1384
                   simplicity.  */
1385
                --s;
1386
 
1387
              len = s - from;
1388
 
1389
              if (len > (toend - to) - 1)
1390
                len = (toend - to) - 1;
1391
 
1392
              if (len > 0)
1393
                {
1394
                  PUT (ch);
1395
                  memcpy (to, from, len);
1396
                  to += len;
1397
                  from += len;
1398
                  if (to >= toend)
1399
                    goto tofull;
1400
                  ch = GET ();
1401
                }
1402
            }
1403
 
1404
          /* Fall through.  */
1405
        default:
1406
        de_fault:
1407
          /* Some relatively `normal' character.  */
1408
          if (state == 0)
1409
            {
1410
              state = 11;       /* Now seeing label definition.  */
1411
            }
1412
          else if (state == 1)
1413
            {
1414
              state = 2;        /* Ditto.  */
1415
            }
1416
          else if (state == 9)
1417
            {
1418
              if (!IS_SYMBOL_COMPONENT (ch))
1419
                state = 3;
1420
            }
1421
          else if (state == 10)
1422
            {
1423
              if (ch == '\\')
1424
                {
1425
                  /* Special handling for backslash: a backslash may
1426
                     be the beginning of a formal parameter (of a
1427
                     macro) following another symbol character, with
1428
                     whitespace in between.  If that is the case, we
1429
                     output a space before the parameter.  Strictly
1430
                     speaking, correct handling depends upon what the
1431
                     macro parameter expands into; if the parameter
1432
                     expands into something which does not start with
1433
                     an operand character, then we don't want to keep
1434
                     the space.  We don't have enough information to
1435
                     make the right choice, so here we are making the
1436
                     choice which is more likely to be correct.  */
1437
                  if (to + 1 >= toend)
1438
                    {
1439
                      /* If we're near the end of the buffer, save the
1440
                         character for the next time round.  Otherwise
1441
                         we'll lose our state.  */
1442
                      UNGET (ch);
1443
                      goto tofull;
1444
                    }
1445
                  *to++ = ' ';
1446
                }
1447
 
1448
              state = 3;
1449
            }
1450
          PUT (ch);
1451
          break;
1452
        }
1453
    }
1454
 
1455
  /*NOTREACHED*/
1456
 
1457
 fromeof:
1458
  /* We have reached the end of the input.  */
1459
  return to - tostart;
1460
 
1461
 tofull:
1462
  /* The output buffer is full.  Save any input we have not yet
1463
     processed.  */
1464
  if (fromend > from)
1465
    {
1466
      saved_input = from;
1467
      saved_input_len = fromend - from;
1468
    }
1469
  else
1470
    saved_input = NULL;
1471
 
1472
  return to - tostart;
1473
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.