OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [binutils-2.20.1/] [gas/] [app.c] - Blame information for rev 304

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 205 julius
/* This is the Assembler Pre-Processor
2
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3
   1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008
4
   Free Software Foundation, Inc.
5
 
6
   This file is part of GAS, the GNU Assembler.
7
 
8
   GAS is free software; you can redistribute it and/or modify
9
   it under the terms of the GNU General Public License as published by
10
   the Free Software Foundation; either version 3, or (at your option)
11
   any later version.
12
 
13
   GAS is distributed in the hope that it will be useful, but WITHOUT
14
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16
   License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with GAS; see the file COPYING.  If not, write to the Free
20
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21
   02110-1301, USA.  */
22
 
23
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
24
/* App, the assembler pre-processor.  This pre-processor strips out
25
   excess spaces, turns single-quoted characters into a decimal
26
   constant, and turns the # in # <number> <filename> <garbage> into a
27
   .linefile.  This needs better error-handling.  */
28
 
29
#include "as.h"
30
 
31
/* Compilers that do not claim ISO C conformance may not understand
   `const'; define it away so the declarations below still compile.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef H_TICK_HEX
/* When nonzero, do_scrub_begin classifies 'h' and 'H' as LEX_IS_H.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than m68k never scrub in MRI mode; a constant lets the
   tests on this name compile unchanged.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
/* Position reached while matching symver_pseudo against the input, or
   NULL when no match is in progress.  */
static const char * symver_state;
#endif
60
 
61
/* Classification of every possible input byte, filled in by
   do_scrub_begin.  An entry of zero means the character has no special
   meaning to the scrubber.  */
static char lex[256];

/* Characters that are always classified as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in LEX.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
#define LEX_IS_PARALLEL_SEPARATOR       14
#ifdef H_TICK_HEX
#define LEX_IS_H                        15
#endif

/* Predicates on LEX.  The argument is used directly as an index into
   LEX, so it must be in the range 0..255; in particular it must not
   be EOF.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)        (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
97
 
98
/* FIXME-soon: The entire lexer/parser thingy should be
99
   built statically at compile time rather than dynamically
100
   each and every time the assembler is run.  xoxorich.  */
101
 
102
void
103
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104
{
105
  const char *p;
106
  int c;
107
 
108
  lex[' '] = LEX_IS_WHITESPACE;
109
  lex['\t'] = LEX_IS_WHITESPACE;
110
  lex['\r'] = LEX_IS_WHITESPACE;
111
  lex['\n'] = LEX_IS_NEWLINE;
112
  lex[':'] = LEX_IS_COLON;
113
 
114
#ifdef TC_M68K
115
  scrub_m68k_mri = m68k_mri;
116
 
117
  if (! m68k_mri)
118
#endif
119
    {
120
      lex['"'] = LEX_IS_STRINGQUOTE;
121
 
122
#if ! defined (TC_HPPA) && ! defined (TC_I370)
123
      /* I370 uses single-quotes to delimit integer, float constants.  */
124
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
125
#endif
126
 
127
#ifdef SINGLE_QUOTE_STRINGS
128
      lex['\''] = LEX_IS_STRINGQUOTE;
129
#endif
130
    }
131
 
132
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133
     in state 5 of do_scrub_chars must be changed.  */
134
 
135
  /* Note that these override the previous defaults, e.g. if ';' is a
136
     comment char, then it isn't a line separator.  */
137
  for (p = symbol_chars; *p; ++p)
138
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139
 
140
  for (c = 128; c < 256; ++c)
141
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
 
143
#ifdef tc_symbol_chars
144
  /* This macro permits the processor to specify all characters which
145
     may appears in an operand.  This will prevent the scrubber from
146
     discarding meaningful whitespace in certain cases.  The i386
147
     backend uses this to support prefixes, which can confuse the
148
     scrubber as to whether it is parsing operands or opcodes.  */
149
  for (p = tc_symbol_chars; *p; ++p)
150
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151
#endif
152
 
153
  /* The m68k backend wants to be able to change comment_chars.  */
154
#ifndef tc_comment_chars
155
#define tc_comment_chars comment_chars
156
#endif
157
  for (p = tc_comment_chars; *p; p++)
158
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159
 
160
  for (p = line_comment_chars; *p; p++)
161
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162
 
163
  for (p = line_separator_chars; *p; p++)
164
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
165
 
166
#ifdef tc_parallel_separator_chars
167
  /* This macro permits the processor to specify all characters which
168
     separate parallel insns on the same line.  */
169
  for (p = tc_parallel_separator_chars; *p; p++)
170
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
171
#endif
172
 
173
  /* Only allow slash-star comments if slash is not in use.
174
     FIXME: This isn't right.  We should always permit them.  */
175
  if (lex['/'] == 0)
176
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
177
 
178
#ifdef TC_M68K
179
  if (m68k_mri)
180
    {
181
      lex['\''] = LEX_IS_STRINGQUOTE;
182
      lex[';'] = LEX_IS_COMMENT_START;
183
      lex['*'] = LEX_IS_LINE_COMMENT_START;
184
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
185
         then it can't be used in an expression.  */
186
      lex['!'] = LEX_IS_LINE_COMMENT_START;
187
    }
188
#endif
189
 
190
#ifdef TC_V850
191
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
192
#endif
193
#ifdef DOUBLEBAR_PARALLEL
194
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
195
#endif
196
#ifdef TC_D30V
197
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
198
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
199
#endif
200
 
201
#ifdef H_TICK_HEX
202
  if (enable_h_tick_hex)
203
    {
204
      lex['h'] = LEX_IS_H;
205
      lex['H'] = LEX_IS_H;
206
    }
207
#endif
208
}
209
 
210
/* Saved state of the scrubber.  All of this persists between calls to
   do_scrub_chars, and app_push/app_pop save and restore it (except
   input_buffer, whose pending contents are copied via saved_input).  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once the current special state finishes.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Small scratch buffer saved and restored together with out_string
   (presumably the text out_string points into -- confirm against the
   rest of do_scrub_chars).  */
static char out_buf[20];
/* Count of newlines consumed inside strings and comments without being
   written to the output yet.  */
static int add_newlines;
/* Input left over from a previous do_scrub_chars call, or NULL if
   there is none; saved_input_len is its length in bytes.  */
static char *saved_input;
static int saved_input_len;
/* Buffer the GET macro in do_scrub_chars refills through the caller's
   get function.  */
static char input_buffer[32 * 1024];
/* Position reached while matching mri_pseudo against the input, or
   NULL when no match is in progress.  */
static const char *mri_state;
/* Last character accepted while matching mri_pseudo.  */
static char mri_last_ch;
221
 
222
/* Data structure for saving the state of app across #include's.  Note that
223
   app is called asynchronously to the parsing of the .include's, so our
224
   state at the time .include is interpreted is completely unrelated.
225
   That's why we have to save it all.  */
226
 
227
struct app_save
228
{
229
  int          state;
230
  int          old_state;
231
  char *       out_string;
232
  char         out_buf[sizeof (out_buf)];
233
  int          add_newlines;
234
  char *       saved_input;
235
  int          saved_input_len;
236
#ifdef TC_M68K
237
  int          scrub_m68k_mri;
238
#endif
239
  const char * mri_state;
240
  char         mri_last_ch;
241
#if defined TC_ARM && defined OBJ_ELF
242
  const char * symver_state;
243
#endif
244
};
245
 
246
char *
247
app_push (void)
248
{
249
  register struct app_save *saved;
250
 
251
  saved = (struct app_save *) xmalloc (sizeof (*saved));
252
  saved->state = state;
253
  saved->old_state = old_state;
254
  saved->out_string = out_string;
255
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
256
  saved->add_newlines = add_newlines;
257
  if (saved_input == NULL)
258
    saved->saved_input = NULL;
259
  else
260
    {
261
      saved->saved_input = (char *) xmalloc (saved_input_len);
262
      memcpy (saved->saved_input, saved_input, saved_input_len);
263
      saved->saved_input_len = saved_input_len;
264
    }
265
#ifdef TC_M68K
266
  saved->scrub_m68k_mri = scrub_m68k_mri;
267
#endif
268
  saved->mri_state = mri_state;
269
  saved->mri_last_ch = mri_last_ch;
270
#if defined TC_ARM && defined OBJ_ELF
271
  saved->symver_state = symver_state;
272
#endif
273
 
274
  /* do_scrub_begin() is not useful, just wastes time.  */
275
 
276
  state = 0;
277
  saved_input = NULL;
278
 
279
  return (char *) saved;
280
}
281
 
282
void
283
app_pop (char *arg)
284
{
285
  register struct app_save *saved = (struct app_save *) arg;
286
 
287
  /* There is no do_scrub_end ().  */
288
  state = saved->state;
289
  old_state = saved->old_state;
290
  out_string = saved->out_string;
291
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
292
  add_newlines = saved->add_newlines;
293
  if (saved->saved_input == NULL)
294
    saved_input = NULL;
295
  else
296
    {
297
      gas_assert (saved->saved_input_len <= (int) (sizeof input_buffer));
298
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
299
      saved_input = input_buffer;
300
      saved_input_len = saved->saved_input_len;
301
      free (saved->saved_input);
302
    }
303
#ifdef TC_M68K
304
  scrub_m68k_mri = saved->scrub_m68k_mri;
305
#endif
306
  mri_state = saved->mri_state;
307
  mri_last_ch = saved->mri_last_ch;
308
#if defined TC_ARM && defined OBJ_ELF
309
  symver_state = saved->symver_state;
310
#endif
311
 
312
  free (arg);
313
}
314
 
315
/* @@ This assumes that \n &c are the same on host and target.  This is not
316
   necessarily true.  */
317
 
318
/* Translate the character CH that followed a backslash into the
   character the escape sequence denotes.

   The escapes b, f, n, r and t map to the corresponding host control
   characters; a single quote and a double quote map to themselves.
   Any other value of CH is returned unchanged.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      /* Not an escape we know about: pass it through untouched.  */
      return ch;
    }
}
341
 
342
/* This function is called to process input characters.  The GET
343
   parameter is used to retrieve more input characters.  GET should
344
   set its parameter to point to a buffer, and return the length of
345
   the buffer; it should return 0 at end of file.  The scrubbed output
346
   characters are put into the buffer starting at TOSTART; the TOSTART
347
   buffer is TOLEN bytes in length.  The function returns the number
348
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
349
   end of file was seen.  This function is arranged as a state
350
   machine, and saves its state so that it may return at any point.
351
   This is the way the old code used to work.  */
352
 
353
int
354
do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
355
{
356
  char *to = tostart;
357
  char *toend = tostart + tolen;
358
  char *from;
359
  char *fromend;
360
  int fromlen;
361
  register int ch, ch2 = 0;
362
  /* Character that started the string we're working on.  */
363
  static char quotechar;
364
 
365
  /*State 0: beginning of normal line
366
          1: After first whitespace on line (flush more white)
367
          2: After first non-white (opcode) on line (keep 1white)
368
          3: after second white on line (into operands) (flush white)
369
          4: after putting out a .linefile, put out digits
370
          5: parsing a string, then go to old-state
371
          6: putting out \ escape in a "d string.
372
          7: no longer used
373
          8: no longer used
374
          9: After seeing symbol char in state 3 (keep 1white after symchar)
375
         10: After seeing whitespace in state 9 (keep white before symchar)
376
         11: After seeing a symbol character in state 0 (eg a label definition)
377
         -1: output string in out_string and go to the state in old_state
378
         -2: flush text until a '*' '/' is seen, then go to state old_state
379
#ifdef TC_V850
380
         12: After seeing a dash, looking for a second dash as a start
381
             of comment.
382
#endif
383
#ifdef DOUBLEBAR_PARALLEL
384
         13: After seeing a vertical bar, looking for a second
385
             vertical bar as a parallel expression separator.
386
#endif
387
#ifdef TC_IA64
388
         14: After seeing a `(' at state 0, looking for a `)' as
389
             predicate.
390
         15: After seeing a `(' at state 1, looking for a `)' as
391
             predicate.
392
#endif
393
#ifdef TC_Z80
394
         16: After seeing an 'a' or an 'A' at the start of a symbol
395
         17: After seeing an 'f' or an 'F' in state 16
396
#endif
397
          */
398
 
399
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
400
     constructs like ``.loc 1 20''.  This was turning into ``.loc
401
     120''.  States 9 and 10 ensure that a space is never dropped in
402
     between characters which could appear in an identifier.  Ian
403
     Taylor, ian@cygnus.com.
404
 
405
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
406
     correctly on the PA (and any other target where colons are optional).
407
     Jeff Law, law@cs.utah.edu.
408
 
409
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
410
     get squashed into "cmp r1,r2||trap#1", with the all important space
411
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
412
 
413
  /* This macro gets the next input character.  */
414
 
415
#define GET()                                                   \
416
  (from < fromend                                               \
417
   ? * (unsigned char *) (from++)                               \
418
   : (saved_input = NULL,                                       \
419
      fromlen = (*get) (input_buffer, sizeof input_buffer),     \
420
      from = input_buffer,                                      \
421
      fromend = from + fromlen,                                 \
422
      (fromlen == 0                                              \
423
       ? EOF                                                    \
424
       : * (unsigned char *) (from++))))
425
 
426
  /* This macro pushes a character back on the input stream.  */
427
 
428
#define UNGET(uch) (*--from = (uch))
429
 
430
  /* This macro puts a character into the output buffer.  If this
431
     character fills the output buffer, this macro jumps to the label
432
     TOFULL.  We use this rather ugly approach because we need to
433
     handle two different termination conditions: EOF on the input
434
     stream, and a full output buffer.  It would be simpler if we
435
     always read in the entire input stream before processing it, but
436
     I don't want to make such a significant change to the assembler's
437
     memory usage.  */
438
 
439
#define PUT(pch)                                \
440
  do                                            \
441
    {                                           \
442
      *to++ = (pch);                            \
443
      if (to >= toend)                          \
444
        goto tofull;                            \
445
    }                                           \
446
  while (0)
447
 
448
  if (saved_input != NULL)
449
    {
450
      from = saved_input;
451
      fromend = from + saved_input_len;
452
    }
453
  else
454
    {
455
      fromlen = (*get) (input_buffer, sizeof input_buffer);
456
      if (fromlen == 0)
457
        return 0;
458
      from = input_buffer;
459
      fromend = from + fromlen;
460
    }
461
 
462
  while (1)
463
    {
464
      /* The cases in this switch end with continue, in order to
465
         branch back to the top of this while loop and generate the
466
         next output character in the appropriate state.  */
467
      switch (state)
468
        {
469
        case -1:
470
          ch = *out_string++;
471
          if (*out_string == '\0')
472
            {
473
              state = old_state;
474
              old_state = 3;
475
            }
476
          PUT (ch);
477
          continue;
478
 
479
        case -2:
480
          for (;;)
481
            {
482
              do
483
                {
484
                  ch = GET ();
485
 
486
                  if (ch == EOF)
487
                    {
488
                      as_warn (_("end of file in comment"));
489
                      goto fromeof;
490
                    }
491
 
492
                  if (ch == '\n')
493
                    PUT ('\n');
494
                }
495
              while (ch != '*');
496
 
497
              while ((ch = GET ()) == '*')
498
                ;
499
 
500
              if (ch == EOF)
501
                {
502
                  as_warn (_("end of file in comment"));
503
                  goto fromeof;
504
                }
505
 
506
              if (ch == '/')
507
                break;
508
 
509
              UNGET (ch);
510
            }
511
 
512
          state = old_state;
513
          UNGET (' ');
514
          continue;
515
 
516
        case 4:
517
          ch = GET ();
518
          if (ch == EOF)
519
            goto fromeof;
520
          else if (ch >= '0' && ch <= '9')
521
            PUT (ch);
522
          else
523
            {
524
              while (ch != EOF && IS_WHITESPACE (ch))
525
                ch = GET ();
526
              if (ch == '"')
527
                {
528
                  quotechar = ch;
529
                  state = 5;
530
                  old_state = 3;
531
                  PUT (ch);
532
                }
533
              else
534
                {
535
                  while (ch != EOF && ch != '\n')
536
                    ch = GET ();
537
                  state = 0;
538
                  PUT (ch);
539
                }
540
            }
541
          continue;
542
 
543
        case 5:
544
          /* We are going to copy everything up to a quote character,
545
             with special handling for a backslash.  We try to
546
             optimize the copying in the simple case without using the
547
             GET and PUT macros.  */
548
          {
549
            char *s;
550
            int len;
551
 
552
            for (s = from; s < fromend; s++)
553
              {
554
                ch = *s;
555
                if (ch == '\\'
556
                    || ch == quotechar
557
                    || ch == '\n')
558
                  break;
559
              }
560
            len = s - from;
561
            if (len > toend - to)
562
              len = toend - to;
563
            if (len > 0)
564
              {
565
                memcpy (to, from, len);
566
                to += len;
567
                from += len;
568
                if (to >= toend)
569
                  goto tofull;
570
              }
571
          }
572
 
573
          ch = GET ();
574
          if (ch == EOF)
575
            {
576
              /* This buffer is here specifically so
577
                 that the UNGET below will work.  */
578
              static char one_char_buf[1];
579
 
580
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
581
              state = old_state;
582
              from = fromend = one_char_buf + 1;
583
              fromlen = 1;
584
              UNGET ('\n');
585
              PUT (quotechar);
586
            }
587
          else if (ch == quotechar)
588
            {
589
              state = old_state;
590
              PUT (ch);
591
            }
592
#ifndef NO_STRING_ESCAPES
593
          else if (ch == '\\')
594
            {
595
              state = 6;
596
              PUT (ch);
597
            }
598
#endif
599
          else if (scrub_m68k_mri && ch == '\n')
600
            {
601
              /* Just quietly terminate the string.  This permits lines like
602
                   bne  label   loop if we haven't reach end yet.  */
603
              state = old_state;
604
              UNGET (ch);
605
              PUT ('\'');
606
            }
607
          else
608
            {
609
              PUT (ch);
610
            }
611
          continue;
612
 
613
        case 6:
614
          state = 5;
615
          ch = GET ();
616
          switch (ch)
617
            {
618
              /* Handle strings broken across lines, by turning '\n' into
619
                 '\\' and 'n'.  */
620
            case '\n':
621
              UNGET ('n');
622
              add_newlines++;
623
              PUT ('\\');
624
              continue;
625
 
626
            case EOF:
627
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
628
              PUT (quotechar);
629
              continue;
630
 
631
            case '"':
632
            case '\\':
633
            case 'b':
634
            case 'f':
635
            case 'n':
636
            case 'r':
637
            case 't':
638
            case 'v':
639
            case 'x':
640
            case 'X':
641
            case '0':
642
            case '1':
643
            case '2':
644
            case '3':
645
            case '4':
646
            case '5':
647
            case '6':
648
            case '7':
649
              break;
650
 
651
            default:
652
#ifdef ONLY_STANDARD_ESCAPES
653
              as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
654
#endif
655
              break;
656
            }
657
          PUT (ch);
658
          continue;
659
 
660
#ifdef DOUBLEBAR_PARALLEL
661
        case 13:
662
          ch = GET ();
663
          if (ch != '|')
664
            abort ();
665
 
666
          /* Reset back to state 1 and pretend that we are parsing a
667
             line from just after the first white space.  */
668
          state = 1;
669
          PUT ('|');
670
          continue;
671
#endif
672
#ifdef TC_Z80
673
        case 16:
674
          /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
675
          ch = GET ();
676
          if (ch == 'f' || ch == 'F')
677
            {
678
              state = 17;
679
              PUT (ch);
680
            }
681
          else
682
            {
683
              state = 9;
684
              break;
685
            }
686
        case 17:
687
          /* We have seen "af" at the start of a symbol,
688
             a ' here is a part of that symbol.  */
689
          ch = GET ();
690
          state = 9;
691
          if (ch == '\'')
692
            /* Change to avoid warning about unclosed string.  */
693
            PUT ('`');
694
          else if (ch != EOF)
695
            UNGET (ch);
696
          break;
697
#endif
698
        }
699
 
700
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
701
 
702
      /* flushchar: */
703
      ch = GET ();
704
 
705
#ifdef TC_IA64
706
      if (ch == '(' && (state == 0 || state == 1))
707
        {
708
          state += 14;
709
          PUT (ch);
710
          continue;
711
        }
712
      else if (state == 14 || state == 15)
713
        {
714
          if (ch == ')')
715
            {
716
              state -= 14;
717
              PUT (ch);
718
              ch = GET ();
719
            }
720
          else
721
            {
722
              PUT (ch);
723
              continue;
724
            }
725
        }
726
#endif
727
 
728
    recycle:
729
 
730
#if defined TC_ARM && defined OBJ_ELF
731
      /* We need to watch out for .symver directives.  See the comment later
732
         in this function.  */
733
      if (symver_state == NULL)
734
        {
735
          if ((state == 0 || state == 1) && ch == symver_pseudo[0])
736
            symver_state = symver_pseudo + 1;
737
        }
738
      else
739
        {
740
          /* We advance to the next state if we find the right
741
             character.  */
742
          if (ch != '\0' && (*symver_state == ch))
743
            ++symver_state;
744
          else if (*symver_state != '\0')
745
            /* We did not get the expected character, or we didn't
746
               get a valid terminating character after seeing the
747
               entire pseudo-op, so we must go back to the beginning.  */
748
            symver_state = NULL;
749
          else
750
            {
751
              /* We've read the entire pseudo-op.  If this is the end
752
                 of the line, go back to the beginning.  */
753
              if (IS_NEWLINE (ch))
754
                symver_state = NULL;
755
            }
756
        }
757
#endif /* TC_ARM && OBJ_ELF */
758
 
759
#ifdef TC_M68K
760
      /* We want to have pseudo-ops which control whether we are in
761
         MRI mode or not.  Unfortunately, since m68k MRI mode affects
762
         the scrubber, that means that we need a special purpose
763
         recognizer here.  */
764
      if (mri_state == NULL)
765
        {
766
          if ((state == 0 || state == 1)
767
              && ch == mri_pseudo[0])
768
            mri_state = mri_pseudo + 1;
769
        }
770
      else
771
        {
772
          /* We advance to the next state if we find the right
773
             character, or if we need a space character and we get any
774
             whitespace character, or if we need a '0' and we get a
775
             '1' (this is so that we only need one state to handle
776
             ``.mri 0'' and ``.mri 1'').  */
777
          if (ch != '\0'
778
              && (*mri_state == ch
779
                  || (*mri_state == ' '
780
                      && lex[ch] == LEX_IS_WHITESPACE)
781
                  || (*mri_state == '0'
782
                      && ch == '1')))
783
            {
784
              mri_last_ch = ch;
785
              ++mri_state;
786
            }
787
          else if (*mri_state != '\0'
788
                   || (lex[ch] != LEX_IS_WHITESPACE
789
                       && lex[ch] != LEX_IS_NEWLINE))
790
            {
791
              /* We did not get the expected character, or we didn't
792
                 get a valid terminating character after seeing the
793
                 entire pseudo-op, so we must go back to the
794
                 beginning.  */
795
              mri_state = NULL;
796
            }
797
          else
798
            {
799
              /* We've read the entire pseudo-op.  mri_last_ch is
800
                 either '0' or '1' indicating whether to enter or
801
                 leave MRI mode.  */
802
              do_scrub_begin (mri_last_ch == '1');
803
              mri_state = NULL;
804
 
805
              /* We continue handling the character as usual.  The
806
                 main gas reader must also handle the .mri pseudo-op
807
                 to control expression parsing and the like.  */
808
            }
809
        }
810
#endif
811
 
812
      if (ch == EOF)
813
        {
814
          if (state != 0)
815
            {
816
              as_warn (_("end of file not at end of a line; newline inserted"));
817
              state = 0;
818
              PUT ('\n');
819
            }
820
          goto fromeof;
821
        }
822
 
823
      switch (lex[ch])
824
        {
825
        case LEX_IS_WHITESPACE:
826
          do
827
            {
828
              ch = GET ();
829
            }
830
          while (ch != EOF && IS_WHITESPACE (ch));
831
          if (ch == EOF)
832
            goto fromeof;
833
 
834
          if (state == 0)
835
            {
836
              /* Preserve a single whitespace character at the
837
                 beginning of a line.  */
838
              state = 1;
839
              UNGET (ch);
840
              PUT (' ');
841
              break;
842
            }
843
 
844
#ifdef KEEP_WHITE_AROUND_COLON
845
          if (lex[ch] == LEX_IS_COLON)
846
            {
847
              /* Only keep this white if there's no white *after* the
848
                 colon.  */
849
              ch2 = GET ();
850
              if (ch2 != EOF)
851
                UNGET (ch2);
852
              if (!IS_WHITESPACE (ch2))
853
                {
854
                  state = 9;
855
                  UNGET (ch);
856
                  PUT (' ');
857
                  break;
858
                }
859
            }
860
#endif
861
          if (IS_COMMENT (ch)
862
              || ch == '/'
863
              || IS_LINE_SEPARATOR (ch)
864
              || IS_PARALLEL_SEPARATOR (ch))
865
            {
866
              if (scrub_m68k_mri)
867
                {
868
                  /* In MRI mode, we keep these spaces.  */
869
                  UNGET (ch);
870
                  PUT (' ');
871
                  break;
872
                }
873
              goto recycle;
874
            }
875
 
876
          /* If we're in state 2 or 11, we've seen a non-white
877
             character followed by whitespace.  If the next character
878
             is ':', this is whitespace after a label name which we
879
             normally must ignore.  In MRI mode, though, spaces are
880
             not permitted between the label and the colon.  */
881
          if ((state == 2 || state == 11)
882
              && lex[ch] == LEX_IS_COLON
883
              && ! scrub_m68k_mri)
884
            {
885
              state = 1;
886
              PUT (ch);
887
              break;
888
            }
889
 
890
          switch (state)
891
            {
892
            case 1:
893
              /* We can arrive here if we leave a leading whitespace
894
                 character at the beginning of a line.  */
895
              goto recycle;
896
            case 2:
897
              state = 3;
898
              if (to + 1 < toend)
899
                {
900
                  /* Optimize common case by skipping UNGET/GET.  */
901
                  PUT (' ');    /* Sp after opco */
902
                  goto recycle;
903
                }
904
              UNGET (ch);
905
              PUT (' ');
906
              break;
907
            case 3:
908
              if (scrub_m68k_mri)
909
                {
910
                  /* In MRI mode, we keep these spaces.  */
911
                  UNGET (ch);
912
                  PUT (' ');
913
                  break;
914
                }
915
              goto recycle;     /* Sp in operands */
916
            case 9:
917
            case 10:
918
              if (scrub_m68k_mri)
919
                {
920
                  /* In MRI mode, we keep these spaces.  */
921
                  state = 3;
922
                  UNGET (ch);
923
                  PUT (' ');
924
                  break;
925
                }
926
              state = 10;       /* Sp after symbol char */
927
              goto recycle;
928
            case 11:
929
              if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
930
                state = 1;
931
              else
932
                {
933
                  /* We know that ch is not ':', since we tested that
934
                     case above.  Therefore this is not a label, so it
935
                     must be the opcode, and we've just seen the
936
                     whitespace after it.  */
937
                  state = 3;
938
                }
939
              UNGET (ch);
940
              PUT (' ');        /* Sp after label definition.  */
941
              break;
942
            default:
943
              BAD_CASE (state);
944
            }
945
          break;
946
 
947
        case LEX_IS_TWOCHAR_COMMENT_1ST:
948
          ch2 = GET ();
949
          if (ch2 == '*')
950
            {
951
              for (;;)
952
                {
953
                  do
954
                    {
955
                      ch2 = GET ();
956
                      if (ch2 != EOF && IS_NEWLINE (ch2))
957
                        add_newlines++;
958
                    }
959
                  while (ch2 != EOF && ch2 != '*');
960
 
961
                  while (ch2 == '*')
962
                    ch2 = GET ();
963
 
964
                  if (ch2 == EOF || ch2 == '/')
965
                    break;
966
 
967
                  /* This UNGET will ensure that we count newlines
968
                     correctly.  */
969
                  UNGET (ch2);
970
                }
971
 
972
              if (ch2 == EOF)
973
                as_warn (_("end of file in multiline comment"));
974
 
975
              ch = ' ';
976
              goto recycle;
977
            }
978
#ifdef DOUBLESLASH_LINE_COMMENTS
979
          else if (ch2 == '/')
980
            {
981
              do
982
                {
983
                  ch = GET ();
984
                }
985
              while (ch != EOF && !IS_NEWLINE (ch));
986
              if (ch == EOF)
987
                as_warn ("end of file in comment; newline inserted");
988
              state = 0;
989
              PUT ('\n');
990
              break;
991
            }
992
#endif
993
          else
994
            {
995
              if (ch2 != EOF)
996
                UNGET (ch2);
997
              if (state == 9 || state == 10)
998
                state = 3;
999
              PUT (ch);
1000
            }
1001
          break;
1002
 
1003
        case LEX_IS_STRINGQUOTE:
1004
          quotechar = ch;
1005
          if (state == 10)
1006
            {
1007
              /* Preserve the whitespace in foo "bar".  */
1008
              UNGET (ch);
1009
              state = 3;
1010
              PUT (' ');
1011
 
1012
              /* PUT didn't jump out.  We could just break, but we
1013
                 know what will happen, so optimize a bit.  */
1014
              ch = GET ();
1015
              old_state = 3;
1016
            }
1017
          else if (state == 9)
1018
            old_state = 3;
1019
          else
1020
            old_state = state;
1021
          state = 5;
1022
          PUT (ch);
1023
          break;
1024
 
1025
#ifndef IEEE_STYLE
1026
        case LEX_IS_ONECHAR_QUOTE:
1027
#ifdef H_TICK_HEX
1028
          if (state == 9 && enable_h_tick_hex)
1029
            {
1030
              char c;
1031
 
1032
              c = GET ();
1033
              as_warn ("'%c found after symbol", c);
1034
              UNGET (c);
1035
            }
1036
#endif
1037
          if (state == 10)
1038
            {
1039
              /* Preserve the whitespace in foo 'b'.  */
1040
              UNGET (ch);
1041
              state = 3;
1042
              PUT (' ');
1043
              break;
1044
            }
1045
          ch = GET ();
1046
          if (ch == EOF)
1047
            {
1048
              as_warn (_("end of file after a one-character quote; \\0 inserted"));
1049
              ch = 0;
1050
            }
1051
          if (ch == '\\')
1052
            {
1053
              ch = GET ();
1054
              if (ch == EOF)
1055
                {
1056
                  as_warn (_("end of file in escape character"));
1057
                  ch = '\\';
1058
                }
1059
              else
1060
                ch = process_escape (ch);
1061
            }
1062
          sprintf (out_buf, "%d", (int) (unsigned char) ch);
1063
 
1064
          /* None of these 'x constants for us.  We want 'x'.  */
1065
          if ((ch = GET ()) != '\'')
1066
            {
1067
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1068
              as_warn (_("missing close quote; (assumed)"));
1069
#else
1070
              if (ch != EOF)
1071
                UNGET (ch);
1072
#endif
1073
            }
1074
          if (strlen (out_buf) == 1)
1075
            {
1076
              PUT (out_buf[0]);
1077
              break;
1078
            }
1079
          if (state == 9)
1080
            old_state = 3;
1081
          else
1082
            old_state = state;
1083
          state = -1;
1084
          out_string = out_buf;
1085
          PUT (*out_string++);
1086
          break;
1087
#endif
1088
 
1089
        case LEX_IS_COLON:
1090
#ifdef KEEP_WHITE_AROUND_COLON
1091
          state = 9;
1092
#else
1093
          if (state == 9 || state == 10)
1094
            state = 3;
1095
          else if (state != 3)
1096
            state = 1;
1097
#endif
1098
          PUT (ch);
1099
          break;
1100
 
1101
        case LEX_IS_NEWLINE:
1102
          /* Roll out a bunch of newlines from inside comments, etc.  */
1103
          if (add_newlines)
1104
            {
1105
              --add_newlines;
1106
              UNGET (ch);
1107
            }
1108
          /* Fall through.  */
1109
 
1110
        case LEX_IS_LINE_SEPARATOR:
1111
          state = 0;
1112
          PUT (ch);
1113
          break;
1114
 
1115
        case LEX_IS_PARALLEL_SEPARATOR:
1116
          state = 1;
1117
          PUT (ch);
1118
          break;
1119
 
1120
#ifdef TC_V850
1121
        case LEX_IS_DOUBLEDASH_1ST:
1122
          ch2 = GET ();
1123
          if (ch2 != '-')
1124
            {
1125
              if (ch2 != EOF)
1126
                UNGET (ch2);
1127
              goto de_fault;
1128
            }
1129
          /* Read and skip to end of line.  */
1130
          do
1131
            {
1132
              ch = GET ();
1133
            }
1134
          while (ch != EOF && ch != '\n');
1135
 
1136
          if (ch == EOF)
1137
            as_warn (_("end of file in comment; newline inserted"));
1138
 
1139
          state = 0;
1140
          PUT ('\n');
1141
          break;
1142
#endif
1143
#ifdef DOUBLEBAR_PARALLEL
1144
        case LEX_IS_DOUBLEBAR_1ST:
1145
          ch2 = GET ();
1146
          if (ch2 != EOF)
1147
            UNGET (ch2);
1148
          if (ch2 != '|')
1149
            goto de_fault;
1150
 
1151
          /* Handle '||' in two states as invoking PUT twice might
1152
             result in the first one jumping out of this loop.  We'd
1153
             then lose track of the state and one '|' char.  */
1154
          state = 13;
1155
          PUT ('|');
1156
          break;
1157
#endif
1158
        case LEX_IS_LINE_COMMENT_START:
1159
          /* FIXME-someday: The two character comment stuff was badly
1160
             thought out.  On i386, we want '/' as line comment start
1161
             AND we want C style comments.  hence this hack.  The
1162
             whole lexical process should be reworked.  xoxorich.  */
1163
          if (ch == '/')
1164
            {
1165
              ch2 = GET ();
1166
              if (ch2 == '*')
1167
                {
1168
                  old_state = 3;
1169
                  state = -2;
1170
                  break;
1171
                }
1172
              else
1173
                {
1174
                  UNGET (ch2);
1175
                }
1176
            }
1177
 
1178
          if (state == 0 || state == 1)  /* Only comment at start of line.  */
1179
            {
1180
              int startch;
1181
 
1182
              startch = ch;
1183
 
1184
              do
1185
                {
1186
                  ch = GET ();
1187
                }
1188
              while (ch != EOF && IS_WHITESPACE (ch));
1189
 
1190
              if (ch == EOF)
1191
                {
1192
                  as_warn (_("end of file in comment; newline inserted"));
1193
                  PUT ('\n');
1194
                  break;
1195
                }
1196
 
1197
              if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1198
                {
1199
                  /* Not a cpp line.  */
1200
                  while (ch != EOF && !IS_NEWLINE (ch))
1201
                    ch = GET ();
1202
                  if (ch == EOF)
1203
                    as_warn (_("end of file in comment; newline inserted"));
1204
                  state = 0;
1205
                  PUT ('\n');
1206
                  break;
1207
                }
1208
              /* Looks like `# 123 "filename"' from cpp.  */
1209
              UNGET (ch);
1210
              old_state = 4;
1211
              state = -1;
1212
              if (scrub_m68k_mri)
1213
                out_string = "\tlinefile ";
1214
              else
1215
                out_string = "\t.linefile ";
1216
              PUT (*out_string++);
1217
              break;
1218
            }
1219
 
1220
#ifdef TC_D10V
1221
          /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1222
             Trap is the only short insn that has a first operand that is
1223
             neither register nor label.
1224
             We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
1225
             We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1226
             already LEX_IS_LINE_COMMENT_START.  However, it is the
1227
             only character in line_comment_chars for d10v, hence we
1228
             can recognize it as such.  */
1229
          /* An alternative approach would be to reset the state to 1 when
1230
             we see '||', '<-' or '->', but that seems to be overkill.  */
1231
          if (state == 10)
1232
            PUT (' ');
1233
#endif
1234
          /* We have a line comment character which is not at the
1235
             start of a line.  If this is also a normal comment
1236
             character, fall through.  Otherwise treat it as a default
1237
             character.  */
1238
          if (strchr (tc_comment_chars, ch) == NULL
1239
              && (! scrub_m68k_mri
1240
                  || (ch != '!' && ch != '*')))
1241
            goto de_fault;
1242
          if (scrub_m68k_mri
1243
              && (ch == '!' || ch == '*' || ch == '#')
1244
              && state != 1
1245
              && state != 10)
1246
            goto de_fault;
1247
          /* Fall through.  */
1248
        case LEX_IS_COMMENT_START:
1249
#if defined TC_ARM && defined OBJ_ELF
1250
          /* On the ARM, `@' is the comment character.
1251
             Unfortunately this is also a special character in ELF .symver
1252
             directives (and .type, though we deal with those another way).
1253
             So we check if this line is such a directive, and treat
1254
             the character as default if so.  This is a hack.  */
1255
          if ((symver_state != NULL) && (*symver_state == 0))
1256
            goto de_fault;
1257
#endif
1258
 
1259
#ifdef TC_ARM
1260
          /* For the ARM, care is needed not to damage occurrences of \@
1261
             by stripping the @ onwards.  Yuck.  */
1262
          if (to > tostart && *(to - 1) == '\\')
1263
            /* Do not treat the @ as a start-of-comment.  */
1264
            goto de_fault;
1265
#endif
1266
 
1267
#ifdef WARN_COMMENTS
1268
          if (!found_comment)
1269
            as_where (&found_comment_file, &found_comment);
1270
#endif
1271
          do
1272
            {
1273
              ch = GET ();
1274
            }
1275
          while (ch != EOF && !IS_NEWLINE (ch));
1276
          if (ch == EOF)
1277
            as_warn (_("end of file in comment; newline inserted"));
1278
          state = 0;
1279
          PUT ('\n');
1280
          break;
1281
 
1282
#ifdef H_TICK_HEX
1283
        case LEX_IS_H:
1284
          /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1285
             the H' with 0x to make them gas-style hex characters.  */
1286
          if (enable_h_tick_hex)
1287
            {
1288
              char quot;
1289
 
1290
              quot = GET ();
1291
              if (quot == '\'')
1292
                {
1293
                  UNGET ('x');
1294
                  ch = '0';
1295
                }
1296
              else
1297
                UNGET (quot);
1298
            }
1299
          /* FALL THROUGH */
1300
#endif
1301
 
1302
        case LEX_IS_SYMBOL_COMPONENT:
1303
          if (state == 10)
1304
            {
1305
              /* This is a symbol character following another symbol
1306
                 character, with whitespace in between.  We skipped
1307
                 the whitespace earlier, so output it now.  */
1308
              UNGET (ch);
1309
              state = 3;
1310
              PUT (' ');
1311
              break;
1312
            }
1313
 
1314
#ifdef TC_Z80
1315
          /* "af'" is a symbol containing '\''.  */
1316
          if (state == 3 && (ch == 'a' || ch == 'A'))
1317
            {
1318
              state = 16;
1319
              PUT (ch);
1320
              ch = GET ();
1321
              if (ch == 'f' || ch == 'F')
1322
                {
1323
                  state = 17;
1324
                  PUT (ch);
1325
                  break;
1326
                }
1327
              else
1328
                {
1329
                  state = 9;
1330
                  if (!IS_SYMBOL_COMPONENT (ch))
1331
                    {
1332
                      if (ch != EOF)
1333
                        UNGET (ch);
1334
                      break;
1335
                    }
1336
                }
1337
            }
1338
#endif
1339
          if (state == 3)
1340
            state = 9;
1341
 
1342
          /* This is a common case.  Quickly copy CH and all the
1343
             following symbol component or normal characters.  */
1344
          if (to + 1 < toend
1345
              && mri_state == NULL
1346
#if defined TC_ARM && defined OBJ_ELF
1347
              && symver_state == NULL
1348
#endif
1349
              )
1350
            {
1351
              char *s;
1352
              int len;
1353
 
1354
              for (s = from; s < fromend; s++)
1355
                {
1356
                  int type;
1357
 
1358
                  ch2 = *(unsigned char *) s;
1359
                  type = lex[ch2];
1360
                  if (type != 0
1361
                      && type != LEX_IS_SYMBOL_COMPONENT)
1362
                    break;
1363
                }
1364
 
1365
              if (s > from)
1366
                /* Handle the last character normally, for
1367
                   simplicity.  */
1368
                --s;
1369
 
1370
              len = s - from;
1371
 
1372
              if (len > (toend - to) - 1)
1373
                len = (toend - to) - 1;
1374
 
1375
              if (len > 0)
1376
                {
1377
                  PUT (ch);
1378
                  memcpy (to, from, len);
1379
                  to += len;
1380
                  from += len;
1381
                  if (to >= toend)
1382
                    goto tofull;
1383
                  ch = GET ();
1384
                }
1385
            }
1386
 
1387
          /* Fall through.  */
1388
        default:
1389
        de_fault:
1390
          /* Some relatively `normal' character.  */
1391
          if (state == 0)
1392
            {
1393
              state = 11;       /* Now seeing label definition.  */
1394
            }
1395
          else if (state == 1)
1396
            {
1397
              state = 2;        /* Ditto.  */
1398
            }
1399
          else if (state == 9)
1400
            {
1401
              if (!IS_SYMBOL_COMPONENT (ch))
1402
                state = 3;
1403
            }
1404
          else if (state == 10)
1405
            {
1406
              if (ch == '\\')
1407
                {
1408
                  /* Special handling for backslash: a backslash may
1409
                     be the beginning of a formal parameter (of a
1410
                     macro) following another symbol character, with
1411
                     whitespace in between.  If that is the case, we
1412
                     output a space before the parameter.  Strictly
1413
                     speaking, correct handling depends upon what the
1414
                     macro parameter expands into; if the parameter
1415
                     expands into something which does not start with
1416
                     an operand character, then we don't want to keep
1417
                     the space.  We don't have enough information to
1418
                     make the right choice, so here we are making the
1419
                     choice which is more likely to be correct.  */
1420
                  if (to + 1 >= toend)
1421
                    {
1422
                      /* If we're near the end of the buffer, save the
1423
                         character for the next time round.  Otherwise
1424
                         we'll lose our state.  */
1425
                      UNGET (ch);
1426
                      goto tofull;
1427
                    }
1428
                  *to++ = ' ';
1429
                }
1430
 
1431
              state = 3;
1432
            }
1433
          PUT (ch);
1434
          break;
1435
        }
1436
    }
1437
 
1438
  /*NOTREACHED*/
1439
 
1440
 fromeof:
1441
  /* We have reached the end of the input.  */
1442
  return to - tostart;
1443
 
1444
 tofull:
1445
  /* The output buffer is full.  Save any input we have not yet
1446
     processed.  */
1447
  if (fromend > from)
1448
    {
1449
      saved_input = from;
1450
      saved_input_len = fromend - from;
1451
    }
1452
  else
1453
    saved_input = NULL;
1454
 
1455
  return to - tostart;
1456
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.