OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [binutils-2.18.50/] [gas/] [app.c] - Blame information for rev 329

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* This is the Assembler Pre-Processor
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2006, 2007
   Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out
   excess spaces, turns single-quoted characters into a decimal
   constant, and turns the # in # <number> <filename> <garbage> into a
   .linefile.  This needs better error-handling.  */
28
 
29
#include "as.h"
30
 
31
/* When the compiler does not claim strict ISO C conformance, make
   `const' expand to nothing unless someone already defined it.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI mode never applies, so tests on this name
   fold to a constant.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Lexical class of each input byte, indexed by character code and
   filled in by do_scrub_begin.  An entry of zero means the character
   has not been given a class.  */
static char lex[256];

/* Characters that do_scrub_begin always marks as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST           12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST            13
#endif
#define LEX_IS_PARALLEL_SEPARATOR       14

/* Classification predicates.  C indexes lex[] directly, so it must be
   a valid index (0..255); callers test for EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)        (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
90
 
91
/* FIXME-soon: The entire lexer/parser thingy should be
92
   built statically at compile time rather than dynamically
93
   each and every time the assembler is run.  xoxorich.  */
94
 
95
void
96
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
97
{
98
  const char *p;
99
  int c;
100
 
101
  lex[' '] = LEX_IS_WHITESPACE;
102
  lex['\t'] = LEX_IS_WHITESPACE;
103
  lex['\r'] = LEX_IS_WHITESPACE;
104
  lex['\n'] = LEX_IS_NEWLINE;
105
  lex[':'] = LEX_IS_COLON;
106
 
107
#ifdef TC_M68K
108
  scrub_m68k_mri = m68k_mri;
109
 
110
  if (! m68k_mri)
111
#endif
112
    {
113
      lex['"'] = LEX_IS_STRINGQUOTE;
114
 
115
#if ! defined (TC_HPPA) && ! defined (TC_I370)
116
      /* I370 uses single-quotes to delimit integer, float constants.  */
117
      lex['\''] = LEX_IS_ONECHAR_QUOTE;
118
#endif
119
 
120
#ifdef SINGLE_QUOTE_STRINGS
121
      lex['\''] = LEX_IS_STRINGQUOTE;
122
#endif
123
    }
124
 
125
  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
126
     in state 5 of do_scrub_chars must be changed.  */
127
 
128
  /* Note that these override the previous defaults, e.g. if ';' is a
129
     comment char, then it isn't a line separator.  */
130
  for (p = symbol_chars; *p; ++p)
131
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
132
 
133
  for (c = 128; c < 256; ++c)
134
    lex[c] = LEX_IS_SYMBOL_COMPONENT;
135
 
136
#ifdef tc_symbol_chars
137
  /* This macro permits the processor to specify all characters which
138
     may appears in an operand.  This will prevent the scrubber from
139
     discarding meaningful whitespace in certain cases.  The i386
140
     backend uses this to support prefixes, which can confuse the
141
     scrubber as to whether it is parsing operands or opcodes.  */
142
  for (p = tc_symbol_chars; *p; ++p)
143
    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
144
#endif
145
 
146
  /* The m68k backend wants to be able to change comment_chars.  */
147
#ifndef tc_comment_chars
148
#define tc_comment_chars comment_chars
149
#endif
150
  for (p = tc_comment_chars; *p; p++)
151
    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
152
 
153
  for (p = line_comment_chars; *p; p++)
154
    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
155
 
156
  for (p = line_separator_chars; *p; p++)
157
    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
158
 
159
#ifdef tc_parallel_separator_chars
160
  /* This macro permits the processor to specify all characters which
161
     separate parallel insns on the same line.  */
162
  for (p = tc_parallel_separator_chars; *p; p++)
163
    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
164
#endif
165
 
166
  /* Only allow slash-star comments if slash is not in use.
167
     FIXME: This isn't right.  We should always permit them.  */
168
  if (lex['/'] == 0)
169
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
170
 
171
#ifdef TC_M68K
172
  if (m68k_mri)
173
    {
174
      lex['\''] = LEX_IS_STRINGQUOTE;
175
      lex[';'] = LEX_IS_COMMENT_START;
176
      lex['*'] = LEX_IS_LINE_COMMENT_START;
177
      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
178
         then it can't be used in an expression.  */
179
      lex['!'] = LEX_IS_LINE_COMMENT_START;
180
    }
181
#endif
182
 
183
#ifdef TC_V850
184
  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
185
#endif
186
#ifdef DOUBLEBAR_PARALLEL
187
  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
188
#endif
189
#ifdef TC_D30V
190
  /* Must do this is we want VLIW instruction with "->" or "<-".  */
191
  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
192
#endif
193
}
194
 
195
/* Saved state of the scrubber.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to go back to once state -1, -2 or 5 has finished.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
static char out_buf[20];
/* Incremented for each newline swallowed inside a continued string or
   a multi-line comment.  */
static int add_newlines;
/* Input handed to the scrubber but not yet consumed, or NULL; when set,
   do_scrub_chars resumes from here instead of reading more input.  */
static char *saved_input;
static int saved_input_len;
/* Buffer that the GET macro in do_scrub_chars refills.  */
static char input_buffer[32 * 1024];
/* How far the .mri pseudo-op recogniser has matched, or NULL.  */
static const char *mri_state;
/* Last character accepted by the .mri recogniser.  */
static char mri_last_ch;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member mirrors the file-scope variable of the same name.
   saved_input, when non-NULL, is a heap copy made by app_push and
   released by app_pop.  */

struct app_save
{
  int          state;
  int          old_state;
  char *       out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  char *       saved_input;
  int          saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
230
 
231
char *
232
app_push (void)
233
{
234
  register struct app_save *saved;
235
 
236
  saved = (struct app_save *) xmalloc (sizeof (*saved));
237
  saved->state = state;
238
  saved->old_state = old_state;
239
  saved->out_string = out_string;
240
  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
241
  saved->add_newlines = add_newlines;
242
  if (saved_input == NULL)
243
    saved->saved_input = NULL;
244
  else
245
    {
246
      saved->saved_input = xmalloc (saved_input_len);
247
      memcpy (saved->saved_input, saved_input, saved_input_len);
248
      saved->saved_input_len = saved_input_len;
249
    }
250
#ifdef TC_M68K
251
  saved->scrub_m68k_mri = scrub_m68k_mri;
252
#endif
253
  saved->mri_state = mri_state;
254
  saved->mri_last_ch = mri_last_ch;
255
#if defined TC_ARM && defined OBJ_ELF
256
  saved->symver_state = symver_state;
257
#endif
258
 
259
  /* do_scrub_begin() is not useful, just wastes time.  */
260
 
261
  state = 0;
262
  saved_input = NULL;
263
 
264
  return (char *) saved;
265
}
266
 
267
void
268
app_pop (char *arg)
269
{
270
  register struct app_save *saved = (struct app_save *) arg;
271
 
272
  /* There is no do_scrub_end ().  */
273
  state = saved->state;
274
  old_state = saved->old_state;
275
  out_string = saved->out_string;
276
  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
277
  add_newlines = saved->add_newlines;
278
  if (saved->saved_input == NULL)
279
    saved_input = NULL;
280
  else
281
    {
282
      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
283
      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
284
      saved_input = input_buffer;
285
      saved_input_len = saved->saved_input_len;
286
      free (saved->saved_input);
287
    }
288
#ifdef TC_M68K
289
  scrub_m68k_mri = saved->scrub_m68k_mri;
290
#endif
291
  mri_state = saved->mri_state;
292
  mri_last_ch = saved->mri_last_ch;
293
#if defined TC_ARM && defined OBJ_ELF
294
  symver_state = saved->symver_state;
295
#endif
296
 
297
  free (arg);
298
}
299
 
300
/* Return the character denoted by the escape letter CH, i.e. the
   character that followed a backslash.  The letters b, f, n, r and t
   map to the corresponding control characters; a single or double
   quote, and every other value, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
326
 
327
/* This function is called to process input characters.  The GET
328
   parameter is used to retrieve more input characters.  GET should
329
   set its parameter to point to a buffer, and return the length of
330
   the buffer; it should return 0 at end of file.  The scrubbed output
331
   characters are put into the buffer starting at TOSTART; the TOSTART
332
   buffer is TOLEN bytes in length.  The function returns the number
333
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
334
   end of file was seen.  This function is arranged as a state
335
   machine, and saves its state so that it may return at any point.
336
   This is the way the old code used to work.  */
337
 
338
int
339
do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
340
{
341
  char *to = tostart;
342
  char *toend = tostart + tolen;
343
  char *from;
344
  char *fromend;
345
  int fromlen;
346
  register int ch, ch2 = 0;
347
  /* Character that started the string we're working on.  */
348
  static char quotechar;
349
 
350
  /*State 0: beginning of normal line
351
          1: After first whitespace on line (flush more white)
352
          2: After first non-white (opcode) on line (keep 1white)
353
          3: after second white on line (into operands) (flush white)
354
          4: after putting out a .linefile, put out digits
355
          5: parsing a string, then go to old-state
356
          6: putting out \ escape in a "d string.
357
          7: no longer used
358
          8: no longer used
359
          9: After seeing symbol char in state 3 (keep 1white after symchar)
360
         10: After seeing whitespace in state 9 (keep white before symchar)
361
         11: After seeing a symbol character in state 0 (eg a label definition)
362
         -1: output string in out_string and go to the state in old_state
363
         -2: flush text until a '*' '/' is seen, then go to state old_state
364
#ifdef TC_V850
365
         12: After seeing a dash, looking for a second dash as a start
366
             of comment.
367
#endif
368
#ifdef DOUBLEBAR_PARALLEL
369
         13: After seeing a vertical bar, looking for a second
370
             vertical bar as a parallel expression separator.
371
#endif
372
#ifdef TC_IA64
373
         14: After seeing a `(' at state 0, looking for a `)' as
374
             predicate.
375
         15: After seeing a `(' at state 1, looking for a `)' as
376
             predicate.
377
#endif
378
#ifdef TC_Z80
379
         16: After seeing an 'a' or an 'A' at the start of a symbol
380
         17: After seeing an 'f' or an 'F' in state 16
381
#endif
382
          */
383
 
384
  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
385
     constructs like ``.loc 1 20''.  This was turning into ``.loc
386
     120''.  States 9 and 10 ensure that a space is never dropped in
387
     between characters which could appear in an identifier.  Ian
388
     Taylor, ian@cygnus.com.
389
 
390
     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
391
     correctly on the PA (and any other target where colons are optional).
392
     Jeff Law, law@cs.utah.edu.
393
 
394
     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
395
     get squashed into "cmp r1,r2||trap#1", with the all important space
396
     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
397
 
398
  /* This macro gets the next input character.  */
399
 
400
#define GET()                                                   \
401
  (from < fromend                                               \
402
   ? * (unsigned char *) (from++)                               \
403
   : (saved_input = NULL,                                       \
404
      fromlen = (*get) (input_buffer, sizeof input_buffer),     \
405
      from = input_buffer,                                      \
406
      fromend = from + fromlen,                                 \
407
      (fromlen == 0                                              \
408
       ? EOF                                                    \
409
       : * (unsigned char *) (from++))))
410
 
411
  /* This macro pushes a character back on the input stream.  */
412
 
413
#define UNGET(uch) (*--from = (uch))
414
 
415
  /* This macro puts a character into the output buffer.  If this
416
     character fills the output buffer, this macro jumps to the label
417
     TOFULL.  We use this rather ugly approach because we need to
418
     handle two different termination conditions: EOF on the input
419
     stream, and a full output buffer.  It would be simpler if we
420
     always read in the entire input stream before processing it, but
421
     I don't want to make such a significant change to the assembler's
422
     memory usage.  */
423
 
424
#define PUT(pch)                                \
425
  do                                            \
426
    {                                           \
427
      *to++ = (pch);                            \
428
      if (to >= toend)                          \
429
        goto tofull;                            \
430
    }                                           \
431
  while (0)
432
 
433
  if (saved_input != NULL)
434
    {
435
      from = saved_input;
436
      fromend = from + saved_input_len;
437
    }
438
  else
439
    {
440
      fromlen = (*get) (input_buffer, sizeof input_buffer);
441
      if (fromlen == 0)
442
        return 0;
443
      from = input_buffer;
444
      fromend = from + fromlen;
445
    }
446
 
447
  while (1)
448
    {
449
      /* The cases in this switch end with continue, in order to
450
         branch back to the top of this while loop and generate the
451
         next output character in the appropriate state.  */
452
      switch (state)
453
        {
454
        case -1:
455
          ch = *out_string++;
456
          if (*out_string == '\0')
457
            {
458
              state = old_state;
459
              old_state = 3;
460
            }
461
          PUT (ch);
462
          continue;
463
 
464
        case -2:
465
          for (;;)
466
            {
467
              do
468
                {
469
                  ch = GET ();
470
 
471
                  if (ch == EOF)
472
                    {
473
                      as_warn (_("end of file in comment"));
474
                      goto fromeof;
475
                    }
476
 
477
                  if (ch == '\n')
478
                    PUT ('\n');
479
                }
480
              while (ch != '*');
481
 
482
              while ((ch = GET ()) == '*')
483
                ;
484
 
485
              if (ch == EOF)
486
                {
487
                  as_warn (_("end of file in comment"));
488
                  goto fromeof;
489
                }
490
 
491
              if (ch == '/')
492
                break;
493
 
494
              UNGET (ch);
495
            }
496
 
497
          state = old_state;
498
          UNGET (' ');
499
          continue;
500
 
501
        case 4:
502
          ch = GET ();
503
          if (ch == EOF)
504
            goto fromeof;
505
          else if (ch >= '0' && ch <= '9')
506
            PUT (ch);
507
          else
508
            {
509
              while (ch != EOF && IS_WHITESPACE (ch))
510
                ch = GET ();
511
              if (ch == '"')
512
                {
513
                  quotechar = ch;
514
                  state = 5;
515
                  old_state = 3;
516
                  PUT (ch);
517
                }
518
              else
519
                {
520
                  while (ch != EOF && ch != '\n')
521
                    ch = GET ();
522
                  state = 0;
523
                  PUT (ch);
524
                }
525
            }
526
          continue;
527
 
528
        case 5:
529
          /* We are going to copy everything up to a quote character,
530
             with special handling for a backslash.  We try to
531
             optimize the copying in the simple case without using the
532
             GET and PUT macros.  */
533
          {
534
            char *s;
535
            int len;
536
 
537
            for (s = from; s < fromend; s++)
538
              {
539
                ch = *s;
540
                if (ch == '\\'
541
                    || ch == quotechar
542
                    || ch == '\n')
543
                  break;
544
              }
545
            len = s - from;
546
            if (len > toend - to)
547
              len = toend - to;
548
            if (len > 0)
549
              {
550
                memcpy (to, from, len);
551
                to += len;
552
                from += len;
553
                if (to >= toend)
554
                  goto tofull;
555
              }
556
          }
557
 
558
          ch = GET ();
559
          if (ch == EOF)
560
            {
561
              /* This buffer is here specifically so
562
                 that the UNGET below will work.  */
563
              static char one_char_buf[1];
564
 
565
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
566
              state = old_state;
567
              from = fromend = one_char_buf + 1;
568
              fromlen = 1;
569
              UNGET ('\n');
570
              PUT (quotechar);
571
            }
572
          else if (ch == quotechar)
573
            {
574
              state = old_state;
575
              PUT (ch);
576
            }
577
#ifndef NO_STRING_ESCAPES
578
          else if (ch == '\\')
579
            {
580
              state = 6;
581
              PUT (ch);
582
            }
583
#endif
584
          else if (scrub_m68k_mri && ch == '\n')
585
            {
586
              /* Just quietly terminate the string.  This permits lines like
587
                   bne  label   loop if we haven't reach end yet.  */
588
              state = old_state;
589
              UNGET (ch);
590
              PUT ('\'');
591
            }
592
          else
593
            {
594
              PUT (ch);
595
            }
596
          continue;
597
 
598
        case 6:
599
          state = 5;
600
          ch = GET ();
601
          switch (ch)
602
            {
603
              /* Handle strings broken across lines, by turning '\n' into
604
                 '\\' and 'n'.  */
605
            case '\n':
606
              UNGET ('n');
607
              add_newlines++;
608
              PUT ('\\');
609
              continue;
610
 
611
            case EOF:
612
              as_warn (_("end of file in string; '%c' inserted"), quotechar);
613
              PUT (quotechar);
614
              continue;
615
 
616
            case '"':
617
            case '\\':
618
            case 'b':
619
            case 'f':
620
            case 'n':
621
            case 'r':
622
            case 't':
623
            case 'v':
624
            case 'x':
625
            case 'X':
626
            case '0':
627
            case '1':
628
            case '2':
629
            case '3':
630
            case '4':
631
            case '5':
632
            case '6':
633
            case '7':
634
              break;
635
 
636
            default:
637
#ifdef ONLY_STANDARD_ESCAPES
638
              as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
639
#endif
640
              break;
641
            }
642
          PUT (ch);
643
          continue;
644
 
645
#ifdef DOUBLEBAR_PARALLEL
646
        case 13:
647
          ch = GET ();
648
          if (ch != '|')
649
            abort ();
650
 
651
          /* Reset back to state 1 and pretend that we are parsing a
652
             line from just after the first white space.  */
653
          state = 1;
654
          PUT ('|');
655
          continue;
656
#endif
657
#ifdef TC_Z80
658
        case 16:
659
          /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
660
          ch = GET ();
661
          if (ch == 'f' || ch == 'F')
662
            {
663
              state = 17;
664
              PUT (ch);
665
            }
666
          else
667
            {
668
              state = 9;
669
              break;
670
            }
671
        case 17:
672
          /* We have seen "af" at the start of a symbol,
673
             a ' here is a part of that symbol.  */
674
          ch = GET ();
675
          state = 9;
676
          if (ch == '\'')
677
            /* Change to avoid warning about unclosed string.  */
678
            PUT ('`');
679
          else
680
            UNGET (ch);
681
          break;
682
#endif
683
        }
684
 
685
      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
686
 
687
      /* flushchar: */
688
      ch = GET ();
689
 
690
#ifdef TC_IA64
691
      if (ch == '(' && (state == 0 || state == 1))
692
        {
693
          state += 14;
694
          PUT (ch);
695
          continue;
696
        }
697
      else if (state == 14 || state == 15)
698
        {
699
          if (ch == ')')
700
            {
701
              state -= 14;
702
              PUT (ch);
703
              ch = GET ();
704
            }
705
          else
706
            {
707
              PUT (ch);
708
              continue;
709
            }
710
        }
711
#endif
712
 
713
    recycle:
714
 
715
#if defined TC_ARM && defined OBJ_ELF
716
      /* We need to watch out for .symver directives.  See the comment later
717
         in this function.  */
718
      if (symver_state == NULL)
719
        {
720
          if ((state == 0 || state == 1) && ch == symver_pseudo[0])
721
            symver_state = symver_pseudo + 1;
722
        }
723
      else
724
        {
725
          /* We advance to the next state if we find the right
726
             character.  */
727
          if (ch != '\0' && (*symver_state == ch))
728
            ++symver_state;
729
          else if (*symver_state != '\0')
730
            /* We did not get the expected character, or we didn't
731
               get a valid terminating character after seeing the
732
               entire pseudo-op, so we must go back to the beginning.  */
733
            symver_state = NULL;
734
          else
735
            {
736
              /* We've read the entire pseudo-op.  If this is the end
737
                 of the line, go back to the beginning.  */
738
              if (IS_NEWLINE (ch))
739
                symver_state = NULL;
740
            }
741
        }
742
#endif /* TC_ARM && OBJ_ELF */
743
 
744
#ifdef TC_M68K
745
      /* We want to have pseudo-ops which control whether we are in
746
         MRI mode or not.  Unfortunately, since m68k MRI mode affects
747
         the scrubber, that means that we need a special purpose
748
         recognizer here.  */
749
      if (mri_state == NULL)
750
        {
751
          if ((state == 0 || state == 1)
752
              && ch == mri_pseudo[0])
753
            mri_state = mri_pseudo + 1;
754
        }
755
      else
756
        {
757
          /* We advance to the next state if we find the right
758
             character, or if we need a space character and we get any
759
             whitespace character, or if we need a '0' and we get a
760
             '1' (this is so that we only need one state to handle
761
             ``.mri 0'' and ``.mri 1'').  */
762
          if (ch != '\0'
763
              && (*mri_state == ch
764
                  || (*mri_state == ' '
765
                      && lex[ch] == LEX_IS_WHITESPACE)
766
                  || (*mri_state == '0'
767
                      && ch == '1')))
768
            {
769
              mri_last_ch = ch;
770
              ++mri_state;
771
            }
772
          else if (*mri_state != '\0'
773
                   || (lex[ch] != LEX_IS_WHITESPACE
774
                       && lex[ch] != LEX_IS_NEWLINE))
775
            {
776
              /* We did not get the expected character, or we didn't
777
                 get a valid terminating character after seeing the
778
                 entire pseudo-op, so we must go back to the
779
                 beginning.  */
780
              mri_state = NULL;
781
            }
782
          else
783
            {
784
              /* We've read the entire pseudo-op.  mri_last_ch is
785
                 either '0' or '1' indicating whether to enter or
786
                 leave MRI mode.  */
787
              do_scrub_begin (mri_last_ch == '1');
788
              mri_state = NULL;
789
 
790
              /* We continue handling the character as usual.  The
791
                 main gas reader must also handle the .mri pseudo-op
792
                 to control expression parsing and the like.  */
793
            }
794
        }
795
#endif
796
 
797
      if (ch == EOF)
798
        {
799
          if (state != 0)
800
            {
801
              as_warn (_("end of file not at end of a line; newline inserted"));
802
              state = 0;
803
              PUT ('\n');
804
            }
805
          goto fromeof;
806
        }
807
 
808
      switch (lex[ch])
809
        {
810
        case LEX_IS_WHITESPACE:
811
          do
812
            {
813
              ch = GET ();
814
            }
815
          while (ch != EOF && IS_WHITESPACE (ch));
816
          if (ch == EOF)
817
            goto fromeof;
818
 
819
          if (state == 0)
820
            {
821
              /* Preserve a single whitespace character at the
822
                 beginning of a line.  */
823
              state = 1;
824
              UNGET (ch);
825
              PUT (' ');
826
              break;
827
            }
828
 
829
#ifdef KEEP_WHITE_AROUND_COLON
830
          if (lex[ch] == LEX_IS_COLON)
831
            {
832
              /* Only keep this white if there's no white *after* the
833
                 colon.  */
834
              ch2 = GET ();
835
              UNGET (ch2);
836
              if (!IS_WHITESPACE (ch2))
837
                {
838
                  state = 9;
839
                  UNGET (ch);
840
                  PUT (' ');
841
                  break;
842
                }
843
            }
844
#endif
845
          if (IS_COMMENT (ch)
846
              || ch == '/'
847
              || IS_LINE_SEPARATOR (ch)
848
              || IS_PARALLEL_SEPARATOR (ch))
849
            {
850
              if (scrub_m68k_mri)
851
                {
852
                  /* In MRI mode, we keep these spaces.  */
853
                  UNGET (ch);
854
                  PUT (' ');
855
                  break;
856
                }
857
              goto recycle;
858
            }
859
 
860
          /* If we're in state 2 or 11, we've seen a non-white
861
             character followed by whitespace.  If the next character
862
             is ':', this is whitespace after a label name which we
863
             normally must ignore.  In MRI mode, though, spaces are
864
             not permitted between the label and the colon.  */
865
          if ((state == 2 || state == 11)
866
              && lex[ch] == LEX_IS_COLON
867
              && ! scrub_m68k_mri)
868
            {
869
              state = 1;
870
              PUT (ch);
871
              break;
872
            }
873
 
874
          switch (state)
875
            {
876
            case 1:
877
              /* We can arrive here if we leave a leading whitespace
878
                 character at the beginning of a line.  */
879
              goto recycle;
880
            case 2:
881
              state = 3;
882
              if (to + 1 < toend)
883
                {
884
                  /* Optimize common case by skipping UNGET/GET.  */
885
                  PUT (' ');    /* Sp after opco */
886
                  goto recycle;
887
                }
888
              UNGET (ch);
889
              PUT (' ');
890
              break;
891
            case 3:
892
              if (scrub_m68k_mri)
893
                {
894
                  /* In MRI mode, we keep these spaces.  */
895
                  UNGET (ch);
896
                  PUT (' ');
897
                  break;
898
                }
899
              goto recycle;     /* Sp in operands */
900
            case 9:
901
            case 10:
902
              if (scrub_m68k_mri)
903
                {
904
                  /* In MRI mode, we keep these spaces.  */
905
                  state = 3;
906
                  UNGET (ch);
907
                  PUT (' ');
908
                  break;
909
                }
910
              state = 10;       /* Sp after symbol char */
911
              goto recycle;
912
            case 11:
913
              if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
914
                state = 1;
915
              else
916
                {
917
                  /* We know that ch is not ':', since we tested that
918
                     case above.  Therefore this is not a label, so it
919
                     must be the opcode, and we've just seen the
920
                     whitespace after it.  */
921
                  state = 3;
922
                }
923
              UNGET (ch);
924
              PUT (' ');        /* Sp after label definition.  */
925
              break;
926
            default:
927
              BAD_CASE (state);
928
            }
929
          break;
930
 
931
        case LEX_IS_TWOCHAR_COMMENT_1ST:
932
          ch2 = GET ();
933
          if (ch2 == '*')
934
            {
935
              for (;;)
936
                {
937
                  do
938
                    {
939
                      ch2 = GET ();
940
                      if (ch2 != EOF && IS_NEWLINE (ch2))
941
                        add_newlines++;
942
                    }
943
                  while (ch2 != EOF && ch2 != '*');
944
 
945
                  while (ch2 == '*')
946
                    ch2 = GET ();
947
 
948
                  if (ch2 == EOF || ch2 == '/')
949
                    break;
950
 
951
                  /* This UNGET will ensure that we count newlines
952
                     correctly.  */
953
                  UNGET (ch2);
954
                }
955
 
956
              if (ch2 == EOF)
957
                as_warn (_("end of file in multiline comment"));
958
 
959
              ch = ' ';
960
              goto recycle;
961
            }
962
#ifdef DOUBLESLASH_LINE_COMMENTS
963
          else if (ch2 == '/')
964
            {
965
              do
966
                {
967
                  ch = GET ();
968
                }
969
              while (ch != EOF && !IS_NEWLINE (ch));
970
              if (ch == EOF)
971
                as_warn ("end of file in comment; newline inserted");
972
              state = 0;
973
              PUT ('\n');
974
              break;
975
            }
976
#endif
977
          else
978
            {
979
              if (ch2 != EOF)
980
                UNGET (ch2);
981
              if (state == 9 || state == 10)
982
                state = 3;
983
              PUT (ch);
984
            }
985
          break;
986
 
987
        case LEX_IS_STRINGQUOTE:
988
          quotechar = ch;
989
          if (state == 10)
990
            {
991
              /* Preserve the whitespace in foo "bar".  */
992
              UNGET (ch);
993
              state = 3;
994
              PUT (' ');
995
 
996
              /* PUT didn't jump out.  We could just break, but we
997
                 know what will happen, so optimize a bit.  */
998
              ch = GET ();
999
              old_state = 3;
1000
            }
1001
          else if (state == 9)
1002
            old_state = 3;
1003
          else
1004
            old_state = state;
1005
          state = 5;
1006
          PUT (ch);
1007
          break;
1008
 
1009
#ifndef IEEE_STYLE
1010
        case LEX_IS_ONECHAR_QUOTE:
1011
          if (state == 10)
1012
            {
1013
              /* Preserve the whitespace in foo 'b'.  */
1014
              UNGET (ch);
1015
              state = 3;
1016
              PUT (' ');
1017
              break;
1018
            }
1019
          ch = GET ();
1020
          if (ch == EOF)
1021
            {
1022
              as_warn (_("end of file after a one-character quote; \\0 inserted"));
1023
              ch = 0;
1024
            }
1025
          if (ch == '\\')
1026
            {
1027
              ch = GET ();
1028
              if (ch == EOF)
1029
                {
1030
                  as_warn (_("end of file in escape character"));
1031
                  ch = '\\';
1032
                }
1033
              else
1034
                ch = process_escape (ch);
1035
            }
1036
          sprintf (out_buf, "%d", (int) (unsigned char) ch);
1037
 
1038
          /* None of these 'x constants for us.  We want 'x'.  */
1039
          if ((ch = GET ()) != '\'')
1040
            {
1041
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1042
              as_warn (_("missing close quote; (assumed)"));
1043
#else
1044
              if (ch != EOF)
1045
                UNGET (ch);
1046
#endif
1047
            }
1048
          if (strlen (out_buf) == 1)
1049
            {
1050
              PUT (out_buf[0]);
1051
              break;
1052
            }
1053
          if (state == 9)
1054
            old_state = 3;
1055
          else
1056
            old_state = state;
1057
          state = -1;
1058
          out_string = out_buf;
1059
          PUT (*out_string++);
1060
          break;
1061
#endif
1062
 
1063
        case LEX_IS_COLON:
1064
#ifdef KEEP_WHITE_AROUND_COLON
1065
          state = 9;
1066
#else
1067
          if (state == 9 || state == 10)
1068
            state = 3;
1069
          else if (state != 3)
1070
            state = 1;
1071
#endif
1072
          PUT (ch);
1073
          break;
1074
 
1075
        case LEX_IS_NEWLINE:
1076
          /* Roll out a bunch of newlines from inside comments, etc.  */
1077
          if (add_newlines)
1078
            {
1079
              --add_newlines;
1080
              UNGET (ch);
1081
            }
1082
          /* Fall through.  */
1083
 
1084
        case LEX_IS_LINE_SEPARATOR:
1085
          state = 0;
1086
          PUT (ch);
1087
          break;
1088
 
1089
        case LEX_IS_PARALLEL_SEPARATOR:
1090
          state = 1;
1091
          PUT (ch);
1092
          break;
1093
 
1094
#ifdef TC_V850
1095
        case LEX_IS_DOUBLEDASH_1ST:
1096
          ch2 = GET ();
1097
          if (ch2 != '-')
1098
            {
1099
              UNGET (ch2);
1100
              goto de_fault;
1101
            }
1102
          /* Read and skip to end of line.  */
1103
          do
1104
            {
1105
              ch = GET ();
1106
            }
1107
          while (ch != EOF && ch != '\n');
1108
 
1109
          if (ch == EOF)
1110
            as_warn (_("end of file in comment; newline inserted"));
1111
 
1112
          state = 0;
1113
          PUT ('\n');
1114
          break;
1115
#endif
1116
#ifdef DOUBLEBAR_PARALLEL
1117
        case LEX_IS_DOUBLEBAR_1ST:
1118
          ch2 = GET ();
1119
          UNGET (ch2);
1120
          if (ch2 != '|')
1121
            goto de_fault;
1122
 
1123
          /* Handle '||' in two states as invoking PUT twice might
1124
             result in the first one jumping out of this loop.  We'd
1125
             then lose track of the state and one '|' char.  */
1126
          state = 13;
1127
          PUT ('|');
1128
          break;
1129
#endif
1130
        case LEX_IS_LINE_COMMENT_START:
1131
          /* FIXME-someday: The two character comment stuff was badly
1132
             thought out.  On i386, we want '/' as line comment start
1133
             AND we want C style comments.  hence this hack.  The
1134
             whole lexical process should be reworked.  xoxorich.  */
1135
          if (ch == '/')
1136
            {
1137
              ch2 = GET ();
1138
              if (ch2 == '*')
1139
                {
1140
                  old_state = 3;
1141
                  state = -2;
1142
                  break;
1143
                }
1144
              else
1145
                {
1146
                  UNGET (ch2);
1147
                }
1148
            }
1149
 
1150
          if (state == 0 || state == 1)  /* Only comment at start of line.  */
1151
            {
1152
              int startch;
1153
 
1154
              startch = ch;
1155
 
1156
              do
1157
                {
1158
                  ch = GET ();
1159
                }
1160
              while (ch != EOF && IS_WHITESPACE (ch));
1161
 
1162
              if (ch == EOF)
1163
                {
1164
                  as_warn (_("end of file in comment; newline inserted"));
1165
                  PUT ('\n');
1166
                  break;
1167
                }
1168
 
1169
              if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1170
                {
1171
                  /* Not a cpp line.  */
1172
                  while (ch != EOF && !IS_NEWLINE (ch))
1173
                    ch = GET ();
1174
                  if (ch == EOF)
1175
                    as_warn (_("end of file in comment; newline inserted"));
1176
                  state = 0;
1177
                  PUT ('\n');
1178
                  break;
1179
                }
1180
              /* Looks like `# 123 "filename"' from cpp.  */
1181
              UNGET (ch);
1182
              old_state = 4;
1183
              state = -1;
1184
              if (scrub_m68k_mri)
1185
                out_string = "\tlinefile ";
1186
              else
1187
                out_string = "\t.linefile ";
1188
              PUT (*out_string++);
1189
              break;
1190
            }
1191
 
1192
#ifdef TC_D10V
1193
          /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1194
             Trap is the only short insn that has a first operand that is
1195
             neither register nor label.
1196
             We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1197
             We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1198
             already LEX_IS_LINE_COMMENT_START.  However, it is the
1199
             only character in line_comment_chars for d10v, hence we
1200
             can recognize it as such.  */
1201
          /* An alternative approach would be to reset the state to 1 when
1202
             we see '||', '<'- or '->', but that seems to be overkill.  */
1203
          if (state == 10)
1204
            PUT (' ');
1205
#endif
1206
          /* We have a line comment character which is not at the
1207
             start of a line.  If this is also a normal comment
1208
             character, fall through.  Otherwise treat it as a default
1209
             character.  */
1210
          if (strchr (tc_comment_chars, ch) == NULL
1211
              && (! scrub_m68k_mri
1212
                  || (ch != '!' && ch != '*')))
1213
            goto de_fault;
1214
          if (scrub_m68k_mri
1215
              && (ch == '!' || ch == '*' || ch == '#')
1216
              && state != 1
1217
              && state != 10)
1218
            goto de_fault;
1219
          /* Fall through.  */
1220
        case LEX_IS_COMMENT_START:
1221
#if defined TC_ARM && defined OBJ_ELF
1222
          /* On the ARM, `@' is the comment character.
1223
             Unfortunately this is also a special character in ELF .symver
1224
             directives (and .type, though we deal with those another way).
1225
             So we check if this line is such a directive, and treat
1226
             the character as default if so.  This is a hack.  */
1227
          if ((symver_state != NULL) && (*symver_state == 0))
1228
            goto de_fault;
1229
#endif
1230
 
1231
#ifdef TC_ARM
1232
          /* For the ARM, care is needed not to damage occurrences of \@
1233
             by stripping the @ onwards.  Yuck.  */
1234
          if (to > tostart && *(to - 1) == '\\')
1235
            /* Do not treat the @ as a start-of-comment.  */
1236
            goto de_fault;
1237
#endif
1238
 
1239
#ifdef WARN_COMMENTS
1240
          if (!found_comment)
1241
            as_where (&found_comment_file, &found_comment);
1242
#endif
1243
          do
1244
            {
1245
              ch = GET ();
1246
            }
1247
          while (ch != EOF && !IS_NEWLINE (ch));
1248
          if (ch == EOF)
1249
            as_warn (_("end of file in comment; newline inserted"));
1250
          state = 0;
1251
          PUT ('\n');
1252
          break;
1253
 
1254
        case LEX_IS_SYMBOL_COMPONENT:
1255
          if (state == 10)
1256
            {
1257
              /* This is a symbol character following another symbol
1258
                 character, with whitespace in between.  We skipped
1259
                 the whitespace earlier, so output it now.  */
1260
              UNGET (ch);
1261
              state = 3;
1262
              PUT (' ');
1263
              break;
1264
            }
1265
 
1266
#ifdef TC_Z80
1267
          /* "af'" is a symbol containing '\''.  */
1268
          if (state == 3 && (ch == 'a' || ch == 'A'))
1269
            {
1270
              state = 16;
1271
              PUT (ch);
1272
              ch = GET ();
1273
              if (ch == 'f' || ch == 'F')
1274
                {
1275
                  state = 17;
1276
                  PUT (ch);
1277
                  break;
1278
                }
1279
              else
1280
                {
1281
                  state = 9;
1282
                  if (!IS_SYMBOL_COMPONENT (ch))
1283
                    {
1284
                      UNGET (ch);
1285
                      break;
1286
                    }
1287
                }
1288
            }
1289
#endif
1290
          if (state == 3)
1291
            state = 9;
1292
 
1293
          /* This is a common case.  Quickly copy CH and all the
1294
             following symbol component or normal characters.  */
1295
          if (to + 1 < toend
1296
              && mri_state == NULL
1297
#if defined TC_ARM && defined OBJ_ELF
1298
              && symver_state == NULL
1299
#endif
1300
              )
1301
            {
1302
              char *s;
1303
              int len;
1304
 
1305
              for (s = from; s < fromend; s++)
1306
                {
1307
                  int type;
1308
 
1309
                  ch2 = *(unsigned char *) s;
1310
                  type = lex[ch2];
1311
                  if (type != 0
1312
                      && type != LEX_IS_SYMBOL_COMPONENT)
1313
                    break;
1314
                }
1315
 
1316
              if (s > from)
1317
                /* Handle the last character normally, for
1318
                   simplicity.  */
1319
                --s;
1320
 
1321
              len = s - from;
1322
 
1323
              if (len > (toend - to) - 1)
1324
                len = (toend - to) - 1;
1325
 
1326
              if (len > 0)
1327
                {
1328
                  PUT (ch);
1329
                  memcpy (to, from, len);
1330
                  to += len;
1331
                  from += len;
1332
                  if (to >= toend)
1333
                    goto tofull;
1334
                  ch = GET ();
1335
                }
1336
            }
1337
 
1338
          /* Fall through.  */
1339
        default:
1340
        de_fault:
1341
          /* Some relatively `normal' character.  */
1342
          if (state == 0)
1343
            {
1344
              state = 11;       /* Now seeing label definition.  */
1345
            }
1346
          else if (state == 1)
1347
            {
1348
              state = 2;        /* Ditto.  */
1349
            }
1350
          else if (state == 9)
1351
            {
1352
              if (!IS_SYMBOL_COMPONENT (ch))
1353
                state = 3;
1354
            }
1355
          else if (state == 10)
1356
            {
1357
              if (ch == '\\')
1358
                {
1359
                  /* Special handling for backslash: a backslash may
1360
                     be the beginning of a formal parameter (of a
1361
                     macro) following another symbol character, with
1362
                     whitespace in between.  If that is the case, we
1363
                     output a space before the parameter.  Strictly
1364
                     speaking, correct handling depends upon what the
1365
                     macro parameter expands into; if the parameter
1366
                     expands into something which does not start with
1367
                     an operand character, then we don't want to keep
1368
                     the space.  We don't have enough information to
1369
                     make the right choice, so here we are making the
1370
                     choice which is more likely to be correct.  */
1371
                  if (to + 1 >= toend)
1372
                    {
1373
                      /* If we're near the end of the buffer, save the
1374
                         character for the next time round.  Otherwise
1375
                         we'll lose our state.  */
1376
                      UNGET (ch);
1377
                      goto tofull;
1378
                    }
1379
                  *to++ = ' ';
1380
                }
1381
 
1382
              state = 3;
1383
            }
1384
          PUT (ch);
1385
          break;
1386
        }
1387
    }
1388
 
1389
  /*NOTREACHED*/
1390
 
1391
 fromeof:
1392
  /* We have reached the end of the input.  */
1393
  return to - tostart;
1394
 
1395
 tofull:
1396
  /* The output buffer is full.  Save any input we have not yet
1397
     processed.  */
1398
  if (fromend > from)
1399
    {
1400
      saved_input = from;
1401
      saved_input_len = fromend - from;
1402
    }
1403
  else
1404
    saved_input = NULL;
1405
 
1406
  return to - tostart;
1407
}
1408
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.