OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [libcpp/] [lex.c] - Blame information for rev 867

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* CPP Library - lexical analysis.
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
   Contributed by Per Bothner, 1994-95.
4
   Based on CCCP program by Paul Rubin, June 1986
5
   Adapted to ANSI C, Richard Stallman, Jan 1987
6
   Broken out to separate file, Zack Weinberg, Mar 2000
7
 
8
This program is free software; you can redistribute it and/or modify it
9
under the terms of the GNU General Public License as published by the
10
Free Software Foundation; either version 2, or (at your option) any
11
later version.
12
 
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "cpplib.h"
25
#include "internal.h"
26
 
27
/* Spelling categories.  One of these is stored in the CATEGORY field
   of every struct token_spelling entry; the TK table macro forms the
   enumerator name by pasting onto the SPELL_ prefix, so the names
   must stay as they are.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT = 1,
  SPELL_LITERAL = 2,
  SPELL_NONE = 3
};
34
 
35
struct token_spelling
36
{
37
  enum spell_type category;
38
  const unsigned char *name;
39
};
40
 
41
/* Spellings of the six digraphs, in table order.  */
static const unsigned char *const digraph_spellings[] =
{
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};
43
 
44
#define OP(e, s) { SPELL_OPERATOR, U s  },
45
#define TK(e, s) { SPELL_ ## s,    U #e },
46
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47
#undef OP
48
#undef TK
49
 
50
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
 
53
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54
static int skip_line_comment (cpp_reader *);
55
static void skip_whitespace (cpp_reader *, cppchar_t);
56
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59
                            unsigned int, enum cpp_ttype);
60
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61
static int name_p (cpp_reader *, const cpp_string *);
62
static tokenrun *next_tokenrun (tokenrun *);
63
 
64
static _cpp_buff *new_buff (size_t);
65
 
66
 
67
/* Utility routine:
68
 
69
   Compares, the token TOKEN to the NUL-terminated string STRING.
70
   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71
int
72
cpp_ideq (const cpp_token *token, const char *string)
73
{
74
  if (token->type != CPP_NAME)
75
    return 0;
76
 
77
  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78
}
79
 
80
/* Record a note TYPE at byte POS into the current cleaned logical
81
   line.  */
82
static void
83
add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84
{
85
  if (buffer->notes_used == buffer->notes_cap)
86
    {
87
      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88
      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89
                                  buffer->notes_cap);
90
    }
91
 
92
  buffer->notes[buffer->notes_used].pos = pos;
93
  buffer->notes[buffer->notes_used].type = type;
94
  buffer->notes_used++;
95
}
96
 
97
/* Returns with a logical line that contains no escaped newlines or
98
   trigraphs.  This is a time-critical inner loop.  */
99
void
100
_cpp_clean_line (cpp_reader *pfile)
101
{
102
  cpp_buffer *buffer;
103
  const uchar *s;
104
  uchar c, *d, *p;
105
 
106
  buffer = pfile->buffer;
107
  buffer->cur_note = buffer->notes_used = 0;
108
  buffer->cur = buffer->line_base = buffer->next_line;
109
  buffer->need_line = false;
110
  s = buffer->next_line - 1;
111
 
112
  if (!buffer->from_stage3)
113
    {
114
      /* Short circuit for the common case of an un-escaped line with
115
         no trigraphs.  The primary win here is by not writing any
116
         data back to memory until we have to.  */
117
      for (;;)
118
        {
119
          c = *++s;
120
          if (c == '\n' || c == '\r')
121
            {
122
              d = (uchar *) s;
123
 
124
              if (s == buffer->rlimit)
125
                goto done;
126
 
127
              /* DOS line ending? */
128
              if (c == '\r' && s[1] == '\n')
129
                s++;
130
 
131
              if (s == buffer->rlimit)
132
                goto done;
133
 
134
              /* check for escaped newline */
135
              p = d;
136
              while (p != buffer->next_line && is_nvspace (p[-1]))
137
                p--;
138
              if (p == buffer->next_line || p[-1] != '\\')
139
                goto done;
140
 
141
              /* Have an escaped newline; process it and proceed to
142
                 the slow path.  */
143
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144
              d = p - 2;
145
              buffer->next_line = p - 1;
146
              break;
147
            }
148
          if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149
            {
150
              /* Have a trigraph.  We may or may not have to convert
151
                 it.  Add a line note regardless, for -Wtrigraphs.  */
152
              add_line_note (buffer, s, s[2]);
153
              if (CPP_OPTION (pfile, trigraphs))
154
                {
155
                  /* We do, and that means we have to switch to the
156
                     slow path.  */
157
                  d = (uchar *) s;
158
                  *d = _cpp_trigraph_map[s[2]];
159
                  s += 2;
160
                  break;
161
                }
162
            }
163
        }
164
 
165
 
166
      for (;;)
167
        {
168
          c = *++s;
169
          *++d = c;
170
 
171
          if (c == '\n' || c == '\r')
172
            {
173
                  /* Handle DOS line endings.  */
174
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175
                s++;
176
              if (s == buffer->rlimit)
177
                break;
178
 
179
              /* Escaped?  */
180
              p = d;
181
              while (p != buffer->next_line && is_nvspace (p[-1]))
182
                p--;
183
              if (p == buffer->next_line || p[-1] != '\\')
184
                break;
185
 
186
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187
              d = p - 2;
188
              buffer->next_line = p - 1;
189
            }
190
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191
            {
192
              /* Add a note regardless, for the benefit of -Wtrigraphs.  */
193
              add_line_note (buffer, d, s[2]);
194
              if (CPP_OPTION (pfile, trigraphs))
195
                {
196
                  *d = _cpp_trigraph_map[s[2]];
197
                  s += 2;
198
                }
199
            }
200
        }
201
    }
202
  else
203
    {
204
      do
205
        s++;
206
      while (*s != '\n' && *s != '\r');
207
      d = (uchar *) s;
208
 
209
      /* Handle DOS line endings.  */
210
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211
        s++;
212
    }
213
 
214
 done:
215
  *d = '\n';
216
  /* A sentinel note that should never be processed.  */
217
  add_line_note (buffer, d + 1, '\n');
218
  buffer->next_line = s + 1;
219
}
220
 
221
/* Return true if the trigraph indicated by NOTE should be warned
222
   about in a comment.  */
223
static bool
224
warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
225
{
226
  const uchar *p;
227
 
228
  /* Within comments we don't warn about trigraphs, unless the
229
     trigraph forms an escaped newline, as that may change
230
     behavior.  */
231
  if (note->type != '/')
232
    return false;
233
 
234
  /* If -trigraphs, then this was an escaped newline iff the next note
235
     is coincident.  */
236
  if (CPP_OPTION (pfile, trigraphs))
237
    return note[1].pos == note->pos;
238
 
239
  /* Otherwise, see if this forms an escaped newline.  */
240
  p = note->pos + 3;
241
  while (is_nvspace (*p))
242
    p++;
243
 
244
  /* There might have been escaped newlines between the trigraph and the
245
     newline we found.  Hence the position test.  */
246
  return (*p == '\n' && p < note[1].pos);
247
}
248
 
249
/* Process the notes created by add_line_note as far as the current
250
   location.  */
251
void
252
_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
253
{
254
  cpp_buffer *buffer = pfile->buffer;
255
 
256
  for (;;)
257
    {
258
      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259
      unsigned int col;
260
 
261
      if (note->pos > buffer->cur)
262
        break;
263
 
264
      buffer->cur_note++;
265
      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266
 
267
      if (note->type == '\\' || note->type == ' ')
268
        {
269
          if (note->type == ' ' && !in_comment)
270
            cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271
                                 "backslash and newline separated by space");
272
 
273
          if (buffer->next_line > buffer->rlimit)
274
            {
275
              cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276
                                   "backslash-newline at end of file");
277
              /* Prevent "no newline at end of file" warning.  */
278
              buffer->next_line = buffer->rlimit;
279
            }
280
 
281
          buffer->line_base = note->pos;
282
          CPP_INCREMENT_LINE (pfile, 0);
283
        }
284
      else if (_cpp_trigraph_map[note->type])
285
        {
286
          if (CPP_OPTION (pfile, warn_trigraphs)
287
              && (!in_comment || warn_in_comment (pfile, note)))
288
            {
289
              if (CPP_OPTION (pfile, trigraphs))
290
                cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291
                                     "trigraph ??%c converted to %c",
292
                                     note->type,
293
                                     (int) _cpp_trigraph_map[note->type]);
294
              else
295
                {
296
                  cpp_error_with_line
297
                    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298
                     "trigraph ??%c ignored, use -trigraphs to enable",
299
                     note->type);
300
                }
301
            }
302
        }
303
      else
304
        abort ();
305
    }
306
}
307
 
308
/* Skip a C-style block comment.  We find the end of the comment by
309
   seeing if an asterisk is before every '/' we encounter.  Returns
310
   nonzero if comment terminated by EOF, zero otherwise.
311
 
312
   Buffer->cur points to the initial asterisk of the comment.  */
313
bool
314
_cpp_skip_block_comment (cpp_reader *pfile)
315
{
316
  cpp_buffer *buffer = pfile->buffer;
317
  const uchar *cur = buffer->cur;
318
  uchar c;
319
 
320
  cur++;
321
  if (*cur == '/')
322
    cur++;
323
 
324
  for (;;)
325
    {
326
      /* People like decorating comments with '*', so check for '/'
327
         instead for efficiency.  */
328
      c = *cur++;
329
 
330
      if (c == '/')
331
        {
332
          if (cur[-2] == '*')
333
            break;
334
 
335
          /* Warn about potential nested comments, but not if the '/'
336
             comes immediately before the true comment delimiter.
337
             Don't bother to get it right across escaped newlines.  */
338
          if (CPP_OPTION (pfile, warn_comments)
339
              && cur[0] == '*' && cur[1] != '/')
340
            {
341
              buffer->cur = cur;
342
              cpp_error_with_line (pfile, CPP_DL_WARNING,
343
                                   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344
                                   "\"/*\" within comment");
345
            }
346
        }
347
      else if (c == '\n')
348
        {
349
          unsigned int cols;
350
          buffer->cur = cur - 1;
351
          _cpp_process_line_notes (pfile, true);
352
          if (buffer->next_line >= buffer->rlimit)
353
            return true;
354
          _cpp_clean_line (pfile);
355
 
356
          cols = buffer->next_line - buffer->line_base;
357
          CPP_INCREMENT_LINE (pfile, cols);
358
 
359
          cur = buffer->cur;
360
        }
361
    }
362
 
363
  buffer->cur = cur;
364
  _cpp_process_line_notes (pfile, true);
365
  return false;
366
}
367
 
368
/* Skip a C++ line comment, leaving buffer->cur pointing to the
369
   terminating newline.  Handles escaped newlines.  Returns nonzero
370
   if a multiline comment.  */
371
static int
372
skip_line_comment (cpp_reader *pfile)
373
{
374
  cpp_buffer *buffer = pfile->buffer;
375
  unsigned int orig_line = pfile->line_table->highest_line;
376
 
377
  while (*buffer->cur != '\n')
378
    buffer->cur++;
379
 
380
  _cpp_process_line_notes (pfile, true);
381
  return orig_line != pfile->line_table->highest_line;
382
}
383
 
384
/* Skips whitespace, saving the next non-whitespace character.  */
385
static void
386
skip_whitespace (cpp_reader *pfile, cppchar_t c)
387
{
388
  cpp_buffer *buffer = pfile->buffer;
389
  bool saw_NUL = false;
390
 
391
  do
392
    {
393
      /* Horizontal space always OK.  */
394
      if (c == ' ' || c == '\t')
395
        ;
396
      /* Just \f \v or \0 left.  */
397
      else if (c == '\0')
398
        saw_NUL = true;
399
      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400
        cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401
                             CPP_BUF_COL (buffer),
402
                             "%s in preprocessing directive",
403
                             c == '\f' ? "form feed" : "vertical tab");
404
 
405
      c = *buffer->cur++;
406
    }
407
  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408
  while (is_nvspace (c));
409
 
410
  if (saw_NUL)
411
    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
412
 
413
  buffer->cur--;
414
}
415
 
416
/* See if the characters of a number token are valid in a name (no
417
   '.', '+' or '-').  */
418
static int
419
name_p (cpp_reader *pfile, const cpp_string *string)
420
{
421
  unsigned int i;
422
 
423
  for (i = 0; i < string->len; i++)
424
    if (!is_idchar (string->text[i]))
425
      return 0;
426
 
427
  return 1;
428
}
429
 
430
/* After parsing an identifier or other sequence, produce a warning about
431
   sequences not in NFC/NFKC.  */
432
static void
433
warn_about_normalization (cpp_reader *pfile,
434
                          const cpp_token *token,
435
                          const struct normalize_state *s)
436
{
437
  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438
      && !pfile->state.skipping)
439
    {
440
      /* Make sure that the token is printed using UCNs, even
441
         if we'd otherwise happily print UTF-8.  */
442
      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443
      size_t sz;
444
 
445
      sz = cpp_spell_token (pfile, token, buf, false) - buf;
446
      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448
                             "`%.*s' is not in NFKC", (int) sz, buf);
449
      else
450
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451
                             "`%.*s' is not in NFC", (int) sz, buf);
452
    }
453
}
454
 
455
/* Returns TRUE if the sequence starting at buffer->cur is invalid in
456
   an identifier.  FIRST is TRUE if this starts an identifier.  */
457
static bool
458
forms_identifier_p (cpp_reader *pfile, int first,
459
                    struct normalize_state *state)
460
{
461
  cpp_buffer *buffer = pfile->buffer;
462
 
463
  if (*buffer->cur == '$')
464
    {
465
      if (!CPP_OPTION (pfile, dollars_in_ident))
466
        return false;
467
 
468
      buffer->cur++;
469
      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
470
        {
471
          CPP_OPTION (pfile, warn_dollars) = 0;
472
          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
473
        }
474
 
475
      return true;
476
    }
477
 
478
  /* Is this a syntactically valid UCN?  */
479
  if (CPP_OPTION (pfile, extended_identifiers)
480
      && *buffer->cur == '\\'
481
      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482
    {
483
      buffer->cur += 2;
484
      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485
                          state))
486
        return true;
487
      buffer->cur -= 2;
488
    }
489
 
490
  return false;
491
}
492
 
493
/* Lex an identifier starting at BUFFER->CUR - 1.  */
494
static cpp_hashnode *
495
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496
                struct normalize_state *nst)
497
{
498
  cpp_hashnode *result;
499
  const uchar *cur;
500
  unsigned int len;
501
  unsigned int hash = HT_HASHSTEP (0, *base);
502
 
503
  cur = pfile->buffer->cur;
504
  if (! starts_ucn)
505
    while (ISIDNUM (*cur))
506
      {
507
        hash = HT_HASHSTEP (hash, *cur);
508
        cur++;
509
      }
510
  pfile->buffer->cur = cur;
511
  if (starts_ucn || forms_identifier_p (pfile, false, nst))
512
    {
513
      /* Slower version for identifiers containing UCNs (or $).  */
514
      do {
515
        while (ISIDNUM (*pfile->buffer->cur))
516
          {
517
            pfile->buffer->cur++;
518
            NORMALIZE_STATE_UPDATE_IDNUM (nst);
519
          }
520
      } while (forms_identifier_p (pfile, false, nst));
521
      result = _cpp_interpret_identifier (pfile, base,
522
                                          pfile->buffer->cur - base);
523
    }
524
  else
525
    {
526
      len = cur - base;
527
      hash = HT_HASHFINISH (hash, len);
528
 
529
      result = (cpp_hashnode *)
530
        ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531
    }
532
 
533
  /* Rarely, identifiers require diagnostics when lexed.  */
534
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535
                        && !pfile->state.skipping, 0))
536
    {
537
      /* It is allowed to poison the same identifier twice.  */
538
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540
                   NODE_NAME (result));
541
 
542
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543
         replacement list of a variadic macro.  */
544
      if (result == pfile->spec_nodes.n__VA_ARGS__
545
          && !pfile->state.va_args_ok)
546
        cpp_error (pfile, CPP_DL_PEDWARN,
547
                   "__VA_ARGS__ can only appear in the expansion"
548
                   " of a C99 variadic macro");
549
    }
550
 
551
  return result;
552
}
553
 
554
/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
555
static void
556
lex_number (cpp_reader *pfile, cpp_string *number,
557
            struct normalize_state *nst)
558
{
559
  const uchar *cur;
560
  const uchar *base;
561
  uchar *dest;
562
 
563
  base = pfile->buffer->cur - 1;
564
  do
565
    {
566
      cur = pfile->buffer->cur;
567
 
568
      /* N.B. ISIDNUM does not include $.  */
569
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
570
        {
571
          cur++;
572
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
573
        }
574
 
575
      pfile->buffer->cur = cur;
576
    }
577
  while (forms_identifier_p (pfile, false, nst));
578
 
579
  number->len = cur - base;
580
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581
  memcpy (dest, base, number->len);
582
  dest[number->len] = '\0';
583
  number->text = dest;
584
}
585
 
586
/* Create a token of type TYPE with a literal spelling.  */
587
static void
588
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589
                unsigned int len, enum cpp_ttype type)
590
{
591
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592
 
593
  memcpy (dest, base, len);
594
  dest[len] = '\0';
595
  token->type = type;
596
  token->val.str.len = len;
597
  token->val.str.text = dest;
598
}
599
 
600
/* Lexes a string, character constant, or angle-bracketed header file
601
   name.  The stored string contains the spelling, including opening
602
   quote and leading any leading 'L'.  It returns the type of the
603
   literal, or CPP_OTHER if it was not properly terminated.
604
 
605
   The spelling is NUL-terminated, but it is not guaranteed that this
606
   is the first NUL since embedded NULs are preserved.  */
607
static void
608
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
609
{
610
  bool saw_NUL = false;
611
  const uchar *cur;
612
  cppchar_t terminator;
613
  enum cpp_ttype type;
614
 
615
  cur = base;
616
  terminator = *cur++;
617
  if (terminator == 'L')
618
    terminator = *cur++;
619
  if (terminator == '\"')
620
    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621
  else if (terminator == '\'')
622
    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623
  else
624
    terminator = '>', type = CPP_HEADER_NAME;
625
 
626
  for (;;)
627
    {
628
      cppchar_t c = *cur++;
629
 
630
      /* In #include-style directives, terminators are not escapable.  */
631
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632
        cur++;
633
      else if (c == terminator)
634
        break;
635
      else if (c == '\n')
636
        {
637
          cur--;
638
          type = CPP_OTHER;
639
          break;
640
        }
641
      else if (c == '\0')
642
        saw_NUL = true;
643
    }
644
 
645
  if (saw_NUL && !pfile->state.skipping)
646
    cpp_error (pfile, CPP_DL_WARNING,
647
               "null character(s) preserved in literal");
648
 
649
  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
650
    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
651
               (int) terminator);
652
 
653
  pfile->buffer->cur = cur;
654
  create_literal (pfile, token, base, cur - base, type);
655
}
656
 
657
/* The stored comment includes the comment start and any terminator.  */
658
static void
659
save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
660
              cppchar_t type)
661
{
662
  unsigned char *buffer;
663
  unsigned int len, clen;
664
 
665
  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
666
 
667
  /* C++ comments probably (not definitely) have moved past a new
668
     line, which we don't want to save in the comment.  */
669
  if (is_vspace (pfile->buffer->cur[-1]))
670
    len--;
671
 
672
  /* If we are currently in a directive, then we need to store all
673
     C++ comments as C comments internally, and so we need to
674
     allocate a little extra space in that case.
675
 
676
     Note that the only time we encounter a directive here is
677
     when we are saving comments in a "#define".  */
678
  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
679
 
680
  buffer = _cpp_unaligned_alloc (pfile, clen);
681
 
682
  token->type = CPP_COMMENT;
683
  token->val.str.len = clen;
684
  token->val.str.text = buffer;
685
 
686
  buffer[0] = '/';
687
  memcpy (buffer + 1, from, len - 1);
688
 
689
  /* Finish conversion to a C comment, if necessary.  */
690
  if (pfile->state.in_directive && type == '/')
691
    {
692
      buffer[1] = '*';
693
      buffer[clen - 2] = '*';
694
      buffer[clen - 1] = '/';
695
    }
696
}
697
 
698
/* Allocate COUNT tokens for RUN.  */
699
void
700
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
701
{
702
  run->base = XNEWVEC (cpp_token, count);
703
  run->limit = run->base + count;
704
  run->next = NULL;
705
}
706
 
707
/* Returns the next tokenrun, or creates one if there is none.  */
708
static tokenrun *
709
next_tokenrun (tokenrun *run)
710
{
711
  if (run->next == NULL)
712
    {
713
      run->next = XNEW (tokenrun);
714
      run->next->prev = run;
715
      _cpp_init_tokenrun (run->next, 250);
716
    }
717
 
718
  return run->next;
719
}
720
 
721
/* Allocate a single token that is invalidated at the same time as the
722
   rest of the tokens on the line.  Has its line and col set to the
723
   same as the last lexed token, so that diagnostics appear in the
724
   right place.  */
725
cpp_token *
726
_cpp_temp_token (cpp_reader *pfile)
727
{
728
  cpp_token *old, *result;
729
 
730
  old = pfile->cur_token - 1;
731
  if (pfile->cur_token == pfile->cur_run->limit)
732
    {
733
      pfile->cur_run = next_tokenrun (pfile->cur_run);
734
      pfile->cur_token = pfile->cur_run->base;
735
    }
736
 
737
  result = pfile->cur_token++;
738
  result->src_loc = old->src_loc;
739
  return result;
740
}
741
 
742
/* Lex a token into RESULT (external interface).  Takes care of issues
743
   like directive handling, token lookahead, multiple include
744
   optimization and skipping.  */
745
const cpp_token *
746
_cpp_lex_token (cpp_reader *pfile)
747
{
748
  cpp_token *result;
749
 
750
  for (;;)
751
    {
752
      if (pfile->cur_token == pfile->cur_run->limit)
753
        {
754
          pfile->cur_run = next_tokenrun (pfile->cur_run);
755
          pfile->cur_token = pfile->cur_run->base;
756
        }
757
 
758
      if (pfile->lookaheads)
759
        {
760
          pfile->lookaheads--;
761
          result = pfile->cur_token++;
762
        }
763
      else
764
        result = _cpp_lex_direct (pfile);
765
 
766
      if (result->flags & BOL)
767
        {
768
          /* Is this a directive.  If _cpp_handle_directive returns
769
             false, it is an assembler #.  */
770
          if (result->type == CPP_HASH
771
              /* 6.10.3 p 11: Directives in a list of macro arguments
772
                 gives undefined behavior.  This implementation
773
                 handles the directive as normal.  */
774
              && pfile->state.parsing_args != 1)
775
            {
776
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
777
                {
778
                  if (pfile->directive_result.type == CPP_PADDING)
779
                    continue;
780
                  result = &pfile->directive_result;
781
                }
782
            }
783
          else if (pfile->state.in_deferred_pragma)
784
            result = &pfile->directive_result;
785
 
786
          if (pfile->cb.line_change && !pfile->state.skipping)
787
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
788
        }
789
 
790
      /* We don't skip tokens in directives.  */
791
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
792
        break;
793
 
794
      /* Outside a directive, invalidate controlling macros.  At file
795
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
796
         get here and MI optimization works.  */
797
      pfile->mi_valid = false;
798
 
799
      if (!pfile->state.skipping || result->type == CPP_EOF)
800
        break;
801
    }
802
 
803
  return result;
804
}
805
 
806
/* Returns true if a fresh line has been loaded.  */
807
bool
808
_cpp_get_fresh_line (cpp_reader *pfile)
809
{
810
  int return_at_eof;
811
 
812
  /* We can't get a new line until we leave the current directive.  */
813
  if (pfile->state.in_directive)
814
    return false;
815
 
816
  for (;;)
817
    {
818
      cpp_buffer *buffer = pfile->buffer;
819
 
820
      if (!buffer->need_line)
821
        return true;
822
 
823
      if (buffer->next_line < buffer->rlimit)
824
        {
825
          _cpp_clean_line (pfile);
826
          return true;
827
        }
828
 
829
      /* First, get out of parsing arguments state.  */
830
      if (pfile->state.parsing_args)
831
        return false;
832
 
833
      /* End of buffer.  Non-empty files should end in a newline.  */
834
      if (buffer->buf != buffer->rlimit
835
          && buffer->next_line > buffer->rlimit
836
          && !buffer->from_stage3)
837
        {
838
          /* Only warn once.  */
839
          buffer->next_line = buffer->rlimit;
840
          cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
841
                               CPP_BUF_COLUMN (buffer, buffer->cur),
842
                               "no newline at end of file");
843
        }
844
 
845
      return_at_eof = buffer->return_at_eof;
846
      _cpp_pop_buffer (pfile);
847
      if (pfile->buffer == NULL || return_at_eof)
848
        return false;
849
    }
850
}
851
 
852
/* Helper for two-character operators.  Sets result->type to
   ELSE_TYPE; then, if the next unread character of `buffer' equals
   CHAR, consumes that character and sets result->type to THEN_TYPE
   instead.  Relies on variables named `buffer' and `result' being in
   scope where the macro is used.  Every parameter is parenthesized so
   that an argument containing a low-precedence operator keeps its
   meaning.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = (THEN_TYPE);                   \
        }                                               \
    }                                                   \
  while (0)
860
 
861
/* Lex a token into pfile->cur_token, which is also incremented, to
862
   get diagnostics pointing to the correct location.
863
 
864
   Does not handle issues such as token lookahead, multiple-include
865
   optimization, directives, skipping etc.  This function is only
866
   suitable for use by _cpp_lex_token, and in special cases like
867
   lex_expansion_token which doesn't care for any of these issues.
868
 
869
   When meeting a newline, returns CPP_EOF if parsing a directive,
870
   otherwise returns to the start of the token buffer if permissible.
871
   Returns the location of the lexed token.  */
872
cpp_token *
873
_cpp_lex_direct (cpp_reader *pfile)
874
{
875
  cppchar_t c;
876
  cpp_buffer *buffer;
877
  const unsigned char *comment_start;
878
  cpp_token *result = pfile->cur_token++;
879
 
880
 fresh_line:
881
  result->flags = 0;
882
  buffer = pfile->buffer;
883
  if (buffer->need_line)
884
    {
885
      if (pfile->state.in_deferred_pragma)
886
        {
887
          result->type = CPP_PRAGMA_EOL;
888
          pfile->state.in_deferred_pragma = false;
889
          if (!pfile->state.pragma_allow_expansion)
890
            pfile->state.prevent_expansion--;
891
          return result;
892
        }
893
      if (!_cpp_get_fresh_line (pfile))
894
        {
895
          result->type = CPP_EOF;
896
          if (!pfile->state.in_directive)
897
            {
898
              /* Tell the compiler the line number of the EOF token.  */
899
              result->src_loc = pfile->line_table->highest_line;
900
              result->flags = BOL;
901
            }
902
          return result;
903
        }
904
      if (!pfile->keep_tokens)
905
        {
906
          pfile->cur_run = &pfile->base_run;
907
          result = pfile->base_run.base;
908
          pfile->cur_token = result + 1;
909
        }
910
      result->flags = BOL;
911
      if (pfile->state.parsing_args == 2)
912
        result->flags |= PREV_WHITE;
913
    }
914
  buffer = pfile->buffer;
915
 update_tokens_line:
916
  result->src_loc = pfile->line_table->highest_line;
917
 
918
 skipped_white:
919
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
920
      && !pfile->overlaid_buffer)
921
    {
922
      _cpp_process_line_notes (pfile, false);
923
      result->src_loc = pfile->line_table->highest_line;
924
    }
925
  c = *buffer->cur++;
926
 
927
  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
928
                               CPP_BUF_COLUMN (buffer, buffer->cur));
929
 
930
  switch (c)
931
    {
932
    case ' ': case '\t': case '\f': case '\v': case '\0':
933
      result->flags |= PREV_WHITE;
934
      skip_whitespace (pfile, c);
935
      goto skipped_white;
936
 
937
    case '\n':
938
      if (buffer->cur < buffer->rlimit)
939
        CPP_INCREMENT_LINE (pfile, 0);
940
      buffer->need_line = true;
941
      goto fresh_line;
942
 
943
    case '0': case '1': case '2': case '3': case '4':
944
    case '5': case '6': case '7': case '8': case '9':
945
      {
946
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
947
        result->type = CPP_NUMBER;
948
        lex_number (pfile, &result->val.str, &nst);
949
        warn_about_normalization (pfile, result, &nst);
950
        break;
951
      }
952
 
953
    case 'L':
954
      /* 'L' may introduce wide characters or strings.  */
955
      if (*buffer->cur == '\'' || *buffer->cur == '"')
956
        {
957
          lex_string (pfile, result, buffer->cur - 1);
958
          break;
959
        }
960
      /* Fall through.  */
961
 
962
    case '_':
963
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
964
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
965
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
966
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
967
    case 'y': case 'z':
968
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
969
    case 'G': case 'H': case 'I': case 'J': case 'K':
970
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
971
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
972
    case 'Y': case 'Z':
973
      result->type = CPP_NAME;
974
      {
975
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
976
        result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
977
                                           &nst);
978
        warn_about_normalization (pfile, result, &nst);
979
      }
980
 
981
      /* Convert named operators to their proper types.  */
982
      if (result->val.node->flags & NODE_OPERATOR)
983
        {
984
          result->flags |= NAMED_OP;
985
          result->type = (enum cpp_ttype) result->val.node->directive_index;
986
        }
987
      break;
988
 
989
    case '\'':
990
    case '"':
991
      lex_string (pfile, result, buffer->cur - 1);
992
      break;
993
 
994
    case '/':
995
      /* A potential block or line comment.  */
996
      comment_start = buffer->cur;
997
      c = *buffer->cur;
998
 
999
      if (c == '*')
1000
        {
1001
          if (_cpp_skip_block_comment (pfile))
1002
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1003
        }
1004
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1005
                            || cpp_in_system_header (pfile)))
1006
        {
1007
          /* Warn about comments only if pedantically GNUC89, and not
1008
             in system headers.  */
1009
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1010
              && ! buffer->warned_cplusplus_comments)
1011
            {
1012
              cpp_error (pfile, CPP_DL_PEDWARN,
1013
                         "C++ style comments are not allowed in ISO C90");
1014
              cpp_error (pfile, CPP_DL_PEDWARN,
1015
                         "(this will be reported only once per input file)");
1016
              buffer->warned_cplusplus_comments = 1;
1017
            }
1018
 
1019
          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1020
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1021
        }
1022
      else if (c == '=')
1023
        {
1024
          buffer->cur++;
1025
          result->type = CPP_DIV_EQ;
1026
          break;
1027
        }
1028
      else
1029
        {
1030
          result->type = CPP_DIV;
1031
          break;
1032
        }
1033
 
1034
      if (!pfile->state.save_comments)
1035
        {
1036
          result->flags |= PREV_WHITE;
1037
          goto update_tokens_line;
1038
        }
1039
 
1040
      /* Save the comment as a token in its own right.  */
1041
      save_comment (pfile, result, comment_start, c);
1042
      break;
1043
 
1044
    case '<':
1045
      if (pfile->state.angled_headers)
1046
        {
1047
          lex_string (pfile, result, buffer->cur - 1);
1048
          break;
1049
        }
1050
 
1051
      result->type = CPP_LESS;
1052
      if (*buffer->cur == '=')
1053
        buffer->cur++, result->type = CPP_LESS_EQ;
1054
      else if (*buffer->cur == '<')
1055
        {
1056
          buffer->cur++;
1057
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1058
        }
1059
      else if (CPP_OPTION (pfile, digraphs))
1060
        {
1061
          if (*buffer->cur == ':')
1062
            {
1063
              buffer->cur++;
1064
              result->flags |= DIGRAPH;
1065
              result->type = CPP_OPEN_SQUARE;
1066
            }
1067
          else if (*buffer->cur == '%')
1068
            {
1069
              buffer->cur++;
1070
              result->flags |= DIGRAPH;
1071
              result->type = CPP_OPEN_BRACE;
1072
            }
1073
        }
1074
      break;
1075
 
1076
    case '>':
1077
      result->type = CPP_GREATER;
1078
      if (*buffer->cur == '=')
1079
        buffer->cur++, result->type = CPP_GREATER_EQ;
1080
      else if (*buffer->cur == '>')
1081
        {
1082
          buffer->cur++;
1083
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1084
        }
1085
      break;
1086
 
1087
    case '%':
1088
      result->type = CPP_MOD;
1089
      if (*buffer->cur == '=')
1090
        buffer->cur++, result->type = CPP_MOD_EQ;
1091
      else if (CPP_OPTION (pfile, digraphs))
1092
        {
1093
          if (*buffer->cur == ':')
1094
            {
1095
              buffer->cur++;
1096
              result->flags |= DIGRAPH;
1097
              result->type = CPP_HASH;
1098
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
1099
                buffer->cur += 2, result->type = CPP_PASTE;
1100
            }
1101
          else if (*buffer->cur == '>')
1102
            {
1103
              buffer->cur++;
1104
              result->flags |= DIGRAPH;
1105
              result->type = CPP_CLOSE_BRACE;
1106
            }
1107
        }
1108
      break;
1109
 
1110
    case '.':
1111
      result->type = CPP_DOT;
1112
      if (ISDIGIT (*buffer->cur))
1113
        {
1114
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1115
          result->type = CPP_NUMBER;
1116
          lex_number (pfile, &result->val.str, &nst);
1117
          warn_about_normalization (pfile, result, &nst);
1118
        }
1119
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1120
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
1121
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1122
        buffer->cur++, result->type = CPP_DOT_STAR;
1123
      break;
1124
 
1125
    case '+':
1126
      result->type = CPP_PLUS;
1127
      if (*buffer->cur == '+')
1128
        buffer->cur++, result->type = CPP_PLUS_PLUS;
1129
      else if (*buffer->cur == '=')
1130
        buffer->cur++, result->type = CPP_PLUS_EQ;
1131
      break;
1132
 
1133
    case '-':
1134
      result->type = CPP_MINUS;
1135
      if (*buffer->cur == '>')
1136
        {
1137
          buffer->cur++;
1138
          result->type = CPP_DEREF;
1139
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1140
            buffer->cur++, result->type = CPP_DEREF_STAR;
1141
        }
1142
      else if (*buffer->cur == '-')
1143
        buffer->cur++, result->type = CPP_MINUS_MINUS;
1144
      else if (*buffer->cur == '=')
1145
        buffer->cur++, result->type = CPP_MINUS_EQ;
1146
      break;
1147
 
1148
    case '&':
1149
      result->type = CPP_AND;
1150
      if (*buffer->cur == '&')
1151
        buffer->cur++, result->type = CPP_AND_AND;
1152
      else if (*buffer->cur == '=')
1153
        buffer->cur++, result->type = CPP_AND_EQ;
1154
      break;
1155
 
1156
    case '|':
1157
      result->type = CPP_OR;
1158
      if (*buffer->cur == '|')
1159
        buffer->cur++, result->type = CPP_OR_OR;
1160
      else if (*buffer->cur == '=')
1161
        buffer->cur++, result->type = CPP_OR_EQ;
1162
      break;
1163
 
1164
    case ':':
1165
      result->type = CPP_COLON;
1166
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1167
        buffer->cur++, result->type = CPP_SCOPE;
1168
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1169
        {
1170
          buffer->cur++;
1171
          result->flags |= DIGRAPH;
1172
          result->type = CPP_CLOSE_SQUARE;
1173
        }
1174
      break;
1175
 
1176
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1177
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1178
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1179
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1180
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1181
 
1182
    case '?': result->type = CPP_QUERY; break;
1183
    case '~': result->type = CPP_COMPL; break;
1184
    case ',': result->type = CPP_COMMA; break;
1185
    case '(': result->type = CPP_OPEN_PAREN; break;
1186
    case ')': result->type = CPP_CLOSE_PAREN; break;
1187
    case '[': result->type = CPP_OPEN_SQUARE; break;
1188
    case ']': result->type = CPP_CLOSE_SQUARE; break;
1189
    case '{': result->type = CPP_OPEN_BRACE; break;
1190
    case '}': result->type = CPP_CLOSE_BRACE; break;
1191
    case ';': result->type = CPP_SEMICOLON; break;
1192
 
1193
      /* @ is a punctuator in Objective-C.  */
1194
    case '@': result->type = CPP_ATSIGN; break;
1195
 
1196
    case '$':
1197
    case '\\':
1198
      {
1199
        const uchar *base = --buffer->cur;
1200
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1201
 
1202
        if (forms_identifier_p (pfile, true, &nst))
1203
          {
1204
            result->type = CPP_NAME;
1205
            result->val.node = lex_identifier (pfile, base, true, &nst);
1206
            warn_about_normalization (pfile, result, &nst);
1207
            break;
1208
          }
1209
        buffer->cur++;
1210
      }
1211
 
1212
    default:
1213
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1214
      break;
1215
    }
1216
 
1217
  return result;
1218
}
1219
 
1220
/* An upper bound on the number of bytes needed to spell TOKEN.
1221
   Does not include preceding whitespace.  */
1222
unsigned int
1223
cpp_token_len (const cpp_token *token)
1224
{
1225
  unsigned int len;
1226
 
1227
  switch (TOKEN_SPELL (token))
1228
    {
1229
    default:            len = 4;                                break;
1230
    case SPELL_LITERAL: len = token->val.str.len;               break;
1231
    case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1232
    }
1233
 
1234
  return len;
1235
}
1236
 
1237
/* Decode the UTF-8 sequence starting at NAME and store the matching
   \U escape in BUFFER: a backslash, 'U', and eight lowercase
   hexadecimal digits, ten bytes in all and without a terminating
   NUL.  Returns the number of bytes of NAME that made up the
   sequence.  Calls abort if one of the trailing bytes is not a UTF-8
   continuation byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead_bits;
  int seq_len = 0;
  int k;

  /* The count of leading one bits in the first byte is the length
     of the whole sequence.  */
  for (lead_bits = *name; lead_bits & 0x80; lead_bits <<= 1)
    seq_len++;

  /* What is left of the first byte below those bits is payload.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each following byte must look like 10xxxxxx and supplies six
     more bits.  */
  for (k = 1; k < seq_len; k++)
    {
      const unsigned char trail = name[k];

      /* Ill-formed UTF-8.  */
      if ((trail & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
1270
 
1271
 
1272
/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1273
   already contain the enough space to hold the token's spelling.
1274
   Returns a pointer to the character after the last character written.
1275
   FORSTRING is true if this is to be the spelling after translation
1276
   phase 1 (this is different for UCNs).
1277
   FIXME: Would be nice if we didn't need the PFILE argument.  */
1278
unsigned char *
1279
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1280
                 unsigned char *buffer, bool forstring)
1281
{
1282
  switch (TOKEN_SPELL (token))
1283
    {
1284
    case SPELL_OPERATOR:
1285
      {
1286
        const unsigned char *spelling;
1287
        unsigned char c;
1288
 
1289
        if (token->flags & DIGRAPH)
1290
          spelling
1291
            = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1292
        else if (token->flags & NAMED_OP)
1293
          goto spell_ident;
1294
        else
1295
          spelling = TOKEN_NAME (token);
1296
 
1297
        while ((c = *spelling++) != '\0')
1298
          *buffer++ = c;
1299
      }
1300
      break;
1301
 
1302
    spell_ident:
1303
    case SPELL_IDENT:
1304
      if (forstring)
1305
        {
1306
          memcpy (buffer, NODE_NAME (token->val.node),
1307
                  NODE_LEN (token->val.node));
1308
          buffer += NODE_LEN (token->val.node);
1309
        }
1310
      else
1311
        {
1312
          size_t i;
1313
          const unsigned char * name = NODE_NAME (token->val.node);
1314
 
1315
          for (i = 0; i < NODE_LEN (token->val.node); i++)
1316
            if (name[i] & ~0x7F)
1317
              {
1318
                i += utf8_to_ucn (buffer, name + i) - 1;
1319
                buffer += 10;
1320
              }
1321
            else
1322
              *buffer++ = NODE_NAME (token->val.node)[i];
1323
        }
1324
      break;
1325
 
1326
    case SPELL_LITERAL:
1327
      memcpy (buffer, token->val.str.text, token->val.str.len);
1328
      buffer += token->val.str.len;
1329
      break;
1330
 
1331
    case SPELL_NONE:
1332
      cpp_error (pfile, CPP_DL_ICE,
1333
                 "unspellable token %s", TOKEN_NAME (token));
1334
      break;
1335
    }
1336
 
1337
  return buffer;
1338
}
1339
 
1340
/* Returns TOKEN spelt as a null-terminated string.  The string is
1341
   freed when the reader is destroyed.  Useful for diagnostics.  */
1342
unsigned char *
1343
cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1344
{
1345
  unsigned int len = cpp_token_len (token) + 1;
1346
  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1347
 
1348
  end = cpp_spell_token (pfile, token, start, false);
1349
  end[0] = '\0';
1350
 
1351
  return start;
1352
}
1353
 
1354
/* Used by C front ends, which really should move to using
1355
   cpp_token_as_text.  */
1356
const char *
1357
cpp_type2name (enum cpp_ttype type)
1358
{
1359
  return (const char *) token_spellings[type].name;
1360
}
1361
 
1362
/* Writes the spelling of token to FP, without any preceding space.
1363
   Separated from cpp_spell_token for efficiency - to avoid stdio
1364
   double-buffering.  */
1365
void
1366
cpp_output_token (const cpp_token *token, FILE *fp)
1367
{
1368
  switch (TOKEN_SPELL (token))
1369
    {
1370
    case SPELL_OPERATOR:
1371
      {
1372
        const unsigned char *spelling;
1373
        int c;
1374
 
1375
        if (token->flags & DIGRAPH)
1376
          spelling
1377
            = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1378
        else if (token->flags & NAMED_OP)
1379
          goto spell_ident;
1380
        else
1381
          spelling = TOKEN_NAME (token);
1382
 
1383
        c = *spelling;
1384
        do
1385
          putc (c, fp);
1386
        while ((c = *++spelling) != '\0');
1387
      }
1388
      break;
1389
 
1390
    spell_ident:
1391
    case SPELL_IDENT:
1392
      {
1393
        size_t i;
1394
        const unsigned char * name = NODE_NAME (token->val.node);
1395
 
1396
        for (i = 0; i < NODE_LEN (token->val.node); i++)
1397
          if (name[i] & ~0x7F)
1398
            {
1399
              unsigned char buffer[10];
1400
              i += utf8_to_ucn (buffer, name + i) - 1;
1401
              fwrite (buffer, 1, 10, fp);
1402
            }
1403
          else
1404
            fputc (NODE_NAME (token->val.node)[i], fp);
1405
      }
1406
      break;
1407
 
1408
    case SPELL_LITERAL:
1409
      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1410
      break;
1411
 
1412
    case SPELL_NONE:
1413
      /* An error, most probably.  */
1414
      break;
1415
    }
1416
}
1417
 
1418
/* Compare two tokens.  */
1419
int
1420
_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1421
{
1422
  if (a->type == b->type && a->flags == b->flags)
1423
    switch (TOKEN_SPELL (a))
1424
      {
1425
      default:                  /* Keep compiler happy.  */
1426
      case SPELL_OPERATOR:
1427
        return 1;
1428
      case SPELL_NONE:
1429
        return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1430
      case SPELL_IDENT:
1431
        return a->val.node == b->val.node;
1432
      case SPELL_LITERAL:
1433
        return (a->val.str.len == b->val.str.len
1434
                && !memcmp (a->val.str.text, b->val.str.text,
1435
                            a->val.str.len));
1436
      }
1437
 
1438
  return 0;
1439
}
1440
 
1441
/* Returns nonzero if a space should be inserted to avoid an
1442
   accidental token paste for output.  For simplicity, it is
1443
   conservative, and occasionally advises a space where one is not
1444
   needed, e.g. "." and ".2".  */
1445
int
1446
cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1447
                 const cpp_token *token2)
1448
{
1449
  enum cpp_ttype a = token1->type, b = token2->type;
1450
  cppchar_t c;
1451
 
1452
  if (token1->flags & NAMED_OP)
1453
    a = CPP_NAME;
1454
  if (token2->flags & NAMED_OP)
1455
    b = CPP_NAME;
1456
 
1457
  c = EOF;
1458
  if (token2->flags & DIGRAPH)
1459
    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1460
  else if (token_spellings[b].category == SPELL_OPERATOR)
1461
    c = token_spellings[b].name[0];
1462
 
1463
  /* Quickly get everything that can paste with an '='.  */
1464
  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1465
    return 1;
1466
 
1467
  switch (a)
1468
    {
1469
    case CPP_GREATER:   return c == '>';
1470
    case CPP_LESS:      return c == '<' || c == '%' || c == ':';
1471
    case CPP_PLUS:      return c == '+';
1472
    case CPP_MINUS:     return c == '-' || c == '>';
1473
    case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1474
    case CPP_MOD:       return c == ':' || c == '>';
1475
    case CPP_AND:       return c == '&';
1476
    case CPP_OR:        return c == '|';
1477
    case CPP_COLON:     return c == ':' || c == '>';
1478
    case CPP_DEREF:     return c == '*';
1479
    case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1480
    case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1481
    case CPP_NAME:      return ((b == CPP_NUMBER
1482
                                 && name_p (pfile, &token2->val.str))
1483
                                || b == CPP_NAME
1484
                                || b == CPP_CHAR || b == CPP_STRING); /* L */
1485
    case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1486
                                || c == '.' || c == '+' || c == '-');
1487
                                      /* UCNs */
1488
    case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1489
                                 && b == CPP_NAME)
1490
                                || (CPP_OPTION (pfile, objc)
1491
                                    && token1->val.str.text[0] == '@'
1492
                                    && (b == CPP_NAME || b == CPP_STRING)));
1493
    default:            break;
1494
    }
1495
 
1496
  return 0;
1497
}
1498
 
1499
/* Output all the remaining tokens on the current line, and a newline
1500
   character, to FP.  Leading whitespace is removed.  If there are
1501
   macros, special token padding is not performed.  */
1502
void
1503
cpp_output_line (cpp_reader *pfile, FILE *fp)
1504
{
1505
  const cpp_token *token;
1506
 
1507
  token = cpp_get_token (pfile);
1508
  while (token->type != CPP_EOF)
1509
    {
1510
      cpp_output_token (token, fp);
1511
      token = cpp_get_token (pfile);
1512
      if (token->flags & PREV_WHITE)
1513
        putc (' ', fp);
1514
    }
1515
 
1516
  putc ('\n', fp);
1517
}
1518
 
1519
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer that is ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF has to grow: MIN_EXTRA plus twice the room left in BUFF.

   Every macro parameter is parenthesized so that an argument that is
   itself an expression keeps its meaning.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1533
 
1534
/* Create a new allocation buffer.  Place the control block at the end
1535
   of the buffer, so that buffer overflows will cause immediate chaos.  */
1536
static _cpp_buff *
1537
new_buff (size_t len)
1538
{
1539
  _cpp_buff *result;
1540
  unsigned char *base;
1541
 
1542
  if (len < MIN_BUFF_SIZE)
1543
    len = MIN_BUFF_SIZE;
1544
  len = CPP_ALIGN (len);
1545
 
1546
  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1547
  result = (_cpp_buff *) (base + len);
1548
  result->base = base;
1549
  result->cur = base;
1550
  result->limit = base + len;
1551
  result->next = NULL;
1552
  return result;
1553
}
1554
 
1555
/* Place a chain of unwanted allocation buffers on the free list.  */
1556
void
1557
_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1558
{
1559
  _cpp_buff *end = buff;
1560
 
1561
  while (end->next)
1562
    end = end->next;
1563
  end->next = pfile->free_buffs;
1564
  pfile->free_buffs = buff;
1565
}
1566
 
1567
/* Return a free buffer of size at least MIN_SIZE.  */
1568
_cpp_buff *
1569
_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1570
{
1571
  _cpp_buff *result, **p;
1572
 
1573
  for (p = &pfile->free_buffs;; p = &(*p)->next)
1574
    {
1575
      size_t size;
1576
 
1577
      if (*p == NULL)
1578
        return new_buff (min_size);
1579
      result = *p;
1580
      size = result->limit - result->base;
1581
      /* Return a buffer that's big enough, but don't waste one that's
1582
         way too big.  */
1583
      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1584
        break;
1585
    }
1586
 
1587
  *p = result->next;
1588
  result->next = NULL;
1589
  result->cur = result->base;
1590
  return result;
1591
}
1592
 
1593
/* Creates a new buffer with enough space to hold the uncommitted
1594
   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1595
   the excess bytes to the new buffer.  Chains the new buffer after
1596
   BUFF, and returns the new buffer.  */
1597
_cpp_buff *
1598
_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1599
{
1600
  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1601
  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1602
 
1603
  buff->next = new_buff;
1604
  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1605
  return new_buff;
1606
}
1607
 
1608
/* Creates a new buffer with enough space to hold the uncommitted
1609
   remaining bytes of the buffer pointed to by BUFF, and at least
1610
   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1611
   Chains the new buffer before the buffer pointed to by BUFF, and
1612
   updates the pointer to point to the new buffer.  */
1613
void
1614
_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1615
{
1616
  _cpp_buff *new_buff, *old_buff = *pbuff;
1617
  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1618
 
1619
  new_buff = _cpp_get_buff (pfile, size);
1620
  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1621
  new_buff->next = old_buff;
1622
  *pbuff = new_buff;
1623
}
1624
 
1625
/* Free a chain of buffers starting at BUFF.  */
1626
void
1627
_cpp_free_buff (_cpp_buff *buff)
1628
{
1629
  _cpp_buff *next;
1630
 
1631
  for (; buff; buff = next)
1632
    {
1633
      next = buff->next;
1634
      free (buff->base);
1635
    }
1636
}
1637
 
1638
/* Allocate permanent, unaligned storage of length LEN.  */
1639
unsigned char *
1640
_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1641
{
1642
  _cpp_buff *buff = pfile->u_buff;
1643
  unsigned char *result = buff->cur;
1644
 
1645
  if (len > (size_t) (buff->limit - result))
1646
    {
1647
      buff = _cpp_get_buff (pfile, len);
1648
      buff->next = pfile->u_buff;
1649
      pfile->u_buff = buff;
1650
      result = buff->cur;
1651
    }
1652
 
1653
  buff->cur = result + len;
1654
  return result;
1655
}
1656
 
1657
/* Allocate permanent, unaligned storage of length LEN from a_buff.
1658
   That buffer is used for growing allocations when saving macro
1659
   replacement lists in a #define, and when parsing an answer to an
1660
   assertion in #assert, #unassert or #if (and therefore possibly
1661
   whilst expanding macros).  It therefore must not be used by any
1662
   code that they might call: specifically the lexer and the guts of
1663
   the macro expander.
1664
 
1665
   All existing other uses clearly fit this restriction: storing
1666
   registered pragmas during initialization.  */
1667
unsigned char *
1668
_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1669
{
1670
  _cpp_buff *buff = pfile->a_buff;
1671
  unsigned char *result = buff->cur;
1672
 
1673
  if (len > (size_t) (buff->limit - result))
1674
    {
1675
      buff = _cpp_get_buff (pfile, len);
1676
      buff->next = pfile->a_buff;
1677
      pfile->a_buff = buff;
1678
      result = buff->cur;
1679
    }
1680
 
1681
  buff->cur = result + len;
1682
  return result;
1683
}
1684
 
1685
/* Say which field of TOK is in use.  */
1686
 
1687
enum cpp_token_fld_kind
1688
cpp_token_val_index (cpp_token *tok)
1689
{
1690
  switch (TOKEN_SPELL (tok))
1691
    {
1692
    case SPELL_IDENT:
1693
      return CPP_TOKEN_FLD_NODE;
1694
    case SPELL_LITERAL:
1695
      return CPP_TOKEN_FLD_STR;
1696
    case SPELL_NONE:
1697
      if (tok->type == CPP_MACRO_ARG)
1698
        return CPP_TOKEN_FLD_ARG_NO;
1699
      else if (tok->type == CPP_PADDING)
1700
        return CPP_TOKEN_FLD_SOURCE;
1701
      else if (tok->type == CPP_PRAGMA)
1702
        return CPP_TOKEN_FLD_PRAGMA;
1703
      /* else fall through */
1704
    default:
1705
      return CPP_TOKEN_FLD_NONE;
1706
    }
1707
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.