OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libcpp/] [lex.c] - Blame information for rev 13

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 13 jlechner
/* CPP Library - lexical analysis.
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
   Contributed by Per Bothner, 1994-95.
4
   Based on CCCP program by Paul Rubin, June 1986
5
   Adapted to ANSI C, Richard Stallman, Jan 1987
6
   Broken out to separate file, Zack Weinberg, Mar 2000
7
 
8
This program is free software; you can redistribute it and/or modify it
9
under the terms of the GNU General Public License as published by the
10
Free Software Foundation; either version 2, or (at your option) any
11
later version.
12
 
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
GNU General Public License for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "cpplib.h"
25
#include "internal.h"
26
 
27
/* Spelling category recorded for each token type in token_spellings.
   SPELL_OPERATOR is what the OP() table macro assigns; the category
   of a TK() entry is named by that entry's second argument.
   NOTE(review): presumably the category selects how a token's text is
   recovered when it is spelled; the consumers are outside the part of
   the file shown here -- confirm.  */
enum spell_type
28
{
29
  SPELL_OPERATOR = 0,
30
  SPELL_IDENT,
31
  SPELL_LITERAL,
32
  SPELL_NONE
33
};
34
 
35
/* One row of the token_spellings table.  */
struct token_spelling
36
{
37
  enum spell_type category;     /* Spelling category of the token type.  */
38
  const unsigned char *name;    /* OP(): the operator's spelling string;
                                   TK(): the stringified enumerator name.  */
39
};
40
 
41
/* Spellings of the six C digraphs.  U is defined elsewhere
   (presumably a cast to const unsigned char * -- confirm).
   NOTE(review): how this array is indexed is not visible in the part
   of the file shown here.  */
static const unsigned char *const digraph_spellings[] =
42
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
 
44
/* Build token_spellings by expanding TTYPE_TABLE (defined elsewhere)
   with temporary OP/TK definitions: OP(e, s) yields an operator row
   whose name is the string S; TK(e, s) yields a row of category
   SPELL_<s> whose name is the enumerator E turned into a string.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
45
#define TK(e, s) { SPELL_ ## s,    U #e },
46
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47
#undef OP
48
#undef TK
49
 
50
/* Look up the spelling category / table name for TOKEN, indexed by
   TOKEN->type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
 
53
/* Forward declarations of file-local helpers.  All but new_buff are
   defined further down in the part of the file shown here.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54
static int skip_line_comment (cpp_reader *);
55
static void skip_whitespace (cpp_reader *, cppchar_t);
56
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59
                            unsigned int, enum cpp_ttype);
60
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61
static int name_p (cpp_reader *, const cpp_string *);
62
static tokenrun *next_tokenrun (tokenrun *);
63
 
64
static _cpp_buff *new_buff (size_t);
65
 
66
 
67
/* Utility routine:
68
 
69
   Compares, the token TOKEN to the NUL-terminated string STRING.
70
   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71
int
72
cpp_ideq (const cpp_token *token, const char *string)
73
{
74
  if (token->type != CPP_NAME)
75
    return 0;
76
 
77
  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78
}
79
 
80
/* Record a note TYPE at byte POS into the current cleaned logical
81
   line.  */
82
static void
83
add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84
{
85
  if (buffer->notes_used == buffer->notes_cap)
86
    {
87
      buffer->notes_cap = buffer->notes_cap * 2 + 200;
88
      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89
                                  buffer->notes_cap);
90
    }
91
 
92
  buffer->notes[buffer->notes_used].pos = pos;
93
  buffer->notes[buffer->notes_used].type = type;
94
  buffer->notes_used++;
95
}
96
 
97
/* Prepare the next logical line of PFILE's current buffer, starting at
   buffer->next_line.  On return the line contains no escaped newlines
   and (when the trigraphs option is on) no trigraphs, is terminated by
   a '\n' written at its end, buffer->cur and buffer->line_base point
   at its start, buffer->need_line is clear, and buffer->next_line
   points just past the physical text consumed.  Every escaped newline
   and trigraph met on the way is recorded with add_line_note, followed
   by one sentinel note.  This is a time-critical inner loop.

   NOTE(review): the look-aheads s[1] / s[2] assume the buffer ends
   with a newline at buffer->rlimit -- confirm against the code that
   creates buffers.  */
99
void
100
_cpp_clean_line (cpp_reader *pfile)
101
{
102
  cpp_buffer *buffer;
103
  const uchar *s;
104
  uchar c, *d, *p;
105
 
106
  buffer = pfile->buffer;
107
  /* Discard the notes of the previous line.  */
  buffer->cur_note = buffer->notes_used = 0;
108
  buffer->cur = buffer->line_base = buffer->next_line;
109
  buffer->need_line = false;
110
  /* Both loops below pre-increment S, so start one byte early.  */
  s = buffer->next_line - 1;
111
 
112
  if (!buffer->from_stage3)
113
    {
114
      /* Short circuit for the common case of an un-escaped line with
         no trigraphs.  The primary win here is by not writing any
         data back to memory until we have to.  */
117
      for (;;)
118
        {
119
          c = *++s;
120
          if (c == '\n' || c == '\r')
121
            {
122
              d = (uchar *) s;
123
 
124
              if (s == buffer->rlimit)
125
                goto done;
126
 
127
              /* DOS line ending? */
128
              if (c == '\r' && s[1] == '\n')
129
                s++;
130
 
131
              if (s == buffer->rlimit)
132
                goto done;
133
 
134
              /* Check for an escaped newline: step P back over any
                 non-vertical whitespace before the line end and see
                 whether a backslash precedes it.  */
135
              p = d;
136
              while (p != buffer->next_line && is_nvspace (p[-1]))
137
                p--;
138
              if (p == buffer->next_line || p[-1] != '\\')
139
                goto done;
140
 
141
              /* Have an escaped newline; process it and proceed to
                 the slow path.  The note type is ' ' when whitespace
                 separated the backslash from the newline.  */
143
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144
              /* Leave D just before the backslash so that the next
                 pre-incremented store overwrites it.  */
              d = p - 2;
145
              /* next_line now marks the splice point; it is the lower
                 bound for later backward whitespace scans.  */
              buffer->next_line = p - 1;
146
              break;
147
            }
148
          if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149
            {
150
              /* Have a trigraph.  We may or may not have to convert
                 it.  Add a line note regardless, for -Wtrigraphs.  */
152
              add_line_note (buffer, s, s[2]);
153
              if (CPP_OPTION (pfile, trigraphs))
154
                {
155
                  /* We do, and that means we have to switch to the
                     slow path.  */
157
                  d = (uchar *) s;
158
                  *d = _cpp_trigraph_map[s[2]];
159
                  s += 2;
160
                  break;
161
                }
162
            }
163
        }
164
 
165
 
166
      /* Slow path: copy each byte from S down to D, compacting the
         line in place over removed escaped newlines and trigraphs.  */
      for (;;)
167
        {
168
          c = *++s;
169
          *++d = c;
170
 
171
          if (c == '\n' || c == '\r')
172
            {
173
              /* Handle DOS line endings.  */
174
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175
                s++;
176
              if (s == buffer->rlimit)
177
                break;
178
 
179
              /* Escaped?  */
180
              p = d;
181
              while (p != buffer->next_line && is_nvspace (p[-1]))
182
                p--;
183
              if (p == buffer->next_line || p[-1] != '\\')
184
                break;
185
 
186
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187
              d = p - 2;
188
              buffer->next_line = p - 1;
189
            }
190
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191
            {
192
              /* Add a note regardless, for the benefit of -Wtrigraphs.  */
193
              add_line_note (buffer, d, s[2]);
194
              if (CPP_OPTION (pfile, trigraphs))
195
                {
196
                  *d = _cpp_trigraph_map[s[2]];
197
                  s += 2;
198
                }
199
            }
200
        }
201
    }
202
  else
203
    {
204
      /* from_stage3 buffers get no trigraph or escaped-newline
         handling here: just find the end of the line.  */
      do
205
        s++;
206
      while (*s != '\n' && *s != '\r');
207
      d = (uchar *) s;
208
 
209
      /* Handle DOS line endings.  */
210
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211
        s++;
212
    }
213
 
214
 done:
215
  /* D is the end of the cleaned line, S the last byte consumed.  */
  *d = '\n';
216
  /* A sentinel note that should never be processed.  */
217
  add_line_note (buffer, d + 1, '\n');
218
  buffer->next_line = s + 1;
219
}
220
 
221
/* Return true if the trigraph indicated by NOTE should be warned
222
   about in a comment.  */
223
static bool
224
warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
225
{
226
  const uchar *p;
227
 
228
  /* Within comments we don't warn about trigraphs, unless the
229
     trigraph forms an escaped newline, as that may change
230
     behavior.  */
231
  if (note->type != '/')
232
    return false;
233
 
234
  /* If -trigraphs, then this was an escaped newline iff the next note
235
     is coincident.  */
236
  if (CPP_OPTION (pfile, trigraphs))
237
    return note[1].pos == note->pos;
238
 
239
  /* Otherwise, see if this forms an escaped newline.  */
240
  p = note->pos + 3;
241
  while (is_nvspace (*p))
242
    p++;
243
 
244
  /* There might have been escaped newlines between the trigraph and the
245
     newline we found.  Hence the position test.  */
246
  return (*p == '\n' && p < note[1].pos);
247
}
248
 
249
/* Process the notes created by add_line_note as far as the current
250
   location.  */
251
void
252
_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
253
{
254
  cpp_buffer *buffer = pfile->buffer;
255
 
256
  for (;;)
257
    {
258
      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259
      unsigned int col;
260
 
261
      if (note->pos > buffer->cur)
262
        break;
263
 
264
      buffer->cur_note++;
265
      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266
 
267
      if (note->type == '\\' || note->type == ' ')
268
        {
269
          if (note->type == ' ' && !in_comment)
270
            cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271
                                 "backslash and newline separated by space");
272
 
273
          if (buffer->next_line > buffer->rlimit)
274
            {
275
              cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276
                                   "backslash-newline at end of file");
277
              /* Prevent "no newline at end of file" warning.  */
278
              buffer->next_line = buffer->rlimit;
279
            }
280
 
281
          buffer->line_base = note->pos;
282
          CPP_INCREMENT_LINE (pfile, 0);
283
        }
284
      else if (_cpp_trigraph_map[note->type])
285
        {
286
          if (CPP_OPTION (pfile, warn_trigraphs)
287
              && (!in_comment || warn_in_comment (pfile, note)))
288
            {
289
              if (CPP_OPTION (pfile, trigraphs))
290
                cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291
                                     "trigraph ??%c converted to %c",
292
                                     note->type,
293
                                     (int) _cpp_trigraph_map[note->type]);
294
              else
295
                {
296
                  cpp_error_with_line
297
                    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298
                     "trigraph ??%c ignored, use -trigraphs to enable",
299
                     note->type);
300
                }
301
            }
302
        }
303
      else
304
        abort ();
305
    }
306
}
307
 
308
/* Skip a C-style block comment.  We find the end of the comment by
309
   seeing if an asterisk is before every '/' we encounter.  Returns
310
   nonzero if comment terminated by EOF, zero otherwise.
311
 
312
   Buffer->cur points to the initial asterisk of the comment.  */
313
bool
314
_cpp_skip_block_comment (cpp_reader *pfile)
315
{
316
  cpp_buffer *buffer = pfile->buffer;
317
  const uchar *cur = buffer->cur;
318
  uchar c;
319
 
320
  cur++;
321
  if (*cur == '/')
322
    cur++;
323
 
324
  for (;;)
325
    {
326
      /* People like decorating comments with '*', so check for '/'
327
         instead for efficiency.  */
328
      c = *cur++;
329
 
330
      if (c == '/')
331
        {
332
          if (cur[-2] == '*')
333
            break;
334
 
335
          /* Warn about potential nested comments, but not if the '/'
336
             comes immediately before the true comment delimiter.
337
             Don't bother to get it right across escaped newlines.  */
338
          if (CPP_OPTION (pfile, warn_comments)
339
              && cur[0] == '*' && cur[1] != '/')
340
            {
341
              buffer->cur = cur;
342
              cpp_error_with_line (pfile, CPP_DL_WARNING,
343
                                   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344
                                   "\"/*\" within comment");
345
            }
346
        }
347
      else if (c == '\n')
348
        {
349
          unsigned int cols;
350
          buffer->cur = cur - 1;
351
          _cpp_process_line_notes (pfile, true);
352
          if (buffer->next_line >= buffer->rlimit)
353
            return true;
354
          _cpp_clean_line (pfile);
355
 
356
          cols = buffer->next_line - buffer->line_base;
357
          CPP_INCREMENT_LINE (pfile, cols);
358
 
359
          cur = buffer->cur;
360
        }
361
    }
362
 
363
  buffer->cur = cur;
364
  _cpp_process_line_notes (pfile, true);
365
  return false;
366
}
367
 
368
/* Skip a C++ line comment, leaving buffer->cur pointing to the
369
   terminating newline.  Handles escaped newlines.  Returns nonzero
370
   if a multiline comment.  */
371
static int
372
skip_line_comment (cpp_reader *pfile)
373
{
374
  cpp_buffer *buffer = pfile->buffer;
375
  unsigned int orig_line = pfile->line_table->highest_line;
376
 
377
  while (*buffer->cur != '\n')
378
    buffer->cur++;
379
 
380
  _cpp_process_line_notes (pfile, true);
381
  return orig_line != pfile->line_table->highest_line;
382
}
383
 
384
/* Skips whitespace, saving the next non-whitespace character.  */
385
static void
386
skip_whitespace (cpp_reader *pfile, cppchar_t c)
387
{
388
  cpp_buffer *buffer = pfile->buffer;
389
  bool saw_NUL = false;
390
 
391
  do
392
    {
393
      /* Horizontal space always OK.  */
394
      if (c == ' ' || c == '\t')
395
        ;
396
      /* Just \f \v or \0 left.  */
397
      else if (c == '\0')
398
        saw_NUL = true;
399
      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400
        cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401
                             CPP_BUF_COL (buffer),
402
                             "%s in preprocessing directive",
403
                             c == '\f' ? "form feed" : "vertical tab");
404
 
405
      c = *buffer->cur++;
406
    }
407
  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408
  while (is_nvspace (c));
409
 
410
  if (saw_NUL)
411
    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
412
 
413
  buffer->cur--;
414
}
415
 
416
/* See if the characters of a number token are valid in a name (no
417
   '.', '+' or '-').  */
418
static int
419
name_p (cpp_reader *pfile, const cpp_string *string)
420
{
421
  unsigned int i;
422
 
423
  for (i = 0; i < string->len; i++)
424
    if (!is_idchar (string->text[i]))
425
      return 0;
426
 
427
  return 1;
428
}
429
 
430
/* After parsing an identifier or other sequence, produce a warning about
431
   sequences not in NFC/NFKC.  */
432
static void
433
warn_about_normalization (cpp_reader *pfile,
434
                          const cpp_token *token,
435
                          const struct normalize_state *s)
436
{
437
  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438
      && !pfile->state.skipping)
439
    {
440
      /* Make sure that the token is printed using UCNs, even
441
         if we'd otherwise happily print UTF-8.  */
442
      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443
      size_t sz;
444
 
445
      sz = cpp_spell_token (pfile, token, buf, false) - buf;
446
      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448
                             "`%.*s' is not in NFKC", (int) sz, buf);
449
      else
450
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451
                             "`%.*s' is not in NFC", (int) sz, buf);
452
    }
453
}
454
 
455
/* Returns TRUE if the sequence starting at buffer->cur is invalid in
456
   an identifier.  FIRST is TRUE if this starts an identifier.  */
457
static bool
458
forms_identifier_p (cpp_reader *pfile, int first,
459
                    struct normalize_state *state)
460
{
461
  cpp_buffer *buffer = pfile->buffer;
462
 
463
  if (*buffer->cur == '$')
464
    {
465
      if (!CPP_OPTION (pfile, dollars_in_ident))
466
        return false;
467
 
468
      buffer->cur++;
469
      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
470
        {
471
          CPP_OPTION (pfile, warn_dollars) = 0;
472
          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
473
        }
474
 
475
      return true;
476
    }
477
 
478
  /* Is this a syntactically valid UCN?  */
479
  if (CPP_OPTION (pfile, extended_identifiers)
480
      && *buffer->cur == '\\'
481
      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482
    {
483
      buffer->cur += 2;
484
      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485
                          state))
486
        return true;
487
      buffer->cur -= 2;
488
    }
489
 
490
  return false;
491
}
492
 
493
/* Lex an identifier starting at BUFFER->CUR - 1.  */
494
static cpp_hashnode *
495
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496
                struct normalize_state *nst)
497
{
498
  cpp_hashnode *result;
499
  const uchar *cur;
500
  unsigned int len;
501
  unsigned int hash = HT_HASHSTEP (0, *base);
502
 
503
  cur = pfile->buffer->cur;
504
  if (! starts_ucn)
505
    while (ISIDNUM (*cur))
506
      {
507
        hash = HT_HASHSTEP (hash, *cur);
508
        cur++;
509
      }
510
  pfile->buffer->cur = cur;
511
  if (starts_ucn || forms_identifier_p (pfile, false, nst))
512
    {
513
      /* Slower version for identifiers containing UCNs (or $).  */
514
      do {
515
        while (ISIDNUM (*pfile->buffer->cur))
516
          {
517
            pfile->buffer->cur++;
518
            NORMALIZE_STATE_UPDATE_IDNUM (nst);
519
          }
520
      } while (forms_identifier_p (pfile, false, nst));
521
      result = _cpp_interpret_identifier (pfile, base,
522
                                          pfile->buffer->cur - base);
523
    }
524
  else
525
    {
526
      len = cur - base;
527
      hash = HT_HASHFINISH (hash, len);
528
 
529
      result = (cpp_hashnode *)
530
        ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531
    }
532
 
533
  /* Rarely, identifiers require diagnostics when lexed.  */
534
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535
                        && !pfile->state.skipping, 0))
536
    {
537
      /* It is allowed to poison the same identifier twice.  */
538
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540
                   NODE_NAME (result));
541
 
542
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543
         replacement list of a variadic macro.  */
544
      if (result == pfile->spec_nodes.n__VA_ARGS__
545
          && !pfile->state.va_args_ok)
546
        cpp_error (pfile, CPP_DL_PEDWARN,
547
                   "__VA_ARGS__ can only appear in the expansion"
548
                   " of a C99 variadic macro");
549
    }
550
 
551
  return result;
552
}
553
 
554
/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
555
static void
556
lex_number (cpp_reader *pfile, cpp_string *number,
557
            struct normalize_state *nst)
558
{
559
  const uchar *cur;
560
  const uchar *base;
561
  uchar *dest;
562
 
563
  base = pfile->buffer->cur - 1;
564
  do
565
    {
566
      cur = pfile->buffer->cur;
567
 
568
      /* N.B. ISIDNUM does not include $.  */
569
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
570
        {
571
          cur++;
572
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
573
        }
574
 
575
      pfile->buffer->cur = cur;
576
    }
577
  while (forms_identifier_p (pfile, false, nst));
578
 
579
  number->len = cur - base;
580
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581
  memcpy (dest, base, number->len);
582
  dest[number->len] = '\0';
583
  number->text = dest;
584
}
585
 
586
/* Create a token of type TYPE with a literal spelling.  */
587
static void
588
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589
                unsigned int len, enum cpp_ttype type)
590
{
591
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592
 
593
  memcpy (dest, base, len);
594
  dest[len] = '\0';
595
  token->type = type;
596
  token->val.str.len = len;
597
  token->val.str.text = dest;
598
}
599
 
600
/* Lexes a string, character constant, or angle-bracketed header file
601
   name.  The stored string contains the spelling, including opening
602
   quote and leading any leading 'L'.  It returns the type of the
603
   literal, or CPP_OTHER if it was not properly terminated.
604
 
605
   The spelling is NUL-terminated, but it is not guaranteed that this
606
   is the first NUL since embedded NULs are preserved.  */
607
static void
608
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
609
{
610
  bool saw_NUL = false;
611
  const uchar *cur;
612
  cppchar_t terminator;
613
  enum cpp_ttype type;
614
 
615
  cur = base;
616
  terminator = *cur++;
617
  if (terminator == 'L')
618
    terminator = *cur++;
619
  if (terminator == '\"')
620
    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621
  else if (terminator == '\'')
622
    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623
  else
624
    terminator = '>', type = CPP_HEADER_NAME;
625
 
626
  for (;;)
627
    {
628
      cppchar_t c = *cur++;
629
 
630
      /* In #include-style directives, terminators are not escapable.  */
631
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632
        cur++;
633
      else if (c == terminator)
634
        break;
635
      else if (c == '\n')
636
        {
637
          cur--;
638
          type = CPP_OTHER;
639
          break;
640
        }
641
      else if (c == '\0')
642
        saw_NUL = true;
643
    }
644
 
645
  if (saw_NUL && !pfile->state.skipping)
646
    cpp_error (pfile, CPP_DL_WARNING,
647
               "null character(s) preserved in literal");
648
 
649
  pfile->buffer->cur = cur;
650
  create_literal (pfile, token, base, cur - base, type);
651
}
652
 
653
/* The stored comment includes the comment start and any terminator.  */
654
static void
655
save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
656
              cppchar_t type)
657
{
658
  unsigned char *buffer;
659
  unsigned int len, clen;
660
 
661
  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
662
 
663
  /* C++ comments probably (not definitely) have moved past a new
664
     line, which we don't want to save in the comment.  */
665
  if (is_vspace (pfile->buffer->cur[-1]))
666
    len--;
667
 
668
  /* If we are currently in a directive, then we need to store all
669
     C++ comments as C comments internally, and so we need to
670
     allocate a little extra space in that case.
671
 
672
     Note that the only time we encounter a directive here is
673
     when we are saving comments in a "#define".  */
674
  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
675
 
676
  buffer = _cpp_unaligned_alloc (pfile, clen);
677
 
678
  token->type = CPP_COMMENT;
679
  token->val.str.len = clen;
680
  token->val.str.text = buffer;
681
 
682
  buffer[0] = '/';
683
  memcpy (buffer + 1, from, len - 1);
684
 
685
  /* Finish conversion to a C comment, if necessary.  */
686
  if (pfile->state.in_directive && type == '/')
687
    {
688
      buffer[1] = '*';
689
      buffer[clen - 2] = '*';
690
      buffer[clen - 1] = '/';
691
    }
692
}
693
 
694
/* Allocate COUNT tokens for RUN.  */
695
void
696
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
697
{
698
  run->base = XNEWVEC (cpp_token, count);
699
  run->limit = run->base + count;
700
  run->next = NULL;
701
}
702
 
703
/* Returns the next tokenrun, or creates one if there is none.  */
704
static tokenrun *
705
next_tokenrun (tokenrun *run)
706
{
707
  if (run->next == NULL)
708
    {
709
      run->next = XNEW (tokenrun);
710
      run->next->prev = run;
711
      _cpp_init_tokenrun (run->next, 250);
712
    }
713
 
714
  return run->next;
715
}
716
 
717
/* Allocate a single token that is invalidated at the same time as the
718
   rest of the tokens on the line.  Has its line and col set to the
719
   same as the last lexed token, so that diagnostics appear in the
720
   right place.  */
721
cpp_token *
722
_cpp_temp_token (cpp_reader *pfile)
723
{
724
  cpp_token *old, *result;
725
 
726
  old = pfile->cur_token - 1;
727
  if (pfile->cur_token == pfile->cur_run->limit)
728
    {
729
      pfile->cur_run = next_tokenrun (pfile->cur_run);
730
      pfile->cur_token = pfile->cur_run->base;
731
    }
732
 
733
  result = pfile->cur_token++;
734
  result->src_loc = old->src_loc;
735
  return result;
736
}
737
 
738
/* Lex a token into RESULT (external interface).  Takes care of issues
739
   like directive handling, token lookahead, multiple include
740
   optimization and skipping.  */
741
const cpp_token *
742
_cpp_lex_token (cpp_reader *pfile)
743
{
744
  cpp_token *result;
745
 
746
  for (;;)
747
    {
748
      if (pfile->cur_token == pfile->cur_run->limit)
749
        {
750
          pfile->cur_run = next_tokenrun (pfile->cur_run);
751
          pfile->cur_token = pfile->cur_run->base;
752
        }
753
 
754
      if (pfile->lookaheads)
755
        {
756
          pfile->lookaheads--;
757
          result = pfile->cur_token++;
758
        }
759
      else
760
        result = _cpp_lex_direct (pfile);
761
 
762
      if (result->flags & BOL)
763
        {
764
          /* Is this a directive.  If _cpp_handle_directive returns
765
             false, it is an assembler #.  */
766
          if (result->type == CPP_HASH
767
              /* 6.10.3 p 11: Directives in a list of macro arguments
768
                 gives undefined behavior.  This implementation
769
                 handles the directive as normal.  */
770
              && pfile->state.parsing_args != 1
771
              && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
772
            {
773
              if (pfile->directive_result.type == CPP_PADDING)
774
                continue;
775
              else
776
                {
777
                  result = &pfile->directive_result;
778
                  break;
779
                }
780
            }
781
 
782
          if (pfile->cb.line_change && !pfile->state.skipping)
783
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
784
        }
785
 
786
      /* We don't skip tokens in directives.  */
787
      if (pfile->state.in_directive)
788
        break;
789
 
790
      /* Outside a directive, invalidate controlling macros.  At file
791
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
792
         get here and MI optimization works.  */
793
      pfile->mi_valid = false;
794
 
795
      if (!pfile->state.skipping || result->type == CPP_EOF)
796
        break;
797
    }
798
 
799
  return result;
800
}
801
 
802
/* Returns true if a fresh line has been loaded.  */
803
bool
804
_cpp_get_fresh_line (cpp_reader *pfile)
805
{
806
  int return_at_eof;
807
 
808
  /* We can't get a new line until we leave the current directive.  */
809
  if (pfile->state.in_directive)
810
    return false;
811
 
812
  for (;;)
813
    {
814
      cpp_buffer *buffer = pfile->buffer;
815
 
816
      if (!buffer->need_line)
817
        return true;
818
 
819
      if (buffer->next_line < buffer->rlimit)
820
        {
821
          _cpp_clean_line (pfile);
822
          return true;
823
        }
824
 
825
      /* First, get out of parsing arguments state.  */
826
      if (pfile->state.parsing_args)
827
        return false;
828
 
829
      /* End of buffer.  Non-empty files should end in a newline.  */
830
      if (buffer->buf != buffer->rlimit
831
          && buffer->next_line > buffer->rlimit
832
          && !buffer->from_stage3)
833
        {
834
          /* Only warn once.  */
835
          buffer->next_line = buffer->rlimit;
836
          cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
837
                               CPP_BUF_COLUMN (buffer, buffer->cur),
838
                               "no newline at end of file");
839
        }
840
 
841
      return_at_eof = buffer->return_at_eof;
842
      _cpp_pop_buffer (pfile);
843
      if (pfile->buffer == NULL || return_at_eof)
844
        return false;
845
    }
846
}
847
 
848
/* Helper for _cpp_lex_direct; relies on the local variables BUFFER and
   RESULT.  If the next input character is CHAR, consume it and give
   the token type THEN_TYPE; otherwise leave the input alone and give
   the token type ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == CHAR)                         \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
856
 
857
/* Lex a token into pfile->cur_token, which is also incremented, to
858
   get diagnostics pointing to the correct location.
859
 
860
   Does not handle issues such as token lookahead, multiple-include
861
   optimization, directives, skipping etc.  This function is only
862
   suitable for use by _cpp_lex_token, and in special cases like
863
   lex_expansion_token which doesn't care for any of these issues.
864
 
865
   When meeting a newline, returns CPP_EOF if parsing a directive,
866
   otherwise returns to the start of the token buffer if permissible.
867
   Returns the location of the lexed token.  */
868
cpp_token *
869
_cpp_lex_direct (cpp_reader *pfile)
870
{
871
  cppchar_t c;
872
  cpp_buffer *buffer;
873
  const unsigned char *comment_start;
874
  cpp_token *result = pfile->cur_token++;
875
 
876
 fresh_line:
877
  result->flags = 0;
878
  buffer = pfile->buffer;
879
  if (buffer->need_line)
880
    {
881
      if (!_cpp_get_fresh_line (pfile))
882
        {
883
          result->type = CPP_EOF;
884
          if (!pfile->state.in_directive)
885
            {
886
              /* Tell the compiler the line number of the EOF token.  */
887
              result->src_loc = pfile->line_table->highest_line;
888
              result->flags = BOL;
889
            }
890
          return result;
891
        }
892
      if (!pfile->keep_tokens)
893
        {
894
          pfile->cur_run = &pfile->base_run;
895
          result = pfile->base_run.base;
896
          pfile->cur_token = result + 1;
897
        }
898
      result->flags = BOL;
899
      if (pfile->state.parsing_args == 2)
900
        result->flags |= PREV_WHITE;
901
    }
902
  buffer = pfile->buffer;
903
 update_tokens_line:
904
  result->src_loc = pfile->line_table->highest_line;
905
 
906
 skipped_white:
907
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
908
      && !pfile->overlaid_buffer)
909
    {
910
      _cpp_process_line_notes (pfile, false);
911
      result->src_loc = pfile->line_table->highest_line;
912
    }
913
  c = *buffer->cur++;
914
 
915
  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
916
                               CPP_BUF_COLUMN (buffer, buffer->cur));
917
 
918
  switch (c)
919
    {
920
    case ' ': case '\t': case '\f': case '\v': case '\0':
921
      result->flags |= PREV_WHITE;
922
      skip_whitespace (pfile, c);
923
      goto skipped_white;
924
 
925
    case '\n':
926
      if (buffer->cur < buffer->rlimit)
927
        CPP_INCREMENT_LINE (pfile, 0);
928
      buffer->need_line = true;
929
      goto fresh_line;
930
 
931
    case '0': case '1': case '2': case '3': case '4':
932
    case '5': case '6': case '7': case '8': case '9':
933
      {
934
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
935
        result->type = CPP_NUMBER;
936
        lex_number (pfile, &result->val.str, &nst);
937
        warn_about_normalization (pfile, result, &nst);
938
        break;
939
      }
940
 
941
    case 'L':
942
      /* 'L' may introduce wide characters or strings.  */
943
      if (*buffer->cur == '\'' || *buffer->cur == '"')
944
        {
945
          lex_string (pfile, result, buffer->cur - 1);
946
          break;
947
        }
948
      /* Fall through.  */
949
 
950
    case '_':
951
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
952
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
953
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
954
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
955
    case 'y': case 'z':
956
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
957
    case 'G': case 'H': case 'I': case 'J': case 'K':
958
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
959
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
960
    case 'Y': case 'Z':
961
      result->type = CPP_NAME;
962
      {
963
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
964
        result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
965
                                           &nst);
966
        warn_about_normalization (pfile, result, &nst);
967
      }
968
 
969
      /* Convert named operators to their proper types.  */
970
      if (result->val.node->flags & NODE_OPERATOR)
971
        {
972
          result->flags |= NAMED_OP;
973
          result->type = (enum cpp_ttype) result->val.node->directive_index;
974
        }
975
      break;
976
 
977
    case '\'':
978
    case '"':
979
      lex_string (pfile, result, buffer->cur - 1);
980
      break;
981
 
982
    case '/':
983
      /* A potential block or line comment.  */
984
      comment_start = buffer->cur;
985
      c = *buffer->cur;
986
 
987
      if (c == '*')
988
        {
989
          if (_cpp_skip_block_comment (pfile))
990
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
991
        }
992
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
993
                            || cpp_in_system_header (pfile)))
994
        {
995
          /* Warn about comments only if pedantically GNUC89, and not
996
             in system headers.  */
997
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
998
              && ! buffer->warned_cplusplus_comments)
999
            {
1000
              cpp_error (pfile, CPP_DL_PEDWARN,
1001
                         "C++ style comments are not allowed in ISO C90");
1002
              cpp_error (pfile, CPP_DL_PEDWARN,
1003
                         "(this will be reported only once per input file)");
1004
              buffer->warned_cplusplus_comments = 1;
1005
            }
1006
 
1007
          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1008
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1009
        }
1010
      else if (c == '=')
1011
        {
1012
          buffer->cur++;
1013
          result->type = CPP_DIV_EQ;
1014
          break;
1015
        }
1016
      else
1017
        {
1018
          result->type = CPP_DIV;
1019
          break;
1020
        }
1021
 
1022
      if (!pfile->state.save_comments)
1023
        {
1024
          result->flags |= PREV_WHITE;
1025
          goto update_tokens_line;
1026
        }
1027
 
1028
      /* Save the comment as a token in its own right.  */
1029
      save_comment (pfile, result, comment_start, c);
1030
      break;
1031
 
1032
    case '<':
1033
      if (pfile->state.angled_headers)
1034
        {
1035
          lex_string (pfile, result, buffer->cur - 1);
1036
          break;
1037
        }
1038
 
1039
      result->type = CPP_LESS;
1040
      if (*buffer->cur == '=')
1041
        buffer->cur++, result->type = CPP_LESS_EQ;
1042
      else if (*buffer->cur == '<')
1043
        {
1044
          buffer->cur++;
1045
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1046
        }
1047
      else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1048
        {
1049
          buffer->cur++;
1050
          IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1051
        }
1052
      else if (CPP_OPTION (pfile, digraphs))
1053
        {
1054
          if (*buffer->cur == ':')
1055
            {
1056
              buffer->cur++;
1057
              result->flags |= DIGRAPH;
1058
              result->type = CPP_OPEN_SQUARE;
1059
            }
1060
          else if (*buffer->cur == '%')
1061
            {
1062
              buffer->cur++;
1063
              result->flags |= DIGRAPH;
1064
              result->type = CPP_OPEN_BRACE;
1065
            }
1066
        }
1067
      break;
1068
 
1069
    case '>':
1070
      result->type = CPP_GREATER;
1071
      if (*buffer->cur == '=')
1072
        buffer->cur++, result->type = CPP_GREATER_EQ;
1073
      else if (*buffer->cur == '>')
1074
        {
1075
          buffer->cur++;
1076
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1077
        }
1078
      else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1079
        {
1080
          buffer->cur++;
1081
          IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1082
        }
1083
      break;
1084
 
1085
    case '%':
1086
      result->type = CPP_MOD;
1087
      if (*buffer->cur == '=')
1088
        buffer->cur++, result->type = CPP_MOD_EQ;
1089
      else if (CPP_OPTION (pfile, digraphs))
1090
        {
1091
          if (*buffer->cur == ':')
1092
            {
1093
              buffer->cur++;
1094
              result->flags |= DIGRAPH;
1095
              result->type = CPP_HASH;
1096
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
1097
                buffer->cur += 2, result->type = CPP_PASTE;
1098
            }
1099
          else if (*buffer->cur == '>')
1100
            {
1101
              buffer->cur++;
1102
              result->flags |= DIGRAPH;
1103
              result->type = CPP_CLOSE_BRACE;
1104
            }
1105
        }
1106
      break;
1107
 
1108
    case '.':
1109
      result->type = CPP_DOT;
1110
      if (ISDIGIT (*buffer->cur))
1111
        {
1112
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1113
          result->type = CPP_NUMBER;
1114
          lex_number (pfile, &result->val.str, &nst);
1115
          warn_about_normalization (pfile, result, &nst);
1116
        }
1117
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1118
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
1119
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1120
        buffer->cur++, result->type = CPP_DOT_STAR;
1121
      break;
1122
 
1123
    case '+':
1124
      result->type = CPP_PLUS;
1125
      if (*buffer->cur == '+')
1126
        buffer->cur++, result->type = CPP_PLUS_PLUS;
1127
      else if (*buffer->cur == '=')
1128
        buffer->cur++, result->type = CPP_PLUS_EQ;
1129
      break;
1130
 
1131
    case '-':
1132
      result->type = CPP_MINUS;
1133
      if (*buffer->cur == '>')
1134
        {
1135
          buffer->cur++;
1136
          result->type = CPP_DEREF;
1137
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1138
            buffer->cur++, result->type = CPP_DEREF_STAR;
1139
        }
1140
      else if (*buffer->cur == '-')
1141
        buffer->cur++, result->type = CPP_MINUS_MINUS;
1142
      else if (*buffer->cur == '=')
1143
        buffer->cur++, result->type = CPP_MINUS_EQ;
1144
      break;
1145
 
1146
    case '&':
1147
      result->type = CPP_AND;
1148
      if (*buffer->cur == '&')
1149
        buffer->cur++, result->type = CPP_AND_AND;
1150
      else if (*buffer->cur == '=')
1151
        buffer->cur++, result->type = CPP_AND_EQ;
1152
      break;
1153
 
1154
    case '|':
1155
      result->type = CPP_OR;
1156
      if (*buffer->cur == '|')
1157
        buffer->cur++, result->type = CPP_OR_OR;
1158
      else if (*buffer->cur == '=')
1159
        buffer->cur++, result->type = CPP_OR_EQ;
1160
      break;
1161
 
1162
    case ':':
1163
      result->type = CPP_COLON;
1164
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1165
        buffer->cur++, result->type = CPP_SCOPE;
1166
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1167
        {
1168
          buffer->cur++;
1169
          result->flags |= DIGRAPH;
1170
          result->type = CPP_CLOSE_SQUARE;
1171
        }
1172
      break;
1173
 
1174
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1175
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1176
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1177
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1178
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1179
 
1180
    case '?': result->type = CPP_QUERY; break;
1181
    case '~': result->type = CPP_COMPL; break;
1182
    case ',': result->type = CPP_COMMA; break;
1183
    case '(': result->type = CPP_OPEN_PAREN; break;
1184
    case ')': result->type = CPP_CLOSE_PAREN; break;
1185
    case '[': result->type = CPP_OPEN_SQUARE; break;
1186
    case ']': result->type = CPP_CLOSE_SQUARE; break;
1187
    case '{': result->type = CPP_OPEN_BRACE; break;
1188
    case '}': result->type = CPP_CLOSE_BRACE; break;
1189
    case ';': result->type = CPP_SEMICOLON; break;
1190
 
1191
      /* @ is a punctuator in Objective-C.  */
1192
    case '@': result->type = CPP_ATSIGN; break;
1193
 
1194
    case '$':
1195
    case '\\':
1196
      {
1197
        const uchar *base = --buffer->cur;
1198
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1199
 
1200
        if (forms_identifier_p (pfile, true, &nst))
1201
          {
1202
            result->type = CPP_NAME;
1203
            result->val.node = lex_identifier (pfile, base, true, &nst);
1204
            warn_about_normalization (pfile, result, &nst);
1205
            break;
1206
          }
1207
        buffer->cur++;
1208
      }
1209
 
1210
    default:
1211
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1212
      break;
1213
    }
1214
 
1215
  return result;
1216
}
1217
 
1218
/* An upper bound on the number of bytes needed to spell TOKEN.
1219
   Does not include preceding whitespace.  */
1220
unsigned int
1221
cpp_token_len (const cpp_token *token)
1222
{
1223
  unsigned int len;
1224
 
1225
  switch (TOKEN_SPELL (token))
1226
    {
1227
    default:            len = 4;                                break;
1228
    case SPELL_LITERAL: len = token->val.str.len;               break;
1229
    case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1230
    }
1231
 
1232
  return len;
1233
}
1234
 
1235
/* Decode the UTF-8 sequence starting at NAME and store the matching
   \U escape (a backslash, 'U' and eight lower-case hex digits) in
   BUFFER.  Exactly 10 bytes are written to BUFFER, with no
   terminating NUL.  Returns the number of bytes of NAME that were
   consumed.  Aborts on a malformed continuation byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned int lead;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading 1 bits in the first byte gives the length
     of the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The remaining low bits of the first byte start the code point.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte must look like 10xxxxxx and supplies six
     more bits.  */
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1268
 
1269
 
1270
/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1271
   already contain the enough space to hold the token's spelling.
1272
   Returns a pointer to the character after the last character written.
1273
   FORSTRING is true if this is to be the spelling after translation
1274
   phase 1 (this is different for UCNs).
1275
   FIXME: Would be nice if we didn't need the PFILE argument.  */
1276
unsigned char *
1277
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1278
                 unsigned char *buffer, bool forstring)
1279
{
1280
  switch (TOKEN_SPELL (token))
1281
    {
1282
    case SPELL_OPERATOR:
1283
      {
1284
        const unsigned char *spelling;
1285
        unsigned char c;
1286
 
1287
        if (token->flags & DIGRAPH)
1288
          spelling
1289
            = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1290
        else if (token->flags & NAMED_OP)
1291
          goto spell_ident;
1292
        else
1293
          spelling = TOKEN_NAME (token);
1294
 
1295
        while ((c = *spelling++) != '\0')
1296
          *buffer++ = c;
1297
      }
1298
      break;
1299
 
1300
    spell_ident:
1301
    case SPELL_IDENT:
1302
      if (forstring)
1303
        {
1304
          memcpy (buffer, NODE_NAME (token->val.node),
1305
                  NODE_LEN (token->val.node));
1306
          buffer += NODE_LEN (token->val.node);
1307
        }
1308
      else
1309
        {
1310
          size_t i;
1311
          const unsigned char * name = NODE_NAME (token->val.node);
1312
 
1313
          for (i = 0; i < NODE_LEN (token->val.node); i++)
1314
            if (name[i] & ~0x7F)
1315
              {
1316
                i += utf8_to_ucn (buffer, name + i) - 1;
1317
                buffer += 10;
1318
              }
1319
            else
1320
              *buffer++ = NODE_NAME (token->val.node)[i];
1321
        }
1322
      break;
1323
 
1324
    case SPELL_LITERAL:
1325
      memcpy (buffer, token->val.str.text, token->val.str.len);
1326
      buffer += token->val.str.len;
1327
      break;
1328
 
1329
    case SPELL_NONE:
1330
      cpp_error (pfile, CPP_DL_ICE,
1331
                 "unspellable token %s", TOKEN_NAME (token));
1332
      break;
1333
    }
1334
 
1335
  return buffer;
1336
}
1337
 
1338
/* Returns TOKEN spelt as a null-terminated string.  The string is
1339
   freed when the reader is destroyed.  Useful for diagnostics.  */
1340
unsigned char *
1341
cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1342
{
1343
  unsigned int len = cpp_token_len (token) + 1;
1344
  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1345
 
1346
  end = cpp_spell_token (pfile, token, start, false);
1347
  end[0] = '\0';
1348
 
1349
  return start;
1350
}
1351
 
1352
/* Used by C front ends, which really should move to using
1353
   cpp_token_as_text.  */
1354
const char *
1355
cpp_type2name (enum cpp_ttype type)
1356
{
1357
  return (const char *) token_spellings[type].name;
1358
}
1359
 
1360
/* Writes the spelling of token to FP, without any preceding space.
1361
   Separated from cpp_spell_token for efficiency - to avoid stdio
1362
   double-buffering.  */
1363
void
1364
cpp_output_token (const cpp_token *token, FILE *fp)
1365
{
1366
  switch (TOKEN_SPELL (token))
1367
    {
1368
    case SPELL_OPERATOR:
1369
      {
1370
        const unsigned char *spelling;
1371
        int c;
1372
 
1373
        if (token->flags & DIGRAPH)
1374
          spelling
1375
            = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1376
        else if (token->flags & NAMED_OP)
1377
          goto spell_ident;
1378
        else
1379
          spelling = TOKEN_NAME (token);
1380
 
1381
        c = *spelling;
1382
        do
1383
          putc (c, fp);
1384
        while ((c = *++spelling) != '\0');
1385
      }
1386
      break;
1387
 
1388
    spell_ident:
1389
    case SPELL_IDENT:
1390
      {
1391
        size_t i;
1392
        const unsigned char * name = NODE_NAME (token->val.node);
1393
 
1394
        for (i = 0; i < NODE_LEN (token->val.node); i++)
1395
          if (name[i] & ~0x7F)
1396
            {
1397
              unsigned char buffer[10];
1398
              i += utf8_to_ucn (buffer, name + i) - 1;
1399
              fwrite (buffer, 1, 10, fp);
1400
            }
1401
          else
1402
            fputc (NODE_NAME (token->val.node)[i], fp);
1403
      }
1404
      break;
1405
 
1406
    case SPELL_LITERAL:
1407
      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1408
      break;
1409
 
1410
    case SPELL_NONE:
1411
      /* An error, most probably.  */
1412
      break;
1413
    }
1414
}
1415
 
1416
/* Compare two tokens.  */
1417
int
1418
_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1419
{
1420
  if (a->type == b->type && a->flags == b->flags)
1421
    switch (TOKEN_SPELL (a))
1422
      {
1423
      default:                  /* Keep compiler happy.  */
1424
      case SPELL_OPERATOR:
1425
        return 1;
1426
      case SPELL_NONE:
1427
        return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1428
      case SPELL_IDENT:
1429
        return a->val.node == b->val.node;
1430
      case SPELL_LITERAL:
1431
        return (a->val.str.len == b->val.str.len
1432
                && !memcmp (a->val.str.text, b->val.str.text,
1433
                            a->val.str.len));
1434
      }
1435
 
1436
  return 0;
1437
}
1438
 
1439
/* Returns nonzero if a space should be inserted to avoid an
1440
   accidental token paste for output.  For simplicity, it is
1441
   conservative, and occasionally advises a space where one is not
1442
   needed, e.g. "." and ".2".  */
1443
int
1444
cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1445
                 const cpp_token *token2)
1446
{
1447
  enum cpp_ttype a = token1->type, b = token2->type;
1448
  cppchar_t c;
1449
 
1450
  if (token1->flags & NAMED_OP)
1451
    a = CPP_NAME;
1452
  if (token2->flags & NAMED_OP)
1453
    b = CPP_NAME;
1454
 
1455
  c = EOF;
1456
  if (token2->flags & DIGRAPH)
1457
    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1458
  else if (token_spellings[b].category == SPELL_OPERATOR)
1459
    c = token_spellings[b].name[0];
1460
 
1461
  /* Quickly get everything that can paste with an '='.  */
1462
  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1463
    return 1;
1464
 
1465
  switch (a)
1466
    {
1467
    case CPP_GREATER:   return c == '>' || c == '?';
1468
    case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1469
    case CPP_PLUS:      return c == '+';
1470
    case CPP_MINUS:     return c == '-' || c == '>';
1471
    case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1472
    case CPP_MOD:       return c == ':' || c == '>';
1473
    case CPP_AND:       return c == '&';
1474
    case CPP_OR:        return c == '|';
1475
    case CPP_COLON:     return c == ':' || c == '>';
1476
    case CPP_DEREF:     return c == '*';
1477
    case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1478
    case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1479
    case CPP_NAME:      return ((b == CPP_NUMBER
1480
                                 && name_p (pfile, &token2->val.str))
1481
                                || b == CPP_NAME
1482
                                || b == CPP_CHAR || b == CPP_STRING); /* L */
1483
    case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1484
                                || c == '.' || c == '+' || c == '-');
1485
                                      /* UCNs */
1486
    case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1487
                                 && b == CPP_NAME)
1488
                                || (CPP_OPTION (pfile, objc)
1489
                                    && token1->val.str.text[0] == '@'
1490
                                    && (b == CPP_NAME || b == CPP_STRING)));
1491
    default:            break;
1492
    }
1493
 
1494
  return 0;
1495
}
1496
 
1497
/* Output all the remaining tokens on the current line, and a newline
1498
   character, to FP.  Leading whitespace is removed.  If there are
1499
   macros, special token padding is not performed.  */
1500
void
1501
cpp_output_line (cpp_reader *pfile, FILE *fp)
1502
{
1503
  const cpp_token *token;
1504
 
1505
  token = cpp_get_token (pfile);
1506
  while (token->type != CPP_EOF)
1507
    {
1508
      cpp_output_token (token, fp);
1509
      token = cpp_get_token (pfile);
1510
      if (token->flags & PREV_WHITE)
1511
        putc (' ', fp);
1512
    }
1513
 
1514
  putc ('\n', fp);
1515
}
1516
 
1517
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF has to grow by at least MIN_EXTRA bytes.  Every macro argument
   is parenthesized so that any expression can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1531
 
1532
/* Create a new allocation buffer.  Place the control block at the end
1533
   of the buffer, so that buffer overflows will cause immediate chaos.  */
1534
static _cpp_buff *
1535
new_buff (size_t len)
1536
{
1537
  _cpp_buff *result;
1538
  unsigned char *base;
1539
 
1540
  if (len < MIN_BUFF_SIZE)
1541
    len = MIN_BUFF_SIZE;
1542
  len = CPP_ALIGN (len);
1543
 
1544
  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1545
  result = (_cpp_buff *) (base + len);
1546
  result->base = base;
1547
  result->cur = base;
1548
  result->limit = base + len;
1549
  result->next = NULL;
1550
  return result;
1551
}
1552
 
1553
/* Place a chain of unwanted allocation buffers on the free list.  */
1554
void
1555
_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1556
{
1557
  _cpp_buff *end = buff;
1558
 
1559
  while (end->next)
1560
    end = end->next;
1561
  end->next = pfile->free_buffs;
1562
  pfile->free_buffs = buff;
1563
}
1564
 
1565
/* Return a free buffer of size at least MIN_SIZE.  */
1566
_cpp_buff *
1567
_cpp_get_buff (cpp_reader *pfile, size_t min_size)
1568
{
1569
  _cpp_buff *result, **p;
1570
 
1571
  for (p = &pfile->free_buffs;; p = &(*p)->next)
1572
    {
1573
      size_t size;
1574
 
1575
      if (*p == NULL)
1576
        return new_buff (min_size);
1577
      result = *p;
1578
      size = result->limit - result->base;
1579
      /* Return a buffer that's big enough, but don't waste one that's
1580
         way too big.  */
1581
      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1582
        break;
1583
    }
1584
 
1585
  *p = result->next;
1586
  result->next = NULL;
1587
  result->cur = result->base;
1588
  return result;
1589
}
1590
 
1591
/* Creates a new buffer with enough space to hold the uncommitted
1592
   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1593
   the excess bytes to the new buffer.  Chains the new buffer after
1594
   BUFF, and returns the new buffer.  */
1595
_cpp_buff *
1596
_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1597
{
1598
  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1599
  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1600
 
1601
  buff->next = new_buff;
1602
  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1603
  return new_buff;
1604
}
1605
 
1606
/* Creates a new buffer with enough space to hold the uncommitted
1607
   remaining bytes of the buffer pointed to by BUFF, and at least
1608
   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1609
   Chains the new buffer before the buffer pointed to by BUFF, and
1610
   updates the pointer to point to the new buffer.  */
1611
void
1612
_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1613
{
1614
  _cpp_buff *new_buff, *old_buff = *pbuff;
1615
  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1616
 
1617
  new_buff = _cpp_get_buff (pfile, size);
1618
  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1619
  new_buff->next = old_buff;
1620
  *pbuff = new_buff;
1621
}
1622
 
1623
/* Free a chain of buffers starting at BUFF.  */
1624
void
1625
_cpp_free_buff (_cpp_buff *buff)
1626
{
1627
  _cpp_buff *next;
1628
 
1629
  for (; buff; buff = next)
1630
    {
1631
      next = buff->next;
1632
      free (buff->base);
1633
    }
1634
}
1635
 
1636
/* Allocate permanent, unaligned storage of length LEN.  */
1637
unsigned char *
1638
_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1639
{
1640
  _cpp_buff *buff = pfile->u_buff;
1641
  unsigned char *result = buff->cur;
1642
 
1643
  if (len > (size_t) (buff->limit - result))
1644
    {
1645
      buff = _cpp_get_buff (pfile, len);
1646
      buff->next = pfile->u_buff;
1647
      pfile->u_buff = buff;
1648
      result = buff->cur;
1649
    }
1650
 
1651
  buff->cur = result + len;
1652
  return result;
1653
}
1654
 
1655
/* Allocate permanent, unaligned storage of length LEN from a_buff.
1656
   That buffer is used for growing allocations when saving macro
1657
   replacement lists in a #define, and when parsing an answer to an
1658
   assertion in #assert, #unassert or #if (and therefore possibly
1659
   whilst expanding macros).  It therefore must not be used by any
1660
   code that they might call: specifically the lexer and the guts of
1661
   the macro expander.
1662
 
1663
   All existing other uses clearly fit this restriction: storing
1664
   registered pragmas during initialization.  */
1665
unsigned char *
1666
_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1667
{
1668
  _cpp_buff *buff = pfile->a_buff;
1669
  unsigned char *result = buff->cur;
1670
 
1671
  if (len > (size_t) (buff->limit - result))
1672
    {
1673
      buff = _cpp_get_buff (pfile, len);
1674
      buff->next = pfile->a_buff;
1675
      pfile->a_buff = buff;
1676
      result = buff->cur;
1677
    }
1678
 
1679
  buff->cur = result + len;
1680
  return result;
1681
}
1682
 
1683
/* Say which field of TOK is in use.  */
1684
 
1685
enum cpp_token_fld_kind
1686
cpp_token_val_index (cpp_token *tok)
1687
{
1688
  switch (TOKEN_SPELL (tok))
1689
    {
1690
    case SPELL_IDENT:
1691
      return CPP_TOKEN_FLD_NODE;
1692
    case SPELL_LITERAL:
1693
      return CPP_TOKEN_FLD_STR;
1694
    case SPELL_NONE:
1695
      if (tok->type == CPP_MACRO_ARG)
1696
        return CPP_TOKEN_FLD_ARG_NO;
1697
      else if (tok->type == CPP_PADDING)
1698
        return CPP_TOKEN_FLD_SOURCE;
1699
      else if (tok->type == CPP_PRAGMA)
1700
        return CPP_TOKEN_FLD_STR;
1701
      /* else fall through */
1702
    default:
1703
      return CPP_TOKEN_FLD_NONE;
1704
    }
1705
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.