OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [libcpp/] [lex.c] - Blame information for rev 855

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 270 jeremybenn
/* CPP Library - lexical analysis.
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3
   Free Software Foundation, Inc.
4
   Contributed by Per Bothner, 1994-95.
5
   Based on CCCP program by Paul Rubin, June 1986
6
   Adapted to ANSI C, Richard Stallman, Jan 1987
7
   Broken out to separate file, Zack Weinberg, Mar 2000
8
 
9
This program is free software; you can redistribute it and/or modify it
10
under the terms of the GNU General Public License as published by the
11
Free Software Foundation; either version 3, or (at your option) any
12
later version.
13
 
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with this program; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "cpplib.h"
26
#include "internal.h"
27
 
28
/* Spelling category of a token type.  One of these values is stored
   in the `category' field of every token_spellings entry.  */
enum spell_type
29
{
30
  /* Category given to every OP() entry of TTYPE_TABLE; such entries
     carry their spelling as a string in the table itself.  */
  SPELL_OPERATOR = 0,
31
  /* The remaining categories are selected by the second argument of
     each TK() entry.  NOTE(review): presumably IDENT means "spell
     from the identifier node", LITERAL "spell from the stored
     string" and NONE "no spelling available" -- confirm against the
     code that switches on TOKEN_SPELL.  */
  SPELL_IDENT,
32
  SPELL_LITERAL,
33
  SPELL_NONE
34
};
35
 
36
/* One row of the token_spellings table, indexed by token type.  */
struct token_spelling
37
{
38
  /* How the token type is spelled (see enum spell_type).  */
  enum spell_type category;
39
  /* For OP() entries, the operator text; for TK() entries, the
     enumerator name turned into a string by the TK macro.  */
  const unsigned char *name;
40
};
41
 
42
/* Digraph spellings; in ISO C these stand for #, ##, [, ], { and }
   respectively.  NOTE(review): the indexing scheme used by callers is
   not visible in this part of the file -- confirm before reordering.  */
static const unsigned char *const digraph_spellings[] =
43
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44
 
45
/* Expand TTYPE_TABLE into token_spelling initializers.  OP(e, s)
   yields an operator entry whose name is the string S; TK(e, s)
   yields an entry of category SPELL_<s> whose name is the
   stringified enumerator E.  Both macros are undefined again right
   after the table has been built.  */
#define OP(e, s) { SPELL_OPERATOR, UC s  },
46
#define TK(e, s) { SPELL_ ## s,    UC #e },
47
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48
#undef OP
49
#undef TK
50
 
51
/* Accessors for the spelling category and the table name of TOKEN,
   looked up through TOKEN->type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53
 
54
/* Forward declarations of helpers private to this file.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55
static int skip_line_comment (cpp_reader *);
56
static void skip_whitespace (cpp_reader *, cppchar_t);
57
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59
static void store_comment (cpp_reader *, cpp_token *);
60
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61
                            unsigned int, enum cpp_ttype);
62
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63
static int name_p (cpp_reader *, const cpp_string *);
64
static tokenrun *next_tokenrun (tokenrun *);
65
 
66
static _cpp_buff *new_buff (size_t);
67
 
68
 
69
/* Utility routine:
70
 
71
   Compares, the token TOKEN to the NUL-terminated string STRING.
72
   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
73
int
74
cpp_ideq (const cpp_token *token, const char *string)
75
{
76
  if (token->type != CPP_NAME)
77
    return 0;
78
 
79
  return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80
}
81
 
82
/* Record a note TYPE at byte POS into the current cleaned logical
83
   line.  */
84
static void
85
add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86
{
87
  if (buffer->notes_used == buffer->notes_cap)
88
    {
89
      buffer->notes_cap = buffer->notes_cap * 2 + 200;
90
      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91
                                  buffer->notes_cap);
92
    }
93
 
94
  buffer->notes[buffer->notes_used].pos = pos;
95
  buffer->notes[buffer->notes_used].type = type;
96
  buffer->notes_used++;
97
}
98
 
99
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starting at buffer->next_line of PFILE's current buffer is
   cleaned in place.  The buffer's note list is reset, and a line note
   is recorded for every escaped newline and every trigraph
   encountered.  On return buffer->cur and buffer->line_base point at
   the start of the line, the cleaned line ends in a '\n' written by
   this function, a sentinel note follows the last real note, and
   buffer->next_line points just past the consumed input.

   Buffers marked from_stage3 are only scanned for their line end; no
   splicing or trigraph handling is done for them.

   NOTE(review): the scans rely on the buffer holding a '\n' or '\r'
   at or before buffer->rlimit -- confirm with the code that sets up
   buffers.  */
101
void
102
_cpp_clean_line (cpp_reader *pfile)
103
{
104
  cpp_buffer *buffer;
105
  /* S is the read cursor; D is the write cursor used once the line
     has to be rewritten; P is a scratch pointer for scanning
     backwards over trailing whitespace.  */
  const uchar *s;
106
  uchar c, *d, *p;
107
 
108
  buffer = pfile->buffer;
109
  /* Discard the notes of the previous line.  */
  buffer->cur_note = buffer->notes_used = 0;
110
  buffer->cur = buffer->line_base = buffer->next_line;
111
  buffer->need_line = false;
112
  /* Start one byte early because both loops pre-increment S.  */
  s = buffer->next_line - 1;
113
 
114
  if (!buffer->from_stage3)
115
    {
116
      /* Position of the most recent backslash seen by the fast path,
         or NULL if the line has none so far.  */
      const uchar *pbackslash = NULL;
117
 
118
      /* Short circuit for the common case of an un-escaped line with
         no trigraphs.  The primary win here is by not writing any
         data back to memory until we have to.  */
121
      for (;;)
122
        {
123
          c = *++s;
124
          if (__builtin_expect (c == '\n', false)
125
              || __builtin_expect (c == '\r', false))
126
            {
127
              /* Nothing has been rewritten yet, so the terminating
                 '\n' stored at `done' goes exactly here.  */
              d = (uchar *) s;
128
 
129
              if (__builtin_expect (s == buffer->rlimit, false))
130
                goto done;
131
 
132
              /* DOS line ending? */
133
              if (__builtin_expect (c == '\r', false)
134
                  && s[1] == '\n')
135
                {
136
                  s++;
137
                  if (s == buffer->rlimit)
138
                    goto done;
139
                }
140
 
141
              /* Without a backslash the newline cannot be escaped.  */
              if (__builtin_expect (pbackslash == NULL, true))
142
                goto done;
143
 
144
              /* Check for escaped newline.  The newline is escaped
                 only if the last backslash seen is the final
                 character before any trailing non-vertical space.  */
145
              p = d;
146
              while (is_nvspace (p[-1]))
147
                p--;
148
              if (p - 1 != pbackslash)
149
                goto done;
150
 
151
              /* Have an escaped newline; process it and proceed to
                 the slow path.  The note type is ' ' when whitespace
                 separated the backslash from the newline and '\\'
                 otherwise.  D is set two bytes before P because the
                 slow path pre-increments D before storing, so its
                 first store overwrites the backslash at P - 1.  */
153
              add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154
              d = p - 2;
155
              /* Temporarily reuse next_line as the lower bound for
                 the slow path's backward whitespace scans; it is
                 given its final value at `done'.  */
              buffer->next_line = p - 1;
156
              break;
157
            }
158
          if (__builtin_expect (c == '\\', false))
159
            pbackslash = s;
160
          else if (__builtin_expect (c == '?', false)
161
                   && __builtin_expect (s[1] == '?', false)
162
                   && _cpp_trigraph_map[s[2]])
163
            {
164
              /* Have a trigraph.  We may or may not have to convert
                 it.  Add a line note regardless, for -Wtrigraphs.  */
166
              add_line_note (buffer, s, s[2]);
167
              if (CPP_OPTION (pfile, trigraphs))
168
                {
169
                  /* We do, and that means we have to switch to the
                     slow path.  The replacement character is stored
                     over the first '?' and the other two bytes of
                     the trigraph are skipped.  */
171
                  d = (uchar *) s;
172
                  *d = _cpp_trigraph_map[s[2]];
173
                  s += 2;
174
                  break;
175
                }
176
            }
177
        }
178
 
179
 
180
      /* Slow path: from here on every byte read through S is copied
         down to D, closing the gaps left by removed escaped newlines
         and converted trigraphs.  */
      for (;;)
181
        {
182
          c = *++s;
183
          *++d = c;
184
 
185
          if (c == '\n' || c == '\r')
186
            {
187
                  /* Handle DOS line endings.  */
188
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189
                s++;
190
              if (s == buffer->rlimit)
191
                break;
192
 
193
              /* Escaped?  Scan back over trailing non-vertical space,
                 but never past the lower bound kept in next_line.  */
194
              p = d;
195
              while (p != buffer->next_line && is_nvspace (p[-1]))
196
                p--;
197
              if (p == buffer->next_line || p[-1] != '\\')
198
                break;
199
 
200
              /* Same bookkeeping as for an escaped newline found by
                 the fast path.  */
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
201
              d = p - 2;
202
              buffer->next_line = p - 1;
203
            }
204
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205
            {
206
              /* Add a note regardless, for the benefit of -Wtrigraphs.
                 The note is placed at D, the position in the cleaned
                 line.  */
207
              add_line_note (buffer, d, s[2]);
208
              if (CPP_OPTION (pfile, trigraphs))
209
                {
210
                  *d = _cpp_trigraph_map[s[2]];
211
                  s += 2;
212
                }
213
            }
214
        }
215
    }
216
  else
217
    {
218
      /* from_stage3 buffer: just find the end of the line.  */
      do
219
        s++;
220
      while (*s != '\n' && *s != '\r');
221
      d = (uchar *) s;
222
 
223
      /* Handle DOS line endings.  */
224
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225
        s++;
226
    }
227
 
228
 done:
229
  /* Terminate the cleaned line; this also turns a '\r' line end into
     '\n'.  */
  *d = '\n';
230
  /* A sentinel note that should never be processed.  */
231
  add_line_note (buffer, d + 1, '\n');
232
  buffer->next_line = s + 1;
233
}
234
 
235
/* Return true if the trigraph indicated by NOTE should be warned
236
   about in a comment.  */
237
static bool
238
warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
239
{
240
  const uchar *p;
241
 
242
  /* Within comments we don't warn about trigraphs, unless the
243
     trigraph forms an escaped newline, as that may change
244
     behavior.  */
245
  if (note->type != '/')
246
    return false;
247
 
248
  /* If -trigraphs, then this was an escaped newline iff the next note
249
     is coincident.  */
250
  if (CPP_OPTION (pfile, trigraphs))
251
    return note[1].pos == note->pos;
252
 
253
  /* Otherwise, see if this forms an escaped newline.  */
254
  p = note->pos + 3;
255
  while (is_nvspace (*p))
256
    p++;
257
 
258
  /* There might have been escaped newlines between the trigraph and the
259
     newline we found.  Hence the position test.  */
260
  return (*p == '\n' && p < note[1].pos);
261
}
262
 
263
/* Process the notes created by add_line_note as far as the current
264
   location.  */
265
void
266
_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267
{
268
  cpp_buffer *buffer = pfile->buffer;
269
 
270
  for (;;)
271
    {
272
      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273
      unsigned int col;
274
 
275
      if (note->pos > buffer->cur)
276
        break;
277
 
278
      buffer->cur_note++;
279
      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280
 
281
      if (note->type == '\\' || note->type == ' ')
282
        {
283
          if (note->type == ' ' && !in_comment)
284
            cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
285
                                 "backslash and newline separated by space");
286
 
287
          if (buffer->next_line > buffer->rlimit)
288
            {
289
              cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
290
                                   "backslash-newline at end of file");
291
              /* Prevent "no newline at end of file" warning.  */
292
              buffer->next_line = buffer->rlimit;
293
            }
294
 
295
          buffer->line_base = note->pos;
296
          CPP_INCREMENT_LINE (pfile, 0);
297
        }
298
      else if (_cpp_trigraph_map[note->type])
299
        {
300
          if (CPP_OPTION (pfile, warn_trigraphs)
301
              && (!in_comment || warn_in_comment (pfile, note)))
302
            {
303
              if (CPP_OPTION (pfile, trigraphs))
304
                cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
305
                                     "trigraph ??%c converted to %c",
306
                                     note->type,
307
                                     (int) _cpp_trigraph_map[note->type]);
308
              else
309
                {
310
                  cpp_error_with_line
311
                    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
312
                     "trigraph ??%c ignored, use -trigraphs to enable",
313
                     note->type);
314
                }
315
            }
316
        }
317
      else if (note->type == 0)
318
        /* Already processed in lex_raw_string.  */;
319
      else
320
        abort ();
321
    }
322
}
323
 
324
/* Skip a C-style block comment.  We find the end of the comment by
325
   seeing if an asterisk is before every '/' we encounter.  Returns
326
   nonzero if comment terminated by EOF, zero otherwise.
327
 
328
   Buffer->cur points to the initial asterisk of the comment.  */
329
bool
330
_cpp_skip_block_comment (cpp_reader *pfile)
331
{
332
  cpp_buffer *buffer = pfile->buffer;
333
  const uchar *cur = buffer->cur;
334
  uchar c;
335
 
336
  cur++;
337
  if (*cur == '/')
338
    cur++;
339
 
340
  for (;;)
341
    {
342
      /* People like decorating comments with '*', so check for '/'
343
         instead for efficiency.  */
344
      c = *cur++;
345
 
346
      if (c == '/')
347
        {
348
          if (cur[-2] == '*')
349
            break;
350
 
351
          /* Warn about potential nested comments, but not if the '/'
352
             comes immediately before the true comment delimiter.
353
             Don't bother to get it right across escaped newlines.  */
354
          if (CPP_OPTION (pfile, warn_comments)
355
              && cur[0] == '*' && cur[1] != '/')
356
            {
357
              buffer->cur = cur;
358
              cpp_error_with_line (pfile, CPP_DL_WARNING,
359
                                   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
360
                                   "\"/*\" within comment");
361
            }
362
        }
363
      else if (c == '\n')
364
        {
365
          unsigned int cols;
366
          buffer->cur = cur - 1;
367
          _cpp_process_line_notes (pfile, true);
368
          if (buffer->next_line >= buffer->rlimit)
369
            return true;
370
          _cpp_clean_line (pfile);
371
 
372
          cols = buffer->next_line - buffer->line_base;
373
          CPP_INCREMENT_LINE (pfile, cols);
374
 
375
          cur = buffer->cur;
376
        }
377
    }
378
 
379
  buffer->cur = cur;
380
  _cpp_process_line_notes (pfile, true);
381
  return false;
382
}
383
 
384
/* Skip a C++ line comment, leaving buffer->cur pointing to the
385
   terminating newline.  Handles escaped newlines.  Returns nonzero
386
   if a multiline comment.  */
387
static int
388
skip_line_comment (cpp_reader *pfile)
389
{
390
  cpp_buffer *buffer = pfile->buffer;
391
  source_location orig_line = pfile->line_table->highest_line;
392
 
393
  while (*buffer->cur != '\n')
394
    buffer->cur++;
395
 
396
  _cpp_process_line_notes (pfile, true);
397
  return orig_line != pfile->line_table->highest_line;
398
}
399
 
400
/* Skips whitespace, saving the next non-whitespace character.  */
401
static void
402
skip_whitespace (cpp_reader *pfile, cppchar_t c)
403
{
404
  cpp_buffer *buffer = pfile->buffer;
405
  bool saw_NUL = false;
406
 
407
  do
408
    {
409
      /* Horizontal space always OK.  */
410
      if (c == ' ' || c == '\t')
411
        ;
412
      /* Just \f \v or \0 left.  */
413
      else if (c == '\0')
414
        saw_NUL = true;
415
      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
416
        cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
417
                             CPP_BUF_COL (buffer),
418
                             "%s in preprocessing directive",
419
                             c == '\f' ? "form feed" : "vertical tab");
420
 
421
      c = *buffer->cur++;
422
    }
423
  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
424
  while (is_nvspace (c));
425
 
426
  if (saw_NUL)
427
    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
428
 
429
  buffer->cur--;
430
}
431
 
432
/* See if the characters of a number token are valid in a name (no
433
   '.', '+' or '-').  */
434
static int
435
name_p (cpp_reader *pfile, const cpp_string *string)
436
{
437
  unsigned int i;
438
 
439
  for (i = 0; i < string->len; i++)
440
    if (!is_idchar (string->text[i]))
441
      return 0;
442
 
443
  return 1;
444
}
445
 
446
/* After parsing an identifier or other sequence, produce a warning about
447
   sequences not in NFC/NFKC.  */
448
static void
449
warn_about_normalization (cpp_reader *pfile,
450
                          const cpp_token *token,
451
                          const struct normalize_state *s)
452
{
453
  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
454
      && !pfile->state.skipping)
455
    {
456
      /* Make sure that the token is printed using UCNs, even
457
         if we'd otherwise happily print UTF-8.  */
458
      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
459
      size_t sz;
460
 
461
      sz = cpp_spell_token (pfile, token, buf, false) - buf;
462
      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
463
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464
                             "`%.*s' is not in NFKC", (int) sz, buf);
465
      else
466
        cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
467
                             "`%.*s' is not in NFC", (int) sz, buf);
468
    }
469
}
470
 
471
/* Returns TRUE if the sequence starting at buffer->cur is invalid in
472
   an identifier.  FIRST is TRUE if this starts an identifier.  */
473
static bool
474
forms_identifier_p (cpp_reader *pfile, int first,
475
                    struct normalize_state *state)
476
{
477
  cpp_buffer *buffer = pfile->buffer;
478
 
479
  if (*buffer->cur == '$')
480
    {
481
      if (!CPP_OPTION (pfile, dollars_in_ident))
482
        return false;
483
 
484
      buffer->cur++;
485
      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
486
        {
487
          CPP_OPTION (pfile, warn_dollars) = 0;
488
          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
489
        }
490
 
491
      return true;
492
    }
493
 
494
  /* Is this a syntactically valid UCN?  */
495
  if (CPP_OPTION (pfile, extended_identifiers)
496
      && *buffer->cur == '\\'
497
      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
498
    {
499
      buffer->cur += 2;
500
      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
501
                          state))
502
        return true;
503
      buffer->cur -= 2;
504
    }
505
 
506
  return false;
507
}
508
 
509
/* Helper function to get the cpp_hashnode of the identifier BASE.  */
510
static cpp_hashnode *
511
lex_identifier_intern (cpp_reader *pfile, const uchar *base)
512
{
513
  cpp_hashnode *result;
514
  const uchar *cur;
515
  unsigned int len;
516
  unsigned int hash = HT_HASHSTEP (0, *base);
517
 
518
  cur = base + 1;
519
  while (ISIDNUM (*cur))
520
    {
521
      hash = HT_HASHSTEP (hash, *cur);
522
      cur++;
523
    }
524
  len = cur - base;
525
  hash = HT_HASHFINISH (hash, len);
526
  result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
527
                                              base, len, hash, HT_ALLOC));
528
 
529
  /* Rarely, identifiers require diagnostics when lexed.  */
530
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
531
                        && !pfile->state.skipping, 0))
532
    {
533
      /* It is allowed to poison the same identifier twice.  */
534
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
535
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
536
                   NODE_NAME (result));
537
 
538
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
539
         replacement list of a variadic macro.  */
540
      if (result == pfile->spec_nodes.n__VA_ARGS__
541
          && !pfile->state.va_args_ok)
542
        cpp_error (pfile, CPP_DL_PEDWARN,
543
                   "__VA_ARGS__ can only appear in the expansion"
544
                   " of a C99 variadic macro");
545
 
546
      /* For -Wc++-compat, warn about use of C++ named operators.  */
547
      if (result->flags & NODE_WARN_OPERATOR)
548
        cpp_error (pfile, CPP_DL_WARNING,
549
                   "identifier \"%s\" is a special operator name in C++",
550
                   NODE_NAME (result));
551
    }
552
 
553
  return result;
554
}
555
 
556
/* Get the cpp_hashnode of an identifier specified by NAME in
557
   the current cpp_reader object.  If none is found, NULL is returned.  */
558
cpp_hashnode *
559
_cpp_lex_identifier (cpp_reader *pfile, const char *name)
560
{
561
  cpp_hashnode *result;
562
  result = lex_identifier_intern (pfile, (uchar *) name);
563
  return result;
564
}
565
 
566
/* Lex an identifier starting at BUFFER->CUR - 1.  */
567
static cpp_hashnode *
568
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
569
                struct normalize_state *nst)
570
{
571
  cpp_hashnode *result;
572
  const uchar *cur;
573
  unsigned int len;
574
  unsigned int hash = HT_HASHSTEP (0, *base);
575
 
576
  cur = pfile->buffer->cur;
577
  if (! starts_ucn)
578
    while (ISIDNUM (*cur))
579
      {
580
        hash = HT_HASHSTEP (hash, *cur);
581
        cur++;
582
      }
583
  pfile->buffer->cur = cur;
584
  if (starts_ucn || forms_identifier_p (pfile, false, nst))
585
    {
586
      /* Slower version for identifiers containing UCNs (or $).  */
587
      do {
588
        while (ISIDNUM (*pfile->buffer->cur))
589
          {
590
            pfile->buffer->cur++;
591
            NORMALIZE_STATE_UPDATE_IDNUM (nst);
592
          }
593
      } while (forms_identifier_p (pfile, false, nst));
594
      result = _cpp_interpret_identifier (pfile, base,
595
                                          pfile->buffer->cur - base);
596
    }
597
  else
598
    {
599
      len = cur - base;
600
      hash = HT_HASHFINISH (hash, len);
601
 
602
      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
603
                                                  base, len, hash, HT_ALLOC));
604
    }
605
 
606
  /* Rarely, identifiers require diagnostics when lexed.  */
607
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
608
                        && !pfile->state.skipping, 0))
609
    {
610
      /* It is allowed to poison the same identifier twice.  */
611
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
612
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
613
                   NODE_NAME (result));
614
 
615
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
616
         replacement list of a variadic macro.  */
617
      if (result == pfile->spec_nodes.n__VA_ARGS__
618
          && !pfile->state.va_args_ok)
619
        cpp_error (pfile, CPP_DL_PEDWARN,
620
                   "__VA_ARGS__ can only appear in the expansion"
621
                   " of a C99 variadic macro");
622
 
623
      /* For -Wc++-compat, warn about use of C++ named operators.  */
624
      if (result->flags & NODE_WARN_OPERATOR)
625
        cpp_error (pfile, CPP_DL_WARNING,
626
                   "identifier \"%s\" is a special operator name in C++",
627
                   NODE_NAME (result));
628
    }
629
 
630
  return result;
631
}
632
 
633
/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
634
static void
635
lex_number (cpp_reader *pfile, cpp_string *number,
636
            struct normalize_state *nst)
637
{
638
  const uchar *cur;
639
  const uchar *base;
640
  uchar *dest;
641
 
642
  base = pfile->buffer->cur - 1;
643
  do
644
    {
645
      cur = pfile->buffer->cur;
646
 
647
      /* N.B. ISIDNUM does not include $.  */
648
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
649
        {
650
          cur++;
651
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
652
        }
653
 
654
      pfile->buffer->cur = cur;
655
    }
656
  while (forms_identifier_p (pfile, false, nst));
657
 
658
  number->len = cur - base;
659
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
660
  memcpy (dest, base, number->len);
661
  dest[number->len] = '\0';
662
  number->text = dest;
663
}
664
 
665
/* Create a token of type TYPE with a literal spelling.  */
666
static void
667
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
668
                unsigned int len, enum cpp_ttype type)
669
{
670
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
671
 
672
  memcpy (dest, base, len);
673
  dest[len] = '\0';
674
  token->type = type;
675
  token->val.str.len = len;
676
  token->val.str.text = dest;
677
}
678
 
679
/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
680
   sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
681
 
682
static void
683
bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
684
                _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
685
{
686
  _cpp_buff *first_buff = *first_buff_p;
687
  _cpp_buff *last_buff = *last_buff_p;
688
 
689
  if (first_buff == NULL)
690
    first_buff = last_buff = _cpp_get_buff (pfile, len);
691
  else if (len > BUFF_ROOM (last_buff))
692
    {
693
      size_t room = BUFF_ROOM (last_buff);
694
      memcpy (BUFF_FRONT (last_buff), base, room);
695
      BUFF_FRONT (last_buff) += room;
696
      base += room;
697
      len -= room;
698
      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
699
    }
700
 
701
  memcpy (BUFF_FRONT (last_buff), base, len);
702
  BUFF_FRONT (last_buff) += len;
703
 
704
  *first_buff_p = first_buff;
705
  *last_buff_p = last_buff;
706
}
707
 
708
/* Lexes a raw string.  The stored string contains the spelling, including
709
   double quotes, delimiter string, '(' and ')', any leading
710
   'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
711
   literal, or CPP_OTHER if it was not properly terminated.
712
 
713
   The spelling is NUL-terminated, but it is not guaranteed that this
714
   is the first NUL since embedded NULs are preserved.  */
715
 
716
static void
717
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
718
                const uchar *cur)
719
{
720
  source_location saw_NUL = 0;
721
  const uchar *raw_prefix;
722
  unsigned int raw_prefix_len = 0;
723
  enum cpp_ttype type;
724
  size_t total_len = 0;
725
  _cpp_buff *first_buff = NULL, *last_buff = NULL;
726
  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
727
 
728
  type = (*base == 'L' ? CPP_WSTRING :
729
          *base == 'U' ? CPP_STRING32 :
730
          *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
731
          : CPP_STRING);
732
 
733
  raw_prefix = cur + 1;
734
  while (raw_prefix_len < 16)
735
    {
736
      switch (raw_prefix[raw_prefix_len])
737
        {
738
        case ' ': case '(': case ')': case '\\': case '\t':
739
        case '\v': case '\f': case '\n': default:
740
          break;
741
        /* Basic source charset except the above chars.  */
742
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
743
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
744
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
745
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
746
        case 'y': case 'z':
747
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
748
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
749
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
750
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
751
        case 'Y': case 'Z':
752
        case '0': case '1': case '2': case '3': case '4': case '5':
753
        case '6': case '7': case '8': case '9':
754
        case '_': case '{': case '}': case '#': case '[': case ']':
755
        case '<': case '>': case '%': case ':': case ';': case '.':
756
        case '?': case '*': case '+': case '-': case '/': case '^':
757
        case '&': case '|': case '~': case '!': case '=': case ',':
758
        case '"': case '\'':
759
          raw_prefix_len++;
760
          continue;
761
        }
762
      break;
763
    }
764
 
765
  if (raw_prefix[raw_prefix_len] != '(')
766
    {
767
      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
768
                + 1;
769
      if (raw_prefix_len == 16)
770
        cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
771
                             "raw string delimiter longer than 16 characters");
772
      else
773
        cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
774
                             "invalid character '%c' in raw string delimiter",
775
                             (int) raw_prefix[raw_prefix_len]);
776
      pfile->buffer->cur = raw_prefix - 1;
777
      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
778
      return;
779
    }
780
 
781
  cur = raw_prefix + raw_prefix_len + 1;
782
  for (;;)
783
    {
784
#define BUF_APPEND(STR,LEN)                                     \
785
      do {                                                      \
786
        bufring_append (pfile, (const uchar *)(STR), (LEN),     \
787
                        &first_buff, &last_buff);               \
788
        total_len += (LEN);                                     \
789
      } while (0);
790
 
791
      cppchar_t c;
792
 
793
      /* If we previously performed any trigraph or line splicing
794
         transformations, undo them within the body of the raw string.  */
795
      while (note->pos < cur)
796
        ++note;
797
      for (; note->pos == cur; ++note)
798
        {
799
          switch (note->type)
800
            {
801
            case '\\':
802
            case ' ':
803
              /* Restore backslash followed by newline.  */
804
              BUF_APPEND (base, cur - base);
805
              base = cur;
806
              BUF_APPEND ("\\", 1);
807
            after_backslash:
808
              if (note->type == ' ')
809
                {
810
                  /* GNU backslash whitespace newline extension.  FIXME
811
                     could be any sequence of non-vertical space.  When we
812
                     can properly restore any such sequence, we should mark
813
                     this note as handled so _cpp_process_line_notes
814
                     doesn't warn.  */
815
                  BUF_APPEND (" ", 1);
816
                }
817
 
818
              BUF_APPEND ("\n", 1);
819
              break;
820
 
821
            case 0:
822
              /* Already handled.  */
823
              break;
824
 
825
            default:
826
              if (_cpp_trigraph_map[note->type])
827
                {
828
                  /* Don't warn about this trigraph in
829
                     _cpp_process_line_notes, since trigraphs show up as
830
                     trigraphs in raw strings.  */
831
                  uchar type = note->type;
832
                  note->type = 0;
833
 
834
                  if (!CPP_OPTION (pfile, trigraphs))
835
                    /* If we didn't convert the trigraph in the first
836
                       place, don't do anything now either.  */
837
                    break;
838
 
839
                  BUF_APPEND (base, cur - base);
840
                  base = cur;
841
                  BUF_APPEND ("??", 2);
842
 
843
                  /* ??/ followed by newline gets two line notes, one for
844
                     the trigraph and one for the backslash/newline.  */
845
                  if (type == '/' && note[1].pos == cur)
846
                    {
847
                      if (note[1].type != '\\'
848
                          && note[1].type != ' ')
849
                        abort ();
850
                      BUF_APPEND ("/", 1);
851
                      ++note;
852
                      goto after_backslash;
853
                    }
854
                  /* The ) from ??) could be part of the suffix.  */
855
                  else if (type == ')'
856
                           && strncmp ((const char *) cur+1,
857
                                       (const char *) raw_prefix,
858
                                       raw_prefix_len) == 0
859
                           && cur[raw_prefix_len+1] == '"')
860
                    {
861
                      cur += raw_prefix_len+2;
862
                      goto break_outer_loop;
863
                    }
864
                  else
865
                    {
866
                      /* Skip the replacement character.  */
867
                      base = ++cur;
868
                      BUF_APPEND (&type, 1);
869
                    }
870
                }
871
              else
872
                abort ();
873
              break;
874
            }
875
        }
876
      c = *cur++;
877
 
878
      if (c == ')'
879
          && strncmp ((const char *) cur, (const char *) raw_prefix,
880
                      raw_prefix_len) == 0
881
          && cur[raw_prefix_len] == '"')
882
        {
883
          cur += raw_prefix_len + 1;
884
          break;
885
        }
886
      else if (c == '\n')
887
        {
888
          if (pfile->state.in_directive
889
              || pfile->state.parsing_args
890
              || pfile->state.in_deferred_pragma)
891
            {
892
              cur--;
893
              type = CPP_OTHER;
894
              cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
895
                                   "unterminated raw string");
896
              break;
897
            }
898
 
899
          BUF_APPEND (base, cur - base);
900
 
901
          if (pfile->buffer->cur < pfile->buffer->rlimit)
902
            CPP_INCREMENT_LINE (pfile, 0);
903
          pfile->buffer->need_line = true;
904
 
905
          pfile->buffer->cur = cur-1;
906
          _cpp_process_line_notes (pfile, false);
907
          if (!_cpp_get_fresh_line (pfile))
908
            {
909
              source_location src_loc = token->src_loc;
910
              token->type = CPP_EOF;
911
              /* Tell the compiler the line number of the EOF token.  */
912
              token->src_loc = pfile->line_table->highest_line;
913
              token->flags = BOL;
914
              if (first_buff != NULL)
915
                _cpp_release_buff (pfile, first_buff);
916
              cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
917
                                   "unterminated raw string");
918
              return;
919
            }
920
 
921
          cur = base = pfile->buffer->cur;
922
          note = &pfile->buffer->notes[pfile->buffer->cur_note];
923
        }
924
      else if (c == '\0' && !saw_NUL)
925
        LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
926
                                     CPP_BUF_COLUMN (pfile->buffer, cur));
927
    }
928
 break_outer_loop:
929
 
930
  if (saw_NUL && !pfile->state.skipping)
931
    cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
932
               "null character(s) preserved in literal");
933
 
934
  pfile->buffer->cur = cur;
935
  if (first_buff == NULL)
936
    create_literal (pfile, token, base, cur - base, type);
937
  else
938
    {
939
      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
940
 
941
      token->type = type;
942
      token->val.str.len = total_len + (cur - base);
943
      token->val.str.text = dest;
944
      last_buff = first_buff;
945
      while (last_buff != NULL)
946
        {
947
          memcpy (dest, last_buff->base,
948
                  BUFF_FRONT (last_buff) - last_buff->base);
949
          dest += BUFF_FRONT (last_buff) - last_buff->base;
950
          last_buff = last_buff->next;
951
        }
952
      _cpp_release_buff (pfile, first_buff);
953
      memcpy (dest, base, cur - base);
954
      dest[cur - base] = '\0';
955
    }
956
}
957
 
958
/* Lexes a string, character constant, or angle-bracketed header file
959
   name.  The stored string contains the spelling, including opening
960
   quote and any leading 'L', 'u', 'U' or 'u8' and optional
961
   'R' modifier.  It returns the type of the literal, or CPP_OTHER
962
   if it was not properly terminated, or CPP_LESS for an unterminated
963
   header name which must be relexed as normal tokens.
964
 
965
   The spelling is NUL-terminated, but it is not guaranteed that this
966
   is the first NUL since embedded NULs are preserved.  */
967
static void
968
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
969
{
970
  bool saw_NUL = false;
971
  const uchar *cur;
972
  cppchar_t terminator;
973
  enum cpp_ttype type;
974
 
975
  cur = base;
976
  terminator = *cur++;
977
  if (terminator == 'L' || terminator == 'U')
978
    terminator = *cur++;
979
  else if (terminator == 'u')
980
    {
981
      terminator = *cur++;
982
      if (terminator == '8')
983
        terminator = *cur++;
984
    }
985
  if (terminator == 'R')
986
    {
987
      lex_raw_string (pfile, token, base, cur);
988
      return;
989
    }
990
  if (terminator == '"')
991
    type = (*base == 'L' ? CPP_WSTRING :
992
            *base == 'U' ? CPP_STRING32 :
993
            *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
994
                         : CPP_STRING);
995
  else if (terminator == '\'')
996
    type = (*base == 'L' ? CPP_WCHAR :
997
            *base == 'U' ? CPP_CHAR32 :
998
            *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
999
  else
1000
    terminator = '>', type = CPP_HEADER_NAME;
1001
 
1002
  for (;;)
1003
    {
1004
      cppchar_t c = *cur++;
1005
 
1006
      /* In #include-style directives, terminators are not escapable.  */
1007
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1008
        cur++;
1009
      else if (c == terminator)
1010
        break;
1011
      else if (c == '\n')
1012
        {
1013
          cur--;
1014
          /* Unmatched quotes always yield undefined behavior, but
1015
             greedy lexing means that what appears to be an unterminated
1016
             header name may actually be a legitimate sequence of tokens.  */
1017
          if (terminator == '>')
1018
            {
1019
              token->type = CPP_LESS;
1020
              return;
1021
            }
1022
          type = CPP_OTHER;
1023
          break;
1024
        }
1025
      else if (c == '\0')
1026
        saw_NUL = true;
1027
    }
1028
 
1029
  if (saw_NUL && !pfile->state.skipping)
1030
    cpp_error (pfile, CPP_DL_WARNING,
1031
               "null character(s) preserved in literal");
1032
 
1033
  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1034
    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1035
               (int) terminator);
1036
 
1037
  pfile->buffer->cur = cur;
1038
  create_literal (pfile, token, base, cur - base, type);
1039
}
1040
 
1041
/* Return the comment table. The client may not make any assumption
1042
   about the ordering of the table.  */
1043
cpp_comment_table *
1044
cpp_get_comments (cpp_reader *pfile)
1045
{
1046
  return &pfile->comments;
1047
}
1048
 
1049
/* Append a comment to the end of the comment table. */
1050
static void
1051
store_comment (cpp_reader *pfile, cpp_token *token)
1052
{
1053
  int len;
1054
 
1055
  if (pfile->comments.allocated == 0)
1056
    {
1057
      pfile->comments.allocated = 256;
1058
      pfile->comments.entries = (cpp_comment *) xmalloc
1059
        (pfile->comments.allocated * sizeof (cpp_comment));
1060
    }
1061
 
1062
  if (pfile->comments.count == pfile->comments.allocated)
1063
    {
1064
      pfile->comments.allocated *= 2;
1065
      pfile->comments.entries = (cpp_comment *) xrealloc
1066
        (pfile->comments.entries,
1067
         pfile->comments.allocated * sizeof (cpp_comment));
1068
    }
1069
 
1070
  len = token->val.str.len;
1071
 
1072
  /* Copy comment. Note, token may not be NULL terminated. */
1073
  pfile->comments.entries[pfile->comments.count].comment =
1074
    (char *) xmalloc (sizeof (char) * (len + 1));
1075
  memcpy (pfile->comments.entries[pfile->comments.count].comment,
1076
          token->val.str.text, len);
1077
  pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1078
 
1079
  /* Set source location. */
1080
  pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1081
 
1082
  /* Increment the count of entries in the comment table. */
1083
  pfile->comments.count++;
1084
}
1085
 
1086
/* The stored comment includes the comment start and any terminator.  */
1087
static void
1088
save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1089
              cppchar_t type)
1090
{
1091
  unsigned char *buffer;
1092
  unsigned int len, clen;
1093
 
1094
  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1095
 
1096
  /* C++ comments probably (not definitely) have moved past a new
1097
     line, which we don't want to save in the comment.  */
1098
  if (is_vspace (pfile->buffer->cur[-1]))
1099
    len--;
1100
 
1101
  /* If we are currently in a directive, then we need to store all
1102
     C++ comments as C comments internally, and so we need to
1103
     allocate a little extra space in that case.
1104
 
1105
     Note that the only time we encounter a directive here is
1106
     when we are saving comments in a "#define".  */
1107
  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1108
 
1109
  buffer = _cpp_unaligned_alloc (pfile, clen);
1110
 
1111
  token->type = CPP_COMMENT;
1112
  token->val.str.len = clen;
1113
  token->val.str.text = buffer;
1114
 
1115
  buffer[0] = '/';
1116
  memcpy (buffer + 1, from, len - 1);
1117
 
1118
  /* Finish conversion to a C comment, if necessary.  */
1119
  if (pfile->state.in_directive && type == '/')
1120
    {
1121
      buffer[1] = '*';
1122
      buffer[clen - 2] = '*';
1123
      buffer[clen - 1] = '/';
1124
    }
1125
 
1126
  /* Finally store this comment for use by clients of libcpp. */
1127
  store_comment (pfile, token);
1128
}
1129
 
1130
/* Allocate COUNT tokens for RUN.  */
1131
void
1132
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
1133
{
1134
  run->base = XNEWVEC (cpp_token, count);
1135
  run->limit = run->base + count;
1136
  run->next = NULL;
1137
}
1138
 
1139
/* Returns the next tokenrun, or creates one if there is none.  */
1140
static tokenrun *
1141
next_tokenrun (tokenrun *run)
1142
{
1143
  if (run->next == NULL)
1144
    {
1145
      run->next = XNEW (tokenrun);
1146
      run->next->prev = run;
1147
      _cpp_init_tokenrun (run->next, 250);
1148
    }
1149
 
1150
  return run->next;
1151
}
1152
 
1153
/* Look ahead in the input stream.  */
1154
const cpp_token *
1155
cpp_peek_token (cpp_reader *pfile, int index)
1156
{
1157
  cpp_context *context = pfile->context;
1158
  const cpp_token *peektok;
1159
  int count;
1160
 
1161
  /* First, scan through any pending cpp_context objects.  */
1162
  while (context->prev)
1163
    {
1164
      ptrdiff_t sz = (context->direct_p
1165
                      ? LAST (context).token - FIRST (context).token
1166
                      : LAST (context).ptoken - FIRST (context).ptoken);
1167
 
1168
      if (index < (int) sz)
1169
        return (context->direct_p
1170
                ? FIRST (context).token + index
1171
                : *(FIRST (context).ptoken + index));
1172
 
1173
      index -= (int) sz;
1174
      context = context->prev;
1175
    }
1176
 
1177
  /* We will have to read some new tokens after all (and do so
1178
     without invalidating preceding tokens).  */
1179
  count = index;
1180
  pfile->keep_tokens++;
1181
 
1182
  do
1183
    {
1184
      peektok = _cpp_lex_token (pfile);
1185
      if (peektok->type == CPP_EOF)
1186
        return peektok;
1187
    }
1188
  while (index--);
1189
 
1190
  _cpp_backup_tokens_direct (pfile, count + 1);
1191
  pfile->keep_tokens--;
1192
 
1193
  return peektok;
1194
}
1195
 
1196
/* Allocate a single token that is invalidated at the same time as the
1197
   rest of the tokens on the line.  Has its line and col set to the
1198
   same as the last lexed token, so that diagnostics appear in the
1199
   right place.  */
1200
cpp_token *
1201
_cpp_temp_token (cpp_reader *pfile)
1202
{
1203
  cpp_token *old, *result;
1204
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1205
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1206
 
1207
  old = pfile->cur_token - 1;
1208
  /* Any pre-existing lookaheads must not be clobbered.  */
1209
  if (la)
1210
    {
1211
      if (sz <= la)
1212
        {
1213
          tokenrun *next = next_tokenrun (pfile->cur_run);
1214
 
1215
          if (sz < la)
1216
            memmove (next->base + 1, next->base,
1217
                     (la - sz) * sizeof (cpp_token));
1218
 
1219
          next->base[0] = pfile->cur_run->limit[-1];
1220
        }
1221
 
1222
      if (sz > 1)
1223
        memmove (pfile->cur_token + 1, pfile->cur_token,
1224
                 MIN (la, sz - 1) * sizeof (cpp_token));
1225
    }
1226
 
1227
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
1228
    {
1229
      pfile->cur_run = next_tokenrun (pfile->cur_run);
1230
      pfile->cur_token = pfile->cur_run->base;
1231
    }
1232
 
1233
  result = pfile->cur_token++;
1234
  result->src_loc = old->src_loc;
1235
  return result;
1236
}
1237
 
1238
/* Lex a token into RESULT (external interface).  Takes care of issues
1239
   like directive handling, token lookahead, multiple include
1240
   optimization and skipping.  */
1241
const cpp_token *
1242
_cpp_lex_token (cpp_reader *pfile)
1243
{
1244
  cpp_token *result;
1245
 
1246
  for (;;)
1247
    {
1248
      if (pfile->cur_token == pfile->cur_run->limit)
1249
        {
1250
          pfile->cur_run = next_tokenrun (pfile->cur_run);
1251
          pfile->cur_token = pfile->cur_run->base;
1252
        }
1253
      /* We assume that the current token is somewhere in the current
1254
         run.  */
1255
      if (pfile->cur_token < pfile->cur_run->base
1256
          || pfile->cur_token >= pfile->cur_run->limit)
1257
        abort ();
1258
 
1259
      if (pfile->lookaheads)
1260
        {
1261
          pfile->lookaheads--;
1262
          result = pfile->cur_token++;
1263
        }
1264
      else
1265
        result = _cpp_lex_direct (pfile);
1266
 
1267
      if (result->flags & BOL)
1268
        {
1269
          /* Is this a directive.  If _cpp_handle_directive returns
1270
             false, it is an assembler #.  */
1271
          if (result->type == CPP_HASH
1272
              /* 6.10.3 p 11: Directives in a list of macro arguments
1273
                 gives undefined behavior.  This implementation
1274
                 handles the directive as normal.  */
1275
              && pfile->state.parsing_args != 1)
1276
            {
1277
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1278
                {
1279
                  if (pfile->directive_result.type == CPP_PADDING)
1280
                    continue;
1281
                  result = &pfile->directive_result;
1282
                }
1283
            }
1284
          else if (pfile->state.in_deferred_pragma)
1285
            result = &pfile->directive_result;
1286
 
1287
          if (pfile->cb.line_change && !pfile->state.skipping)
1288
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1289
        }
1290
 
1291
      /* We don't skip tokens in directives.  */
1292
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1293
        break;
1294
 
1295
      /* Outside a directive, invalidate controlling macros.  At file
1296
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1297
         get here and MI optimization works.  */
1298
      pfile->mi_valid = false;
1299
 
1300
      if (!pfile->state.skipping || result->type == CPP_EOF)
1301
        break;
1302
    }
1303
 
1304
  return result;
1305
}
1306
 
1307
/* Returns true if a fresh line has been loaded.  */
1308
bool
1309
_cpp_get_fresh_line (cpp_reader *pfile)
1310
{
1311
  int return_at_eof;
1312
 
1313
  /* We can't get a new line until we leave the current directive.  */
1314
  if (pfile->state.in_directive)
1315
    return false;
1316
 
1317
  for (;;)
1318
    {
1319
      cpp_buffer *buffer = pfile->buffer;
1320
 
1321
      if (!buffer->need_line)
1322
        return true;
1323
 
1324
      if (buffer->next_line < buffer->rlimit)
1325
        {
1326
          _cpp_clean_line (pfile);
1327
          return true;
1328
        }
1329
 
1330
      /* First, get out of parsing arguments state.  */
1331
      if (pfile->state.parsing_args)
1332
        return false;
1333
 
1334
      /* End of buffer.  Non-empty files should end in a newline.  */
1335
      if (buffer->buf != buffer->rlimit
1336
          && buffer->next_line > buffer->rlimit
1337
          && !buffer->from_stage3)
1338
        {
1339
          /* Clip to buffer size.  */
1340
          buffer->next_line = buffer->rlimit;
1341
        }
1342
 
1343
      return_at_eof = buffer->return_at_eof;
1344
      _cpp_pop_buffer (pfile);
1345
      if (pfile->buffer == NULL || return_at_eof)
1346
        return false;
1347
    }
1348
}
1349
 
1350
/* Helper for the lexer's operator cases.  If the next unread
   character of `buffer' is CHAR, consume it and give `result' the
   type THEN_TYPE; otherwise consume nothing and give it ELSE_TYPE.
   Expects variables named `buffer' and `result' in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = (THEN_TYPE);                   \
        }                                               \
      else                                              \
        result->type = (ELSE_TYPE);                     \
    }                                                   \
  while (0)
1358
 
1359
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token; its src_loc field holds the
   token's location.  */
1370
cpp_token *
1371
_cpp_lex_direct (cpp_reader *pfile)
1372
{
1373
  cppchar_t c;
1374
  cpp_buffer *buffer;
1375
  const unsigned char *comment_start;
1376
  cpp_token *result = pfile->cur_token++;
1377
 
1378
  /* Entered at the start and again after every newline: the flags
     collected so far are discarded.  */
 fresh_line:
1379
  result->flags = 0;
1380
  buffer = pfile->buffer;
1381
  if (buffer->need_line)
1382
    {
1383
      /* The end of the line ends a deferred pragma: report
         CPP_PRAGMA_EOL and leave deferred-pragma mode.  */
      if (pfile->state.in_deferred_pragma)
1384
        {
1385
          result->type = CPP_PRAGMA_EOL;
1386
          pfile->state.in_deferred_pragma = false;
1387
          if (!pfile->state.pragma_allow_expansion)
1388
            pfile->state.prevent_expansion--;
1389
          return result;
1390
        }
1391
      /* No further line can be supplied; _cpp_get_fresh_line also
         refuses while inside a directive.  */
      if (!_cpp_get_fresh_line (pfile))
1392
        {
1393
          result->type = CPP_EOF;
1394
          if (!pfile->state.in_directive)
1395
            {
1396
              /* Tell the compiler the line number of the EOF token.  */
1397
              result->src_loc = pfile->line_table->highest_line;
1398
              result->flags = BOL;
1399
            }
1400
          return result;
1401
        }
1402
      /* Unless tokens are being kept, start again at the first slot
         of the base token run.  */
      if (!pfile->keep_tokens)
1403
        {
1404
          pfile->cur_run = &pfile->base_run;
1405
          result = pfile->base_run.base;
1406
          pfile->cur_token = result + 1;
1407
        }
1408
      result->flags = BOL;
1409
      if (pfile->state.parsing_args == 2)
1410
        result->flags |= PREV_WHITE;
1411
    }
1412
  /* Reload: _cpp_get_fresh_line may have popped buffers.  */
  buffer = pfile->buffer;
1413
  /* Re-entered after a discarded comment.  */
 update_tokens_line:
1414
  result->src_loc = pfile->line_table->highest_line;
1415
 
1416
  /* Re-entered after horizontal whitespace has been skipped.  */
 skipped_white:
1417
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1418
      && !pfile->overlaid_buffer)
1419
    {
1420
      _cpp_process_line_notes (pfile, false);
1421
      result->src_loc = pfile->line_table->highest_line;
1422
    }
1423
  c = *buffer->cur++;
1424
 
1425
  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1426
                               CPP_BUF_COLUMN (buffer, buffer->cur));
1427
 
1428
  /* Dispatch on the first character of the token.  */
  switch (c)
1429
    {
1430
    case ' ': case '\t': case '\f': case '\v': case '\0':
1431
      result->flags |= PREV_WHITE;
1432
      skip_whitespace (pfile, c);
1433
      goto skipped_white;
1434
 
1435
    case '\n':
1436
      /* The line is only incremented while text remains in the
         buffer.  */
      if (buffer->cur < buffer->rlimit)
1437
        CPP_INCREMENT_LINE (pfile, 0);
1438
      buffer->need_line = true;
1439
      goto fresh_line;
1440
 
1441
    case '0': case '1': case '2': case '3': case '4':
1442
    case '5': case '6': case '7': case '8': case '9':
1443
      {
1444
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1445
        result->type = CPP_NUMBER;
1446
        lex_number (pfile, &result->val.str, &nst);
1447
        warn_about_normalization (pfile, result, &nst);
1448
        break;
1449
      }
1450
 
1451
    case 'L':
1452
    case 'u':
1453
    case 'U':
1454
    case 'R':
1455
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
         wide strings or raw strings.  Prefixes other than 'L' are
         only recognized when the uliterals option is set.  */
1457
      if (c == 'L' || CPP_OPTION (pfile, uliterals))
1458
        {
1459
          if ((*buffer->cur == '\'' && c != 'R')
1460
              || *buffer->cur == '"'
1461
              || (*buffer->cur == 'R'
1462
                  && c != 'R'
1463
                  && buffer->cur[1] == '"'
1464
                  && CPP_OPTION (pfile, uliterals))
1465
              || (*buffer->cur == '8'
1466
                  && c == 'u'
1467
                  && (buffer->cur[1] == '"'
1468
                      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
1469
            {
1470
              lex_string (pfile, result, buffer->cur - 1);
1471
              break;
1472
            }
1473
        }
1474
      /* Fall through.  */
1475
 
1476
      /* Identifiers.  'L', 'R', 'u' and 'U' are missing from the
         lists below because they have their own case above, which
         falls through to here when no literal follows.  */
    case '_':
1477
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1478
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1479
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1480
    case 's': case 't':           case 'v': case 'w': case 'x':
1481
    case 'y': case 'z':
1482
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1483
    case 'G': case 'H': case 'I': case 'J': case 'K':
1484
    case 'M': case 'N': case 'O': case 'P': case 'Q':
1485
    case 'S': case 'T':           case 'V': case 'W': case 'X':
1486
    case 'Y': case 'Z':
1487
      result->type = CPP_NAME;
1488
      {
1489
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1490
        result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1491
                                                &nst);
1492
        warn_about_normalization (pfile, result, &nst);
1493
      }
1494
 
1495
      /* Convert named operators to their proper types.  */
1496
      if (result->val.node.node->flags & NODE_OPERATOR)
1497
        {
1498
          result->flags |= NAMED_OP;
1499
          result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1500
        }
1501
      break;
1502
 
1503
    case '\'':
1504
    case '"':
1505
      lex_string (pfile, result, buffer->cur - 1);
1506
      break;
1507
 
1508
    case '/':
1509
      /* A potential block or line comment.  */
1510
      comment_start = buffer->cur;
1511
      c = *buffer->cur;
1512
 
1513
      if (c == '*')
1514
        {
1515
          if (_cpp_skip_block_comment (pfile))
1516
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1517
        }
1518
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1519
                            || cpp_in_system_header (pfile)))
1520
        {
1521
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
1523
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1524
              && ! buffer->warned_cplusplus_comments)
1525
            {
1526
              cpp_error (pfile, CPP_DL_PEDWARN,
1527
                         "C++ style comments are not allowed in ISO C90");
1528
              cpp_error (pfile, CPP_DL_PEDWARN,
1529
                         "(this will be reported only once per input file)");
1530
              buffer->warned_cplusplus_comments = 1;
1531
            }
1532
 
1533
          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1534
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1535
        }
1536
      else if (c == '=')
1537
        {
1538
          buffer->cur++;
1539
          result->type = CPP_DIV_EQ;
1540
          break;
1541
        }
1542
      else
1543
        {
1544
          result->type = CPP_DIV;
1545
          break;
1546
        }
1547
 
1548
      /* Only the two comment branches reach this point.  A comment
         that is not saved counts as whitespace, and lexing resumes
         after it.  */
      if (!pfile->state.save_comments)
1549
        {
1550
          result->flags |= PREV_WHITE;
1551
          goto update_tokens_line;
1552
        }
1553
 
1554
      /* Save the comment as a token in its own right.  */
1555
      save_comment (pfile, result, comment_start, c);
1556
      break;
1557
 
1558
    case '<':
1559
      if (pfile->state.angled_headers)
1560
        {
1561
          /* lex_string reports an unterminated header name as
             CPP_LESS; in that case lex '<' as an operator below.  */
          lex_string (pfile, result, buffer->cur - 1);
1562
          if (result->type != CPP_LESS)
1563
            break;
1564
        }
1565
 
1566
      result->type = CPP_LESS;
1567
      if (*buffer->cur == '=')
1568
        buffer->cur++, result->type = CPP_LESS_EQ;
1569
      else if (*buffer->cur == '<')
1570
        {
1571
          buffer->cur++;
1572
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1573
        }
1574
      else if (CPP_OPTION (pfile, digraphs))
1575
        {
1576
          if (*buffer->cur == ':')
1577
            {
1578
              buffer->cur++;
1579
              result->flags |= DIGRAPH;
1580
              result->type = CPP_OPEN_SQUARE;
1581
            }
1582
          else if (*buffer->cur == '%')
1583
            {
1584
              buffer->cur++;
1585
              result->flags |= DIGRAPH;
1586
              result->type = CPP_OPEN_BRACE;
1587
            }
1588
        }
1589
      break;
1590
 
1591
    case '>':
1592
      result->type = CPP_GREATER;
1593
      if (*buffer->cur == '=')
1594
        buffer->cur++, result->type = CPP_GREATER_EQ;
1595
      else if (*buffer->cur == '>')
1596
        {
1597
          buffer->cur++;
1598
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1599
        }
1600
      break;
1601
 
1602
    case '%':
1603
      result->type = CPP_MOD;
1604
      if (*buffer->cur == '=')
1605
        buffer->cur++, result->type = CPP_MOD_EQ;
1606
      else if (CPP_OPTION (pfile, digraphs))
1607
        {
1608
          if (*buffer->cur == ':')
1609
            {
1610
              buffer->cur++;
1611
              result->flags |= DIGRAPH;
1612
              result->type = CPP_HASH;
1613
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
1614
                buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1615
            }
1616
          else if (*buffer->cur == '>')
1617
            {
1618
              buffer->cur++;
1619
              result->flags |= DIGRAPH;
1620
              result->type = CPP_CLOSE_BRACE;
1621
            }
1622
        }
1623
      break;
1624
 
1625
    case '.':
1626
      result->type = CPP_DOT;
1627
      if (ISDIGIT (*buffer->cur))
1628
        {
1629
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1630
          result->type = CPP_NUMBER;
1631
          lex_number (pfile, &result->val.str, &nst);
1632
          warn_about_normalization (pfile, result, &nst);
1633
        }
1634
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1635
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
1636
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1637
        buffer->cur++, result->type = CPP_DOT_STAR;
1638
      break;
1639
 
1640
    case '+':
1641
      result->type = CPP_PLUS;
1642
      if (*buffer->cur == '+')
1643
        buffer->cur++, result->type = CPP_PLUS_PLUS;
1644
      else if (*buffer->cur == '=')
1645
        buffer->cur++, result->type = CPP_PLUS_EQ;
1646
      break;
1647
 
1648
    case '-':
1649
      result->type = CPP_MINUS;
1650
      if (*buffer->cur == '>')
1651
        {
1652
          buffer->cur++;
1653
          result->type = CPP_DEREF;
1654
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1655
            buffer->cur++, result->type = CPP_DEREF_STAR;
1656
        }
1657
      else if (*buffer->cur == '-')
1658
        buffer->cur++, result->type = CPP_MINUS_MINUS;
1659
      else if (*buffer->cur == '=')
1660
        buffer->cur++, result->type = CPP_MINUS_EQ;
1661
      break;
1662
 
1663
    case '&':
1664
      result->type = CPP_AND;
1665
      if (*buffer->cur == '&')
1666
        buffer->cur++, result->type = CPP_AND_AND;
1667
      else if (*buffer->cur == '=')
1668
        buffer->cur++, result->type = CPP_AND_EQ;
1669
      break;
1670
 
1671
    case '|':
1672
      result->type = CPP_OR;
1673
      if (*buffer->cur == '|')
1674
        buffer->cur++, result->type = CPP_OR_OR;
1675
      else if (*buffer->cur == '=')
1676
        buffer->cur++, result->type = CPP_OR_EQ;
1677
      break;
1678
 
1679
    case ':':
1680
      result->type = CPP_COLON;
1681
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1682
        buffer->cur++, result->type = CPP_SCOPE;
1683
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1684
        {
1685
          buffer->cur++;
1686
          result->flags |= DIGRAPH;
1687
          result->type = CPP_CLOSE_SQUARE;
1688
        }
1689
      break;
1690
 
1691
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1692
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1693
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1694
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1695
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1696
 
1697
    case '?': result->type = CPP_QUERY; break;
1698
    case '~': result->type = CPP_COMPL; break;
1699
    case ',': result->type = CPP_COMMA; break;
1700
    case '(': result->type = CPP_OPEN_PAREN; break;
1701
    case ')': result->type = CPP_CLOSE_PAREN; break;
1702
    case '[': result->type = CPP_OPEN_SQUARE; break;
1703
    case ']': result->type = CPP_CLOSE_SQUARE; break;
1704
    case '{': result->type = CPP_OPEN_BRACE; break;
1705
    case '}': result->type = CPP_CLOSE_BRACE; break;
1706
    case ';': result->type = CPP_SEMICOLON; break;
1707
 
1708
      /* @ is a punctuator in Objective-C.  */
1709
    case '@': result->type = CPP_ATSIGN; break;
1710
 
1711
    case '$':
1712
    case '\\':
1713
      {
1714
        /* Step back onto the character so that forms_identifier_p
           examines it.  */
        const uchar *base = --buffer->cur;
1715
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1716
 
1717
        if (forms_identifier_p (pfile, true, &nst))
1718
          {
1719
            result->type = CPP_NAME;
1720
            result->val.node.node = lex_identifier (pfile, base, true, &nst);
1721
            warn_about_normalization (pfile, result, &nst);
1722
            break;
1723
          }
1724
        /* Not an identifier: consume the character again.  */
        buffer->cur++;
1725
      }
1726
      /* Fall through.  */
 
1727
    default:
1728
      /* Any other character becomes a one-character CPP_OTHER
         token.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1729
      break;
1730
    }
1731
 
1732
  return result;
1733
}
1734
 
1735
/* An upper bound on the number of bytes needed to spell TOKEN.
1736
   Does not include preceding whitespace.  */
1737
unsigned int
1738
cpp_token_len (const cpp_token *token)
1739
{
1740
  unsigned int len;
1741
 
1742
  switch (TOKEN_SPELL (token))
1743
    {
1744
    default:            len = 6;                                break;
1745
    case SPELL_LITERAL: len = token->val.str.len;               break;
1746
    case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
1747
    }
1748
 
1749
  return len;
1750
}
1751
 
1752
/* Decode the single UTF-8 sequence that starts at NAME and store the
   matching \UXXXXXXXX escape in BUFFER.  Exactly 10 bytes are written
   to BUFFER and no terminating NUL is added.  Returns the number of
   bytes of NAME that were consumed.  Aborts if a trailing byte is not
   of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *p = name;
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  lead = *p;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* What is left of the first byte after the length marker is the
     most significant part of the code point.  */
  code_point = *p & (0x7F >> seq_len);

  /* Every trailing byte contributes six more bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      p++;
      code_point = (code_point << 6) | (*p & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*p & ~0x3F) != 0x80)
        abort ();
    }

  /* Emit the escape: backslash, 'U', then eight lower-case hex
     digits, most significant nibble first.  */
  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1785
 
1786
/* Given a token TYPE corresponding to a digraph, return a pointer to
1787
   the spelling of the digraph.  */
1788
static const unsigned char *
1789
cpp_digraph2name (enum cpp_ttype type)
1790
{
1791
  return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1792
}
1793
 
1794
/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1795
   already contain the enough space to hold the token's spelling.
1796
   Returns a pointer to the character after the last character written.
1797
   FORSTRING is true if this is to be the spelling after translation
1798
   phase 1 (this is different for UCNs).
1799
   FIXME: Would be nice if we didn't need the PFILE argument.  */
1800
unsigned char *
1801
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1802
                 unsigned char *buffer, bool forstring)
1803
{
1804
  switch (TOKEN_SPELL (token))
1805
    {
1806
    case SPELL_OPERATOR:
1807
      {
1808
        const unsigned char *spelling;
1809
        unsigned char c;
1810
 
1811
        if (token->flags & DIGRAPH)
1812
          spelling = cpp_digraph2name (token->type);
1813
        else if (token->flags & NAMED_OP)
1814
          goto spell_ident;
1815
        else
1816
          spelling = TOKEN_NAME (token);
1817
 
1818
        while ((c = *spelling++) != '\0')
1819
          *buffer++ = c;
1820
      }
1821
      break;
1822
 
1823
    spell_ident:
1824
    case SPELL_IDENT:
1825
      if (forstring)
1826
        {
1827
          memcpy (buffer, NODE_NAME (token->val.node.node),
1828
                  NODE_LEN (token->val.node.node));
1829
          buffer += NODE_LEN (token->val.node.node);
1830
        }
1831
      else
1832
        {
1833
          size_t i;
1834
          const unsigned char * name = NODE_NAME (token->val.node.node);
1835
 
1836
          for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1837
            if (name[i] & ~0x7F)
1838
              {
1839
                i += utf8_to_ucn (buffer, name + i) - 1;
1840
                buffer += 10;
1841
              }
1842
            else
1843
              *buffer++ = NODE_NAME (token->val.node.node)[i];
1844
        }
1845
      break;
1846
 
1847
    case SPELL_LITERAL:
1848
      memcpy (buffer, token->val.str.text, token->val.str.len);
1849
      buffer += token->val.str.len;
1850
      break;
1851
 
1852
    case SPELL_NONE:
1853
      cpp_error (pfile, CPP_DL_ICE,
1854
                 "unspellable token %s", TOKEN_NAME (token));
1855
      break;
1856
    }
1857
 
1858
  return buffer;
1859
}
1860
 
1861
/* Returns TOKEN spelt as a null-terminated string.  The string is
1862
   freed when the reader is destroyed.  Useful for diagnostics.  */
1863
unsigned char *
1864
cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1865
{
1866
  unsigned int len = cpp_token_len (token) + 1;
1867
  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1868
 
1869
  end = cpp_spell_token (pfile, token, start, false);
1870
  end[0] = '\0';
1871
 
1872
  return start;
1873
}
1874
 
1875
/* Returns a pointer to a string which spells the token defined by
1876
   TYPE and FLAGS.  Used by C front ends, which really should move to
1877
   using cpp_token_as_text.  */
1878
const char *
1879
cpp_type2name (enum cpp_ttype type, unsigned char flags)
1880
{
1881
  if (flags & DIGRAPH)
1882
    return (const char *) cpp_digraph2name (type);
1883
  else if (flags & NAMED_OP)
1884
    return cpp_named_operator2name (type);
1885
 
1886
  return (const char *) token_spellings[type].name;
1887
}
1888
 
1889
/* Writes the spelling of token to FP, without any preceding space.
1890
   Separated from cpp_spell_token for efficiency - to avoid stdio
1891
   double-buffering.  */
1892
void
1893
cpp_output_token (const cpp_token *token, FILE *fp)
1894
{
1895
  switch (TOKEN_SPELL (token))
1896
    {
1897
    case SPELL_OPERATOR:
1898
      {
1899
        const unsigned char *spelling;
1900
        int c;
1901
 
1902
        if (token->flags & DIGRAPH)
1903
          spelling = cpp_digraph2name (token->type);
1904
        else if (token->flags & NAMED_OP)
1905
          goto spell_ident;
1906
        else
1907
          spelling = TOKEN_NAME (token);
1908
 
1909
        c = *spelling;
1910
        do
1911
          putc (c, fp);
1912
        while ((c = *++spelling) != '\0');
1913
      }
1914
      break;
1915
 
1916
    spell_ident:
1917
    case SPELL_IDENT:
1918
      {
1919
        size_t i;
1920
        const unsigned char * name = NODE_NAME (token->val.node.node);
1921
 
1922
        for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1923
          if (name[i] & ~0x7F)
1924
            {
1925
              unsigned char buffer[10];
1926
              i += utf8_to_ucn (buffer, name + i) - 1;
1927
              fwrite (buffer, 1, 10, fp);
1928
            }
1929
          else
1930
            fputc (NODE_NAME (token->val.node.node)[i], fp);
1931
      }
1932
      break;
1933
 
1934
    case SPELL_LITERAL:
1935
      fwrite (token->val.str.text, 1, token->val.str.len, fp);
1936
      break;
1937
 
1938
    case SPELL_NONE:
1939
      /* An error, most probably.  */
1940
      break;
1941
    }
1942
}
1943
 
1944
/* Compare two tokens.  */
1945
int
1946
_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1947
{
1948
  if (a->type == b->type && a->flags == b->flags)
1949
    switch (TOKEN_SPELL (a))
1950
      {
1951
      default:                  /* Keep compiler happy.  */
1952
      case SPELL_OPERATOR:
1953
        /* token_no is used to track where multiple consecutive ##
1954
           tokens were originally located.  */
1955
        return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1956
      case SPELL_NONE:
1957
        return (a->type != CPP_MACRO_ARG
1958
                || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1959
      case SPELL_IDENT:
1960
        return a->val.node.node == b->val.node.node;
1961
      case SPELL_LITERAL:
1962
        return (a->val.str.len == b->val.str.len
1963
                && !memcmp (a->val.str.text, b->val.str.text,
1964
                            a->val.str.len));
1965
      }
1966
 
1967
  return 0;
1968
}
1969
 
1970
/* Returns nonzero if a space should be inserted to avoid an
1971
   accidental token paste for output.  For simplicity, it is
1972
   conservative, and occasionally advises a space where one is not
1973
   needed, e.g. "." and ".2".  */
1974
int
1975
cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1976
                 const cpp_token *token2)
1977
{
1978
  enum cpp_ttype a = token1->type, b = token2->type;
1979
  cppchar_t c;
1980
 
1981
  if (token1->flags & NAMED_OP)
1982
    a = CPP_NAME;
1983
  if (token2->flags & NAMED_OP)
1984
    b = CPP_NAME;
1985
 
1986
  c = EOF;
1987
  if (token2->flags & DIGRAPH)
1988
    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1989
  else if (token_spellings[b].category == SPELL_OPERATOR)
1990
    c = token_spellings[b].name[0];
1991
 
1992
  /* Quickly get everything that can paste with an '='.  */
1993
  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1994
    return 1;
1995
 
1996
  switch (a)
1997
    {
1998
    case CPP_GREATER:   return c == '>';
1999
    case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2000
    case CPP_PLUS:      return c == '+';
2001
    case CPP_MINUS:     return c == '-' || c == '>';
2002
    case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2003
    case CPP_MOD:       return c == ':' || c == '>';
2004
    case CPP_AND:       return c == '&';
2005
    case CPP_OR:        return c == '|';
2006
    case CPP_COLON:     return c == ':' || c == '>';
2007
    case CPP_DEREF:     return c == '*';
2008
    case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2009
    case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2010
    case CPP_NAME:      return ((b == CPP_NUMBER
2011
                                 && name_p (pfile, &token2->val.str))
2012
                                || b == CPP_NAME
2013
                                || b == CPP_CHAR || b == CPP_STRING); /* L */
2014
    case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2015
                                || c == '.' || c == '+' || c == '-');
2016
                                      /* UCNs */
2017
    case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2018
                                 && b == CPP_NAME)
2019
                                || (CPP_OPTION (pfile, objc)
2020
                                    && token1->val.str.text[0] == '@'
2021
                                    && (b == CPP_NAME || b == CPP_STRING)));
2022
    default:            break;
2023
    }
2024
 
2025
  return 0;
2026
}
2027
 
2028
/* Output all the remaining tokens on the current line, and a newline
2029
   character, to FP.  Leading whitespace is removed.  If there are
2030
   macros, special token padding is not performed.  */
2031
void
2032
cpp_output_line (cpp_reader *pfile, FILE *fp)
2033
{
2034
  const cpp_token *token;
2035
 
2036
  token = cpp_get_token (pfile);
2037
  while (token->type != CPP_EOF)
2038
    {
2039
      cpp_output_token (token, fp);
2040
      token = cpp_get_token (pfile);
2041
      if (token->flags & PREV_WHITE)
2042
        putc (' ', fp);
2043
    }
2044
 
2045
  putc ('\n', fp);
2046
}
2047
 
2048
/* Return a string representation of all the remaining tokens on the
2049
   current line.  The result is allocated using xmalloc and must be
2050
   freed by the caller.  */
2051
unsigned char *
2052
cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2053
{
2054
  const cpp_token *token;
2055
  unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2056
  unsigned int alloced = 120 + out;
2057
  unsigned char *result = (unsigned char *) xmalloc (alloced);
2058
 
2059
  /* If DIR_NAME is empty, there are no initial contents.  */
2060
  if (dir_name)
2061
    {
2062
      sprintf ((char *) result, "#%s ", dir_name);
2063
      out += 2;
2064
    }
2065
 
2066
  token = cpp_get_token (pfile);
2067
  while (token->type != CPP_EOF)
2068
    {
2069
      unsigned char *last;
2070
      /* Include room for a possible space and the terminating nul.  */
2071
      unsigned int len = cpp_token_len (token) + 2;
2072
 
2073
      if (out + len > alloced)
2074
        {
2075
          alloced *= 2;
2076
          if (out + len > alloced)
2077
            alloced = out + len;
2078
          result = (unsigned char *) xrealloc (result, alloced);
2079
        }
2080
 
2081
      last = cpp_spell_token (pfile, token, &result[out], 0);
2082
      out = last - result;
2083
 
2084
      token = cpp_get_token (pfile);
2085
      if (token->flags & PREV_WHITE)
2086
        result[out++] = ' ';
2087
    }
2088
 
2089
  result[out] = '\0';
2090
  return result;
2091
}
2092
 
2093
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the space between BUFF's cur
   and limit.  Every macro argument is parenthesized so that an
   argument containing a lower-precedence operator is still grouped
   as the caller wrote it.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2107
 
2108
/* Create a new allocation buffer.  Place the control block at the end
2109
   of the buffer, so that buffer overflows will cause immediate chaos.  */
2110
static _cpp_buff *
2111
new_buff (size_t len)
2112
{
2113
  _cpp_buff *result;
2114
  unsigned char *base;
2115
 
2116
  if (len < MIN_BUFF_SIZE)
2117
    len = MIN_BUFF_SIZE;
2118
  len = CPP_ALIGN (len);
2119
 
2120
  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2121
  result = (_cpp_buff *) (base + len);
2122
  result->base = base;
2123
  result->cur = base;
2124
  result->limit = base + len;
2125
  result->next = NULL;
2126
  return result;
2127
}
2128
 
2129
/* Place a chain of unwanted allocation buffers on the free list.  */
2130
void
2131
_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2132
{
2133
  _cpp_buff *end = buff;
2134
 
2135
  while (end->next)
2136
    end = end->next;
2137
  end->next = pfile->free_buffs;
2138
  pfile->free_buffs = buff;
2139
}
2140
 
2141
/* Return a free buffer of size at least MIN_SIZE.  */
2142
_cpp_buff *
2143
_cpp_get_buff (cpp_reader *pfile, size_t min_size)
2144
{
2145
  _cpp_buff *result, **p;
2146
 
2147
  for (p = &pfile->free_buffs;; p = &(*p)->next)
2148
    {
2149
      size_t size;
2150
 
2151
      if (*p == NULL)
2152
        return new_buff (min_size);
2153
      result = *p;
2154
      size = result->limit - result->base;
2155
      /* Return a buffer that's big enough, but don't waste one that's
2156
         way too big.  */
2157
      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2158
        break;
2159
    }
2160
 
2161
  *p = result->next;
2162
  result->next = NULL;
2163
  result->cur = result->base;
2164
  return result;
2165
}
2166
 
2167
/* Creates a new buffer with enough space to hold the uncommitted
2168
   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2169
   the excess bytes to the new buffer.  Chains the new buffer after
2170
   BUFF, and returns the new buffer.  */
2171
_cpp_buff *
2172
_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2173
{
2174
  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2175
  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2176
 
2177
  buff->next = new_buff;
2178
  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2179
  return new_buff;
2180
}
2181
 
2182
/* Creates a new buffer with enough space to hold the uncommitted
2183
   remaining bytes of the buffer pointed to by BUFF, and at least
2184
   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2185
   Chains the new buffer before the buffer pointed to by BUFF, and
2186
   updates the pointer to point to the new buffer.  */
2187
void
2188
_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2189
{
2190
  _cpp_buff *new_buff, *old_buff = *pbuff;
2191
  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2192
 
2193
  new_buff = _cpp_get_buff (pfile, size);
2194
  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2195
  new_buff->next = old_buff;
2196
  *pbuff = new_buff;
2197
}
2198
 
2199
/* Free a chain of buffers starting at BUFF.  */
2200
void
2201
_cpp_free_buff (_cpp_buff *buff)
2202
{
2203
  _cpp_buff *next;
2204
 
2205
  for (; buff; buff = next)
2206
    {
2207
      next = buff->next;
2208
      free (buff->base);
2209
    }
2210
}
2211
 
2212
/* Allocate permanent, unaligned storage of length LEN.  */
2213
unsigned char *
2214
_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2215
{
2216
  _cpp_buff *buff = pfile->u_buff;
2217
  unsigned char *result = buff->cur;
2218
 
2219
  if (len > (size_t) (buff->limit - result))
2220
    {
2221
      buff = _cpp_get_buff (pfile, len);
2222
      buff->next = pfile->u_buff;
2223
      pfile->u_buff = buff;
2224
      result = buff->cur;
2225
    }
2226
 
2227
  buff->cur = result + len;
2228
  return result;
2229
}
2230
 
2231
/* Allocate permanent, unaligned storage of length LEN from a_buff.
2232
   That buffer is used for growing allocations when saving macro
2233
   replacement lists in a #define, and when parsing an answer to an
2234
   assertion in #assert, #unassert or #if (and therefore possibly
2235
   whilst expanding macros).  It therefore must not be used by any
2236
   code that they might call: specifically the lexer and the guts of
2237
   the macro expander.
2238
 
2239
   All existing other uses clearly fit this restriction: storing
2240
   registered pragmas during initialization.  */
2241
unsigned char *
2242
_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2243
{
2244
  _cpp_buff *buff = pfile->a_buff;
2245
  unsigned char *result = buff->cur;
2246
 
2247
  if (len > (size_t) (buff->limit - result))
2248
    {
2249
      buff = _cpp_get_buff (pfile, len);
2250
      buff->next = pfile->a_buff;
2251
      pfile->a_buff = buff;
2252
      result = buff->cur;
2253
    }
2254
 
2255
  buff->cur = result + len;
2256
  return result;
2257
}
2258
 
2259
/* Say which field of TOK is in use.  */
2260
 
2261
enum cpp_token_fld_kind
2262
cpp_token_val_index (cpp_token *tok)
2263
{
2264
  switch (TOKEN_SPELL (tok))
2265
    {
2266
    case SPELL_IDENT:
2267
      return CPP_TOKEN_FLD_NODE;
2268
    case SPELL_LITERAL:
2269
      return CPP_TOKEN_FLD_STR;
2270
    case SPELL_OPERATOR:
2271
      if (tok->type == CPP_PASTE)
2272
        return CPP_TOKEN_FLD_TOKEN_NO;
2273
      else
2274
        return CPP_TOKEN_FLD_NONE;
2275
    case SPELL_NONE:
2276
      if (tok->type == CPP_MACRO_ARG)
2277
        return CPP_TOKEN_FLD_ARG_NO;
2278
      else if (tok->type == CPP_PADDING)
2279
        return CPP_TOKEN_FLD_SOURCE;
2280
      else if (tok->type == CPP_PRAGMA)
2281
        return CPP_TOKEN_FLD_PRAGMA;
2282
      /* else fall through */
2283
    default:
2284
      return CPP_TOKEN_FLD_NONE;
2285
    }
2286
}

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.