OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libcpp/] [lex.c] - Blame information for rev 730

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 730 jeremybenn
/* CPP Library - lexical analysis.
2
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
3
   2011 Free Software Foundation, Inc.
4
   Contributed by Per Bothner, 1994-95.
5
   Based on CCCP program by Paul Rubin, June 1986
6
   Adapted to ANSI C, Richard Stallman, Jan 1987
7
   Broken out to separate file, Zack Weinberg, Mar 2000
8
 
9
This program is free software; you can redistribute it and/or modify it
10
under the terms of the GNU General Public License as published by the
11
Free Software Foundation; either version 3, or (at your option) any
12
later version.
13
 
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
GNU General Public License for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with this program; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "cpplib.h"
26
#include "internal.h"
27
 
28
/* Spelling category of a token type.  One of these is stored in the
   `category' field of every token_spellings entry.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Used by every OP () entry of the token table.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35
 
36
struct token_spelling
37
{
38
  enum spell_type category;
39
  const unsigned char *name;
40
};
41
 
42
static const unsigned char *const digraph_spellings[] =
43
{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44
 
45
#define OP(e, s) { SPELL_OPERATOR, UC s  },
46
#define TK(e, s) { SPELL_ ## s,    UC #e },
47
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48
#undef OP
49
#undef TK
50
 
51
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53
 
54
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55
static int skip_line_comment (cpp_reader *);
56
static void skip_whitespace (cpp_reader *, cppchar_t);
57
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59
static void store_comment (cpp_reader *, cpp_token *);
60
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61
                            unsigned int, enum cpp_ttype);
62
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63
static int name_p (cpp_reader *, const cpp_string *);
64
static tokenrun *next_tokenrun (tokenrun *);
65
 
66
static _cpp_buff *new_buff (size_t);
67
 
68
 
69
/* Utility routine:
70
 
71
   Compares, the token TOKEN to the NUL-terminated string STRING.
72
   TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
73
int
74
cpp_ideq (const cpp_token *token, const char *string)
75
{
76
  if (token->type != CPP_NAME)
77
    return 0;
78
 
79
  return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80
}
81
 
82
/* Record a note TYPE at byte POS into the current cleaned logical
83
   line.  */
84
static void
85
add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86
{
87
  if (buffer->notes_used == buffer->notes_cap)
88
    {
89
      buffer->notes_cap = buffer->notes_cap * 2 + 200;
90
      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91
                                  buffer->notes_cap);
92
    }
93
 
94
  buffer->notes[buffer->notes_used].pos = pos;
95
  buffer->notes[buffer->notes_used].type = type;
96
  buffer->notes_used++;
97
}
98
 
99
 
100
/* Fast path to find line special characters using optimized character
101
   scanning algorithms.  Anything complicated falls back to the slow
102
   path below.  Since this loop is very hot it's worth doing these kinds
103
   of optimizations.
104
 
105
   One of the paths through the ifdefs should provide
106
 
107
     const uchar *search_line_fast (const uchar *s, const uchar *end);
108
 
109
   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
110
   the found character.
111
 
112
   Note that the last character of the buffer is *always* a newline,
113
   as forced by _cpp_convert_input.  This fact can be used to avoid
114
   explicitly looking for the end of the buffer.  */
115
 
116
/* Configure only gives us an ifdef test; make sure WORDS_BIGENDIAN
   always has a value so it can be used in ordinary expressions.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  Nothing in
   <stdint.h> provides that.  On most hosts it is unsigned long, but MS
   chose an LLP64 model.  When building with GCC we can ask for the
   "real" word size directly.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below only copes with words of 4 or 8 bytes.  The array
   size is negative, hence a compile-time error, for any other size.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
135
 
136
/* Return X with the first N bytes forced to values that won't match one
137
   of the interesting characters.  Note that NUL is not interesting.  */
138
 
139
static inline word_type
140
acc_char_mask_misalign (word_type val, unsigned int n)
141
{
142
  word_type mask = -1;
143
  if (WORDS_BIGENDIAN)
144
    mask >>= n * 8;
145
  else
146
    mask <<= n * 8;
147
  return val & mask;
148
}
149
 
150
/* Return X replicated to all byte positions within WORD_TYPE.  */
151
 
152
static inline word_type
153
acc_char_replicate (uchar x)
154
{
155
  word_type ret;
156
 
157
  ret = (x << 24) | (x << 16) | (x << 8) | x;
158
  if (sizeof(word_type) == 8)
159
    ret = (ret << 16 << 16) | ret;
160
  return ret;
161
}
162
 
163
/* Return non-zero if some byte of VAL is (probably) C.  */
164
 
165
static inline word_type
166
acc_char_cmp (word_type val, word_type c)
167
{
168
#if defined(__GNUC__) && defined(__alpha__)
169
  /* We can get exact results using a compare-bytes instruction.
170
     Get (val == c) via (0 >= (val ^ c)).  */
171
  return __builtin_alpha_cmpbge (0, val ^ c);
172
#else
173
  word_type magic = 0x7efefefeU;
174
  if (sizeof(word_type) == 8)
175
    magic = (magic << 16 << 16) | 0xfefefefeU;
176
  magic |= 1;
177
 
178
  val ^= c;
179
  return ((val + magic) ^ ~val) & ~magic;
180
#endif
181
}
182
 
183
/* Given the result of acc_char_cmp is non-zero, return the index of
184
   the found character.  If this was a false positive, return -1.  */
185
 
186
static inline int
187
acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
188
                word_type val ATTRIBUTE_UNUSED)
189
{
190
#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
191
  /* The cmpbge instruction sets *bits* of the result corresponding to
192
     matches in the bytes with no false positives.  */
193
  return __builtin_ctzl (cmp);
194
#else
195
  unsigned int i;
196
 
197
  /* ??? It would be nice to force unrolling here,
198
     and have all of these constants folded.  */
199
  for (i = 0; i < sizeof(word_type); ++i)
200
    {
201
      uchar c;
202
      if (WORDS_BIGENDIAN)
203
        c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
204
      else
205
        c = (val >> i * 8) & 0xff;
206
 
207
      if (c == '\n' || c == '\r' || c == '\\' || c == '?')
208
        return i;
209
    }
210
 
211
  return -1;
212
#endif
213
}
214
 
215
/* A version of the fast scanner using bit fiddling techniques.
216
 
217
   For 32-bit words, one would normally perform 16 comparisons and
218
   16 branches.  With this algorithm one performs 24 arithmetic
219
   operations and one branch.  Whether this is faster with a 32-bit
220
   word size is going to be somewhat system dependent.
221
 
222
   For 64-bit words, we eliminate twice the number of comparisons
223
   and branches without increasing the number of arithmetic operations.
224
   It's almost certainly going to be a win with 64-bit word size.  */
225
 
226
static const uchar * search_line_acc_char (const uchar *, const uchar *)
227
  ATTRIBUTE_UNUSED;
228
 
229
static const uchar *
230
search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
231
{
232
  const word_type repl_nl = acc_char_replicate ('\n');
233
  const word_type repl_cr = acc_char_replicate ('\r');
234
  const word_type repl_bs = acc_char_replicate ('\\');
235
  const word_type repl_qm = acc_char_replicate ('?');
236
 
237
  unsigned int misalign;
238
  const word_type *p;
239
  word_type val, t;
240
 
241
  /* Align the buffer.  Mask out any bytes from before the beginning.  */
242
  p = (word_type *)((uintptr_t)s & -sizeof(word_type));
243
  val = *p;
244
  misalign = (uintptr_t)s & (sizeof(word_type) - 1);
245
  if (misalign)
246
    val = acc_char_mask_misalign (val, misalign);
247
 
248
  /* Main loop.  */
249
  while (1)
250
    {
251
      t  = acc_char_cmp (val, repl_nl);
252
      t |= acc_char_cmp (val, repl_cr);
253
      t |= acc_char_cmp (val, repl_bs);
254
      t |= acc_char_cmp (val, repl_qm);
255
 
256
      if (__builtin_expect (t != 0, 0))
257
        {
258
          int i = acc_char_index (t, val);
259
          if (i >= 0)
260
            return (const uchar *)p + i;
261
        }
262
 
263
      val = *++p;
264
    }
265
}
266
 
267
/* Disable on Solaris 2/x86 until the following problems can be properly
268
   autoconfed:
269
 
270
   The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
271
   The Solaris 9 assembler cannot assemble SSE4.2 insns.
272
   Before Solaris 9 Update 6, SSE insns cannot be executed.
273
   The Solaris 10+ assembler tags objects with the instruction set
274
   extensions used, so SSE4.2 executables cannot run on machines that
275
   don't support that extension.  */
276
 
277
#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
278
 
279
/* Replicated character data shared between the vector implementations:
   one 16-byte aligned row each for \n, \r, \\ and ?, in that order.
   Recall that outside of a context with vector support we can't define
   compatible vector types, so the rows are spelled as raw characters.  */
#define REPL16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
293
 
294
/* A version of the fast scanner using MMX vectorized byte compare insns.
295
 
296
   This uses the PMOVMSKB instruction which was introduced with "MMX2",
297
   which was packaged into SSE1; it is also present in the AMD MMX
298
   extension.  Mark the function as using "sse" so that we emit a real
299
   "emms" instruction, rather than the 3dNOW "femms" instruction.  */
300
 
301
static const uchar *
302
#ifndef __SSE__
303
__attribute__((__target__("sse")))
304
#endif
305
search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
306
{
307
  typedef char v8qi __attribute__ ((__vector_size__ (8)));
308
  typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
309
 
310
  const v8qi repl_nl = *(const v8qi *)repl_chars[0];
311
  const v8qi repl_cr = *(const v8qi *)repl_chars[1];
312
  const v8qi repl_bs = *(const v8qi *)repl_chars[2];
313
  const v8qi repl_qm = *(const v8qi *)repl_chars[3];
314
 
315
  unsigned int misalign, found, mask;
316
  const v8qi *p;
317
  v8qi data, t, c;
318
 
319
  /* Align the source pointer.  While MMX doesn't generate unaligned data
320
     faults, this allows us to safely scan to the end of the buffer without
321
     reading beyond the end of the last page.  */
322
  misalign = (uintptr_t)s & 7;
323
  p = (const v8qi *)((uintptr_t)s & -8);
324
  data = *p;
325
 
326
  /* Create a mask for the bytes that are valid within the first
327
     16-byte block.  The Idea here is that the AND with the mask
328
     within the loop is "free", since we need some AND or TEST
329
     insn in order to set the flags for the branch anyway.  */
330
  mask = -1u << misalign;
331
 
332
  /* Main loop processing 8 bytes at a time.  */
333
  goto start;
334
  do
335
    {
336
      data = *++p;
337
      mask = -1;
338
 
339
    start:
340
      t = __builtin_ia32_pcmpeqb(data, repl_nl);
341
      c = __builtin_ia32_pcmpeqb(data, repl_cr);
342
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343
      c = __builtin_ia32_pcmpeqb(data, repl_bs);
344
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345
      c = __builtin_ia32_pcmpeqb(data, repl_qm);
346
      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
347
      found = __builtin_ia32_pmovmskb (t);
348
      found &= mask;
349
    }
350
  while (!found);
351
 
352
  __builtin_ia32_emms ();
353
 
354
  /* FOUND contains 1 in bits for which we matched a relevant
355
     character.  Conversion to the byte index is trivial.  */
356
  found = __builtin_ctz(found);
357
  return (const uchar *)p + found;
358
}
359
 
360
/* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
361
 
362
static const uchar *
363
#ifndef __SSE2__
364
__attribute__((__target__("sse2")))
365
#endif
366
search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
367
{
368
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
369
 
370
  const v16qi repl_nl = *(const v16qi *)repl_chars[0];
371
  const v16qi repl_cr = *(const v16qi *)repl_chars[1];
372
  const v16qi repl_bs = *(const v16qi *)repl_chars[2];
373
  const v16qi repl_qm = *(const v16qi *)repl_chars[3];
374
 
375
  unsigned int misalign, found, mask;
376
  const v16qi *p;
377
  v16qi data, t;
378
 
379
  /* Align the source pointer.  */
380
  misalign = (uintptr_t)s & 15;
381
  p = (const v16qi *)((uintptr_t)s & -16);
382
  data = *p;
383
 
384
  /* Create a mask for the bytes that are valid within the first
385
     16-byte block.  The Idea here is that the AND with the mask
386
     within the loop is "free", since we need some AND or TEST
387
     insn in order to set the flags for the branch anyway.  */
388
  mask = -1u << misalign;
389
 
390
  /* Main loop processing 16 bytes at a time.  */
391
  goto start;
392
  do
393
    {
394
      data = *++p;
395
      mask = -1;
396
 
397
    start:
398
      t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
399
      t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
400
      t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
401
      t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
402
      found = __builtin_ia32_pmovmskb128 (t);
403
      found &= mask;
404
    }
405
  while (!found);
406
 
407
  /* FOUND contains 1 in bits for which we matched a relevant
408
     character.  Conversion to the byte index is trivial.  */
409
  found = __builtin_ctz(found);
410
  return (const uchar *)p + found;
411
}
412
 
413
#ifdef HAVE_SSE4
414
/* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
415
 
416
static const uchar *
417
#ifndef __SSE4_2__
418
__attribute__((__target__("sse4.2")))
419
#endif
420
search_line_sse42 (const uchar *s, const uchar *end)
421
{
422
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
423
  static const v16qi search = { '\n', '\r', '?', '\\' };
424
 
425
  uintptr_t si = (uintptr_t)s;
426
  uintptr_t index;
427
 
428
  /* Check for unaligned input.  */
429
  if (si & 15)
430
    {
431
      if (__builtin_expect (end - s < 16, 0)
432
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
433
        {
434
          /* There are less than 16 bytes left in the buffer, and less
435
             than 16 bytes left on the page.  Reading 16 bytes at this
436
             point might generate a spurious page fault.  Defer to the
437
             SSE2 implementation, which already handles alignment.  */
438
          return search_line_sse2 (s, end);
439
        }
440
 
441
      /* ??? The builtin doesn't understand that the PCMPESTRI read from
442
         memory need not be aligned.  */
443
      __asm ("%vpcmpestri $0, (%1), %2"
444
             : "=c"(index) : "r"(s), "x"(search), "a"(4), "d"(16));
445
      if (__builtin_expect (index < 16, 0))
446
        goto found;
447
 
448
      /* Advance the pointer to an aligned address.  We will re-scan a
449
         few bytes, but we no longer need care for reading past the
450
         end of a page, since we're guaranteed a match.  */
451
      s = (const uchar *)((si + 16) & -16);
452
    }
453
 
454
  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
455
     in inline assembly, we can make proper use of the flags set.  */
456
  __asm (      "sub $16, %1\n"
457
        "       .balign 16\n"
458
        "0:     add $16, %1\n"
459
        "       %vpcmpestri $0, (%1), %2\n"
460
        "       jnc 0b"
461
        : "=&c"(index), "+r"(s)
462
        : "x"(search), "a"(4), "d"(16));
463
 
464
 found:
465
  return s + index;
466
}
467
 
468
#else
469
/* Work around out-dated assemblers without sse4 support.  */
470
#define search_line_sse42 search_line_sse2
471
#endif
472
 
473
/* Check the CPU capabilities.  */

#include "../gcc/config/i386/cpuid.h"

/* The scanner is chosen at run time and called through this pointer.  */
typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
static search_line_fast_type search_line_fast;

/* Point search_line_fast at the best scanner usable on this CPU.  The
   choice combines what the compilation flags already guarantee with
   what CPUID reports; the word-at-a-time scanner is the fallback.  */
#define HAVE_init_vectorized_lexer 1
static inline void
init_vectorized_lexer (void)
{
  /* BASELINE is the instruction set level guaranteed at compile time:
     3 for SSE4.2, 2 for SSE2, 1 for SSE, 0 for none of them.  */
#if defined(__SSE4_2__)
  const int baseline = 3;
#elif defined(__SSE2__)
  const int baseline = 2;
#elif defined(__SSE__)
  const int baseline = 1;
#else
  const int baseline = 0;
#endif
  unsigned int scratch, ecx = 0, edx = 0;
  search_line_fast_type chosen = search_line_acc_char;

  if (baseline == 3)
    chosen = search_line_sse42;
  else if (__get_cpuid (1, &scratch, &scratch, &ecx, &edx) || baseline == 2)
    {
      /* ECX and EDX remain zero here if the CPUID query failed.  */
      if (ecx & bit_SSE4_2)
        chosen = search_line_sse42;
      else if (baseline == 2 || (edx & bit_SSE2))
        chosen = search_line_sse2;
      else if (baseline == 1 || (edx & bit_SSE))
        chosen = search_line_mmx;
    }
  else if (__get_cpuid (0x80000001, &scratch, &scratch, &scratch, &edx))
    {
      /* The MMX scanner needs both of these extended feature bits.  */
      const unsigned int want = bit_MMXEXT | bit_CMOV;

      if (baseline == 1 || (edx & want) == want)
        chosen = search_line_mmx;
    }

  search_line_fast = chosen;
}
516
 
517
#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
518
 
519
/* A vection of the fast scanner using AltiVec vectorized byte compares.  */
520
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
521
   so we can't compile this function without -maltivec on the command line
522
   (or implied by some other switch).  */
523
 
524
static const uchar *
525
search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
526
{
527
  typedef __attribute__((altivec(vector))) unsigned char vc;
528
 
529
  const vc repl_nl = {
530
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
531
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
532
  };
533
  const vc repl_cr = {
534
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
535
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
536
  };
537
  const vc repl_bs = {
538
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
539
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
540
  };
541
  const vc repl_qm = {
542
    '?', '?', '?', '?', '?', '?', '?', '?',
543
    '?', '?', '?', '?', '?', '?', '?', '?',
544
  };
545
  const vc ones = {
546
    -1, -1, -1, -1, -1, -1, -1, -1,
547
    -1, -1, -1, -1, -1, -1, -1, -1,
548
  };
549
  const vc zero = { 0 };
550
 
551
  vc data, mask, t;
552
 
553
  /* Altivec loads automatically mask addresses with -16.  This lets us
554
     issue the first load as early as possible.  */
555
  data = __builtin_vec_ld(0, (const vc *)s);
556
 
557
  /* Discard bytes before the beginning of the buffer.  Do this by
558
     beginning with all ones and shifting in zeros according to the
559
     mis-alignment.  The LVSR instruction pulls the exact shift we
560
     want from the address.  */
561
  mask = __builtin_vec_lvsr(0, s);
562
  mask = __builtin_vec_perm(zero, ones, mask);
563
  data &= mask;
564
 
565
  /* While altivec loads mask addresses, we still need to align S so
566
     that the offset we compute at the end is correct.  */
567
  s = (const uchar *)((uintptr_t)s & -16);
568
 
569
  /* Main loop processing 16 bytes at a time.  */
570
  goto start;
571
  do
572
    {
573
      vc m_nl, m_cr, m_bs, m_qm;
574
 
575
      s += 16;
576
      data = __builtin_vec_ld(0, (const vc *)s);
577
 
578
    start:
579
      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
580
      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
581
      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
582
      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
583
      t = (m_nl | m_cr) | (m_bs | m_qm);
584
 
585
      /* T now contains 0xff in bytes for which we matched one of the relevant
586
         characters.  We want to exit the loop if any byte in T is non-zero.
587
         Below is the expansion of vec_any_ne(t, zero).  */
588
    }
589
  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
590
 
591
  {
592
#define N  (sizeof(vc) / sizeof(long))
593
 
594
    typedef char check_count[(N == 2 || N == 4) * 2 - 1];
595
    union {
596
      vc v;
597
      unsigned long l[N];
598
    } u;
599
    unsigned long l, i = 0;
600
 
601
    u.v = t;
602
 
603
    /* Find the first word of T that is non-zero.  */
604
    switch (N)
605
      {
606
      case 4:
607
        l = u.l[i++];
608
        if (l != 0)
609
          break;
610
        s += sizeof(unsigned long);
611
        l = u.l[i++];
612
        if (l != 0)
613
          break;
614
        s += sizeof(unsigned long);
615
      case 2:
616
        l = u.l[i++];
617
        if (l != 0)
618
          break;
619
        s += sizeof(unsigned long);
620
        l = u.l[i];
621
      }
622
 
623
    /* L now contains 0xff in bytes for which we matched one of the
624
       relevant characters.  We can find the byte index by finding
625
       its bit index and dividing by 8.  */
626
    l = __builtin_clzl(l) >> 3;
627
    return s + l;
628
 
629
#undef N
630
  }
631
}
632
 
633
#else
634
 
635
/* We only have one accelerated alternative.  Use a direct call so that
636
   we encourage inlining.  */
637
 
638
#define search_line_fast  search_line_acc_char
639
 
640
#endif
641
 
642
/* Initialize the lexer if needed.  This only does work when a
   run-time selected vectorized scanner has been compiled in.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
651
 
652
/* Returns with a logical line that contains no escaped newlines or
653
   trigraphs.  This is a time-critical inner loop.  */
654
void
655
_cpp_clean_line (cpp_reader *pfile)
656
{
657
  cpp_buffer *buffer;
658
  const uchar *s;
659
  uchar c, *d, *p;
660
 
661
  buffer = pfile->buffer;
662
  buffer->cur_note = buffer->notes_used = 0;
663
  buffer->cur = buffer->line_base = buffer->next_line;
664
  buffer->need_line = false;
665
  s = buffer->next_line;
666
 
667
  if (!buffer->from_stage3)
668
    {
669
      const uchar *pbackslash = NULL;
670
 
671
      /* Fast path.  This is the common case of an un-escaped line with
672
         no trigraphs.  The primary win here is by not writing any
673
         data back to memory until we have to.  */
674
      while (1)
675
        {
676
          /* Perform an optimized search for \n, \r, \\, ?.  */
677
          s = search_line_fast (s, buffer->rlimit);
678
 
679
          c = *s;
680
          if (c == '\\')
681
            {
682
              /* Record the location of the backslash and continue.  */
683
              pbackslash = s++;
684
            }
685
          else if (__builtin_expect (c == '?', 0))
686
            {
687
              if (__builtin_expect (s[1] == '?', false)
688
                   && _cpp_trigraph_map[s[2]])
689
                {
690
                  /* Have a trigraph.  We may or may not have to convert
691
                     it.  Add a line note regardless, for -Wtrigraphs.  */
692
                  add_line_note (buffer, s, s[2]);
693
                  if (CPP_OPTION (pfile, trigraphs))
694
                    {
695
                      /* We do, and that means we have to switch to the
696
                         slow path.  */
697
                      d = (uchar *) s;
698
                      *d = _cpp_trigraph_map[s[2]];
699
                      s += 2;
700
                      goto slow_path;
701
                    }
702
                }
703
              /* Not a trigraph.  Continue on fast-path.  */
704
              s++;
705
            }
706
          else
707
            break;
708
        }
709
 
710
      /* This must be \r or \n.  We're either done, or we'll be forced
711
         to write back to the buffer and continue on the slow path.  */
712
      d = (uchar *) s;
713
 
714
      if (__builtin_expect (s == buffer->rlimit, false))
715
        goto done;
716
 
717
      /* DOS line ending? */
718
      if (__builtin_expect (c == '\r', false) && s[1] == '\n')
719
        {
720
          s++;
721
          if (s == buffer->rlimit)
722
            goto done;
723
        }
724
 
725
      if (__builtin_expect (pbackslash == NULL, true))
726
        goto done;
727
 
728
      /* Check for escaped newline.  */
729
      p = d;
730
      while (is_nvspace (p[-1]))
731
        p--;
732
      if (p - 1 != pbackslash)
733
        goto done;
734
 
735
      /* Have an escaped newline; process it and proceed to
736
         the slow path.  */
737
      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
738
      d = p - 2;
739
      buffer->next_line = p - 1;
740
 
741
    slow_path:
742
      while (1)
743
        {
744
          c = *++s;
745
          *++d = c;
746
 
747
          if (c == '\n' || c == '\r')
748
            {
749
              /* Handle DOS line endings.  */
750
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
751
                s++;
752
              if (s == buffer->rlimit)
753
                break;
754
 
755
              /* Escaped?  */
756
              p = d;
757
              while (p != buffer->next_line && is_nvspace (p[-1]))
758
                p--;
759
              if (p == buffer->next_line || p[-1] != '\\')
760
                break;
761
 
762
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
763
              d = p - 2;
764
              buffer->next_line = p - 1;
765
            }
766
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
767
            {
768
              /* Add a note regardless, for the benefit of -Wtrigraphs.  */
769
              add_line_note (buffer, d, s[2]);
770
              if (CPP_OPTION (pfile, trigraphs))
771
                {
772
                  *d = _cpp_trigraph_map[s[2]];
773
                  s += 2;
774
                }
775
            }
776
        }
777
    }
778
  else
779
    {
780
      while (*s != '\n' && *s != '\r')
781
        s++;
782
      d = (uchar *) s;
783
 
784
      /* Handle DOS line endings.  */
785
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
786
        s++;
787
    }
788
 
789
 done:
790
  *d = '\n';
791
  /* A sentinel note that should never be processed.  */
792
  add_line_note (buffer, d + 1, '\n');
793
  buffer->next_line = s + 1;
794
}
795
 
796
/* Return true if the trigraph indicated by NOTE should be warned
797
   about in a comment.  */
798
static bool
799
warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
800
{
801
  const uchar *p;
802
 
803
  /* Within comments we don't warn about trigraphs, unless the
804
     trigraph forms an escaped newline, as that may change
805
     behavior.  */
806
  if (note->type != '/')
807
    return false;
808
 
809
  /* If -trigraphs, then this was an escaped newline iff the next note
810
     is coincident.  */
811
  if (CPP_OPTION (pfile, trigraphs))
812
    return note[1].pos == note->pos;
813
 
814
  /* Otherwise, see if this forms an escaped newline.  */
815
  p = note->pos + 3;
816
  while (is_nvspace (*p))
817
    p++;
818
 
819
  /* There might have been escaped newlines between the trigraph and the
820
     newline we found.  Hence the position test.  */
821
  return (*p == '\n' && p < note[1].pos);
822
}
823
 
824
/* Process the notes created by add_line_note as far as the current
825
   location.  */
826
void
827
_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
828
{
829
  cpp_buffer *buffer = pfile->buffer;
830
 
831
  for (;;)
832
    {
833
      _cpp_line_note *note = &buffer->notes[buffer->cur_note];
834
      unsigned int col;
835
 
836
      if (note->pos > buffer->cur)
837
        break;
838
 
839
      buffer->cur_note++;
840
      col = CPP_BUF_COLUMN (buffer, note->pos + 1);
841
 
842
      if (note->type == '\\' || note->type == ' ')
843
        {
844
          if (note->type == ' ' && !in_comment)
845
            cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
846
                                 "backslash and newline separated by space");
847
 
848
          if (buffer->next_line > buffer->rlimit)
849
            {
850
              cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
851
                                   "backslash-newline at end of file");
852
              /* Prevent "no newline at end of file" warning.  */
853
              buffer->next_line = buffer->rlimit;
854
            }
855
 
856
          buffer->line_base = note->pos;
857
          CPP_INCREMENT_LINE (pfile, 0);
858
        }
859
      else if (_cpp_trigraph_map[note->type])
860
        {
861
          if (CPP_OPTION (pfile, warn_trigraphs)
862
              && (!in_comment || warn_in_comment (pfile, note)))
863
            {
864
              if (CPP_OPTION (pfile, trigraphs))
865
                cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
866
                                       pfile->line_table->highest_line, col,
867
                                       "trigraph ??%c converted to %c",
868
                                       note->type,
869
                                       (int) _cpp_trigraph_map[note->type]);
870
              else
871
                {
872
                  cpp_warning_with_line
873
                    (pfile, CPP_W_TRIGRAPHS,
874
                     pfile->line_table->highest_line, col,
875
                     "trigraph ??%c ignored, use -trigraphs to enable",
876
                     note->type);
877
                }
878
            }
879
        }
880
      else if (note->type == 0)
881
        /* Already processed in lex_raw_string.  */;
882
      else
883
        abort ();
884
    }
885
}
886
 
887
/* Skip a C-style block comment.  We find the end of the comment by
888
   seeing if an asterisk is before every '/' we encounter.  Returns
889
   nonzero if comment terminated by EOF, zero otherwise.
890
 
891
   Buffer->cur points to the initial asterisk of the comment.  */
892
bool
893
_cpp_skip_block_comment (cpp_reader *pfile)
894
{
895
  cpp_buffer *buffer = pfile->buffer;
896
  const uchar *cur = buffer->cur;
897
  uchar c;
898
 
899
  cur++;
900
  if (*cur == '/')
901
    cur++;
902
 
903
  for (;;)
904
    {
905
      /* People like decorating comments with '*', so check for '/'
906
         instead for efficiency.  */
907
      c = *cur++;
908
 
909
      if (c == '/')
910
        {
911
          if (cur[-2] == '*')
912
            break;
913
 
914
          /* Warn about potential nested comments, but not if the '/'
915
             comes immediately before the true comment delimiter.
916
             Don't bother to get it right across escaped newlines.  */
917
          if (CPP_OPTION (pfile, warn_comments)
918
              && cur[0] == '*' && cur[1] != '/')
919
            {
920
              buffer->cur = cur;
921
              cpp_warning_with_line (pfile, CPP_W_COMMENTS,
922
                                     pfile->line_table->highest_line,
923
                                     CPP_BUF_COL (buffer),
924
                                     "\"/*\" within comment");
925
            }
926
        }
927
      else if (c == '\n')
928
        {
929
          unsigned int cols;
930
          buffer->cur = cur - 1;
931
          _cpp_process_line_notes (pfile, true);
932
          if (buffer->next_line >= buffer->rlimit)
933
            return true;
934
          _cpp_clean_line (pfile);
935
 
936
          cols = buffer->next_line - buffer->line_base;
937
          CPP_INCREMENT_LINE (pfile, cols);
938
 
939
          cur = buffer->cur;
940
        }
941
    }
942
 
943
  buffer->cur = cur;
944
  _cpp_process_line_notes (pfile, true);
945
  return false;
946
}
947
 
948
/* Skip a C++ line comment, leaving buffer->cur pointing to the
949
   terminating newline.  Handles escaped newlines.  Returns nonzero
950
   if a multiline comment.  */
951
static int
952
skip_line_comment (cpp_reader *pfile)
953
{
954
  cpp_buffer *buffer = pfile->buffer;
955
  source_location orig_line = pfile->line_table->highest_line;
956
 
957
  while (*buffer->cur != '\n')
958
    buffer->cur++;
959
 
960
  _cpp_process_line_notes (pfile, true);
961
  return orig_line != pfile->line_table->highest_line;
962
}
963
 
964
/* Skips whitespace, saving the next non-whitespace character.  */
965
static void
966
skip_whitespace (cpp_reader *pfile, cppchar_t c)
967
{
968
  cpp_buffer *buffer = pfile->buffer;
969
  bool saw_NUL = false;
970
 
971
  do
972
    {
973
      /* Horizontal space always OK.  */
974
      if (c == ' ' || c == '\t')
975
        ;
976
      /* Just \f \v or \0 left.  */
977
      else if (c == '\0')
978
        saw_NUL = true;
979
      else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
980
        cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
981
                             CPP_BUF_COL (buffer),
982
                             "%s in preprocessing directive",
983
                             c == '\f' ? "form feed" : "vertical tab");
984
 
985
      c = *buffer->cur++;
986
    }
987
  /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
988
  while (is_nvspace (c));
989
 
990
  if (saw_NUL)
991
    cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
992
 
993
  buffer->cur--;
994
}
995
 
996
/* See if the characters of a number token are valid in a name (no
997
   '.', '+' or '-').  */
998
static int
999
name_p (cpp_reader *pfile, const cpp_string *string)
1000
{
1001
  unsigned int i;
1002
 
1003
  for (i = 0; i < string->len; i++)
1004
    if (!is_idchar (string->text[i]))
1005
      return 0;
1006
 
1007
  return 1;
1008
}
1009
 
1010
/* After parsing an identifier or other sequence, produce a warning about
1011
   sequences not in NFC/NFKC.  */
1012
static void
1013
warn_about_normalization (cpp_reader *pfile,
1014
                          const cpp_token *token,
1015
                          const struct normalize_state *s)
1016
{
1017
  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1018
      && !pfile->state.skipping)
1019
    {
1020
      /* Make sure that the token is printed using UCNs, even
1021
         if we'd otherwise happily print UTF-8.  */
1022
      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1023
      size_t sz;
1024
 
1025
      sz = cpp_spell_token (pfile, token, buf, false) - buf;
1026
      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1027
        cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1028
                               "`%.*s' is not in NFKC", (int) sz, buf);
1029
      else
1030
        cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1031
                               "`%.*s' is not in NFC", (int) sz, buf);
1032
    }
1033
}
1034
 
1035
/* Returns TRUE if the sequence starting at buffer->cur is invalid in
1036
   an identifier.  FIRST is TRUE if this starts an identifier.  */
1037
static bool
1038
forms_identifier_p (cpp_reader *pfile, int first,
1039
                    struct normalize_state *state)
1040
{
1041
  cpp_buffer *buffer = pfile->buffer;
1042
 
1043
  if (*buffer->cur == '$')
1044
    {
1045
      if (!CPP_OPTION (pfile, dollars_in_ident))
1046
        return false;
1047
 
1048
      buffer->cur++;
1049
      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1050
        {
1051
          CPP_OPTION (pfile, warn_dollars) = 0;
1052
          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1053
        }
1054
 
1055
      return true;
1056
    }
1057
 
1058
  /* Is this a syntactically valid UCN?  */
1059
  if (CPP_OPTION (pfile, extended_identifiers)
1060
      && *buffer->cur == '\\'
1061
      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1062
    {
1063
      buffer->cur += 2;
1064
      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1065
                          state))
1066
        return true;
1067
      buffer->cur -= 2;
1068
    }
1069
 
1070
  return false;
1071
}
1072
 
1073
/* Helper function to get the cpp_hashnode of the identifier BASE.  */
1074
static cpp_hashnode *
1075
lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1076
{
1077
  cpp_hashnode *result;
1078
  const uchar *cur;
1079
  unsigned int len;
1080
  unsigned int hash = HT_HASHSTEP (0, *base);
1081
 
1082
  cur = base + 1;
1083
  while (ISIDNUM (*cur))
1084
    {
1085
      hash = HT_HASHSTEP (hash, *cur);
1086
      cur++;
1087
    }
1088
  len = cur - base;
1089
  hash = HT_HASHFINISH (hash, len);
1090
  result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1091
                                              base, len, hash, HT_ALLOC));
1092
 
1093
  /* Rarely, identifiers require diagnostics when lexed.  */
1094
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1095
                        && !pfile->state.skipping, 0))
1096
    {
1097
      /* It is allowed to poison the same identifier twice.  */
1098
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1099
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1100
                   NODE_NAME (result));
1101
 
1102
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1103
         replacement list of a variadic macro.  */
1104
      if (result == pfile->spec_nodes.n__VA_ARGS__
1105
          && !pfile->state.va_args_ok)
1106
        cpp_error (pfile, CPP_DL_PEDWARN,
1107
                   "__VA_ARGS__ can only appear in the expansion"
1108
                   " of a C99 variadic macro");
1109
 
1110
      /* For -Wc++-compat, warn about use of C++ named operators.  */
1111
      if (result->flags & NODE_WARN_OPERATOR)
1112
        cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1113
                     "identifier \"%s\" is a special operator name in C++",
1114
                     NODE_NAME (result));
1115
    }
1116
 
1117
  return result;
1118
}
1119
 
1120
/* Get the cpp_hashnode of an identifier specified by NAME in
1121
   the current cpp_reader object.  If none is found, NULL is returned.  */
1122
cpp_hashnode *
1123
_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1124
{
1125
  cpp_hashnode *result;
1126
  result = lex_identifier_intern (pfile, (uchar *) name);
1127
  return result;
1128
}
1129
 
1130
/* Lex an identifier starting at BUFFER->CUR - 1.  */
1131
static cpp_hashnode *
1132
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1133
                struct normalize_state *nst)
1134
{
1135
  cpp_hashnode *result;
1136
  const uchar *cur;
1137
  unsigned int len;
1138
  unsigned int hash = HT_HASHSTEP (0, *base);
1139
 
1140
  cur = pfile->buffer->cur;
1141
  if (! starts_ucn)
1142
    while (ISIDNUM (*cur))
1143
      {
1144
        hash = HT_HASHSTEP (hash, *cur);
1145
        cur++;
1146
      }
1147
  pfile->buffer->cur = cur;
1148
  if (starts_ucn || forms_identifier_p (pfile, false, nst))
1149
    {
1150
      /* Slower version for identifiers containing UCNs (or $).  */
1151
      do {
1152
        while (ISIDNUM (*pfile->buffer->cur))
1153
          {
1154
            pfile->buffer->cur++;
1155
            NORMALIZE_STATE_UPDATE_IDNUM (nst);
1156
          }
1157
      } while (forms_identifier_p (pfile, false, nst));
1158
      result = _cpp_interpret_identifier (pfile, base,
1159
                                          pfile->buffer->cur - base);
1160
    }
1161
  else
1162
    {
1163
      len = cur - base;
1164
      hash = HT_HASHFINISH (hash, len);
1165
 
1166
      result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1167
                                                  base, len, hash, HT_ALLOC));
1168
    }
1169
 
1170
  /* Rarely, identifiers require diagnostics when lexed.  */
1171
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1172
                        && !pfile->state.skipping, 0))
1173
    {
1174
      /* It is allowed to poison the same identifier twice.  */
1175
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1176
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1177
                   NODE_NAME (result));
1178
 
1179
      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1180
         replacement list of a variadic macro.  */
1181
      if (result == pfile->spec_nodes.n__VA_ARGS__
1182
          && !pfile->state.va_args_ok)
1183
        cpp_error (pfile, CPP_DL_PEDWARN,
1184
                   "__VA_ARGS__ can only appear in the expansion"
1185
                   " of a C99 variadic macro");
1186
 
1187
      /* For -Wc++-compat, warn about use of C++ named operators.  */
1188
      if (result->flags & NODE_WARN_OPERATOR)
1189
        cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1190
                     "identifier \"%s\" is a special operator name in C++",
1191
                     NODE_NAME (result));
1192
    }
1193
 
1194
  return result;
1195
}
1196
 
1197
/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1198
static void
1199
lex_number (cpp_reader *pfile, cpp_string *number,
1200
            struct normalize_state *nst)
1201
{
1202
  const uchar *cur;
1203
  const uchar *base;
1204
  uchar *dest;
1205
 
1206
  base = pfile->buffer->cur - 1;
1207
  do
1208
    {
1209
      cur = pfile->buffer->cur;
1210
 
1211
      /* N.B. ISIDNUM does not include $.  */
1212
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
1213
        {
1214
          cur++;
1215
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
1216
        }
1217
 
1218
      pfile->buffer->cur = cur;
1219
    }
1220
  while (forms_identifier_p (pfile, false, nst));
1221
 
1222
  number->len = cur - base;
1223
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1224
  memcpy (dest, base, number->len);
1225
  dest[number->len] = '\0';
1226
  number->text = dest;
1227
}
1228
 
1229
/* Create a token of type TYPE with a literal spelling.  */
1230
static void
1231
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1232
                unsigned int len, enum cpp_ttype type)
1233
{
1234
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1235
 
1236
  memcpy (dest, base, len);
1237
  dest[len] = '\0';
1238
  token->type = type;
1239
  token->val.str.len = len;
1240
  token->val.str.text = dest;
1241
}
1242
 
1243
/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1244
   sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1245
 
1246
static void
1247
bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1248
                _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1249
{
1250
  _cpp_buff *first_buff = *first_buff_p;
1251
  _cpp_buff *last_buff = *last_buff_p;
1252
 
1253
  if (first_buff == NULL)
1254
    first_buff = last_buff = _cpp_get_buff (pfile, len);
1255
  else if (len > BUFF_ROOM (last_buff))
1256
    {
1257
      size_t room = BUFF_ROOM (last_buff);
1258
      memcpy (BUFF_FRONT (last_buff), base, room);
1259
      BUFF_FRONT (last_buff) += room;
1260
      base += room;
1261
      len -= room;
1262
      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1263
    }
1264
 
1265
  memcpy (BUFF_FRONT (last_buff), base, len);
1266
  BUFF_FRONT (last_buff) += len;
1267
 
1268
  *first_buff_p = first_buff;
1269
  *last_buff_p = last_buff;
1270
}
1271
 
1272
/* Lexes a raw string.  The stored string contains the spelling, including
1273
   double quotes, delimiter string, '(' and ')', any leading
1274
   'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1275
   literal, or CPP_OTHER if it was not properly terminated.
1276
 
1277
   The spelling is NUL-terminated, but it is not guaranteed that this
1278
   is the first NUL since embedded NULs are preserved.  */
1279
 
1280
static void
1281
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1282
                const uchar *cur)
1283
{
1284
  const uchar *raw_prefix;
1285
  unsigned int raw_prefix_len = 0;
1286
  enum cpp_ttype type;
1287
  size_t total_len = 0;
1288
  _cpp_buff *first_buff = NULL, *last_buff = NULL;
1289
  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1290
 
1291
  type = (*base == 'L' ? CPP_WSTRING :
1292
          *base == 'U' ? CPP_STRING32 :
1293
          *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1294
          : CPP_STRING);
1295
 
1296
  raw_prefix = cur + 1;
1297
  while (raw_prefix_len < 16)
1298
    {
1299
      switch (raw_prefix[raw_prefix_len])
1300
        {
1301
        case ' ': case '(': case ')': case '\\': case '\t':
1302
        case '\v': case '\f': case '\n': default:
1303
          break;
1304
        /* Basic source charset except the above chars.  */
1305
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1306
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1307
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1308
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1309
        case 'y': case 'z':
1310
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1311
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1312
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1313
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1314
        case 'Y': case 'Z':
1315
        case '0': case '1': case '2': case '3': case '4': case '5':
1316
        case '6': case '7': case '8': case '9':
1317
        case '_': case '{': case '}': case '#': case '[': case ']':
1318
        case '<': case '>': case '%': case ':': case ';': case '.':
1319
        case '?': case '*': case '+': case '-': case '/': case '^':
1320
        case '&': case '|': case '~': case '!': case '=': case ',':
1321
        case '"': case '\'':
1322
          raw_prefix_len++;
1323
          continue;
1324
        }
1325
      break;
1326
    }
1327
 
1328
  if (raw_prefix[raw_prefix_len] != '(')
1329
    {
1330
      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1331
                + 1;
1332
      if (raw_prefix_len == 16)
1333
        cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1334
                             "raw string delimiter longer than 16 characters");
1335
      else
1336
        cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1337
                             "invalid character '%c' in raw string delimiter",
1338
                             (int) raw_prefix[raw_prefix_len]);
1339
      pfile->buffer->cur = raw_prefix - 1;
1340
      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1341
      return;
1342
    }
1343
 
1344
  cur = raw_prefix + raw_prefix_len + 1;
1345
  for (;;)
1346
    {
1347
#define BUF_APPEND(STR,LEN)                                     \
1348
      do {                                                      \
1349
        bufring_append (pfile, (const uchar *)(STR), (LEN),     \
1350
                        &first_buff, &last_buff);               \
1351
        total_len += (LEN);                                     \
1352
      } while (0);
1353
 
1354
      cppchar_t c;
1355
 
1356
      /* If we previously performed any trigraph or line splicing
1357
         transformations, undo them within the body of the raw string.  */
1358
      while (note->pos < cur)
1359
        ++note;
1360
      for (; note->pos == cur; ++note)
1361
        {
1362
          switch (note->type)
1363
            {
1364
            case '\\':
1365
            case ' ':
1366
              /* Restore backslash followed by newline.  */
1367
              BUF_APPEND (base, cur - base);
1368
              base = cur;
1369
              BUF_APPEND ("\\", 1);
1370
            after_backslash:
1371
              if (note->type == ' ')
1372
                {
1373
                  /* GNU backslash whitespace newline extension.  FIXME
1374
                     could be any sequence of non-vertical space.  When we
1375
                     can properly restore any such sequence, we should mark
1376
                     this note as handled so _cpp_process_line_notes
1377
                     doesn't warn.  */
1378
                  BUF_APPEND (" ", 1);
1379
                }
1380
 
1381
              BUF_APPEND ("\n", 1);
1382
              break;
1383
 
1384
            case 0:
1385
              /* Already handled.  */
1386
              break;
1387
 
1388
            default:
1389
              if (_cpp_trigraph_map[note->type])
1390
                {
1391
                  /* Don't warn about this trigraph in
1392
                     _cpp_process_line_notes, since trigraphs show up as
1393
                     trigraphs in raw strings.  */
1394
                  uchar type = note->type;
1395
                  note->type = 0;
1396
 
1397
                  if (!CPP_OPTION (pfile, trigraphs))
1398
                    /* If we didn't convert the trigraph in the first
1399
                       place, don't do anything now either.  */
1400
                    break;
1401
 
1402
                  BUF_APPEND (base, cur - base);
1403
                  base = cur;
1404
                  BUF_APPEND ("??", 2);
1405
 
1406
                  /* ??/ followed by newline gets two line notes, one for
1407
                     the trigraph and one for the backslash/newline.  */
1408
                  if (type == '/' && note[1].pos == cur)
1409
                    {
1410
                      if (note[1].type != '\\'
1411
                          && note[1].type != ' ')
1412
                        abort ();
1413
                      BUF_APPEND ("/", 1);
1414
                      ++note;
1415
                      goto after_backslash;
1416
                    }
1417
                  /* The ) from ??) could be part of the suffix.  */
1418
                  else if (type == ')'
1419
                           && strncmp ((const char *) cur+1,
1420
                                       (const char *) raw_prefix,
1421
                                       raw_prefix_len) == 0
1422
                           && cur[raw_prefix_len+1] == '"')
1423
                    {
1424
                      BUF_APPEND (")", 1);
1425
                      base++;
1426
                      cur += raw_prefix_len + 2;
1427
                      goto break_outer_loop;
1428
                    }
1429
                  else
1430
                    {
1431
                      /* Skip the replacement character.  */
1432
                      base = ++cur;
1433
                      BUF_APPEND (&type, 1);
1434
                    }
1435
                }
1436
              else
1437
                abort ();
1438
              break;
1439
            }
1440
        }
1441
      c = *cur++;
1442
 
1443
      if (c == ')'
1444
          && strncmp ((const char *) cur, (const char *) raw_prefix,
1445
                      raw_prefix_len) == 0
1446
          && cur[raw_prefix_len] == '"')
1447
        {
1448
          cur += raw_prefix_len + 1;
1449
          break;
1450
        }
1451
      else if (c == '\n')
1452
        {
1453
          if (pfile->state.in_directive
1454
              || pfile->state.parsing_args
1455
              || pfile->state.in_deferred_pragma)
1456
            {
1457
              cur--;
1458
              type = CPP_OTHER;
1459
              cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1460
                                   "unterminated raw string");
1461
              break;
1462
            }
1463
 
1464
          BUF_APPEND (base, cur - base);
1465
 
1466
          if (pfile->buffer->cur < pfile->buffer->rlimit)
1467
            CPP_INCREMENT_LINE (pfile, 0);
1468
          pfile->buffer->need_line = true;
1469
 
1470
          pfile->buffer->cur = cur-1;
1471
          _cpp_process_line_notes (pfile, false);
1472
          if (!_cpp_get_fresh_line (pfile))
1473
            {
1474
              source_location src_loc = token->src_loc;
1475
              token->type = CPP_EOF;
1476
              /* Tell the compiler the line number of the EOF token.  */
1477
              token->src_loc = pfile->line_table->highest_line;
1478
              token->flags = BOL;
1479
              if (first_buff != NULL)
1480
                _cpp_release_buff (pfile, first_buff);
1481
              cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1482
                                   "unterminated raw string");
1483
              return;
1484
            }
1485
 
1486
          cur = base = pfile->buffer->cur;
1487
          note = &pfile->buffer->notes[pfile->buffer->cur_note];
1488
        }
1489
    }
1490
 break_outer_loop:
1491
 
1492
  if (CPP_OPTION (pfile, user_literals))
1493
    {
1494
      /* Grab user defined literal suffix.  */
1495
      if (ISIDST (*cur))
1496
        {
1497
          type = cpp_userdef_string_add_type (type);
1498
          ++cur;
1499
        }
1500
      while (ISIDNUM (*cur))
1501
        ++cur;
1502
    }
1503
 
1504
  pfile->buffer->cur = cur;
1505
  if (first_buff == NULL)
1506
    create_literal (pfile, token, base, cur - base, type);
1507
  else
1508
    {
1509
      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1510
 
1511
      token->type = type;
1512
      token->val.str.len = total_len + (cur - base);
1513
      token->val.str.text = dest;
1514
      last_buff = first_buff;
1515
      while (last_buff != NULL)
1516
        {
1517
          memcpy (dest, last_buff->base,
1518
                  BUFF_FRONT (last_buff) - last_buff->base);
1519
          dest += BUFF_FRONT (last_buff) - last_buff->base;
1520
          last_buff = last_buff->next;
1521
        }
1522
      _cpp_release_buff (pfile, first_buff);
1523
      memcpy (dest, base, cur - base);
1524
      dest[cur - base] = '\0';
1525
    }
1526
}
1527
 
1528
/* Lexes a string, character constant, or angle-bracketed header file
1529
   name.  The stored string contains the spelling, including opening
1530
   quote and any leading 'L', 'u', 'U' or 'u8' and optional
1531
   'R' modifier.  It returns the type of the literal, or CPP_OTHER
1532
   if it was not properly terminated, or CPP_LESS for an unterminated
1533
   header name which must be relexed as normal tokens.
1534
 
1535
   The spelling is NUL-terminated, but it is not guaranteed that this
1536
   is the first NUL since embedded NULs are preserved.  */
1537
static void
1538
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1539
{
1540
  bool saw_NUL = false;
1541
  const uchar *cur;
1542
  cppchar_t terminator;
1543
  enum cpp_ttype type;
1544
 
1545
  cur = base;
1546
  terminator = *cur++;
1547
  if (terminator == 'L' || terminator == 'U')
1548
    terminator = *cur++;
1549
  else if (terminator == 'u')
1550
    {
1551
      terminator = *cur++;
1552
      if (terminator == '8')
1553
        terminator = *cur++;
1554
    }
1555
  if (terminator == 'R')
1556
    {
1557
      lex_raw_string (pfile, token, base, cur);
1558
      return;
1559
    }
1560
  if (terminator == '"')
1561
    type = (*base == 'L' ? CPP_WSTRING :
1562
            *base == 'U' ? CPP_STRING32 :
1563
            *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1564
                         : CPP_STRING);
1565
  else if (terminator == '\'')
1566
    type = (*base == 'L' ? CPP_WCHAR :
1567
            *base == 'U' ? CPP_CHAR32 :
1568
            *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1569
  else
1570
    terminator = '>', type = CPP_HEADER_NAME;
1571
 
1572
  for (;;)
1573
    {
1574
      cppchar_t c = *cur++;
1575
 
1576
      /* In #include-style directives, terminators are not escapable.  */
1577
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1578
        cur++;
1579
      else if (c == terminator)
1580
        break;
1581
      else if (c == '\n')
1582
        {
1583
          cur--;
1584
          /* Unmatched quotes always yield undefined behavior, but
1585
             greedy lexing means that what appears to be an unterminated
1586
             header name may actually be a legitimate sequence of tokens.  */
1587
          if (terminator == '>')
1588
            {
1589
              token->type = CPP_LESS;
1590
              return;
1591
            }
1592
          type = CPP_OTHER;
1593
          break;
1594
        }
1595
      else if (c == '\0')
1596
        saw_NUL = true;
1597
    }
1598
 
1599
  if (saw_NUL && !pfile->state.skipping)
1600
    cpp_error (pfile, CPP_DL_WARNING,
1601
               "null character(s) preserved in literal");
1602
 
1603
  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1604
    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1605
               (int) terminator);
1606
 
1607
  if (CPP_OPTION (pfile, user_literals))
1608
    {
1609
      /* Grab user defined literal suffix.  */
1610
      if (ISIDST (*cur))
1611
        {
1612
          type = cpp_userdef_char_add_type (type);
1613
          type = cpp_userdef_string_add_type (type);
1614
          ++cur;
1615
        }
1616
      while (ISIDNUM (*cur))
1617
        ++cur;
1618
    }
1619
 
1620
  pfile->buffer->cur = cur;
1621
  create_literal (pfile, token, base, cur - base, type);
1622
}
1623
 
1624
/* Return the comment table. The client may not make any assumption
1625
   about the ordering of the table.  */
1626
cpp_comment_table *
1627
cpp_get_comments (cpp_reader *pfile)
1628
{
1629
  return &pfile->comments;
1630
}
1631
 
1632
/* Append a comment to the end of the comment table. */
1633
static void
1634
store_comment (cpp_reader *pfile, cpp_token *token)
1635
{
1636
  int len;
1637
 
1638
  if (pfile->comments.allocated == 0)
1639
    {
1640
      pfile->comments.allocated = 256;
1641
      pfile->comments.entries = (cpp_comment *) xmalloc
1642
        (pfile->comments.allocated * sizeof (cpp_comment));
1643
    }
1644
 
1645
  if (pfile->comments.count == pfile->comments.allocated)
1646
    {
1647
      pfile->comments.allocated *= 2;
1648
      pfile->comments.entries = (cpp_comment *) xrealloc
1649
        (pfile->comments.entries,
1650
         pfile->comments.allocated * sizeof (cpp_comment));
1651
    }
1652
 
1653
  len = token->val.str.len;
1654
 
1655
  /* Copy comment. Note, token may not be NULL terminated. */
1656
  pfile->comments.entries[pfile->comments.count].comment =
1657
    (char *) xmalloc (sizeof (char) * (len + 1));
1658
  memcpy (pfile->comments.entries[pfile->comments.count].comment,
1659
          token->val.str.text, len);
1660
  pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1661
 
1662
  /* Set source location. */
1663
  pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1664
 
1665
  /* Increment the count of entries in the comment table. */
1666
  pfile->comments.count++;
1667
}
1668
 
1669
/* The stored comment includes the comment start and any terminator.  */
1670
static void
1671
save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1672
              cppchar_t type)
1673
{
1674
  unsigned char *buffer;
1675
  unsigned int len, clen, i;
1676
 
1677
  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1678
 
1679
  /* C++ comments probably (not definitely) have moved past a new
1680
     line, which we don't want to save in the comment.  */
1681
  if (is_vspace (pfile->buffer->cur[-1]))
1682
    len--;
1683
 
1684
  /* If we are currently in a directive or in argument parsing, then
1685
     we need to store all C++ comments as C comments internally, and
1686
     so we need to allocate a little extra space in that case.
1687
 
1688
     Note that the only time we encounter a directive here is
1689
     when we are saving comments in a "#define".  */
1690
  clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1691
          && type == '/') ? len + 2 : len;
1692
 
1693
  buffer = _cpp_unaligned_alloc (pfile, clen);
1694
 
1695
  token->type = CPP_COMMENT;
1696
  token->val.str.len = clen;
1697
  token->val.str.text = buffer;
1698
 
1699
  buffer[0] = '/';
1700
  memcpy (buffer + 1, from, len - 1);
1701
 
1702
  /* Finish conversion to a C comment, if necessary.  */
1703
  if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1704
    {
1705
      buffer[1] = '*';
1706
      buffer[clen - 2] = '*';
1707
      buffer[clen - 1] = '/';
1708
      /* As there can be in a C++ comments illegal sequences for C comments
1709
         we need to filter them out.  */
1710
      for (i = 2; i < (clen - 2); i++)
1711
        if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1712
          buffer[i] = '|';
1713
    }
1714
 
1715
  /* Finally store this comment for use by clients of libcpp. */
1716
  store_comment (pfile, token);
1717
}
1718
 
1719
/* Allocate COUNT tokens for RUN.  */
1720
void
1721
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
1722
{
1723
  run->base = XNEWVEC (cpp_token, count);
1724
  run->limit = run->base + count;
1725
  run->next = NULL;
1726
}
1727
 
1728
/* Returns the next tokenrun, or creates one if there is none.  */
1729
static tokenrun *
1730
next_tokenrun (tokenrun *run)
1731
{
1732
  if (run->next == NULL)
1733
    {
1734
      run->next = XNEW (tokenrun);
1735
      run->next->prev = run;
1736
      _cpp_init_tokenrun (run->next, 250);
1737
    }
1738
 
1739
  return run->next;
1740
}
1741
 
1742
/* Return the number of not yet processed token in a given
1743
   context.  */
1744
int
1745
_cpp_remaining_tokens_num_in_context (cpp_context *context)
1746
{
1747
  if (context->tokens_kind == TOKENS_KIND_DIRECT)
1748
    return (LAST (context).token - FIRST (context).token);
1749
  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1750
           || context->tokens_kind == TOKENS_KIND_EXTENDED)
1751
    return (LAST (context).ptoken - FIRST (context).ptoken);
1752
  else
1753
      abort ();
1754
}
1755
 
1756
/* Returns the token present at index INDEX in a given context.  If
1757
   INDEX is zero, the next token to be processed is returned.  */
1758
static const cpp_token*
1759
_cpp_token_from_context_at (cpp_context *context, int index)
1760
{
1761
  if (context->tokens_kind == TOKENS_KIND_DIRECT)
1762
    return &(FIRST (context).token[index]);
1763
  else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1764
           || context->tokens_kind == TOKENS_KIND_EXTENDED)
1765
    return FIRST (context).ptoken[index];
1766
 else
1767
   abort ();
1768
}
1769
 
1770
/* Look ahead in the input stream.  */
1771
const cpp_token *
1772
cpp_peek_token (cpp_reader *pfile, int index)
1773
{
1774
  cpp_context *context = pfile->context;
1775
  const cpp_token *peektok;
1776
  int count;
1777
 
1778
  /* First, scan through any pending cpp_context objects.  */
1779
  while (context->prev)
1780
    {
1781
      ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1782
 
1783
      if (index < (int) sz)
1784
        return _cpp_token_from_context_at (context, index);
1785
      index -= (int) sz;
1786
      context = context->prev;
1787
    }
1788
 
1789
  /* We will have to read some new tokens after all (and do so
1790
     without invalidating preceding tokens).  */
1791
  count = index;
1792
  pfile->keep_tokens++;
1793
 
1794
  do
1795
    {
1796
      peektok = _cpp_lex_token (pfile);
1797
      if (peektok->type == CPP_EOF)
1798
        return peektok;
1799
    }
1800
  while (index--);
1801
 
1802
  _cpp_backup_tokens_direct (pfile, count + 1);
1803
  pfile->keep_tokens--;
1804
 
1805
  return peektok;
1806
}
1807
 
1808
/* Allocate a single token that is invalidated at the same time as the
1809
   rest of the tokens on the line.  Has its line and col set to the
1810
   same as the last lexed token, so that diagnostics appear in the
1811
   right place.  */
1812
cpp_token *
1813
_cpp_temp_token (cpp_reader *pfile)
1814
{
1815
  cpp_token *old, *result;
1816
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1817
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1818
 
1819
  old = pfile->cur_token - 1;
1820
  /* Any pre-existing lookaheads must not be clobbered.  */
1821
  if (la)
1822
    {
1823
      if (sz <= la)
1824
        {
1825
          tokenrun *next = next_tokenrun (pfile->cur_run);
1826
 
1827
          if (sz < la)
1828
            memmove (next->base + 1, next->base,
1829
                     (la - sz) * sizeof (cpp_token));
1830
 
1831
          next->base[0] = pfile->cur_run->limit[-1];
1832
        }
1833
 
1834
      if (sz > 1)
1835
        memmove (pfile->cur_token + 1, pfile->cur_token,
1836
                 MIN (la, sz - 1) * sizeof (cpp_token));
1837
    }
1838
 
1839
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
1840
    {
1841
      pfile->cur_run = next_tokenrun (pfile->cur_run);
1842
      pfile->cur_token = pfile->cur_run->base;
1843
    }
1844
 
1845
  result = pfile->cur_token++;
1846
  result->src_loc = old->src_loc;
1847
  return result;
1848
}
1849
 
1850
/* Lex a token into RESULT (external interface).  Takes care of issues
1851
   like directive handling, token lookahead, multiple include
1852
   optimization and skipping.  */
1853
const cpp_token *
1854
_cpp_lex_token (cpp_reader *pfile)
1855
{
1856
  cpp_token *result;
1857
 
1858
  for (;;)
1859
    {
1860
      if (pfile->cur_token == pfile->cur_run->limit)
1861
        {
1862
          pfile->cur_run = next_tokenrun (pfile->cur_run);
1863
          pfile->cur_token = pfile->cur_run->base;
1864
        }
1865
      /* We assume that the current token is somewhere in the current
1866
         run.  */
1867
      if (pfile->cur_token < pfile->cur_run->base
1868
          || pfile->cur_token >= pfile->cur_run->limit)
1869
        abort ();
1870
 
1871
      if (pfile->lookaheads)
1872
        {
1873
          pfile->lookaheads--;
1874
          result = pfile->cur_token++;
1875
        }
1876
      else
1877
        result = _cpp_lex_direct (pfile);
1878
 
1879
      if (result->flags & BOL)
1880
        {
1881
          /* Is this a directive.  If _cpp_handle_directive returns
1882
             false, it is an assembler #.  */
1883
          if (result->type == CPP_HASH
1884
              /* 6.10.3 p 11: Directives in a list of macro arguments
1885
                 gives undefined behavior.  This implementation
1886
                 handles the directive as normal.  */
1887
              && pfile->state.parsing_args != 1)
1888
            {
1889
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1890
                {
1891
                  if (pfile->directive_result.type == CPP_PADDING)
1892
                    continue;
1893
                  result = &pfile->directive_result;
1894
                }
1895
            }
1896
          else if (pfile->state.in_deferred_pragma)
1897
            result = &pfile->directive_result;
1898
 
1899
          if (pfile->cb.line_change && !pfile->state.skipping)
1900
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1901
        }
1902
 
1903
      /* We don't skip tokens in directives.  */
1904
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1905
        break;
1906
 
1907
      /* Outside a directive, invalidate controlling macros.  At file
1908
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1909
         get here and MI optimization works.  */
1910
      pfile->mi_valid = false;
1911
 
1912
      if (!pfile->state.skipping || result->type == CPP_EOF)
1913
        break;
1914
    }
1915
 
1916
  return result;
1917
}
1918
 
1919
/* Returns true if a fresh line has been loaded.  */
1920
bool
1921
_cpp_get_fresh_line (cpp_reader *pfile)
1922
{
1923
  int return_at_eof;
1924
 
1925
  /* We can't get a new line until we leave the current directive.  */
1926
  if (pfile->state.in_directive)
1927
    return false;
1928
 
1929
  for (;;)
1930
    {
1931
      cpp_buffer *buffer = pfile->buffer;
1932
 
1933
      if (!buffer->need_line)
1934
        return true;
1935
 
1936
      if (buffer->next_line < buffer->rlimit)
1937
        {
1938
          _cpp_clean_line (pfile);
1939
          return true;
1940
        }
1941
 
1942
      /* First, get out of parsing arguments state.  */
1943
      if (pfile->state.parsing_args)
1944
        return false;
1945
 
1946
      /* End of buffer.  Non-empty files should end in a newline.  */
1947
      if (buffer->buf != buffer->rlimit
1948
          && buffer->next_line > buffer->rlimit
1949
          && !buffer->from_stage3)
1950
        {
1951
          /* Clip to buffer size.  */
1952
          buffer->next_line = buffer->rlimit;
1953
        }
1954
 
1955
      return_at_eof = buffer->return_at_eof;
1956
      _cpp_pop_buffer (pfile);
1957
      if (pfile->buffer == NULL || return_at_eof)
1958
        return false;
1959
    }
1960
}
1961
 
1962
/* Set result->type to THEN_TYPE and consume one character if the
   next unread character is CHAR; otherwise set it to ELSE_TYPE and
   consume nothing.  Relies on variables named `result' and `buffer'
   being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
1970
 
1971
/* Lex a token into pfile->cur_token, which is also incremented, to
1972
   get diagnostics pointing to the correct location.
1973
 
1974
   Does not handle issues such as token lookahead, multiple-include
1975
   optimization, directives, skipping etc.  This function is only
1976
   suitable for use by _cpp_lex_token, and in special cases like
1977
   lex_expansion_token which doesn't care for any of these issues.
1978
 
1979
   When meeting a newline, returns CPP_EOF if parsing a directive,
1980
   otherwise returns to the start of the token buffer if permissible.
1981
   Returns the location of the lexed token.  */
1982
cpp_token *
1983
_cpp_lex_direct (cpp_reader *pfile)
1984
{
1985
  cppchar_t c;
1986
  cpp_buffer *buffer;
1987
  const unsigned char *comment_start;
1988
  cpp_token *result = pfile->cur_token++;
1989
 
1990
 fresh_line:
1991
  result->flags = 0;
1992
  buffer = pfile->buffer;
1993
  if (buffer->need_line)
1994
    {
1995
      if (pfile->state.in_deferred_pragma)
1996
        {
1997
          result->type = CPP_PRAGMA_EOL;
1998
          pfile->state.in_deferred_pragma = false;
1999
          if (!pfile->state.pragma_allow_expansion)
2000
            pfile->state.prevent_expansion--;
2001
          return result;
2002
        }
2003
      if (!_cpp_get_fresh_line (pfile))
2004
        {
2005
          result->type = CPP_EOF;
2006
          if (!pfile->state.in_directive)
2007
            {
2008
              /* Tell the compiler the line number of the EOF token.  */
2009
              result->src_loc = pfile->line_table->highest_line;
2010
              result->flags = BOL;
2011
            }
2012
          return result;
2013
        }
2014
      if (!pfile->keep_tokens)
2015
        {
2016
          pfile->cur_run = &pfile->base_run;
2017
          result = pfile->base_run.base;
2018
          pfile->cur_token = result + 1;
2019
        }
2020
      result->flags = BOL;
2021
      if (pfile->state.parsing_args == 2)
2022
        result->flags |= PREV_WHITE;
2023
    }
2024
  buffer = pfile->buffer;
2025
 update_tokens_line:
2026
  result->src_loc = pfile->line_table->highest_line;
2027
 
2028
 skipped_white:
2029
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2030
      && !pfile->overlaid_buffer)
2031
    {
2032
      _cpp_process_line_notes (pfile, false);
2033
      result->src_loc = pfile->line_table->highest_line;
2034
    }
2035
  c = *buffer->cur++;
2036
 
2037
  if (pfile->forced_token_location_p)
2038
    result->src_loc = *pfile->forced_token_location_p;
2039
  else
2040
    result->src_loc = linemap_position_for_column (pfile->line_table,
2041
                                          CPP_BUF_COLUMN (buffer, buffer->cur));
2042
 
2043
  switch (c)
2044
    {
2045
    case ' ': case '\t': case '\f': case '\v': case '\0':
2046
      result->flags |= PREV_WHITE;
2047
      skip_whitespace (pfile, c);
2048
      goto skipped_white;
2049
 
2050
    case '\n':
2051
      if (buffer->cur < buffer->rlimit)
2052
        CPP_INCREMENT_LINE (pfile, 0);
2053
      buffer->need_line = true;
2054
      goto fresh_line;
2055
 
2056
    case '0': case '1': case '2': case '3': case '4':
2057
    case '5': case '6': case '7': case '8': case '9':
2058
      {
2059
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2060
        result->type = CPP_NUMBER;
2061
        lex_number (pfile, &result->val.str, &nst);
2062
        warn_about_normalization (pfile, result, &nst);
2063
        break;
2064
      }
2065
 
2066
    case 'L':
2067
    case 'u':
2068
    case 'U':
2069
    case 'R':
2070
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2071
         wide strings or raw strings.  */
2072
      if (c == 'L' || CPP_OPTION (pfile, rliterals)
2073
          || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2074
        {
2075
          if ((*buffer->cur == '\'' && c != 'R')
2076
              || *buffer->cur == '"'
2077
              || (*buffer->cur == 'R'
2078
                  && c != 'R'
2079
                  && buffer->cur[1] == '"'
2080
                  && CPP_OPTION (pfile, rliterals))
2081
              || (*buffer->cur == '8'
2082
                  && c == 'u'
2083
                  && (buffer->cur[1] == '"'
2084
                      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2085
                          && CPP_OPTION (pfile, rliterals)))))
2086
            {
2087
              lex_string (pfile, result, buffer->cur - 1);
2088
              break;
2089
            }
2090
        }
2091
      /* Fall through.  */
2092
 
2093
    case '_':
2094
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2095
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2096
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2097
    case 's': case 't':           case 'v': case 'w': case 'x':
2098
    case 'y': case 'z':
2099
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2100
    case 'G': case 'H': case 'I': case 'J': case 'K':
2101
    case 'M': case 'N': case 'O': case 'P': case 'Q':
2102
    case 'S': case 'T':           case 'V': case 'W': case 'X':
2103
    case 'Y': case 'Z':
2104
      result->type = CPP_NAME;
2105
      {
2106
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2107
        result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2108
                                                &nst);
2109
        warn_about_normalization (pfile, result, &nst);
2110
      }
2111
 
2112
      /* Convert named operators to their proper types.  */
2113
      if (result->val.node.node->flags & NODE_OPERATOR)
2114
        {
2115
          result->flags |= NAMED_OP;
2116
          result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2117
        }
2118
      break;
2119
 
2120
    case '\'':
2121
    case '"':
2122
      lex_string (pfile, result, buffer->cur - 1);
2123
      break;
2124
 
2125
    case '/':
2126
      /* A potential block or line comment.  */
2127
      comment_start = buffer->cur;
2128
      c = *buffer->cur;
2129
 
2130
      if (c == '*')
2131
        {
2132
          if (_cpp_skip_block_comment (pfile))
2133
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2134
        }
2135
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
2136
                            || cpp_in_system_header (pfile)))
2137
        {
2138
          /* Warn about comments only if pedantically GNUC89, and not
2139
             in system headers.  */
2140
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
2141
              && ! buffer->warned_cplusplus_comments)
2142
            {
2143
              cpp_error (pfile, CPP_DL_PEDWARN,
2144
                         "C++ style comments are not allowed in ISO C90");
2145
              cpp_error (pfile, CPP_DL_PEDWARN,
2146
                         "(this will be reported only once per input file)");
2147
              buffer->warned_cplusplus_comments = 1;
2148
            }
2149
 
2150
          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2151
            cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2152
        }
2153
      else if (c == '=')
2154
        {
2155
          buffer->cur++;
2156
          result->type = CPP_DIV_EQ;
2157
          break;
2158
        }
2159
      else
2160
        {
2161
          result->type = CPP_DIV;
2162
          break;
2163
        }
2164
 
2165
      if (!pfile->state.save_comments)
2166
        {
2167
          result->flags |= PREV_WHITE;
2168
          goto update_tokens_line;
2169
        }
2170
 
2171
      /* Save the comment as a token in its own right.  */
2172
      save_comment (pfile, result, comment_start, c);
2173
      break;
2174
 
2175
    case '<':
2176
      if (pfile->state.angled_headers)
2177
        {
2178
          lex_string (pfile, result, buffer->cur - 1);
2179
          if (result->type != CPP_LESS)
2180
            break;
2181
        }
2182
 
2183
      result->type = CPP_LESS;
2184
      if (*buffer->cur == '=')
2185
        buffer->cur++, result->type = CPP_LESS_EQ;
2186
      else if (*buffer->cur == '<')
2187
        {
2188
          buffer->cur++;
2189
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2190
        }
2191
      else if (CPP_OPTION (pfile, digraphs))
2192
        {
2193
          if (*buffer->cur == ':')
2194
            {
2195
              buffer->cur++;
2196
              result->flags |= DIGRAPH;
2197
              result->type = CPP_OPEN_SQUARE;
2198
            }
2199
          else if (*buffer->cur == '%')
2200
            {
2201
              buffer->cur++;
2202
              result->flags |= DIGRAPH;
2203
              result->type = CPP_OPEN_BRACE;
2204
            }
2205
        }
2206
      break;
2207
 
2208
    case '>':
2209
      result->type = CPP_GREATER;
2210
      if (*buffer->cur == '=')
2211
        buffer->cur++, result->type = CPP_GREATER_EQ;
2212
      else if (*buffer->cur == '>')
2213
        {
2214
          buffer->cur++;
2215
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2216
        }
2217
      break;
2218
 
2219
    case '%':
2220
      result->type = CPP_MOD;
2221
      if (*buffer->cur == '=')
2222
        buffer->cur++, result->type = CPP_MOD_EQ;
2223
      else if (CPP_OPTION (pfile, digraphs))
2224
        {
2225
          if (*buffer->cur == ':')
2226
            {
2227
              buffer->cur++;
2228
              result->flags |= DIGRAPH;
2229
              result->type = CPP_HASH;
2230
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
2231
                buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2232
            }
2233
          else if (*buffer->cur == '>')
2234
            {
2235
              buffer->cur++;
2236
              result->flags |= DIGRAPH;
2237
              result->type = CPP_CLOSE_BRACE;
2238
            }
2239
        }
2240
      break;
2241
 
2242
    case '.':
2243
      result->type = CPP_DOT;
2244
      if (ISDIGIT (*buffer->cur))
2245
        {
2246
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2247
          result->type = CPP_NUMBER;
2248
          lex_number (pfile, &result->val.str, &nst);
2249
          warn_about_normalization (pfile, result, &nst);
2250
        }
2251
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2252
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
2253
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2254
        buffer->cur++, result->type = CPP_DOT_STAR;
2255
      break;
2256
 
2257
    case '+':
2258
      result->type = CPP_PLUS;
2259
      if (*buffer->cur == '+')
2260
        buffer->cur++, result->type = CPP_PLUS_PLUS;
2261
      else if (*buffer->cur == '=')
2262
        buffer->cur++, result->type = CPP_PLUS_EQ;
2263
      break;
2264
 
2265
    case '-':
2266
      result->type = CPP_MINUS;
2267
      if (*buffer->cur == '>')
2268
        {
2269
          buffer->cur++;
2270
          result->type = CPP_DEREF;
2271
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2272
            buffer->cur++, result->type = CPP_DEREF_STAR;
2273
        }
2274
      else if (*buffer->cur == '-')
2275
        buffer->cur++, result->type = CPP_MINUS_MINUS;
2276
      else if (*buffer->cur == '=')
2277
        buffer->cur++, result->type = CPP_MINUS_EQ;
2278
      break;
2279
 
2280
    case '&':
2281
      result->type = CPP_AND;
2282
      if (*buffer->cur == '&')
2283
        buffer->cur++, result->type = CPP_AND_AND;
2284
      else if (*buffer->cur == '=')
2285
        buffer->cur++, result->type = CPP_AND_EQ;
2286
      break;
2287
 
2288
    case '|':
2289
      result->type = CPP_OR;
2290
      if (*buffer->cur == '|')
2291
        buffer->cur++, result->type = CPP_OR_OR;
2292
      else if (*buffer->cur == '=')
2293
        buffer->cur++, result->type = CPP_OR_EQ;
2294
      break;
2295
 
2296
    case ':':
2297
      result->type = CPP_COLON;
2298
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2299
        buffer->cur++, result->type = CPP_SCOPE;
2300
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2301
        {
2302
          buffer->cur++;
2303
          result->flags |= DIGRAPH;
2304
          result->type = CPP_CLOSE_SQUARE;
2305
        }
2306
      break;
2307
 
2308
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2309
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2310
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2311
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2312
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2313
 
2314
    case '?': result->type = CPP_QUERY; break;
2315
    case '~': result->type = CPP_COMPL; break;
2316
    case ',': result->type = CPP_COMMA; break;
2317
    case '(': result->type = CPP_OPEN_PAREN; break;
2318
    case ')': result->type = CPP_CLOSE_PAREN; break;
2319
    case '[': result->type = CPP_OPEN_SQUARE; break;
2320
    case ']': result->type = CPP_CLOSE_SQUARE; break;
2321
    case '{': result->type = CPP_OPEN_BRACE; break;
2322
    case '}': result->type = CPP_CLOSE_BRACE; break;
2323
    case ';': result->type = CPP_SEMICOLON; break;
2324
 
2325
      /* @ is a punctuator in Objective-C.  */
2326
    case '@': result->type = CPP_ATSIGN; break;
2327
 
2328
    case '$':
2329
    case '\\':
2330
      {
2331
        const uchar *base = --buffer->cur;
2332
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2333
 
2334
        if (forms_identifier_p (pfile, true, &nst))
2335
          {
2336
            result->type = CPP_NAME;
2337
            result->val.node.node = lex_identifier (pfile, base, true, &nst);
2338
            warn_about_normalization (pfile, result, &nst);
2339
            break;
2340
          }
2341
        buffer->cur++;
2342
      }
2343
 
2344
    default:
2345
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2346
      break;
2347
    }
2348
 
2349
  return result;
2350
}
2351
 
2352
/* An upper bound on the number of bytes needed to spell TOKEN.
2353
   Does not include preceding whitespace.  */
2354
unsigned int
2355
cpp_token_len (const cpp_token *token)
2356
{
2357
  unsigned int len;
2358
 
2359
  switch (TOKEN_SPELL (token))
2360
    {
2361
    default:            len = 6;                                break;
2362
    case SPELL_LITERAL: len = token->val.str.len;               break;
2363
    case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
2364
    }
2365
 
2366
  return len;
2367
}
2368
 
2369
/* Parse one UTF-8 sequence starting at NAME and place the matching
   \U escape in BUFFER.  Return the number of bytes read out of NAME,
   which is the count of leading one bits in the first byte.  There
   are always 10 bytes written to BUFFER: a backslash, 'U' and eight
   lower-case hexadecimal digits; no terminating NUL is written.
   Aborts if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The lead byte contributes the bits below its length marker.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      /* Each continuation byte contributes its low six bits.  */
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Emit the eight hex digits, most significant nibble first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
2402
 
2403
/* Given a token TYPE corresponding to a digraph, return a pointer to
2404
   the spelling of the digraph.  */
2405
static const unsigned char *
2406
cpp_digraph2name (enum cpp_ttype type)
2407
{
2408
  return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2409
}
2410
 
2411
/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2412
   already contain the enough space to hold the token's spelling.
2413
   Returns a pointer to the character after the last character written.
2414
   FORSTRING is true if this is to be the spelling after translation
2415
   phase 1 (this is different for UCNs).
2416
   FIXME: Would be nice if we didn't need the PFILE argument.  */
2417
unsigned char *
2418
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2419
                 unsigned char *buffer, bool forstring)
2420
{
2421
  switch (TOKEN_SPELL (token))
2422
    {
2423
    case SPELL_OPERATOR:
2424
      {
2425
        const unsigned char *spelling;
2426
        unsigned char c;
2427
 
2428
        if (token->flags & DIGRAPH)
2429
          spelling = cpp_digraph2name (token->type);
2430
        else if (token->flags & NAMED_OP)
2431
          goto spell_ident;
2432
        else
2433
          spelling = TOKEN_NAME (token);
2434
 
2435
        while ((c = *spelling++) != '\0')
2436
          *buffer++ = c;
2437
      }
2438
      break;
2439
 
2440
    spell_ident:
2441
    case SPELL_IDENT:
2442
      if (forstring)
2443
        {
2444
          memcpy (buffer, NODE_NAME (token->val.node.node),
2445
                  NODE_LEN (token->val.node.node));
2446
          buffer += NODE_LEN (token->val.node.node);
2447
        }
2448
      else
2449
        {
2450
          size_t i;
2451
          const unsigned char * name = NODE_NAME (token->val.node.node);
2452
 
2453
          for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2454
            if (name[i] & ~0x7F)
2455
              {
2456
                i += utf8_to_ucn (buffer, name + i) - 1;
2457
                buffer += 10;
2458
              }
2459
            else
2460
              *buffer++ = NODE_NAME (token->val.node.node)[i];
2461
        }
2462
      break;
2463
 
2464
    case SPELL_LITERAL:
2465
      memcpy (buffer, token->val.str.text, token->val.str.len);
2466
      buffer += token->val.str.len;
2467
      break;
2468
 
2469
    case SPELL_NONE:
2470
      cpp_error (pfile, CPP_DL_ICE,
2471
                 "unspellable token %s", TOKEN_NAME (token));
2472
      break;
2473
    }
2474
 
2475
  return buffer;
2476
}
2477
 
2478
/* Returns TOKEN spelt as a null-terminated string.  The string is
2479
   freed when the reader is destroyed.  Useful for diagnostics.  */
2480
unsigned char *
2481
cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2482
{
2483
  unsigned int len = cpp_token_len (token) + 1;
2484
  unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2485
 
2486
  end = cpp_spell_token (pfile, token, start, false);
2487
  end[0] = '\0';
2488
 
2489
  return start;
2490
}
2491
 
2492
/* Returns a pointer to a string which spells the token defined by
2493
   TYPE and FLAGS.  Used by C front ends, which really should move to
2494
   using cpp_token_as_text.  */
2495
const char *
2496
cpp_type2name (enum cpp_ttype type, unsigned char flags)
2497
{
2498
  if (flags & DIGRAPH)
2499
    return (const char *) cpp_digraph2name (type);
2500
  else if (flags & NAMED_OP)
2501
    return cpp_named_operator2name (type);
2502
 
2503
  return (const char *) token_spellings[type].name;
2504
}
2505
 
2506
/* Writes the spelling of token to FP, without any preceding space.
2507
   Separated from cpp_spell_token for efficiency - to avoid stdio
2508
   double-buffering.  */
2509
void
2510
cpp_output_token (const cpp_token *token, FILE *fp)
2511
{
2512
  switch (TOKEN_SPELL (token))
2513
    {
2514
    case SPELL_OPERATOR:
2515
      {
2516
        const unsigned char *spelling;
2517
        int c;
2518
 
2519
        if (token->flags & DIGRAPH)
2520
          spelling = cpp_digraph2name (token->type);
2521
        else if (token->flags & NAMED_OP)
2522
          goto spell_ident;
2523
        else
2524
          spelling = TOKEN_NAME (token);
2525
 
2526
        c = *spelling;
2527
        do
2528
          putc (c, fp);
2529
        while ((c = *++spelling) != '\0');
2530
      }
2531
      break;
2532
 
2533
    spell_ident:
2534
    case SPELL_IDENT:
2535
      {
2536
        size_t i;
2537
        const unsigned char * name = NODE_NAME (token->val.node.node);
2538
 
2539
        for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2540
          if (name[i] & ~0x7F)
2541
            {
2542
              unsigned char buffer[10];
2543
              i += utf8_to_ucn (buffer, name + i) - 1;
2544
              fwrite (buffer, 1, 10, fp);
2545
            }
2546
          else
2547
            fputc (NODE_NAME (token->val.node.node)[i], fp);
2548
      }
2549
      break;
2550
 
2551
    case SPELL_LITERAL:
2552
      fwrite (token->val.str.text, 1, token->val.str.len, fp);
2553
      break;
2554
 
2555
    case SPELL_NONE:
2556
      /* An error, most probably.  */
2557
      break;
2558
    }
2559
}
2560
 
2561
/* Compare two tokens.  */
2562
int
2563
_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2564
{
2565
  if (a->type == b->type && a->flags == b->flags)
2566
    switch (TOKEN_SPELL (a))
2567
      {
2568
      default:                  /* Keep compiler happy.  */
2569
      case SPELL_OPERATOR:
2570
        /* token_no is used to track where multiple consecutive ##
2571
           tokens were originally located.  */
2572
        return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2573
      case SPELL_NONE:
2574
        return (a->type != CPP_MACRO_ARG
2575
                || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
2576
      case SPELL_IDENT:
2577
        return a->val.node.node == b->val.node.node;
2578
      case SPELL_LITERAL:
2579
        return (a->val.str.len == b->val.str.len
2580
                && !memcmp (a->val.str.text, b->val.str.text,
2581
                            a->val.str.len));
2582
      }
2583
 
2584
  return 0;
2585
}
2586
 
2587
/* Returns nonzero if a space should be inserted to avoid an
2588
   accidental token paste for output.  For simplicity, it is
2589
   conservative, and occasionally advises a space where one is not
2590
   needed, e.g. "." and ".2".  */
2591
int
2592
cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2593
                 const cpp_token *token2)
2594
{
2595
  enum cpp_ttype a = token1->type, b = token2->type;
2596
  cppchar_t c;
2597
 
2598
  if (token1->flags & NAMED_OP)
2599
    a = CPP_NAME;
2600
  if (token2->flags & NAMED_OP)
2601
    b = CPP_NAME;
2602
 
2603
  c = EOF;
2604
  if (token2->flags & DIGRAPH)
2605
    c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2606
  else if (token_spellings[b].category == SPELL_OPERATOR)
2607
    c = token_spellings[b].name[0];
2608
 
2609
  /* Quickly get everything that can paste with an '='.  */
2610
  if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2611
    return 1;
2612
 
2613
  switch (a)
2614
    {
2615
    case CPP_GREATER:   return c == '>';
2616
    case CPP_LESS:      return c == '<' || c == '%' || c == ':';
2617
    case CPP_PLUS:      return c == '+';
2618
    case CPP_MINUS:     return c == '-' || c == '>';
2619
    case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
2620
    case CPP_MOD:       return c == ':' || c == '>';
2621
    case CPP_AND:       return c == '&';
2622
    case CPP_OR:        return c == '|';
2623
    case CPP_COLON:     return c == ':' || c == '>';
2624
    case CPP_DEREF:     return c == '*';
2625
    case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
2626
    case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
2627
    case CPP_NAME:      return ((b == CPP_NUMBER
2628
                                 && name_p (pfile, &token2->val.str))
2629
                                || b == CPP_NAME
2630
                                || b == CPP_CHAR || b == CPP_STRING); /* L */
2631
    case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
2632
                                || c == '.' || c == '+' || c == '-');
2633
                                      /* UCNs */
2634
    case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
2635
                                 && b == CPP_NAME)
2636
                                || (CPP_OPTION (pfile, objc)
2637
                                    && token1->val.str.text[0] == '@'
2638
                                    && (b == CPP_NAME || b == CPP_STRING)));
2639
    default:            break;
2640
    }
2641
 
2642
  return 0;
2643
}
2644
 
2645
/* Output all the remaining tokens on the current line, and a newline
2646
   character, to FP.  Leading whitespace is removed.  If there are
2647
   macros, special token padding is not performed.  */
2648
void
2649
cpp_output_line (cpp_reader *pfile, FILE *fp)
2650
{
2651
  const cpp_token *token;
2652
 
2653
  token = cpp_get_token (pfile);
2654
  while (token->type != CPP_EOF)
2655
    {
2656
      cpp_output_token (token, fp);
2657
      token = cpp_get_token (pfile);
2658
      if (token->flags & PREV_WHITE)
2659
        putc (' ', fp);
2660
    }
2661
 
2662
  putc ('\n', fp);
2663
}
2664
 
2665
/* Return a string representation of all the remaining tokens on the
2666
   current line.  The result is allocated using xmalloc and must be
2667
   freed by the caller.  */
2668
unsigned char *
2669
cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2670
{
2671
  const cpp_token *token;
2672
  unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2673
  unsigned int alloced = 120 + out;
2674
  unsigned char *result = (unsigned char *) xmalloc (alloced);
2675
 
2676
  /* If DIR_NAME is empty, there are no initial contents.  */
2677
  if (dir_name)
2678
    {
2679
      sprintf ((char *) result, "#%s ", dir_name);
2680
      out += 2;
2681
    }
2682
 
2683
  token = cpp_get_token (pfile);
2684
  while (token->type != CPP_EOF)
2685
    {
2686
      unsigned char *last;
2687
      /* Include room for a possible space and the terminating nul.  */
2688
      unsigned int len = cpp_token_len (token) + 2;
2689
 
2690
      if (out + len > alloced)
2691
        {
2692
          alloced *= 2;
2693
          if (out + len > alloced)
2694
            alloced = out + len;
2695
          result = (unsigned char *) xrealloc (result, alloced);
2696
        }
2697
 
2698
      last = cpp_spell_token (pfile, token, &result[out], 0);
2699
      out = last - result;
2700
 
2701
      token = cpp_get_token (pfile);
2702
      if (token->flags & PREV_WHITE)
2703
        result[out++] = ' ';
2704
    }
2705
 
2706
  result[out] = '\0';
2707
  return result;
2708
}
2709
 
2710
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room still left in BUFF.
   Every macro argument is parenthesized so that an argument
   containing a low-precedence operator expands as intended.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2724
 
2725
/* Create a new allocation buffer.  Place the control block at the end
2726
   of the buffer, so that buffer overflows will cause immediate chaos.  */
2727
static _cpp_buff *
2728
new_buff (size_t len)
2729
{
2730
  _cpp_buff *result;
2731
  unsigned char *base;
2732
 
2733
  if (len < MIN_BUFF_SIZE)
2734
    len = MIN_BUFF_SIZE;
2735
  len = CPP_ALIGN (len);
2736
 
2737
  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2738
  result = (_cpp_buff *) (base + len);
2739
  result->base = base;
2740
  result->cur = base;
2741
  result->limit = base + len;
2742
  result->next = NULL;
2743
  return result;
2744
}
2745
 
2746
/* Place a chain of unwanted allocation buffers on the free list.  */
2747
void
2748
_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2749
{
2750
  _cpp_buff *end = buff;
2751
 
2752
  while (end->next)
2753
    end = end->next;
2754
  end->next = pfile->free_buffs;
2755
  pfile->free_buffs = buff;
2756
}
2757
 
2758
/* Return a free buffer of size at least MIN_SIZE.  */
2759
_cpp_buff *
2760
_cpp_get_buff (cpp_reader *pfile, size_t min_size)
2761
{
2762
  _cpp_buff *result, **p;
2763
 
2764
  for (p = &pfile->free_buffs;; p = &(*p)->next)
2765
    {
2766
      size_t size;
2767
 
2768
      if (*p == NULL)
2769
        return new_buff (min_size);
2770
      result = *p;
2771
      size = result->limit - result->base;
2772
      /* Return a buffer that's big enough, but don't waste one that's
2773
         way too big.  */
2774
      if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2775
        break;
2776
    }
2777
 
2778
  *p = result->next;
2779
  result->next = NULL;
2780
  result->cur = result->base;
2781
  return result;
2782
}
2783
 
2784
/* Creates a new buffer with enough space to hold the uncommitted
2785
   remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2786
   the excess bytes to the new buffer.  Chains the new buffer after
2787
   BUFF, and returns the new buffer.  */
2788
_cpp_buff *
2789
_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2790
{
2791
  size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2792
  _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2793
 
2794
  buff->next = new_buff;
2795
  memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2796
  return new_buff;
2797
}
2798
 
2799
/* Creates a new buffer with enough space to hold the uncommitted
2800
   remaining bytes of the buffer pointed to by BUFF, and at least
2801
   MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2802
   Chains the new buffer before the buffer pointed to by BUFF, and
2803
   updates the pointer to point to the new buffer.  */
2804
void
2805
_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2806
{
2807
  _cpp_buff *new_buff, *old_buff = *pbuff;
2808
  size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2809
 
2810
  new_buff = _cpp_get_buff (pfile, size);
2811
  memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2812
  new_buff->next = old_buff;
2813
  *pbuff = new_buff;
2814
}
2815
 
2816
/* Free a chain of buffers starting at BUFF.  */
2817
void
2818
_cpp_free_buff (_cpp_buff *buff)
2819
{
2820
  _cpp_buff *next;
2821
 
2822
  for (; buff; buff = next)
2823
    {
2824
      next = buff->next;
2825
      free (buff->base);
2826
    }
2827
}
2828
 
2829
/* Allocate permanent, unaligned storage of length LEN.  */
2830
unsigned char *
2831
_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2832
{
2833
  _cpp_buff *buff = pfile->u_buff;
2834
  unsigned char *result = buff->cur;
2835
 
2836
  if (len > (size_t) (buff->limit - result))
2837
    {
2838
      buff = _cpp_get_buff (pfile, len);
2839
      buff->next = pfile->u_buff;
2840
      pfile->u_buff = buff;
2841
      result = buff->cur;
2842
    }
2843
 
2844
  buff->cur = result + len;
2845
  return result;
2846
}
2847
 
2848
/* Allocate permanent, unaligned storage of length LEN from a_buff.
2849
   That buffer is used for growing allocations when saving macro
2850
   replacement lists in a #define, and when parsing an answer to an
2851
   assertion in #assert, #unassert or #if (and therefore possibly
2852
   whilst expanding macros).  It therefore must not be used by any
2853
   code that they might call: specifically the lexer and the guts of
2854
   the macro expander.
2855
 
2856
   All existing other uses clearly fit this restriction: storing
2857
   registered pragmas during initialization.  */
2858
unsigned char *
2859
_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2860
{
2861
  _cpp_buff *buff = pfile->a_buff;
2862
  unsigned char *result = buff->cur;
2863
 
2864
  if (len > (size_t) (buff->limit - result))
2865
    {
2866
      buff = _cpp_get_buff (pfile, len);
2867
      buff->next = pfile->a_buff;
2868
      pfile->a_buff = buff;
2869
      result = buff->cur;
2870
    }
2871
 
2872
  buff->cur = result + len;
2873
  return result;
2874
}
2875
 
2876
/* Say which field of TOK is in use.  */
2877
 
2878
enum cpp_token_fld_kind
2879
cpp_token_val_index (cpp_token *tok)
2880
{
2881
  switch (TOKEN_SPELL (tok))
2882
    {
2883
    case SPELL_IDENT:
2884
      return CPP_TOKEN_FLD_NODE;
2885
    case SPELL_LITERAL:
2886
      return CPP_TOKEN_FLD_STR;
2887
    case SPELL_OPERATOR:
2888
      if (tok->type == CPP_PASTE)
2889
        return CPP_TOKEN_FLD_TOKEN_NO;
2890
      else
2891
        return CPP_TOKEN_FLD_NONE;
2892
    case SPELL_NONE:
2893
      if (tok->type == CPP_MACRO_ARG)
2894
        return CPP_TOKEN_FLD_ARG_NO;
2895
      else if (tok->type == CPP_PADDING)
2896
        return CPP_TOKEN_FLD_SOURCE;
2897
      else if (tok->type == CPP_PRAGMA)
2898
        return CPP_TOKEN_FLD_PRAGMA;
2899
      /* else fall through */
2900
    default:
2901
      return CPP_TOKEN_FLD_NONE;
2902
    }
2903
}
2904
 
2905
/* All tokens lexed in R after calling this function will be forced to have
2906
   their source_location the same as the location referenced by P, until
2907
   cpp_stop_forcing_token_locations is called for R.  */
2908
 
2909
void
2910
cpp_force_token_locations (cpp_reader *r, source_location *p)
2911
{
2912
  r->forced_token_location_p = p;
2913
}
2914
 
2915
/* Go back to assigning locations naturally for lexed tokens.  */
2916
 
2917
void
2918
cpp_stop_forcing_token_locations (cpp_reader *r)
2919
{
2920
  r->forced_token_location_p = NULL;
2921
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.