OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [go/] [gofrontend/] [lex.h] - Blame information for rev 714

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 714 jeremybenn
// lex.h -- Go frontend lexer.     -*- C++ -*-
2
 
3
// Copyright 2009 The Go Authors. All rights reserved.
4
// Use of this source code is governed by a BSD-style
5
// license that can be found in the LICENSE file.
6
 
7
#ifndef GO_LEX_H
8
#define GO_LEX_H
9
 
10
#include <gmp.h>
11
#include <mpfr.h>
12
 
13
#include "operator.h"
14
#include "go-linemap.h"
15
 
16
struct Unicode_range;
17
 
// The keywords.
//
// Apart from KEYWORD_INVALID, which comes first and means "not a
// keyword", the enumerators must be kept in sorted (alphabetical)
// order.  They must also match the Keywords::mapping_ array in
// lex.cc, so any change here needs the corresponding change there.

enum Keyword
{
  KEYWORD_INVALID,	// Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
52
 
53
// A token returned from the lexer.
54
 
55
class Token
56
{
57
 public:
58
  // Token classification.
59
  enum Classification
60
  {
61
    // Token is invalid.
62
    TOKEN_INVALID,
63
    // Token indicates end of input.
64
    TOKEN_EOF,
65
    // Token is a keyword.
66
    TOKEN_KEYWORD,
67
    // Token is an identifier.
68
    TOKEN_IDENTIFIER,
69
    // Token is a string of characters.
70
    TOKEN_STRING,
71
    // Token is an operator.
72
    TOKEN_OPERATOR,
73
    // Token is a character constant.
74
    TOKEN_CHARACTER,
75
    // Token is an integer.
76
    TOKEN_INTEGER,
77
    // Token is a floating point number.
78
    TOKEN_FLOAT,
79
    // Token is an imaginary number.
80
    TOKEN_IMAGINARY
81
  };
82
 
83
  ~Token();
84
  Token(const Token&);
85
  Token& operator=(const Token&);
86
 
87
  // Get token classification.
88
  Classification
89
  classification() const
90
  { return this->classification_; }
91
 
92
  // Make a token for an invalid value.
93
  static Token
94
  make_invalid_token(Location location)
95
  { return Token(TOKEN_INVALID, location); }
96
 
97
  // Make a token representing end of file.
98
  static Token
99
  make_eof_token(Location location)
100
  { return Token(TOKEN_EOF, location); }
101
 
102
  // Make a keyword token.
103
  static Token
104
  make_keyword_token(Keyword keyword, Location location)
105
  {
106
    Token tok(TOKEN_KEYWORD, location);
107
    tok.u_.keyword = keyword;
108
    return tok;
109
  }
110
 
111
  // Make an identifier token.
112
  static Token
113
  make_identifier_token(const std::string& value, bool is_exported,
114
                        Location location)
115
  {
116
    Token tok(TOKEN_IDENTIFIER, location);
117
    tok.u_.identifier_value.name = new std::string(value);
118
    tok.u_.identifier_value.is_exported = is_exported;
119
    return tok;
120
  }
121
 
122
  // Make a quoted string token.
123
  static Token
124
  make_string_token(const std::string& value, Location location)
125
  {
126
    Token tok(TOKEN_STRING, location);
127
    tok.u_.string_value = new std::string(value);
128
    return tok;
129
  }
130
 
131
  // Make an operator token.
132
  static Token
133
  make_operator_token(Operator op, Location location)
134
  {
135
    Token tok(TOKEN_OPERATOR, location);
136
    tok.u_.op = op;
137
    return tok;
138
  }
139
 
140
  // Make a character constant token.
141
  static Token
142
  make_character_token(mpz_t val, Location location)
143
  {
144
    Token tok(TOKEN_CHARACTER, location);
145
    mpz_init(tok.u_.integer_value);
146
    mpz_swap(tok.u_.integer_value, val);
147
    return tok;
148
  }
149
 
150
  // Make an integer token.
151
  static Token
152
  make_integer_token(mpz_t val, Location location)
153
  {
154
    Token tok(TOKEN_INTEGER, location);
155
    mpz_init(tok.u_.integer_value);
156
    mpz_swap(tok.u_.integer_value, val);
157
    return tok;
158
  }
159
 
160
  // Make a float token.
161
  static Token
162
  make_float_token(mpfr_t val, Location location)
163
  {
164
    Token tok(TOKEN_FLOAT, location);
165
    mpfr_init(tok.u_.float_value);
166
    mpfr_swap(tok.u_.float_value, val);
167
    return tok;
168
  }
169
 
170
  // Make a token for an imaginary number.
171
  static Token
172
  make_imaginary_token(mpfr_t val, Location location)
173
  {
174
    Token tok(TOKEN_IMAGINARY, location);
175
    mpfr_init(tok.u_.float_value);
176
    mpfr_swap(tok.u_.float_value, val);
177
    return tok;
178
  }
179
 
180
  // Get the location of the token.
181
  Location
182
  location() const
183
  { return this->location_; }
184
 
185
  // Return whether this is an invalid token.
186
  bool
187
  is_invalid() const
188
  { return this->classification_ == TOKEN_INVALID; }
189
 
190
  // Return whether this is the EOF token.
191
  bool
192
  is_eof() const
193
  { return this->classification_ == TOKEN_EOF; }
194
 
195
  // Return the keyword value for a keyword token.
196
  Keyword
197
  keyword() const
198
  {
199
    go_assert(this->classification_ == TOKEN_KEYWORD);
200
    return this->u_.keyword;
201
  }
202
 
203
  // Return whether this is an identifier.
204
  bool
205
  is_identifier() const
206
  { return this->classification_ == TOKEN_IDENTIFIER; }
207
 
208
  // Return the identifier.
209
  const std::string&
210
  identifier() const
211
  {
212
    go_assert(this->classification_ == TOKEN_IDENTIFIER);
213
    return *this->u_.identifier_value.name;
214
  }
215
 
216
  // Return whether the identifier is exported.
217
  bool
218
  is_identifier_exported() const
219
  {
220
    go_assert(this->classification_ == TOKEN_IDENTIFIER);
221
    return this->u_.identifier_value.is_exported;
222
  }
223
 
224
  // Return whether this is a string.
225
  bool
226
  is_string() const
227
  {
228
    return this->classification_ == TOKEN_STRING;
229
  }
230
 
231
  // Return the value of a string.  The returned value is a string of
232
  // UTF-8 characters.
233
  std::string
234
  string_value() const
235
  {
236
    go_assert(this->classification_ == TOKEN_STRING);
237
    return *this->u_.string_value;
238
  }
239
 
240
  // Return the value of a character constant.
241
  const mpz_t*
242
  character_value() const
243
  {
244
    go_assert(this->classification_ == TOKEN_CHARACTER);
245
    return &this->u_.integer_value;
246
  }
247
 
248
  // Return the value of an integer.
249
  const mpz_t*
250
  integer_value() const
251
  {
252
    go_assert(this->classification_ == TOKEN_INTEGER);
253
    return &this->u_.integer_value;
254
  }
255
 
256
  // Return the value of a float.
257
  const mpfr_t*
258
  float_value() const
259
  {
260
    go_assert(this->classification_ == TOKEN_FLOAT);
261
    return &this->u_.float_value;
262
  }
263
 
264
  // Return the value of an imaginary number.
265
  const mpfr_t*
266
  imaginary_value() const
267
  {
268
    go_assert(this->classification_ == TOKEN_IMAGINARY);
269
    return &this->u_.float_value;
270
  }
271
 
272
  // Return the operator value for an operator token.
273
  Operator
274
  op() const
275
  {
276
    go_assert(this->classification_ == TOKEN_OPERATOR);
277
    return this->u_.op;
278
  }
279
 
280
  // Return whether this token is KEYWORD.
281
  bool
282
  is_keyword(Keyword keyword) const
283
  {
284
    return (this->classification_ == TOKEN_KEYWORD
285
            && this->u_.keyword == keyword);
286
  }
287
 
288
  // Return whether this token is OP.
289
  bool
290
  is_op(Operator op) const
291
  { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
292
 
293
  // Print the token for debugging.
294
  void
295
  print(FILE*) const;
296
 
297
 private:
298
  // Private constructor used by make_..._token functions above.
299
  Token(Classification, Location);
300
 
301
  // Clear the token.
302
  void
303
  clear();
304
 
305
  // The token classification.
306
  Classification classification_;
307
  union
308
  {
309
    // The keyword value for TOKEN_KEYWORD.
310
    Keyword keyword;
311
    // The token value for TOKEN_IDENTIFIER.
312
    struct
313
    {
314
      // The name of the identifier.  This has been mangled to only
315
      // include ASCII characters.
316
      std::string* name;
317
      // Whether this name should be exported.  This is true if the
318
      // first letter in the name is upper case.
319
      bool is_exported;
320
    } identifier_value;
321
    // The string value for TOKEN_STRING.
322
    std::string* string_value;
323
    // The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
324
    mpz_t integer_value;
325
    // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
326
    mpfr_t float_value;
327
    // The token value for TOKEN_OPERATOR or the keyword value
328
    Operator op;
329
  } u_;
330
  // The source location.
331
  Location location_;
332
};
333
 
334
// The lexer itself.
335
 
336
class Lex
337
{
338
 public:
339
  Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
340
 
341
  ~Lex();
342
 
343
  // Return the next token.
344
  Token
345
  next_token();
346
 
347
  // Return the contents of any current //extern comment.
348
  const std::string&
349
  extern_name() const
350
  { return this->extern_; }
351
 
352
  // Return whether the identifier NAME should be exported.  NAME is a
353
  // mangled name which includes only ASCII characters.
354
  static bool
355
  is_exported_name(const std::string& name);
356
 
357
  // Return whether the identifier NAME is invalid.  When we see an
358
  // invalid character we still build an identifier, but we use a
359
  // magic string to indicate that the identifier is invalid.  We then
360
  // use this to avoid knockon errors.
361
  static bool
362
  is_invalid_identifier(const std::string& name);
363
 
364
  // A helper function.  Append V to STR.  IS_CHARACTER is true if V
365
  // is a Unicode character which should be converted into UTF-8,
366
  // false if it is a byte value to be appended directly.  The
367
  // location is used to warn about an out of range character.
368
  static void
369
  append_char(unsigned int v, bool is_charater, std::string* str,
370
              Location);
371
 
372
  // A helper function.  Fetch a UTF-8 character from STR and store it
373
  // in *VALUE.  Return the number of bytes read from STR.  Return 0
374
  // if STR does not point to a valid UTF-8 character.
375
  static int
376
  fetch_char(const char* str, unsigned int *value);
377
 
378
 private:
379
  ssize_t
380
  get_line();
381
 
382
  bool
383
  require_line();
384
 
385
  // The current location.
386
  Location
387
  location() const;
388
 
389
  // A position CHARS column positions before the current location.
390
  Location
391
  earlier_location(int chars) const;
392
 
393
  static bool
394
  is_hex_digit(char);
395
 
396
  static unsigned char
397
  octal_value(char c)
398
  { return c - '0'; }
399
 
400
  Token
401
  make_invalid_token()
402
  { return Token::make_invalid_token(this->location()); }
403
 
404
  Token
405
  make_eof_token()
406
  { return Token::make_eof_token(this->location()); }
407
 
408
  Token
409
  make_operator(Operator op, int chars)
410
  { return Token::make_operator_token(op, this->earlier_location(chars)); }
411
 
412
  Token
413
  gather_identifier();
414
 
415
  static bool
416
  could_be_exponent(const char*, const char*);
417
 
418
  Token
419
  gather_number();
420
 
421
  Token
422
  gather_character();
423
 
424
  Token
425
  gather_string();
426
 
427
  Token
428
  gather_raw_string();
429
 
430
  const char*
431
  advance_one_utf8_char(const char*, unsigned int*, bool*);
432
 
433
  const char*
434
  advance_one_char(const char*, bool, unsigned int*, bool*);
435
 
436
  static bool
437
  is_unicode_digit(unsigned int c);
438
 
439
  static bool
440
  is_unicode_letter(unsigned int c);
441
 
442
  static bool
443
  is_unicode_uppercase(unsigned int c);
444
 
445
  static bool
446
  is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
447
                      size_t range_size);
448
 
449
  Operator
450
  three_character_operator(char, char, char);
451
 
452
  Operator
453
  two_character_operator(char, char);
454
 
455
  Operator
456
  one_character_operator(char);
457
 
458
  bool
459
  skip_c_comment();
460
 
461
  void
462
  skip_cpp_comment();
463
 
464
  // The input file name.
465
  const char* input_file_name_;
466
  // The input file.
467
  FILE* input_file_;
468
  // The object used to keep track of file names and line numbers.
469
  Linemap* linemap_;
470
  // The line buffer.  This holds the current line.
471
  char* linebuf_;
472
  // The size of the line buffer.
473
  size_t linebufsize_;
474
  // The nmber of characters in the current line.
475
  size_t linesize_;
476
  // The current offset in linebuf_.
477
  size_t lineoff_;
478
  // The current line number.
479
  size_t lineno_;
480
  // Whether to add a semicolon if we see a newline now.
481
  bool add_semi_at_eol_;
482
  // The external name to use for a function declaration, from a magic
483
  // //extern comment.
484
  std::string extern_;
485
};
486
 
487
#endif // !defined(GO_LEX_H)

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.