OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libstdc++-v3/] [include/] [bits/] [regex_compiler.h] - Blame information for rev 742

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 742 jeremybenn
// class template regex -*- C++ -*-
2
 
3
// Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/**
26
 *  @file bits/regex_compiler.h
27
 *  This is an internal header file, included by other library headers.
28
 *  Do not attempt to use it directly. @headername{regex}
29
 */
30
 
31
namespace std _GLIBCXX_VISIBILITY(default)
32
{
33
namespace __regex
34
{
35
_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
 
37
  struct _Scanner_base
38
  {
39
    typedef unsigned int _StateT;
40
 
41
    static constexpr _StateT _S_state_at_start    = 1 << 0;
42
    static constexpr _StateT _S_state_in_brace    = 1 << 2;
43
    static constexpr _StateT _S_state_in_bracket  = 1 << 3;
44
 
45
    virtual ~_Scanner_base() { };
46
  };
47
 
48
  //
49
  // @brief Scans an input range for regex tokens.
50
  //
51
  // The %_Scanner class interprets the regular expression pattern in the input
52
  // range passed to its constructor as a sequence of parse tokens passed to
53
  // the regular expression compiler.  The sequence of tokens provided depends
54
  // on the flag settings passed to the constructor:  different regular
55
  // expression grammars will interpret the same input pattern in
56
  // syntactically different ways.
57
  //
58
  template<typename _InputIterator>
59
    class _Scanner: public _Scanner_base
60
    {
61
    public:
62
      typedef _InputIterator                                        _IteratorT;
63
      typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
64
      typedef std::basic_string<_CharT>                             _StringT;
65
      typedef regex_constants::syntax_option_type                   _FlagT;
66
      typedef const std::ctype<_CharT>                              _CtypeT;
67
 
68
      // Token types returned from the scanner.
69
      enum _TokenT
70
      {
71
        _S_token_anychar,
72
        _S_token_backref,
73
        _S_token_bracket_begin,
74
        _S_token_bracket_end,
75
        _S_token_inverse_class,
76
        _S_token_char_class_name,
77
        _S_token_closure0,
78
        _S_token_closure1,
79
        _S_token_collelem_multi,
80
        _S_token_collelem_single,
81
        _S_token_collsymbol,
82
        _S_token_comma,
83
        _S_token_dash,
84
        _S_token_dup_count,
85
        _S_token_eof,
86
        _S_token_equiv_class_name,
87
        _S_token_interval_begin,
88
        _S_token_interval_end,
89
        _S_token_line_begin,
90
        _S_token_line_end,
91
        _S_token_opt,
92
        _S_token_or,
93
        _S_token_ord_char,
94
        _S_token_quoted_char,
95
        _S_token_subexpr_begin,
96
        _S_token_subexpr_end,
97
        _S_token_word_begin,
98
        _S_token_word_end,
99
        _S_token_unknown
100
      };
101
 
102
    public:
103
      _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
104
               std::locale __loc)
105
      : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
106
        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
107
      { _M_advance(); }
108
 
109
      void
110
      _M_advance();
111
 
112
      _TokenT
113
      _M_token() const
114
      { return _M_curToken; }
115
 
116
      const _StringT&
117
      _M_value() const
118
      { return _M_curValue; }
119
 
120
#ifdef _GLIBCXX_DEBUG
121
      std::ostream&
122
      _M_print(std::ostream&);
123
#endif
124
 
125
    private:
126
      void
127
      _M_eat_escape();
128
 
129
      void
130
      _M_scan_in_brace();
131
 
132
      void
133
      _M_scan_in_bracket();
134
 
135
      void
136
      _M_eat_charclass();
137
 
138
      void
139
      _M_eat_equivclass();
140
 
141
      void
142
      _M_eat_collsymbol();
143
 
144
    private:
145
      _IteratorT  _M_current;
146
      _IteratorT  _M_end;
147
      _FlagT      _M_flags;
148
      _CtypeT&    _M_ctype;
149
      _TokenT     _M_curToken;
150
      _StringT    _M_curValue;
151
      _StateT     _M_state;
152
    };
153
 
154
  template<typename _InputIterator>
155
    void
156
    _Scanner<_InputIterator>::
157
    _M_advance()
158
    {
159
      if (_M_current == _M_end)
160
        {
161
          _M_curToken = _S_token_eof;
162
          return;
163
        }
164
 
165
      _CharT __c = *_M_current;
166
      if (_M_state & _S_state_in_bracket)
167
        {
168
          _M_scan_in_bracket();
169
          return;
170
        }
171
      if (_M_state & _S_state_in_brace)
172
        {
173
          _M_scan_in_brace();
174
          return;
175
        }
176
#if 0
177
      // TODO: re-enable line anchors when _M_assertion is implemented.
178
      // See PR libstdc++/47724
179
      else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
180
        {
181
          _M_curToken = _S_token_line_begin;
182
          ++_M_current;
183
          return;
184
        }
185
      else if (__c == _M_ctype.widen('$'))
186
        {
187
          _M_curToken = _S_token_line_end;
188
          ++_M_current;
189
          return;
190
        }
191
#endif
192
      else if (__c == _M_ctype.widen('.'))
193
        {
194
          _M_curToken = _S_token_anychar;
195
          ++_M_current;
196
          return;
197
        }
198
      else if (__c == _M_ctype.widen('*'))
199
        {
200
          _M_curToken = _S_token_closure0;
201
          ++_M_current;
202
          return;
203
        }
204
      else if (__c == _M_ctype.widen('+'))
205
        {
206
          _M_curToken = _S_token_closure1;
207
          ++_M_current;
208
          return;
209
        }
210
      else if (__c == _M_ctype.widen('|'))
211
        {
212
          _M_curToken = _S_token_or;
213
          ++_M_current;
214
          return;
215
        }
216
      else if (__c == _M_ctype.widen('['))
217
        {
218
          _M_curToken = _S_token_bracket_begin;
219
          _M_state |= (_S_state_in_bracket | _S_state_at_start);
220
          ++_M_current;
221
          return;
222
        }
223
      else if (__c == _M_ctype.widen('\\'))
224
        {
225
          _M_eat_escape();
226
          return;
227
        }
228
      else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
229
        {
230
          if (__c == _M_ctype.widen('('))
231
            {
232
              _M_curToken = _S_token_subexpr_begin;
233
              ++_M_current;
234
              return;
235
            }
236
          else if (__c == _M_ctype.widen(')'))
237
            {
238
              _M_curToken = _S_token_subexpr_end;
239
              ++_M_current;
240
              return;
241
            }
242
          else if (__c == _M_ctype.widen('{'))
243
            {
244
              _M_curToken = _S_token_interval_begin;
245
              _M_state |= _S_state_in_brace;
246
              ++_M_current;
247
              return;
248
            }
249
        }
250
 
251
      _M_curToken = _S_token_ord_char;
252
      _M_curValue.assign(1, __c);
253
      ++_M_current;
254
    }
255
 
256
 
257
  template<typename _InputIterator>
258
    void
259
    _Scanner<_InputIterator>::
260
    _M_scan_in_brace()
261
    {
262
      if (_M_ctype.is(_CtypeT::digit, *_M_current))
263
        {
264
          _M_curToken = _S_token_dup_count;
265
          _M_curValue.assign(1, *_M_current);
266
          ++_M_current;
267
          while (_M_current != _M_end
268
                 && _M_ctype.is(_CtypeT::digit, *_M_current))
269
            {
270
              _M_curValue += *_M_current;
271
              ++_M_current;
272
            }
273
          return;
274
        }
275
      else if (*_M_current == _M_ctype.widen(','))
276
        {
277
          _M_curToken = _S_token_comma;
278
          ++_M_current;
279
          return;
280
        }
281
      if (_M_flags & (regex_constants::basic | regex_constants::grep))
282
        {
283
          if (*_M_current == _M_ctype.widen('\\'))
284
            _M_eat_escape();
285
        }
286
      else
287
        {
288
          if (*_M_current == _M_ctype.widen('}'))
289
            {
290
              _M_curToken = _S_token_interval_end;
291
              _M_state &= ~_S_state_in_brace;
292
              ++_M_current;
293
              return;
294
            }
295
        }
296
    }
297
 
298
  template<typename _InputIterator>
299
    void
300
    _Scanner<_InputIterator>::
301
    _M_scan_in_bracket()
302
    {
303
      if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
304
        {
305
          _M_curToken = _S_token_inverse_class;
306
          _M_state &= ~_S_state_at_start;
307
          ++_M_current;
308
          return;
309
        }
310
      else if (*_M_current == _M_ctype.widen('['))
311
        {
312
          ++_M_current;
313
          if (_M_current == _M_end)
314
            {
315
              _M_curToken = _S_token_eof;
316
              return;
317
            }
318
 
319
          if (*_M_current == _M_ctype.widen('.'))
320
            {
321
              _M_curToken = _S_token_collsymbol;
322
              _M_eat_collsymbol();
323
              return;
324
            }
325
          else if (*_M_current == _M_ctype.widen(':'))
326
            {
327
              _M_curToken = _S_token_char_class_name;
328
              _M_eat_charclass();
329
              return;
330
            }
331
          else if (*_M_current == _M_ctype.widen('='))
332
            {
333
              _M_curToken = _S_token_equiv_class_name;
334
              _M_eat_equivclass();
335
              return;
336
            }
337
        }
338
      else if (*_M_current == _M_ctype.widen('-'))
339
        {
340
          _M_curToken = _S_token_dash;
341
          ++_M_current;
342
          return;
343
        }
344
      else if (*_M_current == _M_ctype.widen(']'))
345
        {
346
          if (!(_M_flags & regex_constants::ECMAScript)
347
              || !(_M_state & _S_state_at_start))
348
            {
349
              // special case: only if  _not_ chr first after
350
              // '[' or '[^' and if not ECMAscript
351
              _M_curToken = _S_token_bracket_end;
352
              ++_M_current;
353
              return;
354
            }
355
        }
356
      _M_curToken = _S_token_collelem_single;
357
      _M_curValue.assign(1, *_M_current);
358
      ++_M_current;
359
    }
360
 
361
  template<typename _InputIterator>
362
    void
363
    _Scanner<_InputIterator>::
364
    _M_eat_escape()
365
    {
366
      ++_M_current;
367
      if (_M_current == _M_end)
368
        {
369
          _M_curToken = _S_token_eof;
370
          return;
371
        }
372
      _CharT __c = *_M_current;
373
      ++_M_current;
374
 
375
      if (__c == _M_ctype.widen('('))
376
        {
377
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
378
            {
379
              _M_curToken = _S_token_ord_char;
380
              _M_curValue.assign(1, __c);
381
            }
382
          else
383
            _M_curToken = _S_token_subexpr_begin;
384
        }
385
      else if (__c == _M_ctype.widen(')'))
386
        {
387
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
388
            {
389
              _M_curToken = _S_token_ord_char;
390
              _M_curValue.assign(1, __c);
391
            }
392
          else
393
            _M_curToken = _S_token_subexpr_end;
394
        }
395
      else if (__c == _M_ctype.widen('{'))
396
        {
397
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
398
            {
399
              _M_curToken = _S_token_ord_char;
400
              _M_curValue.assign(1, __c);
401
            }
402
          else
403
            {
404
              _M_curToken = _S_token_interval_begin;
405
              _M_state |= _S_state_in_brace;
406
            }
407
        }
408
      else if (__c == _M_ctype.widen('}'))
409
        {
410
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
411
            {
412
              _M_curToken = _S_token_ord_char;
413
              _M_curValue.assign(1, __c);
414
            }
415
          else
416
            {
417
              if (!(_M_state && _S_state_in_brace))
418
                __throw_regex_error(regex_constants::error_badbrace);
419
              _M_state &= ~_S_state_in_brace;
420
              _M_curToken = _S_token_interval_end;
421
            }
422
        }
423
      else if (__c == _M_ctype.widen('x'))
424
        {
425
          ++_M_current;
426
          if (_M_current == _M_end)
427
            {
428
              _M_curToken = _S_token_eof;
429
              return;
430
            }
431
          if (_M_ctype.is(_CtypeT::digit, *_M_current))
432
            {
433
              _M_curValue.assign(1, *_M_current);
434
              ++_M_current;
435
              if (_M_current == _M_end)
436
                {
437
                  _M_curToken = _S_token_eof;
438
                  return;
439
                }
440
              if (_M_ctype.is(_CtypeT::digit, *_M_current))
441
                {
442
                  _M_curValue += *_M_current;
443
                  ++_M_current;
444
                  return;
445
                }
446
            }
447
        }
448
      else if (__c == _M_ctype.widen('^')
449
               || __c == _M_ctype.widen('.')
450
               || __c == _M_ctype.widen('*')
451
               || __c == _M_ctype.widen('$')
452
               || __c == _M_ctype.widen('\\'))
453
        {
454
          _M_curToken = _S_token_ord_char;
455
          _M_curValue.assign(1, __c);
456
        }
457
      else if (_M_ctype.is(_CtypeT::digit, __c))
458
        {
459
          _M_curToken = _S_token_backref;
460
          _M_curValue.assign(1, __c);
461
        }
462
      else
463
        __throw_regex_error(regex_constants::error_escape);
464
    }
465
 
466
 
467
  // Eats a character class or throwns an exception.
468
  // current point to ':' delimiter on entry, char after ']' on return
469
  template<typename _InputIterator>
470
    void
471
    _Scanner<_InputIterator>::
472
    _M_eat_charclass()
473
    {
474
      ++_M_current; // skip ':'
475
      if (_M_current == _M_end)
476
        __throw_regex_error(regex_constants::error_ctype);
477
      for (_M_curValue.clear();
478
           _M_current != _M_end && *_M_current != _M_ctype.widen(':');
479
           ++_M_current)
480
        _M_curValue += *_M_current;
481
      if (_M_current == _M_end)
482
        __throw_regex_error(regex_constants::error_ctype);
483
      ++_M_current; // skip ':'
484
      if (*_M_current != _M_ctype.widen(']'))
485
        __throw_regex_error(regex_constants::error_ctype);
486
      ++_M_current; // skip ']'
487
    }
488
 
489
 
490
  template<typename _InputIterator>
491
    void
492
    _Scanner<_InputIterator>::
493
    _M_eat_equivclass()
494
    {
495
      ++_M_current; // skip '='
496
      if (_M_current == _M_end)
497
        __throw_regex_error(regex_constants::error_collate);
498
      for (_M_curValue.clear();
499
           _M_current != _M_end && *_M_current != _M_ctype.widen('=');
500
           ++_M_current)
501
        _M_curValue += *_M_current;
502
      if (_M_current == _M_end)
503
        __throw_regex_error(regex_constants::error_collate);
504
      ++_M_current; // skip '='
505
      if (*_M_current != _M_ctype.widen(']'))
506
        __throw_regex_error(regex_constants::error_collate);
507
      ++_M_current; // skip ']'
508
    }
509
 
510
 
511
  template<typename _InputIterator>
512
    void
513
    _Scanner<_InputIterator>::
514
    _M_eat_collsymbol()
515
    {
516
      ++_M_current; // skip '.'
517
      if (_M_current == _M_end)
518
        __throw_regex_error(regex_constants::error_collate);
519
      for (_M_curValue.clear();
520
           _M_current != _M_end && *_M_current != _M_ctype.widen('.');
521
           ++_M_current)
522
        _M_curValue += *_M_current;
523
      if (_M_current == _M_end)
524
        __throw_regex_error(regex_constants::error_collate);
525
      ++_M_current; // skip '.'
526
      if (*_M_current != _M_ctype.widen(']'))
527
        __throw_regex_error(regex_constants::error_collate);
528
      ++_M_current; // skip ']'
529
    }
530
 
531
#ifdef _GLIBCXX_DEBUG
532
  template<typename _InputIterator>
533
    std::ostream&
534
    _Scanner<_InputIterator>::
535
    _M_print(std::ostream& ostr)
536
    {
537
      switch (_M_curToken)
538
      {
539
        case _S_token_anychar:
540
          ostr << "any-character\n";
541
          break;
542
        case _S_token_backref:
543
          ostr << "backref\n";
544
          break;
545
        case _S_token_bracket_begin:
546
          ostr << "bracket-begin\n";
547
          break;
548
        case _S_token_bracket_end:
549
          ostr << "bracket-end\n";
550
          break;
551
        case _S_token_char_class_name:
552
          ostr << "char-class-name \"" << _M_curValue << "\"\n";
553
          break;
554
        case _S_token_closure0:
555
          ostr << "closure0\n";
556
          break;
557
        case _S_token_closure1:
558
          ostr << "closure1\n";
559
          break;
560
        case _S_token_collelem_multi:
561
          ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
562
          break;
563
        case _S_token_collelem_single:
564
          ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
565
          break;
566
        case _S_token_collsymbol:
567
          ostr << "collsymbol \"" << _M_curValue << "\"\n";
568
          break;
569
        case _S_token_comma:
570
          ostr << "comma\n";
571
          break;
572
        case _S_token_dash:
573
          ostr << "dash\n";
574
          break;
575
        case _S_token_dup_count:
576
          ostr << "dup count: " << _M_curValue << "\n";
577
          break;
578
        case _S_token_eof:
579
          ostr << "EOF\n";
580
          break;
581
        case _S_token_equiv_class_name:
582
          ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
583
          break;
584
        case _S_token_interval_begin:
585
          ostr << "interval begin\n";
586
          break;
587
        case _S_token_interval_end:
588
          ostr << "interval end\n";
589
          break;
590
        case _S_token_line_begin:
591
          ostr << "line begin\n";
592
          break;
593
        case _S_token_line_end:
594
          ostr << "line end\n";
595
          break;
596
        case _S_token_opt:
597
          ostr << "opt\n";
598
          break;
599
        case _S_token_or:
600
          ostr << "or\n";
601
          break;
602
        case _S_token_ord_char:
603
          ostr << "ordinary character: \"" << _M_value() << "\"\n";
604
          break;
605
        case _S_token_quoted_char:
606
          ostr << "quoted char\n";
607
          break;
608
        case _S_token_subexpr_begin:
609
          ostr << "subexpr begin\n";
610
          break;
611
        case _S_token_subexpr_end:
612
          ostr << "subexpr end\n";
613
          break;
614
        case _S_token_word_begin:
615
          ostr << "word begin\n";
616
          break;
617
        case _S_token_word_end:
618
          ostr << "word end\n";
619
          break;
620
        case _S_token_unknown:
621
          ostr << "-- unknown token --\n";
622
          break;
623
      }
624
      return ostr;
625
    }
626
#endif
627
 
628
  // Builds an NFA from an input iterator interval.
629
  template<typename _InIter, typename _TraitsT>
630
    class _Compiler
631
    {
632
    public:
633
      typedef _InIter                                            _IterT;
634
      typedef typename std::iterator_traits<_InIter>::value_type _CharT;
635
      typedef std::basic_string<_CharT>                          _StringT;
636
      typedef regex_constants::syntax_option_type                _FlagT;
637
 
638
    public:
639
      _Compiler(const _InIter& __b, const _InIter& __e,
640
                _TraitsT& __traits, _FlagT __flags);
641
 
642
      const _Nfa&
643
      _M_nfa() const
644
      { return _M_state_store; }
645
 
646
    private:
647
      typedef _Scanner<_InIter>                              _ScannerT;
648
      typedef typename _ScannerT::_TokenT                    _TokenT;
649
      typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
650
      typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
651
 
652
      // accepts a specific token or returns false.
653
      bool
654
      _M_match_token(_TokenT __token);
655
 
656
      void
657
      _M_disjunction();
658
 
659
      bool
660
      _M_alternative();
661
 
662
      bool
663
      _M_term();
664
 
665
      bool
666
      _M_assertion();
667
 
668
      bool
669
      _M_quantifier();
670
 
671
      bool
672
      _M_atom();
673
 
674
      bool
675
      _M_bracket_expression();
676
 
677
      bool
678
      _M_bracket_list(_RMatcherT& __matcher);
679
 
680
      bool
681
      _M_follow_list(_RMatcherT& __matcher);
682
 
683
      bool
684
      _M_follow_list2(_RMatcherT& __matcher);
685
 
686
      bool
687
      _M_expression_term(_RMatcherT& __matcher);
688
 
689
      bool
690
      _M_range_expression(_RMatcherT& __matcher);
691
 
692
      bool
693
      _M_start_range(_RMatcherT& __matcher);
694
 
695
      bool
696
      _M_collating_symbol(_RMatcherT& __matcher);
697
 
698
      bool
699
      _M_equivalence_class(_RMatcherT& __matcher);
700
 
701
      bool
702
      _M_character_class(_RMatcherT& __matcher);
703
 
704
      int
705
      _M_cur_int_value(int __radix);
706
 
707
    private:
708
      _TraitsT&      _M_traits;
709
      _ScannerT      _M_scanner;
710
      _StringT       _M_cur_value;
711
      _Nfa           _M_state_store;
712
      _StackT        _M_stack;
713
    };
714
 
715
  template<typename _InIter, typename _TraitsT>
716
    _Compiler<_InIter, _TraitsT>::
717
    _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
718
              _Compiler<_InIter, _TraitsT>::_FlagT __flags)
719
    : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
720
      _M_state_store(__flags)
721
    {
722
      typedef _StartTagger<_InIter, _TraitsT> _Start;
723
      typedef _EndTagger<_InIter, _TraitsT> _End;
724
 
725
      _StateSeq __r(_M_state_store,
726
                    _M_state_store._M_insert_subexpr_begin(_Start(0)));
727
      _M_disjunction();
728
      if (!_M_stack.empty())
729
        {
730
          __r._M_append(_M_stack.top());
731
          _M_stack.pop();
732
        }
733
      __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
734
      __r._M_append(_M_state_store._M_insert_accept());
735
    }
736
 
737
  template<typename _InIter, typename _TraitsT>
738
    bool
739
    _Compiler<_InIter, _TraitsT>::
740
    _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
741
    {
742
      if (token == _M_scanner._M_token())
743
        {
744
          _M_cur_value = _M_scanner._M_value();
745
          _M_scanner._M_advance();
746
          return true;
747
        }
748
      return false;
749
    }
750
 
751
  template<typename _InIter, typename _TraitsT>
752
    void
753
    _Compiler<_InIter, _TraitsT>::
754
    _M_disjunction()
755
    {
756
      this->_M_alternative();
757
      if (_M_match_token(_ScannerT::_S_token_or))
758
        {
759
          _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
760
          this->_M_disjunction();
761
          _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
762
          _M_stack.push(_StateSeq(__alt1, __alt2));
763
        }
764
    }
765
 
766
  template<typename _InIter, typename _TraitsT>
767
    bool
768
    _Compiler<_InIter, _TraitsT>::
769
    _M_alternative()
770
    {
771
      if (this->_M_term())
772
        {
773
          _StateSeq __re = _M_stack.top(); _M_stack.pop();
774
          this->_M_alternative();
775
          if (!_M_stack.empty())
776
            {
777
              __re._M_append(_M_stack.top());
778
              _M_stack.pop();
779
            }
780
          _M_stack.push(__re);
781
          return true;
782
        }
783
      return false;
784
    }
785
 
786
  template<typename _InIter, typename _TraitsT>
787
    bool
788
    _Compiler<_InIter, _TraitsT>::
789
    _M_term()
790
    {
791
      if (this->_M_assertion())
792
        return true;
793
      if (this->_M_atom())
794
        {
795
          this->_M_quantifier();
796
          return true;
797
        }
798
      return false;
799
    }
800
 
801
  template<typename _InIter, typename _TraitsT>
802
    bool
803
    _Compiler<_InIter, _TraitsT>::
804
    _M_assertion()
805
    {
806
      if (_M_match_token(_ScannerT::_S_token_line_begin))
807
        {
808
          // __m.push(_Matcher::_S_opcode_line_begin);
809
          return true;
810
        }
811
      if (_M_match_token(_ScannerT::_S_token_line_end))
812
        {
813
          // __m.push(_Matcher::_S_opcode_line_end);
814
          return true;
815
        }
816
      if (_M_match_token(_ScannerT::_S_token_word_begin))
817
        {
818
          // __m.push(_Matcher::_S_opcode_word_begin);
819
          return true;
820
        }
821
      if (_M_match_token(_ScannerT::_S_token_word_end))
822
        {
823
          // __m.push(_Matcher::_S_opcode_word_end);
824
          return true;
825
        }
826
      return false;
827
    }
828
 
829
  template<typename _InIter, typename _TraitsT>
830
    bool
831
    _Compiler<_InIter, _TraitsT>::
832
    _M_quantifier()
833
    {
834
      if (_M_match_token(_ScannerT::_S_token_closure0))
835
        {
836
          if (_M_stack.empty())
837
            __throw_regex_error(regex_constants::error_badrepeat);
838
          _StateSeq __r(_M_stack.top(), -1);
839
          __r._M_append(__r._M_front());
840
          _M_stack.pop();
841
          _M_stack.push(__r);
842
          return true;
843
        }
844
      if (_M_match_token(_ScannerT::_S_token_closure1))
845
        {
846
          if (_M_stack.empty())
847
            __throw_regex_error(regex_constants::error_badrepeat);
848
          _StateSeq __r(_M_state_store,
849
                        _M_state_store.
850
                        _M_insert_alt(_S_invalid_state_id,
851
                                      _M_stack.top()._M_front()));
852
          _M_stack.top()._M_append(__r);
853
          return true;
854
        }
855
      if (_M_match_token(_ScannerT::_S_token_opt))
856
        {
857
          if (_M_stack.empty())
858
          __throw_regex_error(regex_constants::error_badrepeat);
859
          _StateSeq __r(_M_stack.top(), -1);
860
          _M_stack.pop();
861
          _M_stack.push(__r);
862
          return true;
863
        }
864
      if (_M_match_token(_ScannerT::_S_token_interval_begin))
865
        {
866
          if (_M_stack.empty())
867
            __throw_regex_error(regex_constants::error_badrepeat);
868
          if (!_M_match_token(_ScannerT::_S_token_dup_count))
869
            __throw_regex_error(regex_constants::error_badbrace);
870
          _StateSeq __r(_M_stack.top());
871
          int __min_rep = _M_cur_int_value(10);
872
          for (int __i = 1; __i < __min_rep; ++__i)
873
            _M_stack.top()._M_append(__r._M_clone());
874
          if (_M_match_token(_ScannerT::_S_token_comma))
875
            if (_M_match_token(_ScannerT::_S_token_dup_count))
876
              {
877
                int __n = _M_cur_int_value(10) - __min_rep;
878
                if (__n < 0)
879
                  __throw_regex_error(regex_constants::error_badbrace);
880
                for (int __i = 0; __i < __n; ++__i)
881
                  {
882
                    _StateSeq __r(_M_state_store,
883
                                  _M_state_store.
884
                                  _M_insert_alt(_S_invalid_state_id,
885
                                                _M_stack.top()._M_front()));
886
                    _M_stack.top()._M_append(__r);
887
                  }
888
              }
889
            else
890
              {
891
                _StateSeq __r(_M_stack.top(), -1);
892
                __r._M_push_back(__r._M_front());
893
                _M_stack.pop();
894
                _M_stack.push(__r);
895
              }
896
          if (!_M_match_token(_ScannerT::_S_token_interval_end))
897
            __throw_regex_error(regex_constants::error_brace);
898
          return true;
899
        }
900
      return false;
901
    }
902
 
903
  template<typename _InIter, typename _TraitsT>
904
    bool
905
    _Compiler<_InIter, _TraitsT>::
906
    _M_atom()
907
    {
908
      typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
909
      typedef _StartTagger<_InIter, _TraitsT> _Start;
910
      typedef _EndTagger<_InIter, _TraitsT> _End;
911
 
912
      if (_M_match_token(_ScannerT::_S_token_anychar))
913
        {
914
          _M_stack.push(_StateSeq(_M_state_store,
915
                                  _M_state_store._M_insert_matcher
916
                                  (_AnyMatcher)));
917
          return true;
918
        }
919
      if (_M_match_token(_ScannerT::_S_token_ord_char))
920
        {
921
          _M_stack.push(_StateSeq(_M_state_store,
922
                                  _M_state_store._M_insert_matcher
923
                                  (_CMatcher(_M_cur_value[0], _M_traits))));
924
          return true;
925
        }
926
      if (_M_match_token(_ScannerT::_S_token_quoted_char))
927
        {
928
          // note that in the ECMA grammar, this case covers backrefs.
929
          _M_stack.push(_StateSeq(_M_state_store,
930
                                  _M_state_store._M_insert_matcher
931
                                  (_CMatcher(_M_cur_value[0], _M_traits))));
932
          return true;
933
        }
934
      if (_M_match_token(_ScannerT::_S_token_backref))
935
        {
936
          // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
937
          return true;
938
        }
939
      if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
940
        {
941
          int __mark = _M_state_store._M_sub_count();
942
          _StateSeq __r(_M_state_store,
943
                        _M_state_store.
944
                        _M_insert_subexpr_begin(_Start(__mark)));
945
          this->_M_disjunction();
946
          if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
947
            __throw_regex_error(regex_constants::error_paren);
948
          if (!_M_stack.empty())
949
            {
950
              __r._M_append(_M_stack.top());
951
              _M_stack.pop();
952
            }
953
          __r._M_append(_M_state_store._M_insert_subexpr_end
954
                        (__mark, _End(__mark)));
955
          _M_stack.push(__r);
956
          return true;
957
        }
958
      return _M_bracket_expression();
959
    }
960
 
961
  template<typename _InIter, typename _TraitsT>
962
    bool
963
    _Compiler<_InIter, _TraitsT>::
964
    _M_bracket_expression()
965
    {
966
      if (_M_match_token(_ScannerT::_S_token_bracket_begin))
967
        {
968
          _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
969
                               _M_traits);
970
          if (!_M_bracket_list(__matcher)
971
              || !_M_match_token(_ScannerT::_S_token_bracket_end))
972
            __throw_regex_error(regex_constants::error_brack);
973
          _M_stack.push(_StateSeq(_M_state_store,
974
                                  _M_state_store._M_insert_matcher(__matcher)));
975
          return true;
976
        }
977
      return false;
978
    }
979
 
980
  // If the dash is the last character in the bracket expression, it is not
981
  // special.
982
  template<typename _InIter, typename _TraitsT>
983
    bool
984
    _Compiler<_InIter, _TraitsT>::
985
    _M_bracket_list(_RMatcherT& __matcher)
986
    {
987
      if (_M_follow_list(__matcher))
988
        {
989
          if (_M_match_token(_ScannerT::_S_token_dash))
990
            __matcher._M_add_char(_M_cur_value[0]);
991
          return true;
992
        }
993
      return false;
994
    }
995
 
996
  template<typename _InIter, typename _TraitsT>
997
    bool
998
    _Compiler<_InIter, _TraitsT>::
999
    _M_follow_list(_RMatcherT& __matcher)
1000
    { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1001
 
1002
  template<typename _InIter, typename _TraitsT>
1003
    bool
1004
    _Compiler<_InIter, _TraitsT>::
1005
    _M_follow_list2(_RMatcherT& __matcher)
1006
    {
1007
      if (_M_expression_term(__matcher))
1008
        return _M_follow_list2(__matcher);
1009
      return true;
1010
    }
1011
 
1012
  template<typename _InIter, typename _TraitsT>
1013
    bool
1014
    _Compiler<_InIter, _TraitsT>::
1015
    _M_expression_term(_RMatcherT& __matcher)
1016
    {
1017
      return (_M_collating_symbol(__matcher)
1018
              || _M_character_class(__matcher)
1019
              || _M_equivalence_class(__matcher)
1020
              || (_M_start_range(__matcher)
1021
                  && _M_range_expression(__matcher)));
1022
    }
1023
 
1024
  template<typename _InIter, typename _TraitsT>
1025
    bool
1026
    _Compiler<_InIter, _TraitsT>::
1027
    _M_range_expression(_RMatcherT& __matcher)
1028
    {
1029
      if (!_M_collating_symbol(__matcher))
1030
        if (!_M_match_token(_ScannerT::_S_token_dash))
1031
          __throw_regex_error(regex_constants::error_range);
1032
      __matcher._M_make_range();
1033
      return true;
1034
    }
1035
 
1036
  template<typename _InIter, typename _TraitsT>
1037
    bool
1038
    _Compiler<_InIter, _TraitsT>::
1039
    _M_start_range(_RMatcherT& __matcher)
1040
    { return _M_match_token(_ScannerT::_S_token_dash); }
1041
 
1042
  template<typename _InIter, typename _TraitsT>
1043
    bool
1044
    _Compiler<_InIter, _TraitsT>::
1045
    _M_collating_symbol(_RMatcherT& __matcher)
1046
    {
1047
      if (_M_match_token(_ScannerT::_S_token_collelem_single))
1048
        {
1049
          __matcher._M_add_char(_M_cur_value[0]);
1050
          return true;
1051
        }
1052
      if (_M_match_token(_ScannerT::_S_token_collsymbol))
1053
        {
1054
          __matcher._M_add_collating_element(_M_cur_value);
1055
          return true;
1056
        }
1057
      return false;
1058
    }
1059
 
1060
  template<typename _InIter, typename _TraitsT>
1061
    bool
1062
    _Compiler<_InIter, _TraitsT>::
1063
    _M_equivalence_class(_RMatcherT& __matcher)
1064
    {
1065
      if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1066
        {
1067
          __matcher._M_add_equivalence_class(_M_cur_value);
1068
          return true;
1069
        }
1070
      return false;
1071
    }
1072
 
1073
  template<typename _InIter, typename _TraitsT>
1074
    bool
1075
    _Compiler<_InIter, _TraitsT>::
1076
    _M_character_class(_RMatcherT& __matcher)
1077
    {
1078
      if (_M_match_token(_ScannerT::_S_token_char_class_name))
1079
        {
1080
          __matcher._M_add_character_class(_M_cur_value);
1081
          return true;
1082
        }
1083
      return false;
1084
    }
1085
 
1086
  template<typename _InIter, typename _TraitsT>
1087
    int
1088
    _Compiler<_InIter, _TraitsT>::
1089
    _M_cur_int_value(int __radix)
1090
    {
1091
      int __v = 0;
1092
      for (typename _StringT::size_type __i = 0;
1093
           __i < _M_cur_value.length(); ++__i)
1094
        __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1095
      return __v;
1096
    }
1097
 
1098
  template<typename _InIter, typename _TraitsT>
1099
    _AutomatonPtr
1100
    __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1101
              regex_constants::syntax_option_type __f)
1102
    { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1103
                                        __f)._M_nfa())); }
1104
 
1105
_GLIBCXX_END_NAMESPACE_VERSION
1106
} // namespace __regex
1107
} // namespace std
1108
 
1109
/* vim: set ts=8 sw=2 sts=2: */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.