OpenCores
URL https://opencores.org/ocsvn/altor32/altor32/trunk

Subversion Repositories altor32

[/] [altor32/] [trunk/] [gcc-x64/] [or1knd-elf/] [or1knd-elf/] [include/] [c++/] [4.8.0/] [bits/] [regex_compiler.h] - Blame information for rev 35

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 35 ultra_embe
// class template regex -*- C++ -*-
2
 
3
// Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the
7
// terms of the GNU General Public License as published by the
8
// Free Software Foundation; either version 3, or (at your option)
9
// any later version.
10
 
11
// This library is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
// GNU General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/**
26
 *  @file bits/regex_compiler.h
27
 *  This is an internal header file, included by other library headers.
28
 *  Do not attempt to use it directly. @headername{regex}
29
 */
30
 
31
namespace std _GLIBCXX_VISIBILITY(default)
32
{
33
namespace __detail
34
{
35
_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
 
37
  /**
38
   * @addtogroup regex-detail
39
   * @{
40
   */
41
 
42
  /// Non-template base shared by every _Scanner instantiation.  It only
  /// supplies the state-flag type and the bit values scanners combine in
  /// their state word.
  struct _Scanner_base
  {
    using _StateT = unsigned int;

    // One bit per flag so several can be OR-ed into a single _StateT.
    static constexpr _StateT _S_state_at_start   = 0x1;
    static constexpr _StateT _S_state_in_brace   = 0x4;
    static constexpr _StateT _S_state_in_bracket = 0x8;

    virtual ~_Scanner_base() { }
  };
53
 
54
  /**
55
   * @brief struct _Scanner. Scans an input range for regex tokens.
56
   *
57
   * The %_Scanner class interprets the regular expression pattern in
58
   * the input range passed to its constructor as a sequence of parse
59
   * tokens passed to the regular expression compiler.  The sequence
60
   * of tokens provided depends on the flag settings passed to the
61
   * constructor: different regular expression grammars will interpret
62
   * the same input pattern in syntactically different ways.
63
   */
64
  template<typename _InputIterator>
65
    class _Scanner: public _Scanner_base
66
    {
67
    public:
68
      typedef _InputIterator                                        _IteratorT;
69
      typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
70
      typedef std::basic_string<_CharT>                             _StringT;
71
      typedef regex_constants::syntax_option_type                   _FlagT;
72
      typedef const std::ctype<_CharT>                              _CtypeT;
73
 
74
      /// Token types returned from the scanner.
75
      enum _TokenT
76
      {
77
        _S_token_anychar,
78
        _S_token_backref,
79
        _S_token_bracket_begin,
80
        _S_token_bracket_end,
81
        _S_token_inverse_class,
82
        _S_token_char_class_name,
83
        _S_token_closure0,
84
        _S_token_closure1,
85
        _S_token_collelem_multi,
86
        _S_token_collelem_single,
87
        _S_token_collsymbol,
88
        _S_token_comma,
89
        _S_token_dash,
90
        _S_token_dup_count,
91
        _S_token_eof,
92
        _S_token_equiv_class_name,
93
        _S_token_interval_begin,
94
        _S_token_interval_end,
95
        _S_token_line_begin,
96
        _S_token_line_end,
97
        _S_token_opt,
98
        _S_token_or,
99
        _S_token_ord_char,
100
        _S_token_quoted_char,
101
        _S_token_subexpr_begin,
102
        _S_token_subexpr_end,
103
        _S_token_word_begin,
104
        _S_token_word_end,
105
        _S_token_unknown
106
      };
107
 
108
      _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
109
               std::locale __loc)
110
      : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
111
        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
112
      { _M_advance(); }
113
 
114
      void
115
      _M_advance();
116
 
117
      _TokenT
118
      _M_token() const
119
      { return _M_curToken; }
120
 
121
      const _StringT&
122
      _M_value() const
123
      { return _M_curValue; }
124
 
125
#ifdef _GLIBCXX_DEBUG
126
      std::ostream&
127
      _M_print(std::ostream&);
128
#endif
129
 
130
    private:
131
      void
132
      _M_eat_escape();
133
 
134
      void
135
      _M_scan_in_brace();
136
 
137
      void
138
      _M_scan_in_bracket();
139
 
140
      void
141
      _M_eat_charclass();
142
 
143
      void
144
      _M_eat_equivclass();
145
 
146
      void
147
      _M_eat_collsymbol();
148
 
149
      _IteratorT  _M_current;
150
      _IteratorT  _M_end;
151
      _FlagT      _M_flags;
152
      _CtypeT&    _M_ctype;
153
      _TokenT     _M_curToken;
154
      _StringT    _M_curValue;
155
      _StateT     _M_state;
156
    };
157
 
158
  template<typename _InputIterator>
159
    void
160
    _Scanner<_InputIterator>::
161
    _M_advance()
162
    {
163
      if (_M_current == _M_end)
164
        {
165
          _M_curToken = _S_token_eof;
166
          return;
167
        }
168
 
169
      _CharT __c = *_M_current;
170
      if (_M_state & _S_state_in_bracket)
171
        {
172
          _M_scan_in_bracket();
173
          return;
174
        }
175
      if (_M_state & _S_state_in_brace)
176
        {
177
          _M_scan_in_brace();
178
          return;
179
        }
180
#if 0
181
      // TODO: re-enable line anchors when _M_assertion is implemented.
182
      // See PR libstdc++/47724
183
      else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
184
        {
185
          _M_curToken = _S_token_line_begin;
186
          ++_M_current;
187
          return;
188
        }
189
      else if (__c == _M_ctype.widen('$'))
190
        {
191
          _M_curToken = _S_token_line_end;
192
          ++_M_current;
193
          return;
194
        }
195
#endif
196
      else if (__c == _M_ctype.widen('.'))
197
        {
198
          _M_curToken = _S_token_anychar;
199
          ++_M_current;
200
          return;
201
        }
202
      else if (__c == _M_ctype.widen('*'))
203
        {
204
          _M_curToken = _S_token_closure0;
205
          ++_M_current;
206
          return;
207
        }
208
      else if (__c == _M_ctype.widen('+'))
209
        {
210
          _M_curToken = _S_token_closure1;
211
          ++_M_current;
212
          return;
213
        }
214
      else if (__c == _M_ctype.widen('|'))
215
        {
216
          _M_curToken = _S_token_or;
217
          ++_M_current;
218
          return;
219
        }
220
      else if (__c == _M_ctype.widen('['))
221
        {
222
          _M_curToken = _S_token_bracket_begin;
223
          _M_state |= (_S_state_in_bracket | _S_state_at_start);
224
          ++_M_current;
225
          return;
226
        }
227
      else if (__c == _M_ctype.widen('\\'))
228
        {
229
          _M_eat_escape();
230
          return;
231
        }
232
      else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
233
        {
234
          if (__c == _M_ctype.widen('('))
235
            {
236
              _M_curToken = _S_token_subexpr_begin;
237
              ++_M_current;
238
              return;
239
            }
240
          else if (__c == _M_ctype.widen(')'))
241
            {
242
              _M_curToken = _S_token_subexpr_end;
243
              ++_M_current;
244
              return;
245
            }
246
          else if (__c == _M_ctype.widen('{'))
247
            {
248
              _M_curToken = _S_token_interval_begin;
249
              _M_state |= _S_state_in_brace;
250
              ++_M_current;
251
              return;
252
            }
253
        }
254
 
255
      _M_curToken = _S_token_ord_char;
256
      _M_curValue.assign(1, __c);
257
      ++_M_current;
258
    }
259
 
260
 
261
  template<typename _InputIterator>
262
    void
263
    _Scanner<_InputIterator>::
264
    _M_scan_in_brace()
265
    {
266
      if (_M_ctype.is(_CtypeT::digit, *_M_current))
267
        {
268
          _M_curToken = _S_token_dup_count;
269
          _M_curValue.assign(1, *_M_current);
270
          ++_M_current;
271
          while (_M_current != _M_end
272
                 && _M_ctype.is(_CtypeT::digit, *_M_current))
273
            {
274
              _M_curValue += *_M_current;
275
              ++_M_current;
276
            }
277
          return;
278
        }
279
      else if (*_M_current == _M_ctype.widen(','))
280
        {
281
          _M_curToken = _S_token_comma;
282
          ++_M_current;
283
          return;
284
        }
285
      if (_M_flags & (regex_constants::basic | regex_constants::grep))
286
        {
287
          if (*_M_current == _M_ctype.widen('\\'))
288
            _M_eat_escape();
289
        }
290
      else
291
        {
292
          if (*_M_current == _M_ctype.widen('}'))
293
            {
294
              _M_curToken = _S_token_interval_end;
295
              _M_state &= ~_S_state_in_brace;
296
              ++_M_current;
297
              return;
298
            }
299
        }
300
    }
301
 
302
  template<typename _InputIterator>
303
    void
304
    _Scanner<_InputIterator>::
305
    _M_scan_in_bracket()
306
    {
307
      if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
308
        {
309
          _M_curToken = _S_token_inverse_class;
310
          _M_state &= ~_S_state_at_start;
311
          ++_M_current;
312
          return;
313
        }
314
      else if (*_M_current == _M_ctype.widen('['))
315
        {
316
          ++_M_current;
317
          if (_M_current == _M_end)
318
            {
319
              _M_curToken = _S_token_eof;
320
              return;
321
            }
322
 
323
          if (*_M_current == _M_ctype.widen('.'))
324
            {
325
              _M_curToken = _S_token_collsymbol;
326
              _M_eat_collsymbol();
327
              return;
328
            }
329
          else if (*_M_current == _M_ctype.widen(':'))
330
            {
331
              _M_curToken = _S_token_char_class_name;
332
              _M_eat_charclass();
333
              return;
334
            }
335
          else if (*_M_current == _M_ctype.widen('='))
336
            {
337
              _M_curToken = _S_token_equiv_class_name;
338
              _M_eat_equivclass();
339
              return;
340
            }
341
        }
342
      else if (*_M_current == _M_ctype.widen('-'))
343
        {
344
          _M_curToken = _S_token_dash;
345
          ++_M_current;
346
          return;
347
        }
348
      else if (*_M_current == _M_ctype.widen(']'))
349
        {
350
          if (!(_M_flags & regex_constants::ECMAScript)
351
              || !(_M_state & _S_state_at_start))
352
            {
353
              // special case: only if  _not_ chr first after
354
              // '[' or '[^' and if not ECMAscript
355
              _M_curToken = _S_token_bracket_end;
356
              ++_M_current;
357
              return;
358
            }
359
        }
360
      _M_curToken = _S_token_collelem_single;
361
      _M_curValue.assign(1, *_M_current);
362
      ++_M_current;
363
    }
364
 
365
  template<typename _InputIterator>
366
    void
367
    _Scanner<_InputIterator>::
368
    _M_eat_escape()
369
    {
370
      ++_M_current;
371
      if (_M_current == _M_end)
372
        {
373
          _M_curToken = _S_token_eof;
374
          return;
375
        }
376
      _CharT __c = *_M_current;
377
      ++_M_current;
378
 
379
      if (__c == _M_ctype.widen('('))
380
        {
381
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
382
            {
383
              _M_curToken = _S_token_ord_char;
384
              _M_curValue.assign(1, __c);
385
            }
386
          else
387
            _M_curToken = _S_token_subexpr_begin;
388
        }
389
      else if (__c == _M_ctype.widen(')'))
390
        {
391
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
392
            {
393
              _M_curToken = _S_token_ord_char;
394
              _M_curValue.assign(1, __c);
395
            }
396
          else
397
            _M_curToken = _S_token_subexpr_end;
398
        }
399
      else if (__c == _M_ctype.widen('{'))
400
        {
401
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
402
            {
403
              _M_curToken = _S_token_ord_char;
404
              _M_curValue.assign(1, __c);
405
            }
406
          else
407
            {
408
              _M_curToken = _S_token_interval_begin;
409
              _M_state |= _S_state_in_brace;
410
            }
411
        }
412
      else if (__c == _M_ctype.widen('}'))
413
        {
414
          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
415
            {
416
              _M_curToken = _S_token_ord_char;
417
              _M_curValue.assign(1, __c);
418
            }
419
          else
420
            {
421
              if (!(_M_state && _S_state_in_brace))
422
                __throw_regex_error(regex_constants::error_badbrace);
423
              _M_state &= ~_S_state_in_brace;
424
              _M_curToken = _S_token_interval_end;
425
            }
426
        }
427
      else if (__c == _M_ctype.widen('x'))
428
        {
429
          ++_M_current;
430
          if (_M_current == _M_end)
431
            {
432
              _M_curToken = _S_token_eof;
433
              return;
434
            }
435
          if (_M_ctype.is(_CtypeT::digit, *_M_current))
436
            {
437
              _M_curValue.assign(1, *_M_current);
438
              ++_M_current;
439
              if (_M_current == _M_end)
440
                {
441
                  _M_curToken = _S_token_eof;
442
                  return;
443
                }
444
              if (_M_ctype.is(_CtypeT::digit, *_M_current))
445
                {
446
                  _M_curValue += *_M_current;
447
                  ++_M_current;
448
                  return;
449
                }
450
            }
451
        }
452
      else if (__c == _M_ctype.widen('^')
453
               || __c == _M_ctype.widen('.')
454
               || __c == _M_ctype.widen('*')
455
               || __c == _M_ctype.widen('$')
456
               || __c == _M_ctype.widen('\\'))
457
        {
458
          _M_curToken = _S_token_ord_char;
459
          _M_curValue.assign(1, __c);
460
        }
461
      else if (_M_ctype.is(_CtypeT::digit, __c))
462
        {
463
          _M_curToken = _S_token_backref;
464
          _M_curValue.assign(1, __c);
465
        }
466
      else
467
        __throw_regex_error(regex_constants::error_escape);
468
    }
469
 
470
 
471
  // Eats a character class or throwns an exception.
472
  // current point to ':' delimiter on entry, char after ']' on return
473
  template<typename _InputIterator>
474
    void
475
    _Scanner<_InputIterator>::
476
    _M_eat_charclass()
477
    {
478
      ++_M_current; // skip ':'
479
      if (_M_current == _M_end)
480
        __throw_regex_error(regex_constants::error_ctype);
481
      for (_M_curValue.clear();
482
           _M_current != _M_end && *_M_current != _M_ctype.widen(':');
483
           ++_M_current)
484
        _M_curValue += *_M_current;
485
      if (_M_current == _M_end)
486
        __throw_regex_error(regex_constants::error_ctype);
487
      ++_M_current; // skip ':'
488
      if (*_M_current != _M_ctype.widen(']'))
489
        __throw_regex_error(regex_constants::error_ctype);
490
      ++_M_current; // skip ']'
491
    }
492
 
493
 
494
  template<typename _InputIterator>
495
    void
496
    _Scanner<_InputIterator>::
497
    _M_eat_equivclass()
498
    {
499
      ++_M_current; // skip '='
500
      if (_M_current == _M_end)
501
        __throw_regex_error(regex_constants::error_collate);
502
      for (_M_curValue.clear();
503
           _M_current != _M_end && *_M_current != _M_ctype.widen('=');
504
           ++_M_current)
505
        _M_curValue += *_M_current;
506
      if (_M_current == _M_end)
507
        __throw_regex_error(regex_constants::error_collate);
508
      ++_M_current; // skip '='
509
      if (*_M_current != _M_ctype.widen(']'))
510
        __throw_regex_error(regex_constants::error_collate);
511
      ++_M_current; // skip ']'
512
    }
513
 
514
 
515
  template<typename _InputIterator>
516
    void
517
    _Scanner<_InputIterator>::
518
    _M_eat_collsymbol()
519
    {
520
      ++_M_current; // skip '.'
521
      if (_M_current == _M_end)
522
        __throw_regex_error(regex_constants::error_collate);
523
      for (_M_curValue.clear();
524
           _M_current != _M_end && *_M_current != _M_ctype.widen('.');
525
           ++_M_current)
526
        _M_curValue += *_M_current;
527
      if (_M_current == _M_end)
528
        __throw_regex_error(regex_constants::error_collate);
529
      ++_M_current; // skip '.'
530
      if (*_M_current != _M_ctype.widen(']'))
531
        __throw_regex_error(regex_constants::error_collate);
532
      ++_M_current; // skip ']'
533
    }
534
 
535
#ifdef _GLIBCXX_DEBUG
536
  template<typename _InputIterator>
537
    std::ostream&
538
    _Scanner<_InputIterator>::
539
    _M_print(std::ostream& ostr)
540
    {
541
      switch (_M_curToken)
542
      {
543
        case _S_token_anychar:
544
          ostr << "any-character\n";
545
          break;
546
        case _S_token_backref:
547
          ostr << "backref\n";
548
          break;
549
        case _S_token_bracket_begin:
550
          ostr << "bracket-begin\n";
551
          break;
552
        case _S_token_bracket_end:
553
          ostr << "bracket-end\n";
554
          break;
555
        case _S_token_char_class_name:
556
          ostr << "char-class-name \"" << _M_curValue << "\"\n";
557
          break;
558
        case _S_token_closure0:
559
          ostr << "closure0\n";
560
          break;
561
        case _S_token_closure1:
562
          ostr << "closure1\n";
563
          break;
564
        case _S_token_collelem_multi:
565
          ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
566
          break;
567
        case _S_token_collelem_single:
568
          ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
569
          break;
570
        case _S_token_collsymbol:
571
          ostr << "collsymbol \"" << _M_curValue << "\"\n";
572
          break;
573
        case _S_token_comma:
574
          ostr << "comma\n";
575
          break;
576
        case _S_token_dash:
577
          ostr << "dash\n";
578
          break;
579
        case _S_token_dup_count:
580
          ostr << "dup count: " << _M_curValue << "\n";
581
          break;
582
        case _S_token_eof:
583
          ostr << "EOF\n";
584
          break;
585
        case _S_token_equiv_class_name:
586
          ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
587
          break;
588
        case _S_token_interval_begin:
589
          ostr << "interval begin\n";
590
          break;
591
        case _S_token_interval_end:
592
          ostr << "interval end\n";
593
          break;
594
        case _S_token_line_begin:
595
          ostr << "line begin\n";
596
          break;
597
        case _S_token_line_end:
598
          ostr << "line end\n";
599
          break;
600
        case _S_token_opt:
601
          ostr << "opt\n";
602
          break;
603
        case _S_token_or:
604
          ostr << "or\n";
605
          break;
606
        case _S_token_ord_char:
607
          ostr << "ordinary character: \"" << _M_value() << "\"\n";
608
          break;
609
        case _S_token_quoted_char:
610
          ostr << "quoted char\n";
611
          break;
612
        case _S_token_subexpr_begin:
613
          ostr << "subexpr begin\n";
614
          break;
615
        case _S_token_subexpr_end:
616
          ostr << "subexpr end\n";
617
          break;
618
        case _S_token_word_begin:
619
          ostr << "word begin\n";
620
          break;
621
        case _S_token_word_end:
622
          ostr << "word end\n";
623
          break;
624
        case _S_token_unknown:
625
          ostr << "-- unknown token --\n";
626
          break;
627
      }
628
      return ostr;
629
    }
630
#endif
631
 
632
  /// Builds an NFA from an input iterator interval.
633
  template<typename _InIter, typename _TraitsT>
634
    class _Compiler
635
    {
636
    public:
637
      typedef _InIter                                            _IterT;
638
      typedef typename std::iterator_traits<_InIter>::value_type _CharT;
639
      typedef std::basic_string<_CharT>                          _StringT;
640
      typedef regex_constants::syntax_option_type                _FlagT;
641
 
642
      _Compiler(const _InIter& __b, const _InIter& __e,
643
                _TraitsT& __traits, _FlagT __flags);
644
 
645
      const _Nfa&
646
      _M_nfa() const
647
      { return _M_state_store; }
648
 
649
    private:
650
      typedef _Scanner<_InIter>                              _ScannerT;
651
      typedef typename _ScannerT::_TokenT                    _TokenT;
652
      typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
653
      typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
654
 
655
      // accepts a specific token or returns false.
656
      bool
657
      _M_match_token(_TokenT __token);
658
 
659
      void
660
      _M_disjunction();
661
 
662
      bool
663
      _M_alternative();
664
 
665
      bool
666
      _M_term();
667
 
668
      bool
669
      _M_assertion();
670
 
671
      bool
672
      _M_quantifier();
673
 
674
      bool
675
      _M_atom();
676
 
677
      bool
678
      _M_bracket_expression();
679
 
680
      bool
681
      _M_bracket_list(_RMatcherT& __matcher);
682
 
683
      bool
684
      _M_follow_list(_RMatcherT& __matcher);
685
 
686
      bool
687
      _M_follow_list2(_RMatcherT& __matcher);
688
 
689
      bool
690
      _M_expression_term(_RMatcherT& __matcher);
691
 
692
      bool
693
      _M_range_expression(_RMatcherT& __matcher);
694
 
695
      bool
696
      _M_start_range(_RMatcherT& __matcher);
697
 
698
      bool
699
      _M_collating_symbol(_RMatcherT& __matcher);
700
 
701
      bool
702
      _M_equivalence_class(_RMatcherT& __matcher);
703
 
704
      bool
705
      _M_character_class(_RMatcherT& __matcher);
706
 
707
      int
708
      _M_cur_int_value(int __radix);
709
 
710
      _TraitsT&      _M_traits;
711
      _ScannerT      _M_scanner;
712
      _StringT       _M_cur_value;
713
      _Nfa           _M_state_store;
714
      _StackT        _M_stack;
715
    };
716
 
717
  template<typename _InIter, typename _TraitsT>
718
    _Compiler<_InIter, _TraitsT>::
719
    _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
720
              _Compiler<_InIter, _TraitsT>::_FlagT __flags)
721
    : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
722
      _M_state_store(__flags)
723
    {
724
      typedef _StartTagger<_InIter, _TraitsT> _Start;
725
      typedef _EndTagger<_InIter, _TraitsT> _End;
726
 
727
      _StateSeq __r(_M_state_store,
728
                    _M_state_store._M_insert_subexpr_begin(_Start(0)));
729
      _M_disjunction();
730
      if (!_M_stack.empty())
731
        {
732
          __r._M_append(_M_stack.top());
733
          _M_stack.pop();
734
        }
735
      __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
736
      __r._M_append(_M_state_store._M_insert_accept());
737
    }
738
 
739
  template<typename _InIter, typename _TraitsT>
740
    bool
741
    _Compiler<_InIter, _TraitsT>::
742
    _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
743
    {
744
      if (token == _M_scanner._M_token())
745
        {
746
          _M_cur_value = _M_scanner._M_value();
747
          _M_scanner._M_advance();
748
          return true;
749
        }
750
      return false;
751
    }
752
 
753
  template<typename _InIter, typename _TraitsT>
754
    void
755
    _Compiler<_InIter, _TraitsT>::
756
    _M_disjunction()
757
    {
758
      this->_M_alternative();
759
      if (_M_match_token(_ScannerT::_S_token_or))
760
        {
761
          _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
762
          this->_M_disjunction();
763
          _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
764
          _M_stack.push(_StateSeq(__alt1, __alt2));
765
        }
766
    }
767
 
768
  template<typename _InIter, typename _TraitsT>
769
    bool
770
    _Compiler<_InIter, _TraitsT>::
771
    _M_alternative()
772
    {
773
      if (this->_M_term())
774
        {
775
          _StateSeq __re = _M_stack.top(); _M_stack.pop();
776
          this->_M_alternative();
777
          if (!_M_stack.empty())
778
            {
779
              __re._M_append(_M_stack.top());
780
              _M_stack.pop();
781
            }
782
          _M_stack.push(__re);
783
          return true;
784
        }
785
      return false;
786
    }
787
 
788
  template<typename _InIter, typename _TraitsT>
789
    bool
790
    _Compiler<_InIter, _TraitsT>::
791
    _M_term()
792
    {
793
      if (this->_M_assertion())
794
        return true;
795
      if (this->_M_atom())
796
        {
797
          this->_M_quantifier();
798
          return true;
799
        }
800
      return false;
801
    }
802
 
803
  template<typename _InIter, typename _TraitsT>
804
    bool
805
    _Compiler<_InIter, _TraitsT>::
806
    _M_assertion()
807
    {
808
      if (_M_match_token(_ScannerT::_S_token_line_begin))
809
        {
810
          // __m.push(_Matcher::_S_opcode_line_begin);
811
          return true;
812
        }
813
      if (_M_match_token(_ScannerT::_S_token_line_end))
814
        {
815
          // __m.push(_Matcher::_S_opcode_line_end);
816
          return true;
817
        }
818
      if (_M_match_token(_ScannerT::_S_token_word_begin))
819
        {
820
          // __m.push(_Matcher::_S_opcode_word_begin);
821
          return true;
822
        }
823
      if (_M_match_token(_ScannerT::_S_token_word_end))
824
        {
825
          // __m.push(_Matcher::_S_opcode_word_end);
826
          return true;
827
        }
828
      return false;
829
    }
830
 
831
  template<typename _InIter, typename _TraitsT>
832
    bool
833
    _Compiler<_InIter, _TraitsT>::
834
    _M_quantifier()
835
    {
836
      if (_M_match_token(_ScannerT::_S_token_closure0))
837
        {
838
          if (_M_stack.empty())
839
            __throw_regex_error(regex_constants::error_badrepeat);
840
          _StateSeq __r(_M_stack.top(), -1);
841
          __r._M_append(__r._M_front());
842
          _M_stack.pop();
843
          _M_stack.push(__r);
844
          return true;
845
        }
846
      if (_M_match_token(_ScannerT::_S_token_closure1))
847
        {
848
          if (_M_stack.empty())
849
            __throw_regex_error(regex_constants::error_badrepeat);
850
          _StateSeq __r(_M_state_store,
851
                        _M_state_store.
852
                        _M_insert_alt(_S_invalid_state_id,
853
                                      _M_stack.top()._M_front()));
854
          _M_stack.top()._M_append(__r);
855
          return true;
856
        }
857
      if (_M_match_token(_ScannerT::_S_token_opt))
858
        {
859
          if (_M_stack.empty())
860
          __throw_regex_error(regex_constants::error_badrepeat);
861
          _StateSeq __r(_M_stack.top(), -1);
862
          _M_stack.pop();
863
          _M_stack.push(__r);
864
          return true;
865
        }
866
      if (_M_match_token(_ScannerT::_S_token_interval_begin))
867
        {
868
          if (_M_stack.empty())
869
            __throw_regex_error(regex_constants::error_badrepeat);
870
          if (!_M_match_token(_ScannerT::_S_token_dup_count))
871
            __throw_regex_error(regex_constants::error_badbrace);
872
          _StateSeq __r(_M_stack.top());
873
          int __min_rep = _M_cur_int_value(10);
874
          for (int __i = 1; __i < __min_rep; ++__i)
875
            _M_stack.top()._M_append(__r._M_clone());
876
          if (_M_match_token(_ScannerT::_S_token_comma))
877
            if (_M_match_token(_ScannerT::_S_token_dup_count))
878
              {
879
                int __n = _M_cur_int_value(10) - __min_rep;
880
                if (__n < 0)
881
                  __throw_regex_error(regex_constants::error_badbrace);
882
                for (int __i = 0; __i < __n; ++__i)
883
                  {
884
                    _StateSeq __r(_M_state_store,
885
                                  _M_state_store.
886
                                  _M_insert_alt(_S_invalid_state_id,
887
                                                _M_stack.top()._M_front()));
888
                    _M_stack.top()._M_append(__r);
889
                  }
890
              }
891
            else
892
              {
893
                _StateSeq __r(_M_stack.top(), -1);
894
                __r._M_push_back(__r._M_front());
895
                _M_stack.pop();
896
                _M_stack.push(__r);
897
              }
898
          if (!_M_match_token(_ScannerT::_S_token_interval_end))
899
            __throw_regex_error(regex_constants::error_brace);
900
          return true;
901
        }
902
      return false;
903
    }
904
 
905
  template<typename _InIter, typename _TraitsT>
906
    bool
907
    _Compiler<_InIter, _TraitsT>::
908
    _M_atom()
909
    {
910
      typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
911
      typedef _StartTagger<_InIter, _TraitsT> _Start;
912
      typedef _EndTagger<_InIter, _TraitsT> _End;
913
 
914
      if (_M_match_token(_ScannerT::_S_token_anychar))
915
        {
916
          _M_stack.push(_StateSeq(_M_state_store,
917
                                  _M_state_store._M_insert_matcher
918
                                  (_AnyMatcher)));
919
          return true;
920
        }
921
      if (_M_match_token(_ScannerT::_S_token_ord_char))
922
        {
923
          _M_stack.push(_StateSeq(_M_state_store,
924
                                  _M_state_store._M_insert_matcher
925
                                  (_CMatcher(_M_cur_value[0], _M_traits))));
926
          return true;
927
        }
928
      if (_M_match_token(_ScannerT::_S_token_quoted_char))
929
        {
930
          // note that in the ECMA grammar, this case covers backrefs.
931
          _M_stack.push(_StateSeq(_M_state_store,
932
                                  _M_state_store._M_insert_matcher
933
                                  (_CMatcher(_M_cur_value[0], _M_traits))));
934
          return true;
935
        }
936
      if (_M_match_token(_ScannerT::_S_token_backref))
937
        {
938
          // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
939
          return true;
940
        }
941
      if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
942
        {
943
          int __mark = _M_state_store._M_sub_count();
944
          _StateSeq __r(_M_state_store,
945
                        _M_state_store.
946
                        _M_insert_subexpr_begin(_Start(__mark)));
947
          this->_M_disjunction();
948
          if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
949
            __throw_regex_error(regex_constants::error_paren);
950
          if (!_M_stack.empty())
951
            {
952
              __r._M_append(_M_stack.top());
953
              _M_stack.pop();
954
            }
955
          __r._M_append(_M_state_store._M_insert_subexpr_end
956
                        (__mark, _End(__mark)));
957
          _M_stack.push(__r);
958
          return true;
959
        }
960
      return _M_bracket_expression();
961
    }
962
 
963
  template<typename _InIter, typename _TraitsT>
964
    bool
965
    _Compiler<_InIter, _TraitsT>::
966
    _M_bracket_expression()
967
    {
968
      if (_M_match_token(_ScannerT::_S_token_bracket_begin))
969
        {
970
          _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
971
                               _M_traits);
972
          if (!_M_bracket_list(__matcher)
973
              || !_M_match_token(_ScannerT::_S_token_bracket_end))
974
            __throw_regex_error(regex_constants::error_brack);
975
          _M_stack.push(_StateSeq(_M_state_store,
976
                                  _M_state_store._M_insert_matcher(__matcher)));
977
          return true;
978
        }
979
      return false;
980
    }
981
 
982
  // If the dash is the last character in the bracket expression, it is not
983
  // special.
984
  template<typename _InIter, typename _TraitsT>
985
    bool
986
    _Compiler<_InIter, _TraitsT>::
987
    _M_bracket_list(_RMatcherT& __matcher)
988
    {
989
      if (_M_follow_list(__matcher))
990
        {
991
          if (_M_match_token(_ScannerT::_S_token_dash))
992
            __matcher._M_add_char(_M_cur_value[0]);
993
          return true;
994
        }
995
      return false;
996
    }
997
 
998
  template<typename _InIter, typename _TraitsT>
999
    bool
1000
    _Compiler<_InIter, _TraitsT>::
1001
    _M_follow_list(_RMatcherT& __matcher)
1002
    { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1003
 
1004
  template<typename _InIter, typename _TraitsT>
1005
    bool
1006
    _Compiler<_InIter, _TraitsT>::
1007
    _M_follow_list2(_RMatcherT& __matcher)
1008
    {
1009
      if (_M_expression_term(__matcher))
1010
        return _M_follow_list2(__matcher);
1011
      return true;
1012
    }
1013
 
1014
  template<typename _InIter, typename _TraitsT>
1015
    bool
1016
    _Compiler<_InIter, _TraitsT>::
1017
    _M_expression_term(_RMatcherT& __matcher)
1018
    {
1019
      return (_M_collating_symbol(__matcher)
1020
              || _M_character_class(__matcher)
1021
              || _M_equivalence_class(__matcher)
1022
              || (_M_start_range(__matcher)
1023
                  && _M_range_expression(__matcher)));
1024
    }
1025
 
1026
  template<typename _InIter, typename _TraitsT>
1027
    bool
1028
    _Compiler<_InIter, _TraitsT>::
1029
    _M_range_expression(_RMatcherT& __matcher)
1030
    {
1031
      if (!_M_collating_symbol(__matcher))
1032
        if (!_M_match_token(_ScannerT::_S_token_dash))
1033
          __throw_regex_error(regex_constants::error_range);
1034
      __matcher._M_make_range();
1035
      return true;
1036
    }
1037
 
1038
  template<typename _InIter, typename _TraitsT>
1039
    bool
1040
    _Compiler<_InIter, _TraitsT>::
1041
    _M_start_range(_RMatcherT& __matcher)
1042
    { return _M_match_token(_ScannerT::_S_token_dash); }
1043
 
1044
  template<typename _InIter, typename _TraitsT>
1045
    bool
1046
    _Compiler<_InIter, _TraitsT>::
1047
    _M_collating_symbol(_RMatcherT& __matcher)
1048
    {
1049
      if (_M_match_token(_ScannerT::_S_token_collelem_single))
1050
        {
1051
          __matcher._M_add_char(_M_cur_value[0]);
1052
          return true;
1053
        }
1054
      if (_M_match_token(_ScannerT::_S_token_collsymbol))
1055
        {
1056
          __matcher._M_add_collating_element(_M_cur_value);
1057
          return true;
1058
        }
1059
      return false;
1060
    }
1061
 
1062
  template<typename _InIter, typename _TraitsT>
1063
    bool
1064
    _Compiler<_InIter, _TraitsT>::
1065
    _M_equivalence_class(_RMatcherT& __matcher)
1066
    {
1067
      if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1068
        {
1069
          __matcher._M_add_equivalence_class(_M_cur_value);
1070
          return true;
1071
        }
1072
      return false;
1073
    }
1074
 
1075
  template<typename _InIter, typename _TraitsT>
1076
    bool
1077
    _Compiler<_InIter, _TraitsT>::
1078
    _M_character_class(_RMatcherT& __matcher)
1079
    {
1080
      if (_M_match_token(_ScannerT::_S_token_char_class_name))
1081
        {
1082
          __matcher._M_add_character_class(_M_cur_value);
1083
          return true;
1084
        }
1085
      return false;
1086
    }
1087
 
1088
  template<typename _InIter, typename _TraitsT>
1089
    int
1090
    _Compiler<_InIter, _TraitsT>::
1091
    _M_cur_int_value(int __radix)
1092
    {
1093
      int __v = 0;
1094
      for (typename _StringT::size_type __i = 0;
1095
           __i < _M_cur_value.length(); ++__i)
1096
        __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1097
      return __v;
1098
    }
1099
 
1100
  template<typename _InIter, typename _TraitsT>
1101
    _AutomatonPtr
1102
    __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1103
              regex_constants::syntax_option_type __f)
1104
    { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1105
                                        __f)._M_nfa())); }
1106
 
1107
 //@} regex-detail
1108
_GLIBCXX_END_NAMESPACE_VERSION
1109
} // namespace __detail
1110
} // namespace std

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.