OpenCores
URL https://opencores.org/ocsvn/hicovec/hicovec/trunk

Subversion Repositories hicovec

[/] [hicovec/] [trunk/] [assembler/] [pyparsing.py] - Blame information for rev 12

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 hmanske
# module pyparsing.py
2
#
3
# Copyright (c) 2003-2006  Paul T. McGuire
4
#
5
# Permission is hereby granted, free of charge, to any person obtaining
6
# a copy of this software and associated documentation files (the
7
# "Software"), to deal in the Software without restriction, including
8
# without limitation the rights to use, copy, modify, merge, publish,
9
# distribute, sublicense, and/or sell copies of the Software, and to
10
# permit persons to whom the Software is furnished to do so, subject to
11
# the following conditions:
12
#
13
# The above copyright notice and this permission notice shall be
14
# included in all copies or substantial portions of the Software.
15
#
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
#
24
#from __future__ import generators
25
 
26
__doc__ = \
27
"""
28
pyparsing module - Classes and methods to define and execute parsing grammars
29
 
30
The pyparsing module is an alternative approach to creating and executing simple grammars,
31
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
32
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33
provides a library of classes that you use to construct the grammar directly in Python.
34
 
35
Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
 
37
    from pyparsing import Word, alphas
38
 
39
    # define grammar of a greeting
40
    greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
 
42
    hello = "Hello, World!"
43
    print hello, "->", greet.parseString( hello )
44
 
45
The program outputs the following::
46
 
47
    Hello, World! -> ['Hello', ',', 'World', '!']
48
 
49
The Python representation of the grammar is quite readable, owing to the self-explanatory
50
class names, and the use of '+', '|' and '^' operators.
51
 
52
The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53
object with named attributes.
54
 
55
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
57
 - quoted strings
58
 - embedded comments
59
"""
60
__version__ = "1.4.4-Mod-HaraldManske"
61
__versionTime__ = "19 October 2006 23:11"
62
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
 
64
 
65
#Modified by Harald Manske:
66
# - removed Deprecation Warning of Upcase class
67
# - created Downcase class
68
 
69
import string
70
import copy,sys
71
import warnings
72
import re
73
import sre_constants
74
import xml.sax.saxutils
75
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
76
 
77
def _ustr(obj):
78
    """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
79
       str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
80
       then < returns the unicode object | encodes it with the default encoding | ... >.
81
    """
82
    try:
83
        # If this works, then _ustr(obj) has the same behaviour as str(obj), so
84
        # it won't break any existing code.
85
        return str(obj)
86
 
87
    except UnicodeEncodeError, e:
88
        # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
89
        # state that "The return value must be a string object". However, does a
90
        # unicode object (being a subclass of basestring) count as a "string
91
        # object"?
92
        # If so, then return a unicode object:
93
        return unicode(obj)
94
        # Else encode it... but how? There are many choices... :)
95
        # Replace unprintables with escape codes?
96
        #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
97
        # Replace unprintables with question marks?
98
        #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
99
        # ...
100
 
101
def _str2dict(strg):
102
    return dict( [(c,0) for c in strg] )
103
    #~ return set( [c for c in strg] )
104
 
105
class _Constants(object):
106
    pass
107
 
108
# Character-set constants (e.g. for Word( alphas ) as in the module docstring).
# string.ascii_lowercase / ascii_uppercase are used instead of
# string.lowercase / uppercase: the latter change with the process locale
# and no longer exist in Python 3, while the ascii_* constants are fixed.
alphas     = string.ascii_lowercase + string.ascii_uppercase
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
112
 
113
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor does nothing beyond storing its four arguments.
    def __init__( self, pstr, loc, msg, elem=None ):
        self.pstr = pstr
        self.loc = loc
        self.msg = msg
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute position attributes on demand from loc and pstr:
            - lineno - returns the line number of the exception text
            - col / column - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other missing attribute raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the input line containing the exception, stripped of
           surrounding whitespace, with markerString inserted at the
           exception's column (no insertion if markerString is empty).
        """
        text = self.line
        insert_at = self.column - 1
        if markerString:
            text = text[:insert_at] + markerString + text[insert_at:]
        return text.strip()
152
 
153
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class"""
    # Supported attributes by name (all inherited from ParseBaseException):
    #  - lineno - returns the line number of the exception text
    #  - col - returns the column number of the exception text
    #  - line - returns the line containing the exception text
161
 
162
class ParseFatalException(ParseBaseException):
    """User-throwable exception for inconsistent parse content;
       raising it stops all parsing immediately."""
166
 
167
class ReparseException(ParseBaseException):
    """Exception that stores a replacement parse string (newParseText) and a
       restart location (reparseLoc).

       NOTE(review): nothing in the visible code raises or catches this class;
       presumably it is meant to request a re-parse -- confirm before relying on it.
    """
    def __init__( self, newstring, restartLoc ):
        # This method used to be spelled "__init_", so it never ran as the
        # constructor and the two attributes below were never set.
        # Fill in the base-class fields as well, so that str(), repr() and
        # markInputline() work on instances of this class.
        ParseBaseException.__init__( self, newstring, restartLoc, "reparse requested" )
        self.newParseText = newstring
        self.reparseLoc = restartLoc
171
 
172
 
173
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # Kept for the error message produced by __str__.
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would be
        # unpacked as several format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
180
 
181
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # An existing ParseResults is handed back unchanged; its __doinit flag
        # is already False, so the __init__ call that follows will not reset it.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Store toklist (copied if it is a list, otherwise wrapped in one) and,
           when name is given, also register the tokens under that results name.

           modal=False marks the name as accumulating: lookups by that name
           then return every stored value instead of only the last one.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer/slice keys index the token list; any other key is a results name."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # ordinary name: value of the most recently stored entry
                return self.__tokdict[i][-1][0]
            else:
                # accumulating name: all stored values
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Tuples are appended as (value, position) entries under name k; an
           int k replaces a token; anything else is stored under name k at position 0."""
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        """Delete a token (int/slice key) or every entry stored under a results name."""
        if isinstance(i,(int,slice)):
            del self.__toklist[i]
        else:
            # Must be the name-mangled __tokdict.  The former "self._tokdict"
            # went through __getattr__, produced "" and failed with TypeError.
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __nonzero__( self ): return len( self.__toklist ) > 0
    def __iter__( self ): return iter( self.__toklist )

    def keys( self ):
        """Returns all named result keys."""
        return list( self.__tokdict.keys() )

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results.

           Returns "" for an unknown results name, and None when name is one
           of the entries listed in __slots__.
        """
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Append other's tokens and merge its named entries into self."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # Shift stored positions past the tokens already in self.  A
            # negative position becomes offset itself, unless offset is 0, in
            # which case the and/or chain falls through to a+offset.
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        """Flatten the tokens into a list of strings, placing sep between
           top-level items when sep is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           ParseResults become nested lists, other tokens are kept as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map stored token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
                else:
                    out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        """Return the results name under which the object sub is stored, or None."""
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            # exactly one token and one name, stored at position 0 or -1
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+str(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(str(v))
            else:
                out.append(str(v))
        return "".join(out)
457
 
458
def col (loc,strg):
    """Return the 1-based column of position loc within strg, where lines are
       separated by newlines.  A position that sits on a newline character
       reports column 1.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the previous newline (rfind gives -1 on the first line)
    return loc - strg.rfind("\n", 0, loc)
463
 
464
def lineno(loc,strg):
    """Return the 1-based line number of position loc within strg, where lines
       are separated by newlines.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
469
 
470
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "no newline" as -1, so index 0 is a genuine hit.  The
    # previous test (nextCR > 0) returned the whole string when loc pointed
    # at a newline that was the very first character.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
479
 
480
def _defaultStartDebugAction( instring, loc, expr ):
    """Print which expression is about to be tried at loc, with its (line,col)."""
    where = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print("Match %s at loc %s %s" % ( expr, loc, where ))
482
 
483
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched and its tokens as a nested list."""
    print("Matched %s -> %s" % ( expr, toks.asList() ))
485
 
486
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception raised while trying to match an expression."""
    # exc goes in a 1-tuple so it is always formatted as one value
    print("Exception raised: %s" % ( exc, ))
488
 
489
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
492
 
493
class ParserElement(object):
494
    """Abstract base level parser element class."""
495
    DEFAULT_WHITE_CHARS = " \n\t\r"
496
 
497
    def setDefaultWhitespaceChars( chars ):
498
        """Overrides the default whitespace chars
499
        """
500
        ParserElement.DEFAULT_WHITE_CHARS = chars
501
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
502
 
503
    def __init__( self, savelist=False ):
504
        self.parseAction = list()
505
        self.failAction = None
506
        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
507
        self.strRepr = None
508
        self.resultsName = None
509
        self.saveAsList = savelist
510
        self.skipWhitespace = True
511
        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
512
        self.copyDefaultWhiteChars = True
513
        self.mayReturnEmpty = False
514
        self.keepTabs = False
515
        self.ignoreExprs = list()
516
        self.debug = False
517
        self.streamlined = False
518
        self.mayIndexError = True
519
        self.errmsg = ""
520
        self.modalResults = True
521
        self.debugActions = ( None, None, None )
522
        self.re = None
523
 
524
    def copy( self ):
525
        """Make a copy of this ParserElement.  Useful for defining different parse actions
526
           for the same parsing pattern, using copies of the original parse element."""
527
        cpy = copy.copy( self )
528
        cpy.parseAction = self.parseAction[:]
529
        cpy.ignoreExprs = self.ignoreExprs[:]
530
        if self.copyDefaultWhiteChars:
531
            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
532
        return cpy
533
 
534
    def setName( self, name ):
535
        """Define name for this expression, for use in debugging."""
536
        self.name = name
537
        self.errmsg = "Expected " + self.name
538
        return self
539
 
540
    def setResultsName( self, name, listAllMatches=False ):
541
        """Define name for referencing matching tokens as a nested attribute
542
           of the returned parse results.
543
           NOTE: this returns a *copy* of the original ParserElement object;
544
           this is so that the client can define a basic element, such as an
545
           integer, and reference it in multiple places with different names.
546
        """
547
        newself = self.copy()
548
        newself.resultsName = name
549
        newself.modalResults = not listAllMatches
550
        return newself
551
 
552
    def normalizeParseActionArgs( f ):
553
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
554
           so that all parse actions can be called as f(s,l,t)."""
555
        STAR_ARGS = 4
556
 
557
        try:
558
            restore = None
559
            if isinstance(f,type):
560
                restore = f
561
                f = f.__init__
562
            if f.func_code.co_flags & STAR_ARGS:
563
                return f
564
            numargs = f.func_code.co_argcount
565
            if hasattr(f,"im_self"):
566
                numargs -= 1
567
            if restore:
568
                f = restore
569
        except AttributeError:
570
            try:
571
                # not a function, must be a callable object, get info from the
572
                # im_func binding of its bound __call__ method
573
                if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
574
                    return f
575
                numargs = f.__call__.im_func.func_code.co_argcount
576
                if hasattr(f.__call__,"im_self"):
577
                    numargs -= 1
578
            except AttributeError:
579
                # not a bound method, get info directly from __call__ method
580
                if f.__call__.func_code.co_flags & STAR_ARGS:
581
                    return f
582
                numargs = f.__call__.func_code.co_argcount
583
                if hasattr(f.__call__,"im_self"):
584
                    numargs -= 1
585
 
586
        #~ print "adding function %s with %d args" % (f.func_name,numargs)
587
        if numargs == 3:
588
            return f
589
        else:
590
            if numargs == 2:
591
                def tmp(s,l,t):
592
                    return f(l,t)
593
            elif numargs == 1:
594
                def tmp(s,l,t):
595
                    return f(t)
596
            else: #~ numargs == 0:
597
                def tmp(s,l,t):
598
                    return f()
599
            return tmp
600
    normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
601
 
602
    def setParseAction( self, *fns ):
603
        """Define action to perform when successfully matching parse element definition.
604
           Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
605
           fn(loc,toks), fn(toks), or just fn(), where:
606
            - s   = the original string being parsed
607
            - loc = the location of the matching substring
608
            - toks = a list of the matched tokens, packaged as a ParseResults object
609
           If the functions in fns modify the tokens, they can return them as the return
610
           value from fn, and the modified list of tokens will replace the original.
611
           Otherwise, fn does not need to return any value."""
612
        self.parseAction = map(self.normalizeParseActionArgs, list(fns))
613
        return self
614
 
615
    def addParseAction( self, *fns ):
616
        """Add parse action to expression's list of parse actions. See setParseAction_."""
617
        self.parseAction += map(self.normalizeParseActionArgs, list(fns))
618
        return self
619
 
620
    def setFailAction( self, fn ):
621
        """Define action to perform if parsing fails at this expression.
622
           Fail acton fn is a callable function that takes the arguments
623
           fn(s,loc,expr,err) where:
624
            - s = string being parsed
625
            - loc = location where expression match was attempted and failed
626
            - expr = the parse expression that failed
627
            - err = the exception thrown
628
           The function returns no value.  It may throw ParseFatalException
629
           if it is desired to stop parsing immediately."""
630
        self.failAction = fn
631
        return self
632
 
633
    def skipIgnorables( self, instring, loc ):
634
        exprsFound = True
635
        while exprsFound:
636
            exprsFound = False
637
            for e in self.ignoreExprs:
638
                try:
639
                    while 1:
640
                        loc,dummy = e._parse( instring, loc )
641
                        exprsFound = True
642
                except ParseException:
643
                    pass
644
        return loc
645
 
646
    def preParse( self, instring, loc ):
647
        if self.ignoreExprs:
648
            loc = self.skipIgnorables( instring, loc )
649
 
650
        if self.skipWhitespace:
651
            wt = self.whiteChars
652
            instrlen = len(instring)
653
            while loc < instrlen and instring[loc] in wt:
654
                loc += 1
655
 
656
        return loc
657
 
658
    def parseImpl( self, instring, loc, doActions=True ):
659
        return loc, []
660
 
661
    def postParse( self, instring, loc, tokenlist ):
662
        return tokenlist
663
 
664
    #~ @profile
665
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Run one complete match attempt of this expression at loc.

           Steps: optional preParse, parseImpl, postParse, packaging of the
           tokens as ParseResults, then the parse actions (when doActions is
           true).  Returns (new loc, ParseResults).  An IndexError raised by
           parseImpl is reported as a ParseException at the end of the input.
           Debug actions and failAction are called when configured."""
        debugging = ( self.debug ) #and doActions )
        watched = debugging or self.failAction

        if watched and self.debugActions[0]:
            self.debugActions[0]( instring, loc, self )

        if callPreParse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc

        if watched:
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseException:
                # sys.exc_info() instead of "except X, err" keeps this syntax
                # valid on every Python version
                err = sys.exc_info()[1]
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        elif self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # fast path: no IndexError guard needed
            loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and doActions:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        # the action returned replacement tokens: repackage them
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseException:
                # a failing parse action is reported only while debugging
                if debugging and self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, sys.exc_info()[1] )
                raise

        if debugging and self.debugActions[1]:
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
738
 
739
    def tryParse( self, instring, loc ):
740
        return self._parse( instring, loc, doActions=False )[0]
741
 
742
    # Memoizing variant of _parseNoCache.  During backtracking the same
    # expression is tried repeatedly with the same arguments, so both
    # successful results and parse exceptions are remembered and replayed.
    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Return (or re-raise) the outcome of parsing instring at loc, using
           ParserElement._exprArgCache keyed on (self, instring, loc, callPreParse).
           The cache is bypassed when parse actions have to be run for this
           expression.
        """
        if doActions and self.parseAction:
            return self._parseNoCache( instring, loc, doActions, callPreParse )

        cache = ParserElement._exprArgCache
        key = ( self, instring, loc, callPreParse )

        if key not in cache:
            try:
                outcome = self._parseNoCache( instring, loc, doActions, callPreParse )
            except ParseBaseException:
                # remember the failure as well, then let it continue upwards
                cache[ key ] = sys.exc_info()[1]
                raise
            cache[ key ] = outcome
            return outcome

        cached = cache[ key ]
        if isinstance( cached, Exception ):
            if isinstance( cached, ParseBaseException ):
                # refresh the location before replaying the stored exception
                cached.loc = loc
            raise cached
        return cached
763
 
764
    # parsing entry point used by all expressions; the uncached implementation
    # is the default
    _parse = _parseNoCache

    # argument cache for optimizing repeated calls when backtracking through
    # recursive expressions; shared by every ParserElement
    _exprArgCache = dict()
    def resetCache():
        """Discard every memoized parse outcome held in the shared cache."""
        ParserElement._exprArgCache.clear()
    resetCache = staticmethod(resetCache)
771
 
772
    # set once enablePackrat() has switched _parse over to _parseCache
    _packratEnabled = False
    def enablePackrat():
        """Turns on "packrat" parsing, i.e. memoization of parse attempts.
           When the same expression is tried again at the same string location
           (common in complex grammars), the cached outcome - a valid result or
           a parsing exception - is returned instead of re-running the
           parsing/validating code.

           Because parse actions with side-effects may behave differently
           under memoization, packrat parsing is off when pyparsing is first
           imported; a program opts in by calling the class method
           ParserElement.enablePackrat().  Programs that use psyco to
           "compile as you go" must call enablePackrat before calling
           psyco.full(), otherwise Python will crash.  For best results, call
           enablePackrat() immediately after importing pyparsing.
        """
        if ParserElement._packratEnabled:
            return
        ParserElement._packratEnabled = True
        ParserElement._parse = ParserElement._parseCache
    enablePackrat = staticmethod(enablePackrat)
793
 
794
    def parseString( self, instring ):
        """Parse instring from its beginning with this expression and return
           the resulting tokens.  This is the main entry point for client
           code once the complete expression has been built.  Unless keepTabs
           is set, tabs in the input are expanded before parsing.
        """
        ParserElement.resetCache()
        if not self.streamlined:
            self.streamline()
            #~ self.saveAsList = True
        for ignoreExpr in self.ignoreExprs:
            ignoreExpr.streamline()
        if not self.keepTabs:
            instring = instring.expandtabs()
        endLoc, tokens = self._parse( instring, 0 )
        return tokens
810
 
811
    def scanString( self, instring, maxMatches=sys.maxint ):
        """Scan the input string for expression matches.  Each match is
           yielded as a (tokens, start location, end location) tuple.  May be
           called with the optional maxMatches argument, to clip scanning
           after 'n' matches are found.

           Unless keepTabs is set, tabs are expanded first, so the yielded
           locations refer to the expanded string.  A match that does not
           advance past the current scan position (a zero-width match) is not
           reported; the scan simply moves on by one character, so expressions
           that can match the empty string cannot stall the generator.
        """
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = instring.expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods once; they are called for every position
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here - retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    loc = nextLoc
                else:
                    # zero-width match: resuming at nextLoc would find the
                    # very same match again, forever
                    loc = preloc+1
838
 
839
    def transformString( self, instring ):
        """Extension to scanString that rewrites the matched portions of
           instring.  Define a grammar, attach a parse action that modifies
           the returned token list, then call transformString() on a target
           string: every match found by scanString is replaced by its tokens
           while the text between matches is copied through unchanged.
           Returns the resulting transformed string.
        """
        pieces = []
        resumeAt = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        for tokens, start, end in self.scanString( instring ):
            # text between the previous match and this one
            pieces.append( instring[resumeAt:start] )
            if tokens:
                if isinstance( tokens, ParseResults ):
                    pieces.extend( tokens.asList() )
                elif isinstance( tokens, list ):
                    pieces.extend( tokens )
                else:
                    pieces.append( tokens )
            resumeAt = end
        # whatever follows the final match
        pieces.append( instring[resumeAt:] )
        return "".join( pieces )
863
 
864
    def searchString( self, instring, maxMatches=sys.maxint ):
        """Another extension to scanString: collects only the tokens of each
           match (dropping the start and end locations) into a ParseResults.
           May be called with the optional maxMatches argument, to clip
           searching after 'n' matches are found.
        """
        found = []
        for tokens, start, end in self.scanString( instring, maxMatches ):
            found.append( tokens )
        return ParseResults( found )
870
 
871
    def __add__(self, other ):
        """Implementation of + operator - returns And( [ self, other ] ).
           A string operand is wrapped in a Literal; any other operand that is
           not a ParserElement triggers a SyntaxWarning.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        elif not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
        operands = [ self, other ]
        return And( operands )
879
 
880
    def __radd__(self, other ):
        """Implementation of the reflected + operator (other + self), used
           when the left operand is not a ParserElement, e.g. "abc" + expr.
           A string operand is wrapped in a Literal.  For any other
           unsupported operand a SyntaxWarning is issued and NotImplemented
           is returned, so that Python raises its usual TypeError.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            # evaluating other + self again would only dispatch straight
            # back to this method and recurse without end
            return NotImplemented
        return other + self
888
 
889
    def __or__(self, other ):
        """Implementation of | operator - returns MatchFirst( [ self, other ] ).
           A string operand is wrapped in a Literal; any other operand that is
           not a ParserElement triggers a SyntaxWarning.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        elif not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
        alternatives = [ self, other ]
        return MatchFirst( alternatives )
897
 
898
    def __ror__(self, other ):
        """Implementation of the reflected | operator (other | self), used
           when the left operand is not a ParserElement, e.g. "abc" | expr.
           A string operand is wrapped in a Literal.  For any other
           unsupported operand a SyntaxWarning is issued and NotImplemented
           is returned, so that Python raises its usual TypeError.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            # evaluating other | self again would only dispatch straight
            # back to this method and recurse without end
            return NotImplemented
        return other | self
906
 
907
    def __xor__(self, other ):
        """Implementation of ^ operator - returns Or( [ self, other ] ).
           A string operand is wrapped in a Literal; any other operand that is
           not a ParserElement triggers a SyntaxWarning.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        elif not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
        alternatives = [ self, other ]
        return Or( alternatives )
915
 
916
    def __rxor__(self, other ):
        """Implementation of the reflected ^ operator (other ^ self), used
           when the left operand is not a ParserElement, e.g. "abc" ^ expr.
           A string operand is wrapped in a Literal.  For any other
           unsupported operand a SyntaxWarning is issued and NotImplemented
           is returned, so that Python raises its usual TypeError.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            # evaluating other ^ self again would only dispatch straight
            # back to this method and recurse without end
            return NotImplemented
        return other ^ self
924
 
925
    def __and__(self, other ):
        """Implementation of & operator - returns Each( [ self, other ] ).
           A string operand is wrapped in a Literal; any other operand that is
           not a ParserElement triggers a SyntaxWarning.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        elif not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
        members = [ self, other ]
        return Each( members )
933
 
934
    def __rand__(self, other ):
        """Implementation of the reflected & operator (other & self), used
           when the left operand is not a ParserElement, e.g. "abc" & expr.
           A string operand is wrapped in a Literal.  For any other
           unsupported operand a SyntaxWarning is issued and NotImplemented
           is returned, so that Python raises its usual TypeError.
        """
        if isinstance( other, basestring ):
            other = Literal( other )
        if not isinstance( other, ParserElement ):
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=2)
            # evaluating other & self again would only dispatch straight
            # back to this method and recurse without end
            return NotImplemented
        return other & self
942
 
943
    def __invert__( self ):
        """Implementation of ~ operator - wraps this expression in a NotAny
           and returns it.
        """
        negated = NotAny( self )
        return negated
946
 
947
    def suppress( self ):
        """Returns this ParserElement wrapped in a Suppress, so that its
           output is left out of the results; useful to keep punctuation from
           cluttering up returned output.
        """
        silenced = Suppress( self )
        return silenced
952
 
953
    def leaveWhitespace( self ):
        """Turns off the skipping of whitespace before matching the characters
           in the ParserElement's defined pattern, and returns self so calls
           can be chained.  This is normally only used internally by the
           pyparsing module, but may be needed in some whitespace-sensitive
           grammars.
        """
        setattr( self, "skipWhitespace", False )
        return self
960
 
961
    def setWhitespaceChars( self, chars ):
        """Overrides the default whitespace chars for this element: enables
           whitespace skipping, stores chars as the characters to skip and
           clears copyDefaultWhiteChars.  Returns self.
        """
        self.whiteChars = chars
        self.skipWhitespace = True
        self.copyDefaultWhiteChars = False
        return self
968
 
969
    def parseWithTabs( self ):
        """Overrides the default behavior of expanding <TAB>s to spaces before
           parsing the input string, by setting keepTabs.  Must be called
           before parseString when the input grammar contains elements that
           match <TAB> characters.  Returns self.
        """
        setattr( self, "keepTabs", True )
        return self
975
 
976
    def ignore( self, other ):
        """Registers an expression to be ignored (e.g., comments) while doing
           pattern matching; may be called repeatedly, to define multiple
           comment or other ignorable patterns.  An expression that is not
           already a Suppress is wrapped in one; a Suppress is only added if
           it is not registered yet.  Returns self.
        """
        if not isinstance( other, Suppress ):
            self.ignoreExprs.append( Suppress( other ) )
        elif other not in self.ignoreExprs:
            self.ignoreExprs.append( other )
        return self
987
 
988
    def setDebugActions( self, startAction, successAction, exceptionAction ):
        """Enable display of debugging messages while doing pattern matching,
           using the given callbacks; any callback passed as a false value is
           replaced by the module's default debug action.  Returns self.
        """
        onStart = startAction or _defaultStartDebugAction
        onSuccess = successAction or _defaultSuccessDebugAction
        onException = exceptionAction or _defaultExceptionDebugAction
        self.debugActions = ( onStart, onSuccess, onException )
        self.debug = True
        return self
995
 
996
    def setDebug( self, flag=True ):
        """Enable display of debugging messages while doing pattern matching
           (installing the default debug actions), or disable it again when
           flag is false.  Returns self.
        """
        if not flag:
            self.debug = False
            return self
        self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
        return self
1003
 
1004
    def __str__( self ):
        """Return the name assigned to this element."""
        label = self.name
        return label
1006
 
1007
    def __repr__( self ):
        """Return the same text as the string conversion done by _ustr."""
        text = _ustr(self)
        return text
1009
 
1010
    def streamline( self ):
        """Mark this element as streamlined and drop any cached string
           representation.  Returns self.
        """
        self.strRepr = None
        self.streamlined = True
        return self
1014
 
1015
    def checkRecursion( self, parseElementList ):
        """No-op in this class; always returns None."""
        return None
1017
 
1018
    def validate( self, validateTrace=[] ):
        """Check defined expressions for valid structure, check for infinite recursive definitions."""
        # validateTrace is not used here; the recursion check always starts
        # from a fresh, empty list
        visited = []
        self.checkRecursion( visited )
1021
 
1022
    def parseFile( self, file_or_filename ):
        """Execute the parse expression on the given file or filename.
           If a filename is specified (instead of a file object),
           the entire file is opened, read, and closed before parsing.
           Returns whatever parseString returns for the file's contents.
        """
        try:
            file_contents = file_or_filename.read()
        except AttributeError:
            # no read() method - treat the argument as a filename
            f = open(file_or_filename, "rb")
            try:
                file_contents = f.read()
            finally:
                # release the handle even if reading fails
                f.close()
        return self.parseString(file_contents)
1034
 
1035
 
1036
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )
        # one reusable exception per token; parseImpl methods update its
        # loc/pstr and raise it instead of building a new one each time
        self.myException = ParseException("",0,"",self)

    def setName(self, name):
        """Set the name via the base class, then refresh the error message
           stored on this token and on its reusable exception.
        """
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        renamed.myException.msg = self.errmsg
        return renamed
1047
 
1048
 
1049
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1056
 
1057
 
1058
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        message = "Unmatchable token"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise the token's reusable exception, updated with
           the current location and input string.
        """
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1073
 
1074
 
1075
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen > 0:
            self.firstMatchChar = matchString[0]
        else:
            # nothing to match against - warn and turn this object into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*.
    # The first character is compared before anything else, and a
    # single-character literal never needs the startswith call at all.
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) when instring holds the
           literal at loc; otherwise raise the token's reusable exception.
        """
        if ( instring[loc] != self.firstMatchChar or
             (self.matchLen != 1 and not instring.startswith(self.match,loc)) ):
            #~ raise ParseException( instring, loc, self.errmsg )
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc+self.matchLen, self.match
1106
 
1107
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$" (or to whatever was last passed
       to setDefaultKeywordChars); caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # looked up at call time, so that setDefaultKeywordChars() also
            # applies to Keywords created after it was called
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the keyword is found at loc and
           is neither preceded nor followed by an identifier character;
           otherwise raise the token's reusable exception.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this Keyword whose identChars are reset to the
           current default keyword characters.
        """
        c = super(Keyword,self).copy()
        # NOTE(review): this drops any custom identChars given to the
        # constructor - confirm that is intended before relying on copies
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1167
 
1168
 
1169
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form, which is what input
        # text is compared against
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, defining literal) when the upper-cased input
           at loc equals the stored match string; otherwise raise the token's
           reusable exception.
        """
        end = loc+self.matchLen
        if instring[ loc:end ].upper() != self.match:
            #~ raise ParseException( instring, loc, self.errmsg )
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
1190
 
1191
class CaselessKeyword(Keyword):
    """Keyword that is matched ignoring the case of letters.  The matched
       result is always the keyword string as given to the constructor.
       identChars defaults to the current Keyword.DEFAULT_KEYWORD_CHARS.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # looked up at call time, so that Keyword.setDefaultKeywordChars()
            # also applies to keywords created after it was called
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the upper-cased input at loc
           equals the upper-cased keyword and is not followed by an identifier
           character; otherwise raise the token's reusable exception.
        """
        # NOTE(review): unlike Keyword.parseImpl, the character before loc is
        # not checked here - confirm whether that difference is intended
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1204
 
1205
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

        # With default length limits and no space among the characters, the
        # word can be matched by a compiled regular expression instead of
        # the character-by-character loop in parseImpl.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # Only a single *initial* character may be emitted as a
                # literal; several initial characters written literally would
                # have to appear as a sequence rather than as alternatives.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # pattern could not be compiled - use the manual scan instead
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, matched word) for a word starting at loc, or
           raise the token's reusable exception when there is no word or its
           length violates the configured limits.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan past the maximum word length or the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that keeps going beyond it is a
        # failure rather than a truncated match
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name if the base class can supply one,
           otherwise a "W:(...)" summary built from the character sets
           (cached in strRepr).
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            # base class could not supply a name - build a description
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1320
 
1321
 
1322
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            # warn about the bad pattern, then let the compile error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.  Returns the end location and a
           ParseResults holding the matched text, with every named group of
           the pattern also stored under its group name.  Raises the token's
           reusable exception when the pattern does not match.
        """
        matched = self.re.match(instring,loc)
        if not matched:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(matched.group())
        for groupName, groupText in matched.groupdict().items():
            tokens[groupName] = groupText
        return matched.end(), tokens

    def __str__( self ):
        """Return the assigned name if the base class can supply one,
           otherwise a "Re:(...)" description of the pattern (cached in
           strRepr).
        """
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1377
 
1378
 
1379
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters defining the quote delimiting string
           - escChar - character to escape quotes, typically backslash (default=None)
           - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
           - multiline - boolean indicating whether quotes can span multiple lines (default=False)
           - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
           - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) when quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The pattern is assembled piece by piece: opening quote, then a
        # repeated group of alternatives for the body, then the closing quote.
        # The first alternative is any character that is not the first
        # end-quote character, not the escape character and - unless
        # multiline - not a line break.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote followed by a
            # character that does not continue it is still part of the body
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character from the result
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.  Returns the end location and the
           matched text; when unquoteResults is set, the delimiters are
           stripped and escaped characters / escaped quotes are replaced by
           the characters they stand for.  Raises the token's reusable
           exception when there is no quoted string at loc.
        """
        # cheap first-character test before running the regular expression
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the assigned name if the base class can supply one,
           otherwise a description naming the delimiters (cached in strRepr).
        """
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # base class could not supply a name - build a description
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1494
 
1495
 
1496
class CharsNotIn(Token):
    """Token for matching a run of characters that are all *absent* from a given set.
       Constructed from a string listing every disallowed character, plus an optional
       minimum, maximum, and/or exact length for the run.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # a positive 'exact' overrides both bounds; a non-positive 'max' means no upper limit
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc until a disallowed character, the maximum
           length, or the end of the input is reached; return (newLoc, text)."""
        forbidden = self.notChars
        if instring[loc] in forbidden:
            # very first character is disallowed - no match at all
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        begin = loc
        stop = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            # run ended before the required minimum length
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[begin:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        # show at most the first four disallowed characters
        if len(self.notChars) > 4:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        else:
            self.strRepr = "!W:(%s)" % self.notChars
        return self.strRepr
1561
 
1562
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; the default is space, tab, carriage return and linefeed.  Also takes
       optional min, max, and exact arguments, as defined for the Word class."""
    # printable tags used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters we are asked to match must not be skipped as leading whitespace
        stillSkipped = [ ch for ch in self.whiteChars if ch not in self.matchWhite ]
        self.setWhitespaceChars( "".join(stillSkipped) )
        #~ self.leaveWhitespace()
        self.name = "".join([ White.whiteStrs[ch] for ch in self.matchWhite ])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        # a positive 'exact' overrides both bounds; a non-positive 'max' means no upper limit
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at loc
           and return (newLoc, text)."""
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        begin = loc
        stop = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            # run ended before the required minimum length
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[begin:loc]
1618
 
1619
 
1620
class PositionToken(Token):
    """Base class for the position-checking tokens defined below; it is named
       after its concrete class and flagged as possibly returning empty."""
    def __init__( self ):
        super(PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
1626
 
1627
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column (or a
           non-space character, or the end of input) is reached."""
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column; fail if loc is
           already past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
1649
 
1650
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skipped, so newlines stay significant
        self.setWhitespaceChars( " \t" )
        message = "Expected start of line"
        self.errmsg = message
        self.myException.msg = message

    def preParse( self, instring, loc ):
        """Step one position past loc when the character found after the base
           class's skipping is a newline; otherwise return loc unchanged."""
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        # succeed at position 0, or when inside the string right after a newline
        if loc != 0 and ( loc >= len(instring) or instring[loc-1] != "\n" ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
1672
 
1673
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # only blanks and tabs are skipped, so the newline itself can be matched
        self.setWhitespaceChars( " \t" )
        message = "Expected end of line"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the newline at loc, or an empty token list when loc is exactly
           the end of the input; anything else is a failure."""
        textLen = len(instring)
        if loc < textLen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == textLen:
            return loc+1, []

        # either a non-newline character, or loc is beyond the end of the input
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1698
 
1699
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        message = "Expected start of text"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl( self, instring, loc, doActions=True ):
        # a non-zero loc is still accepted when everything before it is
        # skipped by preParse starting from position 0
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
1716
 
1717
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        message = "Expected end of text"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only when loc is exactly len(instring); the returned location
           is one past the end of the input."""
        if loc == len(instring):
            return loc+1, []

        # loc is either before or beyond the end of the input
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1738
 
1739
 
1740
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Holds the list of contained expressions in self.exprs."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # normalize the argument: a list is used as given, a string becomes a
        # Literal, and anything else is wrapped in a one-element list
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            self.exprs = [ exprs ]

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add another contained expression and drop the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           copies of all contained expressions."""
        self.skipWhitespace = False
        copies = [ e.copy() for e in self.exprs ]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on every contained expression.
           A Suppress that is already registered is not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        def collapsible( candidate ):
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        if len(self.exprs) != 2:
            return self

        head = self.exprs[0]
        if collapsible( head ):
            self.exprs = head.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= head.mayReturnEmpty
            self.mayIndexError  |= head.mayIndexError

        # the tail is looked up after the head merge, on the possibly updated list
        tail = self.exprs[-1]
        if collapsible( tail ):
            self.exprs = self.exprs[:-1] + tail.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= tail.mayReturnEmpty
            self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this one for recursion."""
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
1831
 
1832
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence may match empty only if every member may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # take the whitespace settings from the first *normalized* expression;
        # the raw 'exprs' argument may be a string or a single expression
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in turn, accumulating their tokens;
           returns (newLoc, tokens)."""
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions )
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # stop at the first member that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
1875
 
1876
 
1877
class Or(ParseExpression):
1878
    """Requires that at least one ParseExpression is found.
1879
       If two expressions match, the expression that matches the longest string will be used.
1880
       May be constructed using the '^' operator.
1881
    """
1882
    def __init__( self, exprs, savelist = False ):
1883
        super(Or,self).__init__(exprs, savelist)
1884
        self.mayReturnEmpty = False
1885
        for e in self.exprs:
1886
            if e.mayReturnEmpty:
1887
                self.mayReturnEmpty = True
1888
                break
1889
 
1890
    def parseImpl( self, instring, loc, doActions=True ):
1891
        maxExcLoc = -1
1892
        maxMatchLoc = -1
1893
        for e in self.exprs:
1894
            try:
1895
                loc2 = e.tryParse( instring, loc )
1896
            except ParseException, err:
1897
                if err.loc > maxExcLoc:
1898
                    maxException = err
1899
                    maxExcLoc = err.loc
1900
            except IndexError, err:
1901
                if len(instring) > maxExcLoc:
1902
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
1903
                    maxExcLoc = len(instring)
1904
            else:
1905
                if loc2 > maxMatchLoc:
1906
                    maxMatchLoc = loc2
1907
                    maxMatchExp = e
1908
 
1909
        if maxMatchLoc < 0:
1910
            if self.exprs:
1911
                raise maxException
1912
            else:
1913
                raise ParseException(instring, loc, "no defined alternatives to match", self)
1914
 
1915
        return maxMatchExp._parse( instring, loc, doActions )
1916
 
1917
    def __ixor__(self, other ):
1918
        if isinstance( other, basestring ):
1919
            other = Literal( other )
1920
        return self.append( other ) #Or( [ self, other ] )
1921
 
1922
    def __str__( self ):
1923
        if hasattr(self,"name"):
1924
            return self.name
1925
 
1926
        if self.strRepr is None:
1927
            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1928
 
1929
        return self.strRepr
1930
 
1931
    def checkRecursion( self, parseElementList ):
1932
        subRecCheckList = parseElementList[:] + [ self ]
1933
        for e in self.exprs:
1934
            e.checkRecursion( subRecCheckList )
1935
 
1936
 
1937
class MatchFirst(ParseExpression):
1938
    """Requires that at least one ParseExpression is found.
1939
       If two expressions match, the first one listed is the one that will match.
1940
       May be constructed using the '|' operator.
1941
    """
1942
    def __init__( self, exprs, savelist = False ):
1943
        super(MatchFirst,self).__init__(exprs, savelist)
1944
        if exprs:
1945
            self.mayReturnEmpty = False
1946
            for e in self.exprs:
1947
                if e.mayReturnEmpty:
1948
                    self.mayReturnEmpty = True
1949
                    break
1950
        else:
1951
            self.mayReturnEmpty = True
1952
 
1953
    def parseImpl( self, instring, loc, doActions=True ):
1954
        maxExcLoc = -1
1955
        for e in self.exprs:
1956
            try:
1957
                ret = e._parse( instring, loc, doActions )
1958
                return ret
1959
            except ParseException, err:
1960
                if err.loc > maxExcLoc:
1961
                    maxException = err
1962
                    maxExcLoc = err.loc
1963
            except IndexError, err:
1964
                if len(instring) > maxExcLoc:
1965
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
1966
                    maxExcLoc = len(instring)
1967
 
1968
        # only got here if no expression matched, raise exception for match that made it the furthest
1969
        else:
1970
            if self.exprs:
1971
                raise maxException
1972
            else:
1973
                raise ParseException(instring, loc, "no defined alternatives to match", self)
1974
 
1975
    def __ior__(self, other ):
1976
        if isinstance( other, basestring ):
1977
            other = Literal( other )
1978
        return self.append( other ) #MatchFirst( [ self, other ] )
1979
 
1980
    def __str__( self ):
1981
        if hasattr(self,"name"):
1982
            return self.name
1983
 
1984
        if self.strRepr is None:
1985
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1986
 
1987
        return self.strRepr
1988
 
1989
    def checkRecursion( self, parseElementList ):
1990
        subRecCheckList = parseElementList[:] + [ self ]
1991
        for e in self.exprs:
1992
            e.checkRecursion( subRecCheckList )
1993
 
1994
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the group may match empty only if every member may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # classify the *normalized* expression list (the raw 'exprs' argument
        # may be a string or a single expression rather than a list)
        members = self.exprs
        self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the members match, then parse them in that
           order and merge their results; returns (newLoc, results)."""
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # trial passes using tryParse: keep sweeping over the remaining
        # candidates until a whole sweep matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # required and optional members are consumed once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # real pass: parse again in the discovered order, from the original loc
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # for names already present, build the combined value first and
            # re-assign it after the merge
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2072
 
2073
 
2074
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Wraps a single contained expression, self.expr, which may be None."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        # a plain string is promoted to a Literal
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit parsing characteristics from the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException when there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions )
        else:
            # report against the real input string so the exception carries usable context
            raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # copy only when there is something to copy; the previous code called
        # .copy() before its own None check and failed on an empty wrapper
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.
           A Suppress that is already registered is not added again."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        # meeting ourselves again on the current path means the grammar is recursive
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this one for recursion."""
        # validateTrace is only copied, never mutated, so the shared default is safe
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2141
 
2142
 
2143
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the lookahead fails; its result is deliberately discarded
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2155
 
2156
 
2157
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent - that is a success for NotAny
            return loc, []

        # the unwanted expression matched here
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
2192
 
2193
 
2194
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression as many times as it will match,
           returning (newLoc, tokens); zero matches yields an empty list."""
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions )
            skipIgnored = ( len(self.ignoreExprs) > 0 )
            while True:
                startAt = loc
                if skipIgnored:
                    startAt = self.skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, startAt, doActions )
                # keep results that carry tokens or named entries
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2231
 
2232
 
2233
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (a failure here propagates), then
           as many more times as it will match; returns (newLoc, tokens)."""
        # must be at least one
        loc, collected = self.expr._parse( instring, loc, doActions )
        try:
            skipIgnored = ( len(self.ignoreExprs) > 0 )
            while True:
                startAt = loc
                if skipIgnored:
                    startAt = self.skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, startAt, doActions )
                # keep results that carry tokens or named entries
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2266
 
2267
class _NullToken(object):
2268
    def __bool__(self):
2269
        return False
2270
    def __str__(self):
2271
        return ""
2272
 
2273
_optionalNotMatched = _NullToken()
2274
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       When the expression does not match, parsing still succeeds; a default
       token may be given to stand in for the missing match.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression; on ParseException or IndexError leave
           loc unchanged and return either [defaultValue] or an empty list.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions )
        except (ParseException,IndexError):
            tokens = []
            # the sentinel means the caller never supplied a default
            if self.defaultValue is not _optionalNotMatched:
                tokens.append( self.defaultValue )
        return loc, tokens

    def __str__( self ):
        """Return the name attribute if present, else the "[expr]" form,
           which is built once and kept in strRepr.
        """
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr

        return self.name
2302
 
2303
 
2304
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the caller's expression does not pick up the ignore
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)
        # exception object created once here and re-raised by parseImpl
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from loc until self.expr matches.

           Returns (loc, [skippedText]) with loc at the start of the match, or,
           when includeMatch is set, (locAfterMatch, [skippedText, matchTokens])
           (the match tokens are left out if they are empty).  Raises the
           stored ParseException if no position up to the end of the string
           matches.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            try:
                # step over ignorable text first; it becomes part of the skipped text
                loc = expr.skipIgnorables( instring, loc )
                # trial match only: parse actions off, no pre-parse
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    # parse again, this time keeping the location and tokens
                    loc,mat = expr._parse(instring,loc)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                # no match here - retry one character further on
                loc += 1
        # ran off the end of the input: fill in and raise the stored exception
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2345
 
2346
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression to this placeholder and return self."""
        # plain strings are wrapped in a Literal
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this object only; self.expr is not touched here."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression, at most once per instance."""
        if not self.streamlined:
            # the flag is set before descending into self.expr
            # NOTE(review): presumably so a grammar that refers back to this
            # Forward does not recurse forever - confirm against base class
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then call checkRecursion([])."""
        # the default list is never mutated: tmp below is a fresh copy
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the name attribute if present, else "Forward: " plus the
           string form of the contained expression ("None" if unset).
        """
        if hasattr(self,"name"):
            return self.name

        # While the contents are rendered this object is temporarily switched
        # to _ForwardNoRecurse, whose __str__ returns "...", so a reference
        # back to this same object prints as "..." instead of recursing.
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            # NOTE: the class is always restored to Forward itself
            self.__class__ = Forward
        return "Forward: "+retString

    def copy(self):
        """Return a copy; for a Forward with no expression yet, return a new
           Forward whose expression is this object.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
2410
 
2411
class _ForwardNoRecurse(Forward):
    # Forward.__str__ temporarily assigns this class to an instance while it
    # renders the contained expression; any nested str() of that instance
    # then yields "..." rather than recursing.
    def __str__( self ):
        return "..."
2414
 
2415
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: savelist is accepted but not forwarded to the base class
        # (the argument is commented out); saveAsList is forced to False.
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
2420
 
2421
 
2422
class Upcase(TokenConverter):
    """Converter that returns every matched token in upper case."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding string.upper() of each token in tokenlist."""
        return [ string.upper(tok) for tok in tokenlist ]
2429
 
2430
 
2431
class Downcase(TokenConverter):
    """Converter that returns every matched token in lower case."""
    def __init__(self, *args):
        super(Downcase,self).__init__(*args)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding string.lower() of each token in tokenlist."""
        return [ string.lower(tok) for tok in tokenlist ]
2438
 
2439
 
2440
 
2441
class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.
       Unless 'adjacent=False' is passed to the constructor, the matched
       pieces must also be contiguous in the input string.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression and return self.

           For an adjacent Combine ParserElement.ignore is called directly;
           otherwise the call goes through the normal superclass chain.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single joined string token."""
        # start from a copy of the results, emptied of its list items
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined
2471
 
2472
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # overrides the False set by TokenConverter.__init__
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the whole result in a one-element list
        return [ tokenlist ]
2480
 
2481
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for every element it contains.

           The key is the stripped string form of the element's first item.
           The value stored is a (value, index) tuple, where value is:
             - "" when the element has only the key,
             - the second item when there are exactly two items and the
               second is not a ParseResults,
             - otherwise a copy of the element without its key (unwrapped to
               its single item when it has exactly one and no named entries).
           Empty elements are skipped.  Returns [tokenlist] when this
           expression has a results name, else tokenlist itself.
        """
        for i,tok in enumerate(tokenlist):
            # an empty element has no first item to use as a key; skip it
            # instead of failing with IndexError on tok[0]
            if len(tok) == 0:
                continue
            ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                tokenlist[ikey] = ("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                tokenlist[ikey] = (tok[1],i)
            else:
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    tokenlist[ikey] = (dictvalue,i)
                else:
                    tokenlist[ikey] = (dictvalue[0],i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
2509
 
2510
 
2511
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        return []

    def suppress( self ):
        # already a Suppress - return this same object
        return self
2518
 
2519
 
2520
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

       The first call is passed through to the wrapped parse action; every
       later call raises ParseException until reset() is called.
    """
    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        if not self.called:
            results = self.callable(s,l,t)
            # only marked as used once the wrapped action returned normally
            self.called = True
            return results
        raise ParseException(s,l,"")
    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
2533
 
2534
def traceParseAction(f):
    """Decorator for debugging parse actions.

       Wraps f so that every call writes an "entering" line to sys.stderr
       (function name, current input line, location and tokens) and a
       "leaving" line with either the return value or the exception.  The
       return value is passed through and exceptions are re-raised.
    """
    f = ParserElement.normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ is the same value as func_name on Python 2 functions
        thisFunc = f.__name__
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument: prefix its class name to the function name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() avoids the version-specific "except X, e" /
            # "except X as e" spellings
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    return z
2551
 
2552
#
2553
# global helpers
2554
#
2555
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a list of expressions separated by a delimiter
       (',' unless another delim is given).
       With combine=False (the default) the delimiters are suppressed and the
       list items are returned as separate tokens.  With combine=True the
       whole list, delimiters included, is wrapped in a Combine and returned
       as one token string.
    """
    dlName = "%s [%s %s]..." % ( _ustr(expr), _ustr(delim), _ustr(expr) )
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
2568
 
2569
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       The pattern built has the form::
           integer expr expr expr...
       where the leading integer says how many expr items follow.
       The expr tokens are returned grouped as a list; the parse action on
       the count returns an empty list, so the count itself is not reported.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        count = int(t[0])
        # redefine the Forward on the fly to expect exactly 'count' items
        contents = count and Group(And([expr]*count)) or Group(empty)
        arrayExpr << contents
        return []
    countExpr = Word(nums).setParseAction(countFieldParseAction)
    return ( countExpr + arrayExpr )
2582
 
2583
def _flatten(L):
2584
    if type(L) is not list: return [L]
2585
    if L == []: return L
2586
    return _flatten(L[0]) + _flatten(L[1:])
2587
 
2588
def matchPreviousLiteral(expr):
    """Helper to define an expression that repeats, literally, whatever
       tokens an earlier expression matched.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Since the repeat is matched as a
       literal, it will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater each time expr matches
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            flatTokens = _flatten(t.asList())
            rep << And( [ Literal(tok) for tok in flatTokens ] )
    expr.addParseAction(copyTokenToRepeater)
    return rep
2613
 
2614
def matchPreviousExpr(expr):
    """Helper to define an expression that must match the same tokens an
       earlier expression matched.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  The repeat is parsed with a copy of
       the expression and the resulting tokens are then compared, so this
       will *not* match the leading "1:1" in "1:10" ("1" is compared with
       "10").
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched ...
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # ... and reject the repeat unless it produced the same tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens )
    expr.addParseAction(copyTokenToRepeater)
    return rep
2639
 
2640
def _escapeRegexRangeChars(s):
    # Backslash-escape the characters \ ^ - ] and spell newline and tab as
    # \n and \t.  The backslash is handled first so that the backslashes
    # added for the other characters are not escaped again.
    for special in ( "\\", "^", "-", "]" ):
        s = s.replace(special,"\\"+special)
    for raw,spelled in ( ("\n",r"\n"), ("\t",r"\t") ):
        s = s.replace(raw,spelled)
    return _ustr(s)
2647
 
2648
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list (or tuple) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument of any other type produces a SyntaxWarning and is treated
       as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a mutable list copy; the loop below deletes and
        # inserts items, which a tuple slice would not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        symbols = []

    # Remove duplicates, and move any symbol that is hidden behind a shorter
    # prefix of itself in front of that prefix.  After each change the scan
    # for position i restarts; i only advances once nothing was changed.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    # an empty symbol list cannot form a valid regex, so it goes straight
    # to the MatchFirst fallback
    if not caseless and useRegex and symbols:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2706
 
2707
def dictOf( key, value ):
    """Helper to define a dictionary from a key pattern and a value pattern.
       Builds Dict( ZeroOrMore( Group( key + value ) ) ), so the Dict,
       ZeroOrMore and Group tokens end up nested in the proper order.  The key
       pattern may contain delimiting markers or punctuation as long as they
       are suppressed, leaving only the significant key text.  The value
       pattern may contain named results, so that the Dict results can include
       named token fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
2716
 
2717
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# --- small grammar used by srange() to parse "[...]" range strings ---
# a backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# note: despite the name, this excludes both the backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# "\0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# "\" + octal digits starting with 0, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expand a [first, last] range into the string of all characters between
# them (inclusive); anything that is not a ParseResults is returned as is
# NOTE(review): with the and/or idiom a range that expands to "" (first >
# last) falls through to 'or p' and yields the ParseResults itself
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
2736
 
2737
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range string gives "", but interpreter
        # exits and interrupts are no longer swallowed
        return ""
2757
 
2758
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and
       always returns [replStr].  Especially useful when used with
       transformString().
    """
    replacement = replStr
    def _replFunc(*args):
        return [replacement]
    return _replFunc
2765
 
2766
def removeQuotes(s,l,t):
    """Helper parse action that drops the first and last character of the
       first token, i.e. the quotation marks of a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
2772
 
2773
def upcaseTokens(s,l,t):
    """Helper parse action that returns str() of every token, upper-cased."""
    converted = []
    for tok in t:
        converted.append( str(tok).upper() )
    return converted
2776
 
2777
def downcaseTokens(s,l,t):
    """Helper parse action that returns str() of every token, lower-cased."""
    converted = []
    for tok in t:
        converted.append( str(tok).lower() )
    return converted
2780
 
2781
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Returns s[startLoc:endloc], where endloc is read from the local
       variable named "loc" in the calling frame; a KeyError is raised if
       the caller has no such local.
    """
    import inspect
    # frame object of whoever called this parse action
    f = inspect.stack()[1][0]
    try:
        endloc = f.f_locals["loc"]
    finally:
        # drop the frame reference so no reference cycle is left behind
        del f
    return s[startLoc:endloc]
2791
 
2792
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name

       tagStr is the tag name; xml selects the XML flavour (case-sensitive
       tag keyword, double-quoted attribute values only) over the HTML one
       (caseless tag keyword, quoted or bare attribute values, attribute
       names lower-cased).  Returns the tuple (openTag, closeTag).
    """
    tagAttrName = Word(alphanums)
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        # "<" tag name=value... [/] ">"; the optional "/" is reported under the
        # results name "empty" as the outcome of comparing its token with '/'
        openTag = Suppress("<") + Keyword(tagStr) + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # a bare (unquoted) attribute value may hold any printable except ">"
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + Keyword(tagStr,caseless=True) + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine("</" + Keyword(tagStr,caseless=not xml) + ">")

    # results names are "start"/"end" plus the tag name title-cased with any
    # ":" removed; the display names are "<tag>" and "</tag>"
    openTag = openTag.setResultsName("start"+"".join(tagStr.replace(":"," ").title().split())).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+"".join(tagStr.replace(":"," ").title().split())).setName("</%s>" % tagStr)

    return openTag, closeTag
2813
 
2814
def makeHTMLTags(tagStr):
    """Helper returning the (openTag, closeTag) expressions that _makeTags
       builds for tag name tagStr in its HTML (non-XML) mode.
    """
    return _makeTags( tagStr, xml=False )
2817
 
2818
def makeXMLTags(tagStr):
    """Helper returning the (openTag, closeTag) expressions that _makeTags
       builds for tag name tagStr in its XML mode.
    """
    return _makeTags( tagStr, xml=True )
2821
 
2822
opAssoc = _Constants()
2823
opAssoc.LEFT = object()
2824
opAssoc.RIGHT = object()
2825
 
2826
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar (a parenthesized sub-expression is accepted as an
          alternative to it)
        - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal
           - numTerms is the number of terms for this operator (must
              be 1 or 2)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError if numTerms is not 1 or 2, or if rightLeftAssoc is
       neither opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        thisExpr = Forward().setName("expr%d" % i)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = Group( lastExpr + opExpr )
            elif arity == 2:
                matchExpr = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                matchExpr = Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches either its own operator form or the level below
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
2877
 
2878
# characters with codes 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xfe
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# quoted strings: no raw newline inside; a doubled quote or a backslash
# followed by any character is accepted within the quotes
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')''').setName("quotedString using single or double quotes")

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma characters; a run of blanks/tabs is taken
# in only when it is not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited list of quoted strings or plain items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
2899
 
2900
 
2901
# Self-test: when run as a script, build a tiny "select ... from ..." grammar
# and print the parse results (or the error position) for some sample inputs.
if __name__ == "__main__":

    def test( teststring ):
        # simpleSQL is looked up when test() is called; it is defined below
        print teststring,"->",
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print tokenlist
            print "tokens = ",        tokens
            print "tokens.columns =", tokens.columns
            print "tokens.tables =",  tokens.tables
            print tokens.asXML("SQL",True)
        except ParseException, err:
            # show the offending line with a caret under the error column
            print err.line
            print " "*(err.column-1) + "^"
            print err
        print

    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # the misspelled / truncated inputs below exercise the error branch of test()
    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2   " )

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.