Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.0.2"
61 __versionTime__ = "13 April 2014 11:10"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73
74
75 __all__ = [
76 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
77 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
78 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
79 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
80 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
81 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
82 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
83 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
84 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
85 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
86 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
87 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
88 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
89 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
90 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
91 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
92 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr',
93 ]
94
95 PY_3 = sys.version.startswith('3')
96 if PY_3:
97 _MAX_INT = sys.maxsize
98 basestring = str
99 unichr = chr
100 _ustr = str
101
102
103 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
104
105 else:
106 _MAX_INT = sys.maxint
107 range = xrange
108
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
112 then < returns the unicode object | encodes it with the default encoding | ... >.
113 """
114 if isinstance(obj,unicode):
115 return obj
116
117 try:
118
119
120 return str(obj)
121
122 except UnicodeEncodeError:
123
124
125
126
127
128 return unicode(obj)
129
130
131
132
133
134
135
136
137 singleArgBuiltins = []
138 import __builtin__
139 for fname in "sum len sorted reversed list tuple set any all min max".split():
140 try:
141 singleArgBuiltins.append(getattr(__builtin__,fname))
142 except AttributeError:
143 continue
144
145 _generatorType = type((y for y in range(1)))
146
148 """Escape &, <, >, ", ', etc. in a string of data."""
149
150
151 from_symbols = '&><"\''
152 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
153 for from_,to_ in zip(from_symbols, to_symbols):
154 data = data.replace(from_, to_)
155 return data
156
159
# Character-class constants used when building Word/CharsNotIn expressions.
alphas = string.ascii_letters          # lowercase a-z followed by uppercase A-Z
nums = string.digits                   # "0123456789"
hexnums = nums + "ABCDEFabcdef"        # decimal digits plus hex letters in both cases
alphanums = alphas + nums
_bslash = "\\"                         # a single backslash character
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
166
168 """base exception class for all parsing runtime exceptions"""
169
170
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """Record where and why a parse failed.

    @param pstr: the string being parsed; when C{msg} is omitted this
        argument is used as the message instead and the stored parse
        string is left empty
    @param loc: location stored on the exception (default 0)
    @param msg: the error message, or C{None}
    @param elem: stored as C{parserElement}
    """
    # Called with a single string: that string is the message, not the input.
    if msg is None:
        msg, pstr = pstr, ""
    self.loc = loc
    self.msg = msg
    self.pstr = pstr
    self.parserElement = elem
180
182 """supported attributes by name are:
183 - lineno - returns the line number of the exception text
184 - col - returns the column number of the exception text
185 - line - returns the line containing the exception text
186 """
187 if( aname == "lineno" ):
188 return lineno( self.loc, self.pstr )
189 elif( aname in ("col", "column") ):
190 return col( self.loc, self.pstr )
191 elif( aname == "line" ):
192 return line( self.loc, self.pstr )
193 else:
194 raise AttributeError(aname)
195
197 return "%s (at char %d), (line:%d, col:%d)" % \
198 ( self.msg, self.loc, self.lineno, self.column )
212 return "loc msg pstr parserElement lineno col line " \
213 "markInputline __str__ __repr__".split()
214
216 """exception thrown when parse expressions don't match class;
217 supported attributes by name are:
218 - lineno - returns the line number of the exception text
219 - col - returns the column number of the exception text
220 - line - returns the line containing the exception text
221 """
222 pass
223
225 """user-throwable exception thrown when inconsistent parse content
226 is found; stops all parsing immediately"""
227 pass
228
230 """just like C{L{ParseFatalException}}, but thrown internally when an
231 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
232 an unbacktrackable syntax error has been found"""
236
237
238
239
240
241
242
243
244
245
246
247
248
249
251 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__(self, parseElementList):
    """Store the given list of parse elements as C{parseElementTrace}."""
    self.parseElementTrace = parseElementList
254
256 return "RecursiveGrammarException: %s" % self.parseElementTrace
257
264 return repr(self.tup)
266 self.tup = (self.tup[0],i)
267
269 """Structured parse results, to provide multiple means of access to the parsed data:
270 - as a list (C{len(results)})
271 - by list index (C{results[0], results[1]}, etc.)
272 - by attribute (C{results.<resultsName>})
273 """
def __new__(cls, toklist, name=None, asList=True, modal=True):
    """Return C{toklist} unchanged when it is already an instance of C{cls};
    otherwise allocate a fresh object flagged for first-time initialization.

    The remaining parameters are accepted but not used here.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # tells __init__ that the internal containers still have to be created
        fresh.__doinit = True
        return fresh
    return toklist
280
281
282
def __init__(self, toklist, name=None, asList=True, modal=True, isinstance=isinstance):
    """Set up the token list and, when a results name is given, register the
    tokens under that name.

    @param toklist: a list (copied), a generator (materialized into a list),
        or any other object (wrapped in a one-element list)
    @param name: results name; a falsy name means nothing is registered
    @param asList: when true, the named value is stored as a nested
        C{ParseResults}; otherwise the first element of C{toklist} is stored
    @param modal: when false, C{name} is recorded in the accumulating-names table
    @param isinstance: the C{isinstance} builtin, bound as a default argument
    """
    if self.__doinit:
        # first-time setup; skipped when __new__ handed back an existing object
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    # no (or empty) results name: nothing more to do
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    # empty results are never registered under the name
    if toklist in (None, '', []):
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            # toklist could not be indexed; store it whole
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
    else:
        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
    self[name].__name = name
317
319 if isinstance( i, (int,slice) ):
320 return self.__toklist[i]
321 else:
322 if i not in self.__accumNames:
323 return self.__tokdict[i][-1][0]
324 else:
325 return ParseResults([ v[0] for v in self.__tokdict[i] ])
326
328 if isinstance(v,_ParseResultsWithOffset):
329 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 sub = v[0]
331 elif isinstance(k,int):
332 self.__toklist[k] = v
333 sub = v
334 else:
335 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
336 sub = v
337 if isinstance(sub,ParseResults):
338 sub.__parent = wkref(self)
339
341 if isinstance(i,(int,slice)):
342 mylen = len( self.__toklist )
343 del self.__toklist[i]
344
345
346 if isinstance(i, int):
347 if i < 0:
348 i += mylen
349 i = slice(i, i+1)
350
351 removed = list(range(*i.indices(mylen)))
352 removed.reverse()
353
354 for name in self.__tokdict:
355 occurrences = self.__tokdict[name]
356 for j in removed:
357 for k, (value, position) in enumerate(occurrences):
358 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
359 else:
360 del self.__tokdict[i]
361
363 return k in self.__tokdict
364
def __len__(self):
    """Return the number of entries in the token list."""
    return len(self.__toklist)


def __bool__(self):
    """Return True when the token list has at least one entry."""
    return len(self.__toklist) > 0


# Python 2 looks up __nonzero__ for truth testing
__nonzero__ = __bool__


def __iter__(self):
    """Iterate over the token list in order."""
    return iter(self.__toklist)


def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    return iter(self.__toklist[::-1])
371 """Returns all named result keys."""
372 if hasattr(self.__tokdict, "iterkeys"):
373 return self.__tokdict.iterkeys()
374 else:
375 return iter(self.__tokdict)
376
378 """Returns all named result values."""
379 return (self[k] for k in self.iterkeys())
380
382 return ((k, self[k]) for k in self.iterkeys())
383
384 if PY_3:
385 keys = iterkeys
386 values = itervalues
387 items = iteritems
388 else:
390 """Returns all named result keys."""
391 return list(self.iterkeys())
392
394 """Returns all named result values."""
395 return list(self.itervalues())
396
398 """Returns all named result keys and values as a list of tuples."""
399 return list(self.iteritems())
400
402 """Since keys() returns an iterator, this method is helpful in bypassing
403 code that looks for the existence of any defined results names."""
404 return bool(self.__tokdict)
405
def pop(self, *args, **kwargs):
    """Removes and returns item at specified index (default=last).
    Supports both list and dict semantics for pop(). If passed no
    argument or an integer argument, it will use list semantics
    and pop tokens from the list of parsed tokens. If passed a
    non-integer argument (most likely a string), it will use dict
    semantics and pop the corresponding value from any defined
    results names. A second default return value argument is
    supported, just as in dict.pop(); it may also be given as the
    keyword argument C{default}.

    Raises C{TypeError} for any keyword argument other than C{default}.
    Lookup errors from C{self[index]} (e.g. C{KeyError} for a missing
    name with no default) propagate unchanged.
    """
    if not args:
        args = [-1]
    for key, value in kwargs.items():
        if key != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % key)
        # *args arrives as a tuple, so build a new pair instead of appending
        args = (args[0], value)
    if (isinstance(args[0], int) or
            len(args) == 1 or
            args[0] in self):
        index = args[0]
        ret = self[index]
        del self[index]
        return ret
    # non-integer key that is not present, and a default was supplied
    return args[1]
428
def get(self, key, defaultValue=None):
    """Look up C{key} among the named results.

    Returns C{self[key]} when C{key in self} is true; otherwise returns
    C{defaultValue} (C{None} unless specified).
    """
    return self[key] if key in self else defaultValue
437
def insert(self, index, insStr):
    """Inserts new element at location index in the list of parsed tokens,
    then rewrites the stored offsets of the named results."""
    self.__toklist.insert(index, insStr)
    # every recorded offset strictly greater than index moves up by one
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shifted = position + (position > index)
            occurrences[slot] = _ParseResultsWithOffset(value, shifted)
446
448 """Add single element to end of ParseResults list of elements."""
449 self.__toklist.append(item)
450
452 """Add sequence of elements to end of ParseResults list of elements."""
453 if isinstance(itemseq, ParseResults):
454 self += itemseq
455 else:
456 self.__toklist.extend(itemseq)
457
459 """Clear all elements and results names."""
460 del self.__toklist[:]
461 self.__tokdict.clear()
462
464 try:
465 return self[name]
466 except KeyError:
467 return ""
468
469 if name in self.__tokdict:
470 if name not in self.__accumNames:
471 return self.__tokdict[name][-1][0]
472 else:
473 return ParseResults([ v[0] for v in self.__tokdict[name] ])
474 else:
475 return ""
476
478 ret = self.copy()
479 ret += other
480 return ret
481
483 if other.__tokdict:
484 offset = len(self.__toklist)
485 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
486 otheritems = other.__tokdict.items()
487 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
488 for (k,vlist) in otheritems for v in vlist]
489 for k,v in otherdictitems:
490 self[k] = v
491 if isinstance(v[0],ParseResults):
492 v[0].__parent = wkref(self)
493
494 self.__toklist += other.__toklist
495 self.__accumNames.update( other.__accumNames )
496 return self
497
499 if isinstance(other,int) and other == 0:
500 return self.copy()
501
503 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
504
506 out = []
507 for i in self.__toklist:
508 if isinstance(i, ParseResults):
509 out.append(_ustr(i))
510 else:
511 out.append(repr(i))
512 return '[' + ', '.join(out) + ']'
513
515 out = []
516 for item in self.__toklist:
517 if out and sep:
518 out.append(sep)
519 if isinstance( item, ParseResults ):
520 out += item._asStringList()
521 else:
522 out.append( _ustr(item) )
523 return out
524
526 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
527 out = []
528 for res in self.__toklist:
529 if isinstance(res,ParseResults):
530 out.append( res.asList() )
531 else:
532 out.append( res )
533 return out
534
536 """Returns the named parse results as dictionary."""
537 if PY_3:
538 return dict( self.items() )
539 else:
540 return dict( self.iteritems() )
541
543 """Returns a new copy of a C{ParseResults} object."""
544 ret = ParseResults( self.__toklist )
545 ret.__tokdict = self.__tokdict.copy()
546 ret.__parent = self.__parent
547 ret.__accumNames.update( self.__accumNames )
548 ret.__name = self.__name
549 return ret
550
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    @param doctag: tag for the enclosing element; when C{None} the object's
        own name is used, falling back to C{"ITEM"}
    @param namedItemsOnly: when true, unnamed tokens are skipped and an
        unnamed enclosing element yields an empty string
    @param indent: prefix written before the enclosing element's tags
    @param formatted: when false, newlines and indentation are omitted
    """
    # map each stored offset to its results name (later entries win)
    namedItems = {}
    for resname, vlist in self.__tokdict.items():
        for entry in vlist:
            namedItems[entry[1]] = resname

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        # collapse all layout whitespace
        nl = ""
        indent = ""
        nextLevelIndent = ""

    selfTag = doctag if doctag is not None else self.__name
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # nested results keep the names-only restriction only without an explicit doctag
    childNamedOnly = namedItemsOnly and doctag is None

    out = [nl, indent, "<", selfTag, ">"]
    for i, res in enumerate(self.__toklist):
        if isinstance(res, ParseResults):
            out.append(res.asXML(namedItems.get(i),
                                 childNamedOnly,
                                 nextLevelIndent,
                                 formatted))
            continue
        # plain token: use its results name if it has one
        resTag = namedItems.get(i)
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", resTag, ">",
                    xmlBodyText,
                    "</", resTag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)
610
612 for k,vlist in self.__tokdict.items():
613 for v,loc in vlist:
614 if sub is v:
615 return k
616 return None
617
619 """Returns the results name for this token expression."""
620 if self.__name:
621 return self.__name
622 elif self.__parent:
623 par = self.__parent()
624 if par:
625 return par.__lookup(self)
626 else:
627 return None
628 elif (len(self) == 1 and
629 len(self.__tokdict) == 1 and
630 self.__tokdict.values()[0][0][1] in (0,-1)):
631 return self.__tokdict.keys()[0]
632 else:
633 return None
634
def dump(self, indent='', depth=0):
    """Diagnostic method for listing out the contents of a C{ParseResults}.
    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data. The first line is the list form of the
    results; each named item follows on its own line in sorted order, and
    nested results that have keys are dumped recursively at C{depth+1}."""
    pieces = [indent + _ustr(self.asList())]
    for key, value in sorted(self.items()):
        # the list line is always present, so every entry starts on a new line
        pieces.append('\n')
        pieces.append("%s%s- %s: " % (indent, (' ' * depth), key))
        if isinstance(value, ParseResults) and value.haskeys():
            pieces.append(value.dump(indent, depth + 1))
        else:
            pieces.append(_ustr(value))
    return "".join(pieces)
654
def pprint(self, *args, **kwargs):
    """Pretty-printer for parsed results as a list, using the C{pprint} module.
    Accepts additional positional or keyword args as defined for the
    C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)
660
661
663 return ( self.__toklist,
664 ( self.__tokdict.copy(),
665 self.__parent is not None and self.__parent() or None,
666 self.__accumNames,
667 self.__name ) )
668
670 self.__toklist = state[0]
671 (self.__tokdict,
672 par,
673 inAccumNames,
674 self.__name) = state[1]
675 self.__accumNames = {}
676 self.__accumNames.update(inAccumNames)
677 if par is not None:
678 self.__parent = wkref(par)
679 else:
680 self.__parent = None
681
684
# Register ParseResults as a virtual MutableMapping subclass.
# The ABC aliases in the top-level collections module were removed in
# Python 3.10, so look in collections.abc first and fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
686
688 """Returns current column within a string, counting newlines as line separators.
689 The first column is number 1.
690
691 Note: the default parsing behavior is to expand tabs in the input string
692 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
693 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
694 consistent view of the parsed string, the parse location, and line and column
695 positions within the parsed string.
696 """
697 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
698
700 """Returns current line number within a string, counting newlines as line separators.
701 The first line is number 1.
702
703 Note: the default parsing behavior is to expand tabs in the input string
704 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
705 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
706 consistent view of the parsed string, the parse location, and line and column
707 positions within the parsed string.
708 """
709 return strg.count("\n",0,loc) + 1
710
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The returned text excludes the newline characters on either side.
    """
    # one past the previous newline, or 0 when rfind returns -1
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:] if end < 0 else strg[start:end]
720
722 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
723
725 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
726
728 print ("Exception raised:" + _ustr(exc))
729
731 """'Do-nothing' debug action, to suppress debugging output during parsing."""
732 pass
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756 'decorator to trim function calls to match the arity of the target'
758 if func in singleArgBuiltins:
759 return lambda s,l,t: func(t)
760 limit = [0]
761 foundArity = [False]
762 def wrapper(*args):
763 while 1:
764 try:
765 ret = func(*args[limit[0]:])
766 foundArity[0] = True
767 return ret
768 except TypeError:
769 if limit[0] <= maxargs and not foundArity[0]:
770 limit[0] += 1
771 continue
772 raise
773 return wrapper
774
776 """Abstract base level parser element class."""
777 DEFAULT_WHITE_CHARS = " \n\t\r"
778 verbose_stacktrace = False
779
784 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
785
787 """
788 Set class to be used for inclusion of string literals into a parser.
789 """
790 ParserElement.literalStringClass = cls
791 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
792
794 self.parseAction = list()
795 self.failAction = None
796
797 self.strRepr = None
798 self.resultsName = None
799 self.saveAsList = savelist
800 self.skipWhitespace = True
801 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
802 self.copyDefaultWhiteChars = True
803 self.mayReturnEmpty = False
804 self.keepTabs = False
805 self.ignoreExprs = list()
806 self.debug = False
807 self.streamlined = False
808 self.mayIndexError = True
809 self.errmsg = ""
810 self.modalResults = True
811 self.debugActions = ( None, None, None )
812 self.re = None
813 self.callPreparse = True
814 self.callDuringTry = False
815
817 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
818 for the same parsing pattern, using copies of the original parse element."""
819 cpy = copy.copy( self )
820 cpy.parseAction = self.parseAction[:]
821 cpy.ignoreExprs = self.ignoreExprs[:]
822 if self.copyDefaultWhiteChars:
823 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
824 return cpy
825
827 """Define name for this expression, for use in debugging."""
828 self.name = name
829 self.errmsg = "Expected " + self.name
830 if hasattr(self,"exception"):
831 self.exception.msg = self.errmsg
832 return self
833
835 """Define name for referencing matching tokens as a nested attribute
836 of the returned parse results.
837 NOTE: this returns a *copy* of the original C{ParserElement} object;
838 this is so that the client can define a basic element, such as an
839 integer, and reference it in multiple places with different names.
840
841 You can also set results names using the abbreviated syntax,
842 C{expr("name")} in place of C{expr.setResultsName("name")} -
843 see L{I{__call__}<__call__>}.
844 """
845 newself = self.copy()
846 if name.endswith("*"):
847 name = name[:-1]
848 listAllMatches=True
849 newself.resultsName = name
850 newself.modalResults = not listAllMatches
851 return newself
852
854 """Method to invoke the Python pdb debugger when this element is
855 about to be parsed. Set C{breakFlag} to True to enable, False to
856 disable.
857 """
858 if breakFlag:
859 _parseMethod = self._parse
860 def breaker(instring, loc, doActions=True, callPreParse=True):
861 import pdb
862 pdb.set_trace()
863 return _parseMethod( instring, loc, doActions, callPreParse )
864 breaker._originalParseMethod = _parseMethod
865 self._parse = breaker
866 else:
867 if hasattr(self._parse,"_originalParseMethod"):
868 self._parse = self._parse._originalParseMethod
869 return self
870
872 """Define action to perform when successfully matching parse element definition.
873 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
874 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
875 - s = the original string being parsed (see note below)
876 - loc = the location of the matching substring
877 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
878 If the functions in fns modify the tokens, they can return them as the return
879 value from fn, and the modified list of tokens will replace the original.
880 Otherwise, fn does not need to return any value.
881
882 Note: the default parsing behavior is to expand tabs in the input string
883 before starting the parsing process. See L{I{parseString}<parseString>} for more information
884 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
885 consistent view of the parsed string, the parse location, and line and column
886 positions within the parsed string.
887 """
888 self.parseAction = list(map(_trim_arity, list(fns)))
889 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
890 return self
891
893 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
894 self.parseAction += list(map(_trim_arity, list(fns)))
895 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
896 return self
897
899 """Define action to perform if parsing fails at this expression.
900 Fail action fn is a callable function that takes the arguments
901 C{fn(s,loc,expr,err)} where:
902 - s = string being parsed
903 - loc = location where expression match was attempted and failed
904 - expr = the parse expression that failed
905 - err = the exception thrown
906 The function returns no value. It may throw C{L{ParseFatalException}}
907 if it is desired to stop parsing immediately."""
908 self.failAction = fn
909 return self
910
912 exprsFound = True
913 while exprsFound:
914 exprsFound = False
915 for e in self.ignoreExprs:
916 try:
917 while 1:
918 loc,dummy = e._parse( instring, loc )
919 exprsFound = True
920 except ParseException:
921 pass
922 return loc
923
925 if self.ignoreExprs:
926 loc = self._skipIgnorables( instring, loc )
927
928 if self.skipWhitespace:
929 wt = self.whiteChars
930 instrlen = len(instring)
931 while loc < instrlen and instring[loc] in wt:
932 loc += 1
933
934 return loc
935
936 - def parseImpl( self, instring, loc, doActions=True ):
938
939 - def postParse( self, instring, loc, tokenlist ):
941
942
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Run this element's C{parseImpl} at C{loc}, then C{postParse} and any
    parse actions, and return C{(newloc, ParseResults)}.

    When C{self.debug} is set or a fail action is registered, the
    start/exception debug actions and the fail action are invoked around
    the match attempt. An C{IndexError} from C{parseImpl} is converted
    to a C{ParseException} positioned at the end of the input.
    """
    debugging = self.debug
    # the instrumented path also covers elements that only have a fail action
    instrumented = debugging or self.failAction

    if instrumented and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    tokensStart = preloc

    if instrumented:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for fn in self.parseAction:
                tokens = fn(instring, tokensStart, retTokens)
                # an action returning None leaves the current results in place
                if tokens is not None:
                    retTokens = ParseResults(tokens,
                                             self.resultsName,
                                             asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                             modal=self.modalResults)
        except ParseBaseException as err:
            # only report to the exception debug action when debugging
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1014
1020
1021
1022
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around C{_parseNoCache}.

    Results and parse exceptions are stored in C{ParserElement._exprArgCache}
    keyed on C{(self, instring, loc, callPreParse, doActions)}. A cached
    exception is re-raised; a cached success is returned with a copy of
    the stored results.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[key]
        if isinstance(cached, Exception):
            raise cached
        # hand out a copy so the stored results object is not shared
        return (cached[0], cached[1].copy())

    try:
        result = self._parseNoCache(instring, loc, doActions, callPreParse)
        ParserElement._exprArgCache[key] = (result[0], result[1].copy())
        return result
    except ParseBaseException as pe:
        # clear the traceback before storing the exception in the cache
        pe.__traceback__ = None
        ParserElement._exprArgCache[key] = pe
        raise
1039
1040 _parse = _parseNoCache
1041
1042
1043 _exprArgCache = {}
1046 resetCache = staticmethod(resetCache)
1047
1048 _packratEnabled = False
1050 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1051 Repeated parse attempts at the same string location (which happens
1052 often in many complex grammars) can immediately return a cached value,
1053 instead of re-executing parsing/validating code. Memoizing is done of
1054 both valid results and parsing exceptions.
1055
1056 This speedup may break existing programs that use parse actions that
1057 have side-effects. For this reason, packrat parsing is disabled when
1058 you first import pyparsing. To activate the packrat feature, your
1059 program must call the class method C{ParserElement.enablePackrat()}. If
1060 your program uses C{psyco} to "compile as you go", you must call
1061 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1062 Python will crash. For best results, call C{enablePackrat()} immediately
1063 after importing pyparsing.
1064 """
1065 if not ParserElement._packratEnabled:
1066 ParserElement._packratEnabled = True
1067 ParserElement._parse = ParserElement._parseCache
1068 enablePackrat = staticmethod(enablePackrat)
1069
1071 """Execute the parse expression with the given string.
1072 This is the main interface to the client code, once the complete
1073 expression has been built.
1074
1075 If you want the grammar to require that the entire input string be
1076 successfully parsed, then set C{parseAll} to True (equivalent to ending
1077 the grammar with C{L{StringEnd()}}).
1078
1079 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1080 in order to report proper column numbers in parse actions.
1081 If the input string contains tabs and
1082 the grammar uses parse actions that use the C{loc} argument to index into the
1083 string being parsed, you can ensure you have a consistent view of the input
1084 string by:
1085 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1086 (see L{I{parseWithTabs}<parseWithTabs>})
1087 - define your parse action using the full C{(s,loc,toks)} signature, and
1088 reference the input string using the parse action's C{s} argument
1089 - explicitly expand the tabs in your input string before calling
1090 C{parseString}
1091 """
1092 ParserElement.resetCache()
1093 if not self.streamlined:
1094 self.streamline()
1095
1096 for e in self.ignoreExprs:
1097 e.streamline()
1098 if not self.keepTabs:
1099 instring = instring.expandtabs()
1100 try:
1101 loc, tokens = self._parse( instring, 0 )
1102 if parseAll:
1103 loc = self.preParse( instring, loc )
1104 se = Empty() + StringEnd()
1105 se._parse( instring, loc )
1106 except ParseBaseException as exc:
1107 if ParserElement.verbose_stacktrace:
1108 raise
1109 else:
1110
1111 raise exc
1112 else:
1113 return tokens
1114
1116 """Scan the input string for expression matches. Each match will return the
1117 matching tokens, start location, and end location. May be called with optional
1118 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1119 C{overlap} is specified, then overlapping matches will be reported.
1120
1121 Note that the start and end locations are reported relative to the string
1122 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1123 strings with embedded tabs."""
1124 if not self.streamlined:
1125 self.streamline()
1126 for e in self.ignoreExprs:
1127 e.streamline()
1128
1129 if not self.keepTabs:
1130 instring = _ustr(instring).expandtabs()
1131 instrlen = len(instring)
1132 loc = 0
1133 preparseFn = self.preParse
1134 parseFn = self._parse
1135 ParserElement.resetCache()
1136 matches = 0
1137 try:
1138 while loc <= instrlen and matches < maxMatches:
1139 try:
1140 preloc = preparseFn( instring, loc )
1141 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1142 except ParseException:
1143 loc = preloc+1
1144 else:
1145 if nextLoc > loc:
1146 matches += 1
1147 yield tokens, preloc, nextLoc
1148 if overlap:
1149 nextloc = preparseFn( instring, loc )
1150 if nextloc > loc:
1151 loc = nextLoc
1152 else:
1153 loc += 1
1154 else:
1155 loc = nextLoc
1156 else:
1157 loc = preloc+1
1158 except ParseBaseException as exc:
1159 if ParserElement.verbose_stacktrace:
1160 raise
1161 else:
1162
1163 raise exc
1164
1197
1199 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1200 to match the given parse expression. May be called with optional
1201 C{maxMatches} argument, to clip searching after 'n' matches are found.
1202 """
1203 try:
1204 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1205 except ParseBaseException as exc:
1206 if ParserElement.verbose_stacktrace:
1207 raise
1208 else:
1209
1210 raise exc
1211
1213 """Implementation of + operator - returns C{L{And}}"""
1214 if isinstance( other, basestring ):
1215 other = ParserElement.literalStringClass( other )
1216 if not isinstance( other, ParserElement ):
1217 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1218 SyntaxWarning, stacklevel=2)
1219 return None
1220 return And( [ self, other ] )
1221
1223 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1224 if isinstance( other, basestring ):
1225 other = ParserElement.literalStringClass( other )
1226 if not isinstance( other, ParserElement ):
1227 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1228 SyntaxWarning, stacklevel=2)
1229 return None
1230 return other + self
1231
1233 """Implementation of - operator, returns C{L{And}} with error stop"""
1234 if isinstance( other, basestring ):
1235 other = ParserElement.literalStringClass( other )
1236 if not isinstance( other, ParserElement ):
1237 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1238 SyntaxWarning, stacklevel=2)
1239 return None
1240 return And( [ self, And._ErrorStop(), other ] )
1241
1243 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1244 if isinstance( other, basestring ):
1245 other = ParserElement.literalStringClass( other )
1246 if not isinstance( other, ParserElement ):
1247 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1248 SyntaxWarning, stacklevel=2)
1249 return None
1250 return other - self
1251
1253 """Implementation of * operator, allows use of C{expr * 3} in place of
1254 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1255 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1256 may also include C{None} as in:
1257 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1258 to C{expr*n + L{ZeroOrMore}(expr)}
1259 (read as "at least n instances of C{expr}")
1260 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1261 (read as "0 to n instances of C{expr}")
1262 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1263 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1264
1265 Note that C{expr*(None,n)} does not raise an exception if
1266 more than n exprs exist in the input stream; that is,
1267 C{expr*(None,n)} does not enforce a maximum number of expr
1268 occurrences. If this behavior is desired, then write
1269 C{expr*(None,n) + ~expr}
1270
1271 """
1272 if isinstance(other,int):
1273 minElements, optElements = other,0
1274 elif isinstance(other,tuple):
1275 other = (other + (None, None))[:2]
1276 if other[0] is None:
1277 other = (0, other[1])
1278 if isinstance(other[0],int) and other[1] is None:
1279 if other[0] == 0:
1280 return ZeroOrMore(self)
1281 if other[0] == 1:
1282 return OneOrMore(self)
1283 else:
1284 return self*other[0] + ZeroOrMore(self)
1285 elif isinstance(other[0],int) and isinstance(other[1],int):
1286 minElements, optElements = other
1287 optElements -= minElements
1288 else:
1289 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1290 else:
1291 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1292
1293 if minElements < 0:
1294 raise ValueError("cannot multiply ParserElement by negative value")
1295 if optElements < 0:
1296 raise ValueError("second tuple value must be greater or equal to first tuple value")
1297 if minElements == optElements == 0:
1298 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1299
1300 if (optElements):
1301 def makeOptionalList(n):
1302 if n>1:
1303 return Optional(self + makeOptionalList(n-1))
1304 else:
1305 return Optional(self)
1306 if minElements:
1307 if minElements == 1:
1308 ret = self + makeOptionalList(optElements)
1309 else:
1310 ret = And([self]*minElements) + makeOptionalList(optElements)
1311 else:
1312 ret = makeOptionalList(optElements)
1313 else:
1314 if minElements == 1:
1315 ret = self
1316 else:
1317 ret = And([self]*minElements)
1318 return ret
1319
1322
1324 """Implementation of | operator - returns C{L{MatchFirst}}"""
1325 if isinstance( other, basestring ):
1326 other = ParserElement.literalStringClass( other )
1327 if not isinstance( other, ParserElement ):
1328 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1329 SyntaxWarning, stacklevel=2)
1330 return None
1331 return MatchFirst( [ self, other ] )
1332
1334 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1335 if isinstance( other, basestring ):
1336 other = ParserElement.literalStringClass( other )
1337 if not isinstance( other, ParserElement ):
1338 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1339 SyntaxWarning, stacklevel=2)
1340 return None
1341 return other | self
1342
1344 """Implementation of ^ operator - returns C{L{Or}}"""
1345 if isinstance( other, basestring ):
1346 other = ParserElement.literalStringClass( other )
1347 if not isinstance( other, ParserElement ):
1348 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1349 SyntaxWarning, stacklevel=2)
1350 return None
1351 return Or( [ self, other ] )
1352
1354 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1355 if isinstance( other, basestring ):
1356 other = ParserElement.literalStringClass( other )
1357 if not isinstance( other, ParserElement ):
1358 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1359 SyntaxWarning, stacklevel=2)
1360 return None
1361 return other ^ self
1362
1364 """Implementation of & operator - returns C{L{Each}}"""
1365 if isinstance( other, basestring ):
1366 other = ParserElement.literalStringClass( other )
1367 if not isinstance( other, ParserElement ):
1368 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1369 SyntaxWarning, stacklevel=2)
1370 return None
1371 return Each( [ self, other ] )
1372
1374 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1375 if isinstance( other, basestring ):
1376 other = ParserElement.literalStringClass( other )
1377 if not isinstance( other, ParserElement ):
1378 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1379 SyntaxWarning, stacklevel=2)
1380 return None
1381 return other & self
1382
1384 """Implementation of ~ operator - returns C{L{NotAny}}"""
1385 return NotAny( self )
1386
1388 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1389 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1390 could be written as::
1391 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1392
1393 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1394 passed as C{True}.
1395
1396 If C{name} is omitted, same as calling C{L{copy}}.
1397 """
1398 if name is not None:
1399 return self.setResultsName(name)
1400 else:
1401 return self.copy()
1402
1404 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1405 cluttering up returned output.
1406 """
1407 return Suppress( self )
1408
1410 """Disables the skipping of whitespace before matching the characters in the
1411 C{ParserElement}'s defined pattern. This is normally only used internally by
1412 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1413 """
1414 self.skipWhitespace = False
1415 return self
1416
1418 """Overrides the default whitespace chars
1419 """
1420 self.skipWhitespace = True
1421 self.whiteChars = chars
1422 self.copyDefaultWhiteChars = False
1423 return self
1424
1426 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1427 Must be called before C{parseString} when the input grammar contains elements that
1428 match C{<TAB>} characters."""
1429 self.keepTabs = True
1430 return self
1431
1433 """Define expression to be ignored (e.g., comments) while doing pattern
1434 matching; may be called repeatedly, to define multiple comment or other
1435 ignorable patterns.
1436 """
1437 if isinstance( other, Suppress ):
1438 if other not in self.ignoreExprs:
1439 self.ignoreExprs.append( other.copy() )
1440 else:
1441 self.ignoreExprs.append( Suppress( other.copy() ) )
1442 return self
1443
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install custom debugging callbacks and turn debugging on.

    Each of C{startAction}, C{successAction} and C{exceptionAction} that is
    passed as a false value (for example C{None}) is replaced by the
    corresponding module-level default debug action.  Returns C{self} so
    that calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onFailure = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onFailure)
    self.debug = True
    return self
1451
1453 """Enable display of debugging messages while doing pattern matching.
1454 Set C{flag} to True to enable, False to disable."""
1455 if flag:
1456 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1457 else:
1458 self.debug = False
1459 return self
1460
1463
1466
1468 self.streamlined = True
1469 self.strRepr = None
1470 return self
1471
1474
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility but is not read
    by this implementation: the recursion check always starts from a fresh,
    empty trace.  The default is C{None} rather than a list literal so that
    no mutable object is shared between calls.
    """
    self.checkRecursion( [] )
1478
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a path that
       can be passed to C{open()}
     - parseAll - forwarded unchanged to C{parseString}

    Returns whatever C{parseString} returns for the file contents.
    Raises C{ParseBaseException} (or a subclass) when parsing fails.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # No usable read(): treat the argument as a path.  The with-block
        # closes the handle even if reading the file raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # NOTE(review): re-raising the caught object from this frame is
        # presumably meant to hide pyparsing-internal frames -- confirm.
        raise exc
1498
1500 if isinstance(other, ParserElement):
1501 return self is other or self.__dict__ == other.__dict__
1502 elif isinstance(other, basestring):
1503 try:
1504 self.parseString(_ustr(other), parseAll=True)
1505 return True
1506 except ParseBaseException:
1507 return False
1508 else:
1509 return super(ParserElement,self)==other
1510
1512 return not (self == other)
1513
1515 return hash(id(self))
1516
1518 return self == other
1519
1521 return not (self == other)
1522
1523
1524 -class Token(ParserElement):
1525 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1528
1530 s = super(Token,self).setName(name)
1531 self.errmsg = "Expected " + self.name
1532 return s
1533
1534
1536 """An empty token, will always match."""
1538 super(Empty,self).__init__()
1539 self.name = "Empty"
1540 self.mayReturnEmpty = True
1541 self.mayIndexError = False
1542
1543
1545 """A token that will never match."""
1547 super(NoMatch,self).__init__()
1548 self.name = "NoMatch"
1549 self.mayReturnEmpty = True
1550 self.mayIndexError = False
1551 self.errmsg = "Unmatchable token"
1552
1553 - def parseImpl( self, instring, loc, doActions=True ):
1555
1556
1558 """Token to exactly match a specified string."""
1560 super(Literal,self).__init__()
1561 self.match = matchString
1562 self.matchLen = len(matchString)
1563 try:
1564 self.firstMatchChar = matchString[0]
1565 except IndexError:
1566 warnings.warn("null string passed to Literal; use Empty() instead",
1567 SyntaxWarning, stacklevel=2)
1568 self.__class__ = Empty
1569 self.name = '"%s"' % _ustr(self.match)
1570 self.errmsg = "Expected " + self.name
1571 self.mayReturnEmpty = False
1572 self.mayIndexError = False
1573
1574
1575
1576
1577
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text at C{loc}.

    Returns C{(end location, matched literal)}; raises C{ParseException}
    when the text at C{loc} is not the literal.
    """
    # cheap single-character rejection before the full comparison
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    endLoc = loc + self.matchLen
    return endLoc, self.match
1583 _L = Literal
1584 ParserElement.literalStringClass = Literal
1585
1587 """Token to exactly match a specified string as a keyword, that is, it must be
1588 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1589 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1590 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1591 Accepts two optional constructor arguments in addition to the keyword string:
1592 C{identChars} is a string of characters that would be valid identifier characters,
1593 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1594 matching, default is C{False}.
1595 """
1596 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1597
1599 super(Keyword,self).__init__()
1600 self.match = matchString
1601 self.matchLen = len(matchString)
1602 try:
1603 self.firstMatchChar = matchString[0]
1604 except IndexError:
1605 warnings.warn("null string passed to Keyword; use Empty() instead",
1606 SyntaxWarning, stacklevel=2)
1607 self.name = '"%s"' % self.match
1608 self.errmsg = "Expected " + self.name
1609 self.mayReturnEmpty = False
1610 self.mayIndexError = False
1611 self.caseless = caseless
1612 if caseless:
1613 self.caselessmatch = matchString.upper()
1614 identChars = identChars.upper()
1615 self.identChars = set(identChars)
1616
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring non-identifier neighbours.

    The keyword text must be present at C{loc}, must not be followed by a
    character in C{identChars}, and must not be preceded by one.  In
    caseless mode the text and both neighbours are compared upper-cased.
    Returns C{(end location, keyword)}; raises C{ParseException} otherwise.
    """
    endLoc = loc + self.matchLen
    fold = self.caseless
    identChars = self.identChars
    if fold:
        headMatches = instring[loc:endLoc].upper() == self.caselessmatch
    else:
        headMatches = (instring[loc] == self.firstMatchChar and
                       (self.matchLen == 1 or instring.startswith(self.match, loc)))
    if headMatches:
        if endLoc >= len(instring):
            # keyword runs to the end of the input: nothing can follow it
            clearAfter = True
        else:
            following = instring[endLoc]
            clearAfter = (following.upper() if fold else following) not in identChars
        if clearAfter:
            if loc == 0:
                return endLoc, self.match
            preceding = instring[loc-1]
            if (preceding.upper() if fold else preceding) not in identChars:
                return endLoc, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1630
1635
1640 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1641
1643 """Token to match a specified string, ignoring case of letters.
1644 Note: the matched results will always be in the case of the given
1645 match string, NOT the case of the input text.
1646 """
1648 super(CaselessLiteral,self).__init__( matchString.upper() )
1649
1650 self.returnString = matchString
1651 self.name = "'%s'" % self.returnString
1652 self.errmsg = "Expected " + self.name
1653
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal at C{loc} without regard to letter case.

    Returns C{(end location, returnString)}, i.e. the text in the case it
    was defined with rather than the case found in the input; raises
    C{ParseException} when the upper-cased input slice differs.
    """
    endLoc = loc + self.matchLen
    candidate = instring[loc:endLoc]
    if candidate.upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return endLoc, self.returnString
1658
1662
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc} case-insensitively.

    The upper-cased input slice must equal C{caselessmatch}, and the
    character after it (if any) must not be in C{identChars}.  Returns
    C{(end location, match)}; raises C{ParseException} otherwise.
    """
    endLoc = loc + self.matchLen
    if instring[loc:endLoc].upper() == self.caselessmatch:
        if endLoc >= len(instring) or instring[endLoc].upper() not in self.identChars:
            return endLoc, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1668
1670 """Token for matching words composed of allowed character sets.
1671 Defined with string containing all allowed initial characters,
1672 an optional string containing allowed body characters (if omitted,
1673 defaults to the initial character set), and an optional minimum,
1674 maximum, and/or exact length. The default value for C{min} is 1 (a
1675 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1676 are 0, meaning no maximum or exact length restriction. An optional
1677 C{excludeChars} parameter can list characters that might be found in
1678 the input C{bodyChars} string; useful to define a word of all printables
1679 except for one or two characters, for instance.
1680 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Build a word matcher from its allowed leading and body characters.

    Parameters:
     - initChars - characters allowed in the first position
     - bodyChars - characters allowed after the first position; when
       omitted or empty, C{initChars} is used for the body as well
     - min - minimum word length; a value below 1 raises C{ValueError}
     - max - maximum word length; 0 or less means no maximum
     - exact - when greater than 0, overrides both C{min} and C{max}
     - asKeyword - stored on the instance; on the regex fast path the
       pattern is wrapped in word-boundary anchors
     - excludeChars - characters removed from C{initChars} and
       C{bodyChars} before the character sets are built

    When neither character set contains a space and no length constraint
    was given, an equivalent regular expression is precompiled and stored
    in C{self.re} for use by C{parseImpl}.
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # The leading character can only be emitted as an escaped literal
            # when there is exactly one of them; the length test therefore
            # has to be on initCharsOrig.  Testing bodyCharsOrig instead made
            # Word("abc", "x") compile the literal sequence "abc[x]*".
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: without a compiled regex parseImpl uses its
            # character-by-character scan instead
            self.re = None
1734
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word starting at C{loc}.

    Uses the precompiled regex when one is available; otherwise scans one
    leading character from C{initChars} followed by characters from
    C{bodyChars}, up to C{maxLen} characters in total.  Returns
    C{(end location, matched text)}; raises C{ParseException} on failure.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    allowed = self.bodyChars
    limit = min(begin + self.maxLen, total)
    loc += 1
    while loc < limit and instring[loc] in allowed:
        loc += 1

    tooShort = loc - begin < self.minLen
    # true when the character right after the match is still a body character
    runsOn = loc < total and instring[loc] in allowed
    overMax = self.maxSpecified and runsOn
    notIsolated = self.asKeyword and ((begin > 0 and instring[begin-1] in allowed) or runsOn)
    if tooShort or overMax or notIsolated:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
1769
1771 try:
1772 return super(Word,self).__str__()
1773 except:
1774 pass
1775
1776
1777 if self.strRepr is None:
1778
1779 def charsAsStr(s):
1780 if len(s)>4:
1781 return s[:4]+"..."
1782 else:
1783 return s
1784
1785 if ( self.initCharsOrig != self.bodyCharsOrig ):
1786 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1787 else:
1788 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1789
1790 return self.strRepr
1791
1792
1794 """Token for matching strings that match a given regular expression.
1795 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1796 """
1797 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    in which case it is used directly and its source text is recorded as
    C{self.pattern}.  An invalid pattern string emits a C{SyntaxWarning}
    and re-raises the compile error; any other argument type raises
    C{ValueError}.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # str() of a compiled pattern is the object's repr, not its source;
        # the .pattern attribute holds the original pattern text
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1831
def parseImpl( self, instring, loc, doActions=True ):
    """Match the compiled regular expression at C{loc}.

    Returns C{(end location, ParseResults)} where the results hold the
    whole matched text, plus one named entry per named group of the
    pattern.  Raises C{ParseException} when the pattern does not match.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    for groupName, text in found.groupdict().items():
        tokens[groupName] = text
    return found.end(), tokens
1844
1846 try:
1847 return super(Regex,self).__str__()
1848 except:
1849 pass
1850
1851 if self.strRepr is None:
1852 self.strRepr = "Re:(%s)" % repr(self.pattern)
1853
1854 return self.strRepr
1855
1856
1858 """Token for matching strings that are delimited by quoting characters.
1859 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

       Raises C{SyntaxError} (after a C{SyntaxWarning}) when either quote
       delimiter is empty once surrounding whitespace has been stripped.
    """
    super(QuotedString,self).__init__()

    # delimiters are used with surrounding whitespace removed
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # The pattern is assembled piecewise: opening quote, then a repeated
    # group of alternatives for the body, then the closing quote.
    openQuote = re.escape(quoteChar)
    endFirst = _escapeRegexRangeChars(endQuoteChar[0])
    escInClass = _escapeRegexRangeChars(escChar) if escChar is not None else ''
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        bodyTemplate = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        # single-line strings additionally exclude line breaks from the body
        bodyTemplate = r'%s(?:[^%s\n\r%s]'
    pieces = [ bodyTemplate % (openQuote, endFirst, escInClass) ]

    if len(endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the
        # body, as long as the next character breaks the end-quote sequence
        partials = [ "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                  _escapeRegexRangeChars(endQuoteChar[i]))
                     for i in range(len(endQuoteChar)-1,0,-1) ]
        pieces.append('|(?:' + ')|(?:'.join(partials) + ')')
    if escQuote:
        pieces.append(r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        pieces.append(r'|(?:%s.)' % re.escape(escChar))
        charset = ''.join(set(quoteChar[0]+endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
        self.escCharReplacePattern = re.escape(escChar)+("([%s])" % charset)
    pieces.append(r')*%s' % re.escape(endQuoteChar))
    self.pattern = ''.join(pieces)

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1933
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string starting at C{loc}.

    Returns C{(end location, text)}.  When C{unquoteResults} is set, the
    quote delimiters are stripped, escape-character sequences covered by
    C{escCharReplacePattern} are reduced to the escaped character, and
    each C{escQuote} sequence is replaced by the end quote.  Raises
    C{ParseException} when no quoted string starts at C{loc}.
    """
    # cheap first-character test before running the regex
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring, loc)
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off the opening and closing quote delimiters
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; the group reference is written as
            # a raw string because "\g" is not a valid string escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1957
1959 try:
1960 return super(QuotedString,self).__str__()
1961 except:
1962 pass
1963
1964 if self.strRepr is None:
1965 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1966
1967 return self.strRepr
1968
1969
1971 """Token for matching words composed of characters *not* in a given set.
1972 Defined with string containing all disallowed characters, and an optional
1973 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
1974 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1975 are 0, meaning no maximum or exact length restriction.
1976 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Build a matcher for runs of characters that are not in C{notChars}.

    Parameters:
     - notChars - the disallowed characters
     - min - minimum run length; a value below 1 raises C{ValueError}
     - max - maximum run length; 0 or less means no maximum
     - exact - when greater than 0, overrides both C{min} and C{max}

    Leading-whitespace skipping is turned off for this element.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        shortest = longest = exact
    else:
        shortest = min
        longest = max if max > 0 else _MAX_INT
    self.minLen = shortest
    self.maxLen = longest

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2000
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters none of which is in C{notChars}.

    Consumes at most C{maxLen} characters.  Returns
    C{(end location, matched text)}; raises C{ParseException} when the
    first character is disallowed or the run is shorter than C{minLen}.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    stop = min( begin+self.maxLen, len(instring) )
    loc += 1
    while loc < stop and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2017
2019 try:
2020 return super(CharsNotIn, self).__str__()
2021 except:
2022 pass
2023
2024 if self.strRepr is None:
2025 if len(self.notChars) > 4:
2026 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2027 else:
2028 self.strRepr = "!W:(%s)" % self.notChars
2029
2030 return self.strRepr
2031
2033 """Special matching class for matching whitespace. Normally, whitespace is ignored
2034 by pyparsing grammars. This class is included when some whitespace structures
2035 are significant. Define with a string containing the whitespace characters to be
2036 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2037 as defined for the C{L{Word}} class."""
2038 whiteStrs = {
2039 " " : "<SPC>",
2040 "\t": "<TAB>",
2041 "\n": "<LF>",
2042 "\r": "<CR>",
2043 "\f": "<FF>",
2044 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Build a matcher for significant whitespace.

    Parameters:
     - ws - the whitespace characters to match
     - min - minimum run length
     - max - maximum run length; 0 or less means no maximum
     - exact - when greater than 0, overrides both C{min} and C{max}

    The characters in C{ws} are removed from this element's own skippable
    whitespace so that they are matched rather than skipped.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    stillSkipped = "".join(c for c in self.whiteChars if c not in self.matchWhite)
    self.setWhitespaceChars( stillSkipped )
    # display name is built from the symbolic names in White.whiteStrs
    self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        shortest = longest = exact
    else:
        shortest = min
        longest = max if max > 0 else _MAX_INT
    self.minLen = shortest
    self.maxLen = longest
2064
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of the configured whitespace characters at C{loc}.

    Consumes at most C{maxLen} characters.  Returns
    C{(end location, matched text)}; raises C{ParseException} when the
    first character is not in C{matchWhite} or the run is shorter than
    C{minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    stop = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < stop and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2079
2080
2083 super(_PositionToken,self).__init__()
2084 self.name=self.__class__.__name__
2085 self.mayReturnEmpty = True
2086 self.mayIndexError = False
2087
2089 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2093
2095 if col(loc,instring) != self.col:
2096 instrlen = len(instring)
2097 if self.ignoreExprs:
2098 loc = self._skipIgnorables( instring, loc )
2099 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2100 loc += 1
2101 return loc
2102
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the target column C{self.col}.

    The current column is obtained from the module's C{col()} helper.
    Returns C{(new location, skipped text)}; raises C{ParseException} when
    the current column is already past the target.
    """
    currentCol = col( loc, instring )
    if currentCol > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - currentCol
    return target, instring[ loc: target ]
2110
2112 """Matches if current position is at the beginning of a line within the parse string"""
2117
2119 preloc = super(LineStart,self).preParse(instring,loc)
2120 if instring[preloc] == "\n":
2121 loc += 1
2122 return loc
2123
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (returning no tokens) only when C{loc} is at the start of a line.

    A position counts as a line start when it is 0, when it equals the
    location C{preParse} reaches from 0, or when the previous character
    is a newline; otherwise C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse( instring, 0 ):
        return loc, []
    if instring[loc-1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2130
2132 """Matches if current position is at the end of a line within the parse string"""
2137
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the very end of the input.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} at end of
    input; any other position raises C{ParseException}.
    """
    instrlen = len(instring)
    if loc == instrlen:
        return loc+1, []
    if loc < instrlen and instring[loc] == "\n":
        return loc+1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
2148
2150 """Matches if current position is at the beginning of the parse string"""
2154
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (returning no tokens) only at the beginning of the parse string.

    A non-zero C{loc} is still accepted when it is exactly where
    C{preParse} ends up starting from position 0.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2161
2163 """Matches if current position is at the end of the parse string"""
2167
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (returning no tokens) only at or beyond the end of the parse string.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when it is already past it.  Raises C{ParseException}
    when input remains at C{loc}.
    """
    instrlen = len(instring)
    if loc < instrlen:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == instrlen:
        return loc+1, []
    # loc > instrlen is the only remaining case; the former trailing
    # 'else: raise' could never be reached
    return loc, []
2177
2179 """Matches if the current position is at the beginning of a Word, and
2180 is not preceded by any character in a given set of C{wordChars}
2181 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2182 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2183 the string being parsed, or at the beginning of a line.
2184 """
2186 super(WordStart,self).__init__()
2187 self.wordChars = set(wordChars)
2188 self.errmsg = "Not at the start of a word"
2189
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed (returning no tokens) when C{loc} is at the start of a word.

    Position 0 always matches.  Elsewhere the previous character must not
    be in C{self.wordChars} and the character at C{loc} must be; otherwise
    C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    wordChars = self.wordChars
    if instring[loc-1] in wordChars or instring[loc] not in wordChars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2196
2198 """Matches if the current position is at the end of a Word, and
2199 is not followed by any character in a given set of C{wordChars}
2200 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2201 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2202 the string being parsed, or at the end of a line.
2203 """
2205 super(WordEnd,self).__init__()
2206 self.wordChars = set(wordChars)
2207 self.skipWhitespace = False
2208 self.errmsg = "Not at the end of a word"
2209
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed (returning no tokens) when C{loc} is at the end of a word.

    At or past the end of the input (including an empty input) this always
    matches.  Elsewhere the character at C{loc} must not be in
    C{self.wordChars} and the previous character must be; otherwise
    C{ParseException} is raised.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # at loc == 0 there is no previous character; instring[-1] would
        # silently wrap around to the last character of the input
        if (instring[loc] in self.wordChars or
            loc == 0 or
            instring[loc-1] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2217
2218
2220 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Normalise C{exprs} into the list C{self.exprs}.

    C{exprs} may be a single string (wrapped in a C{Literal}), a generator,
    a sequence of expressions, a sequence made up entirely of strings (each
    wrapped in a C{Literal}), any other iterable, or a single non-iterable
    object (stored as a one-element list).
    """
    # collections.Sequence was removed in Python 3.10; the ABCs live in
    # collections.abc, with a fallback for interpreters that lack it
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if sequence of strings provided, wrap with Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable at all: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2239
2241 return self.exprs[i]
2242
2244 self.exprs.append( other )
2245 self.strRepr = None
2246 return self
2247
2249 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2250 all contained expressions."""
2251 self.skipWhitespace = False
2252 self.exprs = [ e.copy() for e in self.exprs ]
2253 for e in self.exprs:
2254 e.leaveWhitespace()
2255 return self
2256
2258 if isinstance( other, Suppress ):
2259 if other not in self.ignoreExprs:
2260 super( ParseExpression, self).ignore( other )
2261 for e in self.exprs:
2262 e.ignore( self.ignoreExprs[-1] )
2263 else:
2264 super( ParseExpression, self).ignore( other )
2265 for e in self.exprs:
2266 e.ignore( self.ignoreExprs[-1] )
2267 return self
2268
2270 try:
2271 return super(ParseExpression,self).__str__()
2272 except:
2273 pass
2274
2275 if self.strRepr is None:
2276 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2277 return self.strRepr
2278
2280 super(ParseExpression,self).streamline()
2281
2282 for e in self.exprs:
2283 e.streamline()
2284
2285
2286
2287
2288 if ( len(self.exprs) == 2 ):
2289 other = self.exprs[0]
2290 if ( isinstance( other, self.__class__ ) and
2291 not(other.parseAction) and
2292 other.resultsName is None and
2293 not other.debug ):
2294 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2295 self.strRepr = None
2296 self.mayReturnEmpty |= other.mayReturnEmpty
2297 self.mayIndexError |= other.mayIndexError
2298
2299 other = self.exprs[-1]
2300 if ( isinstance( other, self.__class__ ) and
2301 not(other.parseAction) and
2302 other.resultsName is None and
2303 not other.debug ):
2304 self.exprs = self.exprs[:-1] + other.exprs[:]
2305 self.strRepr = None
2306 self.mayReturnEmpty |= other.mayReturnEmpty
2307 self.mayIndexError |= other.mayIndexError
2308
2309 return self
2310
2314
def validate( self, validateTrace=[] ):
    """Validate every contained expression, then check this one for recursion.

    C{validateTrace} is copied, never modified; each sub-expression receives
    the copy extended with this expression.
    """
    trace = validateTrace[:] + [ self ]
    for subExpr in self.exprs:
        subExpr.validate( trace )
    self.checkRecursion( [] )
2320
2325
2326 -class And(ParseExpression):
2327 """Requires all given C{ParseExpression}s to be found in the given order.
2328 Expressions may be separated by whitespace.
2329 May be constructed using the C{'+'} operator.
2330 """
2331
2337
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    The whitespace settings of the first contained expression are adopted,
    and the result may return empty only if every contained expression may.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # read from self.exprs, not the raw argument: the argument may be a
    # generator (not indexable) or a list of plain strings (no whiteChars),
    # both of which the base-class constructor has already normalised
    leading = self.exprs[0]
    self.setWhitespaceChars( leading.whiteChars )
    self.skipWhitespace = leading.skipWhitespace
    self.callPreparse = True
2344
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order and accumulate the tokens.

    Once an C{And._ErrorStop} marker has been passed, a failure in any later
    expression is re-raised as C{ParseSyntaxException}.
    """
    # the first element is parsed with callPreParse=False; every later
    # element goes through _parse with its default pre-parse behaviour
    loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = expr._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = expr._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2369
2371 if isinstance( other, basestring ):
2372 other = Literal( other )
2373 return self.append( other )
2374
2376 subRecCheckList = parseElementList[:] + [ self ]
2377 for e in self.exprs:
2378 e.checkRecursion( subRecCheckList )
2379 if not e.mayReturnEmpty:
2380 break
2381
2383 if hasattr(self,"name"):
2384 return self.name
2385
2386 if self.strRepr is None:
2387 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2388
2389 return self.strRepr
2390
2391
2392 -class Or(ParseExpression):
2393 """Requires that at least one C{ParseExpression} is found.
2394 If two expressions match, the expression that matches the longest string will be used.
2395 May be constructed using the C{'^'} operator.
2396 """
def __init__( self, exprs, savelist = False ):
    """Build a longest-match alternation from C{exprs}.

    With no alternatives the expression is marked as able to return empty;
    otherwise it may return empty if any alternative may.
    """
    super(Or,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
2403
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and parse with the one that advances furthest.

    Each alternative is first probed with C{tryParse}; the first one that
    reaches the greatest end location is then parsed for real.  If none
    matches, the exception raised furthest into the input is re-raised
    (or a generic C{ParseException} when there are no alternatives).
    """
    furthestErrLoc = -1
    furthestErr = None
    bestLoc = -1
    bestExpr = None
    for alt in self.exprs:
        try:
            endLoc = alt.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestErrLoc:
                furthestErr, furthestErrLoc = err, err.loc
        except IndexError:
            # running off the end of the input counts as a failure there
            instrlen = len(instring)
            if instrlen > furthestErrLoc:
                furthestErr = ParseException(instring,instrlen,alt.errmsg,self)
                furthestErrLoc = instrlen
        else:
            if endLoc > bestLoc:
                bestLoc, bestExpr = endLoc, alt

    if bestLoc < 0:
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestErr

    return bestExpr._parse( instring, loc, doActions )
2432
2434 if isinstance( other, basestring ):
2435 other = ParserElement.literalStringClass( other )
2436 return self.append( other )
2437
2439 if hasattr(self,"name"):
2440 return self.name
2441
2442 if self.strRepr is None:
2443 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2444
2445 return self.strRepr
2446
2448 subRecCheckList = parseElementList[:] + [ self ]
2449 for e in self.exprs:
2450 e.checkRecursion( subRecCheckList )
2451
2452
2454 """Requires that at least one C{ParseExpression} is found.
2455 If two expressions match, the first one listed is the one that will match.
2456 May be constructed using the C{'|'} operator.
2457 """
def __init__( self, exprs, savelist = False ):
    """Build a first-match alternation from C{exprs}.

    With no alternatives the expression is marked as able to return empty;
    otherwise it may return empty if any alternative may.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)
2464
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at C{loc}.

    If no alternative matches, the exception raised furthest into the input
    is re-raised (or a generic C{ParseException} when there are no
    alternatives).
    """
    furthestLoc = -1
    furthestErr = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthestLoc:
                furthestErr, furthestLoc = err, err.loc
        except IndexError:
            # running off the end of the input counts as a failure there
            instrlen = len(instring)
            if instrlen > furthestLoc:
                furthestErr = ParseException(instring,instrlen,alt.errmsg,self)
                furthestLoc = instrlen

    # only reached when no alternative matched
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestErr
2487
2489 if isinstance( other, basestring ):
2490 other = ParserElement.literalStringClass( other )
2491 return self.append( other )
2492
2494 if hasattr(self,"name"):
2495 return self.name
2496
2497 if self.strRepr is None:
2498 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2499
2500 return self.strRepr
2501
2503 subRecCheckList = parseElementList[:] + [ self ]
2504 for e in self.exprs:
2505 e.checkRecursion( subRecCheckList )
2506
2507
2508 -class Each(ParseExpression):
2509 """Requires all given C{ParseExpression}s to be found, but in any order.
2510 Expressions may be separated by whitespace.
2511 May be constructed using the C{'&'} operator.
2512 """
def __init__( self, exprs, savelist = True ):
    """Build an any-order conjunction from C{exprs}.

    The expression groups used by C{parseImpl} are computed lazily, so
    C{initExprGroups} starts out set.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(sub.mayReturnEmpty for sub in self.exprs)
2518
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they appear.

    A first pass probes the input with C{tryParse} to discover the order in
    which the expressions match; a second pass parses them for real in that
    order and merges the results.  Raises C{ParseException} if any required
    expression was never matched.
    """
    if self.initExprGroups:
        # classify the sub-expressions once, on first use
        opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        # opt1 holds the *inner* expressions of the Optional wrappers, so the
        # wrappers must be excluded here by type; a membership test against
        # opt1 compares wrappers with inner expressions and would list every
        # Optional a second time
        opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional) ]
        self.optionals = opt1 + opt2
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    keepMatching = True
    while keepMatching:
        tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
        failed = []
        for e in tmpExprs:
            try:
                tmpLoc = e.tryParse( instring, tmpLoc )
            except ParseException:
                failed.append(e)
            else:
                matchOrder.append(e)
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        if len(failed) == len(tmpExprs):
            keepMatching = False

    if tmpReqd:
        missing = ", ".join(_ustr(e) for e in tmpReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

    # second pass: parse for real, in the order discovered above
    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults([])
    for r in resultlist:
        # a name already present in finalResults gets its old and new values
        # combined, and is re-assigned after the merge
        dups = {}
        for k in r.keys():
            if k in finalResults:
                tmp = ParseResults(finalResults[k])
                tmp += ParseResults(r[k])
                dups[k] = tmp
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2576
2578 if hasattr(self,"name"):
2579 return self.name
2580
2581 if self.strRepr is None:
2582 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2583
2584 return self.strRepr
2585
2587 subRecCheckList = parseElementList[:] + [ self ]
2588 for e in self.exprs:
2589 e.checkRecursion( subRecCheckList )
2590
2591
2593 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression, inheriting its parsing attributes.

    A plain string is converted to a C{Literal}.  C{expr} may be C{None},
    in which case nothing is copied from it.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return
    # mirror the wrapped expression's behaviour on the wrapper itself
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2608
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression, without pre-parsing.

    Raises C{ParseException} when no expression has been set.
    """
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2614
2616 self.skipWhitespace = False
2617 self.expr = self.expr.copy()
2618 if self.expr is not None:
2619 self.expr.leaveWhitespace()
2620 return self
2621
2623 if isinstance( other, Suppress ):
2624 if other not in self.ignoreExprs:
2625 super( ParseElementEnhance, self).ignore( other )
2626 if self.expr is not None:
2627 self.expr.ignore( self.ignoreExprs[-1] )
2628 else:
2629 super( ParseElementEnhance, self).ignore( other )
2630 if self.expr is not None:
2631 self.expr.ignore( self.ignoreExprs[-1] )
2632 return self
2633
2639
2641 if self in parseElementList:
2642 raise RecursiveGrammarException( parseElementList+[self] )
2643 subRecCheckList = parseElementList[:] + [ self ]
2644 if self.expr is not None:
2645 self.expr.checkRecursion( subRecCheckList )
2646
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression (if any), then check for recursion.

    C{validateTrace} is copied, never modified; the wrapped expression
    receives the copy extended with this expression.
    """
    trace = validateTrace[:] + [ self ]
    inner = self.expr
    if inner is not None:
        inner.validate( trace )
    self.checkRecursion( [] )
2652
2654 try:
2655 return super(ParseElementEnhance,self).__str__()
2656 except:
2657 pass
2658
2659 if self.strRepr is None and self.expr is not None:
2660 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2661 return self.strRepr
2662
2663
2665 """Lookahead matching of the given parse expression. C{FollowedBy}
2666 does *not* advance the parsing position within the input string, it only
2667 verifies that the specified parse expression matches at the current
2668 position. C{FollowedBy} always returns a null token list."""
2672
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc} without consuming input.

    The location is returned unchanged with an empty token list; any
    exception raised by the lookahead propagates to the caller.
    """
    lookahead = self.expr
    lookahead.tryParse( instring, loc )
    return loc, []
2676
2677
2678 -class NotAny(ParseElementEnhance):
2679 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2680 does *not* advance the parsing position within the input string, it only
2681 verifies that the specified parse expression does *not* match at the current
2682 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2683 always returns a null token list. May be constructed using the '~' operator."""
2685 super(NotAny,self).__init__(expr)
2686
2687 self.skipWhitespace = False
2688 self.mayReturnEmpty = True
2689 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2690
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only if the wrapped expression does NOT match at C{loc}.

    Raises C{ParseException} when the wrapped expression matches.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        # the unwanted expression failed to match, which is what we want
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2699
2701 if hasattr(self,"name"):
2702 return self.name
2703
2704 if self.strRepr is None:
2705 self.strRepr = "~{" + _ustr(self.expr) + "}"
2706
2707 return self.strRepr
2708
2709
2711 """Optional repetition of zero or more of the given expression."""
2715
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression as many times as possible, possibly zero.

    Never raises for a failed match: the first C{ParseException} or
    C{IndexError} simply ends the repetition, and whatever was collected
    so far is returned.
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            # step over ignorable text between repetitions, if any is defined
            preloc = self._skipIgnorables( instring, loc ) if skipIgnorables else loc
            loc, moreTokens = self.expr._parse( instring, preloc, doActions )
            if moreTokens or moreTokens.haskeys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
2733
2735 if hasattr(self,"name"):
2736 return self.name
2737
2738 if self.strRepr is None:
2739 self.strRepr = "[" + _ustr(self.expr) + "]..."
2740
2741 return self.strRepr
2742
2747
2748
2750 """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression one or more times.

    The first match is mandatory, so its exception propagates to the
    caller; after that, the first C{ParseException} or C{IndexError}
    simply ends the repetition.
    """
    # must be at least one
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            # step over ignorable text between repetitions, if any is defined
            preloc = self._skipIgnorables( instring, loc ) if skipIgnorables else loc
            loc, moreTokens = self.expr._parse( instring, preloc, doActions )
            if moreTokens or moreTokens.haskeys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
2768
2770 if hasattr(self,"name"):
2771 return self.name
2772
2773 if self.strRepr is None:
2774 self.strRepr = "{" + _ustr(self.expr) + "}..."
2775
2776 return self.strRepr
2777
2782
2789
2790 _optionalNotMatched = _NullToken()
2792 """Optional matching of the given expression.
2793 A default return string can also be specified, if the optional expression
2794 is not found.
2795 """
2797 super(Optional,self).__init__( expr, savelist=False )
2798 self.defaultValue = default
2799 self.mayReturnEmpty = True
2800
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression if possible; otherwise supply the default.

    When the wrapped expression fails and a default value was given, the
    default is returned as the only token (also stored under the wrapped
    expression's results name, if it has one).  Without a default, an
    empty token list is returned.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
2814
2816 if hasattr(self,"name"):
2817 return self.name
2818
2819 if self.strRepr is None:
2820 self.strRepr = "[" + _ustr(self.expr) + "]"
2821
2822 return self.strRepr
2823
2824
2825 -class SkipTo(ParseElementEnhance):
2826 """Token for skipping over all undefined text until the matched expression is found.
2827 If C{include} is set to true, the matched expression is also parsed (the skipped text
2828 and matched expression are returned as a 2-element list). The C{ignore}
2829 argument is used to define grammars (typically quoted strings and comments) that
2830 might contain false matches.
2831 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Set up a skip to the expression C{other}.

    C{include} selects whether the target itself is parsed as well,
    C{ignore} is an expression to step over while scanning, and C{failOn}
    (an expression, or a string that is wrapped in a C{Literal}) aborts the
    skip when it is found first.
    """
    super( SkipTo, self ).__init__( other )
    self.includeMatch = include
    self.ignoreExpr = ignore
    # None is not a string, so a missing failOn is stored unchanged
    self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.errmsg = "No match found for "+_ustr(self.expr)
2844
def parseImpl( self, instring, loc, doActions=True ):
    """Advance one character at a time until the target expression matches.

    Returns the skipped text as a single token.  With C{includeMatch} set,
    the target is parsed too and, if it produced tokens, they are returned
    together with the skipped text as one nested result.  Raises
    C{ParseException} if C{failOn} matches first or the target is never
    found.
    """
    begin = loc
    end = len(instring)
    target = self.expr
    sawFailOn = False
    while loc <= end:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    sawFailOn = False
                else:
                    # flag it so the handler below re-raises instead of advancing
                    sawFailOn = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
            if self.ignoreExpr is not None:
                # step over as many consecutive ignorable matches as there are
                while True:
                    try:
                        loc = self.ignoreExpr.tryParse(instring,loc)
                    except ParseBaseException:
                        break
            # probe for the target without running parse actions
            target._parse( instring, loc, doActions=False, callPreParse=False )
            skipped = instring[begin:loc]
            if self.includeMatch:
                loc, matched = target._parse(instring,loc,doActions,callPreParse=False)
                if matched:
                    combined = ParseResults( skipped )
                    combined += matched
                    return loc, [ combined ]
            return loc, [ skipped ]
        except (ParseException,IndexError):
            if sawFailOn:
                raise
            loc += 1
    raise ParseException(instring, loc, self.errmsg, self)
2886
2887 -class Forward(ParseElementEnhance):
2888 """Forward declaration of an expression to be defined later -
2889 used for recursive grammars, such as algebraic infix notation.
2890 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
2891
2892 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
2893 Specifically, '|' has a lower precedence than '<<', so that::
2894 fwdExpr << a | b | c
2895 will actually be evaluated as::
2896 (fwdExpr << a) | b | c
2897 thereby leaving b and c out as parseable alternatives. It is recommended that you
2898 explicitly group the values inserted into the C{Forward}::
2899 fwdExpr << (a | b | c)
2900 Converting to use the '<<=' operator instead will avoid this problem.
2901 """
2904
2906 if isinstance( other, basestring ):
2907 other = ParserElement.literalStringClass(other)
2908 self.expr = other
2909 self.mayReturnEmpty = other.mayReturnEmpty
2910 self.strRepr = None
2911 self.mayIndexError = self.expr.mayIndexError
2912 self.mayReturnEmpty = self.expr.mayReturnEmpty
2913 self.setWhitespaceChars( self.expr.whiteChars )
2914 self.skipWhitespace = self.expr.skipWhitespace
2915 self.saveAsList = self.expr.saveAsList
2916 self.ignoreExprs.extend(self.expr.ignoreExprs)
2917 return self
2918
2920 return self << other
2921
2923 self.skipWhitespace = False
2924 return self
2925
2927 if not self.streamlined:
2928 self.streamlined = True
2929 if self.expr is not None:
2930 self.expr.streamline()
2931 return self
2932
def validate( self, validateTrace=[] ):
    """Validate the assigned expression once per trace, then check for recursion.

    If this expression is already in C{validateTrace} the nested validation
    is skipped, which keeps recursive grammars from looping forever.
    """
    alreadySeen = self in validateTrace
    if not alreadySeen:
        trace = validateTrace[:] + [ self ]
        if self.expr is not None:
            self.expr.validate( trace )
    self.checkRecursion([])
2939
2941 if hasattr(self,"name"):
2942 return self.name
2943
2944 self._revertClass = self.__class__
2945 self.__class__ = _ForwardNoRecurse
2946 try:
2947 if self.expr is not None:
2948 retString = _ustr(self.expr)
2949 else:
2950 retString = "None"
2951 finally:
2952 self.__class__ = self._revertClass
2953 return self.__class__.__name__ + ": " + retString
2954
2956 if self.expr is not None:
2957 return super(Forward,self).copy()
2958 else:
2959 ret = Forward()
2960 ret <<= self
2961 return ret
2962
2966
2968 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
2969 - def __init__( self, expr, savelist=False ):
2972
2973 -class Upcase(TokenConverter):
2974 """Converter to upper case all matching tokens."""
2976 super(Upcase,self).__init__(*args)
2977 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
2978 DeprecationWarning,stacklevel=2)
2979
def postParse( self, instring, loc, tokenlist ):
    """Return a new list holding every token converted with C{str.upper}."""
    return [ str.upper(tok) for tok in tokenlist ]
2982
2983
2985 """Converter to concatenate all matching tokens to a single string.
2986 By default, the matching patterns must also be contiguous in the input string;
2987 this can be disabled by specifying C{'adjacent=False'} in the constructor.
2988 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its tokens are joined into one string.

    With C{adjacent} set, whitespace skipping is switched off inside the
    wrapped expression; it is then switched back on for the C{Combine}
    itself, so leading whitespace before the whole match is still skipped.
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    if adjacent:
        self.leaveWhitespace()
    # must come after leaveWhitespace(), which is called just above
    self.skipWhitespace = True
    self.callPreparse = True
2998
3005
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single concatenated string.

    The result starts as a copy of C{tokenlist} with its list items removed,
    to which the joined string is added.  It is wrapped in a list when this
    expression has a results name and the result carries named values.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [ combined ]
    return combined
3015
3016 -class Group(TokenConverter):
3017 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3019 super(Group,self).__init__( expr )
3020 self.saveAsList = True
3021
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
3024
3025 -class Dict(TokenConverter):
3026 """Converter to return a repetitive expression as a list, but also as a dictionary.
3027 Each element can also be referenced using the first token in the expression as its key.
3028 Useful for tabular report scraping when the first column can be used as a item key.
3029 """
3031 super(Dict,self).__init__( expr )
3032 self.saveAsList = True
3033
def postParse( self, instring, loc, tokenlist ):
    """Register each non-empty entry of C{tokenlist} under its first token.

    The key is the entry's first token (integers are converted to a stripped
    string).  The stored value is an empty string for a one-token entry, the
    second token for a plain two-token entry, and otherwise the remaining
    tokens (unwrapped when exactly one unnamed token remains).  The token
    list is wrapped in a list when this expression has a results name.
    """
    for idx, entry in enumerate(tokenlist):
        size = len(entry)
        if size == 0:
            continue
        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()
        if size == 1:
            value = ""
        elif size == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            rest = entry.copy()
            del rest[0]
            if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                value = rest
            else:
                value = rest[0]
        tokenlist[key] = _ParseResultsWithOffset(value,idx)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3057
3058
3060 """Converter for ignoring the results of a parsed expression."""
3061 - def postParse( self, instring, loc, tokenlist ):
3063
3066
3067
3069 """Wrapper for parse actions, to ensure they are only called once."""
3071 self.callable = _trim_arity(methodCall)
3072 self.called = False
3074 if not self.called:
3075 results = self.callable(s,l,t)
3076 self.called = True
3077 return results
3078 raise ParseException(s,l,"")
3081
3083 """Decorator for debugging parse actions."""
3084 f = _trim_arity(f)
3085 def z(*paArgs):
3086 thisFunc = f.func_name
3087 s,l,t = paArgs[-3:]
3088 if len(paArgs)>3:
3089 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3090 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3091 try:
3092 ret = f(*paArgs)
3093 except Exception as exc:
3094 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3095 raise
3096 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3097 return ret
3098 try:
3099 z.__name__ = f.__name__
3100 except AttributeError:
3101 pass
3102 return z
3103
3104
3105
3106
3108 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3109 By default, the list elements and delimiters can have intervening whitespace, and
3110 comments, but this can be overridden by passing C{combine=True} in the constructor.
3111 If C{combine} is set to C{True}, the matching tokens are returned as a single token
3112 string, with the delimiters included; otherwise, the matching tokens are returned
3113 as a list of tokens, with the delimiters suppressed.
3114 """
3115 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3116 if combine:
3117 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3118 else:
3119 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3120
3122 """Helper to define a counted list of expressions.
3123 This helper defines a pattern of the form::
3124 integer expr expr expr...
3125 where the leading integer tells how many expr expressions follow.
3126 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3127 """
3128 arrayExpr = Forward()
3129 def countFieldParseAction(s,l,t):
3130 n = t[0]
3131 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3132 return []
3133 if intExpr is None:
3134 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3135 else:
3136 intExpr = intExpr.copy()
3137 intExpr.setName("arrayLen")
3138 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3139 return ( intExpr + arrayExpr )
3140
3142 ret = []
3143 for i in L:
3144 if isinstance(i,list):
3145 ret.extend(_flatten(i))
3146 else:
3147 ret.append(i)
3148 return ret
3149
3151 """Helper to define an expression that is indirectly defined from
3152 the tokens matched in a previous expression, that is, it looks
3153 for a 'repeat' of a previous expression. For example::
3154 first = Word(nums)
3155 second = matchPreviousLiteral(first)
3156 matchExpr = first + ":" + second
3157 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3158 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3159 If this is not desired, use C{matchPreviousExpr}.
3160 Do *not* use with packrat parsing enabled.
3161 """
3162 rep = Forward()
3163 def copyTokenToRepeater(s,l,t):
3164 if t:
3165 if len(t) == 1:
3166 rep << t[0]
3167 else:
3168
3169 tflat = _flatten(t.asList())
3170 rep << And( [ Literal(tt) for tt in tflat ] )
3171 else:
3172 rep << Empty()
3173 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3174 return rep
3175
3177 """Helper to define an expression that is indirectly defined from
3178 the tokens matched in a previous expression, that is, it looks
3179 for a 'repeat' of a previous expression. For example::
3180 first = Word(nums)
3181 second = matchPreviousExpr(first)
3182 matchExpr = first + ":" + second
3183 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3184 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3185 the expressions are evaluated first, and then compared, so
3186 C{"1"} is compared with C{"10"}.
3187 Do *not* use with packrat parsing enabled.
3188 """
3189 rep = Forward()
3190 e2 = expr.copy()
3191 rep <<= e2
3192 def copyTokenToRepeater(s,l,t):
3193 matchTokens = _flatten(t.asList())
3194 def mustMatchTheseTokens(s,l,t):
3195 theseTokens = _flatten(t.asList())
3196 if theseTokens != matchTokens:
3197 raise ParseException("",0,"")
3198 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3199 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3200 return rep
3201
3203
3204 for c in r"\^-]":
3205 s = s.replace(c,_bslash+c)
3206 s = s.replace("\n",r"\n")
3207 s = s.replace("\t",r"\t")
3208 return _ustr(s)
3209
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument of any other type produces a C{SyntaxWarning} and is treated
       as an empty list of literals.
    """
    # collections.Sequence was removed in Python 3.10; the ABCs live in
    # collections.abc, with a fallback for interpreters that lack it
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    # start from an empty list so that the invalid-argument branch below
    # leaves 'symbols' defined instead of failing later with a NameError
    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs)
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)

    # drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing was removed or moved for this position: go to the next
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            # fall back to MatchFirst, but let KeyboardInterrupt/SystemExit through
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3270
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.

       Parameters:
        - key - expression matching one dictionary key
        - value - expression matching the value that follows the key
    """
    # each key/value pair becomes its own group; Dict then keys the groups
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
3280
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers that simply report the location at which they matched
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def textOnly(s,l,t):
        # replace all tokens with the slice of input between the two markers
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # modify the results in place so other results names survive;
        # only the two internal marker names are removed
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    wrapped.setParseAction(textOnly if asString else textKeepingNames)
    return wrapped
3309
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.

       Parameters:
        - expr - the expression whose first token should replace its results
    """
    def firstToken(t):
        return t[0]
    return TokenConverter(expr).setParseAction(firstToken)
3314
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    matched = expr("value")
    # the end marker is a copy with leaveWhitespace() applied
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + matched + endLoc)
3327
3328
3329
# Convenience module-level instances of the parameterless expression
# classes, each given a readable name via setName().
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
3335
# Internal grammar used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## (either case of 'x') -> the character with that hex code.
# The prefix is removed with an anchored regex rather than str.lstrip():
# lstrip(r'\0x') strips a *set* of characters, which also ate leading zero
# digits (so "\x00" became int("")) and never removed an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]", "", t[0]),16)))
# \0 followed by octal digits -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one of the escapes above, or a single printable other than '\' and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1)
# "a-z" style range; grouped so srange() can tell a range from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3342
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed as a bracket expression.
    """
    # a grouped (ParseResults) part is a range "x-y"; anything else is already a character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best effort - but do not swallow KeyboardInterrupt/SystemExit
        return ""
3365
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.

       Parameters:
        - n - the required column, as reported by C{col()}

       Returns a parse action that raises C{ParseException} when the match
       location is at any other column.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3374
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.

       Parameters:
        - replStr - the value to return in place of the matched tokens

       Returns a function that accepts any positional arguments, ignores them,
       and returns a new one-element list containing C{replStr}.
    """
    def _replFunc(*_unused):
        # a fresh list on every call, so callers never share one list object
        return [replStr]
    return _replFunc
3382
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )

       Returns the first token with its first and last characters dropped.
    """
    quoted = t[0]
    return quoted[1:-1]
3389
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

       Each token is passed through C{_ustr} before being upper-cased.
    """
    return [ _ustr(tok).upper() for tok in t ]
3393
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

       Each token is passed through C{_ustr} before being lower-cased.
    """
    return [ _ustr(tok).lower() for tok in t ]
3397
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The tokens in C{t} are replaced in place by a single token holding
       C{s[startLoc:endloc]}, where C{endloc} comes from C{L{getTokensEndLoc}};
       the same C{t} object is returned."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3409
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks the call stack, skipping this function and its direct caller,
       looking for a frame of a function named C{_parseNoCache}, and returns
       that frame's local variable C{loc}.  Raises C{ParseFatalException} if
       no such frame is found."""
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in frames[2:]:
            if record[3] == "_parseNoCache":
                return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references promptly
        del frames
3425
3454
3458
3462
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       Positional name-value tuples, when given, take precedence and the
       keyword arguments are then ignored.
    """
    # snapshot the requirements as a list of (name, value) pairs
    required = [(name,val) for name,val in (args[:] if args else attrDict.items())]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "the attribute must exist, whatever its value"
withAttribute.ANY_VALUE = object()
3495
# Holder for the two associativity markers that infixNotation compares
# against; each marker is a distinct plain object.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3499
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expressions
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} for a ternary level whose opExpr is not a pair, for
       a numTerms other than 1, 2 or 3, or for an unrecognized associativity.
    """
    ret = Forward()
    # lowest level: a bare operand, or a complete expression between lpar and rpar
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for i,operDef in enumerate(opList):
        # pad with None so the optional parseAction tuple member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        # expression for this precedence level; right-associative forms refer to it recursively
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # opExpr of None: operands simply follow one another with no operator token
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # the lookahead uses the unwrapped operator (opExpr.expr), so the
                # Group is only attempted when the operator is actually present
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when no operator of this level is present
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# alias for the same function under a second name
operatorPrecedence = infixNotation
3573
# Quoted-string expressions.  Inside the quotes each pattern accepts: any
# character other than the quote, CR, LF or backslash; a doubled quote
# character; a \x hex escape; or a backslash followed by any one character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, combined into a single pattern
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter 'u'
unicodeString = Combine(_L('u') + quotedString.copy())
3578
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if C{opener} equals C{closer}, or if no C{content}
       is given and the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be built from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer)==1:
            # single-character delimiters can be excluded directly by CharsNotIn
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener+closer+whiteChars,exact=1))
                            ).setParseAction(stripToken)
            else:
                content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                            ).setParseAction(stripToken))
        else:
            # multi-character delimiters need negative lookahead before each character
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)

    ret = Forward()
    # ignored expressions are tried before nested lists and plain content
    if ignoreExpr is not None:
        items = ignoreExpr | ret | content
    else:
        items = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return ret
3630
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified: a backslash followed by
       end-of-line is added to its ignorable expressions.
    """
    def checkPeerIndent(s,l,t):
        # nothing to check at end of input
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if not curCol > indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the enclosing block: this column becomes the current level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    leadingNewlines = Optional(NL)
    if indent:
        block = leadingNewlines + INDENT
        block = block + OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
        smExpr = Group( block + UNDENT )
    else:
        smExpr = Group( leadingNewlines +
            OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) ) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3682
# Character sets for the 8-bit code points, expanded by srange() from the
# listed \0x## ranges: alphas8bit covers 0xc0-0xff except 0xd7 and 0xf7,
# which are placed in punc8bit together with 0xa1-0xbf.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
3685
# some other useful expressions - using lower-case class name since we are really using this as a namespace
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character
_htmlEntityMap = {
    "gt": ">",
    "lt": "<",
    "amp": "&",
    "nbsp": " ",
    "quot": '"',
}
def replaceHTMLEntity(t):
    """Parse action returning the character for the entity named by
       C{t.entity}, or C{None} if the name is not in C{_htmlEntityMap}."""
    if t.entity in _htmlEntityMap:
        return _htmlEntityMap[t.entity]
    return None
3690
3691
# Ready-made comment expressions.
# /* ... */ - ends at the first "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> - may span lines, ends at the first "-->"
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line; leaveWhitespace() is applied
restOfLine = Regex(r".*").leaveWhitespace()
# // to end of line, continuing across a backslash-newline
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either the /* ... */ form or the // form
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment under a second name
javaStyleComment = cppStyleComment
# '#' to end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one item of a comma-separated list: runs of non-comma printables, joined by
# interior blanks/tabs that are not followed by a comma or end of line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# items may be quoted strings or bare items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3705
3706
if __name__ == "__main__":

    def test( teststring ):
        """Parse C{teststring} with C{simpleSQL} and print the results, or
           print the failing line with a caret under the error column."""
        try:
            parsed = simpleSQL.parseString( teststring )
            asList = parsed.asList()
            print (teststring + "->" + str(asList))
            print ("tokens = " + str(parsed))
            print ("tokens.columns = " + str(parsed.columns))
            print ("tokens.tables = " + str(parsed.tables))
            print (parsed.asXML("SQL",True))
        except ParseBaseException as err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # a tiny "SELECT columns FROM tables" grammar used as a smoke test
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    columnsClause = ( '*' | columnNameList ).setResultsName( "columns" )
    tablesClause = tableNameList.setResultsName( "tables" )
    simpleSQL = ( selectToken + columnsClause + fromToken + tablesClause )

    # valid statements first, then several deliberately malformed ones
    for statement in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2 ",
            ):
        test( statement )
3749