Package pyparsing :: Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2013  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.0.2" 
  61  __versionTime__ = "13 April 2014 11:10" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  74   
  75  __all__ = [ 
  76  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  77  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  78  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  79  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  80  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  81  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  82  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  83  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  84  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  85  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  86  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  87  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  88  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  89  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  90  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  91  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  92  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 
  93  ] 
  94   
  95  PY_3 = sys.version.startswith('3') 
  96  if PY_3: 
  97      _MAX_INT = sys.maxsize 
  98      basestring = str 
  99      unichr = chr 
 100      _ustr = str 
 101   
 102      # build list of single arg builtins, that can be used as parse actions 
 103      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 104   
 105  else: 
 106      _MAX_INT = sys.maxint 
 107      range = xrange 
 108   
109 - def _ustr(obj):
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 112 then < returns the unicode object | encodes it with the default encoding | ... >. 113 """ 114 if isinstance(obj,unicode): 115 return obj 116 117 try: 118 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 119 # it won't break any existing code. 120 return str(obj) 121 122 except UnicodeEncodeError: 123 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 124 # state that "The return value must be a string object". However, does a 125 # unicode object (being a subclass of basestring) count as a "string 126 # object"? 127 # If so, then return a unicode object: 128 return unicode(obj)
129 # Else encode it... but how? There are many choices... :) 130 # Replace unprintables with escape codes? 131 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 132 # Replace unprintables with question marks? 133 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 134 # ... 135 136 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 137 singleArgBuiltins = [] 138 import __builtin__ 139 for fname in "sum len sorted reversed list tuple set any all min max".split(): 140 try: 141 singleArgBuiltins.append(getattr(__builtin__,fname)) 142 except AttributeError: 143 continue 144 145 _generatorType = type((y for y in range(1))) 146
def _xml_escape(data):
    """Return C{data} with the characters C{& > < " '} replaced by their
    named XML entities (C{&amp;} C{&gt;} C{&lt;} C{&quot;} C{&apos;})."""

    # '&' has to go first, otherwise the ampersands introduced by the later
    # substitutions would themselves be escaped again
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for plain, entity in replacements:
        data = data.replace(plain, entity)
    return data
156
class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
# Character-set constants used when building expressions.
nums = "0123456789"
alphas = string.ascii_lowercase + string.ascii_uppercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
class ParseBaseException(Exception):
    """Common base class for all exceptions raised while parsing.

    Stored attributes are C{loc}, C{msg}, C{pstr} and C{parserElement};
    C{lineno}, C{col}/C{column} and C{line} are computed on demand from
    C{loc} and C{pstr}.
    """
    # Performance tuning: a *lot* of these get constructed, so the
    # constructor is kept as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # called with a single text argument: it is the message, and
            # there is no input string to report on
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Computes the attributes that are derived from C{loc} and C{pstr}:
            - lineno - returns the line number of the exception text
            - col / column - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Returns the line on which the exception occurred, with
           C{markerString} inserted at the exception column and surrounding
           whitespace stripped.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            head = line_str[:line_column]
            tail = line_str[line_column:]
            line_str = "".join((head, markerString, tail))
        return line_str.strip()

    def __dir__(self):
        return [
            "loc", "msg", "pstr", "parserElement",
            "lineno", "col", "line",
            "markInputline", "__str__", "__repr__",
        ]
214
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Attributes available by name:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
223
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
228
class ParseSyntaxException(ParseFatalException):
    """Same as C{L{ParseFatalException}}, but raised internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop
       immediately because an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # carry over the location, message and element of the exception being wrapped
        details = (pe.pstr, pe.loc, pe.msg, pe.parserElement)
        super(ParseSyntaxException, self).__init__(*details)
236 237 #~ class ReparseException(ParseBaseException): 238 #~ """Experimental class - parse actions can raise this exception to cause 239 #~ pyparsing to reparse the input string: 240 #~ - with a modified input string, and/or 241 #~ - with a modified start location 242 #~ Set the values of the ReparseException in the constructor, and raise the 243 #~ exception in a parse action to cause pyparsing to use the new string/location. 244 #~ Setting the values as None causes no change to be made. 245 #~ """ 246 #~ def __init_( self, newstring, restartLoc ): 247 #~ self.newParseText = newstring 248 #~ self.reparseLoc = restartLoc 249
class RecursiveGrammarException(Exception):
    """Exception thrown by C{validate()} if the grammar could be improperly recursive.

       The constructor argument is stored unchanged as C{parseElementTrace}.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: with a bare '%' operand, a trace that
        # happens to be a tuple would be unpacked as several format arguments
        # and raise TypeError instead of being printed.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
257
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset, kept together as the 2-tuple C{tup}."""

    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # index 0 is the value, index 1 the offset
        return self.tup[i]

    def __repr__(self):
        return "%r" % (self.tup,)

    def setOffset(self,i):
        """Replaces the offset, keeping the stored value."""
        value = self.tup[0]
        self.tup = (value, i)
267
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing instance hands back that same instance
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        # tells __init__ that this object still needs its fields set up
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # a non-modal name accumulates every match instead of just the last
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer and slice keys index the token list; any other key is a
           results name.  A name returns its last stored value, unless it was
           registered as accumulating, in which case all of its values are
           returned wrapped in a new C{ParseResults}."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so that a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership is tested against the results names, not the tokens
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )

    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop()."""
        # *args arrives as a tuple; work on a list so that a keyword
        # 'default' can be appended after the key
        args = list(args) if args else [-1]
        if 'default' in kwargs:
            args.append(kwargs['default'])
        index = args[0]
        if (isinstance(index, int) or
                        len(args) == 1 or
                        index in self):
            ret = self[index]
            del self[index]
            return ret
        else:
            # unknown name and a default was supplied
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute-style access to results names; an undefined name gives
           the empty string instead of raising."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # negative positions are mapped to the join point, all others
                # are shifted past the tokens already held (a conditional
                # expression, so that an offset of 0 is handled correctly)
                return offset if a < 0 else a + offset

            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        """Supports C{0 + results}, which is what the C{sum()} builtin starts
           with; any other left operand is reported as unsupported."""
        if isinstance(other,int) and other == 0:
            return self.copy()
        # let Python raise its normal TypeError rather than yielding None
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out.append(_ustr(i))
            else:
                out.append(repr(i))
        return '[' + ', '.join(out) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested C{ParseResults} become nested lists, other tokens are
           included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        if PY_3:
            return dict( self.items() )
        else:
            return dict( self.iteritems() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): literal reproduced as it appears in the extracted
        # listing, which may have collapsed repeated spaces - confirm against
        # the pristine source
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object 'sub' is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be subscripted on Python 3, so take the
               # single entry through an iterator instead
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        items = sorted(self.items())
        for k,v in items:
            if out:
                out.append('\n')
            # NOTE(review): the per-level padding literal is reproduced as it
            # appears in the extracted listing, which may have collapsed
            # repeated spaces - confirm against the pristine source
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.haskeys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   # the weak reference is resolved to the parent itself (or None)
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        return dir(super(ParseResults,self)) + list(self.keys())

# Register ParseResults as a virtual MutableMapping.  The ABC lives in
# collections.abc on Python 3.3+ and is no longer reachable through the
# top-level collections module on Python 3.10+.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    _MutableMapping = collections.MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting directly on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the closest newline before loc (rfind gives -1
    # when there is none, which makes the first line 1-based as well)
    return loc - strg.rfind("\n", 0, loc)
698
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line, plus one more for every newline found before loc
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
710
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # the line starts just after the last newline before loc (index 0 when
    # rfind reports -1) and runs up to the next newline at or after loc
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
720
def _defaultStartDebugAction( instring, loc, expr ):
    """Prints the expression and the location (with its line and column) at
       which a match is being attempted."""
    pieces = [
        "Match ",
        _ustr(expr),
        " at loc ",
        _ustr(loc),
        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ),
    ]
    print ("".join(pieces))
723
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Prints the expression that matched together with the list form of its tokens."""
    matched = _ustr(expr)
    tokens = str(toks.asList())
    print ("Matched " + matched + " -> " + tokens)
726
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Prints the text of the exception that was raised."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
729
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

       Accepts any positional arguments, ignores them and returns C{None}."""
    return None
733 734 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 735 #~ 'decorator to trim function calls to match the arity of the target' 736 #~ def _trim_arity(func, maxargs=3): 737 #~ if func in singleArgBuiltins: 738 #~ return lambda s,l,t: func(t) 739 #~ limit = 0 740 #~ foundArity = False 741 #~ def wrapper(*args): 742 #~ nonlocal limit,foundArity 743 #~ while 1: 744 #~ try: 745 #~ ret = func(*args[limit:]) 746 #~ foundArity = True 747 #~ return ret 748 #~ except TypeError: 749 #~ if limit == maxargs or foundArity: 750 #~ raise 751 #~ limit += 1 752 #~ continue 753 #~ return wrapper 754 755 # this version is Python 2.x-3.x cross-compatible 756 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wraps C{func} so that it can be called with more positional arguments
       than it accepts.

       A function listed in C{singleArgBuiltins} is wrapped to receive only
       the third argument.  Otherwise the wrapper first passes every argument
       and, each time the call raises C{TypeError}, retries with one more
       leading argument dropped.  Once a call has succeeded, or more than
       C{maxargs} arguments have been dropped, a C{TypeError} is propagated
       unchanged.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)

    # mutable state shared with the closure (works on Python 2 and 3 alike)
    state = {"skip": 0, "settled": False}

    def wrapper(*args):
        while True:
            try:
                ret = func(*args[state["skip"]:])
            except TypeError:
                if state["settled"] or state["skip"] > maxargs:
                    raise
                state["skip"] += 1
            else:
                state["settled"] = True
                return ret

    return wrapper
775 -class ParserElement(object):
776 """Abstract base level parser element class.""" 777 DEFAULT_WHITE_CHARS = " \n\t\r" 778 verbose_stacktrace = False 779
780 - def setDefaultWhitespaceChars( chars ):
781 """Overrides the default whitespace chars 782 """ 783 ParserElement.DEFAULT_WHITE_CHARS = chars
784 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 785
786 - def inlineLiteralsUsing(cls):
787 """ 788 Set class to be used for inclusion of string literals into a parser. 789 """ 790 ParserElement.literalStringClass = cls
791 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) 792
793 - def __init__( self, savelist=False ):
794 self.parseAction = list() 795 self.failAction = None 796 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 797 self.strRepr = None 798 self.resultsName = None 799 self.saveAsList = savelist 800 self.skipWhitespace = True 801 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 802 self.copyDefaultWhiteChars = True 803 self.mayReturnEmpty = False # used when checking for left-recursion 804 self.keepTabs = False 805 self.ignoreExprs = list() 806 self.debug = False 807 self.streamlined = False 808 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 809 self.errmsg = "" 810 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 811 self.debugActions = ( None, None, None ) #custom debug actions 812 self.re = None 813 self.callPreparse = True # used to avoid redundant calls to preParse 814 self.callDuringTry = False
815
816 - def copy( self ):
817 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 818 for the same parsing pattern, using copies of the original parse element.""" 819 cpy = copy.copy( self ) 820 cpy.parseAction = self.parseAction[:] 821 cpy.ignoreExprs = self.ignoreExprs[:] 822 if self.copyDefaultWhiteChars: 823 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 824 return cpy
825
826 - def setName( self, name ):
827 """Define name for this expression, for use in debugging.""" 828 self.name = name 829 self.errmsg = "Expected " + self.name 830 if hasattr(self,"exception"): 831 self.exception.msg = self.errmsg 832 return self
833
834 - def setResultsName( self, name, listAllMatches=False ):
835 """Define name for referencing matching tokens as a nested attribute 836 of the returned parse results. 837 NOTE: this returns a *copy* of the original C{ParserElement} object; 838 this is so that the client can define a basic element, such as an 839 integer, and reference it in multiple places with different names. 840 841 You can also set results names using the abbreviated syntax, 842 C{expr("name")} in place of C{expr.setResultsName("name")} - 843 see L{I{__call__}<__call__>}. 844 """ 845 newself = self.copy() 846 if name.endswith("*"): 847 name = name[:-1] 848 listAllMatches=True 849 newself.resultsName = name 850 newself.modalResults = not listAllMatches 851 return newself
852
853 - def setBreak(self,breakFlag = True):
854 """Method to invoke the Python pdb debugger when this element is 855 about to be parsed. Set C{breakFlag} to True to enable, False to 856 disable. 857 """ 858 if breakFlag: 859 _parseMethod = self._parse 860 def breaker(instring, loc, doActions=True, callPreParse=True): 861 import pdb 862 pdb.set_trace() 863 return _parseMethod( instring, loc, doActions, callPreParse )
864 breaker._originalParseMethod = _parseMethod 865 self._parse = breaker 866 else: 867 if hasattr(self._parse,"_originalParseMethod"): 868 self._parse = self._parse._originalParseMethod 869 return self
870
871 - def setParseAction( self, *fns, **kwargs ):
872 """Define action to perform when successfully matching parse element definition. 873 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 874 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 875 - s = the original string being parsed (see note below) 876 - loc = the location of the matching substring 877 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 878 If the functions in fns modify the tokens, they can return them as the return 879 value from fn, and the modified list of tokens will replace the original. 880 Otherwise, fn does not need to return any value. 881 882 Note: the default parsing behavior is to expand tabs in the input string 883 before starting the parsing process. See L{I{parseString}<parseString>} for more information 884 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 885 consistent view of the parsed string, the parse location, and line and column 886 positions within the parsed string. 887 """ 888 self.parseAction = list(map(_trim_arity, list(fns))) 889 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 890 return self
891
def addParseAction(self, *fns, **kwargs):
    """Append C{fns} to this element's existing parse actions.

    See L{I{setParseAction}<setParseAction>} for the calling conventions.
    A true C{callDuringTry} keyword switches the flag on; an already-set
    flag is never cleared here.  Returns C{self}.
    """
    self.parseAction += [_trim_arity(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
897
def setFailAction(self, fn):
    """Register the action to run when parsing fails at this expression.

    The fail action C{fn} is a callable invoked as C{fn(s,loc,expr,err)} where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}.
    """
    self.failAction = fn
    return self
910
def _skipIgnorables(self, instring, loc):
    """Advance C{loc} past every run of ignorable expressions.

    Each expression in C{self.ignoreExprs} is matched repeatedly until it
    raises C{ParseException}; the whole list is retried until one full pass
    matches nothing.  Returns the resulting location.
    """
    while True:
        advanced = False
        for ignorable in self.ignoreExprs:
            try:
                # Consume as many consecutive matches as this expression allows.
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    advanced = True
            except ParseException:
                pass
        if not advanced:
            return loc
923
def preParse(self, instring, loc):
    """Return the location at which the real match attempt should start.

    Ignorable expressions are skipped first (when any are registered), then
    leading characters found in C{self.whiteChars} are skipped if
    C{self.skipWhitespace} is set.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
935
def parseImpl(self, instring, loc, doActions=True):
    """Default match implementation: consume nothing and produce no tokens.

    Returns C{(loc, [])} with C{loc} unchanged.
    """
    no_tokens = []
    return loc, no_tokens
938
def postParse(self, instring, loc, tokenlist):
    """Default post-processing hook: hand back C{tokenlist} unchanged."""
    result = tokenlist
    return result
941 942 #~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Run one uncached parse attempt of this element at C{loc}.

    Steps: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping of the
    tokens in a C{ParseResults}, then the parse actions.  The debug actions
    (C{self.debugActions}) and C{self.failAction} are fired around those
    steps when enabled.  Returns C{(loc, ParseResults)}.
    """
    debugging = self.debug
    # The instrumented path is taken when debugging or a fail action is set.
    instrumented = debugging or self.failAction

    if instrumented and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    tokensStart = preloc

    if instrumented:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        # Running off the end of the input is reported as a parse failure.
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                tokens = action(instring, tokensStart, retTokens)
                # A non-None return value replaces the current results.
                if tokens is not None:
                    retTokens = ParseResults(
                        tokens,
                        self.resultsName,
                        asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                        modal=self.modalResults)
        except ParseBaseException as err:
            # The exception debug action is only reported while debugging.
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1014
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with parse actions disabled.

    Returns only the end location of the match.  A C{ParseFatalException}
    raised by the attempt is converted into an ordinary C{ParseException}
    located at C{loc}.
    """
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1020 1021 # this method gets repeatedly called during backtracking with the same arguments - 1022 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache} under the key
    C{(self, instring, loc, callPreParse, doActions)}.  Both successful
    results and raised C{ParseBaseException}s are cached; a cached
    exception is re-raised, and cached results are returned as copies.
    """
    key = (self, instring, loc, callPreParse, doActions)

    if key in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[key]
        if isinstance(cached, Exception):
            raise cached
        endLoc, results = cached[0], cached[1]
        return (endLoc, results.copy())

    try:
        value = self._parseNoCache(instring, loc, doActions, callPreParse)
        # Store a copy so later changes to the returned results do not leak
        # into the cache.
        ParserElement._exprArgCache[key] = (value[0], value[1].copy())
        return value
    except ParseBaseException as pe:
        # Drop the traceback before the exception object is kept in the cache.
        pe.__traceback__ = None
        ParserElement._exprArgCache[key] = pe
        raise

# Default parse entry point is the uncached implementation; enablePackrat()
# rebinds ParserElement._parse to _parseCache.
_parse = _parseNoCache

# Memo table used by _parseCache, keyed on
# (expression, input string, location, callPreParse, doActions).
_exprArgCache = {}
@staticmethod
def resetCache():
    """Empty the shared C{ParserElement._exprArgCache} memo table in place."""
    ParserElement._exprArgCache.clear()


# Flipped to True by enablePackrat() the first time it is called.
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.
    If your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()}
    immediately after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
def parseString(self, instring, parseAll=False):
    """Execute the parse expression with the given string.

    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input
    string, in order to report proper column numbers in parse actions.
    If the input string contains tabs and the grammar uses parse actions
    that use the C{loc} argument to index into the string being parsed, you
    can ensure you have a consistent view of the input string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action using the full C{(s,loc,toks)} signature,
       and referencing the input string using the parse action's C{s} argument
     - explicitly expanding the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # Skip trailing ignorables/whitespace, then insist on end of input.
            loc = self.preParse(instring, loc)
            (Empty() + StringEnd())._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1114
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """Scan the input string for expression matches.

    This is a generator; each match yields the matching tokens, the start
    location, and the end location.  The optional C{maxMatches} argument
    clips scanning after 'n' matches are found.  If C{overlap} is specified,
    then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the
    string being parsed.  See L{I{parseString}<parseString>} for more
    information on parsing strings with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # Bound once up front; both are called on every scan position.
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn(instring, loc)
                matchEnd, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # No match here: retry one character past the pre-parsed spot.
                loc = preloc + 1
                continue

            if matchEnd <= loc:
                # Nothing consumed relative to loc; step on.
                loc = preloc + 1
                continue

            matches += 1
            yield tokens, preloc, matchEnd
            if not overlap:
                loc = matchEnd
                continue

            afterSkip = preparseFn(instring, loc)
            if afterSkip > loc:
                # NOTE(review): resumes at the end of the match rather than at
                # afterSkip; original behavior kept as-is - confirm intent.
                loc = matchEnd
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1164
def transformString(self, instring):
    """Extension to C{L{scanString}}, to modify matching text with modified
    tokens that may be returned from a parse action.

    To use C{transformString}, define a grammar and attach a parse action to
    it that modifies the returned token list.  Invoking C{transformString()}
    on a target string will then scan for matches, and replace the matched
    text patterns according to the logic in the parse action.
    C{transformString()} returns the resulting transformed string.

    Note that this sets C{self.keepTabs} to True on the element.
    """
    pieces = []
    consumed = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # Text between the previous match and this one is copied as-is.
            pieces.append(instring[consumed:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks, list):
                    pieces += toks
                else:
                    pieces.append(toks)
            consumed = end
        pieces.append(instring[consumed:])
        kept = [piece for piece in pieces if piece]
        return "".join(_ustr(piece) for piece in _flatten(kept))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1197
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to C{L{scanString}}, simplifying the access to the
    tokens found to match the given parse expression.

    Collects the tokens of every match into a single C{ParseResults}.  The
    optional C{maxMatches} argument clips searching after 'n' matches are
    found.
    """
    try:
        found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1211
def __add__(self, other):
    """Implementation of + operator - returns C{L{And}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1221
def __radd__(self, other):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}; the result is C{other + self}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1231
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.

    An C{And._ErrorStop()} marker is placed between the two operands.
    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1241
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}; the result is C{other - self}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1251
def __mul__(self, other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for negative counts, a maximum below
    the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # Pad short tuples with None so (n,) behaves like (n, None).
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # From here on optElements counts only the optional repetitions.
            optElements -= minElements
        else:
            # The message must be %-formatted here; passing the types as
            # extra exception arguments leaves the placeholders unfilled.
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # Nested Optionals: each extra repetition is only tried after
            # the previous one matched.
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand.

    Delegates to C{self.__mul__(other)}.
    """
    product = self.__mul__(other)
    return product
1322
def __or__(self, other):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1332
def __ror__(self, other):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}; the result is C{other | self}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1342
def __xor__(self, other):
    """Implementation of ^ operator - returns C{L{Or}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1352
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}; the result is C{other ^ self}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1362
def __and__(self, other):
    """Implementation of & operator - returns C{L{Each}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1372
def __rand__(self, other):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}; the result is C{other & self}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1382
def __invert__(self):
    """Implementation of ~ operator - wraps this element in a C{L{NotAny}}."""
    negated = NotAny(self)
    return negated
1386
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    could be written as::
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    If C{name} is given with a trailing C{'*'} character, then
    C{listAllMatches} will be passed as C{True}.

    If C{name} is omitted, same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1402
def suppress(self):
    """Suppresses the output of this C{ParserElement}; useful to keep
    punctuation from cluttering up returned output.

    Returns a new C{Suppress} wrapping this element.
    """
    silenced = Suppress(self)
    return silenced
1408
def leaveWhitespace(self):
    """Disables the skipping of whitespace before matching the characters in
    the C{ParserElement}'s defined pattern.

    This is normally only used internally by the pyparsing module, but may
    be needed in some whitespace-sensitive grammars.  Returns C{self}.
    """
    setattr(self, "skipWhitespace", False)
    return self
1416
def setWhitespaceChars(self, chars):
    """Overrides the default whitespace chars.

    Stores C{chars} as this element's whitespace set, turns whitespace
    skipping on, and clears C{copyDefaultWhiteChars}.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1424
def parseWithTabs(self):
    """Overrides default behavior to expand C{<TAB>}s to spaces before
    parsing the input string.

    Must be called before C{parseString} when the input grammar contains
    elements that match C{<TAB>} characters.  Returns C{self}.
    """
    setattr(self, "keepTabs", True)
    return self
1431
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A copy of C{other} is stored, wrapped in C{Suppress} unless it already
    is one.  A C{Suppress} already present in C{self.ignoreExprs} is not
    added again.  Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
1443
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching.

    Installs the three debug callbacks as the C{self.debugActions} tuple
    (start, success, exception), substituting the module's default action
    for any that is falsy, and turns C{self.debug} on.  Returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1451
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.

    Set C{flag} to True to enable (installing the default debug actions),
    False to disable.  Returns C{self}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
1460
def __str__(self):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
1463
def __repr__(self):
    """Return the same text as the string conversion, via C{_ustr}."""
    text = _ustr(self)
    return text
1466
def streamline(self):
    """Mark this element as streamlined and drop its cached string form.

    Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1471
def checkRecursion(self, parseElementList):
    """Recursion-check hook; this base implementation does nothing."""
    return None
1474
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite
    recursive definitions.

    This implementation ignores C{validateTrace} and simply starts a
    recursion check with an empty element list.
    """
    visited = []
    self.checkRecursion(visited)
1478
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    C{file_or_filename} may be any object with a C{read()} method, or a
    filename.  If a filename is specified (instead of a file object), the
    entire file is opened, read, and closed before parsing.  C{parseAll}
    is passed through to C{parseString}, whose result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # Not file-like: treat it as a path.  The with-block guarantees the
        # handle is closed even when read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1498
def __eq__(self, other):
    """Compare this element with another element or with a string.

     - another C{ParserElement}: equal when it is the same object or both
       have equal C{__dict__}s
     - a string: equal when this element parses the whole string
       (C{parseString(..., parseAll=True)} succeeds)
     - anything else: the result of comparing C{super(ParserElement,self)}
       with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1510
def __ne__(self, other):
    """Negation of the C{==} comparison between C{self} and C{other}."""
    same = (self == other)
    return not same
1513
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
1516
def __req__(self, other):
    """Reflected equality helper; same result as C{self == other}."""
    same = (self == other)
    return same
1519
def __rne__(self, other):
    """Reflected inequality helper; negation of C{self == other}."""
    same = (self == other)
    return not same
1522 1523
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__(self):
        # Tokens are initialised with savelist turned off.
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the element name and rebuild C{errmsg} from it.

        Returns whatever the base class C{setName} returned.
        """
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1533 1534
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # Matching never consumes input and never indexes into it.
        self.mayIndexError = False
        self.mayReturnEmpty = True
1542 1543
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail with a C{ParseException} at C{loc}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1555 1556
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # An empty literal is turned into an Empty element after warning.
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at C{loc}.

        Returns C{(loc + len(match), match)}; raises C{ParseException} when
        the text at C{loc} differs.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)


_L = Literal
# Strings combined with ParserElements via operators are wrapped in this class.
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done on upper-cased text throughout.
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}.

        The keyword text must be present and must not be directly preceded
        or followed by a character from C{identChars}.  Returns
        C{(loc + len(match), match)} or raises C{ParseException}.
        """
        matchLen = self.matchLen
        end = loc + matchLen
        idents = self.identChars
        if self.caseless:
            matched = (
                instring[loc:end].upper() == self.caselessmatch
                and (loc >= len(instring) - matchLen
                     or instring[end].upper() not in idents)
                and (loc == 0 or instring[loc - 1].upper() not in idents)
            )
        else:
            matched = (
                instring[loc] == self.firstMatchChar
                and (matchLen == 1 or instring.startswith(self.match, loc))
                and (loc >= len(instring) - matchLen
                     or instring[end] not in idents)
                and (loc == 0 or instring[loc - 1] not in idents)
            )
        if matched:
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy whose C{identChars} is reset to the class default."""
        duplicate = super(Keyword, self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor - original behavior kept; confirm it is intended.
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1641
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # The base Literal stores the upper-cased text for comparison.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Match case-insensitively at C{loc}.

        Returns C{(loc + len(match), returnString)}, i.e. the text as it was
        given to the constructor; raises C{ParseException} on mismatch.
        """
        end = loc + self.matchLen
        candidate = instring[loc:end].upper()
        if candidate != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1658
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}.

    Matches the keyword ignoring the case of letters; the returned token is
    the keyword text as given to the constructor.
    """

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, ignoring case.

        The keyword must not be directly followed or preceded by a character
        from C{identChars}.  The leading-boundary check brings this in line
        with the caseless branch of C{Keyword.parseImpl}, so the keyword is
        no longer matched as the tail of a longer identifier.
        Returns C{(loc + len(match), match)} or raises C{ParseException}.
        """
        end = loc + self.matchLen
        if ((instring[loc:end].upper() == self.caselessmatch) and
                (loc >= len(instring) - self.matchLen or
                 instring[end].upper() not in self.identChars) and
                (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1668
1669 -class Word(Token):
1670 """Token for matching words composed of allowed character sets. 1671 Defined with string containing all allowed initial characters, 1672 an optional string containing allowed body characters (if omitted, 1673 defaults to the initial character set), and an optional minimum, 1674 maximum, and/or exact length. The default value for C{min} is 1 (a 1675 minimum value < 1 is not valid); the default values for C{max} and C{exact} 1676 are 0, meaning no maximum or exact length restriction. An optional 1677 C{exclude} parameter can list characters that might be found in 1678 the input C{bodyChars} string; useful to define a word of all printables 1679 except for one or two characters, for instance. 1680 """
1681 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
1682 super(Word,self).__init__() 1683 if excludeChars: 1684 initChars = ''.join(c for c in initChars if c not in excludeChars) 1685 if bodyChars: 1686 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 1687 self.initCharsOrig = initChars 1688 self.initChars = set(initChars) 1689 if bodyChars : 1690 self.bodyCharsOrig = bodyChars 1691 self.bodyChars = set(bodyChars) 1692 else: 1693 self.bodyCharsOrig = initChars 1694 self.bodyChars = set(initChars) 1695 1696 self.maxSpecified = max > 0 1697 1698 if min < 1: 1699 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 1700 1701 self.minLen = min 1702 1703 if max > 0: 1704 self.maxLen = max 1705 else: 1706 self.maxLen = _MAX_INT 1707 1708 if exact > 0: 1709 self.maxLen = exact 1710 self.minLen = exact 1711 1712 self.name = _ustr(self) 1713 self.errmsg = "Expected " + self.name 1714 self.mayIndexError = False 1715 self.asKeyword = asKeyword 1716 1717 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 1718 if self.bodyCharsOrig == self.initCharsOrig: 1719 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 1720 elif len(self.bodyCharsOrig) == 1: 1721 self.reString = "%s[%s]*" % \ 1722 (re.escape(self.initCharsOrig), 1723 _escapeRegexRangeChars(self.bodyCharsOrig),) 1724 else: 1725 self.reString = "[%s][%s]*" % \ 1726 (_escapeRegexRangeChars(self.initCharsOrig), 1727 _escapeRegexRangeChars(self.bodyCharsOrig),) 1728 if self.asKeyword: 1729 self.reString = r"\b"+self.reString+r"\b" 1730 try: 1731 self.re = re.compile( self.reString ) 1732 except: 1733 self.re = None
1734
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc} and return C{(end location, matched text)}.

       Uses the compiled regular expression when one is available,
       otherwise scans character by character.  Raises C{ParseException}
       when no acceptable word starts at C{loc}.
    """
    if self.re:
        matched = self.re.match(instring,loc)
        if matched:
            return matched.end(), matched.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    instrlen = len(instring)
    allowed = self.bodyChars
    # never scan beyond the maximum length or the end of the input
    limit = min( start + self.maxLen, instrlen )
    loc += 1
    while loc < limit and instring[loc] in allowed:
        loc += 1

    # is the character right after the match another body character?
    moreFollows = loc < instrlen and instring[loc] in allowed
    tooShort = loc - start < self.minLen
    overruns = self.maxSpecified and moreFollows
    glued = self.asKeyword and (
        (start>0 and instring[start-1] in allowed) or moreFollows )

    if tooShort or overruns or glued:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
1769
def __str__( self ):
    """Return the inherited string form when that call succeeds, otherwise
       a cached C{W:(...)} summary of the character sets.
    """
    try:
        return super(Word,self).__str__()
    except:
        # any failure of the inherited __str__ falls through to the summary
        pass

    if self.strRepr is not None:
        return self.strRepr

    def abbreviate(chars):
        # show at most the first four characters
        return chars[:4]+"..." if len(chars)>4 else chars

    if self.initCharsOrig == self.bodyCharsOrig:
        self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
    else:
        self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

    return self.strRepr
1791 1792
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A pre-compiled regular expression object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

           If C{pattern} is an already compiled expression it is used
           directly; C{flags} is then only recorded, not applied.

           Raises the C{re} compile error for an invalid pattern string and
           C{ValueError} for any other type of C{pattern}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # keep the regular expression text itself; str(pattern) would
            # store the repr of the compiled object instead
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression at C{loc}; return the end location and a
           C{ParseResults} holding the matched text, with every named group
           also stored under its name.  Raises C{ParseException} on no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the inherited string form when that call succeeds,
           otherwise a cached C{Re:(...)} description of the pattern.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1855 1856
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} (after issuing a warning) when C{quoteChar}
           or C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # the escape character, when given, is excluded from the class of
        # ordinary body characters
        escCharInClass = ''
        if escChar is not None:
            escCharInClass = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        else:
            # single-line strings may not contain line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote in the
            # body, as long as the character that follows breaks the quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; return the end location and the
           matched text, with quotes removed and escapes resolved when
           C{unquoteResults} is set.  Raises C{ParseException} on no match.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid string-literal escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the inherited string form when that call succeeds,
           otherwise a cached description naming both quote delimiters.
        """
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1968 1969
class CharsNotIn(Token):
    """Token for matching words made only of characters that are *not* in
       a given set.  Defined with a string of the disallowed characters and
       optional C{min}, C{max} and C{exact} lengths.  C{min} defaults to 1
       (smaller values are rejected); C{max} and C{exact} default to 0,
       meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and the length limits.

           Raises C{ValueError} if C{min} is less than 1.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} up to the first disallowed
           character (or the maximum length); return the end location and the
           consumed text.  Raises C{ParseException} if the first character is
           disallowed or fewer than the minimum were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the inherited string form when that call succeeds,
           otherwise a cached C{!W:(...)} summary of the excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # any failure of the inherited __str__ falls through to the summary
            pass

        if self.strRepr is not None:
            return self.strRepr

        if len(self.notChars) > 4:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        else:
            self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2031
class White(Token):
    """Token for explicitly matching whitespace, for grammars where some
       whitespace is significant.  Define with a string of the whitespace
       characters to match; the default is C{" \\t\\r\\n"}.  Also accepts
       optional C{min}, C{max}, and C{exact} arguments, as for C{L{Word}}."""
    # display names used to build the token name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the characters to match and the length limits.  The
           characters to match are removed from the set of whitespace that
           is skipped before this token."""
        super(White,self).__init__()
        self.matchWhite = ws
        stillSkipped = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( stillSkipped )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at
           C{loc}; return the end location and the consumed text.  Raises
           C{ParseException} if the run is missing or too short."""
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2079 2080
class _PositionToken(Token):
    """Common base for the position-checking tokens defined below it."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        # flags shared by every subclass
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # the token is named after its concrete class
        self.name = self.__class__.__name__
2087
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """Remember the target column C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and then whitespace, stopping as soon
           as the target column is reached; return the new location."""
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the location of the target column together with the text
           between C{loc} and that column.  Raises C{ParseException} when
           C{loc} is already past the target column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2110
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc+1} when the inherited pre-parse stops on a newline,
           otherwise C{loc} unchanged."""
        skipped = super(LineStart,self).preParse(instring,loc)
        return loc + 1 if instring[skipped] == "\n" else loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when C{loc} is 0, equals the
           pre-parsed start of the string, or directly follows a newline;
           raise C{ParseException} otherwise."""
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2130
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc} (returned as the token) or the end of
           the string (empty token list); raise C{ParseException} otherwise."""
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
2148
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at location 0, or at the
           location that pre-parsing from 0 arrives at; raise
           C{ParseException} anywhere else."""
        # a non-zero loc is acceptable only if everything before it is
        # whitespace and ignorables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2161
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at or beyond the end of the
           string; raise C{ParseException} when input remains."""
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            # step past the end so the match is not repeated
            return loc+1, []
        if loc > instrlen:
            return loc, []
        # only reachable if loc does not compare normally with the length
        raise ParseException(instring, loc, self.errmsg, self)
2177
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular
       expressions, use C{WordStart(alphanums)}. C{WordStart} will also
       match at the beginning of the string being parsed, or at the
       beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{loc} and an empty token list when a word starts here;
           raise C{ParseException} otherwise.  Location 0 always matches."""
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2196
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store the set of word characters; whitespace is not skipped
           before this token."""
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{loc} and an empty token list when a word ends here;
           raise C{ParseException} otherwise.  Any location at or past the
           end of the string matches.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # At loc == 0 there is no preceding character; instring[loc-1]
            # would wrap around to the last character of the input, so the
            # start of a non-empty string is never treated as a word end.
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2217 2218
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Normalise C{exprs} into the list C{self.exprs}.

           C{exprs} may be a single string (wrapped in a C{Literal}), a
           generator, a sequence (a sequence made only of strings has each
           item wrapped in a C{Literal}), any other iterable, or a single
           non-iterable object.
        """
        super(ParseExpression,self).__init__(savelist)
        # collections.Sequence was removed in Python 3.10; the ABC lives in
        # collections.abc on Python 3 and in collections on Python 2
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, Sequence ):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(Literal, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression at index C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, discard the cached
           string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the caller's original expressions are not altered
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this expression and pass the
           registered entry on to every contained expression.  A C{Suppress}
           that is already registered is not added again.  Returns C{self}."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the inherited string form when that call succeeds,
           otherwise a cached C{ClassName:(exprs)} description."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and its contents, flattening directly
           nested expressions of the same class.  Returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this expression
           for recursion.  C{validateTrace} is copied, never modified."""
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
           themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2325
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; once C{And.parseImpl} has passed
           it, failures of later elements are raised as
           C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; whitespace handling is taken from the first
           contained expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # Use the normalised self.exprs, not the raw argument: the argument
        # may be a generator (not indexable) or a list of plain strings
        # (which have no whitespace attributes).
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and return the final
           location with the accumulated results."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the error stop, parse failures become syntax errors
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place; a plain string is wrapped using
           C{ParserElement.literalStringClass}, as C{Or.__ixor__} and
           C{MatchFirst.__ior__} do."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the
           first one that cannot return an empty match."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           C{{a b c}} rendering of the contained expressions."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2390 2391
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       When more than one alternative matches, the one that matches the
       longest string is used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, an empty match is reported as possible
        self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative at C{loc} and parse with the one that ends
           furthest along.  If none matches, raise the failure that got
           furthest, or a generic C{ParseException} if there was none."""
        furthestErrLoc = -1
        furthestErr = None
        bestEnd = -1
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestErrLoc:
                    furthestErr = err
                    furthestErrLoc = err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                instrlen = len(instring)
                if instrlen > furthestErrLoc:
                    furthestErr = ParseException(instring,instrlen,alt.errmsg,self)
                    furthestErrLoc = instrlen
            else:
                # strictly greater: the earliest alternative wins a tie
                if endLoc > bestEnd:
                    bestEnd = endLoc
                    bestExpr = alt

        if bestEnd < 0:
            if furthestErr is None:
                raise ParseException(instring, loc, "no defined alternatives to match", self)
            raise furthestErr

        # re-parse with the winner, this time honouring doActions
        return bestExpr._parse( instring, loc, doActions )

    def __ixor__(self, other ):
        """Append C{other} in place, wrapping a plain string with
           C{ParserElement.literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           C{{a ^ b}} rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            rendered = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + rendered + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2451 2452
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       When more than one alternative matches, the first one listed wins.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, an empty match is reported as possible
        self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at
           C{loc}.  If none does, raise the failure that got furthest, or a
           generic C{ParseException} if there was none."""
        furthestErrLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestErrLoc:
                    furthestErr = err
                    furthestErrLoc = err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                instrlen = len(instring)
                if instrlen > furthestErrLoc:
                    furthestErr = ParseException(instring,instrlen,alt.errmsg,self)
                    furthestErrLoc = instrlen

        # only reached when no alternative matched
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestErr

    def __ior__(self, other ):
        """Append C{other} in place, wrapping a plain string with
           C{ParserElement.literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           C{{a | b}} rendering of the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            rendered = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + rendered + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2506 2507
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are computed lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then
           parse them in that order and merge the results.

           Returns the final location and a single C{ParseResults}.  Raises
           C{ParseException} listing the required expressions that never
           matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            # opt1: inner expressions of Optional members
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            # opt2: members that may match empty and are not found in opt1
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        # working copies: matched entries are removed as the scan proceeds
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # trial pass: repeat over the remaining candidates until a full
        # round produces no match at all
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # real pass: parse again from the original location in the
        # discovered order, this time honouring doActions
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # merge the partial results; for a key already present, the earlier
        # and the new values are combined and stored after the merge
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults:
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           C{{a & b}} rendering of the contained expressions."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2590 2591
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr} (a plain string is converted to a C{Literal}) and
           take over its parsing flags, whitespace settings and ignore
           expressions.  C{expr} may be C{None}."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression; raise
           C{ParseException} when there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a copy of the
           wrapped expression.  Returns C{self}."""
        self.skipWhitespace = False
        # test for None before copying: the copy call came first and failed
        # with AttributeError when there was no wrapped expression
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and pass the registered
           entry on to the wrapped expression.  A C{Suppress} that is
           already registered is not added again.  Returns C{self}."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already in
           C{parseElementList}; otherwise continue into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for
           recursion.  C{validateTrace} is copied, never modified."""
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the inherited string form when that call succeeds,
           otherwise a cached C{ClassName:(expr)} description."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2662 2663
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    only verifies that the expression matches at the current position; it
    does *not* advance the parsing position within the input string and
    always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes nothing, so it can always "match empty"
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at C{loc}, letting any failure it
           raises propagate; on success return C{loc} unchanged with no tokens."""
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
2676 2677
class NotAny(ParseElementEnhance):
    """Negative lookahead.  C{NotAny} verifies that the given parse
    expression does *not* match at the current position and does *not*
    advance the parsing position within the input string.  C{NotAny} also
    does *not* skip over leading whitespace, and it always returns a null
    token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: succeed without consuming
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "~{" + _ustr(self.expr) + "}"
        return cached
2708 2709
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: matches the given expression zero or more times."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        # zero repetitions is a valid match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        tokens = []
        try:
            # the first repetition is parsed with callPreParse=False
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables( instring, loc ) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # the first failed repetition ends the loop; loc and tokens keep
            # the values from the last successful match
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]..."
        return cached

    def setResultsName( self, name, listAllMatches=False ):
        # mark the element returned by the base class as list-valued
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2747 2748
class OneOrMore(ParseElementEnhance):
    """Repetition: matches the given expression one or more times."""
    def parseImpl( self, instring, loc, doActions=True ):
        # the first match is mandatory, so its failure is not caught here
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables( instring, loc ) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # further repetitions are optional; stop at the first failure
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return cached

    def setResultsName( self, name, listAllMatches=False ):
        # mark the element returned by the base class as list-valued
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2782
2783 -class _NullToken(object):
2784 - def __bool__(self):
2785 return False
2786 __nonzero__ = __bool__
2787 - def __str__(self):
2788 return ""
2789 2790 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return value can also be specified, to be used when the
       optional expression is not found.
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied: report an empty match
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]"
        return cached
2823 2824
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  The C{failOn} argument is an expression (or a
       string, which is converted to a C{Literal}) that makes the skip fail as soon
       as it matches at a scanned position.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal; anything else
        # (including None) is stored as given
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from C{loc} until the target
        expression matches, returning the skipped text (plus the matched
        tokens when C{includeMatch} is set).  Raises C{ParseException} when
        C{failOn} matches first or when the end of the string is passed
        without a match."""
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # failParse marks a ParseException raised because failOn matched, so
        # that the outer handler re-raises it instead of advancing
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # raised from the else clause, so it is not caught by
                        # the inner handler above but by the outer one below
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # consume as many consecutive ignorable matches as possible
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                            # print("found ignoreExpr, advance to", loc)
                        except ParseBaseException:
                            break
                # probe for the target with parse actions disabled
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # parse again, this time honoring doActions, to consume the match
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # no match here: try the next character position
                    loc += 1
        raise ParseException(instring, loc, self.errmsg, self)
2886
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression and mirror its parsing attributes
        onto this placeholder.  Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None    # drop any cached string form
        self.mayIndexError = other.mayIndexError
        self.mayReturnEmpty = other.mayReturnEmpty
        # setWhitespaceChars runs before skipWhitespace is copied, so the
        # expression's own skipWhitespace value is the one that sticks
        self.setWhitespaceChars( other.whiteChars )
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        self.ignoreExprs.extend( other.ignoreExprs )
        return self

    def __ilshift__(self, other):
        return self << other

    def leaveWhitespace( self ):
        # only the placeholder's own flag is changed; nothing is propagated
        self.skipWhitespace = False
        return self

    def streamline( self ):
        if self.streamlined:
            return self
        # set the flag before descending into the contained expression
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # the default list is only ever copied here, never mutated
        if self not in validateTrace:
            trace = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch class so that a nested str() of this same
        # object yields "..." instead of recursing.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = _ustr(self.expr) if self.expr is not None else "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        # still undefined: return a new Forward that refers back to this one
        ret = Forward()
        ret <<= self
        return ret
2962
class _ForwardNoRecurse(Forward):
    """Stand-in class that C{Forward.__str__} temporarily assigns to an
    instance so that its string form is a fixed ellipsis."""
    def __str__( self ):
        ellipsis = "..."
        return ellipsis
2966
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2972
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: a C{DeprecationWarning} is issued on construction; use the
    C{upcaseTokens} parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        # Convert through _ustr and call the bound upper() method, as
        # upcaseTokens does; the unbound str.upper raised TypeError for
        # unicode (Python 2) and for non-string tokens.
        return [ _ustr(tok).upper() for tok in tokenlist ]
2982 2983
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself (must come after leaveWhitespace)
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # use ParserElement.ignore directly, bypassing the intermediate
            # classes' ignore implementations
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from an emptied copy of the incoming results
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ combined ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
3015
class Group(TokenConverter):
    """Converter to return the matched tokens as a nested list - useful for keeping
    the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the whole result so it appears as a single nested element
        grouped = [ tokenlist ]
        return grouped
3024
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for index, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()
            if len(entry) == 1:
                # key with no value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1],ParseResults):
                # key with one plain value
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,index)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3057 3058
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # whatever was matched, report no tokens
        return list()

    def suppress( self ):
        # already a Suppress; hand back this same object
        return self
3066 3067
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.
    Further calls raise C{ParseException} until C{reset()} is called."""
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        # flagged only after the wrapped action has returned without raising
        self.called = True
        return outcome
    def reset(self):
        self.called = False
3081
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes a C{">>entering"} line to C{sys.stderr}
    before calling the parse action and a C{"<<leaving"} line afterwards,
    showing either the return value or the exception raised (which is then
    re-raised).
    """
    f = _trim_arity(f)
    def z(*paArgs):
        # __name__ exists on functions in both Python 2 and 3; the former
        # f.func_name raised AttributeError on Python 3
        thisFunc = getattr(f, "__name__", repr(f))
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is treated as the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # keep the delimiters and merge everything into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3120
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        count = t[0]
        # redefine the array expression now that the element count is known
        if count:
            arrayExpr << Group(And([expr]*count))
        else:
            arrayExpr << Group(empty)
        # returning an empty list removes the count from the results
        return []
    if intExpr is not None:
        # work on a copy so the caller's expression is not modified
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr )
3141 -def _flatten(L):
3142 ret = [] 3143 for i in L: 3144 if isinstance(i,list): 3145 ret.extend(_flatten(i)) 3146 else: 3147 ret.append(i) 3148 return ret
3149
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        if not t:
            # nothing was matched, so the repeat matches nothing either
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several tokens: flatten them and require each literal in sequence
        literals = [ Literal(tok) for tok in _flatten(t.asList()) ]
        rep << And( literals )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # the repeat must produce exactly the tokens of the first match
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Escape C{s} for use inside a regex character class: backslash,
    C{^}, C{-} and C{]} get a leading backslash, and newline and tab are
    written as C{\\n} and C{\\t}."""
    # the backslash is handled first so that the backslashes added for the
    # other characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
3209
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument of any other type produces a C{SyntaxWarning} and is treated as an
       empty set of alternatives; an empty set yields a C{MatchFirst} with no alternatives.
    """
    # Sequence lives in collections.abc on Python 3.3+ and can no longer be
    # reached as collections.Sequence on Python 3.10+
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # previously left unbound, which caused a NameError right after the warning
        symbols = []

    if not symbols:
        # nothing to match; skip the Regex attempt, which cannot succeed
        return MatchFirst( [] )

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix (longest-first testing).
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            # best effort: any failure building the Regex falls back to MatchFirst
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3270
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3280
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def _reportLocation(s,loc,t):
        return loc
    # empty markers record the locations just before and just after expr
    locMarker = Empty().setParseAction(_reportLocation)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the token list with the raw text, then remove the two
            # bookkeeping names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  The resulting expression returns the
       first token of C{expr}'s results."""
    def _firstToken(t):
        return t[0]
    converter = TokenConverter(expr)
    return converter.setParseAction(_firstToken)
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    def _reportLocation(s,l,t):
        return l
    locator = Empty().setParseAction(_reportLocation)
    startMarker = locator("locn_start")
    valueExpr = expr("value")
    # the end marker does not skip whitespace
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + valueExpr + endMarker)


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks for parsing the '[...]' range strings accepted by srange()
# a backslash followed by one of the listed punctuation characters -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## (either case of 'x') -> the character with that hex code.
# The digits are taken as everything after the first 'x'; the previous
# lstrip(r'\0x') stripped a *set* of characters, so it turned "\x00" into ""
# and never removed an upper-case 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# \0## -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    def _expanded(part):
        # single characters come through as plain strings; a range comes
        # through as a two-element ParseResults (first, last)
        if not isinstance(part,ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best effort: bad input yields "" (Exception rather than a bare
        # except, so KeyboardInterrupt and SystemExit still propagate)
        return ""
3365
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises C{ParseException}
       when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
       The returned action accepts any arguments and returns C{[replStr]}.
    """
    def _replFunc(*args):
        # a new one-element list on every call
        replacement = [replStr]
        return replacement
    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
       The first and last characters of the first token are dropped.
    """
    quoted = t[0]
    return quoted[1:-1]
3389
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.  Each token is
       converted with C{_ustr} first."""
    return [ _ustr(tok).upper() for tok in t ]
3393
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.  Each token is
       converted with C{_ustr} first."""
    return [ _ustr(tok).lower() for tok in t ]
3397
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # replace the tokens, in place, with the matched slice of the input
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3409
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # the end location is the "loc" local of that frame
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del fstack
3425
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       Parameters:
        - tagStr - the tag name as a string, or an expression with a C{name} attribute
        - xml - if true, build the XML form: the tag keyword is case-sensitive and
          attribute values must be double-quoted strings; otherwise build the HTML form:
          the tag keyword is caseless, attribute names are lower-cased, and attribute
          values are optional and may be quoted or unquoted

       Returns a 2-tuple C{(openTag, closeTag)}; both expressions carry a C{tag}
       attribute holding the tag name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        # the optional trailing "/" is reported under the name "empty" as a boolean
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # unquoted HTML attribute values may use any printable except '>'
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name title-cased, with any
    # ':' separators removed (e.g. "ns:tag" gives "startNsTag")
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3454
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags}."""
    return _makeTags( tagStr, xml=False )
3458
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags}."""
    return _makeTags( tagStr, xml=True )
3462
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.
       """
    # positional name-value pairs take precedence over keyword arguments
    pairs = args if args else attrDict.items()
    attrs = [(name,value) for name,value in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()

# associativity constants for infixNotation
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples (a list is also accepted for each entry), one
          for each operator precedence level in the expression grammar; the
          levels are processed in order, each new level being built on top of
          the previous one; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms; for numTerms of 2, opExpr
              may be C{None}, in which case the terms are simply adjacent
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Returns the C{Forward} expression that matches the complete grammar.

       Raises C{ValueError} if numTerms is not 1, 2 or 3, if numTerms is 3 and
       opExpr is not a pair of expressions, or if rightLeftAssoc is neither
       C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    # the innermost operand is either the base expression or a parenthesized
    # occurrence of the complete expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # tuple() lets a level be written as a list as well as a tuple; padding
        # with None makes the trailing parseAction member optional
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                # the lookahead uses the unwrapped operator (opExpr.expr), so this
                # level only groups when the operator is actually present
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when no operator of this level is present
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# alias: operatorPrecedence refers to the same function object as infixNotation
operatorPrecedence = infixNotation

# Quoted-string expressions.  Between the delimiting quotes each pattern accepts:
#   - any character other than the quote itself, newline, carriage return or backslash
#   - a doubled quote ("" or '')
#   - a \x escape followed by one or more hex digits
#   - a backslash followed by any single character
# The strings may not span lines, since \n and \r are excluded from the body.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, expressed as a single regular expression
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', wrapped in Combine; works on a copy so
# quotedString itself is left untouched
# NOTE(review): _L is defined elsewhere in this file - presumably an alias for Literal; confirm
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression for nested lists bracketed by opening and closing
       delimiters (C{"("} and C{")"} unless told otherwise).

       Parameters:
        - opener - string, or pyparsing expression, that starts a nested list (default="(")
        - closer - string, or pyparsing expression, that ends a nested list (default=")")
        - content - expression for the items found inside the lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters
          without affecting the nesting (default=quotedString)

       When no C{content} expression is given, one is generated from the
       delimiters, which must then both be strings: it collects each run of
       characters that are neither whitespace nor a delimiter as a separate
       value.

       C{ignoreExpr} is tried ahead of nested lists and content, so text such
       as quoted strings or comments can hold delimiter characters safely.
       Combine several such expressions with an C{L{Or}} or C{L{MatchFirst}};
       pass C{None} if nothing is to be ignored.

       Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
       omitted while C{opener} or C{closer} is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can be excluded by character class alone
            excluded = opener + closer + whiteChars
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
            else:
                # advance one character at a time so ignoreExpr is re-tested at every position
                word = Combine(OneOrMore(~ignoreExpr + CharsNotIn(excluded, exact=1)))
                content = word.setParseAction(stripToken)
        else:
            # multi-character delimiters need explicit negative lookaheads
            if ignoreExpr is None:
                oneChar = (~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars, exact=1))
            else:
                oneChar = (~ignoreExpr +
                           ~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars, exact=1))
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is None:
        items = ret | content
    else:
        items = ignoreExpr | ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return ret
3630
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a block of statements delimited by indentation,
       in the manner of block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression for one statement; it is repeated
          within the block
        - indentStack - list supplied by the caller that holds the stack of
          indentation columns (every indentedBlock expression used in one
          grammar should share the same list)
        - indent - True if the block has to be indented further than the
          current level; False for a block of left-most statements
          (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} itself is modified: it is told to
       ignore C{_bslash + LineEnd()}.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            # deeper than the current block: fatal, so no alternative is tried
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if not column > indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper block: remember its column
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        # an unindent has to fall left of the current block and no further
        # right than the block that encloses it
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # line ends are matched explicitly, so only tabs and spaces are skipped as whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # NOTE(review): _bslash is defined elsewhere in this file - presumably the
    # backslash character, making this skip backslash line continuations; confirm
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# 8-bit character sets
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic HTML helpers: any opening/closing tag, and a handful of common entities
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: yields the mapped character for a known entity, otherwise None
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity) or None

# comment expressions are common and easy to get wrong, so they are provided ready-made
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one unquoted list item: words without commas, joined by interior spaces/tabs
# that are not directly followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# items may be quoted strings or unquoted items; an empty item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        # parse one statement with simpleSQL and print either the results
        # or the failing line with a caret under the error column
        try:
            results = simpleSQL.parseString( teststring )
            asList = results.asList()
            print (teststring + "->" + str(asList))
            print ("tokens = " + str(results))
            print ("tokens.columns = " + str(results.columns))
            print ("tokens.tables = " + str(results.tables))
            print (results.asXML("SQL",True))
        except ParseBaseException as pe:
            print (teststring + "->")
            print (pe.line)
            print (" "*(pe.column-1) + "^")
            print (pe)
        print()

    # minimal "select <columns> from <tables>" grammar used by the self-test
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    for sample in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2 ",
            ):
        test( sample )