Package parsedatetime
[hide private]
[frames | no frames]

Source Code for Package parsedatetime

   1  # -*- coding: utf-8 -*- 
   2  # 
   3  # vim: sw=2 ts=2 sts=2 
   4  # 
   5  # Copyright 2004-2015 Mike Taylor 
   6  # 
   7  # Licensed under the Apache License, Version 2.0 (the "License"); 
   8  # you may not use this file except in compliance with the License. 
   9  # You may obtain a copy of the License at 
  10  # 
  11  #     http://www.apache.org/licenses/LICENSE-2.0 
  12  # 
  13  # Unless required by applicable law or agreed to in writing, software 
  14  # distributed under the License is distributed on an "AS IS" BASIS, 
  15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
  16  # See the License for the specific language governing permissions and 
  17  # limitations under the License. 
  18   
  19  """parsedatetime 
  20   
  21  Parse human-readable date/time text. 
  22   
  23  Requires Python 2.6 or later 
  24  """ 
  25  from __future__ import with_statement 
  26   
  27  __author__ = 'Mike Taylor (bear@bear.im)' 
  28  __copyright__ = 'Copyright (c) 2004 Mike Taylor' 
  29  __license__ = 'Apache v2.0' 
  30  __version__ = '1.5' 
  31  __contributors__ = ['Darshana Chhajed', 
  32                      'Michael Lim (lim.ck.michael@gmail.com)', 
  33                      'Bernd Zeimetz (bzed@debian.org)'] 
  34   
  35  import re 
  36  import time 
  37  import datetime 
  38  import calendar 
  39  import contextlib 
  40  import email.utils 
  41   
  42  try: 
  43      from itertools import imap 
  44  except ImportError: 
  45      imap = map 
  46   
  47  from . import pdt_locales 
  48   
  49  # as a library, do *not* setup logging 
  50  # see docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 
  51  # Set default logging handler to avoid "No handler found" warnings. 
  52  import logging 
  53   
  54  try:  # Python 2.7+ 
  55      from logging import NullHandler 
  56  except ImportError: 
57 - class NullHandler(logging.Handler):
58 - def emit(self, record):
59 pass

# Module logger.  Only a no-op handler is attached here, so importing the
# library never configures logging on the application's behalf.
log = logging.getLogger(__name__)
log.addHandler(NullHandler())

# Gate for the "debug and log.debug(...)" statements used in this module.
debug = False

# Locale identifier -> locale definition object exported by pdt_locales.
pdtLocales = dict(
    icu=pdt_locales.pdtLocale_icu,
    en_US=pdt_locales.pdtLocale_en,
    en_AU=pdt_locales.pdtLocale_au,
    es_ES=pdt_locales.pdtLocale_es,
    de_DE=pdt_locales.pdtLocale_de,
    nl_NL=pdt_locales.pdtLocale_nl,
)
72 73 74 # Copied from feedparser.py 75 # Universal Feedparser 76 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 77 # Originally a def inside of _parse_date_w3dtf() 78 -def _extract_date(m):
79 year = int(m.group('year')) 80 if year < 100: 81 year = 100 * int(time.gmtime()[0] / 100) + int(year) 82 if year < 1000: 83 return 0, 0, 0 84 julian = m.group('julian') 85 if julian: 86 julian = int(julian) 87 month = julian / 30 + 1 88 day = julian % 30 + 1 89 jday = None 90 while jday != julian: 91 t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) 92 jday = time.gmtime(t)[-2] 93 diff = abs(jday - julian) 94 if jday > julian: 95 if diff < day: 96 day = day - diff 97 else: 98 month = month - 1 99 day = 31 100 elif jday < julian: 101 if day + diff < 28: 102 day = day + diff 103 else: 104 month = month + 1 105 return year, month, day 106 month = m.group('month') 107 day = 1 108 if month is None: 109 month = 1 110 else: 111 month = int(month) 112 day = m.group('day') 113 if day: 114 day = int(day) 115 else: 116 day = 1 117 return year, month, day
118
119 120 # Copied from feedparser.py 121 # Universal Feedparser 122 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 123 # Originally a def inside of _parse_date_w3dtf() 124 -def _extract_time(m):
125 if not m: 126 return 0, 0, 0 127 hours = m.group('hours') 128 if not hours: 129 return 0, 0, 0 130 hours = int(hours) 131 minutes = int(m.group('minutes')) 132 seconds = m.group('seconds') 133 if seconds: 134 seconds = seconds.replace(',', '.').split('.', 1)[0] 135 seconds = int(seconds) 136 else: 137 seconds = 0 138 return hours, minutes, seconds
139
140 141 # Copied from feedparser.py 142 # Universal Feedparser 143 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 144 # Modified to return a tuple instead of mktime 145 # 146 # Original comment: 147 # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by 148 # Drake and licensed under the Python license. Removed all range checking 149 # for month, day, hour, minute, and second, since mktime will normalize 150 # these later 151 -def __closure_parse_date_w3dtf():
152 # the __extract_date and __extract_time methods were 153 # copied-out so they could be used by my code --bear 154 def __extract_tzd(m): 155 '''Return the Time Zone Designator as an offset in seconds from UTC.''' 156 if not m: 157 return 0 158 tzd = m.group('tzd') 159 if not tzd: 160 return 0 161 if tzd == 'Z': 162 return 0 163 hours = int(m.group('tzdhours')) 164 minutes = m.group('tzdminutes') 165 if minutes: 166 minutes = int(minutes) 167 else: 168 minutes = 0 169 offset = (hours*60 + minutes) * 60 170 if tzd[0] == '+': 171 return -offset 172 return offset
173 174 def _parse_date_w3dtf(dateString): 175 m = __datetime_rx.match(dateString) 176 if m is None or m.group() != dateString: 177 return 178 return _extract_date(m) + _extract_time(m) + (0, 0, 0) 179 180 __date_re = (r'(?P<year>\d\d\d\d)' 181 r'(?:(?P<dsep>-|)' 182 r'(?:(?P<julian>\d\d\d)' 183 r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?') 184 __tzd_re = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)' 185 # __tzd_rx = re.compile(__tzd_re) 186 __time_re = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)' 187 r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' 188 + __tzd_re) 189 __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) 190 __datetime_rx = re.compile(__datetime_re) 191 192 return _parse_date_w3dtf 193 194 195 _parse_date_w3dtf = __closure_parse_date_w3dtf() 196 del __closure_parse_date_w3dtf 197 198 199 _monthnames = set([ 200 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 201 'aug', 'sep', 'oct', 'nov', 'dec', 202 'january', 'february', 'march', 'april', 'may', 'june', 'july', 203 'august', 'september', 'october', 'november', 'december']) 204 _daynames = set(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])
205 206 207 # Copied from feedparser.py 208 # Universal Feedparser 209 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 210 # Modified to return a tuple instead of mktime 211 -def _parse_date_rfc822(dateString):
212 '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' 213 data = dateString.split() 214 if data[0][-1] in (',', '.') or data[0].lower() in _daynames: 215 del data[0] 216 if len(data) == 4: 217 s = data[3] 218 s = s.split('+', 1) 219 if len(s) == 2: 220 data[3:] = s 221 else: 222 data.append('') 223 dateString = " ".join(data) 224 if len(data) < 5: 225 dateString += ' 00:00:00 GMT' 226 return email.utils.parsedate_tz(dateString)
227
228 # # rfc822.py defines several time zones, but we define some extra ones. 229 # # 'ET' is equivalent to 'EST', etc. 230 # _additional_timezones = {'AT': -400, 'ET': -500, 231 # 'CT': -600, 'MT': -700, 232 # 'PT': -800} 233 # email.utils._timezones.update(_additional_timezones) 234 235 236 -class Calendar:
237 """ 238 A collection of routines to input, parse and manipulate date and times. 239 The text can either be 'normal' date values or it can be human readable. 240 """ 241
242 - def __init__(self, constants=None):
243 """ 244 Default constructor for the L{Calendar} class. 245 246 @type constants: object 247 @param constants: Instance of the class L{Constants} 248 249 @rtype: object 250 @return: L{Calendar} instance 251 """ 252 # if a constants reference is not included, use default 253 if constants is None: 254 self.ptc = Constants() 255 else: 256 self.ptc = constants 257 258 self.weekdyFlag = False # monday/tuesday/... 259 self.dateStdFlag = False # 07/21/06 260 self.dateStrFlag = False # July 21st, 2006 261 self.timeStdFlag = False # 5:50 262 self.meridianFlag = False # am/pm 263 self.dayStrFlag = False # tomorrow/yesterday/today/.. 264 self.timeStrFlag = False # lunch/noon/breakfast/... 265 self.modifierFlag = False # after/before/prev/next/.. 266 self.modifier2Flag = False # after/before/prev/next/.. 267 self.unitsFlag = False # hrs/weeks/yrs/min/.. 268 self.qunitsFlag = False # h/m/t/d.. 269 270 self.timeFlag = 0 271 self.dateFlag = 0
272 273 @contextlib.contextmanager
274 - def _mergeFlags(self):
275 """ 276 Keep old dateFlag and timeFlag in cache and 277 merge them after context executed 278 """ 279 tempDateFlag = self.dateFlag 280 tempTimeFlag = self.timeFlag 281 yield 282 self.dateFlag = tempDateFlag | self.dateFlag 283 self.timeFlag = tempTimeFlag | self.timeFlag
284
285 - def _convertUnitAsWords(self, unitText):
286 """ 287 Converts text units into their number value. 288 289 @type unitText: string 290 @param unitText: number text to convert 291 292 @rtype: integer 293 @return: numerical value of unitText 294 """ 295 word_list, a, b = re.split(r"[,\s-]+", unitText), 0, 0 296 for word in word_list: 297 x = self.ptc.small.get(word) 298 if x is not None: 299 a += x 300 elif word == "hundred": 301 a *= 100 302 else: 303 x = self.ptc.magnitude.get(word) 304 if x is not None: 305 b += a * x 306 a = 0 307 elif word in self.ptc.ignore: 308 pass 309 else: 310 raise Exception("Unknown number: " + word) 311 return a + b
312
313 - def _buildTime(self, source, quantity, modifier, units):
314 """ 315 Take C{quantity}, C{modifier} and C{unit} strings and convert them 316 into values. After converting, calcuate the time and return the 317 adjusted sourceTime. 318 319 @type source: time 320 @param source: time to use as the base (or source) 321 @type quantity: string 322 @param quantity: quantity string 323 @type modifier: string 324 @param modifier: how quantity and units modify the source time 325 @type units: string 326 @param units: unit of the quantity (i.e. hours, days, months, etc) 327 328 @rtype: struct_time 329 @return: C{struct_time} of the calculated time 330 """ 331 debug and log.debug('_buildTime: [%s][%s][%s]', 332 quantity, modifier, units) 333 334 if source is None: 335 source = time.localtime() 336 337 if quantity is None: 338 quantity = '' 339 else: 340 quantity = quantity.strip() 341 342 qty = self._quantityToInt(quantity) 343 344 if modifier in self.ptc.Modifiers: 345 qty = qty * self.ptc.Modifiers[modifier] 346 347 if units is None or units == '': 348 units = 'dy' 349 350 # plurals are handled by regex's (could be a bug tho) 351 352 (yr, mth, dy, hr, mn, sec, _, _, _) = source 353 354 start = datetime.datetime(yr, mth, dy, hr, mn, sec) 355 target = start 356 # realunit = next((key for key, values in self.ptc.units.items() 357 # if any(imap(units.__contains__, values))), None) 358 realunit = units 359 for key, values in self.ptc.units.items(): 360 if units in values: 361 realunit = key 362 break 363 364 debug and log.debug('units %s --> realunit %s', units, realunit) 365 366 if realunit == 'years': 367 target = self.inc(start, year=qty) 368 self.dateFlag = 1 369 elif realunit == 'months': 370 target = self.inc(start, month=qty) 371 self.dateFlag = 1 372 else: 373 if realunit == 'days': 374 target = start + datetime.timedelta(days=qty) 375 self.dateFlag = 1 376 elif realunit == 'hours': 377 target = start + datetime.timedelta(hours=qty) 378 self.timeFlag = 2 379 elif realunit == 'minutes': 380 target = start + 
datetime.timedelta(minutes=qty) 381 self.timeFlag = 2 382 elif realunit == 'seconds': 383 target = start + datetime.timedelta(seconds=qty) 384 self.timeFlag = 2 385 elif realunit == 'weeks': 386 target = start + datetime.timedelta(weeks=qty) 387 self.dateFlag = 1 388 389 return target.timetuple()
390
391 - def parseDate(self, dateString, sourceTime=None):
392 """ 393 Parse short-form date strings:: 394 395 '05/28/2006' or '04.21' 396 397 @type dateString: string 398 @param dateString: text to convert to a C{datetime} 399 400 @rtype: struct_time 401 @return: calculated C{struct_time} value of dateString 402 """ 403 if sourceTime is None: 404 yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime() 405 else: 406 yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime 407 408 # values pulled from regex's will be stored here and later 409 # assigned to mth, dy, yr based on information from the locale 410 # -1 is used as the marker value because we want zero values 411 # to be passed thru so they can be flagged as errors later 412 v1 = -1 413 v2 = -1 414 v3 = -1 415 416 s = dateString 417 m = self.ptc.CRE_DATE2.search(s) 418 if m is not None: 419 index = m.start() 420 v1 = int(s[:index]) 421 s = s[index + 1:] 422 423 m = self.ptc.CRE_DATE2.search(s) 424 if m is not None: 425 index = m.start() 426 v2 = int(s[:index]) 427 v3 = int(s[index + 1:]) 428 else: 429 v2 = int(s.strip()) 430 431 v = [v1, v2, v3] 432 d = {'m': mth, 'd': dy, 'y': yr} 433 434 for i in range(0, 3): 435 n = v[i] 436 c = self.ptc.dp_order[i] 437 if n >= 0: 438 d[c] = n 439 440 # if the year is not specified and the date has already 441 # passed, increment the year 442 if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])): 443 yr = d['y'] + 1 444 else: 445 yr = d['y'] 446 447 mth = d['m'] 448 dy = d['d'] 449 450 # birthday epoch constraint 451 if yr < self.ptc.BirthdayEpoch: 452 yr += 2000 453 elif yr < 100: 454 yr += 1900 455 456 daysInCurrentMonth = self.ptc.daysInMonth(mth, yr) 457 debug and log.debug('parseDate: %s %s %s %s', 458 yr, mth, dy, daysInCurrentMonth) 459 460 if mth > 0 and mth <= 12 and dy > 0 and \ 461 dy <= daysInCurrentMonth: 462 sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) 463 else: 464 self.dateFlag = 0 465 self.timeFlag = 0 466 # return current time if date string is invalid 467 sourceTime = time.localtime() 
468 469 return sourceTime
470
471 - def parseDateText(self, dateString, sourceTime=None):
472 """ 473 Parse long-form date strings:: 474 475 'May 31st, 2006' 476 'Jan 1st' 477 'July 2006' 478 479 @type dateString: string 480 @param dateString: text to convert to a datetime 481 482 @rtype: struct_time 483 @return: calculated C{struct_time} value of dateString 484 """ 485 if sourceTime is None: 486 yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime() 487 else: 488 yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime 489 490 currentMth = mth 491 currentDy = dy 492 493 debug and log.debug('parseDateText currentMth %s currentDy %s', 494 mth, dy) 495 496 s = dateString.lower() 497 m = self.ptc.CRE_DATE3.search(s) 498 mth = m.group('mthname') 499 mth = self.ptc.MonthOffsets[mth] 500 501 if m.group('day') is not None: 502 dy = int(m.group('day')) 503 else: 504 dy = 1 505 506 if m.group('year') is not None: 507 yr = int(m.group('year')) 508 509 # birthday epoch constraint 510 if yr < self.ptc.BirthdayEpoch: 511 yr += 2000 512 elif yr < 100: 513 yr += 1900 514 515 elif (mth < currentMth) or (mth == currentMth and dy < currentDy): 516 # if that day and month have already passed in this year, 517 # then increment the year by 1 518 yr += self.ptc.YearParseStyle 519 520 if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr): 521 sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst) 522 else: 523 # Return current time if date string is invalid 524 self.dateFlag = 0 525 self.timeFlag = 0 526 sourceTime = time.localtime() 527 528 debug and log.debug('parseDateText returned dateFlag %d ' 529 'timeFlag %d mth %d dy %d yr %d sourceTime %s', 530 self.dateFlag, self.timeFlag, 531 mth, dy, yr, sourceTime) 532 533 return sourceTime
534
535 - def evalRanges(self, datetimeString, sourceTime=None):
536 """ 537 Evaluate the C{datetimeString} text and determine if 538 it represents a date or time range. 539 540 @type datetimeString: string 541 @param datetimeString: datetime text to evaluate 542 @type sourceTime: struct_time 543 @param sourceTime: C{struct_time} value to use as the base 544 545 @rtype: tuple 546 @return: tuple of: start datetime, end datetime and the invalid flag 547 """ 548 rangeFlag = 0 549 startTime = '' 550 startDate = '' 551 endTime = '' 552 endDate = '' 553 554 s = datetimeString.strip().lower() 555 556 if self.ptc.rangeSep in s: 557 s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep) 558 s = s.replace(' ', ' ') 559 560 for cre, rflag in [(self.ptc.CRE_TIMERNG1, 1), 561 (self.ptc.CRE_TIMERNG2, 2), 562 (self.ptc.CRE_TIMERNG4, 7), 563 (self.ptc.CRE_TIMERNG3, 3), 564 (self.ptc.CRE_DATERNG1, 4), 565 (self.ptc.CRE_DATERNG2, 5), 566 (self.ptc.CRE_DATERNG3, 6)]: 567 m = cre.search(s) 568 if m is not None: 569 rangeFlag = rflag 570 break 571 572 debug and log.debug('evalRanges: rangeFlag = %s [%s]', rangeFlag, s) 573 574 if m is not None: 575 if (m.group() != s): 576 # capture remaining string 577 parseStr = m.group() 578 chunk1 = s[:m.start()] 579 chunk2 = s[m.end():] 580 s = '%s %s' % (chunk1, chunk2) 581 582 sourceTime, flag = self._parse(s, sourceTime) 583 584 if flag == 0: 585 sourceTime = None 586 else: 587 parseStr = s 588 589 if rangeFlag in (1, 2): 590 m = re.search(self.ptc.rangeSep, parseStr) 591 startTime, sflag = self._parse(parseStr[:m.start()], sourceTime) 592 endTime, eflag = self._parse(parseStr[m.start() + 1:], sourceTime) 593 594 if eflag != 0 and sflag != 0: 595 return startTime, endTime, 2 596 597 elif rangeFlag in (3, 7): 598 m = re.search(self.ptc.rangeSep, parseStr) 599 # capturing the meridian from the end time 600 if self.ptc.usesMeridian: 601 ampm = re.search(self.ptc.am[0], parseStr) 602 603 # appending the meridian to the start time 604 if ampm is not None: 605 startTime, sflag = self._parse( 606 
parseStr[:m.start()] + self.ptc.meridian[0], 607 sourceTime) 608 else: 609 startTime, sflag = self._parse( 610 parseStr[:m.start()] + self.ptc.meridian[1], 611 sourceTime) 612 else: 613 startTime, sflag = self._parse( 614 parseStr[:m.start()], sourceTime) 615 616 endTime, eflag = self._parse(parseStr[m.start() + 1:], sourceTime) 617 618 if eflag != 0 and sflag != 0: 619 return (startTime, endTime, 2) 620 621 elif rangeFlag == 4: 622 m = re.search(self.ptc.rangeSep, parseStr) 623 startDate, sflag = self._parse(parseStr[:m.start()], sourceTime) 624 endDate, eflag = self._parse(parseStr[m.start() + 1:], sourceTime) 625 626 if eflag != 0 and sflag != 0: 627 return startDate, endDate, 1 628 629 elif rangeFlag == 5: 630 m = re.search(self.ptc.rangeSep, parseStr) 631 endDate = parseStr[m.start() + 1:] 632 633 # capturing the year from the end date 634 date = self.ptc.CRE_DATE3.search(endDate) 635 endYear = date.group('year') 636 637 # appending the year to the start date if the start date 638 # does not have year information and the end date does. 
639 # eg : "Aug 21 - Sep 4, 2007" 640 if endYear is not None: 641 startDate = (parseStr[:m.start()]).strip() 642 date = self.ptc.CRE_DATE3.search(startDate) 643 startYear = date.group('year') 644 645 if startYear is None: 646 startDate = startDate + ', ' + endYear 647 else: 648 startDate = parseStr[:m.start()] 649 650 startDate, sflag = self._parse(startDate, sourceTime) 651 endDate, eflag = self._parse(endDate, sourceTime) 652 653 if eflag != 0 and sflag != 0: 654 return (startDate, endDate, 1) 655 656 elif rangeFlag == 6: 657 m = re.search(self.ptc.rangeSep, parseStr) 658 659 startDate = parseStr[:m.start()] 660 661 # capturing the month from the start date 662 mth = self.ptc.CRE_DATE3.search(startDate) 663 mth = mth.group('mthname') 664 665 # appending the month name to the end date 666 endDate = mth + parseStr[(m.start() + 1):] 667 668 startDate, sflag = self._parse(startDate, sourceTime) 669 endDate, eflag = self._parse(endDate, sourceTime) 670 671 if eflag != 0 and sflag != 0: 672 return (startDate, endDate, 1) 673 else: 674 # if range is not found 675 sourceTime = time.localtime() 676 677 return (sourceTime, sourceTime, 0)
678
679 - def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
680 """ 681 Based on the C{style} and C{currentDayStyle} determine what 682 day-of-week value is to be returned. 683 684 @type wd: integer 685 @param wd: day-of-week value for the current day 686 @type wkdy: integer 687 @param wkdy: day-of-week value for the parsed day 688 @type offset: integer 689 @param offset: offset direction for any modifiers (-1, 0, 1) 690 @type style: integer 691 @param style: normally the value 692 set in C{Constants.DOWParseStyle} 693 @type currentDayStyle: integer 694 @param currentDayStyle: normally the value 695 set in C{Constants.CurrentDOWParseStyle} 696 697 @rtype: integer 698 @return: calculated day-of-week 699 """ 700 diffBase = wkdy - wd 701 origOffset = offset 702 703 if offset == 2: 704 # no modifier is present. 705 # i.e. string to be parsed is just DOW 706 if wkdy * style > wd * style or \ 707 currentDayStyle and wkdy == wd: 708 # wkdy located in current week 709 offset = 0 710 elif style in (-1, 1): 711 # wkdy located in last (-1) or next (1) week 712 offset = style 713 else: 714 # invalid style, or should raise error? 715 offset = 0 716 717 # offset = -1 means last week 718 # offset = 0 means current week 719 # offset = 1 means next week 720 diff = diffBase + 7 * offset 721 if style == 1 and diff < -7: 722 diff += 7 723 elif style == -1 and diff > 7: 724 diff -= 7 725 726 debug and log.debug("wd %s, wkdy %s, offset %d, " 727 "style %d, currentDayStyle %d", 728 wd, wkdy, origOffset, style, currentDayStyle) 729 730 return diff
731
732 - def _quantityToInt(self, quantity):
733 """ 734 Convert a quantity, either spelled-out or numeric, to an integer 735 736 @type quantity: string 737 @param quantity: quantity to parse to int 738 @rtype: int 739 @return: the quantity as an integer, defaulting to 0 740 """ 741 if not quantity: 742 return 1 743 744 try: 745 return int(quantity) 746 except ValueError: 747 pass 748 749 try: 750 return self.ptc.numbers[quantity] 751 except KeyError: 752 pass 753 754 return 0
755
756 - def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
757 """ 758 Evaluate the C{modifier} string and following text (passed in 759 as C{chunk1} and C{chunk2}) and if they match any known modifiers 760 calculate the delta and apply it to C{sourceTime}. 761 762 @type modifier: string 763 @param modifier: modifier text to apply to sourceTime 764 @type chunk1: string 765 @param chunk1: first text chunk that followed modifier (if any) 766 @type chunk2: string 767 @param chunk2: second text chunk that followed modifier (if any) 768 @type sourceTime: struct_time 769 @param sourceTime: C{struct_time} value to use as the base 770 771 @rtype: tuple 772 @return: tuple of: remaining text and the modified sourceTime 773 """ 774 775 offset = self.ptc.Modifiers[modifier] 776 777 if sourceTime is not None: 778 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime 779 else: 780 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime() 781 782 if self.ptc.StartTimeFromSourceTime: 783 startHour = hr 784 startMinute = mn 785 startSecond = sec 786 else: 787 startHour = 9 788 startMinute = 0 789 startSecond = 0 790 791 # capture the units after the modifier and the remaining 792 # string after the unit 793 m = self.ptc.CRE_REMAINING.search(chunk2) 794 if m is not None: 795 index = m.start() + 1 796 unit = chunk2[:m.start()] 797 chunk2 = chunk2[index:] 798 else: 799 unit = chunk2 800 chunk2 = '' 801 802 flag = False 803 804 debug and log.debug("modifier [%s] chunk1 [%s] " 805 "chunk2 [%s] unit [%s] flag %s", 806 modifier, chunk1, chunk2, unit, flag) 807 808 if unit in self.ptc.units['months']: 809 currentDaysInMonth = self.ptc.daysInMonth(mth, yr) 810 if offset == 0: 811 dy = currentDaysInMonth 812 sourceTime = (yr, mth, dy, startHour, startMinute, 813 startSecond, wd, yd, isdst) 814 elif offset == 2: 815 # if day is the last day of the month, calculate the last day 816 # of the next month 817 if dy == currentDaysInMonth: 818 dy = self.ptc.daysInMonth(mth + 1, yr) 819 820 start = datetime.datetime(yr, mth, dy, startHour, 821 
startMinute, startSecond) 822 target = self.inc(start, month=1) 823 sourceTime = target.timetuple() 824 else: 825 start = datetime.datetime(yr, mth, 1, startHour, 826 startMinute, startSecond) 827 target = self.inc(start, month=offset) 828 sourceTime = target.timetuple() 829 830 flag = True 831 self.dateFlag = 1 832 833 if unit in self.ptc.units['weeks']: 834 if offset == 0: 835 start = datetime.datetime(yr, mth, dy, 17, 0, 0) 836 target = start + datetime.timedelta(days=(4 - wd)) 837 sourceTime = target.timetuple() 838 elif offset == 2: 839 start = datetime.datetime(yr, mth, dy, startHour, 840 startMinute, startSecond) 841 target = start + datetime.timedelta(days=7) 842 sourceTime = target.timetuple() 843 else: 844 start = datetime.datetime(yr, mth, dy, startHour, 845 startMinute, startSecond) 846 target = start + offset * datetime.timedelta(weeks=1) 847 sourceTime = target.timetuple() 848 849 flag = True 850 self.dateFlag = 1 851 852 if unit in self.ptc.units['days']: 853 if offset == 0: 854 sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst) 855 self.timeFlag = 2 856 elif offset == 2: 857 start = datetime.datetime(yr, mth, dy, hr, mn, sec) 858 target = start + datetime.timedelta(days=1) 859 sourceTime = target.timetuple() 860 else: 861 start = datetime.datetime(yr, mth, dy, startHour, 862 startMinute, startSecond) 863 target = start + datetime.timedelta(days=offset) 864 sourceTime = target.timetuple() 865 866 flag = True 867 self.dateFlag = 1 868 869 if unit in self.ptc.units['hours']: 870 if offset == 0: 871 sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst) 872 else: 873 start = datetime.datetime(yr, mth, dy, hr, 0, 0) 874 target = start + datetime.timedelta(hours=offset) 875 sourceTime = target.timetuple() 876 877 flag = True 878 self.timeFlag = 2 879 880 if unit in self.ptc.units['years']: 881 if offset == 0: 882 sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst) 883 elif offset == 2: 884 sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst) 885 
else: 886 sourceTime = (yr + offset, 1, 1, startHour, startMinute, 887 startSecond, wd, yd, isdst) 888 889 flag = True 890 self.dateFlag = 1 891 892 if not flag: 893 if modifier == 'eom': 894 self.modifierFlag = False 895 dy = self.ptc.daysInMonth(mth, yr) 896 sourceTime = (yr, mth, dy, startHour, startMinute, 897 startSecond, wd, yd, isdst) 898 self.dateFlag = 2 899 flag = True 900 elif modifier == 'eoy': 901 self.modifierFlag = False 902 mth = 12 903 dy = self.ptc.daysInMonth(mth, yr) 904 sourceTime = (yr, mth, dy, startHour, startMinute, 905 startSecond, wd, yd, isdst) 906 self.dateFlag = 2 907 flag = True 908 909 if not flag: 910 m = self.ptc.CRE_WEEKDAY.match(unit) 911 if m is not None: 912 debug and log.debug('CRE_WEEKDAY matched') 913 wkdy = m.group() 914 self.dateFlag = 1 915 916 if modifier == 'eod': 917 # Calculate the upcoming weekday 918 self.modifierFlag = False 919 sourceTime, _ = self._parse(wkdy, sourceTime) 920 self.timeFlag = 2 921 sTime = self.ptc.getSource(modifier, sourceTime) 922 if sTime is not None: 923 sourceTime = sTime 924 else: 925 wkdy = self.ptc.WeekdayOffsets[wkdy] 926 diff = self._CalculateDOWDelta( 927 wd, wkdy, offset, self.ptc.DOWParseStyle, 928 self.ptc.CurrentDOWParseStyle) 929 start = datetime.datetime(yr, mth, dy, startHour, 930 startMinute, startSecond) 931 target = start + datetime.timedelta(days=diff) 932 sourceTime = target.timetuple() 933 934 flag = True 935 self.dateFlag = 1 936 937 if not flag: 938 m = self.ptc.CRE_TIME.match(unit) 939 if m is not None: 940 debug and log.debug('CRE_TIME matched') 941 self.modifierFlag = False 942 (yr, mth, dy, hr, mn, sec, wd, yd, isdst), _ = \ 943 self._parse(unit, None) 944 945 start = datetime.datetime(yr, mth, dy, hr, mn, sec) 946 target = start + datetime.timedelta(days=offset) 947 sourceTime = target.timetuple() 948 flag = True 949 else: 950 # check if the remaining text is parsable and if so, 951 # use it as the base time for the modifier source time 952 self.modifierFlag = False 
953 954 debug and log.debug('check for modifications ' 955 'to source time [%s] [%s]', 956 chunk1, unit) 957 958 unit = unit.strip() 959 if unit: 960 with self._mergeFlags(): 961 s = '%s %s' % (unit, chunk2) 962 t, flag2 = self._parse(s, sourceTime) 963 964 if flag2 == 1: # working with dates 965 u = unit.lower() 966 if u in self.ptc.Months or u in self.ptc.shortMonths: 967 yr, mth, dy, hr, mn, sec, wd, yd, isdst = t 968 start = datetime.datetime(yr, mth, dy, hr, mn, sec) 969 t = self.inc(start, year=offset).timetuple() 970 elif u in self.ptc.Weekdays: 971 t = t + datetime.timedelta(weeks=offset) 972 973 debug and log.debug('flag2 = %s t = %s', flag2, t) 974 if flag2 != 0: 975 sourceTime = t 976 chunk2 = '' 977 978 chunk1 = chunk1.strip() 979 980 if chunk1: 981 try: 982 m = list(self.ptc.CRE_NUMBER.finditer(chunk1))[-1] 983 except IndexError: 984 pass 985 else: 986 qty = None 987 debug and log.debug('CRE_NUMBER matched') 988 qty = self._quantityToInt(m.group()) * offset 989 chunk1 = '%s%s%s' % (chunk1[:m.start()], 990 qty, chunk1[m.end():]) 991 with self._mergeFlags(): 992 t, flag3 = self._parse(chunk1, sourceTime) 993 994 chunk1 = '' 995 996 debug and log.debug('flag3 = %s t = %s', flag3, t) 997 if flag3 != 0: 998 sourceTime = t 999 1000 flag = True 1001 debug and log.debug('looking for modifier %s', modifier) 1002 sTime = self.ptc.getSource(modifier, sourceTime) 1003 if sTime is not None: 1004 debug and log.debug('modifier found in sources') 1005 sourceTime = sTime 1006 flag = True 1007 self.timeFlag = 2 1008 1009 # if the word after next is a number, the string is more than likely 1010 # to be "next 4 hrs" which we will have to combine the units with the 1011 # rest of the string 1012 if not flag: 1013 if offset < 0: 1014 # if offset is negative, the unit has to be made negative 1015 unit = '-%s' % unit 1016 1017 chunk2 = '%s %s' % (unit, chunk2) 1018 1019 self.modifierFlag = False 1020 1021 debug and log.debug('returning chunk = "%s %s" and sourceTime = %s', 
1022 chunk1, chunk2, sourceTime) 1023 1024 return '%s %s' % (chunk1, chunk2), sourceTime
1025
def _evalString(self, datetimeString, sourceTime=None):
    """
    Calculate the datetime based on flags set by the L{parse()} routine

    Examples handled::
        RFC822, W3CDTF formatted dates
        HH:MM[:SS][ am/pm]
        MM/DD/YYYY
        DD MMMM YYYY

    Every C{self.*Flag} that is found set is handled once and then
    cleared, so the flags act as a one-shot work list filled in by
    the caller.

    @type  datetimeString: string
    @param datetimeString: text to try and parse as more "traditional"
                           date/time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value or current C{struct_time}
             if not parsed
    """
    s = datetimeString.strip()
    now = sourceTime or time.localtime()

    debug and log.debug('_evalString(%s, %s)', datetimeString, sourceTime)

    # Given string date is a RFC822 date
    if sourceTime is None:
        sourceTime = _parse_date_rfc822(s)
        debug and log.debug(
            'attempt to parse as rfc822 - %s', str(sourceTime))

        if sourceTime is not None:
            # the RFC822 helper yields a 10-item tuple; the last item
            # is dropped to get back to the 9-item struct_time layout
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
            self.dateFlag = 1

            # any non-zero component means a time of day was supplied
            # (requiring all three to be non-zero missed "10:30:00")
            if hr != 0 or mn != 0 or sec != 0:
                self.timeFlag = 2

            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    # Given string date is a W3CDTF date
    if sourceTime is None:
        sourceTime = _parse_date_w3dtf(s)

        if sourceTime is not None:
            self.dateFlag = 1
            self.timeFlag = 2

    if sourceTime is None:
        s = s.lower()

    # Given string is in the format HH:MM(:SS)(am/pm)
    if self.meridianFlag:
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        m = self.ptc.CRE_TIMEHMS2.search(s)
        if m is not None:
            dt = s[:m.start('meridian')].strip()
            if len(dt) <= 2:
                # bare hour such as "5 pm"
                hr = int(dt)
                mn = 0
                sec = 0
            else:
                hr, mn, sec = _extract_time(m)

            if hr == 24:
                hr = 0

            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            meridian = m.group('meridian').lower()

            # if 'am' found and hour is 12 - force hour to 0 (midnight)
            if (meridian in self.ptc.am) and hr == 12:
                sourceTime = (yr, mth, dy, 0, mn, sec, wd, yd, isdst)

            # if 'pm' found and hour < 12, add 12 to shift to evening
            if (meridian in self.ptc.pm) and hr < 12:
                sourceTime = (yr, mth, dy, hr + 12, mn, sec, wd, yd, isdst)

        # invalid time
        if hr > 24 or mn > 59 or sec > 59:
            sourceTime = now
            self.dateFlag = 0
            self.timeFlag = 0

        self.meridianFlag = False

    # Given string is in the format HH:MM(:SS)
    if self.timeStdFlag:
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        m = self.ptc.CRE_TIMEHMS.search(s)
        if m is not None:
            hr, mn, sec = _extract_time(m)
        if hr == 24:
            hr = 0

        if hr > 24 or mn > 59 or sec > 59:
            # invalid time
            sourceTime = now
            self.dateFlag = 0
            self.timeFlag = 0
        else:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

        self.timeStdFlag = False

    # Given string is in the format 07/21/2006
    if self.dateStdFlag:
        sourceTime = self.parseDate(s, sourceTime)
        self.dateStdFlag = False

    # Given string is in the format "May 23rd, 2005"
    if self.dateStrFlag:
        debug and log.debug('checking for MMM DD YYYY')
        sourceTime = self.parseDateText(s, sourceTime)
        debug and log.debug('parseDateText(%s) returned %s', s, sourceTime)
        self.dateStrFlag = False

    # Given string is a weekday
    if self.weekdyFlag:
        debug and log.debug('weekdyFlag is set')
        if sourceTime is None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = now
        else:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        wkdy = self.ptc.WeekdayOffsets[s]

        # the same delta computation applies whether the requested
        # weekday is before or after the current one
        qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                      self.ptc.DOWParseStyle,
                                      self.ptc.CurrentDOWParseStyle)

        target = start + datetime.timedelta(days=qty)

        sourceTime = target.timetuple()
        self.weekdyFlag = False

    # Given string is a natural language time string like
    # lunch, midnight, etc
    if self.timeStrFlag:
        debug and log.debug('timeStrFlag is set')
        if s in self.ptc.re_values['now']:
            sourceTime = now
        else:
            sTime = self.ptc.getSource(s, sourceTime)
            if sTime is None:
                sourceTime = now
                self.dateFlag = 0
                self.timeFlag = 0
            else:
                sourceTime = sTime

        self.timeStrFlag = False

    # Given string is a natural language date string like today, tomorrow..
    if self.dayStrFlag:
        debug and log.debug('dayStrFlag is set')
        if sourceTime is None:
            sourceTime = now

        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        try:
            offset = self.ptc.dayOffsets[s]
        except KeyError:
            offset = 0

        if self.ptc.StartTimeFromSourceTime:
            startHour = hr
            startMinute = mn
            startSecond = sec
        else:
            startHour = 9
            startMinute = 0
            startSecond = 0

        start = datetime.datetime(yr, mth, dy, startHour,
                                  startMinute, startSecond)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

        self.dayStrFlag = False

    # Given string is a time string with units like "5 hrs 30 min"
    if self.unitsFlag:
        debug and log.debug('unitsFlag is set')
        modifier = ''  # TODO

        if sourceTime is None:
            sourceTime = now

        m = self.ptc.CRE_UNITS.search(s)
        if m is not None:
            units = m.group('units')
            quantity = s[:m.start('units')]

            # only build when the pattern matched; otherwise units and
            # quantity would be unbound
            sourceTime = self._buildTime(sourceTime, quantity,
                                         modifier, units)
        self.unitsFlag = False

    # Given string is a time string with single char units like "5 h 30 m"
    if self.qunitsFlag:
        debug and log.debug('qunitsFlag is set')
        modifier = ''  # TODO

        if sourceTime is None:
            sourceTime = now

        m = self.ptc.CRE_QUNITS.search(s)
        if m is not None:
            units = m.group('qunits')
            quantity = s[:m.start('qunits')]

            # same guard as the long-unit branch above
            sourceTime = self._buildTime(sourceTime, quantity,
                                         modifier, units)
        self.qunitsFlag = False

    # Given string does not match anything
    if sourceTime is None:
        debug and log.debug('sourceTime is None - setting to current date')
        sourceTime = now
        self.dateFlag = 0
        self.timeFlag = 0

    return sourceTime
1263
1264 - def _UnitsTrapped(self, s, m, key):
1265 # check if a day suffix got trapped by a unit match 1266 # for example Dec 31st would match for 31s (aka 31 seconds) 1267 # Dec 31st 1268 # ^ ^ 1269 # | +-- m.start('units') 1270 # | and also m2.start('suffix') 1271 # +---- m.start('qty') 1272 # and also m2.start('day') 1273 m2 = self.ptc.CRE_DAY2.search(s) 1274 if m2 is not None: 1275 t = '%s%s' % (m2.group('day'), m.group(key)) 1276 if m.start(key) == m2.start('suffix') and \ 1277 m.start('qty') == m2.start('day') and \ 1278 m.group('qty') == t: 1279 return True 1280 else: 1281 return False 1282 else: 1283 return False
1284
def parseDT(self, datetimeString, sourceTime=None, tzinfo=None):
    """
    Parse C{datetimeString} like C{.parse} does, but return a
    C{datetime} object instead of a C{struct_time}.

    C{sourceTime} has the same meaning as for C{.parse}; in addition
    any object offering a C{timetuple()} method is converted with it
    first. C{tzinfo} accepts a tzinfo object. It is advisable to use
    pytz.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date
    @param sourceTime:     time value to use as the base
    @type  tzinfo:         tzinfo
    @param tzinfo:         Timezone to apply to generated datetime objs.

    @rtype:  tuple
    @return: tuple of datetime object and an int of the return code

    see .parse for return code details.
    """
    # If sourceTime offers timetuple(), use that; anything else is
    # handed to parse() as-is and parse() decides whether it is usable.
    try:
        as_timetuple = sourceTime.timetuple
    except AttributeError:
        pass
    else:
        sourceTime = as_timetuple()

    # Prefer the tzinfo's own localize() (pytz style). Without it, fall
    # back to attaching tzinfo directly; None is acceptable there and
    # simply yields a naive datetime.
    try:
        localize = tzinfo.localize
    except AttributeError:
        def localize(naive):
            return naive.replace(tzinfo=tzinfo)

    # Delegate the actual parsing.
    parsed = self.parse(datetimeString, sourceTime=sourceTime)
    time_struct, ret_code = parsed

    # Same return signature as parse(), except that the first item is a
    # datetime built from the leading six fields of the struct_time.
    naive_dt = datetime.datetime(*time_struct[:6])
    return (localize(naive_dt), ret_code)
1328
def parse(self, datetimeString, sourceTime=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.

    If the C{datetimeString} is parsed and date/time value found then
    the second item of the returned tuple will be a flag to let you know
    what kind of C{struct_time} value is being returned::

        0 = not parsed at all
        1 = parsed as a C{date}
        2 = parsed as a C{time}
        3 = parsed as a C{datetime}

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, tuple or datetime
    @param sourceTime:     value to use as the base; a C{datetime} is
                           converted with C{timetuple()}

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag

    @raise ValueError: if a truthy C{sourceTime} is neither a
                       C{datetime}, a C{struct_time} nor a tuple
    """
    debug and log.debug('parse()')

    # drop a period that ends a word ("jan. 5" -> "jan 5")
    datetimeString = re.sub(r'(\w)\.(\s)', r'\1\2', datetimeString)
    # replace quote characters hugging a word with a space
    datetimeString = re.sub(r'(\w)[\'"](\s|$)', r'\1 \2', datetimeString)
    datetimeString = re.sub(r'(\s|^)[\'"](\w)', r'\1 \2', datetimeString)

    if sourceTime:
        if isinstance(sourceTime, datetime.datetime):
            debug and log.debug('coercing datetime to timetuple')
            sourceTime = sourceTime.timetuple()
        elif not isinstance(sourceTime, (time.struct_time, tuple)):
            # ValueError is still caught by callers that used
            # "except Exception" for the previous bare Exception
            raise ValueError('sourceTime is not a struct_time')

    return self._parse(datetimeString.lower(), sourceTime)
1371
def _parse(self, datetimeString, sourceTime):
    """Internal method for C{.parse}

    Please do NOT call this method directly!
    You should call C{.parse} instead!

    Repeatedly finds the first pattern that matches the remaining text,
    sets the matching C{self.*Flag}, cuts the matched text out of the
    working string and evaluates it, until nothing more matches.

    @type  datetimeString: string
    @param datetimeString: lowercased date/time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag
    """
    s = datetimeString.strip()
    parseStr = ''
    totalTime = sourceTime

    if s == '':
        if sourceTime is not None:
            return sourceTime, self.dateFlag + self.timeFlag
        else:
            return time.localtime(), 0

    self.timeFlag = 0
    self.dateFlag = 0

    while s:
        # flag records whether this pass consumed part of s
        flag = False
        chunk1 = ''
        chunk2 = ''

        debug and log.debug('parse (top of loop): [%s][%s]', s, parseStr)

        if parseStr == '':
            # Modifier like next/prev/from/after/prior..
            m = self.ptc.CRE_MODIFIER.search(s)
            if m is not None:
                self.modifierFlag = True
                if m.group() != s:
                    # capture remaining string
                    parseStr = m.group()
                    chunk1 = s[:m.start()].strip()
                    chunk2 = s[m.end():].strip()
                    flag = True
                else:
                    parseStr = s

                debug and log.debug('parse (modifier) [%s][%s][%s]',
                                    parseStr, chunk1, chunk2)

        if parseStr == '':
            # Quantity + Units
            m = self.ptc.CRE_UNITS.search(s)
            if m is not None:
                debug and log.debug('CRE_UNITS matched')
                if self._UnitsTrapped(s, m, 'units'):
                    debug and log.debug('day suffix trapped by unit match')
                else:
                    self.unitsFlag = True
                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        # a '-' directly before the quantity is its sign
                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

                debug and log.debug(
                    'parse (units) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # Quantity + single character Units
            m = self.ptc.CRE_QUNITS.search(s)
            if m is not None:
                debug and log.debug('CRE_QUNITS matched')
                if self._UnitsTrapped(s, m, 'qunits'):
                    debug and log.debug(
                        'day suffix trapped by qunit match')
                else:
                    self.qunitsFlag = True

                    if (m.group('qty') != s):
                        # capture remaining string
                        parseStr = m.group('qty')
                        chunk1 = s[:m.start('qty')].strip()
                        chunk2 = s[m.end('qty'):].strip()

                        if chunk1[-1:] == '-':
                            parseStr = '-%s' % parseStr
                            chunk1 = chunk1[:-1]

                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

                debug and log.debug(
                    'parse (qunits) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # String date format
            m = self.ptc.CRE_DATE3.search(s)
            if m is not None:
                self.dateStrFlag = True
                self.dateFlag = 1

                if (m.group('date') != s):
                    # capture remaining string
                    mStart = m.start('date')
                    mEnd = m.end('date')
                    parseStr = m.group('date')
                    chunk1 = s[:mStart]
                    chunk2 = s[mEnd:]

                    # we need to check that anything following the parsed
                    # date is a time expression because it is often picked
                    # up as a valid year if the hour is 2 digits
                    fTime = False
                    mm = self.ptc.CRE_TIMEHMS2.search(s)
                    # "February 24th 1PM" doesn't get caught
                    # "February 24th 12PM" does
                    if mm is not None and m.group('year') is not None:
                        fTime = True
                    else:
                        # "February 24th 12:00"
                        mm = self.ptc.CRE_TIMEHMS.search(s)
                        if mm is not None and m.group('year') is None:
                            fTime = True
                    if fTime:
                        n = mm.end('hours') - mm.start('hours')
                        sEnd = parseStr[-n:]
                        sStart = mm.group('hours')

                        if sStart == sEnd:
                            # The date match swallowed the hour digits.
                            # parseStr is relative to mStart, so trim
                            # its last n characters; mEnd - n is an
                            # index into s and only fits chunk2.
                            parseStr = parseStr[:len(parseStr) - n].strip()
                            chunk2 = s[mEnd - n:]

                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

                debug and log.debug(
                    'parse (date3) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # Standard date format
            m = self.ptc.CRE_DATE.search(s)
            if m is not None:
                self.dateStdFlag = True
                self.dateFlag = 1
                if (m.group('date') != s):
                    # capture remaining string
                    parseStr = m.group('date')
                    chunk1 = s[:m.start('date')]
                    chunk2 = s[m.end('date'):]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

                debug and log.debug(
                    'parse (date) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # Natural language day strings
            m = self.ptc.CRE_DAY.search(s)
            if m is not None:
                self.dayStrFlag = True
                self.dateFlag = 1
                if (m.group() != s):
                    # capture remaining string
                    parseStr = m.group()
                    chunk1 = s[:m.start()]
                    chunk2 = s[m.end():]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

                debug and log.debug(
                    'parse (day) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # Weekday
            m = self.ptc.CRE_WEEKDAY.search(s)
            if m is not None:
                gv = m.group()
                if s not in self.ptc.dayOffsets:
                    self.weekdyFlag = True
                    self.dateFlag = 1
                    if (gv != s):
                        # capture remaining string
                        parseStr = gv
                        chunk1 = s[:m.start()]
                        chunk2 = s[m.end():]
                        s = '%s %s' % (chunk1, chunk2)
                        flag = True
                    else:
                        parseStr = s

                debug and log.debug(
                    'parse (weekday) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # Natural language time strings
            m = self.ptc.CRE_TIME.search(s)
            if m is not None or s in self.ptc.re_values['now']:
                self.timeStrFlag = True
                self.timeFlag = 2
                if (m and m.group() != s):
                    # capture remaining string
                    parseStr = m.group()
                    chunk1 = s[:m.start()]
                    chunk2 = s[m.end():]
                    s = '%s %s' % (chunk1, chunk2)
                    flag = True
                else:
                    parseStr = s

                debug and log.debug(
                    'parse (time) [%s][%s][%s]', parseStr, chunk1, chunk2)

        if parseStr == '':
            # HH:MM(:SS) am/pm time strings
            m = self.ptc.CRE_TIMEHMS2.search(s)
            if m is not None:
                self.meridianFlag = True
                self.timeFlag = 2
                if m.group('minutes') is not None:
                    if m.group('seconds') is not None:
                        parseStr = '%s:%s:%s %s' % (m.group('hours'),
                                                    m.group('minutes'),
                                                    m.group('seconds'),
                                                    m.group('meridian'))
                    else:
                        parseStr = '%s:%s %s' % (m.group('hours'),
                                                 m.group('minutes'),
                                                 m.group('meridian'))
                else:
                    parseStr = '%s %s' % (m.group('hours'),
                                          m.group('meridian'))

                chunk1 = s[:m.start('hours')]
                chunk2 = s[m.end('meridian'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

                debug and log.debug('parse (meridian) [%s][%s][%s]',
                                    parseStr, chunk1, chunk2)

        if parseStr == '':
            # HH:MM(:SS) time strings
            m = self.ptc.CRE_TIMEHMS.search(s)
            if m is not None:
                self.timeStdFlag = True
                self.timeFlag = 2
                if m.group('seconds') is not None:
                    parseStr = '%s:%s:%s' % (m.group('hours'),
                                             m.group('minutes'),
                                             m.group('seconds'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('seconds'):]
                else:
                    parseStr = '%s:%s' % (m.group('hours'),
                                          m.group('minutes'))
                    chunk1 = s[:m.start('hours')]
                    chunk2 = s[m.end('minutes'):]

                s = '%s %s' % (chunk1, chunk2)
                flag = True

                debug and log.debug(
                    'parse (hms) [%s][%s][%s]', parseStr, chunk1, chunk2)

        # if string does not match any regex, empty string to
        # come out of the while loop
        if not flag:
            s = ''

        debug and log.debug('dateFlag %s, timeFlag %s',
                            self.dateFlag, self.timeFlag)
        debug and log.debug('parse (bottom) [%s][%s][%s][%s]',
                            s, parseStr, chunk1, chunk2)
        debug and log.debug('weekday %s, dateStd %s, dateStr %s, '
                            'time %s, timeStr %s, meridian %s',
                            self.weekdyFlag, self.dateStdFlag,
                            self.dateStrFlag, self.timeStdFlag,
                            self.timeStrFlag, self.meridianFlag)
        debug and log.debug('dayStr %s, modifier %s, units %s, qunits %s',
                            self.dayStrFlag, self.modifierFlag,
                            self.unitsFlag, self.qunitsFlag)

        # evaluate the matched string

        if parseStr != '':
            if self.modifierFlag is True:
                t, totalTime = self._evalModifier(parseStr, chunk1,
                                                  chunk2, totalTime)
                # t is the unparsed part of the chunks.
                # If it is not date/time, return current
                # totalTime as it is; else return the output
                # after parsing t.
                if (t != '') and (t is not None):
                    with self._mergeFlags():
                        totalTime2, flag = self._parse(t, totalTime)

                    if flag == 0 and totalTime is not None:
                        return (totalTime, self.dateFlag + self.timeFlag)
                    else:
                        return (totalTime2, self.dateFlag + self.timeFlag)

            else:
                totalTime = self._evalString(parseStr, totalTime)
                parseStr = ''

    # String is not parsed at all
    if totalTime is None:
        debug and log.debug('not parsed [%s]', str(totalTime))
        totalTime = time.localtime()
        self.dateFlag = 0
        self.timeFlag = 0
    debug and log.debug(
        'parse() return dateFlag %d timeFlag %d totalTime %s',
        self.dateFlag, self.timeFlag, totalTime)
    return totalTime, self.dateFlag + self.timeFlag
1710
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date and increments it according to the
    values passed in by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    If the resulting month is shorter than the day of C{source}, the
    day is moved back to the last day of that month (for example
    Jan 31 + 1 month, or Feb 29 + 1 year).

    @type  source: datetime
    @param source: value to increment; it must offer C{year},
                   C{month}, C{day} and C{replace()}
    @type  month:  integer
    @param month:  optional number of months to increment
    @type  year:   integer
    @param year:   optional number of years to increment

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    """
    yr = source.year
    mth = source.month
    dy = source.day

    if year:
        try:
            yi = int(year)
        except ValueError:
            yi = 0

        yr += yi

    if month:
        try:
            mi = int(month)
        except ValueError:
            mi = 0

        m = abs(mi)
        y = m // 12  # how many years are in month increment
        m = m % 12   # get remaining months

        if mi < 0:
            y *= -1        # otherwise negative mi will give future dates
            mth = mth - m  # sub months from start month
            if mth < 1:    # cross start-of-year?
                y -= 1     # yes - decrement year
                mth += 12  # and fix month
        else:
            mth = mth + m  # add months to start month
            if mth > 12:   # cross end-of-year?
                y += 1     # yes - increment year
                mth -= 12  # and fix month

        yr += y

    # if the day ends up past the last day of the new month, set it to
    # the last day. This also has to run for year-only increments,
    # otherwise Feb 29 + 1 year makes replace() raise ValueError.
    lastDay = self.ptc.daysInMonth(mth, yr)
    if dy > lastDay:
        dy = lastDay

    d = source.replace(year=yr, month=mth, day=dy)

    return source + (d - source)
1774
def nlp(self, inputString, sourceTime=None):
    """Utilizes parse() after making judgements about what datetime
    information belongs together.

    It makes logical groupings based on proximity and returns a parsed
    datetime for each matched grouping of datetime text, along with
    location info within the given inputString.

    @type  inputString: string
    @param inputString: natural language text to evaluate
    @type  sourceTime:  struct_time
    @param sourceTime:  C{struct_time} value to use as the base

    @rtype:  tuple or None
    @return: tuple of tuples in the format (parsed_datetime as
             datetime.datetime, flags as int, start_pos as int,
             end_pos as int, matched_text as string) or None if there
             were no matches
    """

    def claim(best, start, end, text, flags, kind):
        # Record a candidate in best when best is still empty (end
        # position 0) or the candidate starts further left.
        if best[1] == 0 or best[0] > start:
            best[0] = start
            best[1] = end
            best[2] = text
            best[3] = flags
            best[4] = kind
            return True
        return False

    orig_inputstring = inputString

    # replace periods at the end of sentences w/ spaces
    # opposed to removing them altogether in order to
    # retain relative positions (identified by alpha, period, space).
    # this is required for some of the regex patterns to match
    inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower()
    inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString)
    inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString)

    startpos = 0  # the start position in the inputString during the loop

    # list of lists in format:
    # [startpos, endpos, matchedstring, flags, type]
    # flags: 0 = modifier, 1 = date, 2 = time, 3 = units
    matches = []

    while startpos < len(inputString):

        # empty match
        leftmost_match = [0, 0, None, 0, None]

        # every pattern is searched in the not yet consumed text, so
        # match offsets are relative and need startpos added
        remaining = inputString[startpos:]

        # Modifier like next\prev..
        m = self.ptc.CRE_MODIFIER.search(remaining)
        if m is not None:
            claim(leftmost_match, m.start() + startpos,
                  m.end() + startpos, m.group(), 0, 'modifier')

        # Quantity + Units, then Quantity + single character Units
        for regex, key in ((self.ptc.CRE_UNITS, 'units'),
                           (self.ptc.CRE_QUNITS, 'qunits')):
            m = regex.search(remaining)
            if m is None:
                continue

            if key == 'units':
                debug and log.debug('CRE_UNITS matched')
            else:
                debug and log.debug('CRE_QUNITS matched')

            if self._UnitsTrapped(remaining, m, key):
                if key == 'units':
                    debug and log.debug('day suffix trapped by unit match')
                else:
                    debug and log.debug('day suffix trapped by qunit match')
            elif claim(leftmost_match, m.start('qty') + startpos,
                       m.end('qty') + startpos, m.group('qty'), 3, key):
                # pull a directly preceding '-' into the match as sign;
                # the index must be absolute, hence + startpos
                if m.start('qty') > 0 and \
                   inputString[startpos + m.start('qty') - 1] == '-':
                    leftmost_match[0] = leftmost_match[0] - 1
                    leftmost_match[2] = '-' + leftmost_match[2]

        # String date format
        m = self.ptc.CRE_DATE3.search(remaining)
        if m is not None:
            claim(leftmost_match, m.start('date') + startpos,
                  m.end('date') + startpos, m.group('date'), 1, 'dateStr')

        # Standard date format
        m = self.ptc.CRE_DATE.search(remaining)
        if m is not None:
            claim(leftmost_match, m.start('date') + startpos,
                  m.end('date') + startpos, m.group('date'), 1, 'dateStd')

        # Natural language day strings
        m = self.ptc.CRE_DAY.search(remaining)
        if m is not None:
            claim(leftmost_match, m.start() + startpos,
                  m.end() + startpos, m.group(), 1, 'dayStr')

        # Weekday
        m = self.ptc.CRE_WEEKDAY.search(remaining)
        if m is not None:
            if remaining not in self.ptc.dayOffsets:
                claim(leftmost_match, m.start() + startpos,
                      m.end() + startpos, m.group(), 1, 'weekdy')

        # Natural language time strings
        m = self.ptc.CRE_TIME.search(remaining)
        if m is not None:
            claim(leftmost_match, m.start() + startpos,
                  m.end() + startpos, m.group(), 2, 'timeStr')

        # HH:MM(:SS) am/pm time strings
        m = self.ptc.CRE_TIMEHMS2.search(remaining)
        if m is not None:
            begin = m.start('hours') + startpos
            end = m.end('meridian') + startpos
            claim(leftmost_match, begin, end,
                  inputString[begin:end], 2, 'meridian')

        # HH:MM(:SS) time strings
        m = self.ptc.CRE_TIMEHMS.search(remaining)
        if m is not None:
            begin = m.start('hours') + startpos
            if m.group('seconds') is not None:
                end = m.end('seconds') + startpos
            else:
                end = m.end('minutes') + startpos
            claim(leftmost_match, begin, end,
                  inputString[begin:end], 2, 'timeStd')

        # Units only; must be preceded by a modifier
        if len(matches) > 0 and matches[-1][3] == 0:
            m = self.ptc.CRE_UNITS_ONLY.search(remaining)
            # Ensure that any match is immediately preceded by the
            # modifier. "Next is the word 'month'" should not parse as a
            # date while "next month" should
            if m is not None and remaining[:m.start()].strip() == '':
                debug and log.debug('CRE_UNITS_ONLY matched [%s]' % m.group())
                claim(leftmost_match, m.start() + startpos,
                      m.end() + startpos, m.group(), 3, 'unitsOnly')

        # set the start position to the end pos of the leftmost match
        startpos = leftmost_match[1]

        # nothing was detected
        # so break out of the loop
        if startpos == 0:
            startpos = len(inputString)
        else:
            if leftmost_match[3] > 0:
                m = self.ptc.CRE_NLP_PREFIX.search(
                    inputString[:leftmost_match[0]] +
                    ' ' + str(leftmost_match[3]))
                if m is not None:
                    # extend the match leftwards over the prefix
                    leftmost_match[0] = m.start('nlp_prefix')
                    leftmost_match[2] = inputString[leftmost_match[0]:
                                                    leftmost_match[1]]
            matches.append(leftmost_match)

    # find matches in proximity with one another and
    # return all the parsed values
    proximity_matches = []
    if len(matches) > 1:
        combined = ''
        from_match_index = 0
        # a group is worth parsing once it holds a date (1), a time (2)
        # or units (3); a lone modifier (0) is not enough
        has_date = matches[0][3] == 1
        has_time = matches[0][3] == 2
        has_units = matches[0][3] == 3
        for i in range(1, len(matches)):

            # test proximity (are there characters between matches?)
            endofprevious = matches[i - 1][1]
            begofcurrent = matches[i][0]
            if orig_inputstring[endofprevious:
                                begofcurrent].lower().strip() != '':
                # this one isn't in proximity, but maybe
                # we have enough to make a datetime
                # TODO: make sure the combination of
                # formats (modifier, dateStd, etc) makes logical sense
                # before parsing together
                if has_date or has_time or has_units:
                    combined = orig_inputstring[matches[from_match_index]
                                                [0]:matches[i - 1][1]]
                    parsed_datetime, flags = self.parse(combined,
                                                        sourceTime)
                    proximity_matches.append((
                        datetime.datetime(*parsed_datetime[:6]),
                        flags,
                        matches[from_match_index][0],
                        matches[i - 1][1],
                        combined))
                # not in proximity, reset starting from current
                from_match_index = i
                has_date = matches[i][3] == 1
                has_time = matches[i][3] == 2
                has_units = matches[i][3] == 3
                continue
            else:
                if matches[i][3] == 1:
                    has_date = True
                if matches[i][3] == 2:
                    has_time = True
                if matches[i][3] == 3:
                    has_units = True

        # check last
        # we have enough to make a datetime
        if has_date or has_time or has_units:

            combined = orig_inputstring[matches[from_match_index][0]:
                                        matches[len(matches) - 1][1]]
            parsed_datetime, flags = self.parse(combined, sourceTime)
            proximity_matches.append((
                datetime.datetime(*parsed_datetime[:6]),
                flags,
                matches[from_match_index][0],
                matches[len(matches) - 1][1],
                combined))

    elif len(matches) == 0:
        return None
    else:
        if matches[0][3] == 0:  # not enough info to parse
            return None
        else:
            combined = orig_inputstring[matches[0][0]:matches[0][1]]
            parsed_datetime, flags = self.parse(matches[0][2], sourceTime)
            proximity_matches.append((
                datetime.datetime(*parsed_datetime[:6]),
                flags,
                matches[0][0],
                matches[0][1],
                combined))

    return tuple(proximity_matches)
2072
2073 2074 -def _initSymbols(ptc):
2075 """ 2076 Initialize symbols and single character constants. 2077 """ 2078 # build am and pm lists to contain 2079 # original case, lowercase, first-char and dotted 2080 # versions of the meridian text 2081 ptc.am = ['', ''] 2082 ptc.pm = ['', ''] 2083 for idx, xm in enumerate(ptc.locale.meridian[:2]): 2084 # 0: am 2085 # 1: pm 2086 target = ['am', 'pm'][idx] 2087 setattr(ptc, target, [xm]) 2088 target = getattr(ptc, target) 2089 if xm: 2090 lxm = xm.lower() 2091 target.extend((xm[0], '{0}.{1}.'.format(*xm), 2092 lxm, lxm[0], '{0}.{1}.'.format(*lxm)))
2093
2094 2095 -class Constants(object):
2096 """ 2097 Default set of constants for parsedatetime. 2098 2099 If PyICU is present, then the class will first try to get PyICU 2100 to return a locale specified by C{localeID}. If either C{localeID} is 2101 None or if the locale does not exist within PyICU, then each of the 2102 locales defined in C{fallbackLocales} is tried in order. 2103 2104 If PyICU is not present or none of the specified locales can be used, 2105 then the class will initialize itself to the en_US locale. 2106 2107 if PyICU is not present or not requested, only the locales defined by 2108 C{pdtLocales} will be searched. 2109 """
2110 - def __init__(self, localeID=None, usePyICU=True, 2111 fallbackLocales=['en_US']):
2112 self.localeID = localeID 2113 self.fallbackLocales = fallbackLocales[:] 2114 2115 if 'en_US' not in self.fallbackLocales: 2116 self.fallbackLocales.append('en_US') 2117 2118 # define non-locale specific constants 2119 self.locale = None 2120 self.usePyICU = usePyICU 2121 2122 # starting cache of leap years 2123 # daysInMonth will add to this if during 2124 # runtime it gets a request for a year not found 2125 self._leapYears = list(range(1904, 2097, 4)) 2126 2127 self.Second = 1 2128 self.Minute = 60 # 60 * self.Second 2129 self.Hour = 3600 # 60 * self.Minute 2130 self.Day = 86400 # 24 * self.Hour 2131 self.Week = 604800 # 7 * self.Day 2132 self.Month = 2592000 # 30 * self.Day 2133 self.Year = 31536000 # 365 * self.Day 2134 2135 self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 2136 31, 31, 30, 31, 30, 31) 2137 self.rangeSep = '-' 2138 self.BirthdayEpoch = 50 2139 2140 # When True the starting time for all relative calculations will come 2141 # from the given SourceTime, otherwise it will be 9am 2142 2143 self.StartTimeFromSourceTime = False 2144 2145 # YearParseStyle controls how we parse "Jun 12", i.e. dates that do 2146 # not have a year present. The default is to compare the date given 2147 # to the current date, and if prior, then assume the next year. 2148 # Setting this to 0 will prevent that. 
2149 2150 self.YearParseStyle = 1 2151 2152 # DOWParseStyle controls how we parse "Tuesday" 2153 # If the current day was Thursday and the text to parse is "Tuesday" 2154 # then the following table shows how each style would be returned 2155 # -1, 0, +1 2156 # 2157 # Current day marked as *** 2158 # 2159 # Sun Mon Tue Wed Thu Fri Sat 2160 # week -1 2161 # current -1,0 *** 2162 # week +1 +1 2163 # 2164 # If the current day was Monday and the text to parse is "Tuesday" 2165 # then the following table shows how each style would be returned 2166 # -1, 0, +1 2167 # 2168 # Sun Mon Tue Wed Thu Fri Sat 2169 # week -1 -1 2170 # current *** 0,+1 2171 # week +1 2172 2173 self.DOWParseStyle = 1 2174 2175 # CurrentDOWParseStyle controls how we parse "Friday" 2176 # If the current day was Friday and the text to parse is "Friday" 2177 # then the following table shows how each style would be returned 2178 # True/False. This also depends on DOWParseStyle. 2179 # 2180 # Current day marked as *** 2181 # 2182 # DOWParseStyle = 0 2183 # Sun Mon Tue Wed Thu Fri Sat 2184 # week -1 2185 # current T,F 2186 # week +1 2187 # 2188 # DOWParseStyle = -1 2189 # Sun Mon Tue Wed Thu Fri Sat 2190 # week -1 F 2191 # current T 2192 # week +1 2193 # 2194 # DOWParseStyle = +1 2195 # 2196 # Sun Mon Tue Wed Thu Fri Sat 2197 # week -1 2198 # current T 2199 # week +1 F 2200 2201 self.CurrentDOWParseStyle = False 2202 2203 if self.usePyICU: 2204 self.locale = pdtLocales['icu'](self.localeID) 2205 2206 if self.locale.icu is None: 2207 self.usePyICU = False 2208 self.locale = None 2209 2210 if self.locale is None: 2211 if self.localeID not in pdtLocales: 2212 for localeId in range(0, len(self.fallbackLocales)): 2213 self.localeID = self.fallbackLocales[localeId] 2214 if self.localeID in pdtLocales: 2215 break 2216 2217 self.locale = pdtLocales[self.localeID]() 2218 2219 if self.locale is not None: 2220 2221 def _getLocaleDataAdjusted(localeData): 2222 """ 2223 If localeData is defined as ["mon|mnd", 
'tu|tues'...] then this 2224 function splits those definitions on | 2225 """ 2226 adjusted = [] 2227 for d in localeData: 2228 if len(d.split('|')) > 0: 2229 adjusted += d.split("|") 2230 else: 2231 adjusted.append(d) 2232 return adjusted
2233 2234 mths = _getLocaleDataAdjusted(self.locale.Months) 2235 smths = _getLocaleDataAdjusted(self.locale.shortMonths) 2236 swds = _getLocaleDataAdjusted(self.locale.shortWeekdays) 2237 wds = _getLocaleDataAdjusted(self.locale.Weekdays) 2238 2239 re_join = lambda g: '|'.join(re.escape(i) for i in g) 2240 2241 # escape any regex special characters that may be found 2242 self.locale.re_values['months'] = re_join(mths) 2243 self.locale.re_values['shortmonths'] = re_join(smths) 2244 self.locale.re_values['days'] = re_join(wds) 2245 self.locale.re_values['shortdays'] = re_join(swds) 2246 self.locale.re_values['dayoffsets'] = \ 2247 re_join(self.locale.dayOffsets) 2248 self.locale.re_values['numbers'] = \ 2249 re_join(self.locale.numbers) 2250 2251 units = [unit for units in self.locale.units.values() 2252 for unit in units] # flatten 2253 units.sort(key=len, reverse=True) # longest first 2254 self.locale.re_values['units'] = re_join(units) 2255 self.locale.re_values['modifiers'] = re_join(self.locale.Modifiers) 2256 self.locale.re_values['sources'] = re_join(self.locale.re_sources) 2257 2258 # build weekday offsets - yes, it assumes the Weekday and 2259 # shortWeekday lists are in the same order and Mon..Sun 2260 # (Python style) 2261 def _buildOffsets(offsetDict, localeData, indexStart): 2262 o = indexStart 2263 for key in localeData: 2264 key_split = key.split('|') 2265 if len(key_split) > 0: 2266 for k in key_split: 2267 offsetDict[k] = o 2268 else: 2269 offsetDict[key] = o 2270 o += 1
2271 2272 _buildOffsets(self.locale.WeekdayOffsets, 2273 self.locale.Weekdays, 0) 2274 _buildOffsets(self.locale.WeekdayOffsets, 2275 self.locale.shortWeekdays, 0) 2276 2277 # build month offsets - yes, it assumes the Months and shortMonths 2278 # lists are in the same order and Jan..Dec 2279 _buildOffsets(self.locale.MonthOffsets, 2280 self.locale.Months, 1) 2281 _buildOffsets(self.locale.MonthOffsets, 2282 self.locale.shortMonths, 1) 2283 2284 _initSymbols(self) 2285 2286 # TODO: add code to parse the date formats and build the regexes up 2287 # from sub-parts, find all hard-coded uses of date/time seperators 2288 2289 # not being used in code, but kept in case others are manually 2290 # utilizing this regex for their own purposes 2291 self.RE_DATE4 = r'''(?P<date> 2292 ( 2293 ( 2294 (?P<day>\d\d?) 2295 (?P<suffix>{daysuffix})? 2296 (,)? 2297 (\s)? 2298 ) 2299 (?P<mthname> 2300 \b({months}|{shortmonths})\b 2301 )\s? 2302 (?P<year>\d\d 2303 (\d\d)? 2304 )? 2305 ) 2306 )'''.format(**self.locale.re_values) 2307 2308 # still not completely sure of the behavior of the regex and 2309 # whether it would be best to consume all possible irrelevant 2310 # characters before the option groups (but within the {1,3} repetition 2311 # group or inside of each option group, as it currently does 2312 # however, right now, all tests are passing that were, 2313 # including fixing the bug of matching a 4-digit year as ddyy 2314 # when the day is absent from the string 2315 self.RE_DATE3 = r'''(?P<date> 2316 (?: 2317 (?:^|\s) 2318 (?P<mthname> 2319 {months}|{shortmonths} 2320 )\b 2321 | 2322 (?:^|\s) 2323 (?P<day>[1-9]|[012]\d|3[01]) 2324 (?!\d|pm|am) 2325 (?P<suffix>{daysuffix}|) 2326 | 2327 (?:,\s|\s) 2328 (?P<year>\d\d(?:\d\d|)) 2329 ){{1,3}} 2330 (?(mthname)|$-^) 2331 )'''.format(**self.locale.re_values) 2332 2333 # not being used in code, but kept in case others are manually 2334 # utilizing this regex for their own purposes 2335 self.RE_MONTH = r'''(\s|^) 2336 (?P<month> 2337 ( 
2338 (?P<mthname> 2339 \b({months}|{shortmonths})\b 2340 ) 2341 (\s? 2342 (?P<year>(\d{{4}})) 2343 )? 2344 ) 2345 ) 2346 (?=\s|$|[^\w])'''.format(**self.locale.re_values) 2347 2348 self.RE_WEEKDAY = r'''\b 2349 (?: 2350 {days}|{shortdays} 2351 ) 2352 \b'''.format(**self.locale.re_values) 2353 2354 self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+)' 2355 .format(**self.locale.re_values)) 2356 2357 self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+' 2358 .format(**self.locale.re_values)) 2359 2360 self.RE_UNITS_ONLY = (r'''\b({units})\b''' 2361 .format(**self.locale.re_values)) 2362 2363 self.RE_UNITS = r'''\b(?P<qty> 2364 -? 2365 (?:\d+|(?:{numbers})\b)\s* 2366 (?P<units>{units}) 2367 )\b'''.format(**self.locale.re_values) 2368 2369 self.RE_QUNITS = r'''\b(?P<qty> 2370 -? 2371 (?:\d+|(?:{numbers})s)\s? 2372 (?P<qunits>{qunits}) 2373 )\b'''.format(**self.locale.re_values) 2374 2375 self.RE_MODIFIER = r'''\b(?: 2376 {modifiers} 2377 )\b'''.format(**self.locale.re_values) 2378 2379 self.RE_TIMEHMS = r'''([\s(\["'-]|^) 2380 (?P<hours>\d\d?) 2381 (?P<tsep>{timeseperator}|) 2382 (?P<minutes>\d\d) 2383 (?:(?P=tsep) 2384 (?P<seconds>\d\d 2385 (?:[\.,]\d+)? 2386 ) 2387 )?\b'''.format(**self.locale.re_values) 2388 2389 self.RE_TIMEHMS2 = r'''([\s(\["'-]|^) 2390 (?P<hours>\d\d?) 2391 (?: 2392 (?P<tsep>{timeseperator}|) 2393 (?P<minutes>\d\d?) 2394 (?:(?P=tsep) 2395 (?P<seconds>\d\d? 2396 (?:[\.,]\d+)? 2397 ) 2398 )? 2399 )?'''.format(**self.locale.re_values) 2400 2401 # 1, 2, and 3 here refer to the type of match date, time, or units 2402 self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix> 2403 (on) 2404 (\s)+1 2405 | 2406 (at|in) 2407 (\s)+2 2408 | 2409 (in) 2410 (\s)+3 2411 )''' 2412 2413 if 'meridian' in self.locale.re_values: 2414 self.RE_TIMEHMS2 += (r'\s?(?P<meridian>{meridian})\b' 2415 .format(**self.locale.re_values)) 2416 else: 2417 self.RE_TIMEHMS2 += r'\b' 2418 2419 dateSeps = ''.join(re.escape(s) for s in self.locale.dateSep) + '\.' 
2420 2421 self.RE_DATE = r'''\b 2422 (?P<date>\d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)?) 2423 \b'''.format(dateSeps) 2424 2425 self.RE_DATE2 = r'[{0}]'.format(dateSeps) 2426 2427 assert 'dayoffsets' in self.locale.re_values 2428 2429 self.RE_DAY = r'''\b 2430 (?: 2431 {dayoffsets} 2432 ) 2433 \b'''.format(**self.locale.re_values) 2434 2435 self.RE_DAY2 = r'''(?P<day>\d\d?) 2436 (?P<suffix>{daysuffix})? 2437 '''.format(**self.locale.re_values) 2438 2439 self.RE_TIME = r'''\b 2440 (?: 2441 {sources} 2442 ) 2443 \b'''.format(**self.locale.re_values) 2444 2445 self.RE_REMAINING = r'\s+' 2446 2447 # Regex for date/time ranges 2448 self.RE_RTIMEHMS = r'''(\s?|^) 2449 (\d\d?){timeseperator} 2450 (\d\d) 2451 ({timeseperator}(\d\d))? 2452 (\s?|$)'''.format(**self.locale.re_values) 2453 2454 self.RE_RTIMEHMS2 = (r'''(\s?|^) 2455 (\d\d?) 2456 ({timeseperator}(\d\d?))? 2457 ({timeseperator}(\d\d?))?''' 2458 .format(**self.locale.re_values)) 2459 2460 if 'meridian' in self.locale.re_values: 2461 self.RE_RTIMEHMS2 += (r'\s?({meridian})' 2462 .format(**self.locale.re_values)) 2463 2464 self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps 2465 self.RE_RDATE3 = r'''( 2466 ( 2467 ( 2468 \b({months})\b 2469 )\s? 2470 ( 2471 (\d\d?) 2472 (\s?|{daysuffix}|$)+ 2473 )? 2474 (,\s?\d{{4}})? 2475 ) 2476 )'''.format(**self.locale.re_values) 2477 2478 # "06/07/06 - 08/09/06" 2479 self.DATERNG1 = (r'{0}\s?{rangeseperator}\s?{0}' 2480 .format(self.RE_RDATE, **self.locale.re_values)) 2481 2482 # "march 31 - june 1st, 2006" 2483 self.DATERNG2 = (r'{0}\s?{rangeseperator}\s?{0}' 2484 .format(self.RE_RDATE3, **self.locale.re_values)) 2485 2486 # "march 1rd -13th" 2487 self.DATERNG3 = (r'{0}\s?{rangeseperator}\s?(\d\d?)\s?(rd|st|nd|th)?' 
2488 .format(self.RE_RDATE3, **self.locale.re_values)) 2489 2490 # "4:00:55 pm - 5:90:44 am", '4p-5p' 2491 self.TIMERNG1 = (r'{0}\s?{rangeseperator}\s?{0}' 2492 .format(self.RE_RTIMEHMS2, **self.locale.re_values)) 2493 2494 self.TIMERNG2 = (r'{0}\s?{rangeseperator}\s?{0}' 2495 .format(self.RE_RTIMEHMS, **self.locale.re_values)) 2496 2497 # "4-5pm " 2498 self.TIMERNG3 = (r'\d\d?\s?{rangeseperator}\s?{0}' 2499 .format(self.RE_RTIMEHMS2, **self.locale.re_values)) 2500 2501 # "4:30-5pm " 2502 self.TIMERNG4 = (r'{0}\s?{rangeseperator}\s?{1}' 2503 .format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2, 2504 **self.locale.re_values)) 2505 2506 self.re_option = re.IGNORECASE + re.VERBOSE 2507 self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL, 2508 'CRE_NUMBER': self.RE_NUMBER, 2509 'CRE_UNITS': self.RE_UNITS, 2510 'CRE_UNITS_ONLY': self.RE_UNITS_ONLY, 2511 'CRE_QUNITS': self.RE_QUNITS, 2512 'CRE_MODIFIER': self.RE_MODIFIER, 2513 'CRE_TIMEHMS': self.RE_TIMEHMS, 2514 'CRE_TIMEHMS2': self.RE_TIMEHMS2, 2515 'CRE_DATE': self.RE_DATE, 2516 'CRE_DATE2': self.RE_DATE2, 2517 'CRE_DATE3': self.RE_DATE3, 2518 'CRE_DATE4': self.RE_DATE4, 2519 'CRE_MONTH': self.RE_MONTH, 2520 'CRE_WEEKDAY': self.RE_WEEKDAY, 2521 'CRE_DAY': self.RE_DAY, 2522 'CRE_DAY2': self.RE_DAY2, 2523 'CRE_TIME': self.RE_TIME, 2524 'CRE_REMAINING': self.RE_REMAINING, 2525 'CRE_RTIMEHMS': self.RE_RTIMEHMS, 2526 'CRE_RTIMEHMS2': self.RE_RTIMEHMS2, 2527 'CRE_RDATE': self.RE_RDATE, 2528 'CRE_RDATE3': self.RE_RDATE3, 2529 'CRE_TIMERNG1': self.TIMERNG1, 2530 'CRE_TIMERNG2': self.TIMERNG2, 2531 'CRE_TIMERNG3': self.TIMERNG3, 2532 'CRE_TIMERNG4': self.TIMERNG4, 2533 'CRE_DATERNG1': self.DATERNG1, 2534 'CRE_DATERNG2': self.DATERNG2, 2535 'CRE_DATERNG3': self.DATERNG3, 2536 'CRE_NLP_PREFIX': self.RE_NLP_PREFIX} 2537 self.cre_keys = set(self.cre_source.keys()) 2538
2539 - def __getattr__(self, name):
2540 if name in self.cre_keys: 2541 value = re.compile(self.cre_source[name], self.re_option) 2542 setattr(self, name, value) 2543 return value 2544 elif name in self.locale.locale_keys: 2545 return getattr(self.locale, name) 2546 else: 2547 raise AttributeError(name)
2548
2549 - def daysInMonth(self, month, year):
2550 """ 2551 Take the given month (1-12) and a given year (4 digit) return 2552 the number of days in the month adjusting for leap year as needed 2553 """ 2554 result = None 2555 debug and log.debug('daysInMonth(%s, %s)', month, year) 2556 if month > 0 and month <= 12: 2557 result = self._DaysInMonthList[month - 1] 2558 2559 if month == 2: 2560 if year in self._leapYears: 2561 result += 1 2562 else: 2563 if calendar.isleap(year): 2564 self._leapYears.append(year) 2565 result += 1 2566 2567 return result
2568
2569 - def getSource(self, sourceKey, sourceTime=None):
2570 """ 2571 GetReturn a date/time tuple based on the giving source key 2572 and the corresponding key found in self.re_sources. 2573 2574 The current time is used as the default and any specified 2575 item found in self.re_sources is inserted into the value 2576 and the generated dictionary is returned. 2577 """ 2578 if sourceKey not in self.re_sources: 2579 return None 2580 2581 if sourceTime is None: 2582 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime() 2583 else: 2584 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime 2585 2586 defaults = {'yr': yr, 'mth': mth, 'dy': dy, 2587 'hr': hr, 'mn': mn, 'sec': sec} 2588 2589 source = self.re_sources[sourceKey] 2590 2591 values = {} 2592 2593 for key, default in defaults.items(): 2594 values[key] = source.get(key, default) 2595 2596 return (values['yr'], values['mth'], values['dy'], 2597 values['hr'], values['mn'], values['sec'], 2598 wd, yd, isdst)
2599