X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=dateparse%2Fdateparse_utils.py;h=6ba647c847e48931017292947bfe36cb423f772f;hb=f2600f30801c849fc1d139386e3ddc3c9eb43e30;hp=fb55eef6b973fd9ad67f8a19c74c2d184172272b;hpb=497fb9e21f45ec08e1486abaee6dfa7b20b8a691;p=python_utils.git diff --git a/dateparse/dateparse_utils.py b/dateparse/dateparse_utils.py index fb55eef..6ba647c 100755 --- a/dateparse/dateparse_utils.py +++ b/dateparse/dateparse_utils.py @@ -1,137 +1,479 @@ #!/usr/bin/env python3 +# type: ignore + +""" +Parse dates in a variety of formats. + +""" -import antlr4 # type: ignore import datetime -import dateutil.easter -import holidays # type: ignore +import functools +import logging import re import sys -from typing import Any, Dict, Optional +from typing import Any, Callable, Dict, Optional + +import antlr4 # type: ignore +import dateutil.easter +import dateutil.tz +import holidays # type: ignore +import pytz +import acl +import bootstrap +import decorator_utils from dateparse.dateparse_utilsLexer import dateparse_utilsLexer # type: ignore from dateparse.dateparse_utilsListener import dateparse_utilsListener # type: ignore from dateparse.dateparse_utilsParser import dateparse_utilsParser # type: ignore +from datetime_utils import ( + TimeUnit, + date_to_datetime, + datetime_to_date, + n_timeunits_from_base, +) + +logger = logging.getLogger(__name__) + + +def debug_parse(enter_or_exit_f: Callable[[Any, Any], None]): + @functools.wraps(enter_or_exit_f) + def debug_parse_wrapper(*args, **kwargs): + # slf = args[0] + ctx = args[1] + depth = ctx.depth() + logger.debug( + ' ' * (depth - 1) + + f'Entering {enter_or_exit_f.__name__} ({ctx.invokingState} / {ctx.exception})' + ) + for c in ctx.getChildren(): + logger.debug(' ' * (depth - 1) + f'{c} {type(c)}') + retval = enter_or_exit_f(*args, **kwargs) + return retval + + return debug_parse_wrapper class ParseException(Exception): + """An exception thrown during parsing because of unrecognized input.""" + def __init__(self, message: str) -> None: self.message = message +class RaisingErrorListener(antlr4.DiagnosticErrorListener): + """An error listener that raises ParseExceptions.""" + + def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): + raise ParseException(msg) + + def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs): + pass + + def reportAttemptingFullContext( + self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs + ): + pass + + def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs): + pass + + +@decorator_utils.decorate_matching_methods_with( + debug_parse, + acl=acl.StringWildcardBasedACL( + allowed_patterns=[ + 'enter*', + 'exit*', + ], + denied_patterns=['enterEveryRule', 'exitEveryRule'], + order_to_check_allow_deny=acl.Order.DENY_ALLOW, + default_answer=False, + ), +) class DateParser(dateparse_utilsListener): PARSE_TYPE_SINGLE_DATE_EXPR = 1 PARSE_TYPE_BASE_AND_OFFSET_EXPR = 2 - CONSTANT_DAYS = 7 - CONSTANT_WEEKS = 8 - CONSTANT_MONTHS = 9 - CONSTANT_YEARS = 10 - - def __init__(self): + PARSE_TYPE_SINGLE_TIME_EXPR = 3 + PARSE_TYPE_BASE_AND_OFFSET_TIME_EXPR = 4 + + def __init__(self, *, override_now_for_test_purposes=None) -> None: + """C'tor. Passing a value to override_now_for_test_purposes can be + used to force this instance to use a custom date/time for its + idea of "now" so that the code can be more easily unittested. + Leave as None for real use cases. + """ self.month_name_to_number = { - "jan": 1, - "feb": 2, - "mar": 3, - "apr": 4, - "may": 5, - "jun": 6, - "jul": 7, - "aug": 8, - "sep": 9, - "oct": 10, - "nov": 11, - "dec": 12, + 'jan': 1, + 'feb': 2, + 'mar': 3, + 'apr': 4, + 'may': 5, + 'jun': 6, + 'jul': 7, + 'aug': 8, + 'sep': 9, + 'oct': 10, + 'nov': 11, + 'dec': 12, } + + # Used only for ides/nones. Month length on a non-leap year. + self.typical_days_per_month = { + 1: 31, + 2: 28, + 3: 31, + 4: 30, + 5: 31, + 6: 30, + 7: 31, + 8: 31, + 9: 30, + 10: 31, + 11: 30, + 12: 31, + } + + # N.B. day number is also synched with datetime_utils.TimeUnit values + # which allows expressions like "3 wednesdays from now" to work. self.day_name_to_number = { - "mon": 0, - "tue": 1, - "wed": 2, - "thu": 3, - "fri": 4, - "sat": 5, - "sun": 6, + 'mon': 0, + 'tue': 1, + 'wed': 2, + 'thu': 3, + 'fri': 4, + 'sat': 5, + 'sun': 6, + } + + # These TimeUnits are defined in datetime_utils and are used as params + # to datetime_utils.n_timeunits_from_base. + self.time_delta_unit_to_constant = { + 'hou': TimeUnit.HOURS, + 'min': TimeUnit.MINUTES, + 'sec': TimeUnit.SECONDS, } self.delta_unit_to_constant = { - "day": DateParser.CONSTANT_DAYS, - "wee": DateParser.CONSTANT_WEEKS, + 'day': TimeUnit.DAYS, + 'wor': TimeUnit.WORKDAYS, + 'wee': TimeUnit.WEEKS, + 'mon': TimeUnit.MONTHS, + 'yea': TimeUnit.YEARS, } - self.date: Optional[datetime.date] = None + self.override_now_for_test_purposes = override_now_for_test_purposes + self._reset() + + def parse(self, date_string: str) -> Optional[datetime.datetime]: + """Parse a date/time expression and return a timezone agnostic + datetime on success. Also sets self.datetime, self.date and + self.time which can each be accessed other methods on the + class: get_datetime(), get_date() and get_time(). Raises a + ParseException with a helpful(?) message on parse error or + confusion. - def parse_date_string(self, date_string: str) -> Optional[datetime.date]: + To get an idea of what expressions can be parsed, check out + the unittest and the grammar. + + Usage: + + txt = '3 weeks before last tues at 9:15am' + dp = DateParser() + dt1 = dp.parse(txt) + dt2 = dp.get_datetime(tz=pytz.timezone('US/Pacific')) + + # dt1 and dt2 will be identical other than the fact that + # the latter's tzinfo will be set to PST/PDT. + + This is the main entrypoint to this class for caller code. + """ + date_string = date_string.strip() + date_string = re.sub(r'\s+', ' ', date_string) + self._reset() + listener = RaisingErrorListener() input_stream = antlr4.InputStream(date_string) lexer = dateparse_utilsLexer(input_stream) + lexer.removeErrorListeners() + lexer.addErrorListener(listener) stream = antlr4.CommonTokenStream(lexer) parser = dateparse_utilsParser(stream) + parser.removeErrorListeners() + parser.addErrorListener(listener) tree = parser.parse() walker = antlr4.ParseTreeWalker() walker.walk(self, tree) - return self.get_date() + return self.datetime def get_date(self) -> Optional[datetime.date]: + """Return the date part or None.""" return self.date - def enterDateExpr(self, ctx: dateparse_utilsParser.DateExprContext): - self.date = None + def get_time(self) -> Optional[datetime.time]: + """Return the time part or None.""" + return self.time + + def get_datetime(self, *, tz=None) -> Optional[datetime.datetime]: + """Return as a datetime. Parsed date expressions without any time + part return hours = minutes = seconds = microseconds = 0 (i.e. at + midnight that day). Parsed time expressions without any date part + default to date = today. + + The optional tz param allows the caller to request the datetime be + timezone aware and sets the tzinfo to the indicated zone. Defaults + to timezone naive (i.e. tzinfo = None). + """ + dt = self.datetime + if dt is not None: + if tz is not None: + dt = dt.replace(tzinfo=None).astimezone(tz=tz) + return dt + + # -- helpers -- + + def _reset(self): + """Reset at init and between parses.""" + if self.override_now_for_test_purposes is None: + self.now_datetime = datetime.datetime.now() + self.today = datetime.date.today() + else: + self.now_datetime = self.override_now_for_test_purposes + self.today = datetime_to_date(self.override_now_for_test_purposes) + self.date: Optional[datetime.date] = None + self.time: Optional[datetime.time] = None + self.datetime: Optional[datetime.datetime] = None self.context: Dict[str, Any] = {} - if ctx.singleDateExpr() is not None: - self.main_type = DateParser.PARSE_TYPE_SINGLE_DATE_EXPR - elif ctx.baseAndOffsetDateExpr() is not None: - self.main_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_EXPR + self.timedelta = datetime.timedelta(seconds=0) + self.saw_overt_year = False @staticmethod - def normalize_special_day_name(name: str) -> str: + def _normalize_special_day_name(name: str) -> str: + """String normalization / canonicalization for date expressions.""" name = name.lower() - name = name.replace("'", "") - name = name.replace("xmas", "christmas") - name = name.replace("mlk", "martin luther king") - name = name.replace(" ", "") - eve = "eve" if name[-3:] == "eve" else "" + name = name.replace("'", '') + name = name.replace('xmas', 'christmas') + name = name.replace('mlk', 'martin luther king') + name = name.replace(' ', '') + eve = 'eve' if name[-3:] == 'eve' else '' name = name[:5] + eve - name = name.replace("washi", "presi") + name = name.replace('washi', 'presi') return name - def parse_special(self, name: str) -> Optional[datetime.date]: - today = datetime.date.today() - year = self.context.get("year", today.year) - name = DateParser.normalize_special_day_name(self.context["special"]) - if name == "today": + def _figure_out_date_unit(self, orig: str) -> TimeUnit: + """Figure out what unit a date expression piece is talking about.""" + if 'month' in orig: + return TimeUnit.MONTHS + txt = orig.lower()[:3] + if txt in self.day_name_to_number: + return TimeUnit(self.day_name_to_number[txt]) + elif txt in self.delta_unit_to_constant: + return TimeUnit(self.delta_unit_to_constant[txt]) + raise ParseException(f'Invalid date unit: {orig}') + + def _figure_out_time_unit(self, orig: str) -> int: + """Figure out what unit a time expression piece is talking about.""" + txt = orig.lower()[:3] + if txt in self.time_delta_unit_to_constant: + return self.time_delta_unit_to_constant[txt] + raise ParseException(f'Invalid time unit: {orig}') + + def _parse_special_date(self, name: str) -> Optional[datetime.date]: + """Parse what we think is a special date name and return its datetime + (or None if it can't be parsed). + """ + today = self.today + year = self.context.get('year', today.year) + name = DateParser._normalize_special_day_name(self.context['special']) + + # Yesterday, today, tomorrow -- ignore any next/last + if name == 'today' or name == 'now': return today - if name == "easte": + if name == 'yeste': + return today + datetime.timedelta(days=-1) + if name == 'tomor': + return today + datetime.timedelta(days=+1) + + next_last = self.context.get('special_next_last', '') + if next_last == 'next': + year += 1 + self.saw_overt_year = True + elif next_last == 'last': + year -= 1 + self.saw_overt_year = True + + # Holiday names + if name == 'easte': return dateutil.easter.easter(year=year) - for holiday_date, holiday_name in sorted( - holidays.US(years=year).items() - ): - if "Observed" not in holiday_name: - holiday_name = DateParser.normalize_special_day_name( - holiday_name - ) + elif name == 'hallo': + return datetime.date(year=year, month=10, day=31) + + for holiday_date, holiday_name in sorted(holidays.US(years=year).items()): + if 'Observed' not in holiday_name: + holiday_name = DateParser._normalize_special_day_name(holiday_name) if name == holiday_name: return holiday_date - if name == "chriseve": + if name == 'chriseve': return datetime.date(year=year, month=12, day=24) - elif name == "newyeeve": + elif name == 'newyeeve': return datetime.date(year=year, month=12, day=31) return None - def parse_normal(self) -> datetime.date: - if "month" not in self.context: - raise ParseException("Missing month") - if "day" not in self.context: - raise ParseException("Missing day") - if "year" not in self.context: - today = datetime.date.today() - self.context["year"] = today.year + def _resolve_ides_nones(self, day: str, month_number: int) -> int: + """Handle date expressions like "the ides of March" which require + both the "ides" and the month since the definition of the "ides" + changes based on the length of the month. + """ + assert 'ide' in day or 'non' in day + assert month_number in self.typical_days_per_month + typical_days_per_month = self.typical_days_per_month[month_number] + + # "full" month + if typical_days_per_month == 31: + if self.context['day'] == 'ide': + return 15 + else: + return 7 + + # "hollow" month + else: + if self.context['day'] == 'ide': + return 13 + else: + return 5 + + def _parse_normal_date(self) -> datetime.date: + if 'dow' in self.context: + d = self.today + while d.weekday() != self.context['dow']: + d += datetime.timedelta(days=1) + return d + + if 'month' not in self.context: + raise ParseException('Missing month') + if 'day' not in self.context: + raise ParseException('Missing day') + if 'year' not in self.context: + self.context['year'] = self.today.year + self.saw_overt_year = False + else: + self.saw_overt_year = True + + # Handling "ides" and "nones" requires both the day and month. + if self.context['day'] == 'ide' or self.context['day'] == 'non': + self.context['day'] = self._resolve_ides_nones( + self.context['day'], self.context['month'] + ) + return datetime.date( - year=int(self.context["year"]), - month=int(self.context["month"]), - day=int(self.context["day"]), + year=self.context['year'], + month=self.context['month'], + day=self.context['day'], + ) + + def _parse_tz(self, txt: str) -> Any: + if txt == 'Z': + txt = 'UTC' + + # Try pytz + try: + tz1 = pytz.timezone(txt) + if tz1 is not None: + return tz1 + except Exception: + pass + + # Try dateutil + try: + tz2 = dateutil.tz.gettz(txt) + if tz2 is not None: + return tz2 + except Exception: + pass + + # Try constructing an offset in seconds + try: + txt_sign = txt[0] + if txt_sign == '-' or txt_sign == '+': + sign = +1 if txt_sign == '+' else -1 + hour = int(txt[1:3]) + minute = int(txt[-2:]) + offset = sign * (hour * 60 * 60) + sign * (minute * 60) + tzoffset = dateutil.tz.tzoffset(txt, offset) + return tzoffset + except Exception: + pass + return None + + def _get_int(self, txt: str) -> int: + while not txt[0].isdigit() and txt[0] != '-' and txt[0] != '+': + txt = txt[1:] + while not txt[-1].isdigit(): + txt = txt[:-1] + return int(txt) + + # -- overridden methods invoked by parse walk -- + + def visitErrorNode(self, node: antlr4.ErrorNode) -> None: + pass + + def visitTerminal(self, node: antlr4.TerminalNode) -> None: + pass + + def exitParse(self, ctx: dateparse_utilsParser.ParseContext) -> None: + """Populate self.datetime.""" + if self.date is None: + self.date = self.today + year = self.date.year + month = self.date.month + day = self.date.day + + if self.time is None: + self.time = datetime.time(0, 0, 0) + hour = self.time.hour + minute = self.time.minute + second = self.time.second + micros = self.time.microsecond + + self.datetime = datetime.datetime( + year, + month, + day, + hour, + minute, + second, + micros, + tzinfo=self.time.tzinfo, ) + # Apply resudual adjustments to times here when we have a + # datetime. + self.datetime = self.datetime + self.timedelta + assert self.datetime is not None + self.time = datetime.time( + self.datetime.hour, + self.datetime.minute, + self.datetime.second, + self.datetime.microsecond, + self.datetime.tzinfo, + ) + + def enterDateExpr(self, ctx: dateparse_utilsParser.DateExprContext): + self.date = None + if ctx.singleDateExpr() is not None: + self.main_type = DateParser.PARSE_TYPE_SINGLE_DATE_EXPR + elif ctx.baseAndOffsetDateExpr() is not None: + self.main_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_EXPR + + def enterTimeExpr(self, ctx: dateparse_utilsParser.TimeExprContext): + self.time = None + if ctx.singleTimeExpr() is not None: + self.time_type = DateParser.PARSE_TYPE_SINGLE_TIME_EXPR + elif ctx.baseAndOffsetTimeExpr() is not None: + self.time_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_TIME_EXPR + def exitDateExpr(self, ctx: dateparse_utilsParser.DateExprContext) -> None: """When we leave the date expression, populate self.date.""" - if "special" in self.context: - self.date = self.parse_special(self.context["special"]) + if 'special' in self.context: + self.date = self._parse_special_date(self.context['special']) else: - self.date = self.parse_normal() + self.date = self._parse_normal_date() assert self.date is not None # For a single date, just return the date we pulled out. @@ -140,74 +482,164 @@ class DateParser(dateparse_utilsListener): # Otherwise treat self.date as a base date that we're modifying # with an offset. - if not "delta_int" in self.context: - raise ParseException("Missing delta_int?!") - count = self.context["delta_int"] + if 'delta_int' not in self.context: + raise ParseException('Missing delta_int?!') + count = self.context['delta_int'] if count == 0: return # Adjust count's sign based on the presence of 'before' or 'after'. - if "delta_before_after" in self.context: - before_after = self.context["delta_before_after"].lower() - if before_after == "before": + if 'delta_before_after' in self.context: + before_after = self.context['delta_before_after'].lower() + if ( + before_after == 'before' + or before_after == 'until' + or before_after == 'til' + or before_after == 'to' + ): count = -count # What are we counting units of? - if "delta_unit" not in self.context: - raise ParseException("Missing delta_unit?!") - unit = self.context["delta_unit"] - if unit == DateParser.CONSTANT_DAYS: - timedelta = datetime.timedelta(days=count) - self.date = self.date + timedelta - elif unit == DateParser.CONSTANT_WEEKS: - timedelta = datetime.timedelta(weeks=count) - self.date = self.date + timedelta + if 'delta_unit' not in self.context: + raise ParseException('Missing delta_unit?!') + unit = self.context['delta_unit'] + dt = n_timeunits_from_base(count, TimeUnit(unit), date_to_datetime(self.date)) + self.date = datetime_to_date(dt) + + def exitTimeExpr(self, ctx: dateparse_utilsParser.TimeExprContext) -> None: + # Simple time? + self.time = datetime.time( + self.context['hour'], + self.context['minute'], + self.context['seconds'], + self.context['micros'], + tzinfo=self.context.get('tz', None), + ) + if self.time_type == DateParser.PARSE_TYPE_SINGLE_TIME_EXPR: + return + + # If we get here there (should be) a relative adjustment to + # the time. + if 'nth' in self.context: + count = self.context['nth'] + elif 'time_delta_int' in self.context: + count = self.context['time_delta_int'] + else: + raise ParseException('Missing delta in relative time.') + if count == 0: + return + + # Adjust count's sign based on the presence of 'before' or 'after'. + if 'time_delta_before_after' in self.context: + before_after = self.context['time_delta_before_after'].lower() + if ( + before_after == 'before' + or before_after == 'until' + or before_after == 'til' + or before_after == 'to' + ): + count = -count + + # What are we counting units of... assume minutes. + if 'time_delta_unit' not in self.context: + self.timedelta += datetime.timedelta(minutes=count) + else: + unit = self.context['time_delta_unit'] + if unit == TimeUnit.SECONDS: + self.timedelta += datetime.timedelta(seconds=count) + elif unit == TimeUnit.MINUTES: + self.timedelta = datetime.timedelta(minutes=count) + elif unit == TimeUnit.HOURS: + self.timedelta = datetime.timedelta(hours=count) + else: + raise ParseException(f'Invalid Unit: "{unit}"') + + def exitDeltaPlusMinusExpr(self, ctx: dateparse_utilsParser.DeltaPlusMinusExprContext) -> None: + try: + n = ctx.nth() + if n is None: + raise ParseException(f'Bad N in Delta +/- Expr: {ctx.getText()}') + n = n.getText() + n = self._get_int(n) + unit = self._figure_out_date_unit(ctx.deltaUnit().getText().lower()) + except Exception: + raise ParseException(f'Invalid Delta +/-: {ctx.getText()}') + else: + self.context['delta_int'] = n + self.context['delta_unit'] = unit + + def exitNextLastUnit(self, ctx: dateparse_utilsParser.DeltaUnitContext) -> None: + try: + unit = self._figure_out_date_unit(ctx.getText().lower()) + except Exception: + raise ParseException(f'Bad delta unit: {ctx.getText()}') else: - direction = 1 if count > 0 else -1 - count = abs(count) - timedelta = datetime.timedelta(days=direction) - - while True: - dow = self.date.weekday() - if dow == unit: - count -= 1 - if count == 0: - return - self.date = self.date + timedelta - - def enterDeltaInt(self, ctx: dateparse_utilsParser.DeltaIntContext) -> None: - try: - i = int(ctx.getText()) - except: - raise ParseException(f"Bad delta int: {ctx.getText()}") + self.context['delta_unit'] = unit + + def exitDeltaNextLast(self, ctx: dateparse_utilsParser.DeltaNextLastContext) -> None: + try: + txt = ctx.getText().lower() + except Exception: + raise ParseException(f'Bad next/last: {ctx.getText()}') + if 'month' in self.context or 'day' in self.context or 'year' in self.context: + raise ParseException('Next/last expression expected to be relative to today.') + if txt[:4] == 'next': + self.context['delta_int'] = +1 + self.context['day'] = self.now_datetime.day + self.context['month'] = self.now_datetime.month + self.context['year'] = self.now_datetime.year + self.saw_overt_year = True + elif txt[:4] == 'last': + self.context['delta_int'] = -1 + self.context['day'] = self.now_datetime.day + self.context['month'] = self.now_datetime.month + self.context['year'] = self.now_datetime.year + self.saw_overt_year = True else: - self.context["delta_int"] = i + raise ParseException(f'Bad next/last: {ctx.getText()}') - def enterDeltaUnit( - self, ctx: dateparse_utilsParser.DeltaUnitContext + def exitCountUnitsBeforeAfterTimeExpr( + self, ctx: dateparse_utilsParser.CountUnitsBeforeAfterTimeExprContext ) -> None: + if 'nth' not in self.context: + raise ParseException(f'Bad count expression: {ctx.getText()}') try: - txt = ctx.getText().lower()[:3] - if txt in self.day_name_to_number: - txt = self.day_name_to_number[txt] - elif txt in self.delta_unit_to_constant: - txt = self.delta_unit_to_constant[txt] + unit = self._figure_out_time_unit(ctx.deltaTimeUnit().getText().lower()) + self.context['time_delta_unit'] = unit + except Exception: + raise ParseException(f'Bad delta unit: {ctx.getText()}') + if 'time_delta_before_after' not in self.context: + raise ParseException(f'Bad Before/After: {ctx.getText()}') + + def exitDeltaTimeFraction(self, ctx: dateparse_utilsParser.DeltaTimeFractionContext) -> None: + try: + txt = ctx.getText().lower()[:4] + if txt == 'quar': + self.context['time_delta_int'] = 15 + self.context['time_delta_unit'] = TimeUnit.MINUTES + elif txt == 'half': + self.context['time_delta_int'] = 30 + self.context['time_delta_unit'] = TimeUnit.MINUTES else: - raise ParseException(f"Bad delta unit: {ctx.getText()}") - except: - raise ParseException(f"Bad delta unit: {ctx.getText()}") + raise ParseException(f'Bad time fraction {ctx.getText()}') + except Exception: + raise ParseException(f'Bad time fraction {ctx.getText()}') + + def exitDeltaBeforeAfter(self, ctx: dateparse_utilsParser.DeltaBeforeAfterContext) -> None: + try: + txt = ctx.getText().lower() + except Exception: + raise ParseException(f'Bad delta before|after: {ctx.getText()}') else: - self.context["delta_unit"] = txt + self.context['delta_before_after'] = txt - def enterDeltaBeforeAfter( - self, ctx: dateparse_utilsParser.DeltaBeforeAfterContext - ) -> None: + def exitDeltaTimeBeforeAfter(self, ctx: dateparse_utilsParser.DeltaBeforeAfterContext) -> None: try: txt = ctx.getText().lower() - except: - raise ParseException(f"Bad delta before|after: {ctx.getText()}") + except Exception: + raise ParseException(f'Bad delta before|after: {ctx.getText()}') else: - self.context["delta_before_after"] = txt + self.context['time_delta_before_after'] = txt def exitNthWeekdayInMonthMaybeYearExpr( self, ctx: dateparse_utilsParser.NthWeekdayInMonthMaybeYearExprContext @@ -220,25 +652,23 @@ class DateParser(dateparse_utilsListener): ...into base + offset expressions instead. """ try: - if "nth" not in self.context: - raise ParseException(f"Missing nth number: {ctx.getText()}") - n = self.context["nth"] + if 'nth' not in self.context: + raise ParseException(f'Missing nth number: {ctx.getText()}') + n = self.context['nth'] if n < 1 or n > 5: # months never have more than 5 Foodays if n != -1: - raise ParseException(f"Invalid nth number: {ctx.getText()}") - del self.context["nth"] - self.context["delta_int"] = n + raise ParseException(f'Invalid nth number: {ctx.getText()}') + del self.context['nth'] + self.context['delta_int'] = n - year = self.context.get("year", datetime.date.today().year) - if "month" not in self.context: - raise ParseException( - f"Missing month expression: {ctx.getText()}" - ) - month = self.context["month"] + year = self.context.get('year', self.today.year) + if 'month' not in self.context: + raise ParseException(f'Missing month expression: {ctx.getText()}') + month = self.context['month'] - dow = self.context["dow"] - del self.context["dow"] - self.context["delta_unit"] = dow + dow = self.context['dow'] + del self.context['dow'] + self.context['delta_unit'] = dow # For the nth Fooday in Month, start at the 1st of the # month and count ahead N Foodays. For the last Fooday in @@ -252,22 +682,20 @@ class DateParser(dateparse_utilsListener): tmp_date = datetime.date(year=year, month=month, day=1) tmp_date = tmp_date - datetime.timedelta(days=1) - self.context["year"] = tmp_date.year - self.context["month"] = tmp_date.month - self.context["day"] = tmp_date.day + self.context['year'] = tmp_date.year + self.context['month'] = tmp_date.month + self.context['day'] = tmp_date.day # The delta adjustment code can handle the case where # the last day of the month is the day we're looking # for already. else: - self.context["year"] = year - self.context["month"] = month - self.context["day"] = 1 + self.context['year'] = year + self.context['month'] = month + self.context['day'] = 1 self.main_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_EXPR - except: - raise ParseException( - f"Invalid nthWeekday expression: {ctx.getText()}" - ) + except Exception: + raise ParseException(f'Invalid nthWeekday expression: {ctx.getText()}') def exitFirstLastWeekdayInMonthMaybeYearExpr( self, @@ -275,118 +703,319 @@ class DateParser(dateparse_utilsListener): ) -> None: self.exitNthWeekdayInMonthMaybeYearExpr(ctx) - def enterNth(self, ctx: dateparse_utilsParser.NthContext) -> None: + def exitNth(self, ctx: dateparse_utilsParser.NthContext) -> None: try: - i = ctx.getText() - m = re.match("\d+[a-z][a-z]", i) - if m is not None: - i = i[:-2] - i = int(i) - except: - raise ParseException(f"Bad nth expression: {ctx.getText()}") + i = self._get_int(ctx.getText()) + except Exception: + raise ParseException(f'Bad nth expression: {ctx.getText()}') else: - self.context["nth"] = i + self.context['nth'] = i - def enterFirstOrLast( - self, ctx: dateparse_utilsParser.FirstOrLastContext - ) -> None: + def exitFirstOrLast(self, ctx: dateparse_utilsParser.FirstOrLastContext) -> None: try: txt = ctx.getText() - if txt == "first": + if txt == 'first': txt = 1 - elif txt == "last": + elif txt == 'last': txt = -1 else: - raise ParseException( - f"Bad first|last expression: {ctx.getText()}" - ) - except: - raise ParseException(f"Bad first|last expression: {ctx.getText()}") + raise ParseException(f'Bad first|last expression: {ctx.getText()}') + except Exception: + raise ParseException(f'Bad first|last expression: {ctx.getText()}') else: - self.context["nth"] = txt + self.context['nth'] = txt - def enterDayName(self, ctx: dateparse_utilsParser.DayNameContext) -> None: + def exitDayName(self, ctx: dateparse_utilsParser.DayNameContext) -> None: try: dow = ctx.getText().lower()[:3] dow = self.day_name_to_number.get(dow, None) - except: - raise ParseException("Bad day of week") + except Exception: + raise ParseException('Bad day of week') else: - self.context["dow"] = dow + self.context['dow'] = dow - def enterDayOfMonth( - self, ctx: dateparse_utilsParser.DayOfMonthContext - ) -> None: + def exitDayOfMonth(self, ctx: dateparse_utilsParser.DayOfMonthContext) -> None: try: - day = int(ctx.getText()) + day = ctx.getText().lower() + if day[:3] == 'ide': + self.context['day'] = 'ide' + return + if day[:3] == 'non': + self.context['day'] = 'non' + return + if day[:3] == 'kal': + self.context['day'] = 1 + return + day = self._get_int(day) if day < 1 or day > 31: - raise ParseException( - f"Bad dayOfMonth expression: {ctx.getText()}" - ) - except: - raise ParseException(f"Bad dayOfMonth expression: {ctx.getText()}") - self.context["day"] = day - - def enterMonthName( - self, ctx: dateparse_utilsParser.MonthNameContext - ) -> None: + raise ParseException(f'Bad dayOfMonth expression: {ctx.getText()}') + except Exception: + raise ParseException(f'Bad dayOfMonth expression: {ctx.getText()}') + self.context['day'] = day + + def exitMonthName(self, ctx: dateparse_utilsParser.MonthNameContext) -> None: try: month = ctx.getText() - month = month.lower()[:3] + while month[0] == '/' or month[0] == '-': + month = month[1:] + month = month[:3].lower() month = self.month_name_to_number.get(month, None) if month is None: - raise ParseException( - f"Bad monthName expression: {ctx.getText()}" - ) - except: - raise ParseException(f"Bad monthName expression: {ctx.getText()}") + raise ParseException(f'Bad monthName expression: {ctx.getText()}') + except Exception: + raise ParseException(f'Bad monthName expression: {ctx.getText()}') else: - self.context["month"] = month + self.context['month'] = month - def enterMonthNumber( - self, ctx: dateparse_utilsParser.MonthNumberContext - ) -> None: + def exitMonthNumber(self, ctx: dateparse_utilsParser.MonthNumberContext) -> None: try: - month = int(ctx.getText()) + month = self._get_int(ctx.getText()) if month < 1 or month > 12: - raise ParseException( - f"Bad monthNumber expression: {ctx.getText()}" - ) - except: - raise ParseException(f"Bad monthNumber expression: {ctx.getText()}") + raise ParseException(f'Bad monthNumber expression: {ctx.getText()}') + except Exception: + raise ParseException(f'Bad monthNumber expression: {ctx.getText()}') else: - self.context["month"] = month + self.context['month'] = month - def enterYear(self, ctx: dateparse_utilsParser.YearContext) -> None: + def exitYear(self, ctx: dateparse_utilsParser.YearContext) -> None: try: - year = int(ctx.getText()) + year = self._get_int(ctx.getText()) if year < 1: - raise ParseException(f"Bad year expression: {ctx.getText()}") - except: - raise ParseException(f"Bad year expression: {ctx.getText()}") + raise ParseException(f'Bad year expression: {ctx.getText()}') + except Exception: + raise ParseException(f'Bad year expression: {ctx.getText()}') else: - self.context["year"] = year + self.saw_overt_year = True + self.context['year'] = year - def enterSpecialDate( - self, ctx: dateparse_utilsParser.SpecialDateContext + def exitSpecialDateMaybeYearExpr( + self, ctx: dateparse_utilsParser.SpecialDateMaybeYearExprContext ) -> None: try: - txt = ctx.getText().lower() - except: - raise ParseException(f"Bad specialDate expression: {ctx.getText()}") + special = ctx.specialDate().getText().lower() + self.context['special'] = special + except Exception: + raise ParseException(f'Bad specialDate expression: {ctx.specialDate().getText()}') + try: + mod = ctx.thisNextLast() + if mod is not None: + if mod.THIS() is not None: + self.context['special_next_last'] = 'this' + elif mod.NEXT() is not None: + self.context['special_next_last'] = 'next' + elif mod.LAST() is not None: + self.context['special_next_last'] = 'last' + except Exception: + raise ParseException(f'Bad specialDateNextLast expression: {ctx.getText()}') + + def exitNFoosFromTodayAgoExpr( + self, ctx: dateparse_utilsParser.NFoosFromTodayAgoExprContext + ) -> None: + d = self.now_datetime + try: + count = self._get_int(ctx.unsignedInt().getText()) + unit = ctx.deltaUnit().getText().lower() + ago_from_now = ctx.AGO_FROM_NOW().getText() + except Exception: + raise ParseException(f'Bad NFoosFromTodayAgoExpr: {ctx.getText()}') + + if "ago" in ago_from_now or "back" in ago_from_now: + count = -count + + unit = self._figure_out_date_unit(unit) + d = n_timeunits_from_base(count, TimeUnit(unit), d) + self.context['year'] = d.year + self.context['month'] = d.month + self.context['day'] = d.day + + def exitDeltaRelativeToTodayExpr( + self, ctx: dateparse_utilsParser.DeltaRelativeToTodayExprContext + ) -> None: + # When someone says "next week" they mean a week from now. + # Likewise next month or last year. These expressions are now + # +/- delta. + # + # But when someone says "this Friday" they mean "this coming + # Friday". It would be weird to say "this Friday" if today + # was already Friday but I'm parsing it to mean: the next day + # that is a Friday. So when you say "next Friday" you mean + # the Friday after this coming Friday, or 2 Fridays from now. + # + # This set handles this weirdness. + weekdays = set( + [ + TimeUnit.MONDAYS, + TimeUnit.TUESDAYS, + TimeUnit.WEDNESDAYS, + TimeUnit.THURSDAYS, + TimeUnit.FRIDAYS, + TimeUnit.SATURDAYS, + TimeUnit.SUNDAYS, + ] + ) + d = self.now_datetime + try: + mod = ctx.thisNextLast() + unit = ctx.deltaUnit().getText().lower() + unit = self._figure_out_date_unit(unit) + if mod.LAST(): + count = -1 + elif mod.THIS(): + if unit in weekdays: + count = +1 + else: + count = 0 + elif mod.NEXT(): + if unit in weekdays: + count = +2 + else: + count = +1 + else: + raise ParseException(f'Bad This/Next/Last modifier: {mod}') + except Exception: + raise ParseException(f'Bad DeltaRelativeToTodayExpr: {ctx.getText()}') + d = n_timeunits_from_base(count, TimeUnit(unit), d) + self.context['year'] = d.year + self.context['month'] = d.month + self.context['day'] = d.day + + def exitSpecialTimeExpr(self, ctx: dateparse_utilsParser.SpecialTimeExprContext) -> None: + try: + txt = ctx.specialTime().getText().lower() + except Exception: + raise ParseException(f'Bad special time expression: {ctx.getText()}') else: - self.context["special"] = txt + if txt == 'noon' or txt == 'midday': + self.context['hour'] = 12 + self.context['minute'] = 0 + self.context['seconds'] = 0 + self.context['micros'] = 0 + elif txt == 'midnight': + self.context['hour'] = 0 + self.context['minute'] = 0 + self.context['seconds'] = 0 + self.context['micros'] = 0 + else: + raise ParseException(f'Bad special time expression: {txt}') + + try: + tz = ctx.tzExpr().getText() + self.context['tz'] = self._parse_tz(tz) + except Exception: + pass + def exitTwelveHourTimeExpr(self, ctx: dateparse_utilsParser.TwelveHourTimeExprContext) -> None: + try: + hour = ctx.hour().getText() + while not hour[-1].isdigit(): + hour = hour[:-1] + hour = self._get_int(hour) + except Exception: + raise ParseException(f'Bad hour: {ctx.hour().getText()}') + if hour <= 0 or hour > 12: + raise ParseException(f'Bad hour (out of range): {hour}') + try: + minute = self._get_int(ctx.minute().getText()) + except Exception: + minute = 0 + if minute < 0 or minute > 59: + raise ParseException(f'Bad minute (out of range): {minute}') + self.context['minute'] = minute + + try: + seconds = self._get_int(ctx.second().getText()) + except Exception: + seconds = 0 + if seconds < 0 or seconds > 59: + raise ParseException(f'Bad second (out of range): {seconds}') + self.context['seconds'] = seconds + + try: + micros = self._get_int(ctx.micros().getText()) + except Exception: + micros = 0 + if micros < 0 or micros > 1000000: + raise ParseException(f'Bad micros (out of range): {micros}') + self.context['micros'] = micros + + try: + ampm = ctx.ampm().getText() + except Exception: + raise ParseException(f'Bad ampm: {ctx.ampm().getText()}') + if hour == 12: + hour = 0 + if ampm[0] == 'p': + hour += 12 + self.context['hour'] = hour + + try: + tz = ctx.tzExpr().getText() + self.context['tz'] = self._parse_tz(tz) + except Exception: + pass + + def exitTwentyFourHourTimeExpr( + self, ctx: dateparse_utilsParser.TwentyFourHourTimeExprContext + ) -> None: + try: + hour = ctx.hour().getText() + while not hour[-1].isdigit(): + hour = hour[:-1] + hour = self._get_int(hour) + except Exception: + raise ParseException(f'Bad hour: {ctx.hour().getText()}') + if hour < 0 or hour > 23: + raise ParseException(f'Bad hour (out of range): {hour}') + self.context['hour'] = hour + + try: + minute = self._get_int(ctx.minute().getText()) + except Exception: + minute = 0 + if minute < 0 or minute > 59: + raise ParseException(f'Bad minute (out of range): {ctx.getText()}') + self.context['minute'] = minute + + try: + seconds = self._get_int(ctx.second().getText()) + except Exception: + seconds = 0 + if seconds < 0 or seconds > 59: + raise ParseException(f'Bad second (out of range): {ctx.getText()}') + self.context['seconds'] = seconds + + try: + micros = self._get_int(ctx.micros().getText()) + except Exception: + micros = 0 + if micros < 0 or micros >= 1000000: + raise ParseException(f'Bad micros (out of range): {ctx.getText()}') + self.context['micros'] = micros + + try: + tz = ctx.tzExpr().getText() + self.context['tz'] = self._parse_tz(tz) + except Exception: + pass + + +@bootstrap.initialize def main() -> None: parser = DateParser() for line in sys.stdin: line = line.strip() - line = line.lower() line = re.sub(r"#.*$", "", line) if re.match(r"^ *$", line) is not None: continue - print(parser.parse_date_string(line)) + try: + dt = parser.parse(line) + except Exception as e: + logger.exception(e) + print("Unrecognized.") + else: + assert dt is not None + print(dt.strftime('%A %Y/%m/%d %H:%M:%S.%f %Z(%z)')) sys.exit(0)