Fix a couple of bugs in date parsing.
author Scott <[email protected]>
Mon, 31 Jan 2022 20:00:11 +0000 (12:00 -0800)
committer Scott <[email protected]>
Mon, 31 Jan 2022 20:00:11 +0000 (12:00 -0800)
dateparse/dateparse_utils.py
datetime_utils.py

index 21fdb832b5c556317989e4f9855dae8daae67552..be5e1b5312a7beb83fea7dc7b0137f08059879a3 100755 (executable)
@@ -21,7 +21,10 @@ import pytz
 import acl
 import bootstrap
 from datetime_utils import (
-    TimeUnit, n_timeunits_from_base, datetime_to_date, date_to_datetime
+    TimeUnit,
+    n_timeunits_from_base,
+    datetime_to_date,
+    date_to_datetime,
 )
 from dateparse.dateparse_utilsLexer import dateparse_utilsLexer  # type: ignore
 from dateparse.dateparse_utilsListener import dateparse_utilsListener  # type: ignore
@@ -35,50 +38,46 @@ logger = logging.getLogger(__name__)
 def debug_parse(enter_or_exit_f: Callable[[Any, Any], None]):
     @functools.wraps(enter_or_exit_f)
     def debug_parse_wrapper(*args, **kwargs):
-        slf = args[0]
+        slf = args[0]
         ctx = args[1]
         depth = ctx.depth()
         logger.debug(
-            '  ' * (depth-1) +
-            f'Entering {enter_or_exit_f.__name__} ({ctx.invokingState} / {ctx.exception})'
+            '  ' * (depth - 1)
+            + f'Entering {enter_or_exit_f.__name__} ({ctx.invokingState} / {ctx.exception})'
         )
         for c in ctx.getChildren():
-            logger.debug(
-                '  ' * (depth-1) +
-                f'{c} {type(c)}'
-            )
+            logger.debug('  ' * (depth - 1) + f'{c} {type(c)}')
         retval = enter_or_exit_f(*args, **kwargs)
         return retval
+
     return debug_parse_wrapper
 
 
 class ParseException(Exception):
     """An exception thrown during parsing because of unrecognized input."""
+
     def __init__(self, message: str) -> None:
         self.message = message
 
 
 class RaisingErrorListener(antlr4.DiagnosticErrorListener):
     """An error listener that raises ParseExceptions."""
-    def syntaxError(
-            self, recognizer, offendingSymbol, line, column, msg, e
-    ):
+
+    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
         raise ParseException(msg)
 
     def reportAmbiguity(
-            self, recognizer, dfa, startIndex, stopIndex, exact,
-            ambigAlts, configs
+        self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs
     ):
         pass
 
     def reportAttemptingFullContext(
-            self, recognizer, dfa, startIndex, stopIndex, conflictingAlts,
-            configs
+        self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs
     ):
         pass
 
     def reportContextSensitivity(
-            self, recognizer, dfa, startIndex, stopIndex, prediction, configs
+        self, recognizer, dfa, startIndex, stopIndex, prediction, configs
     ):
         pass
 
@@ -90,13 +89,10 @@ class RaisingErrorListener(antlr4.DiagnosticErrorListener):
             'enter*',
             'exit*',
         ],
-        denied_patterns=[
-            'enterEveryRule',
-            'exitEveryRule'
-        ],
+        denied_patterns=['enterEveryRule', 'exitEveryRule'],
         order_to_check_allow_deny=acl.Order.DENY_ALLOW,
-        default_answer=False
-    )
+        default_answer=False,
+    ),
 )
 class DateParser(dateparse_utilsListener):
     PARSE_TYPE_SINGLE_DATE_EXPR = 1
@@ -104,11 +100,7 @@ class DateParser(dateparse_utilsListener):
     PARSE_TYPE_SINGLE_TIME_EXPR = 3
     PARSE_TYPE_BASE_AND_OFFSET_TIME_EXPR = 4
 
-    def __init__(
-            self,
-            *,
-            override_now_for_test_purposes = None
-    ) -> None:
+    def __init__(self, *, override_now_for_test_purposes=None) -> None:
         """C'tor.  Passing a value to override_now_for_test_purposes can be
         used to force this instance to use a custom date/time for its
         idea of "now" so that the code can be more easily unittested.
@@ -142,7 +134,7 @@ class DateParser(dateparse_utilsListener):
             9: 30,
             10: 31,
             11: 30,
-            12: 31
+            12: 31,
         }
 
         # N.B. day number is also synched with datetime_utils.TimeUnit values
@@ -198,7 +190,7 @@ class DateParser(dateparse_utilsListener):
         This is the main entrypoint to this class for caller code.
         """
         date_string = date_string.strip()
-        date_string = re.sub('\s+', ' ', date_string)
+        date_string = re.sub(r'\s+', ' ', date_string)
         self._reset()
         listener = RaisingErrorListener()
         input_stream = antlr4.InputStream(date_string)
@@ -246,9 +238,7 @@ class DateParser(dateparse_utilsListener):
             self.today = datetime.date.today()
         else:
             self.now_datetime = self.override_now_for_test_purposes
-            self.today = datetime_to_date(
-                self.override_now_for_test_purposes
-            )
+            self.today = datetime_to_date(self.override_now_for_test_purposes)
         self.date: Optional[datetime.date] = None
         self.time: Optional[datetime.time] = None
         self.datetime: Optional[datetime.datetime] = None
@@ -275,16 +265,16 @@ class DateParser(dateparse_utilsListener):
             return TimeUnit.MONTHS
         txt = orig.lower()[:3]
         if txt in self.day_name_to_number:
-            return(TimeUnit(self.day_name_to_number[txt]))
+            return TimeUnit(self.day_name_to_number[txt])
         elif txt in self.delta_unit_to_constant:
-            return(TimeUnit(self.delta_unit_to_constant[txt]))
+            return TimeUnit(self.delta_unit_to_constant[txt])
         raise ParseException(f'Invalid date unit: {orig}')
 
     def _figure_out_time_unit(self, orig: str) -> int:
         """Figure out what unit a time expression piece is talking about."""
         txt = orig.lower()[:3]
         if txt in self.time_delta_unit_to_constant:
-            return(self.time_delta_unit_to_constant[txt])
+            return self.time_delta_unit_to_constant[txt]
         raise ParseException(f'Invalid time unit: {orig}')
 
     def _parse_special_date(self, name: str) -> Optional[datetime.date]:
@@ -317,13 +307,9 @@ class DateParser(dateparse_utilsListener):
         elif name == 'hallo':
             return datetime.date(year=year, month=10, day=31)
 
-        for holiday_date, holiday_name in sorted(
-            holidays.US(years=year).items()
-        ):
+        for holiday_date, holiday_name in sorted(holidays.US(years=year).items()):
             if 'Observed' not in holiday_name:
-                holiday_name = DateParser._normalize_special_day_name(
-                    holiday_name
-                )
+                holiday_name = DateParser._normalize_special_day_name(holiday_name)
                 if name == holiday_name:
                     return holiday_date
         if name == 'chriseve':
@@ -373,10 +359,7 @@ class DateParser(dateparse_utilsListener):
             self.saw_overt_year = True
 
         # Handling "ides" and "nones" requires both the day and month.
-        if (
-                self.context['day'] == 'ide' or
-                self.context['day'] == 'non'
-        ):
+        if self.context['day'] == 'ide' or self.context['day'] == 'non':
             self.context['day'] = self._resolve_ides_nones(
                 self.context['day'], self.context['month']
             )
@@ -452,8 +435,7 @@ class DateParser(dateparse_utilsListener):
         micros = self.time.microsecond
 
         self.datetime = datetime.datetime(
-            year, month, day, hour, minute, second, micros,
-            tzinfo=self.time.tzinfo
+            year, month, day, hour, minute, second, micros, tzinfo=self.time.tzinfo
         )
 
         # Apply resudual adjustments to times here when we have a
@@ -464,7 +446,7 @@ class DateParser(dateparse_utilsListener):
             self.datetime.minute,
             self.datetime.second,
             self.datetime.microsecond,
-            self.datetime.tzinfo
+            self.datetime.tzinfo,
         )
 
     def enterDateExpr(self, ctx: dateparse_utilsParser.DateExprContext):
@@ -505,10 +487,10 @@ class DateParser(dateparse_utilsListener):
         if 'delta_before_after' in self.context:
             before_after = self.context['delta_before_after'].lower()
             if (
-                    before_after == 'before' or
-                    before_after == 'until' or
-                    before_after == 'til' or
-                    before_after == 'to'
+                before_after == 'before'
+                or before_after == 'until'
+                or before_after == 'til'
+                or before_after == 'to'
             ):
                 count = -count
 
@@ -516,11 +498,7 @@ class DateParser(dateparse_utilsListener):
         if 'delta_unit' not in self.context:
             raise ParseException('Missing delta_unit?!')
         unit = self.context['delta_unit']
-        dt = n_timeunits_from_base(
-            count,
-            TimeUnit(unit),
-            date_to_datetime(self.date)
-        )
+        dt = n_timeunits_from_base(count, TimeUnit(unit), date_to_datetime(self.date))
         self.date = datetime_to_date(dt)
 
     def exitTimeExpr(self, ctx: dateparse_utilsParser.TimeExprContext) -> None:
@@ -550,10 +528,10 @@ class DateParser(dateparse_utilsListener):
         if 'time_delta_before_after' in self.context:
             before_after = self.context['time_delta_before_after'].lower()
             if (
-                    before_after == 'before' or
-                    before_after == 'until' or
-                    before_after == 'til' or
-                    before_after == 'to'
+                before_after == 'before'
+                or before_after == 'until'
+                or before_after == 'til'
+                or before_after == 'to'
             ):
                 count = -count
 
@@ -577,23 +555,17 @@ class DateParser(dateparse_utilsListener):
         try:
             n = ctx.nth()
             if n is None:
-                raise ParseException(
-                    f'Bad N in Delta +/- Expr: {ctx.getText()}'
-                )
+                raise ParseException(f'Bad N in Delta +/- Expr: {ctx.getText()}')
             n = n.getText()
             n = self._get_int(n)
-            unit = self._figure_out_date_unit(
-                ctx.deltaUnit().getText().lower()
-            )
+            unit = self._figure_out_date_unit(ctx.deltaUnit().getText().lower())
         except Exception:
             raise ParseException(f'Invalid Delta +/-: {ctx.getText()}')
         else:
             self.context['delta_int'] = n
             self.context['delta_unit'] = unit
 
-    def exitNextLastUnit(
-        self, ctx: dateparse_utilsParser.DeltaUnitContext
-    ) -> None:
+    def exitNextLastUnit(self, ctx: dateparse_utilsParser.DeltaUnitContext) -> None:
         try:
             unit = self._figure_out_date_unit(ctx.getText().lower())
         except Exception:
@@ -602,17 +574,13 @@ class DateParser(dateparse_utilsListener):
             self.context['delta_unit'] = unit
 
     def exitDeltaNextLast(
-            self, ctx: dateparse_utilsParser.DeltaNextLastContext
+        self, ctx: dateparse_utilsParser.DeltaNextLastContext
     ) -> None:
         try:
             txt = ctx.getText().lower()
         except Exception:
             raise ParseException(f'Bad next/last: {ctx.getText()}')
-        if (
-                'month' in self.context or
-                'day' in self.context or
-                'year' in self.context
-        ):
+        if 'month' in self.context or 'day' in self.context or 'year' in self.context:
             raise ParseException(
                 'Next/last expression expected to be relative to today.'
             )
@@ -635,36 +603,26 @@ class DateParser(dateparse_utilsListener):
         self, ctx: dateparse_utilsParser.CountUnitsBeforeAfterTimeExprContext
     ) -> None:
         if 'nth' not in self.context:
-            raise ParseException(
-                f'Bad count expression: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad count expression: {ctx.getText()}')
         try:
-            unit = self._figure_out_time_unit(
-                ctx.deltaTimeUnit().getText().lower()
-            )
+            unit = self._figure_out_time_unit(ctx.deltaTimeUnit().getText().lower())
             self.context['time_delta_unit'] = unit
         except Exception:
             raise ParseException(f'Bad delta unit: {ctx.getText()}')
         if 'time_delta_before_after' not in self.context:
-            raise ParseException(
-                f'Bad Before/After: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad Before/After: {ctx.getText()}')
 
     def exitDeltaTimeFraction(
-            self, ctx: dateparse_utilsParser.DeltaTimeFractionContext
+        self, ctx: dateparse_utilsParser.DeltaTimeFractionContext
     ) -> None:
         try:
             txt = ctx.getText().lower()[:4]
             if txt == 'quar':
                 self.context['time_delta_int'] = 15
-                self.context[
-                    'time_delta_unit'
-                ] = TimeUnit.MINUTES
+                self.context['time_delta_unit'] = TimeUnit.MINUTES
             elif txt == 'half':
                 self.context['time_delta_int'] = 30
-                self.context[
-                    'time_delta_unit'
-                ] = TimeUnit.MINUTES
+                self.context['time_delta_unit'] = TimeUnit.MINUTES
             else:
                 raise ParseException(f'Bad time fraction {ctx.getText()}')
         except Exception:
@@ -712,9 +670,7 @@ class DateParser(dateparse_utilsListener):
 
             year = self.context.get('year', self.today.year)
             if 'month' not in self.context:
-                raise ParseException(
-                    f'Missing month expression: {ctx.getText()}'
-                )
+                raise ParseException(f'Missing month expression: {ctx.getText()}')
             month = self.context['month']
 
             dow = self.context['dow']
@@ -746,9 +702,7 @@ class DateParser(dateparse_utilsListener):
                 self.context['day'] = 1
             self.main_type = DateParser.PARSE_TYPE_BASE_AND_OFFSET_EXPR
         except Exception:
-            raise ParseException(
-                f'Invalid nthWeekday expression: {ctx.getText()}'
-            )
+            raise ParseException(f'Invalid nthWeekday expression: {ctx.getText()}')
 
     def exitFirstLastWeekdayInMonthMaybeYearExpr(
         self,
@@ -764,9 +718,7 @@ class DateParser(dateparse_utilsListener):
         else:
             self.context['nth'] = i
 
-    def exitFirstOrLast(
-        self, ctx: dateparse_utilsParser.FirstOrLastContext
-    ) -> None:
+    def exitFirstOrLast(self, ctx: dateparse_utilsParser.FirstOrLastContext) -> None:
         try:
             txt = ctx.getText()
             if txt == 'first':
@@ -774,9 +726,7 @@ class DateParser(dateparse_utilsListener):
             elif txt == 'last':
                 txt = -1
             else:
-                raise ParseException(
-                    f'Bad first|last expression: {ctx.getText()}'
-                )
+                raise ParseException(f'Bad first|last expression: {ctx.getText()}')
         except Exception:
             raise ParseException(f'Bad first|last expression: {ctx.getText()}')
         else:
@@ -791,9 +741,7 @@ class DateParser(dateparse_utilsListener):
         else:
             self.context['dow'] = dow
 
-    def exitDayOfMonth(
-        self, ctx: dateparse_utilsParser.DayOfMonthContext
-    ) -> None:
+    def exitDayOfMonth(self, ctx: dateparse_utilsParser.DayOfMonthContext) -> None:
         try:
             day = ctx.getText().lower()
             if day[:3] == 'ide':
@@ -807,16 +755,12 @@ class DateParser(dateparse_utilsListener):
                 return
             day = self._get_int(day)
             if day < 1 or day > 31:
-                raise ParseException(
-                    f'Bad dayOfMonth expression: {ctx.getText()}'
-                )
+                raise ParseException(f'Bad dayOfMonth expression: {ctx.getText()}')
         except Exception:
             raise ParseException(f'Bad dayOfMonth expression: {ctx.getText()}')
         self.context['day'] = day
 
-    def exitMonthName(
-        self, ctx: dateparse_utilsParser.MonthNameContext
-    ) -> None:
+    def exitMonthName(self, ctx: dateparse_utilsParser.MonthNameContext) -> None:
         try:
             month = ctx.getText()
             while month[0] == '/' or month[0] == '-':
@@ -824,27 +768,19 @@ class DateParser(dateparse_utilsListener):
             month = month[:3].lower()
             month = self.month_name_to_number.get(month, None)
             if month is None:
-                raise ParseException(
-                    f'Bad monthName expression: {ctx.getText()}'
-                )
+                raise ParseException(f'Bad monthName expression: {ctx.getText()}')
         except Exception:
             raise ParseException(f'Bad monthName expression: {ctx.getText()}')
         else:
             self.context['month'] = month
 
-    def exitMonthNumber(
-        self, ctx: dateparse_utilsParser.MonthNumberContext
-    ) -> None:
+    def exitMonthNumber(self, ctx: dateparse_utilsParser.MonthNumberContext) -> None:
         try:
             month = self._get_int(ctx.getText())
             if month < 1 or month > 12:
-                raise ParseException(
-                    f'Bad monthNumber expression: {ctx.getText()}'
-                )
+                raise ParseException(f'Bad monthNumber expression: {ctx.getText()}')
         except Exception:
-            raise ParseException(
-                f'Bad monthNumber expression: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad monthNumber expression: {ctx.getText()}')
         else:
             self.context['month'] = month
 
@@ -879,9 +815,7 @@ class DateParser(dateparse_utilsListener):
                 elif mod.LAST() is not None:
                     self.context['special_next_last'] = 'last'
         except Exception:
-            raise ParseException(
-                f'Bad specialDateNextLast expression: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad specialDateNextLast expression: {ctx.getText()}')
 
     def exitNFoosFromTodayAgoExpr(
         self, ctx: dateparse_utilsParser.NFoosFromTodayAgoExprContext
@@ -892,18 +826,13 @@ class DateParser(dateparse_utilsListener):
             unit = ctx.deltaUnit().getText().lower()
             ago_from_now = ctx.AGO_FROM_NOW().getText()
         except Exception:
-            raise ParseException(
-                f'Bad NFoosFromTodayAgoExpr: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad NFoosFromTodayAgoExpr: {ctx.getText()}')
 
         if "ago" in ago_from_now or "back" in ago_from_now:
             count = -count
 
         unit = self._figure_out_date_unit(unit)
-        d = n_timeunits_from_base(
-            count,
-            TimeUnit(unit),
-            d)
+        d = n_timeunits_from_base(count, TimeUnit(unit), d)
         self.context['year'] = d.year
         self.context['month'] = d.month
         self.context['day'] = d.day
@@ -911,29 +840,50 @@ class DateParser(dateparse_utilsListener):
     def exitDeltaRelativeToTodayExpr(
         self, ctx: dateparse_utilsParser.DeltaRelativeToTodayExprContext
     ) -> None:
+        # When someone says "next week" they mean a week from now.
+        # Likewise next month or last year.  These expressions are now
+        # +/- delta.
+        #
+        # But when someone says "this Friday" they mean "this coming
+        # Friday".  It would be weird to say "this Friday" if today
+        # was already Friday but I'm parsing it to mean: the next day
+        # that is a Friday.  So when you say "next Friday" you mean
+        # the Friday after this coming Friday, or 2 Fridays from now.
+        #
+        # This set handles this weirdness.
+        weekdays = set(
+            [
+                TimeUnit.MONDAYS,
+                TimeUnit.TUESDAYS,
+                TimeUnit.WEDNESDAYS,
+                TimeUnit.THURSDAYS,
+                TimeUnit.FRIDAYS,
+                TimeUnit.SATURDAYS,
+                TimeUnit.SUNDAYS,
+            ]
+        )
         d = self.now_datetime
         try:
             mod = ctx.thisNextLast()
+            unit = ctx.deltaUnit().getText().lower()
+            unit = self._figure_out_date_unit(unit)
             if mod.LAST():
                 count = -1
             elif mod.THIS():
-                count = +1
+                if unit in weekdays:
+                    count = +1
+                else:
+                    count = 0
             elif mod.NEXT():
-                count = +2
+                if unit in weekdays:
+                    count = +2
+                else:
+                    count = +1
             else:
-                raise ParseException(
-                    f'Bad This/Next/Last modifier: {mod}'
-                )
-            unit = ctx.deltaUnit().getText().lower()
+                raise ParseException(f'Bad This/Next/Last modifier: {mod}')
         except Exception:
-            raise ParseException(
-                f'Bad DeltaRelativeToTodayExpr: {ctx.getText()}'
-            )
-        unit = self._figure_out_date_unit(unit)
-        d = n_timeunits_from_base(
-            count,
-            TimeUnit(unit),
-            d)
+            raise ParseException(f'Bad DeltaRelativeToTodayExpr: {ctx.getText()}')
+        d = n_timeunits_from_base(count, TimeUnit(unit), d)
         self.context['year'] = d.year
         self.context['month'] = d.month
         self.context['day'] = d.day
@@ -944,9 +894,7 @@ class DateParser(dateparse_utilsListener):
         try:
             txt = ctx.specialTime().getText().lower()
         except Exception:
-            raise ParseException(
-                f'Bad special time expression: {ctx.getText()}'
-            )
+            raise ParseException(f'Bad special time expression: {ctx.getText()}')
         else:
             if txt == 'noon' or txt == 'midday':
                 self.context['hour'] = 12
@@ -1076,6 +1024,7 @@ def main() -> None:
         try:
             dt = parser.parse(line)
         except Exception as e:
+            logger.exception(e)
             print("Unrecognized.")
         else:
             print(dt.strftime('%A %Y/%m/%d %H:%M:%S.%f %Z(%z)'))
index 3565936fce66c1197a04a8926f902452e6350ac4..60b859afd0a0ccfec361034f66b2bff615e5bc43 100644 (file)
@@ -304,6 +304,17 @@ def n_timeunits_from_base(
     >>> n_timeunits_from_base(50, TimeUnit.SECONDS, base)
     datetime.datetime(2021, 9, 10, 11, 25, 41, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
 
+    Next month corner case -- it will try to make Feb 31, 2022 then count
+    backwards.
+    >>> base = string_to_datetime("2022/01/31 11:24:51AM-0700")[0]
+    >>> n_timeunits_from_base(1, TimeUnit.MONTHS, base)
+    datetime.datetime(2022, 2, 28, 11, 24, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
+
+    Last month with the same corner case
+    >>> base = string_to_datetime("2022/03/31 11:24:51AM-0700")[0]
+    >>> n_timeunits_from_base(-1, TimeUnit.MONTHS, base)
+    datetime.datetime(2022, 2, 28, 11, 24, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
+
     """
     assert TimeUnit.is_valid(unit)
     if count == 0:
@@ -364,16 +375,23 @@ def n_timeunits_from_base(
             new_month %= 12
             year_term += 1
         new_year = base.year + year_term
-        return datetime.datetime(
-            new_year,
-            new_month,
-            base.day,
-            base.hour,
-            base.minute,
-            base.second,
-            base.microsecond,
-            base.tzinfo,
-        )
+        day = base.day
+        while True:
+            try:
+                ret = datetime.datetime(
+                    new_year,
+                    new_month,
+                    day,
+                    base.hour,
+                    base.minute,
+                    base.second,
+                    base.microsecond,
+                    base.tzinfo,
+                )
+                break
+            except ValueError:
+                day -= 1
+        return ret
 
     # N years from base
     elif unit == TimeUnit.YEARS: