Improve dateparse and make string_utils.extract_date.
author Scott Gasch <[email protected]>
Thu, 9 Jun 2022 00:23:33 +0000 (17:23 -0700)
committer Scott Gasch <[email protected]>
Thu, 9 Jun 2022 00:23:33 +0000 (17:23 -0700)
dateparse/dateparse_utils.g4
dateparse/dateparse_utils.py
string_utils.py

index 4701299402baa68189bde0e42492064d6e9c5603..364aa0f985592f84deb1e41b5bcd09c3c7126873 100644 (file)
@@ -57,6 +57,7 @@ singleDateExpr
     | nthWeekdayInMonthMaybeYearExpr
     | firstLastWeekdayInMonthMaybeYearExpr
     | deltaDateExprRelativeToTodayImplied
+    | dayName (SPACE|ddiv)+ monthDayMaybeYearExpr (SPACE|ddiv)* singleTimeExpr*
     | dayName
     ;
 
index ee2bbd9f6a9ef47519815d1747bde6124eb591af..2e6eabde5ee155f1ca81e4c1f0c3f542f96dfb96 100755 (executable)
@@ -348,7 +348,7 @@ class DateParser(dateparse_utilsListener):
                 return 5
 
     def _parse_normal_date(self) -> datetime.date:
-        if 'dow' in self.context:
+        if 'dow' in self.context and 'month' not in self.context:
             d = self.today
             while d.weekday() != self.context['dow']:
                 d += datetime.timedelta(days=1)
index 4127079fc0a1b5670e676986421fad26009a3733..7c40dc99cb978a1d527ee42b9082dd4bede955f5 100644 (file)
@@ -1388,6 +1388,29 @@ def to_date(in_str: str) -> Optional[datetime.date]:
     return None
 
 
+def extract_date(in_str: Any) -> Optional[str]:
+    import itertools
+
+    import dateparse.dateparse_utils as du
+
+    d = du.DateParser()  # type: ignore
+    chunks = in_str.split()
+    for ngram in itertools.chain(
+        list_utils.ngrams(chunks, 5),
+        list_utils.ngrams(chunks, 4),
+        list_utils.ngrams(chunks, 3),
+        list_utils.ngrams(chunks, 2),
+    ):
+        try:
+            expr = " ".join(ngram)
+            logger.debug(f"Trying {expr}")
+            if d.parse(expr):
+                return d.get_date()
+        except du.ParseException:  # type: ignore
+            pass
+    return None
+
+
 def is_valid_date(in_str: str) -> bool:
     """
     Args: