From 82e1225c6f8a6ee59f373c07a7873b373acb19dc Mon Sep 17 00:00:00 2001
From: Scott Gasch
Date: Wed, 8 Jun 2022 17:23:33 -0700
Subject: [PATCH] Improve dateparse and make string_utils.extract_date.

---
 dateparse/dateparse_utils.g4 |  1 +
 dateparse/dateparse_utils.py |  2 +-
 string_utils.py              | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/dateparse/dateparse_utils.g4 b/dateparse/dateparse_utils.g4
index 4701299..364aa0f 100644
--- a/dateparse/dateparse_utils.g4
+++ b/dateparse/dateparse_utils.g4
@@ -57,6 +57,7 @@ singleDateExpr
     | nthWeekdayInMonthMaybeYearExpr
     | firstLastWeekdayInMonthMaybeYearExpr
     | deltaDateExprRelativeToTodayImplied
+    | dayName (SPACE|ddiv)+ monthDayMaybeYearExpr (SPACE|ddiv)* singleTimeExpr*
     | dayName
     ;
 
diff --git a/dateparse/dateparse_utils.py b/dateparse/dateparse_utils.py
index ee2bbd9..2e6eabd 100755
--- a/dateparse/dateparse_utils.py
+++ b/dateparse/dateparse_utils.py
@@ -348,7 +348,7 @@ class DateParser(dateparse_utilsListener):
         return 5
 
     def _parse_normal_date(self) -> datetime.date:
-        if 'dow' in self.context:
+        if 'dow' in self.context and 'month' not in self.context:
             d = self.today
             while d.weekday() != self.context['dow']:
                 d += datetime.timedelta(days=1)
diff --git a/string_utils.py b/string_utils.py
index 4127079..7c40dc9 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1388,6 +1388,46 @@ def to_date(in_str: str) -> Optional[datetime.date]:
     return None
 
 
+def extract_date(in_str: Any) -> Optional[datetime.date]:
+    """Find and return the first date embedded in a string.
+
+    Slides windows of five, four, three and then two
+    whitespace-separated tokens over the input and returns the result
+    of the first window that the date parser accepts.  Single tokens
+    are never tried on their own.
+
+    Args:
+        in_str: the text to search; anything that is not a str yields
+            None.
+
+    Returns:
+        Whatever DateParser.get_date() yields for the first window that
+        parses (presumably a datetime.date; confirm against
+        dateparse_utils), or None when no window parses.
+    """
+    # Imported lazily so the parser module is only loaded when this
+    # function is actually called.
+    import dateparse.dateparse_utils as du
+
+    if not isinstance(in_str, str):
+        return None
+
+    parser = du.DateParser()  # type: ignore
+    chunks = in_str.split()
+
+    # Longest windows are tried first; a shorter window is only reached
+    # when no longer one parses.
+    for size in (5, 4, 3, 2):
+        for ngram in list_utils.ngrams(chunks, size):
+            expr = " ".join(ngram)
+            logger.debug("Trying %s", expr)
+            try:
+                if parser.parse(expr):
+                    return parser.get_date()
+            except du.ParseException:  # type: ignore
+                continue
+    return None
+
+
 def is_valid_date(in_str: str) -> bool:
     """
     Args:
-- 
2.45.2