From: Scott Gasch
Date: Thu, 9 Jun 2022 00:23:33 +0000 (-0700)
Subject: Improve dateparse and make string_utils.extract_date.
X-Git-Url: https://wannabe.guru.org/gitweb/?a=commitdiff_plain;h=82e1225c6f8a6ee59f373c07a7873b373acb19dc;p=python_utils.git

Improve dateparse and make string_utils.extract_date.
---

diff --git a/dateparse/dateparse_utils.g4 b/dateparse/dateparse_utils.g4
index 4701299..364aa0f 100644
--- a/dateparse/dateparse_utils.g4
+++ b/dateparse/dateparse_utils.g4
@@ -57,6 +57,7 @@ singleDateExpr
     | nthWeekdayInMonthMaybeYearExpr
     | firstLastWeekdayInMonthMaybeYearExpr
     | deltaDateExprRelativeToTodayImplied
+    | dayName (SPACE|ddiv)+ monthDayMaybeYearExpr (SPACE|ddiv)* singleTimeExpr*
     | dayName
     ;
 
diff --git a/dateparse/dateparse_utils.py b/dateparse/dateparse_utils.py
index ee2bbd9..2e6eabd 100755
--- a/dateparse/dateparse_utils.py
+++ b/dateparse/dateparse_utils.py
@@ -348,7 +348,7 @@ class DateParser(dateparse_utilsListener):
         return 5
 
     def _parse_normal_date(self) -> datetime.date:
-        if 'dow' in self.context:
+        if 'dow' in self.context and 'month' not in self.context:
             d = self.today
             while d.weekday() != self.context['dow']:
                 d += datetime.timedelta(days=1)
diff --git a/string_utils.py b/string_utils.py
index 4127079..7c40dc9 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1388,6 +1388,45 @@ def to_date(in_str: str) -> Optional[datetime.date]:
     return None
 
 
+def extract_date(in_str: Any) -> Optional[datetime.date]:
+    """Find and parse the first date expression embedded in a string.
+
+    The input is split on whitespace and the n-grams produced by
+    list_utils.ngrams for n = 5, 4, 3 and then 2 are each joined with
+    single spaces and offered to a DateParser, in that order.  The
+    first one the parser accepts wins; single tokens are never tried.
+
+    Args:
+        in_str: the text to search; it must provide a split() method.
+
+    Returns:
+        The result of DateParser.get_date() for the first n-gram that
+        parses (presumably a datetime.date, as with to_date), or None
+        when nothing parses.
+    """
+    import itertools
+
+    import dateparse.dateparse_utils as du
+
+    d = du.DateParser()  # type: ignore
+    chunks = in_str.split()
+    for ngram in itertools.chain(
+        list_utils.ngrams(chunks, 5),
+        list_utils.ngrams(chunks, 4),
+        list_utils.ngrams(chunks, 3),
+        list_utils.ngrams(chunks, 2),
+    ):
+        try:
+            expr = " ".join(ngram)
+            logger.debug("Trying %s", expr)
+            if d.parse(expr):
+                return d.get_date()
+        except du.ParseException:  # type: ignore
+            # Not a date; move on to the next n-gram.
+            pass
+    return None
+
+
 def is_valid_date(in_str: str) -> bool:
     """
     Args: