X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=string_utils.py;h=24fc59542ba96614e364c0331d9db738258bfa32;hb=1803c7b2af8fde615b81ac21a6dc56e56dc20594;hp=6ce4c50311393a11370a47081f7baa2af6f7e3da;hpb=02302bbd9363facb59c4df2c1f4013087702cfa6;p=python_utils.git

Summary of this patch (text before the first "diff --git" line is ignored
by patch tools):
  * Hunk 1 adds string_utils.extract_date(), which splits its input on
    whitespace and tries each 5-, 4-, 3- and then 2-word window against
    dateparse's DateParser, returning the first datetime that parses, or
    None when nothing does.
  * Hunk 2 puts backticks around the :meth: cross-reference target in the
    ngrams_presplit() docstring.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this diff.  Line counts were checked against
the @@ hunk headers; indentation widths are assumed to be standard 4-space
Python and should be confirmed against the repository before applying.

diff --git a/string_utils.py b/string_utils.py
index 6ce4c50..24fc595 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1388,6 +1388,43 @@ def to_date(in_str: str) -> Optional[datetime.date]:
     return None
 
 
+def extract_date(in_str: Any) -> Optional[datetime.datetime]:
+    """Finds and extracts a date from the string, if possible.
+
+    Args:
+        in_str: the string to extract a date from
+
+    Returns:
+        a datetime if date was found, otherwise None
+
+    >>> extract_date("filename.txt dec 13, 2022")
+    datetime.datetime(2022, 12, 13, 0, 0)
+
+    >>> extract_date("Dear Santa, please get me a pony.")
+
+    """
+    import itertools
+
+    import dateparse.dateparse_utils as du
+
+    d = du.DateParser()  # type: ignore
+    chunks = in_str.split()
+    for ngram in itertools.chain(
+        list_utils.ngrams(chunks, 5),
+        list_utils.ngrams(chunks, 4),
+        list_utils.ngrams(chunks, 3),
+        list_utils.ngrams(chunks, 2),
+    ):
+        try:
+            expr = " ".join(ngram)
+            logger.debug(f"Trying {expr}")
+            if d.parse(expr):
+                return d.get_datetime()
+        except du.ParseException:  # type: ignore
+            pass
+    return None
+
+
 def is_valid_date(in_str: str) -> bool:
     """
     Args:
@@ -1893,7 +1930,7 @@ def ngrams(txt: str, n: int):
 
 def ngrams_presplit(words: Sequence[str], n: int):
     """
-    Same as :meth:ngrams but with the string pre-split.
+    Same as :meth:`ngrams` but with the string pre-split.
     """
     return list_utils.ngrams(words, n)
 