X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=string_utils.py;h=45cf5aab7ac7f5202346745de733c792c984214d;hb=b843703134a166013518c707fa5a77373f1bf0bf;hp=7ad9c42a1e2af3304e18ba6beba021c35acbb086;hpb=3bc4daf1edc121cd633429187392227f2fa61885;p=python_utils.git

diff --git a/string_utils.py b/string_utils.py
index 7ad9c42..45cf5aa 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,19 +1,18 @@
 #!/usr/bin/env python3
 
+import contextlib
 import datetime
+import io
 from itertools import zip_longest
 import json
 import logging
 import random
 import re
 import string
-from typing import Any, List, Optional
+from typing import Any, Callable, Iterable, List, Optional
 import unicodedata
 from uuid import uuid4
 
-import dateparse.dateparse_utils as dp
-
-
 logger = logging.getLogger(__name__)
 
 NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
@@ -228,10 +227,14 @@ def strip_escape_sequences(in_str: str) -> str:
     return in_str
 
 
-def add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
+def add_thousands_separator(
+        in_str: str,
+        *,
+        separator_char = ',',
+        places = 3
+) -> str:
     if isinstance(in_str, int):
         in_str = f'{in_str}'
-
     if is_number(in_str):
         return _add_thousands_separator(
             in_str,
@@ -815,6 +818,7 @@ def to_bool(in_str: str) -> bool:
 
 
 def to_date(in_str: str) -> Optional[datetime.date]:
+    import dateparse.dateparse_utils as dp
     try:
         d = dp.DateParser()
         d.parse(in_str)
@@ -825,6 +829,7 @@ def to_date(in_str: str) -> Optional[datetime.date]:
 
 
 def valid_date(in_str: str) -> bool:
+    import dateparse.dateparse_utils as dp
     try:
         d = dp.DateParser()
         _ = d.parse(in_str)
@@ -835,6 +840,7 @@ def valid_date(in_str: str) -> bool:
 
 
 def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+    import dateparse.dateparse_utils as dp
     try:
         d = dp.DateParser()
         dt = d.parse(in_str)
@@ -917,6 +923,22 @@ def sprintf(*args, **kwargs) -> str:
     return ret
 
 
+class SprintfStdout(object):
+    def __init__(self) -> None:
+        self.destination = io.StringIO()
+        self.recorder = None
+
+    def __enter__(self) -> Callable[[], str]:
+        self.recorder = contextlib.redirect_stdout(self.destination)
+        self.recorder.__enter__()
+        return lambda: self.destination.getvalue()
+
+    def __exit__(self, *args) -> None:
+        self.recorder.__exit__(*args)
+        self.destination.seek(0)
+        return None  # don't suppress exceptions
+
+
 def is_are(n: int) -> str:
     if n == 1:
         return "is"
@@ -941,3 +963,21 @@ def thify(n: int) -> str:
         return "rd"
     else:
         return "th"
+
+
+def ngrams(txt: str, n: int):
+    words = txt.split()
+    return ngrams_presplit(words, n)
+
+
+def ngrams_presplit(words: Iterable[str], n: int):
+    for ngram in zip(*[words[i:] for i in range(n)]):
+        yield(' '.join(ngram))
+
+
+def bigrams(txt: str):
+    return ngrams(txt, 2)
+
+
+def trigrams(txt: str):
+    return ngrams(txt, 3)