#!/usr/bin/env python3

import contextlib
import datetime
import io
from itertools import zip_longest
import json
import logging
import random
import re
import string
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import unicodedata
from uuid import uuid4

logger = logging.getLogger(__name__)

# NOTE(review): "[e|E]", "[+|-]" and "[x|X]" below are character classes, so
# each of them also matches a literal "|".  Left untouched here because other
# code may rely on the exact patterns.
NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")


def add_thousands_separator(
    in_str: str,
    *,
    separator_char=',',
    places=3
) -> str:
    """
    Insert separator_char between every group of `places` integer digits
    of a number, counting from the right.

    in_str may also be an int; it is converted to its decimal string
    first.  Validation is delegated to is_number(), which is defined
    elsewhere in this module.

    Raises ValueError (carrying the offending value) when in_str is not
    a number according to is_number().
    """
    if isinstance(in_str, int):
        in_str = f'{in_str}'
    if is_number(in_str):
        return _add_thousands_separator(
            in_str,
            separator_char=separator_char,
            places=places
        )
    raise ValueError(in_str)


def _add_thousands_separator(
    in_str: str,
    *,
    separator_char=',',
    places=3
) -> str:
    """
    Group the integer digits of an already validated number string.

    Only the leading run of digits is grouped.  A leading sign, an
    exponent that directly follows the integer digits (e.g. "1234e10")
    and everything after the decimal point are carried over verbatim,
    so "-1234567.5" becomes "-1,234,567.5" rather than "-,123,...".

    Raises ValueError if places is smaller than 1, because no grouping
    is possible and the digits would otherwise be silently dropped.
    """
    if places < 1:
        raise ValueError(f'places must be >= 1, got {places!r}')

    # Peel off the sign so that it never counts as a digit of a group.
    sign = ''
    if in_str.startswith(('+', '-')):
        sign, in_str = in_str[0], in_str[1:]

    whole, _, fraction = in_str.partition('.')

    # Whatever follows the leading digits of the integer part (an
    # exponent when the number has no decimal point) is not grouped.
    split_at = next(
        (idx for idx, char in enumerate(whole) if not char.isdigit()),
        len(whole),
    )
    whole, tail = whole[:split_at], whole[split_at:]

    # The leftmost group takes the remainder; the others are full-sized.
    head_len = len(whole) % places or places
    groups = [whole[:head_len]]
    groups.extend(
        whole[start:start + places]
        for start in range(head_len, len(whole), places)
    )

    ret = sign + separator_char.join(groups) + tail
    if fraction:
        ret += '.' + fraction
    return ret


def to_date(in_str: str) -> Optional[datetime.date]:
    """
    Parse in_str with dateparse's DateParser and return the parser's
    get_date() result, or None (after logging a warning) when the parser
    raises ParseException.
    """
    # Imported at call time, as in the original code.
    import dateparse.dateparse_utils as dp
    try:
        parser = dp.DateParser()
        parser.parse(in_str)
        return parser.get_date()
    except dp.ParseException:
        logger.warning('Unable to parse date %s.', in_str)
    return None


def valid_date(in_str: str) -> bool:
    """
    Return True if dateparse's DateParser parses in_str without raising
    ParseException; otherwise log a warning and return False.
    """
    import dateparse.dateparse_utils as dp
    try:
        dp.DateParser().parse(in_str)
    except dp.ParseException:
        logger.warning('Unable to parse date %s.', in_str)
        return False
    return True


def to_datetime(in_str: str) -> Optional[datetime.datetime]:
    """
    Parse in_str with dateparse's DateParser and return the result if it
    is a datetime.datetime.

    Returns None when parsing fails (a warning is logged) or when the
    parser produced something that is not a datetime.datetime.
    """
    import dateparse.dateparse_utils as dp
    try:
        parsed = dp.DateParser().parse(in_str)
    except (ValueError, dp.ParseException):
        # to_date() / valid_date() treat dp.ParseException as the parse
        # failure signal, so it is handled here as well as ValueError.
        logger.warning('Unable to parse datetime %s.', in_str)
        return None
    if isinstance(parsed, datetime.datetime):
        return parsed
    return None


def valid_datetime(in_str: str) -> bool:
    """
    Return True if to_datetime() yields a datetime for in_str; otherwise
    log a warning and return False.
    """
    if to_datetime(in_str) is not None:
        return True
    # Also covers the case where parsing succeeded but did not produce a
    # datetime, which to_datetime() itself does not log.
    logger.warning('Unable to parse datetime %s.', in_str)
    return False
class SprintfStdout(object):
    """
    Context manager that redirects stdout into an in-memory buffer for
    the duration of the with-block.

    Entering yields a zero-argument callable that returns everything
    captured so far:

        with SprintfStdout() as buf:
            print("test")
        print(buf())

    Exceptions raised inside the with-block are not suppressed.
    """
    def __init__(self) -> None:
        self.destination = io.StringIO()
        # Set by __enter__; the active contextlib.redirect_stdout.
        self.recorder = None

    def __enter__(self) -> Callable[[], str]:
        self.recorder = contextlib.redirect_stdout(self.destination)
        self.recorder.__enter__()
        return self.destination.getvalue

    def __exit__(self, *args) -> None:
        self.recorder.__exit__(*args)
        self.destination.seek(0)
        return None  # don't suppress exceptions


def is_are(n: int) -> str:
    """
    Return "is" when n == 1 and "are" otherwise.
    """
    return "is" if n == 1 else "are"


def pluralize(n: int) -> str:
    """
    Return the plural suffix for a count: "" when n == 1, else "s".
    """
    return "" if n == 1 else "s"


def thify(n: int) -> str:
    """
    Return the English ordinal suffix ("st", "nd", "rd" or "th") for n.

    Numbers ending in 11, 12 or 13 take "th" (11th, 112th) even though
    their last digit is 1, 2 or 3.

    NOTE: the input check is an assert, as in the original code, so it
    is skipped when Python runs with -O.
    """
    digits = str(n)
    assert is_integer_number(digits)
    if digits[-2:] in ("11", "12", "13"):
        return "th"
    return {"1": "st", "2": "nd", "3": "rd"}.get(digits[-1:], "th")


def ngrams(txt: str, n: int) -> Iterable[str]:
    """
    Lazily yield every run of n consecutive whitespace-separated words
    of txt, each joined by a single space.
    """
    return ngrams_presplit(txt.split(), n)


def ngrams_presplit(words: Iterable[str], n: int) -> Iterable[str]:
    """
    Lazily yield every run of n consecutive items of words, each joined
    by a single space.

    words may be any iterable (including a generator); it is copied into
    a list once so that it can be windowed.  Nothing is yielded when n
    is smaller than 1 or larger than the number of words.
    """
    if n < 1:
        return
    items = list(words)
    for start in range(len(items) - n + 1):
        yield ' '.join(items[start:start + n])


def bigrams(txt: str) -> Iterable[str]:
    """
    Shorthand for ngrams(txt, 2).
    """
    return ngrams(txt, 2)


def trigrams(txt: str) -> Iterable[str]:
    """
    Shorthand for ngrams(txt, 3).
    """
    return ngrams(txt, 3)


def _indexable_columns(txt: Iterable[str]):
    """Return txt itself if it supports indexing, else a list copy."""
    return txt if hasattr(txt, '__getitem__') else list(txt)


def _join_columns(columns, indexes: Iterable[int], delim: str) -> str:
    """Join the columns selected by indexes, in order, with delim."""
    return delim.join(columns[n] for n in indexes)


def shuffle_columns(
    txt: Iterable[str],
    specs: Iterable[Iterable[int]],
    delim=''
) -> Iterable[str]:
    """
    Build one output string per spec by joining the columns of txt whose
    indexes the spec lists, in the order listed, separated by delim.

    Column text is joined verbatim: characters of a column that happen
    to equal delim are kept, and empty columns still contribute their
    delimiter.

    Raises IndexError if a spec refers to a column txt does not have.
    """
    columns = _indexable_columns(txt)
    return [_join_columns(columns, spec, delim) for spec in specs]


def shuffle_columns_into_dict(
    txt: Iterable[str],
    specs: Iterable[Tuple[str, Iterable[int]]],
    delim=''
) -> Dict[str, str]:
    """
    Like shuffle_columns, but each spec is a (key, indexes) pair and the
    result maps every key to its joined columns.  A key that occurs more
    than once keeps the value of its last spec.
    """
    columns = _indexable_columns(txt)
    return {
        spec[0]: _join_columns(columns, spec[1], delim)
        for spec in specs
    }


def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
    """
    Fill the str.format() placeholders of txt from values and pass the
    result through sprintf() with an empty end string.

    Raises KeyError if txt names a placeholder missing from values.
    """
    return sprintf(txt.format(**values), end='')