#!/usr/bin/env python3
+import contextlib
+import datetime
+import io
from itertools import zip_longest
import json
+import logging
import random
import re
import string
-from typing import Any, List, Optional
+from typing import Any, Callable, Iterable, List, Optional
import unicodedata
from uuid import uuid4
+logger = logging.getLogger(__name__)
+
# Decimal number: optional sign, then either digits with optional fraction and
# optional unsigned exponent, or a bare fraction such as ".5".
# NOTE: inside a character class "|" is a literal, so the exponent marker is
# written [eE] (the previous [e|E] also accepted "1|5").
NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([eE]\d+)?|\.\d+)$")

# Hexadecimal number: optional sign, "0x"/"0X" prefix, one or more hex digits.
# Group 1 is the sign, group 2 the digits (same grouping as before); the
# character classes no longer admit a stray "|".
HEX_NUMBER_RE = re.compile(r"^([+\-]?)0[xX]([0-9A-Fa-f]+)$")
return in_str
-def add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
+def add_thousands_separator(
+ in_str: str,
+ *,
+ separator_char = ',',
+ places = 3
+) -> str:
if isinstance(in_str, int):
in_str = f'{in_str}'
-
if is_number(in_str):
return _add_thousands_separator(
in_str,
return ret
-
# Full url example:
# scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
d = 0.0
suffix = None
for (sfx, size) in NUM_SUFFIXES.items():
- if num > size:
+ if num >= size:
d = num / size
suffix = sfx
break
if suffix is not None:
return f"{d:.1f}{suffix}"
- return None
+ else:
+ return f'{num:d}'
def is_credit_card(in_str: Any, card_type: str = None) -> bool:
return in_str.lower() in ("true", "1", "yes", "y", "t")
def to_date(in_str: str) -> Optional[datetime.date]:
    """
    Parse in_str with dateparse's DateParser and return the parser's date.

    Returns None (after logging a warning) when the parser raises
    dp.ParseException.
    """
    # Imported lazily, inside the function, as in the other date helpers.
    import dateparse.dateparse_utils as dp

    try:
        parser = dp.DateParser()
        parser.parse(in_str)
        parsed_date = parser.get_date()
    except dp.ParseException:
        logger.warning(f'Unable to parse date {in_str}.')
        parsed_date = None
    return parsed_date
+
+
def valid_date(in_str: str) -> bool:
    """
    Report whether dateparse's DateParser can parse in_str.

    Returns True when parsing completes without dp.ParseException; otherwise
    logs a warning and returns False.
    """
    import dateparse.dateparse_utils as dp

    try:
        # Only success/failure matters here; the parse result is discarded.
        dp.DateParser().parse(in_str)
    except dp.ParseException:
        logger.warning(f'Unable to parse date {in_str}.')
        return False
    return True
+
+
def to_datetime(in_str: str) -> Optional[datetime.datetime]:
    """
    Parse in_str with dateparse's DateParser and return the result when it is
    a datetime.datetime.

    Returns None when parsing fails (a warning is logged) or when the parser
    returns something that is not a datetime.datetime.
    """
    import dateparse.dateparse_utils as dp

    try:
        parser = dp.DateParser()
        dt = parser.parse(in_str)
    # The sibling helpers to_date/valid_date catch dp.ParseException from this
    # same parser, so handle it here too; ValueError is still caught so that
    # existing callers see no new exceptions.
    except (dp.ParseException, ValueError):
        logger.warning(f'Unable to parse datetime {in_str}.')
        return None

    # A plain datetime.date is not a datetime.datetime and is rejected here.
    if isinstance(dt, datetime.datetime):
        return dt
    return None
+
+
def valid_datetime(in_str: str) -> bool:
    """
    Report whether to_datetime() can turn in_str into a datetime.

    Returns True when to_datetime() yields a value; otherwise logs a warning
    and returns False.
    """
    parsed = to_datetime(in_str)
    if parsed is None:
        logger.warning(f'Unable to parse datetime {in_str}.')
        return False
    return True
+
+
def dedent(in_str: str) -> str:
"""
Removes tab indentation from multi line strings (inspired by analogous Scala function).
ret += str(arg)
ret += end
return ret
+
+
class SprintfStdout(object):
    """
    Context manager that redirects stdout into an in-memory buffer.

    The value bound by ``with ... as getter`` is a zero-argument callable that
    returns everything written to stdout so far:

        with SprintfStdout() as buf:
            print("test")
        text = buf()
    """

    def __init__(self) -> None:
        # Buffer that receives the redirected output.
        self.destination = io.StringIO()
        # The active contextlib.redirect_stdout; set in __enter__.
        self.recorder = None

    def __enter__(self) -> Callable[[], str]:
        redirect = contextlib.redirect_stdout(self.destination)
        self.recorder = redirect
        redirect.__enter__()

        def captured() -> str:
            # Looked up at call time so it reflects all output written so far.
            return self.destination.getvalue()

        return captured

    def __exit__(self, *args) -> None:
        # Restore the previous stdout, then rewind the buffer.
        self.recorder.__exit__(*args)
        self.destination.seek(0)
        # Returning None means exceptions raised in the body are not suppressed.
        return None
+
+
def is_are(n: int) -> str:
    """Return "is" when n equals 1, otherwise "are"."""
    return "is" if n == 1 else "are"
+
+
def pluralize(n: int) -> str:
    """Return the plural suffix for a count: "" when n equals 1, else "s"."""
    return "" if n == 1 else "s"
+
+
def thify(n: int) -> str:
    """
    Return the English ordinal suffix ("st", "nd", "rd" or "th") for n.

    Examples: 1 -> "st", 2 -> "nd", 3 -> "rd", 4 -> "th", 11 -> "th",
    12 -> "th", 13 -> "th", 21 -> "st", 112 -> "th".

    Asserts that str(n) passes is_integer_number().
    """
    digits = str(n)
    assert is_integer_number(digits)

    # Numbers ending in 11, 12 or 13 are the exception to the last-digit rule:
    # "11th", "112th", not "11st" / "112nd".
    if digits[-2:] in ("11", "12", "13"):
        return "th"

    suffix_by_last_digit = {"1": "st", "2": "nd", "3": "rd"}
    return suffix_by_last_digit.get(digits[-1:], "th")
+
+
def ngrams(txt: str, n: int):
    """
    Split txt on whitespace and return the n-grams of the resulting words,
    as produced by ngrams_presplit().
    """
    return ngrams_presplit(txt.split(), n)
+
+
def ngrams_presplit(words: Iterable[str], n: int):
    """
    Yield each run of n consecutive words, joined by single spaces.

    Args:
        words: the already-split words; any iterable is accepted, including
            generators and iterators.
        n: the size of each n-gram.

    Yields:
        One string per n-gram, in order. Nothing is yielded when there are
        fewer than n words or when n <= 0.
    """
    # Slicing needs a real sequence, but the signature promises any Iterable,
    # so materialize the input once up front.
    tokens = list(words)
    for ngram in zip(*(tokens[i:] for i in range(n))):
        yield ' '.join(ngram)
+
+
def bigrams(txt: str):
    """Return the 2-grams of txt; shorthand for ngrams(txt, 2)."""
    pair_size = 2
    return ngrams(txt, pair_size)
+
+
def trigrams(txt: str):
    """Return the 3-grams of txt; shorthand for ngrams(txt, 3)."""
    triple_size = 3
    return ngrams(txt, triple_size)