#!/usr/bin/env python3
+import contextlib
import datetime
+import io
from itertools import zip_longest
import json
import logging
import random
import re
import string
-from typing import Any, List, Optional
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import unicodedata
from uuid import uuid4
-import dateparse.dateparse_utils as dp
-
-
logger = logging.getLogger(__name__)
# Matches a plain decimal number: an optional sign, then either digits with
# an optional fraction and an optional unsigned exponent, or a bare
# fraction such as ".5".  The exponent marker is the character class [eE];
# inside a character class "|" is a literal pipe rather than alternation,
# so it must not appear there or strings like "1|5" would match.
NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([eE]\d+)?|\.\d+)$")
return in_str
-def add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
+def add_thousands_separator(
+ in_str: str,
+ *,
+ separator_char = ',',
+ places = 3
+) -> str:
if isinstance(in_str, int):
in_str = f'{in_str}'
-
if is_number(in_str):
return _add_thousands_separator(
in_str,
def to_date(in_str: str) -> Optional[datetime.date]:
+ import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
d.parse(in_str)
def valid_date(in_str: str) -> bool:
+ import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
_ = d.parse(in_str)
def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+ import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
dt = d.parse(in_str)
return ret
class SprintfStdout(object):
    """Context manager that captures text written to stdout.

    Entering the context redirects stdout into an in-memory buffer and
    yields a zero-argument callable; calling it returns everything that
    has been captured so far.  Leaving the context restores stdout and
    never suppresses an in-flight exception.
    """

    def __init__(self) -> None:
        # In-memory sink that receives stdout while the context is active.
        self.destination = io.StringIO()
        # The contextlib.redirect_stdout manager; created on __enter__.
        self.recorder = None

    def __enter__(self) -> Callable[[], str]:
        """Start redirecting stdout and return a getter for the captured text."""
        redirect = contextlib.redirect_stdout(self.destination)
        self.recorder = redirect
        redirect.__enter__()

        def captured() -> str:
            # Look the buffer up at call time, not at __enter__ time.
            return self.destination.getvalue()

        return captured

    def __exit__(self, *args) -> None:
        """Stop redirecting stdout and rewind the capture buffer."""
        self.recorder.__exit__(*args)
        self.destination.seek(0)
        # Falling off the end returns None, so exceptions are not suppressed.
+
+
def is_are(n: int) -> str:
if n == 1:
return "is"
return "rd"
else:
return "th"
+
+
def ngrams(txt: str, n: int):
    """Return the word-level n-grams of txt.

    txt is split on whitespace and the resulting word list is handed to
    ngrams_presplit, whose result is returned unchanged.
    """
    return ngrams_presplit(txt.split(), n)
+
+
def ngrams_presplit(words: Iterable[str], n: int):
    """Yield every run of n consecutive words, joined by single spaces.

    Args:
        words: the tokens to window over, in order.  Any iterable is
            accepted, including one-shot iterators and generators; it is
            copied into a list up front because the windows are built by
            slicing.
        n: the number of words per n-gram.

    Yields:
        Each n-gram as a space-joined string, in order of appearance.
        Nothing is yielded when n < 1 or when there are fewer than n words.
    """
    tokens = list(words)
    # One copy of the token list per offset; zip stops at the shortest
    # copy, so only complete windows of n words are produced.
    shifted = (tokens[offset:] for offset in range(n))
    for ngram in zip(*shifted):
        yield ' '.join(ngram)
+
+
def bigrams(txt: str):
    """Return the two-word n-grams of txt (delegates to ngrams with n=2)."""
    return ngrams(txt, n=2)
+
+
def trigrams(txt: str):
    """Return the three-word n-grams of txt (delegates to ngrams with n=3)."""
    return ngrams(txt, n=3)
+
+
def shuffle_columns(
    txt: Iterable[str],
    specs: Iterable[Iterable[int]],
    delim=''
) -> Iterable[str]:
    """Build new columns by joining selected input columns.

    Args:
        txt: the input columns.  Despite the Iterable annotation the
            columns are looked up as txt[n], so txt must support integer
            indexing (e.g. a list or tuple).
        specs: one iterable of column indexes per output column; the
            indexed columns are joined in the order given.
        delim: the string placed between the joined columns.

    Returns:
        A list with one joined string per spec, in spec order.  An empty
        spec produces an empty string.

    Raises:
        IndexError: if a spec names a column that a list-like txt does
            not have.
    """
    # str.join puts delim strictly between fields.  Stripping delim off a
    # concatenated string instead would treat it as a set of characters
    # and remove matching characters from the data at both ends.
    return [delim.join(txt[n] for n in spec) for spec in specs]
+
+
def shuffle_columns_into_dict(
    txt: Iterable[str],
    specs: Iterable[Tuple[str, Iterable[int]]],
    delim=''
) -> Dict[str, str]:
    """Build a dict of named columns by joining selected input columns.

    Args:
        txt: the input columns.  Despite the Iterable annotation the
            columns are looked up as txt[n], so txt must support integer
            indexing (e.g. a list or tuple).
        specs: (name, indexes) pairs; the indexed columns are joined in
            the order given and stored under name.
        delim: the string placed between the joined columns.

    Returns:
        A dict mapping each spec's name to its joined string.  If a name
        appears more than once, the last spec with that name wins.

    Raises:
        IndexError: if a spec names a column that a list-like txt does
            not have.
    """
    # str.join puts delim strictly between fields.  Stripping delim off a
    # concatenated string instead would treat it as a set of characters
    # and remove matching characters from the data at both ends.
    return {
        name: delim.join(txt[n] for n in indexes)
        for name, indexes in specs
    }
+
+
def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
    """Fill the str.format placeholders in txt from values.

    The formatted text is then passed through sprintf with end='' and
    that result is returned.

    NOTE(review): sprintf is defined outside the visible part of this
    file; presumably it renders its arguments to a string the way print
    would -- confirm against its definition.

    Raises:
        KeyError: if txt names a placeholder that values does not contain.
    """
    formatted = txt.format(**values)
    return sprintf(formatted, end='')