Various changes.

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index 7ad9c42a1e2af3304e18ba6beba021c35acbb086..bca2b70d5cd18bc8bb1198782d356f2707c1cbd5 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,19 +1,18 @@
  #!/usr/bin/env python3
  
+import contextlib
  import datetime
+import io
  from itertools import zip_longest
  import json
  import logging
  import random
  import re
  import string
-from typing import Any, List, Optional
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
  import unicodedata
  from uuid import uuid4
  
-import dateparse.dateparse_utils as dp
-
-
  logger = logging.getLogger(__name__)
  
  NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
@@ -228,10 +227,14 @@ def strip_escape_sequences(in_str: str) -> str:
      return in_str
  
  
-def add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
+def add_thousands_separator(
+        in_str: str,
+        *,
+        separator_char = ',',
+        places = 3
+) -> str:
      if isinstance(in_str, int):
          in_str = f'{in_str}'
-
      if is_number(in_str):
          return _add_thousands_separator(
              in_str,
@@ -815,6 +818,7 @@ def to_bool(in_str: str) -> bool:
  
  
  def to_date(in_str: str) -> Optional[datetime.date]:
+    import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
          d.parse(in_str)
@@ -825,6 +829,7 @@ def to_date(in_str: str) -> Optional[datetime.date]:
  
  
  def valid_date(in_str: str) -> bool:
+    import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
          _ = d.parse(in_str)
@@ -835,6 +840,7 @@ def valid_date(in_str: str) -> bool:
  
  
  def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+    import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
          dt = d.parse(in_str)
@@ -917,6 +923,22 @@ def sprintf(*args, **kwargs) -> str:
      return ret
  
  
+class SprintfStdout(object):
+    """Context manager that captures text written to stdout.
+
+    Entering the context redirects stdout into an in-memory io.StringIO
+    buffer and hands back a zero-argument callable; calling it returns the
+    text captured so far.  Example:
+
+        with SprintfStdout() as captured:
+            print("hello")
+        text = captured()   # "hello\n"
+    """
+
+    def __init__(self) -> None:
+        # No redirect exists until __enter__ creates one.
+        self.recorder = None
+        self.destination = io.StringIO()
+
+    def __enter__(self) -> Callable[[], str]:
+        redirect = contextlib.redirect_stdout(self.destination)
+        self.recorder = redirect
+        redirect.__enter__()
+
+        def captured() -> str:
+            return self.destination.getvalue()
+
+        return captured
+
+    def __exit__(self, *args) -> None:
+        # Undo the redirect, then move the buffer position back to the
+        # start.  Falling off the end returns None, so an exception raised
+        # inside the with-body is not suppressed.
+        self.recorder.__exit__(*args)
+        self.destination.seek(0)
+
+
  def is_are(n: int) -> str:
      if n == 1:
          return "is"
@@ -941,3 +963,55 @@ def thify(n: int) -> str:
          return "rd"
      else:
          return "th"
+
+
+def ngrams(txt: str, n: int):
+    """Generate the word-level n-grams of a string.
+
+    txt is split on runs of whitespace and the resulting word list is
+    handed to ngrams_presplit; whatever that returns is returned unchanged.
+    """
+    return ngrams_presplit(txt.split(), n)
+
+
+def ngrams_presplit(words: Iterable[str], n: int):
+    """Yield every run of n consecutive words, joined by single spaces.
+
+    Args:
+        words: the already-split words.  Any iterable is accepted (list,
+            tuple, generator, ...); it is copied into a list once so that
+            it can be windowed by index.
+        n: the number of words per n-gram.
+
+    Yields:
+        One string per window, in order.  Nothing is yielded when n is
+        less than 1 or when there are fewer than n words.
+    """
+    tokens = list(words)
+    if n < 1:
+        return
+    # A window starting at `start` covers tokens[start:start + n]; the last
+    # full window starts at len(tokens) - n.
+    for start in range(len(tokens) - n + 1):
+        yield ' '.join(tokens[start:start + n])
+
+
+def bigrams(txt: str):
+    """Return the two-word n-grams of txt; shorthand for ngrams(txt, 2)."""
+    return ngrams(txt, n=2)
+
+
+def trigrams(txt: str):
+    """Return the three-word n-grams of txt; shorthand for ngrams(txt, 3)."""
+    return ngrams(txt, n=3)
+
+
+def shuffle_columns(
+        txt: Iterable[str],
+        specs: Iterable[Iterable[int]],
+        delim=''
+) -> Iterable[str]:
+    """Build new strings by selecting and reordering columns of txt.
+
+    Args:
+        txt: the column values.  Columns are fetched with txt[n], so the
+            object must support integer indexing (e.g. a list or tuple).
+        specs: one iterable of column indexes per output string.
+        delim: text placed between the selected columns of one output.
+
+    Returns:
+        A list with one string per spec: the selected columns, in the
+        order the spec lists them, joined with delim.
+
+    Raises:
+        IndexError: if txt is a list/tuple and a spec names a column it
+            does not have.
+    """
+    # join() inserts delim only *between* columns.  Concatenating and then
+    # calling strip(delim) would treat delim as a set of characters and
+    # also remove matching characters that are part of the first or last
+    # column's own text.
+    return [delim.join(txt[n] for n in spec) for spec in specs]
+
+
+def shuffle_columns_into_dict(
+        txt: Iterable[str],
+        specs: Iterable[Tuple[str, Iterable[int]]],
+        delim=''
+) -> Dict[str, str]:
+    """Build a dict of named strings by selecting columns of txt.
+
+    Args:
+        txt: the column values.  Columns are fetched with txt[n], so the
+            object must support integer indexing (e.g. a list or tuple).
+        specs: (key, column indexes) pairs; element 0 of each pair is the
+            output key and element 1 lists the columns to combine.
+        delim: text placed between the selected columns of one value.
+
+    Returns:
+        A dict mapping each key to its selected columns, in the order the
+        spec lists them, joined with delim.  If a key appears in more than
+        one spec the last one wins.
+
+    Raises:
+        IndexError: if txt is a list/tuple and a spec names a column it
+            does not have.
+    """
+    out = {}
+    for spec in specs:
+        key, columns = spec[0], spec[1]
+        # join() inserts delim only *between* columns; strip(delim) would
+        # also eat delimiter characters belonging to the first or last
+        # column's own text.
+        out[key] = delim.join(txt[n] for n in columns)
+    return out
+
+
+def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
+    """Fill the str.format placeholders in txt from the values dict.
+
+    Every key of values is supplied to txt.format as a keyword argument.
+    The formatted text is then passed to sprintf with end='' and sprintf's
+    result is returned.
+    """
+    formatted = txt.format(**values)
+    return sprintf(formatted, end='')