Make loadfile work.

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index 911008d4c93bc50d6d78bb7d09d9d4aaaffdbcd5..45cf5aab7ac7f5202346745de733c792c984214d 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,13 +1,15 @@
  #!/usr/bin/env python3
  
+import contextlib
  import datetime
+import io
  from itertools import zip_longest
  import json
  import logging
  import random
  import re
  import string
-from typing import Any, List, Optional
+from typing import Any, Callable, Iterable, List, Optional
  import unicodedata
  from uuid import uuid4
  
@@ -921,6 +923,22 @@ def sprintf(*args, **kwargs) -> str:
      return ret
  
  
+class SprintfStdout(object):
+    """Context manager that captures text written to stdout in memory.
+
+    Entering the context redirects stdout into an internal io.StringIO
+    and hands back a zero-argument callable; calling it returns
+    everything captured so far as a str.  Leaving the context undoes the
+    redirection and rewinds the buffer to its start.
+    """
+
+    def __init__(self) -> None:
+        # Buffer that receives everything printed while the context is active.
+        self.destination = io.StringIO()
+        # The contextlib.redirect_stdout helper; created in __enter__.
+        self.recorder = None
+
+    def __enter__(self) -> Callable[[], str]:
+        self.recorder = contextlib.redirect_stdout(self.destination)
+        self.recorder.__enter__()
+
+        def captured() -> str:
+            """Return the text captured so far."""
+            return self.destination.getvalue()
+
+        return captured
+
+    def __exit__(self, *args) -> None:
+        self.recorder.__exit__(*args)
+        self.destination.seek(0)
+        # Implicitly returns None, so exceptions raised inside the
+        # with-block are not suppressed.
+
+
  def is_are(n: int) -> str:
      if n == 1:
          return "is"
@@ -945,3 +963,21 @@ def thify(n: int) -> str:
          return "rd"
      else:
          return "th"
+
+
+def ngrams(txt: str, n: int):
+    """Return an iterator over the n-word sequences in txt.
+
+    txt is split on runs of whitespace and the resulting words are handed
+    to ngrams_presplit, which produces each n-gram as a single
+    space-joined string.
+    """
+    return ngrams_presplit(txt.split(), n)
+
+
+def ngrams_presplit(words: Iterable[str], n: int):
+    """Yield every run of n consecutive words as one space-joined string.
+
+    Args:
+        words: the already-tokenized words.  Any iterable is accepted;
+            inputs that are not a list or tuple (generators, iterators,
+            strings, ...) are copied into a list once so they can be
+            indexed.
+        n: how many words each n-gram contains.
+
+    Yields:
+        Each n-gram in order of appearance.  Nothing is yielded when n is
+        less than 1 or when there are fewer than n words.
+    """
+    if n < 1:
+        return
+    # Slicing needs a real sequence; a bare iterator or generator would
+    # raise TypeError, so materialize it exactly once.
+    seq = words if isinstance(words, (list, tuple)) else list(words)
+    # Slide a window of width n instead of building n shifted copies.
+    for start in range(len(seq) - n + 1):
+        yield ' '.join(seq[start:start + n])
+
+
+def bigrams(txt: str):
+    """Return the 2-word n-grams of txt, as produced by ngrams."""
+    return ngrams(txt, n=2)
+
+
+def trigrams(txt: str):
+    """Return the 3-word n-grams of txt, as produced by ngrams."""
+    return ngrams(txt, n=3)