#!/usr/bin/env python3

import contextlib
import datetime
import io
from itertools import zip_longest
import json
import logging
import random
import re
import string
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
import unicodedata
from uuid import uuid4

# NOTE: recovered from a whitespace-collapsed patch against string_utils.py.
# Only the definitions that the patch adds are reproduced below.  Code the
# patch showed merely as partial hunk context (sprintf, is_are, thify) lives
# in the rest of the module and is not repeated here.


class SprintfStdout:
    """Context manager that captures text written to stdout.

    Entering the context redirects stdout into an in-memory buffer and
    yields a zero-argument callable that returns everything captured so far.

    >>> with SprintfStdout() as buf:
    ...     print("test", end="")
    >>> buf()
    'test'
    """

    def __init__(self) -> None:
        # In-memory sink that receives everything printed inside the context.
        self.destination = io.StringIO()
        # The active redirect_stdout manager; set by __enter__.
        self.recorder: Optional[contextlib.redirect_stdout] = None

    def __enter__(self) -> Callable[[], str]:
        self.recorder = contextlib.redirect_stdout(self.destination)
        self.recorder.__enter__()
        # Hand back the bound getvalue so callers can read the capture lazily.
        return self.destination.getvalue

    def __exit__(self, *args) -> None:
        if self.recorder is not None:
            self.recorder.__exit__(*args)
        # Rewind the buffer; getvalue() is unaffected by the stream position.
        self.destination.seek(0)
        return None  # don't suppress exceptions


def ngrams(txt: str, n: int) -> Iterator[str]:
    """Yield every run of n consecutive whitespace-separated words in txt.

    Each n-gram is returned as a single string with its words joined by one
    space.  Nothing is yielded when txt holds fewer than n words.
    """
    return ngrams_presplit(txt.split(), n)


def ngrams_presplit(words: Iterable[str], n: int) -> Iterator[str]:
    """Yield every run of n consecutive items of words, joined by a space.

    Args:
        words: the already-split tokens; any iterable is accepted.
        n: the number of tokens per n-gram.
    """
    # Materialise once: the sliding windows below need slicing, which plain
    # iterators and generators do not support.
    tokens = list(words)
    for ngram in zip(*(tokens[i:] for i in range(n))):
        yield ' '.join(ngram)


def bigrams(txt: str) -> Iterator[str]:
    """Yield every pair of consecutive words in txt (see ngrams)."""
    return ngrams(txt, 2)


def trigrams(txt: str) -> Iterator[str]:
    """Yield every triple of consecutive words in txt (see ngrams)."""
    return ngrams(txt, 3)


def _join_columns(columns: List[str], indexes: Iterable[int], delim: str) -> str:
    """Return columns[i] for each i in indexes, joined by delim."""
    # join (rather than prefix-then-strip) keeps delimiter characters that
    # are part of the data at either end of a column.
    return delim.join(columns[i] for i in indexes)


def shuffle_columns(
    txt: Iterable[str],
    specs: Iterable[Iterable[int]],
    delim: str = ''
) -> List[str]:
    """Build new strings by picking and joining columns of txt.

    Args:
        txt: the source columns; indexed by the integers in specs.
        specs: one iterable of column indexes per output string.
        delim: separator placed between the columns of one output string.

    Returns:
        A list with one joined string per entry in specs, in order.

    Raises:
        IndexError: if a spec refers to a column that does not exist.
    """
    columns = list(txt)
    return [_join_columns(columns, spec, delim) for spec in specs]


def shuffle_columns_into_dict(
    txt: Iterable[str],
    specs: Iterable[Tuple[str, Iterable[int]]],
    delim: str = ''
) -> Dict[str, str]:
    """Build a dict of strings by picking and joining columns of txt.

    Args:
        txt: the source columns; indexed by the integers in specs.
        specs: (key, column indexes) entries, one per output item.
        delim: separator placed between the columns of one output string.

    Returns:
        A dict mapping each spec's key to its joined columns.  A key that
        appears more than once keeps the value of its last spec.

    Raises:
        IndexError: if a spec refers to a column that does not exist.
    """
    columns = list(txt)
    return {
        spec[0]: _join_columns(columns, spec[1], delim)
        for spec in specs
    }


def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
    """Fill the str.format-style placeholders in txt from values.

    The formatted text is passed through this module's sprintf() with
    end='' (sprintf is defined elsewhere in string_utils.py; presumably it
    renders its arguments the way print() would -- confirm there).

    Raises:
        KeyError: if txt names a placeholder that is missing from values.
    """
    return sprintf(txt.format(**values), end='')