From d3168b6a8464ceac7d4e5a95e63b5cc1d0b7c283 Mon Sep 17 00:00:00 2001
From: Scott Gasch
Date: Tue, 9 May 2023 08:10:01 -0700
Subject: [PATCH] Add some functionality to string_utils and improve type hints in there.

---
Reviewer notes (text between the "---" marker and the diffstat is dropped
by `git am`):

 * Generator annotations now use None as the send type.  None of these
   generators read a value sent into them, so the previous str / T send
   type advertised a capability they do not have.
 * add_cardinal_suffix() gained the `-> str` return annotation it lacked.
 * remove_cardinal_suffix() now returns None unless an optionally negative
   integer precedes the suffix; previously 'first' came back as 'fir' and
   'th' came back as ''.  It also uses a set literal instead of
   set([...]).
 * Docstrings describe the suffix concretely (st/nd/rd/th); the public
   *_cardinal_suffix names are unchanged.
 * Every hunk keeps its original added/removed line count, so the hunk
   headers and the diffstat below are still accurate.  The post-image blob
   ids on the "index" lines predate these edits.
 * Indentation of nested context lines was reconstructed from a
   whitespace-collapsed copy of this patch; if a context line fails to
   match, apply with `git apply --ignore-whitespace`.

 src/pyutils/list_utils.py   | 20 ++++++++++---
 src/pyutils/string_utils.py | 60 ++++++++++++++++++++++++++++++++++---
 2 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/src/pyutils/list_utils.py b/src/pyutils/list_utils.py
index 16f6e55..f0a4e82 100644
--- a/src/pyutils/list_utils.py
+++ b/src/pyutils/list_utils.py
@@ -7,7 +7,16 @@
 import random
 from collections import Counter
 from itertools import chain, combinations, islice
-from typing import Any, Iterator, List, MutableSequence, Sequence, Tuple
+from typing import (
+    Any,
+    Generator,
+    Iterator,
+    List,
+    MutableSequence,
+    Sequence,
+    Tuple,
+    TypeVar,
+)
 
 
 def shard(lst: List[Any], size: int) -> Iterator[Any]:
@@ -252,7 +261,10 @@ def transpose(lst: List[Any]) -> List[Any]:
     return [list(_) for _ in transposed]
 
 
-def ngrams(lst: Sequence[Any], n: int):
+T = TypeVar('T')
+
+
+def ngrams(lst: Sequence[T], n: int) -> Generator[Sequence[T], None, None]:
     """
     Return the ngrams in the sequence.
 
@@ -288,7 +300,7 @@ def ngrams(lst: Sequence[Any], n: int):
         yield lst[i : i + n]
 
 
-def permute(seq: str):
+def permute(seq: str) -> Generator[str, None, None]:
     """
     Returns all permutations of a sequence.
 
@@ -314,7 +326,7 @@ def permute(seq: str):
     yield from _permute(seq, "")
 
 
-def _permute(seq: str, path: str):
+def _permute(seq: str, path: str) -> Generator[str, None, None]:
     """Internal helper to permute items recursively."""
     seq_len = len(seq)
     if seq_len == 0:
diff --git a/src/pyutils/string_utils.py b/src/pyutils/string_utils.py
index 0c18dcc..bc2c611 100644
--- a/src/pyutils/string_utils.py
+++ b/src/pyutils/string_utils.py
@@ -50,6 +50,7 @@ from typing import (
     Any,
     Callable,
     Dict,
+    Generator,
     Iterable,
     List,
     Literal,
@@ -2220,7 +2221,56 @@ def thify(n: int) -> str:
         return "th"
 
 
-def ngrams(txt: str, n: int):
+get_cardinal_suffix = thify
+
+
+def add_cardinal_suffix(n: int) -> str:
+    """
+    Args:
+        n: the number to return as a string with its suffix appended.
+
+    Returns:
+        A string containing the number followed by :meth:`thify`'s suffix.
+
+    >>> add_cardinal_suffix(123)
+    '123rd'
+
+    >>> add_cardinal_suffix(1)
+    '1st'
+
+    >>> add_cardinal_suffix(0)
+    '0th'
+
+    >>> add_cardinal_suffix(-123)
+    '-123rd'
+    """
+    return f'{n}{get_cardinal_suffix(n)}'
+
+
+def remove_cardinal_suffix(txt: str) -> Optional[str]:
+    """
+    Args:
+        txt: an integer followed by a 'st', 'nd', 'rd' or 'th' suffix.
+
+    Returns:
+        The integer part of txt as a string, or None if txt is malformed.
+
+    >>> remove_cardinal_suffix('123rd')
+    '123'
+
+    >>> remove_cardinal_suffix('-10th')
+    '-10'
+
+    >>> remove_cardinal_suffix('1ero') is None
+    True
+    """
+    number, suffix = txt[:-2], txt[-2:]
+    if suffix in {'st', 'nd', 'rd', 'th'} and number.lstrip('-').isdigit():
+        return number
+    return None
+
+
+def ngrams(txt: str, n: int) -> Generator[str, None, None]:
     """
     Args:
         txt: the string to create ngrams using
@@ -2242,7 +2292,9 @@ def ngrams(txt: str, n: int):
         yield ret.strip()
 
 
-def ngrams_presplit(words: Sequence[str], n: int):
+def ngrams_presplit(
+    words: Sequence[str], n: int
+) -> Generator[Sequence[str], None, None]:
     """
     Same as :meth:`ngrams` but with the string pre-split.
 
@@ -2251,7 +2303,7 @@ def ngrams_presplit(words: Sequence[str], n: int):
     return list_utils.ngrams(words, n)
 
 
-def bigrams(txt: str):
+def bigrams(txt: str) -> Generator[str, None, None]:
     """Generates the bigrams (n=2) of the given string.
 
     See also :meth:`ngrams`, :meth:`trigrams`.
@@ -2262,7 +2314,7 @@ def bigrams(txt: str):
     return ngrams(txt, 2)
 
 
-def trigrams(txt: str):
+def trigrams(txt: str) -> Generator[str, None, None]:
     """Generates the trigrams (n=3) of the given string.
 
     See also :meth:`ngrams`, :meth:`bigrams`.
-- 
2.47.1