X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=string_utils.py;h=78e72cca5a36e672fdc8931cf9a9b9b946ac148e;hb=b0bde5bef4a19382136112196b238088641738d5;hp=3aaf1d7efe4151c61a1739af72234abc0a69fbc3;hpb=4c315e387f18010ba0b5661744ad3c792f21d2d1;p=python_utils.git diff --git a/string_utils.py b/string_utils.py index 3aaf1d7..78e72cc 100644 --- a/string_utils.py +++ b/string_utils.py @@ -10,10 +10,12 @@ import numbers import random import re import string -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple import unicodedata from uuid import uuid4 +import list_utils + logger = logging.getLogger(__name__) NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$") @@ -1282,12 +1284,15 @@ def ngrams(txt: str, n: int): """ words = txt.split() - return ngrams_presplit(words, n) + for ngram in ngrams_presplit(words, n): + ret = '' + for word in ngram: + ret += f'{word} ' + yield ret.strip() -def ngrams_presplit(words: Iterable[str], n: int): - for ngram in zip(*[words[i:] for i in range(n)]): - yield(' '.join(ngram)) +def ngrams_presplit(words: Sequence[str], n: int): + return list_utils.ngrams(words, n) def bigrams(txt: str):