projects
/
python_utils.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Some binary tree methods to support the unscramble program's sparsefile
[python_utils.git]
/
string_utils.py
diff --git
a/string_utils.py
b/string_utils.py
index 3aaf1d7efe4151c61a1739af72234abc0a69fbc3..78e72cca5a36e672fdc8931cf9a9b9b946ac148e 100644
(file)
--- a/
string_utils.py
+++ b/
string_utils.py
@@
-10,10
+10,12
@@
import numbers
import random
import re
import string
import random
import re
import string
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, List, Optional,
Sequence,
Tuple
import unicodedata
from uuid import uuid4
import unicodedata
from uuid import uuid4
+import list_utils
+
logger = logging.getLogger(__name__)
NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
logger = logging.getLogger(__name__)
NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
@@
-1282,12
+1284,15
@@
def ngrams(txt: str, n: int):
"""
words = txt.split()
"""
words = txt.split()
- return ngrams_presplit(words, n)
+ for ngram in ngrams_presplit(words, n):
+ ret = ''
+ for word in ngram:
+ ret += f'{word} '
+ yield ret.strip()
-def ngrams_presplit(words: Iterable[str], n: int):
- for ngram in zip(*[words[i:] for i in range(n)]):
- yield(' '.join(ngram))
+def ngrams_presplit(words: Sequence[str], n: int):
+ return list_utils.ngrams(words, n)
def bigrams(txt: str):
def bigrams(txt: str):