From b2eed6fefcfa14b03916c145ad3c0435b25374d0 Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Wed, 29 Sep 2021 11:21:41 -0700 Subject: [PATCH] Add tests on bidict. Fix bug in string_utils ngrams. --- collect/bidict.py | 37 ++++++++++++++++++++++++++++++++++--- string_utils.py | 9 ++++++--- tests/run_tests.sh | 26 ++++++++++++++------------ 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/collect/bidict.py b/collect/bidict.py index 5ba3fc3..e162179 100644 --- a/collect/bidict.py +++ b/collect/bidict.py @@ -2,7 +2,33 @@ class bidict(dict): def __init__(self, *args, **kwargs): - super(bidict, self).__init__(*args, **kwargs) + """ + A class that stores both a Mapping between keys and values and + also the inverse mapping between values and their keys to + allow for efficient lookups in either direction. Because it + is possible to have several keys with the same value, using + the inverse map returns a sequence of keys. + + >>> d = bidict() + >>> d['a'] = 1 + >>> d['b'] = 2 + >>> d['c'] = 2 + >>> d['a'] + 1 + >>> d.inverse[1] + ['a'] + >>> d.inverse[2] + ['b', 'c'] + >>> len(d) + 3 + >>> del d['c'] + >>> len(d) + 2 + >>> d.inverse[2] + ['b'] + + """ + super().__init__(*args, **kwargs) self.inverse = {} for key, value in self.items(): self.inverse.setdefault(value, []).append(key) @@ -10,11 +36,16 @@ class bidict(dict): def __setitem__(self, key, value): if key in self: self.inverse[self[key]].remove(key) - super(bidict, self).__setitem__(key, value) + super().__setitem__(key, value) self.inverse.setdefault(value, []).append(key) def __delitem__(self, key): self.inverse.setdefault(self[key], []).remove(key) if self[key] in self.inverse and not self.inverse[self[key]]: del self.inverse[self[key]] - super(bidict, self).__delitem__(key) + super().__delitem__(key) + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/string_utils.py b/string_utils.py index b3019cf..78e72cc 100644 --- a/string_utils.py +++ b/string_utils.py @@ -10,7 +10,7 @@ import numbers import random import re import string -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple import unicodedata from uuid import uuid4 @@ -1285,10 +1285,13 @@ def ngrams(txt: str, n: int): """ words = txt.split() for ngram in ngrams_presplit(words, n): - return ' '.join(ngram) + ret = '' + for word in ngram: + ret += f'{word} ' + yield ret.strip() -def ngrams_presplit(words: Iterable[str], n: int): +def ngrams_presplit(words: Sequence[str], n: int): return list_utils.ngrams(words, n) diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 486bd4e..57fea28 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -85,18 +85,20 @@ fi FAILED_TESTS="" if [ ${DOCTEST} -eq 1 ]; then - for doctest in $(grep -lR doctest ${ROOT}/*.py); do - BASE=$(basename ${doctest}) - BASE="${BASE} (doctest)" - make_header "${BASE}" "${CYAN}" - OUT=$( python3 ${doctest} 2>&1 ) - FAILED=$( echo "${OUT}" | grep '\*\*\*Test Failed\*\*\*' | wc -l ) - if [ $FAILED == 0 ]; then - echo "OK" - else - echo -e "${FAILED}" - FAILURES=$((FAILURES+1)) - FAILED_TESTS="${FAILED_TESTS}, ${BASE}" + for doctest in $(grep -lR doctest ${ROOT}/*); do + if [[ ${doctest} == *.py ]]; then + BASE=$(basename ${doctest}) + BASE="${BASE} (doctest)" + make_header "${BASE}" "${CYAN}" + OUT=$( python3 ${doctest} 2>&1 ) + FAILED=$( echo "${OUT}" | grep '\*\*\*Test Failed\*\*\*' | wc -l ) + if [ $FAILED == 0 ]; then + echo "OK" + else + echo -e "${FAILED}" + FAILURES=$((FAILURES+1)) + FAILED_TESTS="${FAILED_TESTS}, ${BASE}" + fi fi done fi -- 2.47.1