Add tests on bidict. Fix bug in string_utils ngrams.
author Scott Gasch <[email protected]>
Wed, 29 Sep 2021 18:21:41 +0000 (11:21 -0700)
committer Scott Gasch <[email protected]>
Wed, 29 Sep 2021 18:21:41 +0000 (11:21 -0700)
collect/bidict.py
string_utils.py
tests/run_tests.sh

diff --git a/collect/bidict.py b/collect/bidict.py
index 5ba3fc30f6780537509ba26f49202852ef8a5e05..e16217994334a6af750cea13bd88a03f309b2329 100644
--- a/collect/bidict.py
+++ b/collect/bidict.py
@@ -2,7 +2,33 @@
 
 class bidict(dict):
+    """
+    A dict that also maintains ``inverse``, a mapping from each value
+    to the list of keys currently holding that value, so lookups are
+    efficient in either direction.  Because several keys may share a
+    value, the inverse map returns a list of keys.  Values must be
+    hashable since they are used as keys of ``inverse``.
+
+    >>> d = bidict()
+    >>> d['a'] = 1
+    >>> d['b'] = 2
+    >>> d['c'] = 2
+    >>> d['a']
+    1
+    >>> d.inverse[1]
+    ['a']
+    >>> d.inverse[2]
+    ['b', 'c']
+    >>> len(d)
+    3
+    >>> del d['c']
+    >>> len(d)
+    2
+    >>> d.inverse[2]
+    ['b']
+    """
+
     def __init__(self, *args, **kwargs):
-        super(bidict, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.inverse = {}
         for key, value in self.items():
             self.inverse.setdefault(value, []).append(key)
@@ -10,11 +36,21 @@ class bidict(dict):
     def __setitem__(self, key, value):
         if key in self:
-            self.inverse[self[key]].remove(key)
-        super(bidict, self).__setitem__(key, value)
+            old_value = self[key]
+            self.inverse[old_value].remove(key)
+            # Drop the emptied bucket, as __delitem__ does, so inverse
+            # never reports a value that no key maps to.
+            if not self.inverse[old_value]:
+                del self.inverse[old_value]
+        super().__setitem__(key, value)
         self.inverse.setdefault(value, []).append(key)
 
     def __delitem__(self, key):
         self.inverse.setdefault(self[key], []).remove(key)
         if self[key] in self.inverse and not self.inverse[self[key]]:
             del self.inverse[self[key]]
-        super(bidict, self).__delitem__(key)
+        super().__delitem__(key)
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/string_utils.py b/string_utils.py
index b3019cfbbf70097a30e40fab70888701b02eb71c..78e72cca5a36e672fdc8931cf9a9b9b946ac148e 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -10,7 +10,7 @@ import numbers
 import random
 import re
 import string
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
 import unicodedata
 from uuid import uuid4
 
@@ -1285,10 +1285,10 @@ def ngrams(txt: str, n: int):
     """
     words = txt.split()
     for ngram in ngrams_presplit(words, n):
-        return ' '.join(ngram)
+        yield ' '.join(ngram)
 
 
-def ngrams_presplit(words: Iterable[str], n: int):
+def ngrams_presplit(words: Sequence[str], n: int):
     return list_utils.ngrams(words, n)
 
 
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 486bd4e220024cdc194b5441d82677cc4e869a08..57fea2862e04f314e7bc2e93469f058ec3793619 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -85,18 +85,20 @@ fi
 
 FAILED_TESTS=""
 if [ ${DOCTEST} -eq 1 ]; then
-    for doctest in $(grep -lR doctest ${ROOT}/*.py); do
-        BASE=$(basename ${doctest})
-        BASE="${BASE} (doctest)"
-        make_header "${BASE}" "${CYAN}"
-        OUT=$( python3 ${doctest} 2>&1 )
-        FAILED=$( echo "${OUT}" | grep '\*\*\*Test Failed\*\*\*' | wc -l )
-        if [ $FAILED == 0 ]; then
-            echo "OK"
-        else
-            echo -e "${FAILED}"
-            FAILURES=$((FAILURES+1))
-            FAILED_TESTS="${FAILED_TESTS}, ${BASE}"
+    for doctest in $(grep -lR doctest "${ROOT}"/*); do
+        if [[ ${doctest} == *.py ]]; then
+            BASE=$(basename "${doctest}")
+            BASE="${BASE} (doctest)"
+            make_header "${BASE}" "${CYAN}"
+            OUT=$( python3 "${doctest}" 2>&1 )
+            FAILED=$( echo "${OUT}" | grep '\*\*\*Test Failed\*\*\*' | wc -l )
+            if [ "${FAILED}" -eq 0 ]; then
+                echo "OK"
+            else
+                echo -e "${FAILED}"
+                FAILURES=$((FAILURES+1))
+                FAILED_TESTS="${FAILED_TESTS}, ${BASE}"
+            fi
         fi
     done
 fi