Ran black code formatter on everything.
[python_utils.git] / logical_search.py
index 86c6352972dec2f3330d7b2796fc07fc1e69b28f..85f946135e406e74137a632bceff2a5a1845c699 100644 (file)
@@ -63,7 +63,42 @@ class Operation(enum.Enum):
 
 
 class Corpus(object):
-    """A collection of searchable documents."""
+    """A collection of searchable documents.
+
+    >>> c = Corpus()
+    >>> c.add_doc(Document(
+    ...                    docid=1,
+    ...                    tags=set(['urgent', 'important']),
+    ...                    properties=[
+    ...                                ('author', 'Scott'),
+    ...                                ('subject', 'your anniversary')
+    ...                    ],
+    ...                    reference=None,
+    ...                   )
+    ...          )
+    >>> c.add_doc(Document(
+    ...                    docid=2,
+    ...                    tags=set(['important']),
+    ...                    properties=[
+    ...                                ('author', 'Joe'),
+    ...                                ('subject', 'your performance at work')
+    ...                    ],
+    ...                    reference=None,
+    ...                   )
+    ...          )
+    >>> c.add_doc(Document(
+    ...                    docid=3,
+    ...                    tags=set(['urgent']),
+    ...                    properties=[
+    ...                                ('author', 'Scott'),
+    ...                                ('subject', 'car turning in front of you')
+    ...                    ],
+    ...                    reference=None,
+    ...                   )
+    ...          )
+    >>> c.query('author:Scott and important')
+    {1}
+    """
 
     def __init__(self) -> None:
         self.docids_by_tag: Dict[str, Set[str]] = defaultdict(set)
@@ -133,15 +168,15 @@ class Corpus(object):
     def get_docids_with_property(self, key: str) -> Set[str]:
         """Return the set of docids that have a particular property no matter
         what that property's value.
-        """
 
+        """
         return self.docids_with_property[key]
 
     def get_docids_by_property(self, key: str, value: str) -> Set[str]:
         """Return the set of docids that have a particular property with a
         particular value..
-        """
 
+        """
         return self.docids_by_property[(key, value)]
 
     def invert_docid_set(self, original: Set[str]) -> Set[str]:
@@ -205,7 +240,6 @@ class Corpus(object):
             return operator_precedence(token) is not None
 
         def lex(query: str):
-            query = query.lower()
             tokens = query.split()
             for token in tokens:
                 # Handle ( and ) operators stuck to the ends of tokens
@@ -365,3 +399,9 @@ class Node(object):
             else:
                 raise ParseError(f"Unexpected negation operand {_} ({type(_)})")
         return retval
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()