class Corpus(object):
- """A collection of searchable documents."""
+ """A collection of searchable documents.
+
+ >>> c = Corpus()
+ >>> c.add_doc(Document(
+ ... docid=1,
+ ... tags=set(['urgent', 'important']),
+ ... properties=[
+ ... ('author', 'Scott'),
+ ... ('subject', 'your anniversary')
+ ... ],
+ ... reference=None,
+ ... )
+ ... )
+ >>> c.add_doc(Document(
+ ... docid=2,
+ ... tags=set(['important']),
+ ... properties=[
+ ... ('author', 'Joe'),
+ ... ('subject', 'your performance at work')
+ ... ],
+ ... reference=None,
+ ... )
+ ... )
+ >>> c.add_doc(Document(
+ ... docid=3,
+ ... tags=set(['urgent']),
+ ... properties=[
+ ... ('author', 'Scott'),
+ ... ('subject', 'car turning in front of you')
+ ... ],
+ ... reference=None,
+ ... )
+ ... )
+ >>> c.query('author:Scott and important')
+ {1}
+ """
def __init__(self) -> None:
self.docids_by_tag: Dict[str, Set[str]] = defaultdict(set)
def get_docids_with_property(self, key: str) -> Set[str]:
"""Return the set of docids that have a particular property no matter
what that property's value.
- """
+ """
return self.docids_with_property[key]
def get_docids_by_property(self, key: str, value: str) -> Set[str]:
"""Return the set of docids that have a particular property with a
particular value.
- """
+ """
return self.docids_by_property[(key, value)]
def invert_docid_set(self, original: Set[str]) -> Set[str]:
return operator_precedence(token) is not None
def lex(query: str):
- query = query.lower()
tokens = query.split()
for token in tokens:
# Handle ( and ) operators stuck to the ends of tokens
else:
raise ParseError(f"Unexpected negation operand {_} ({type(_)})")
return retval
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()