X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=logical_search.py;h=76c2f86264be3fc49960166fe7a1b702f90e6c4b;hb=6e8e3458d9641394b8f061d55ba569a72e6a8493;hp=3ebaee5652040bef7d67620fb0e69f028c55c986;hpb=4c315e387f18010ba0b5661744ad3c792f21d2d1;p=python_utils.git diff --git a/logical_search.py b/logical_search.py index 3ebaee5..76c2f86 100644 --- a/logical_search.py +++ b/logical_search.py @@ -2,20 +2,10 @@ from __future__ import annotations -from collections import defaultdict import enum import sys -from typing import ( - Any, - Dict, - List, - NamedTuple, - Optional, - Set, - Sequence, - Tuple, - Union, -) +from collections import defaultdict +from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Set, Tuple, Union class ParseError(Exception): @@ -30,9 +20,7 @@ class Document(NamedTuple): docid: str # a unique idenfier for the document tags: Set[str] # an optional set of tags - properties: List[ - Tuple[str, str] - ] # an optional set of key->value properties + properties: List[Tuple[str, str]] # an optional set of key->value properties reference: Any # an optional reference to something else @@ -102,9 +90,7 @@ class Corpus(object): def __init__(self) -> None: self.docids_by_tag: Dict[str, Set[str]] = defaultdict(set) - self.docids_by_property: Dict[Tuple[str, str], Set[str]] = defaultdict( - set - ) + self.docids_by_property: Dict[Tuple[str, str], Set[str]] = defaultdict(set) self.docids_with_property: Dict[str, Set[str]] = defaultdict(set) self.documents_by_docid: Dict[str, Document] = {} @@ -182,13 +168,7 @@ class Corpus(object): def invert_docid_set(self, original: Set[str]) -> Set[str]: """Invert a set of docids.""" - return set( - [ - docid - for docid in self.documents_by_docid.keys() - if docid not in original - ] - ) + return set([docid for docid in self.documents_by_docid.keys() if docid not in original]) def get_doc(self, docid: str) -> Optional[Document]: """Given a docid, retrieve the previously added Document.""" @@ -268,9 +248,7 @@ class Corpus(object): operation = Operation.from_token(token) operand_count = operation.num_operands() if len(node_stack) < operand_count: - raise ParseError( - f"Incorrect number of operations for {operation}" - ) + raise ParseError(f"Incorrect number of operations for {operation}") for _ in range(operation.num_operands()): args.append(node_stack.pop()) node = Node(corpus, operation, args) @@ -297,9 +275,7 @@ class Corpus(object): ok = True break if not ok: - raise ParseError( - "Unbalanced parenthesis in query expression" - ) + raise ParseError("Unbalanced parenthesis in query expression") # and, or, not else: @@ -362,9 +338,7 @@ class Node(object): try: key, value = tag.split(":") except ValueError as v: - raise ParseError( - f'Invalid key:value syntax at "{tag}"' - ) from v + raise ParseError(f'Invalid key:value syntax at "{tag}"') from v if value == "*": r = self.corpus.get_docids_with_property(key) else: @@ -376,23 +350,17 @@ class Node(object): raise ParseError(f"Unexpected query {tag}") elif self.op is Operation.DISJUNCTION: if len(evaled_operands) != 2: - raise ParseError( - "Operation.DISJUNCTION (or) expects two operands." - ) + raise ParseError("Operation.DISJUNCTION (or) expects two operands.") retval.update(evaled_operands[0]) retval.update(evaled_operands[1]) elif self.op is Operation.CONJUNCTION: if len(evaled_operands) != 2: - raise ParseError( - "Operation.CONJUNCTION (and) expects two operands." - ) + raise ParseError("Operation.CONJUNCTION (and) expects two operands.") retval.update(evaled_operands[0]) retval = retval.intersection(evaled_operands[1]) elif self.op is Operation.INVERSION: if len(evaled_operands) != 1: - raise ParseError( - "Operation.INVERSION (not) expects one operand." - ) + raise ParseError("Operation.INVERSION (not) expects one operand.") _ = evaled_operands[0] if isinstance(_, set): retval.update(self.corpus.invert_docid_set(_)) @@ -403,4 +371,5 @@ class Node(object): if __name__ == '__main__': import doctest + doctest.testmod()