X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=logical_search.py;h=b6d7479879010d6ea40ef813d03e84574ead7e55;hb=532df2c5b57c7517dfb3dddd8c1358fbadf8baf3;hp=4295aa0892fd0a67e8af778aa92f92a30d58e436;hpb=31c81f6539969a5eba864d3305f9fb7bf716a367;p=python_utils.git diff --git a/logical_search.py b/logical_search.py index 4295aa0..b6d7479 100644 --- a/logical_search.py +++ b/logical_search.py @@ -1,27 +1,45 @@ #!/usr/bin/env python3 -from __future__ import annotations +# © Copyright 2021-2022, Scott Gasch + +"""This is a module concerned with the creation of and searching of a +corpus of documents. The corpus is held in memory for fast +searching. +""" + +from __future__ import annotations import enum import sys from collections import defaultdict -from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Set, Tuple, Union +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union class ParseError(Exception): """An error encountered while parsing a logical search expression.""" def __init__(self, message: str): + super().__init__() self.message = message -class Document(NamedTuple): - """A tuple representing a searchable document.""" +@dataclass +class Document: + """A class representing a searchable document.""" + + # A unique identifier for each document. + docid: str = '' + + # A set of tag strings for this document. May be empty. + tags: Set[str] = field(default_factory=set) + + # A list of key->value strings for this document. May be empty. + properties: List[Tuple[str, str]] = field(default_factory=list) - docid: str # a unique idenfier for the document - tags: Set[str] # an optional set of tags - properties: List[Tuple[str, str]] # an optional set of key->value properties - reference: Any # an optional reference to something else + # An optional reference to something else; interpreted only by + # caller code, ignored here. + reference: Optional[Any] = None class Operation(enum.Enum): @@ -168,9 +186,7 @@ class Corpus(object): def invert_docid_set(self, original: Set[str]) -> Set[str]: """Invert a set of docids.""" - return set( - [docid for docid in self.documents_by_docid.keys() if docid not in original] - ) + return {docid for docid in self.documents_by_docid if docid not in original} def get_doc(self, docid: str) -> Optional[Document]: """Given a docid, retrieve the previously added Document.""" @@ -250,9 +266,7 @@ class Corpus(object): operation = Operation.from_token(token) operand_count = operation.num_operands() if len(node_stack) < operand_count: - raise ParseError( - f"Incorrect number of operations for {operation}" - ) + raise ParseError(f"Incorrect number of operations for {operation}") for _ in range(operation.num_operands()): args.append(node_stack.pop()) node = Node(corpus, operation, args) @@ -342,9 +356,7 @@ class Node(object): try: key, value = tag.split(":") except ValueError as v: - raise ParseError( - f'Invalid key:value syntax at "{tag}"' - ) from v + raise ParseError(f'Invalid key:value syntax at "{tag}"') from v if value == "*": r = self.corpus.get_docids_with_property(key) else: