Improve wildcard semantics again.
[python_utils.git] / logical_search.py
index ef55a2bbdcbb5baafa5c29a53fc404fcfb4701bb..2f79db09d83dbe0b14e2ab6323e107f31384e148 100644 (file)
@@ -1,8 +1,12 @@
 #!/usr/bin/env python3
 
+# © Copyright 2021-2022, Scott Gasch
+
 """This is a module concerned with the creation of and searching of a
 corpus of documents.  The corpus is held in memory for fast
-searching."""
+searching.
+
+"""
 
 from __future__ import annotations
 import enum
@@ -100,6 +104,12 @@ class Corpus(object):
     ...          )
     >>> c.query('author:Scott and important')
     {1}
+    >>> c.query('*')
+    {1, 2, 3}
+    >>> c.query('*:*')
+    {1, 2, 3}
+    >>> c.query('*:Scott')
+    {1, 3}
     """
 
     def __init__(self) -> None:
@@ -152,7 +162,6 @@ class Corpus(object):
 
     def get_docids_by_exact_tag(self, tag: str) -> Set[str]:
         """Return the set of docids that have a particular tag."""
-
         return self.docids_by_tag[tag]
 
     def get_docids_by_searching_tags(self, tag: str) -> Set[str]:
@@ -353,12 +362,24 @@ class Node(object):
                             key, value = tag.split(":")
                         except ValueError as v:
                             raise ParseError(f'Invalid key:value syntax at "{tag}"') from v
-                        if value == "*":
-                            r = self.corpus.get_docids_with_property(key)
+
+                        if key == '*':
+                            r = set()
+                            for kv, s in self.corpus.docids_by_property.items():
+                                if value in ('*', kv[1]):
+                                    r.update(s)
                         else:
-                            r = self.corpus.get_docids_by_property(key, value)
+                            if value == '*':
+                                r = self.corpus.get_docids_with_property(key)
+                            else:
+                                r = self.corpus.get_docids_by_property(key, value)
                     else:
-                        r = self.corpus.get_docids_by_exact_tag(tag)
+                        if tag == '*':
+                            r = set()
+                            for s in self.corpus.docids_by_tag.values():
+                                r.update(s)
+                        else:
+                            r = self.corpus.get_docids_by_exact_tag(tag)
                     retval.update(r)
                 else:
                     raise ParseError(f"Unexpected query {tag}")