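Improve wildcard semantics again: a bare '*' now matches every tagged document, and '*' is accepted as the key in key:value queries ('*:*', '*:Scott').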
[python_utils.git] / logical_search.py
index b6d7479879010d6ea40ef813d03e84574ead7e55..2f79db09d83dbe0b14e2ab6323e107f31384e148 100644 (file)
@@ -104,6 +104,12 @@ class Corpus(object):
     ...          )
     >>> c.query('author:Scott and important')
     {1}
+    >>> c.query('*')
+    {1, 2, 3}
+    >>> c.query('*:*')
+    {1, 2, 3}
+    >>> c.query('*:Scott')
+    {1, 3}
     """
 
     def __init__(self) -> None:
@@ -156,7 +162,6 @@ class Corpus(object):
 
     def get_docids_by_exact_tag(self, tag: str) -> Set[str]:
         """Return the set of docids that have a particular tag."""
-
         return self.docids_by_tag[tag]
 
     def get_docids_by_searching_tags(self, tag: str) -> Set[str]:
@@ -357,12 +362,24 @@ class Node(object):
                             key, value = tag.split(":")
                         except ValueError as v:
                             raise ParseError(f'Invalid key:value syntax at "{tag}"') from v
-                        if value == "*":
-                            r = self.corpus.get_docids_with_property(key)
+
+                        if key == '*':
+                            r = set()
+                            for kv, s in self.corpus.docids_by_property.items():
+                                if value in ('*', kv[1]):
+                                    r.update(s)
                         else:
-                            r = self.corpus.get_docids_by_property(key, value)
+                            if value == '*':
+                                r = self.corpus.get_docids_with_property(key)
+                            else:
+                                r = self.corpus.get_docids_by_property(key, value)
                     else:
-                        r = self.corpus.get_docids_by_exact_tag(tag)
+                        if tag == '*':
+                            r = set()
+                            for s in self.corpus.docids_by_tag.values():
+                                r.update(s)
+                        else:
+                            r = self.corpus.get_docids_by_exact_tag(tag)
                     retval.update(r)
                 else:
                     raise ParseError(f"Unexpected query {tag}")