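Do the wildcard search thing better: handle the '*' and '*:*' wildcards while evaluating parsed query nodes instead of short-circuiting '*' before the query is parsed.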
author Scott Gasch <[email protected]>
Mon, 18 Apr 2022 17:21:12 +0000 (10:21 -0700)
committer Scott Gasch <[email protected]>
Mon, 18 Apr 2022 17:21:12 +0000 (10:21 -0700)
logical_search.py

index 2cbe0c781f4ba7889e1b3bcc05373c66a427888e..0cfbc8d22624bffe9bc227e3ffde0abc0f313053 100644 (file)
@@ -106,6 +106,8 @@ class Corpus(object):
     {1}
     >>> c.query('*')
     {1, 2, 3}
+    >>> c.query('*:*')
+    {1, 2, 3}
     """
 
     def __init__(self) -> None:
@@ -158,7 +160,6 @@ class Corpus(object):
 
     def get_docids_by_exact_tag(self, tag: str) -> Set[str]:
         """Return the set of docids that have a particular tag."""
-
         return self.docids_by_tag[tag]
 
     def get_docids_by_searching_tags(self, tag: str) -> Set[str]:
@@ -213,8 +214,6 @@ class Corpus(object):
         tag1 and key:*
         """
 
-        if query == '*':
-            return set(self.documents_by_docid.keys())
         try:
             root = self._parse_query(query)
         except ParseError as e:
@@ -361,12 +360,22 @@ class Node(object):
                             key, value = tag.split(":")
                         except ValueError as v:
                             raise ParseError(f'Invalid key:value syntax at "{tag}"') from v
-                        if value == "*":
-                            r = self.corpus.get_docids_with_property(key)
+                        if key == '*':
+                            r = set()
+                            for s in self.corpus.docids_by_tag.values():
+                                r.update(s)
                         else:
-                            r = self.corpus.get_docids_by_property(key, value)
+                            if value == '*':
+                                r = self.corpus.get_docids_with_property(key)
+                            else:
+                                r = self.corpus.get_docids_by_property(key, value)
                     else:
-                        r = self.corpus.get_docids_by_exact_tag(tag)
+                        if tag == '*':
+                            r = set()
+                            for s in self.corpus.docids_by_tag.values():
+                                r.update(s)
+                        else:
+                            r = self.corpus.get_docids_by_exact_tag(tag)
                     retval.update(r)
                 else:
                     raise ParseError(f"Unexpected query {tag}")