projects
/
python_utils.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Add powerset to list_utils; improve chord parser.
[python_utils.git]
/
logical_search.py
diff --git a/logical_search.py b/logical_search.py
index ef55a2bbdcbb5baafa5c29a53fc404fcfb4701bb..2f79db09d83dbe0b14e2ab6323e107f31384e148 100644
(file)
--- a/logical_search.py
+++ b/logical_search.py
@@ -1,8 +1,12 @@
#!/usr/bin/env python3
#!/usr/bin/env python3
+# © Copyright 2021-2022, Scott Gasch
+
"""This is a module concerned with the creation of and searching of a
corpus of documents. The corpus is held in memory for fast
"""This is a module concerned with the creation of and searching of a
corpus of documents. The corpus is held in memory for fast
-searching."""
+searching.
+
+"""
from __future__ import annotations
import enum
from __future__ import annotations
import enum
@@ -100,6 +104,12 @@ class Corpus(object):
... )
>>> c.query('author:Scott and important')
{1}
... )
>>> c.query('author:Scott and important')
{1}
+ >>> c.query('*')
+ {1, 2, 3}
+ >>> c.query('*:*')
+ {1, 2, 3}
+ >>> c.query('*:Scott')
+ {1, 3}
"""
def __init__(self) -> None:
"""
def __init__(self) -> None:
@@ -152,7 +162,6 @@ class Corpus(object):
def get_docids_by_exact_tag(self, tag: str) -> Set[str]:
"""Return the set of docids that have a particular tag."""
def get_docids_by_exact_tag(self, tag: str) -> Set[str]:
"""Return the set of docids that have a particular tag."""
-
return self.docids_by_tag[tag]
def get_docids_by_searching_tags(self, tag: str) -> Set[str]:
return self.docids_by_tag[tag]
def get_docids_by_searching_tags(self, tag: str) -> Set[str]:
@@ -353,12 +362,24 @@ class Node(object):
key, value = tag.split(":")
except ValueError as v:
raise ParseError(f'Invalid key:value syntax at "{tag}"') from v
key, value = tag.split(":")
except ValueError as v:
raise ParseError(f'Invalid key:value syntax at "{tag}"') from v
- if value == "*":
- r = self.corpus.get_docids_with_property(key)
+
+ if key == '*':
+ r = set()
+ for kv, s in self.corpus.docids_by_property.items():
+ if value in ('*', kv[1]):
+ r.update(s)
else:
else:
- r = self.corpus.get_docids_by_property(key, value)
+ if value == '*':
+ r = self.corpus.get_docids_with_property(key)
+ else:
+ r = self.corpus.get_docids_by_property(key, value)
else:
else:
- r = self.corpus.get_docids_by_exact_tag(tag)
+ if tag == '*':
+ r = set()
+ for s in self.corpus.docids_by_tag.values():
+ r.update(s)
+ else:
+ r = self.corpus.get_docids_by_exact_tag(tag)
retval.update(r)
else:
raise ParseError(f"Unexpected query {tag}")
retval.update(r)
else:
raise ParseError(f"Unexpected query {tag}")