3 # © Copyright 2021-2023, Scott Gasch
5 """This module contains the implementation of a Trie tree (or prefix
6 tree). See: https://en.wikipedia.org/wiki/Trie.
8 It can be used with arbitrary sequences as keys and stores its values
9 in a tree with paths determined by the sequence determined by each
10 keys' sequences. Thus, it can determine whether a given value is
11 contained in the tree via a simple traversal in :math:`O(n)` where n
12 is the number of steps in the item's sequence and can also check
13 whether a key-prefix is present in the tree in :math:`O(n)` time.
15 Given a node in the BST, it is easy to determine all items that are
16 stored beneath that node. See examples below.
21 from typing import Any, Dict, Generator, Sequence
23 logger = logging.getLogger(__name__)
28 This is a Trie class, see: https://en.wikipedia.org/wiki/Trie.
30 It attempts to follow Pythonic container patterns. See doctests
35 """Create an empty trie."""
40 self.content_generator: Generator[str] = None
42 def insert(self, item: Sequence[Any]) -> None:
44 Insert an item into the trie. Items are represented by a :class:`Sequence`
45 where each item in the sequence describes a set in the path to the item.
48 item: the item to be inserted.
52 >>> t.__contains__('test')
57 current = current.setdefault(child, {})
58 current[self.end] = self.end
61 def __contains__(self, item: Sequence[Any]) -> bool:
63 Checks whether an item is in the Trie.
66 item: the item whose presence is to be determined.
69 True if `item` is present, False otherwise.
73 >>> t.__contains__('test')
75 >>> t.__contains__('testing')
80 current = self.__traverse__(item)
84 return self.end in current
86 def contains_prefix(self, item: Sequence[Any]):
88 Check whether a prefix is in the Trie. The prefix may or may not
92 item: the item describing the prefix to be checked.
95 True if the prefix described by `item` is present, False otherwise.
98 >>> t.insert('tricycle')
99 >>> t.contains_prefix('tri')
101 >>> t.contains_prefix('tricycle')
103 >>> t.contains_prefix('triad')
107 current = self.__traverse__(item)
108 return current is not None
110 def __traverse__(self, item: Sequence[Any]):
114 current = current[child]
119 def __getitem__(self, item: Sequence[Any]) -> Dict[Any, Any]:
120 """Given an item, return its trie node which contains all
121 of the successor (child) node pointers.
124 item: the item whose node is to be retrieved
127 A mapping that represents item in the trie. If the
128 keyspace of the mapping includes '~END~' a valid item
129 ends at the node. If the mapping contains other keys,
130 each key indicates the presence of one or more children
131 on the edge below the node.
134 KeyError if item is not present in the trie.
138 >>> t.insert('testicle')
139 >>> t.insert('tessera')
140 >>> t.insert('tesack')
142 {'t': {'~END~': '~END~', 'i': {'c': {'l': {'e': {'~END~': '~END~'}}}}}, 's': {'e': {'r': {'a': {'~END~': '~END~'}}}}, 'a': {'c': {'k': {'~END~': '~END~'}}}}
145 ret = self.__traverse__(item)
147 raise KeyError(f"Node '{item}' is not in the trie")
150 def delete_recursively(self, node: Dict[Any, Any], item: Sequence[Any]) -> bool:
152 Deletes an item from the trie by walking the path from root to where it
156 node: root under which to search for item
157 item: item whose node is the root of the recursive deletion operation
160 True if the item was not the prefix of another item such that there
161 is nothing below item remaining anymore post delete and False if the
162 deleted item was a proper prefix of another item in the tree such that
163 there is still data below item remaining in the tree.
167 if len(node) == 0 and node is not self.root:
175 ret = self.delete_recursively(lower, cdr)
176 ret = ret and self.delete_recursively(node, car)
179 def __delitem__(self, item: Sequence[Any]):
181 Delete an item from the Trie.
184 item: the item to be deleted.
189 >>> t.insert('tessel')
193 {'t': {'e': {'s': {'t': {'~END~': '~END~'}, 's': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
194 >>> t.__delitem__('test')
198 {'t': {'e': {'s': {'s': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
203 >>> t.__delitem__('tessel')
207 {'t': {'e': {'s': {'s': {'~END~': '~END~'}}}}}
211 >>> t.__delitem__('tess')
218 >>> t.insert('testy')
225 raise KeyError(f"Node '{item}' is not in the trie")
226 self.delete_recursively(self.root, item)
232 A count of the trie's item population.
247 self.content_generator = self.generate_recursively(self.root, "")
250 def generate_recursively(self, node, path: Sequence[Any]):
253 A generator that yields the trie's items one at a time.
257 >>> t.insert('tickle')
258 >>> for item in t.generate_recursively(t.root, ''):
265 if child == self.end:
268 yield from self.generate_recursively(node[child], path + child)
272 Iterate through the contents of the trie.
276 >>> t.insert('tickle')
283 ret = next(self.content_generator)
288 def successors(self, item: Sequence[Any]):
291 A list of the successors of an item.
297 >>> t.successors('wh')
301 >>> u.insert(['this', 'is', 'a', 'test'])
302 >>> u.insert(['this', 'is', 'a', 'robbery'])
303 >>> u.insert(['this', 'is', 'a', 'walrus'])
304 >>> u.successors(['this', 'is', 'a'])
305 ['test', 'robbery', 'walrus']
307 node = self.__traverse__(item)
310 return [x for x in node if x != self.end]
320 Helper that return a fancy representation of the Trie, used by
325 if node is not self.root:
326 ret = f"\n{padding}{pointer}"
336 if child != self.end:
340 if child != self.end:
347 pointer += f"{child}"
349 ret += self._repr_fancy(padding, pointer, node[child], has_sibling)
352 def repr_brief(self, node: Dict[Any, Any], delimiter: str):
354 A friendly string representation of the contents of the Trie.
357 node: the root of the trie to represent.
358 delimiter: character or string to stuff between edges.
361 A brief string representation of the trie. See example.
364 >>> t.insert([10, 0, 0, 1])
365 >>> t.insert([10, 0, 0, 2])
366 >>> t.insert([10, 10, 10, 1])
367 >>> t.insert([10, 10, 10, 2])
368 >>> t.repr_brief(t.root, '.')
369 '10.[0.0.[1,2],10.10.[1,2]]'
375 if child != self.end:
377 child_rep = self.repr_brief(node[child], delimiter)
378 if len(child_rep) > 0:
379 my_rep += str(child) + delimiter + child_rep + ","
381 my_rep += str(child) + ","
385 my_rep = f"[{my_rep}]"
390 A friendly string representation of the contents of the Trie. Under
391 the covers uses _repr_fancy.
394 >>> t.insert([10, 0, 0, 1])
395 >>> t.insert([10, 0, 0, 2])
396 >>> t.insert([10, 10, 10, 1])
397 >>> t.insert([10, 10, 10, 2])
411 return self._repr_fancy("", "*", self.root, False)
414 if __name__ == "__main__":