3 # © Copyright 2021-2022, Scott Gasch
5 """This module contains the implementation of a Trie tree (or prefix
6 tree). See: https://en.wikipedia.org/wiki/Trie.
8 It can be used with arbitrary sequences as keys and stores its values
9 in a tree with paths determined by the sequence determined by each
10 keys' sequences. Thus, it can determine whether a given value is
11 contained in the tree via a simple traversal in :math:`O(n)` where n
12 is the number of steps in the item's sequence and can also check
13 whether a key-prefix is present in the tree in :math:`O(n)` time.
15 Given a node in the BST, it is easy to determine all items that are
16 stored beneath that node. See examples below.
21 from typing import Any, Dict, Generator, Sequence
23 logger = logging.getLogger(__name__)
28 This is a Trie class, see: https://en.wikipedia.org/wiki/Trie.
30 It attempts to follow Pythonic container patterns. See doctests
35 """Create an empty trie."""
40 self.content_generator: Generator[str] = None
42 def insert(self, item: Sequence[Any]) -> None:
44 Insert an item into the trie. Items are represented by a :class:`Sequence`
45 where each item in the sequence describes a set in the path to the item.
48 item: the item to be inserted.
52 >>> t.__contains__('test')
57 current = current.setdefault(child, {})
58 current[self.end] = self.end
61 def __contains__(self, item: Sequence[Any]) -> bool:
63 Checks whether an item is in the Trie.
66 item: the item whose presence is to be determined.
69 True if `item` is present, False otherwise.
73 >>> t.__contains__('test')
75 >>> t.__contains__('testing')
80 current = self.__traverse__(item)
84 return self.end in current
86 def contains_prefix(self, item: Sequence[Any]):
88 Check whether a prefix is in the Trie. The prefix may or may not
92 item: the item describing the prefix to be checked.
95 True if the prefix described by `item` is present, False otherwise.
98 >>> t.insert('tricycle')
99 >>> t.contains_prefix('tri')
101 >>> t.contains_prefix('tricycle')
103 >>> t.contains_prefix('triad')
107 current = self.__traverse__(item)
108 return current is not None
110 def __traverse__(self, item: Sequence[Any]):
114 current = current[child]
119 def __getitem__(self, item: Sequence[Any]) -> Dict[Any, Any]:
120 """Given an item, return its trie node which contains all
121 of the successor (child) node pointers. If the item is not
122 a node in the Trie, raise a KeyError.
125 item: the item whose node is to be retrieved
128 A mapping that represents item in the trie. If the
129 keyspace of the mapping includes '~END~' a valid item
130 ends at the node. If the mapping contains other keys,
131 each key indicates the presence of one or more children
132 on the edge below the node.
135 KeyError if item is not present in the trie.
139 >>> t.insert('testicle')
140 >>> t.insert('tessera')
141 >>> t.insert('tesack')
143 {'t': {'~END~': '~END~', 'i': {'c': {'l': {'e': {'~END~': '~END~'}}}}}, 's': {'e': {'r': {'a': {'~END~': '~END~'}}}}, 'a': {'c': {'k': {'~END~': '~END~'}}}}
146 ret = self.__traverse__(item)
148 raise KeyError(f"Node '{item}' is not in the trie")
151 def delete_recursively(self, node: Dict[Any, Any], item: Sequence[Any]) -> bool:
153 Deletes an item from the trie by walking the path from root to where it
157 root_node: root under which to search for item
158 item: item whose node is the root of the recursive deletion operation
161 True if the item was not the prefix of another item such that there
162 is nothing below item remaining anymore post delete and False if the
163 deleted item was a proper prefix of another item in the tree such that
164 there is still data below item remaining in the tree.
168 if len(node) == 0 and node is not self.root:
176 ret = self.delete_recursively(lower, cdr)
177 ret = ret and self.delete_recursively(node, car)
180 def __delitem__(self, item: Sequence[Any]):
182 Delete an item from the Trie.
185 item: the item to be deleted.
190 >>> t.insert('tessel')
194 {'t': {'e': {'s': {'t': {'~END~': '~END~'}, 's': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
195 >>> t.__delitem__('test')
199 {'t': {'e': {'s': {'s': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
204 >>> t.__delitem__('tessel')
208 {'t': {'e': {'s': {'s': {'~END~': '~END~'}}}}}
212 >>> t.__delitem__('tess')
219 >>> t.insert('testy')
226 raise KeyError(f"Node '{item}' is not in the trie")
227 self.delete_recursively(self.root, item)
233 A count of the trie's item population.
248 self.content_generator = self.generate_recursively(self.root, '')
251 def generate_recursively(self, node, path: Sequence[Any]):
254 A generator that yields the trie's items one at a time.
258 >>> t.insert('tickle')
259 >>> for item in t.generate_recursively(t.root, ''):
266 if child == self.end:
269 yield from self.generate_recursively(node[child], path + child)
273 Iterate through the contents of the trie.
277 >>> t.insert('tickle')
284 ret = next(self.content_generator)
289 def successors(self, item: Sequence[Any]):
292 A list of the successors of an item.
298 >>> t.successors('wh')
302 >>> u.insert(['this', 'is', 'a', 'test'])
303 >>> u.insert(['this', 'is', 'a', 'robbery'])
304 >>> u.insert(['this', 'is', 'a', 'walrus'])
305 >>> u.successors(['this', 'is', 'a'])
306 ['test', 'robbery', 'walrus']
308 node = self.__traverse__(item)
311 return [x for x in node if x != self.end]
321 Helper that return a fancy representation of the Trie, used by
326 if node is not self.root:
327 ret = f'\n{padding}{pointer}'
337 if child != self.end:
341 if child != self.end:
348 pointer += f'{child}'
350 ret += self._repr_fancy(padding, pointer, node[child], has_sibling)
353 def repr_brief(self, node: Dict[Any, Any], delimiter: str):
355 A friendly string representation of the contents of the Trie.
358 node: the root of the trie to represent.
359 delimiter: character or string to stuff between edges.
362 A brief string representation of the trie. See example.
365 >>> t.insert([10, 0, 0, 1])
366 >>> t.insert([10, 0, 0, 2])
367 >>> t.insert([10, 10, 10, 1])
368 >>> t.insert([10, 10, 10, 2])
369 >>> t.repr_brief(t.root, '.')
370 '10.[0.0.[1,2],10.10.[1,2]]'
376 if child != self.end:
378 child_rep = self.repr_brief(node[child], delimiter)
379 if len(child_rep) > 0:
380 my_rep += str(child) + delimiter + child_rep + ","
382 my_rep += str(child) + ","
386 my_rep = f'[{my_rep}]'
391 A friendly string representation of the contents of the Trie. Under
392 the covers uses _repr_fancy.
395 >>> t.insert([10, 0, 0, 1])
396 >>> t.insert([10, 0, 0, 2])
397 >>> t.insert([10, 10, 10, 1])
398 >>> t.insert([10, 10, 10, 2])
412 return self._repr_fancy('', '*', self.root, False)
415 if __name__ == '__main__':