#!/usr/bin/env python3
# © Copyright 2021-2023, Scott Gasch
"""This module contains the implementation of a Trie tree (or prefix
tree). See: https://en.wikipedia.org/wiki/Trie.
It can be used with arbitrary sequences as keys and stores its values
in a tree with paths determined by the sequence determined by each
keys' sequences. Thus, it can determine whether a given value is
contained in the tree via a simple traversal in :math:`O(n)` where n
is the number of steps in the item's sequence and can also check
whether a key-prefix is present in the tree in :math:`O(n)` time.
Given a node in the BST, it is easy to determine all items that are
stored beneath that node. See examples below.
"""
import logging
from typing import Any, Dict, Generator, List, Sequence
logger = logging.getLogger(__name__)
[docs]
class Trie(object):
"""
This is a Trie class, see: https://en.wikipedia.org/wiki/Trie.
It attempts to follow Pythonic container patterns. See doctests
for examples.
"""
def __init__(self):
"""Create an empty trie."""
self.root = {}
self.end = "~END~"
self.length = 0
self.viz = ""
self.content_generator: Generator[str] = None
[docs]
def insert(self, item: Sequence[Any]) -> None:
"""
Insert an item into the trie. Items are represented by a :class:`Sequence`
where each item in the sequence describes a set in the path to the item.
Args:
item: the item to be inserted.
>>> t = Trie()
>>> t.insert('test')
>>> t.__contains__('test')
True
"""
current = self.root
for child in item:
current = current.setdefault(child, {})
current[self.end] = self.end
self.length += 1
def __contains__(self, item: Sequence[Any]) -> bool:
"""
Checks whether an item is in the Trie.
Args:
item: the item whose presence is to be determined.
Returns:
True if `item` is present, False otherwise.
>>> t = Trie()
>>> t.insert('test')
>>> t.__contains__('test')
True
>>> t.__contains__('testing')
False
>>> 'test' in t
True
"""
current = self.__traverse__(item)
if current is None:
return False
else:
return self.end in current
[docs]
def contains_prefix(self, item: Sequence[Any]):
"""
Check whether a prefix is in the Trie. The prefix may or may not
be a full item.
Args:
item: the item describing the prefix to be checked.
Returns:
True if the prefix described by `item` is present, False otherwise.
>>> t = Trie()
>>> t.insert('tricycle')
>>> t.contains_prefix('tri')
True
>>> t.contains_prefix('tricycle')
True
>>> t.contains_prefix('triad')
False
"""
current = self.__traverse__(item)
return current is not None
def __traverse__(self, item: Sequence[Any]):
current = self.root
for child in item:
if child in current:
current = current[child]
else:
return None
return current
def __getitem__(self, item: Sequence[Any]) -> Dict[Any, Any]:
"""Given an item, return its trie node which contains all
of the successor (child) node pointers.
Args:
item: the item whose node is to be retrieved
Returns:
A mapping that represents item in the trie. If the
keyspace of the mapping includes '~END~' a valid item
ends at the node. If the mapping contains other keys,
each key indicates the presence of one or more children
on the edge below the node.
Raises:
KeyError if item is not present in the trie.
>>> t = Trie()
>>> t.insert('test')
>>> t.insert('testicle')
>>> t.insert('tessera')
>>> t.insert('tesack')
>>> t['tes']
{'t': {'~END~': '~END~', 'i': {'c': {'l': {'e': {'~END~': '~END~'}}}}}, 's': {'e': {'r': {'a': {'~END~': '~END~'}}}}, 'a': {'c': {'k': {'~END~': '~END~'}}}}
"""
ret = self.__traverse__(item)
if ret is None:
raise KeyError(f"Node '{item}' is not in the trie")
return ret
[docs]
def delete_recursively(self, node: Dict[Any, Any], item: Sequence[Any]) -> bool:
"""
Deletes an item from the trie by walking the path from root to where it
ends.
Args:
node: root under which to search for item
item: item whose node is the root of the recursive deletion operation
Returns:
True if the item was not the prefix of another item such that there
is nothing below item remaining anymore post delete and False if the
deleted item was a proper prefix of another item in the tree such that
there is still data below item remaining in the tree.
"""
if len(item) == 1:
del node[item]
if len(node) == 0 and node is not self.root:
del node
return True
return False
else:
car = item[0]
cdr = item[1:]
lower = node[car]
ret = self.delete_recursively(lower, cdr)
ret = ret and self.delete_recursively(node, car)
return ret
def __delitem__(self, item: Sequence[Any]):
"""
Delete an item from the Trie.
Args:
item: the item to be deleted.
>>> t = Trie()
>>> t.insert('test')
>>> t.insert('tess')
>>> t.insert('tessel')
>>> len(t)
3
>>> t.root
{'t': {'e': {'s': {'t': {'~END~': '~END~'}, 's': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
>>> t.__delitem__('test')
>>> len(t)
2
>>> t.root
{'t': {'e': {'s': {'s': {'~END~': '~END~', 'e': {'l': {'~END~': '~END~'}}}}}}}
>>> for x in t:
... print(x)
tess
tessel
>>> t.__delitem__('tessel')
>>> len(t)
1
>>> t.root
{'t': {'e': {'s': {'s': {'~END~': '~END~'}}}}}
>>> for x in t:
... print(x)
tess
>>> t.__delitem__('tess')
>>> len(t)
0
>>> t.length
0
>>> t.root
{}
>>> t.insert('testy')
>>> t.length
1
>>> len(t)
1
"""
if item not in self:
raise KeyError(f"Node '{item}' is not in the trie")
self.delete_recursively(self.root, item)
self.length -= 1
def __len__(self):
"""
Returns:
A count of the trie's item population.
>>> t = Trie()
>>> len(t)
0
>>> t.insert('test')
>>> len(t)
1
>>> t.insert('tree')
>>> len(t)
2
"""
return self.length
def __iter__(self):
self.content_generator = self.generate_recursively(self.root, "")
return self
[docs]
def generate_recursively(self, node, path: Sequence[Any]):
"""
Returns:
A generator that yields the trie's items one at a time.
>>> t = Trie()
>>> t.insert('test')
>>> t.insert('tickle')
>>> for item in t.generate_recursively(t.root, ''):
... print(item)
test
tickle
"""
for child in node:
if child == self.end:
yield path
else:
yield from self.generate_recursively(node[child], path + child)
def __next__(self):
"""
Iterate through the contents of the trie.
>>> t = Trie()
>>> t.insert('test')
>>> t.insert('tickle')
>>> for item in t:
... print(item)
test
tickle
"""
ret = next(self.content_generator)
if ret is not None:
return ret
raise StopIteration
[docs]
def successors(self, item: Sequence[Any]):
"""
Returns:
A list of the successors of an item.
>>> t = Trie()
>>> t.insert('what')
>>> t.insert('who')
>>> t.insert('when')
>>> t.successors('wh')
['a', 'o', 'e']
>>> u = Trie()
>>> u.insert(['this', 'is', 'a', 'test'])
>>> u.insert(['this', 'is', 'a', 'robbery'])
>>> u.insert(['this', 'is', 'a', 'walrus'])
>>> u.successors(['this', 'is', 'a'])
['test', 'robbery', 'walrus']
"""
node = self.__traverse__(item)
if node is None:
return None
return [x for x in node if x != self.end]
def _repr_fancy(
self,
padding: str,
pointer: str,
node: Any,
has_sibling: bool,
) -> str:
"""
Helper that return a fancy representation of the Trie, used by
:meth:`__repr__`.
"""
if node is None:
return ""
if node is not self.root:
ret = f"\n{padding}{pointer}"
if has_sibling:
padding += "│ "
else:
padding += " "
else:
ret = f"{pointer}"
child_count = 0
for child in node:
if child != self.end:
child_count += 1
for child in node:
if child != self.end:
if child_count > 1:
pointer = "├──"
has_sibling = True
else:
pointer = "└──"
has_sibling = False
pointer += f"{child}"
child_count -= 1
ret += self._repr_fancy(padding, pointer, node[child], has_sibling)
return ret
[docs]
def repr_brief(self, node: Dict[Any, Any], delimiter: str):
"""
A friendly string representation of the contents of the Trie.
Args:
node: the root of the trie to represent.
delimiter: character or string to stuff between edges.
Returns:
A brief string representation of the trie. See example.
>>> t = Trie()
>>> t.insert([10, 0, 0, 1])
>>> t.insert([10, 0, 0, 2])
>>> t.insert([10, 10, 10, 1])
>>> t.insert([10, 10, 10, 2])
>>> t.repr_brief(t.root, '.')
'10.[0.0.[1,2],10.10.[1,2]]'
"""
child_count = 0
my_rep = ""
for child in node:
if child != self.end:
child_count += 1
child_rep = self.repr_brief(node[child], delimiter)
if len(child_rep) > 0:
my_rep += str(child) + delimiter + child_rep + ","
else:
my_rep += str(child) + ","
if len(my_rep) > 1:
my_rep = my_rep[:-1]
if child_count > 1:
my_rep = f"[{my_rep}]"
return my_rep
[docs]
def repr_compressed_tree(
self, node: Dict[Any, Any], delimiter: str, prefix: str = ""
) -> str:
"""
A "compressed" tree representation of the Trie.
Args:
node: the root of the trie to represent.
delimiter: character or string to stuff between edges.
prefix: prefix string used to recursively construst the tree.
Returns:
A brief string representation of the trie. See example.
>>> t = Trie()
>>> t.insert([10, 0, 0, 1])
>>> t.insert([10, 0, 0, 2])
>>> t.insert([10, 0, 0, 3])
>>> t.insert([10, 1, 0, 1])
>>> r = t.repr_compressed_tree(t.root, '.')
>>> print(r)
└── 10
├── 0.0
│ ├── 1
│ ├── 2
│ └── 3
└── 1.0.1
"""
# Filter for real children only
children = [c for c in node if c != self.end]
# If we are at a leaf (only the end marker exists)
if not children:
return ""
lines: List[str] = []
for i, child in enumerate(children):
is_last = i == len(children) - 1
curr_label = str(child)
curr_node = node[child]
# Path compression; look ahead: while the next node has
# exactly one real child and isn't a terminal node (an
# 'end' of a sequence)...
next_children = [c for c in curr_node if c != self.end]
while len(next_children) == 1 and self.end not in curr_node:
only_child = next_children[0]
curr_label += f"{delimiter}{only_child}"
curr_node = curr_node[only_child]
next_children = [c for c in curr_node if c != self.end]
connector = "└── " if is_last else "├── "
lines.append(f"{prefix}{connector}{curr_label}")
extension = " " if is_last else "│ "
child_text = self.repr_compressed_tree(
curr_node, delimiter, prefix + extension
)
if child_text:
lines.append(child_text)
return "\n".join(lines)
def __repr__(self):
"""
A friendly string representation of the contents of the Trie. Under
the covers uses _repr_fancy.
>>> t = Trie()
>>> t.insert([10, 0, 0, 1])
>>> t.insert([10, 0, 0, 2])
>>> t.insert([10, 10, 10, 1])
>>> t.insert([10, 10, 10, 2])
>>> print(t)
*
└──10
├──0
│ └──0
│ ├──1
│ └──2
└──10
└──10
├──1
└──2
"""
return self._repr_fancy("", "*", self.root, False)
if __name__ == "__main__":
import doctest
doctest.testmod()