From 986d5f7ada15e56019518db43d07b76f94468e1a Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Tue, 28 Sep 2021 23:27:51 -0700 Subject: [PATCH] Various changes --- cached/weather_forecast.py | 5 +- collect/bst.py | 454 +++++++++++++++++++++++++++++++++++++ letter_compress.py | 11 +- list_utils.py | 15 +- profanity_filter.py | 2 + string_utils.py | 8 +- 6 files changed, 488 insertions(+), 7 deletions(-) create mode 100644 collect/bst.py diff --git a/cached/weather_forecast.py b/cached/weather_forecast.py index 4be53c8..6e2f5f9 100644 --- a/cached/weather_forecast.py +++ b/cached/weather_forecast.py @@ -102,8 +102,9 @@ class CachedDetailedWeatherForecast(object): sunrise = s['sunrise'] sunset = s['sunset'] - if dt.date == now.date and not said_temp: - blurb = f'{day.get_text()}: The current outside tempterature is {current_temp}. ' + txt.get_text() + if dt.date() == now.date() and not said_temp: + blurb = f'{day.get_text()}: The current outside tempterature is {current_temp}. ' + blurb += txt.get_text() said_temp = True else: blurb = f'{day.get_text()}: {txt.get_text()}' diff --git a/collect/bst.py b/collect/bst.py new file mode 100644 index 0000000..94570f4 --- /dev/null +++ b/collect/bst.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python3 + +from typing import Any, Optional + + +class Node(object): + def __init__(self, value: Any) -> None: + self.left = None + self.right = None + self.value = value + + +class BinaryTree(object): + def __init__(self): + self.root = None + self.count = 0 + self.traverse = None + + def get_root(self) -> Optional[Node]: + return self.root + + def insert(self, value: Any): + """ + Insert something into the tree. + + >>> t = BinaryTree() + >>> t.insert(10) + >>> t.insert(20) + >>> t.insert(5) + >>> len(t) + 3 + + >>> t.get_root().value + 10 + + """ + if self.root is None: + self.root = Node(value) + self.count = 1 + else: + self._insert(value, self.root) + + def _insert(self, value: Any, node: Node): + """Insertion helper""" + if value < node.value: + if node.left is not None: + self._insert(value, node.left) + else: + node.left = Node(value) + self.count += 1 + else: + if node.right is not None: + self._insert(value, node.right) + else: + node.right = Node(value) + self.count += 1 + + def __getitem__(self, value: Any) -> Optional[Node]: + """ + Find an item in the tree and return its Node. Returns + None if the item is not in the tree. + + >>> t = BinaryTree() + >>> t[99] + + >>> t.insert(10) + >>> t.insert(20) + >>> t.insert(5) + >>> t[10].value + 10 + + >>> t[99] + + """ + if self.root is not None: + return self._find(value, self.root) + return None + + def _find(self, value: Any, node: Node) -> Optional[Node]: + """Find helper""" + if value == node.value: + return node + elif (value < node.value and node.left is not None): + return self._find(value, node.left) + else: + assert value > node.value + if node.right is not None: + return self._find(value, node.right) + return None + + def __delitem__(self, value: Any) -> bool: + """ + Delete an item from the tree and preserve the BST property. + + 50 + / \ + 25 75 + / / \ + 22 66 85 + / + 13 + + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + >>> t.insert(85) + + >>> for value in t.iterate_inorder(): + ... print(value) + 13 + 22 + 25 + 50 + 66 + 75 + 85 + + >>> t.__delitem__(22) + True + >>> for value in t.iterate_inorder(): + ... print(value) + 13 + 25 + 50 + 66 + 75 + 85 + + >>> t.__delitem__(13) + True + >>> for value in t.iterate_inorder(): + ... print(value) + 25 + 50 + 66 + 75 + 85 + + >>> t.__delitem__(75) + True + >>> for value in t.iterate_inorder(): + ... print(value) + 25 + 50 + 66 + 85 + + >>> t.__delitem__(99) + False + + """ + if self.root is not None: + ret = self._delete(value, None, self.root) + if ret: + self.count -= 1 + if self.count == 0: + self.root = None + return ret + return False + + def _delete(self, value: Any, parent: Optional[Node], node: Node) -> bool: + """Delete helper""" + if node.value == value: + # Deleting a leaf node + if node.left is None and node.right is None: + if parent is not None: + if parent.left == node: + parent.left = None + else: + assert parent.right == node + parent.right = None + return True + + # Node only has a right. + elif node.left is None: + assert node.right is not None + if parent is not None: + if parent.left == node: + parent.left = node.right + else: + assert parent.right == node + parent.right = node.right + return True + + # Node only has a left. + elif node.right is None: + assert node.left is not None + if parent is not None: + if parent.left == node: + parent.left = node.left + else: + assert parent.right == node + parent.right = node.left + return True + + # Node has both a left and right. + else: + assert node.left is not None and node.right is not None + descendent = node.right + while descendent.left is not None: + descendent = descendent.left + node.value = descendent.value + return self._delete(node.value, node, node.right) + elif value < node.value and node.left is not None: + return self._delete(value, node, node.left) + elif value > node.value and node.right is not None: + return self._delete(value, node, node.right) + return False + + def __len__(self): + """ + Returns the count of items in the tree. + + >>> t = BinaryTree() + >>> len(t) + 0 + >>> t.insert(50) + >>> len(t) + 1 + >>> t.__delitem__(50) + True + >>> len(t) + 0 + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + >>> t.insert(85) + >>> len(t) + 6 + + """ + return self.count + + def __contains__(self, value: Any) -> bool: + """ + Returns True if the item is in the tree; False otherwise. + + """ + return self.__getitem__(value) is not None + + def _iterate_preorder(self, node: Node): + yield node.value + if node.left is not None: + yield from self._iterate_preorder(node.left) + if node.right is not None: + yield from self._iterate_preorder(node.right) + + def _iterate_inorder(self, node: Node): + if node.left is not None: + yield from self._iterate_inorder(node.left) + yield node.value + if node.right is not None: + yield from self._iterate_inorder(node.right) + + def _iterate_postorder(self, node: Node): + if node.left is not None: + yield from self._iterate_postorder(node.left) + if node.right is not None: + yield from self._iterate_postorder(node.right) + yield node.value + + def iterate_preorder(self): + """ + Yield the tree's items in a preorder traversal sequence. + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + + >>> for value in t.iterate_preorder(): + ... print(value) + 50 + 25 + 22 + 13 + 75 + 66 + + """ + if self.root is not None: + yield from self._iterate_preorder(self.root) + + def iterate_inorder(self): + """ + Yield the tree's items in a preorder traversal sequence. + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + + >>> for value in t.iterate_inorder(): + ... print(value) + 13 + 22 + 25 + 50 + 66 + 75 + + """ + if self.root is not None: + yield from self._iterate_inorder(self.root) + + def iterate_postorder(self): + """ + Yield the tree's items in a preorder traversal sequence. + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + + >>> for value in t.iterate_postorder(): + ... print(value) + 13 + 22 + 25 + 66 + 75 + 50 + + """ + if self.root is not None: + yield from self._iterate_postorder(self.root) + + def _iterate_leaves(self, node: Node): + if node.left is not None: + yield from self._iterate_leaves(node.left) + if node.right is not None: + yield from self._iterate_leaves(node.right) + if node.left is None and node.right is None: + yield node.value + + def iterate_leaves(self): + """ + Iterate only the leaf nodes in the tree. + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + + >>> for value in t.iterate_leaves(): + ... print(value) + 13 + 66 + + """ + if self.root is not None: + yield from self._iterate_leaves(self.root) + + def _iterate_by_depth(self, node: Node, depth: int): + if depth == 0: + yield node.value + else: + assert depth > 0 + if node.left is not None: + yield from self._iterate_by_depth(node.left, depth - 1) + if node.right is not None: + yield from self._iterate_by_depth(node.right, depth - 1) + + def iterate_nodes_by_depth(self, depth: int): + """ + Iterate only the leaf nodes in the tree. + + >>> t = BinaryTree() + >>> t.insert(50) + >>> t.insert(75) + >>> t.insert(25) + >>> t.insert(66) + >>> t.insert(22) + >>> t.insert(13) + + >>> for value in t.iterate_nodes_by_depth(2): + ... print(value) + 22 + 66 + + >>> for value in t.iterate_nodes_by_depth(3): + ... print(value) + 13 + + """ + if self.root is not None: + yield from self._iterate_by_depth(self.root, depth) + + def _depth(self, node: Node, sofar: int) -> int: + depth_left = sofar + 1 + depth_right = sofar + 1 + if node.left is not None: + depth_left = self._depth(node.left, sofar + 1) + if node.right is not None: + depth_right = self._depth(node.right, sofar + 1) + return max(depth_left, depth_right) + + def depth(self): + """ + Returns the max height (depth) of the tree in plies (edge distance + from root). + + >>> t = BinaryTree() + >>> t.depth() + 0 + + >>> t.insert(50) + >>> t.depth() + 1 + + >>> t.insert(65) + >>> t.depth() + 2 + + >>> t.insert(33) + >>> t.depth() + 2 + + >>> t.insert(2) + >>> t.insert(1) + >>> t.depth() + 4 + + """ + if self.root is None: + return 0 + return self._depth(self.root, 0) + + def height(self): + return self.depth() + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/letter_compress.py b/letter_compress.py index 4374edd..378ecbc 100644 --- a/letter_compress.py +++ b/letter_compress.py @@ -4,7 +4,6 @@ import bitstring from collect.bidict import bidict - special_characters = bidict( { ' ': 27, @@ -26,6 +25,12 @@ def compress(uncompressed: str) -> bytes: >>> binascii.hexlify(compress('this is a test')) b'a2133da67b0ee859d0' + >>> binascii.hexlify(compress('scot')) + b'98df40' + + >>> binascii.hexlify(compress('scott')) + b'98df4a00' + """ compressed = bitstring.BitArray() for (n, letter) in enumerate(uncompressed): @@ -50,11 +55,15 @@ def decompress(kompressed: bytes) -> str: >>> decompress(binascii.unhexlify(b'a2133da67b0ee859d0')) 'this is a test' + >>> decompress(binascii.unhexlify(b'98df4a00')) + 'scott' + """ decompressed = '' compressed = bitstring.BitArray(kompressed) for chunk in compressed.cut(5): chunk = chunk.uint + print(f'0x{chunk:x}') if chunk == 0: break elif 1 <= chunk <= 26: diff --git a/list_utils.py b/list_utils.py index c04a534..182e2bc 100644 --- a/list_utils.py +++ b/list_utils.py @@ -2,7 +2,7 @@ from collections import Counter from itertools import islice -from typing import Any, Iterator, List, Mapping +from typing import Any, Iterator, List, Mapping, Sequence def shard(lst: List[Any], size: int) -> Iterator[Any]: @@ -97,6 +97,19 @@ def dedup_list(lst: List[Any]) -> List[Any]: return list(set(lst)) +def uniq(lst: List[Any]) -> List[Any]: + """ + Alias for dedup_list. + + """ + return dedup_list(lst) + + +def ngrams(lst: Sequence[Any], n): + for i in range(len(lst) - n + 1): + yield lst[i:i + n] + + if __name__ == '__main__': import doctest doctest.testmod() diff --git a/profanity_filter.py b/profanity_filter.py index e1b4743..f238e7d 100755 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -484,12 +484,14 @@ class ProfanityFilter(object): if len(words) > 1: for bigram in string_utils.ngrams_presplit(words, 2): + bigram = ' '.join(bigram) if self.is_bad_word(bigram): logger.debug('"{bigram}" is profanity') return True if len(words) > 2: for trigram in string_utils.ngrams_presplit(words, 3): + trigram = ' '.join(trigram) if self.is_bad_word(trigram): logger.debug('"{trigram}" is profanity') return True diff --git a/string_utils.py b/string_utils.py index 3aaf1d7..b3019cf 100644 --- a/string_utils.py +++ b/string_utils.py @@ -14,6 +14,8 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple import unicodedata from uuid import uuid4 +import list_utils + logger = logging.getLogger(__name__) NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$") @@ -1282,12 +1284,12 @@ def ngrams(txt: str, n: int): """ words = txt.split() - return ngrams_presplit(words, n) + for ngram in ngrams_presplit(words, n): + return ' '.join(ngram) def ngrams_presplit(words: Iterable[str], n: int): - for ngram in zip(*[words[i:] for i in range(n)]): - yield(' '.join(ngram)) + return list_utils.ngrams(words, n) def bigrams(txt: str): -- 2.47.1