Workaround likely client bug in letter_compress. Update tests in bst.

author Scott Gasch <scott@gasch.org>

Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)

committer Scott Gasch <scott@gasch.org>

Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)
author Scott Gasch <scott@gasch.org>
Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)
committer Scott Gasch <scott@gasch.org>
Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)
diff --git a/.gitignore b/.gitignore

index 4ea5b145b1d013819759fd6b454ef99d90b2ec88..28e68dd0b5d167f7800c877fe85a9e4aa3c1b94e 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,3 @@ dateparse/dateparse_utilsLexer.tokens
  dateparse/dateparse_utilsListener.py
  dateparse/dateparse_utilsParser.py
  dateparse/duration_utils.g4
-pip_install.sh
diff --git a/collect/bst.py b/collect/bst.py

index 94570f49be8490b4656d2b4ea12185a44c636212..8e95fa23aeb09c4d86ffd74055358fd54f2002c7 100644 (file)
--- a/collect/bst.py
+++ b/collect/bst.py
@@ -120,8 +120,8 @@ class BinaryTree(object):
          75
          85
  
-        >>> t.__delitem__(22)
-        True
+        >>> del t[22]  # Note: bool result is discarded
+
          >>> for value in t.iterate_inorder():
          ...     print(value)
          13
diff --git a/letter_compress.py b/letter_compress.py

index 378ecbcbf433f02c006a21633110e7f0b397dea0..d5a4d60ef06483ee07bd2761e10a5ee9bba7385e 100644 (file)
--- a/letter_compress.py
+++ b/letter_compress.py
@@ -16,8 +16,7 @@ special_characters = bidict(
  
  
  def compress(uncompressed: str) -> bytes:
-    """
-    Compress a word sequence into a stream of bytes.  The compressed
+    """Compress a word sequence into a stream of bytes.  The compressed
      form will be 5/8th the size of the original.  Words can be lower
      case letters or special_characters (above).
  
@@ -28,7 +27,7 @@ def compress(uncompressed: str) -> bytes:
      >>> binascii.hexlify(compress('scot'))
      b'98df40'
  
-    >>> binascii.hexlify(compress('scott'))
+    >>> binascii.hexlify(compress('scott'))  # Note the last byte
      b'98df4a00'
  
      """
@@ -61,9 +60,34 @@ def decompress(kompressed: bytes) -> str:
      """
      decompressed = ''
      compressed = bitstring.BitArray(kompressed)
+
+    # There are compressed messages that legitimately end with the
+    # byte 0x00.  The message "scott" is an example; compressed it is
+    # 0x98df4a00.  It's 5 characters long which means there are 5 x 5
+    # bits of compressed info (25 bits, just over 3 bytes).  The last
+    # (25th) bit in the stream happens to be a zero.  The compress code
+    # padded out the compressed message by adding seven more zeros to
+    # complete the partial 4th byte.  In the 4th byte, however, one
+    # bit is information and seven are padding.
+    #
+    # It's likely that this API's client code will treat a zero byte as
+    # a termination character and not regard it as part of the
+    # message.  This is a bug in the client code.
+    #
+    # However, it's a bug we can work around:
+
+    # Here, I'm appending an extra 0x00 byte to the compressed message
+    # passed in.  If the client code dropped the last 0x00 byte (and,
+    # with it, some of the legitimate message bits) by treating it as
+    # a termination mark, this 0x00 will replace it (and the missing
+    # message bits).  If the client code didn't drop the last 0x00 (or
+    # if the compressed message didn't end in 0x00), adding an extra
+    # 0x00 is a no-op because the codepoint 0b00000 is a "stop" message
+    # so we'll ignore the extras.
+    compressed.append("uint:8=0")
+
      for chunk in compressed.cut(5):
          chunk = chunk.uint
-        print(f'0x{chunk:x}')
          if chunk == 0:
              break
          elif 1 <= chunk <= 26:
diff --git a/pip_install.sh b/pip_install.sh

new file mode 100755 (executable)

index 0000000..9d40902
--- /dev/null
+++ b/pip_install.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Install or upgrade the Python packages listed below, one at a time.
+# Stops at the first package that fails to install (set -e).
+
+set -e
+
+# Bootstrap / upgrade pip for the python3 interpreter used below.
+python3 -m ensurepip --upgrade
+
+# Note: the PyPI "sklearn" stub is deprecated and fails on install;
+# scikit-learn is the real package, so only it is listed.
+for x in pip wheel aiohttp antlr4-python3-runtime astral bitstring python-dateutil \
+             grpcio holidays cloudpickle dill numpy protobuf psutil pyserial pytype \
+             pychromecast requests SpeechRecognition scikit-learn nltk; do
+    echo "--- Installing ${x} ---"
+    # Invoke pip through python3 so it targets the interpreter bootstrapped above.
+    python3 -m pip install -U "${x}"
+done
author	Scott Gasch <scott@gasch.org>
	Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)
committer	Scott Gasch <scott@gasch.org>
	Wed, 29 Sep 2021 16:03:57 +0000 (09:03 -0700)
.gitignore		patch \| blob \| history
collect/bst.py		patch \| blob \| history
letter_compress.py		patch \| blob \| history
pip_install.sh	[new file with mode: 0755]	patch \| blob