#!/usr/bin/env python3
+# © Copyright 2021-2022, Scott Gasch
+
+"""A simple compression helper for lowercase ASCII text."""
+
import bitstring
-from collect.bidict import bidict
+from collect.bidict import BiDict
-special_characters = bidict(
+special_characters = BiDict(
{
' ': 27,
'.': 28,
"""
compressed = bitstring.BitArray()
- for (n, letter) in enumerate(uncompressed):
+ for letter in uncompressed:
if 'a' <= letter <= 'z':
- bits = ord(letter) - ord('a') + 1 # 1..26
+ bits = ord(letter) - ord('a') + 1 # 1..26
else:
if letter not in special_characters:
raise Exception(f'"{uncompressed}" contains uncompressable char="{letter}"')
# complete the partial 4th byte. In the 4th byte, however, one
# bit is information and seven are padding.
#
- # It's likely that this APIs client code will treat a zero byte as
- # a termination character and not regard it as part of the
- # message. This is a bug in the client code.
+ # It's likely that this API's client code will treat a zero byte as
+ # a termination character and not regard it as a legitimate part
+ # of the message. To be clear, that is a bug in the client code.
#
# However, it's a bug we can work around:
-
+ #
# Here, I'm appending an extra 0x00 byte to the compressed message
# passed in. If the client code dropped the last 0x00 byte (and,
# with it, some of the legitimate message bits) by treating it as
if __name__ == '__main__':
import doctest
+
doctest.testmod()