+#!/usr/bin/env python3
+
+import bitstring
+
+import bidict
+
+
# Five-bit codes for the punctuation symbols the codec understands. The
# symbols are numbered consecutively from 27 in the order written below:
#   ' ' -> 27, '.' -> 28, ',' -> 29, '-' -> 30, '"' -> 31
# A bidict is used so one table serves both directions: symbol -> code by
# indexing, and code -> symbol through ``.inverse``.
special_characters = bidict.bidict(
    {symbol: code for code, symbol in enumerate(' .,-"', start=27)}
)
+
+
def compress(uncompressed: str) -> bytes:
    """
    Compress a word sequence into a stream of bytes.

    Every character is packed into a 5-bit code: 'a'..'z' become 1..26
    and the characters in special_characters (above) use the codes
    mapped there. The packed bits are zero-padded up to the next byte
    boundary, so the result is ceil(5 * len(uncompressed) / 8) bytes
    long, i.e. roughly 5/8th the size of the original.

    Raises ValueError if the input contains anything other than lower
    case letters or special_characters.

    >>> import binascii
    >>> binascii.hexlify(compress('this is a test'))
    b'a2133da67b0ee859d0'

    """
    compressed = bitstring.BitArray()
    for letter in uncompressed:
        if 'a' <= letter <= 'z':
            bits = ord(letter) - ord('a') + 1  # 1..26
        else:
            try:
                bits = special_characters[letter]
            except KeyError:
                raise ValueError(
                    f'"{uncompressed}" contains uncompressable char="{letter}"'
                ) from None
        compressed.append(bitstring.Bits(uint=bits, length=5))
    # tobytes() pads the tail with zero bits up to a whole number of bytes.
    return compressed.tobytes()
+
+
def decompress(kompressed: bytes) -> str:
    """
    Decompress a previously compressed stream of bytes back into
    its original form.

    The input is read as consecutive 5-bit codes: 1..26 map back to
    'a'..'z', 27..31 are looked up in special_characters, and 0 marks
    the end of the data (compress pads the last byte with zero bits).
    A trailing fragment shorter than 5 bits cannot be a code; it is
    treated as padding and ignored.

    >>> import binascii
    >>> decompress(binascii.unhexlify(b'a2133da67b0ee859d0'))
    'this is a test'

    """
    letters = []
    for chunk in bitstring.BitArray(kompressed).cut(5):
        if len(chunk) < 5:
            # Leftover bits that do not form a whole code: padding only.
            break
        code = chunk.uint
        if code == 0:
            # Zero is reserved as the end-of-data / padding marker.
            break
        if 1 <= code <= 26:
            letters.append(chr(code - 1 + ord('a')))
        else:
            letters.append(special_characters.inverse[code])
    return ''.join(letters)
+
+
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    from doctest import testmod

    testmod()