--- /dev/null
+#!/usr/bin/env python3
+
+class bidict(dict):
+ def __init__(self, *args, **kwargs):
+ super(bidict, self).__init__(*args, **kwargs)
+ self.inverse = {}
+ for key, value in self.items():
+ self.inverse.setdefault(value, []).append(key)
+
+ def __setitem__(self, key, value):
+ if key in self:
+ self.inverse[self[key]].remove(key)
+ super(bidict, self).__setitem__(key, value)
+ self.inverse.setdefault(value, []).append(key)
+
+ def __delitem__(self, key):
+ self.inverse.setdefault(self[key], []).remove(key)
+ if self[key] in self.inverse and not self.inverse[self[key]]:
+ del self.inverse[self[key]]
+ super(bidict, self).__delitem__(key)
--- /dev/null
+#!/usr/bin/env python3
+
+import bitstring
+
+import bidict
+
+
+special_characters = bidict.bidict(
+ {
+ ' ': 27,
+ '.': 28,
+ ',': 29,
+ "-": 30,
+ '"': 31,
+ }
+)
+
+
+def compress(uncompressed: str) -> bytes:
+ """
+ Compress a word sequence into a stream of bytes. The compressed
+ form will be 5/8th the size of the original. Words can be lower
+ case letters or special_characters (above).
+
+ >>> import binascii
+ >>> binascii.hexlify(compress('this is a test'))
+ b'99d12d225a06a6494c'
+
+ """
+ compressed = bitstring.BitArray()
+ for (n, letter) in enumerate(uncompressed):
+ if 'a' <= letter <= 'z':
+ bits = ord(letter) - ord('a') + 1 # 1..26
+ else:
+ if letter not in special_characters:
+ raise Exception(f'"{uncompressed}" contains uncompressable char="{letter}"')
+ bits = special_characters[letter]
+ compressed.append(f"uint:5={bits}")
+ while len(compressed) % 8 != 0:
+ compressed.append("uint:1=0")
+ return compressed.bytes
+
+
+def decompress(kompressed: bytes) -> str:
+ """
+ Decompress a previously compressed stream of bytes back into
+ its original form.
+
+ >>> import binascii
+ >>> decompress(binascii.unhexlify(b'99d12d225a06a6494c'))
+ 'this is a test'
+
+ """
+ decompressed = ''
+ compressed = bitstring.BitArray(kompressed)
+ for chunk in compressed.cut(5):
+ chunk = chunk.uint
+ if chunk == 0:
+ break
+ elif 1 <= chunk <= 26:
+ letter = chr(chunk - 1 + ord('a'))
+ else:
+ letter = special_characters.inverse[chunk][0]
+ decompressed += letter
+ return decompressed
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
--- /dev/null
+#!/usr/bin/env python3
+
+import random
+import math
+import unittest
+
+import bootstrap
+import letter_compress
+import unittest_utils as uu
+
+
+class TestLetterCompress(unittest.TestCase):
+
+ def test_with_random_strings(self):
+ alphabet = 'abcdefghijklmnopqrstuvwxyz .,"-'
+ for n in range(20):
+ message = ""
+ for letter in random.choices(alphabet, k=random.randrange(10, 5000)):
+ message += letter
+ mlen = len(message)
+ compressed = letter_compress.compress(message)
+ clen = len(compressed)
+ self.assertEqual(math.ceil(mlen * 5.0 / 8.0), clen)
+ decompressed = letter_compress.decompress(compressed)
+ self.assertEqual(decompressed, message, f'The bad message string was "{message}"')
+
+
+if __name__ == '__main__':
+ bootstrap.initialize(unittest.main)()