#!/usr/bin/env python3 import bitstring import bidict special_characters = bidict.bidict( { ' ': 27, '.': 28, ',': 29, "-": 30, '"': 31, } ) def compress(uncompressed: str) -> bytes: """ Compress a word sequence into a stream of bytes. The compressed form will be 5/8th the size of the original. Words can be lower case letters or special_characters (above). >>> import binascii >>> binascii.hexlify(compress('this is a test')) b'99d12d225a06a6494c' """ compressed = bitstring.BitArray() for (n, letter) in enumerate(uncompressed): if 'a' <= letter <= 'z': bits = ord(letter) - ord('a') + 1 # 1..26 else: if letter not in special_characters: raise Exception(f'"{uncompressed}" contains uncompressable char="{letter}"') bits = special_characters[letter] compressed.append(f"uint:5={bits}") while len(compressed) % 8 != 0: compressed.append("uint:1=0") return compressed.bytes def decompress(kompressed: bytes) -> str: """ Decompress a previously compressed stream of bytes back into its original form. >>> import binascii >>> decompress(binascii.unhexlify(b'99d12d225a06a6494c')) 'this is a test' """ decompressed = '' compressed = bitstring.BitArray(kompressed) for chunk in compressed.cut(5): chunk = chunk.uint if chunk == 0: break elif 1 <= chunk <= 26: letter = chr(chunk - 1 + ord('a')) else: letter = special_characters.inverse[chunk][0] decompressed += letter return decompressed if __name__ == '__main__': import doctest doctest.testmod()