8 special_characters = bidict.bidict(
def compress(uncompressed: str) -> bytes:
    """
    Compress a word sequence into a stream of bytes.

    Every character is packed into a 5-bit code, so the compressed
    form will be about 5/8th the size of the original.  Lower case
    letters 'a'..'z' are encoded as 1..26; any other character must
    be a key of special_characters (above) and is encoded as the
    value it maps to there.  The resulting bit stream is zero-padded
    up to a whole number of bytes.

    Args:
        uncompressed: the text to compress.

    Returns:
        The packed 5-bit codes, as bytes.

    Raises:
        ValueError: if uncompressed contains a character that is
            neither a lower case letter nor in special_characters.

    >>> import binascii
    >>> binascii.hexlify(compress('scot'))
    b'98df40'

    """
    compressed = bitstring.BitArray()
    for letter in uncompressed:
        if 'a' <= letter <= 'z':
            bits = ord(letter) - ord('a') + 1  # 1..26
        elif letter in special_characters:
            # NOTE(review): assumes every special_characters value fits
            # in 5 bits and does not collide with 1..26 -- confirm
            # against the table definition.
            bits = special_characters[letter]
        else:
            raise ValueError(
                f'"{uncompressed}" contains uncompressable char="{letter}"'
            )
        compressed.append(f"uint:5={bits}")

    # Zero-pad so that the bit stream ends on a byte boundary; at most
    # seven padding bits are ever needed.
    while len(compressed) % 8 != 0:
        compressed.append("uint:1=0")
    return compressed.bytes
44 def decompress(kompressed: bytes) -> str:
46 Decompress a previously compressed stream of bytes back into
50 >>> decompress(binascii.unhexlify(b'99d12d225a06a6494c'))
55 compressed = bitstring.BitArray(kompressed)
56 for chunk in compressed.cut(5):
60 elif 1 <= chunk <= 26:
61 letter = chr(chunk - 1 + ord('a'))
63 letter = special_characters.inverse[chunk][0]
64 decompressed += letter
68 if __name__ == '__main__':