5 from collect.bidict import bidict
7 special_characters = bidict(
def compress(uncompressed: str) -> bytes:
    """
    Compress a word sequence into a stream of bytes.  The compressed
    form will be 5/8th the size of the original.  Words can be lower
    case letters or special_characters (above).

    Every character is stored as one 5-bit code: 'a'..'z' become 1..26
    and any other character takes the code that special_characters maps
    it to.  The bit stream is then padded with zero bits up to the next
    byte boundary.

    Args:
        uncompressed: the text to compress.

    Returns:
        The packed 5-bit codes, zero padded to a whole number of bytes.

    Raises:
        ValueError: if uncompressed contains a character that is neither
            a lower case letter nor a key of special_characters.

    >>> import binascii
    >>> binascii.hexlify(compress('this is a test'))
    b'a2133da67b0ee859d0'

    >>> binascii.hexlify(compress('scot'))
    b'98df40'

    >>> binascii.hexlify(compress('scott'))
    b'98df4a00'

    """
    compressed = bitstring.BitArray()
    for letter in uncompressed:
        if 'a' <= letter <= 'z':
            bits = ord(letter) - ord('a') + 1  # 1..26
        elif letter in special_characters:
            # NOTE(review): assumes every special_characters code fits in
            # 5 bits; the "uint:5" append below depends on it.
            bits = special_characters[letter]
        else:
            raise ValueError(
                f'"{uncompressed}" contains uncompressable char="{letter}"'
            )
        compressed.append(f"uint:5={bits}")

    # Zero-pad the tail so the stream ends on a byte boundary.
    while len(compressed) % 8 != 0:
        compressed.append("uint:1=0")
    return compressed.bytes
def decompress(kompressed: bytes) -> str:
    """
    Decompress a previously compressed stream of bytes back into
    its original form.

    The input is read as a sequence of 5-bit codes: 1..26 become
    'a'..'z' and any other non-zero code is looked up in
    special_characters.inverse.

    Args:
        kompressed: bytes previously produced by compress().

    Returns:
        The decompressed text.

    >>> import binascii
    >>> decompress(binascii.unhexlify(b'a2133da67b0ee859d0'))
    'this is a test'

    >>> decompress(binascii.unhexlify(b'98df4a00'))
    'scott'

    """
    compressed = bitstring.BitArray(kompressed)
    letters = []
    for chunk in compressed.cut(5):
        # A trailing chunk shorter than 5 bits is only the byte-alignment
        # remainder and can never be a complete code.
        if len(chunk) < 5:
            break
        code = chunk.uint
        # compress() never emits code 0 for a character; it only shows up
        # as zero padding, so it marks the end of the data.
        if code == 0:
            break
        if 1 <= code <= 26:
            letter = chr(code - 1 + ord('a'))
        else:
            # NOTE(review): indexing with [0] presumes the inverse mapping
            # yields a sequence of keys per code -- confirm against bidict.
            letter = special_characters.inverse[code][0]
        letters.append(letter)
    return ''.join(letters)
77 if __name__ == '__main__':