letter_compress.py

   1 #!/usr/bin/env python3
   2
   3 import bitstring
   4
   5 import bidict
   6
   7
   8 special_characters = bidict.bidict(
   9     {
  10         ' ': 27,
  11         '.': 28,
  12         ',': 29,
  13         "-": 30,
  14         '"': 31,
  15     }
  16 )
  17
  18
  19 def compress(uncompressed: str) -> bytes:
  20     """
  21     Compress a word sequence into a stream of bytes.  The compressed
  22     form will be 5/8th the size of the original.  Words can be lower
  23     case letters or special_characters (above).
  24
  25     >>> import binascii
  26     >>> binascii.hexlify(compress('this is a test'))
  27     b'99d12d225a06a6494c'
  28
  29     """
  30     compressed = bitstring.BitArray()
  31     for (n, letter) in enumerate(uncompressed):
  32         if 'a' <= letter <= 'z':
  33             bits = ord(letter) - ord('a') + 1   # 1..26
  34         else:
  35             if letter not in special_characters:
  36                 raise Exception(f'"{uncompressed}" contains uncompressable char="{letter}"')
  37             bits = special_characters[letter]
  38         compressed.append(f"uint:5={bits}")
  39     while len(compressed) % 8 != 0:
  40         compressed.append("uint:1=0")
  41     return compressed.bytes
  42
  43
  44 def decompress(kompressed: bytes) -> str:
  45     """
  46     Decompress a previously compressed stream of bytes back into
  47     its original form.
  48
  49     >>> import binascii
  50     >>> decompress(binascii.unhexlify(b'99d12d225a06a6494c'))
  51     'this is a test'
  52
  53     """
  54     decompressed = ''
  55     compressed = bitstring.BitArray(kompressed)
  56     for chunk in compressed.cut(5):
  57         chunk = chunk.uint
  58         if chunk == 0:
  59             break
  60         elif 1 <= chunk <= 26:
  61             letter = chr(chunk - 1 + ord('a'))
  62         else:
  63             letter = special_characters.inverse[chunk][0]
  64         decompressed += letter
  65     return decompressed
  66
  67
  68 if __name__ == '__main__':
  69     import doctest
  70     doctest.testmod()