X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=src%2Fpyutils%2Fcompress%2Fletter_compress.py;h=21a15b728e94c8a0de44ec9c4a6c2de662ba2ad9;hb=993b0992473c12294ed659e52b532e1c8cf9cd1e;hp=c631803735b0562b105a9697997275180ee3df0f;hpb=b38920f24d1ac948958480c540bc4b8436186765;p=pyutils.git diff --git a/src/pyutils/compress/letter_compress.py b/src/pyutils/compress/letter_compress.py index c631803..21a15b7 100644 --- a/src/pyutils/compress/letter_compress.py +++ b/src/pyutils/compress/letter_compress.py @@ -2,7 +2,12 @@ # © Copyright 2021-2022, Scott Gasch -"""A simple toy compression helper for lowercase ascii text.""" +""" +This is a simple, honestly, toy compression scheme that uses a custom +alphabet of 32 characters which can each be represented in five bits +instead of eight. It therefore reduces the size of data composed of +only those letters by 37.5% without loss. +""" import bitstring @@ -24,6 +29,12 @@ def compress(uncompressed: str) -> bytes: form will be 5/8th the size of the original. Words can be lower case letters or special_characters (above). + Args: + uncompressed: the uncompressed string to be compressed + + Returns: + the compressed bytes + >>> import binascii >>> binascii.hexlify(compress('this is a test')) b'a2133da67b0ee859d0' @@ -51,11 +62,17 @@ def compress(uncompressed: str) -> bytes: return compressed.bytes -def decompress(kompressed: bytes) -> str: +def decompress(compressed: bytes) -> str: """ Decompress a previously compressed stream of bytes back into its original form. + Args: + compressed: the compressed data to decompress + + Returns: + The decompressed string + >>> import binascii >>> decompress(binascii.unhexlify(b'a2133da67b0ee859d0')) 'this is a test' @@ -65,7 +82,7 @@ def decompress(kompressed: bytes) -> str: """ decompressed = '' - compressed = bitstring.BitArray(kompressed) + kompressed = bitstring.BitArray(compressed) # There are compressed messages that legitimately end with the # byte 0x00. 
The message "scott" is an example; compressed it is @@ -90,9 +107,9 @@ def decompress(kompressed: bytes) -> str: # if the compressed message didn't end in 0x00), adding an extra # 0x00 is a no op because the codepoint 0b00000 is a "stop" message # so we'll ignore the extras. - compressed.append("uint:8=0") + kompressed.append("uint:8=0") - for chunk in compressed.cut(5): + for chunk in kompressed.cut(5): chunk = chunk.uint if chunk == 0: break