X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=directory_filter.py;h=69e5547255e20ac66cfea81658867c8697501126;hb=532df2c5b57c7517dfb3dddd8c1358fbadf8baf3;hp=b057f85a1c8c728a497171a37c4a216db231ba30;hpb=7ff2af6fe7bffea90dc4a31c93140c189917c659;p=python_utils.git diff --git a/directory_filter.py b/directory_filter.py index b057f85..69e5547 100644 --- a/directory_filter.py +++ b/directory_filter.py @@ -1,5 +1,15 @@ #!/usr/bin/env python3 +# © Copyright 2021-2022, Scott Gasch + +"""Two predicates that can help avoid unnecessary disk I/O by +detecting if a particular file is identical to the contents about to +be written or if a particular directory already contains a file that +is identical to the one to be written. See class docs below for +examples. + +""" + import hashlib import logging import os @@ -58,18 +68,18 @@ class DirectoryFileFilter(object): assert mtime is not None if self.mtime_by_filename.get(filename, 0) != mtime: md5 = file_utils.get_file_md5(filename) - logger.debug(f'Computed/stored {filename}\'s MD5 at ts={mtime} ({md5})') + logger.debug('Computed/stored %s\'s MD5 at ts=%.2f (%s)', filename, mtime, md5) self.mtime_by_filename[filename] = mtime self.md5_by_filename[filename] = md5 def apply(self, item: Any, filename: str) -> bool: self._update_file(filename) file_md5 = self.md5_by_filename.get(filename, 0) - logger.debug(f'{filename}\'s checksum is {file_md5}') + logger.debug('%s\'s checksum is %s', filename, file_md5) mem_hash = hashlib.md5() mem_hash.update(item) md5 = mem_hash.hexdigest() - logger.debug(f'Item\'s checksum is {md5}') + logger.debug('Item\'s checksum is %s', md5) return md5 != file_md5