X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=directory_filter.py;h=6985831fc3a6e3d8fe24550c831293e40383b9a6;hb=bf611fe56efcdce0dada32d292b1587057ab6dd0;hp=b057f85a1c8c728a497171a37c4a216db231ba30;hpb=7ff2af6fe7bffea90dc4a31c93140c189917c659;p=python_utils.git diff --git a/directory_filter.py b/directory_filter.py index b057f85..6985831 100644 --- a/directory_filter.py +++ b/directory_filter.py @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +"""Two predicates that can help avoid unnecessary disk I/O by +detecting if a particular file is identical to the contents about to +be written or if a particular directory already contains a file that +is identical to the one to be written. See class docs below for +examples.""" + import hashlib import logging import os @@ -58,18 +64,18 @@ class DirectoryFileFilter(object): assert mtime is not None if self.mtime_by_filename.get(filename, 0) != mtime: md5 = file_utils.get_file_md5(filename) - logger.debug(f'Computed/stored {filename}\'s MD5 at ts={mtime} ({md5})') + logger.debug('Computed/stored %s\'s MD5 at ts=%.2f (%s)', filename, mtime, md5) self.mtime_by_filename[filename] = mtime self.md5_by_filename[filename] = md5 def apply(self, item: Any, filename: str) -> bool: self._update_file(filename) file_md5 = self.md5_by_filename.get(filename, 0) - logger.debug(f'{filename}\'s checksum is {file_md5}') + logger.debug('%s\'s checksum is %s', filename, file_md5) mem_hash = hashlib.md5() mem_hash.update(item) md5 = mem_hash.hexdigest() - logger.debug(f'Item\'s checksum is {md5}') + logger.debug('Item\'s checksum is %s', md5) return md5 != file_md5