#!/usr/bin/env python3
"""Predicates that help avoid unnecessary disk I/O.

Two predicates are provided: one detects whether a particular file is
identical to the contents about to be written, and the other detects
whether a particular directory already contains a file that is
identical to the one to be written.  See the class docs below for
examples.
"""

import hashlib
import logging
import os
assert mtime is not None
if self.mtime_by_filename.get(filename, 0) != mtime:
md5 = file_utils.get_file_md5(filename)
- logger.debug(f'Computed/stored {filename}\'s MD5 at ts={mtime} ({md5})')
+ logger.debug('Computed/stored %s\'s MD5 at ts=%.2f (%s)', filename, mtime, md5)
self.mtime_by_filename[filename] = mtime
self.md5_by_filename[filename] = md5
def apply(self, item: Any, filename: str) -> bool:
    """Report whether item differs from the contents recorded for filename.

    Calls self._update_file(filename) first, then compares the MD5
    stored in self.md5_by_filename for that file with the MD5 of item.

    Args:
        item: the payload about to be written.  Bytes-like objects are
            hashed as-is; a str is encoded as UTF-8 before hashing
            (NOTE(review): assumes callers write text as UTF-8 --
            confirm against the code that performs the write).
        filename: path of the file whose stored MD5 is compared.

    Returns:
        True if item's MD5 differs from the MD5 stored for filename
        (including when no MD5 is stored for it), False if the two
        checksums are identical.
    """
    self._update_file(filename)
    # The 0 default never equals a hex digest, so a file with no stored
    # checksum is always reported as different.
    file_md5 = self.md5_by_filename.get(filename, 0)
    logger.debug('%s\'s checksum is %s', filename, file_md5)
    # hashlib only accepts bytes-like input; a str would raise TypeError.
    if isinstance(item, str):
        item = item.encode('utf-8')
    md5 = hashlib.md5(item).hexdigest()
    logger.debug('Item\'s checksum is %s', md5)
    return md5 != file_md5