X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=directory_filter.py;h=69e5547255e20ac66cfea81658867c8697501126;hb=532df2c5b57c7517dfb3dddd8c1358fbadf8baf3;hp=b076badf25dff7e34e2358b3e45d52c49692a424;hpb=5317c50ce7a96a37acfab3800c0935580766dbbf;p=python_utils.git diff --git a/directory_filter.py b/directory_filter.py index b076bad..69e5547 100644 --- a/directory_filter.py +++ b/directory_filter.py @@ -1,5 +1,15 @@ #!/usr/bin/env python3 +# © Copyright 2021-2022, Scott Gasch + +"""Two predicates that can help avoid unnecessary disk I/O by +detecting if a particular file is identical to the contents about to +be written or if a particular directory already contains a file that +is identical to the one to be written. See class docs below for +examples. + +""" + import hashlib import logging import os @@ -55,21 +65,21 @@ class DirectoryFileFilter(object): assert file_utils.does_file_exist(filename) if mtime is None: mtime = file_utils.get_file_raw_mtime(filename) - assert mtime + assert mtime is not None if self.mtime_by_filename.get(filename, 0) != mtime: md5 = file_utils.get_file_md5(filename) - logger.debug(f'Computed/stored {filename}\'s MD5 at ts={mtime} ({md5})') + logger.debug('Computed/stored %s\'s MD5 at ts=%.2f (%s)', filename, mtime, md5) self.mtime_by_filename[filename] = mtime self.md5_by_filename[filename] = md5 def apply(self, item: Any, filename: str) -> bool: self._update_file(filename) file_md5 = self.md5_by_filename.get(filename, 0) - logger.debug(f'{filename}\'s checksum is {file_md5}') + logger.debug('%s\'s checksum is %s', filename, file_md5) mem_hash = hashlib.md5() mem_hash.update(item) md5 = mem_hash.hexdigest() - logger.debug(f'Item\'s checksum is {md5}') + logger.debug('Item\'s checksum is %s', md5) return md5 != file_md5 @@ -112,7 +122,7 @@ class DirectoryAllFilesFilter(DirectoryFileFilter): assert file_utils.does_file_exist(filename) if mtime is None: mtime = file_utils.get_file_raw_mtime(filename) - assert mtime + assert mtime is not None if self.mtime_by_filename.get(filename, 0) != mtime: md5 = file_utils.get_file_md5(filename) self.mtime_by_filename[filename] = mtime @@ -120,7 +130,7 @@ class DirectoryAllFilesFilter(DirectoryFileFilter): self.all_md5s.add(md5) def apply(self, item: Any, ignored_filename: str = None) -> bool: - assert not ignored_filename + assert ignored_filename is None self._update() mem_hash = hashlib.md5() mem_hash.update(item)