5 from typing import Any, Optional
8 class DirectoryFileFilter(object):
9 """A predicate that will return False if when a proposed file's
10 content to-be-written is identical to the contents of the file;
14 def __init__(self, directory: str):
17 if not file_utils.does_directory_exist(directory):
18 raise ValueError(directory)
19 self.directory = directory
20 self.md5_by_filename = {}
21 self.mtime_by_filename = {}
25 for direntry in os.scandir(self.directory):
26 if direntry.is_file(follow_symlinks=True):
27 mtime = direntry.stat(follow_symlinks=True).st_mtime
28 path = f'{self.directory}/{direntry.name}'
29 self._update_file(path, mtime)
31 def _update_file(self, filename: str, mtime: Optional[float] = None):
33 assert file_utils.does_file_exist(filename)
35 mtime = file_utils.get_file_raw_mtime(filename)
36 if self.mtime_by_filename.get(filename, 0) != mtime:
37 md5 = file_utils.get_file_md5(filename)
38 self.mtime_by_filename[filename] = mtime
39 self.md5_by_filename[filename] = md5
41 def apply(self, item: Any, filename: str) -> bool:
42 self._update_file(filename)
43 file_md5 = self.md5_by_filename.get(filename, 0)
44 mem_hash = hashlib.md5()
46 md5 = mem_hash.hexdigest()
47 return md5 != file_md5
50 class DirectoryAllFilesFilter(DirectoryFileFilter):
51 """A predicate that will return False if a file to-be-written to a
52 particular directory is identical to any other file in that same
56 def __init__(self, directory: str):
58 super().__init__(directory)
61 def _update_file(self, filename: str, mtime: Optional[float] = None):
63 assert file_utils.does_file_exist(filename)
65 mtime = file_utils.get_file_raw_mtime(filename)
66 if self.mtime_by_filename.get(filename, 0) != mtime:
67 md5 = file_utils.get_file_md5(filename)
68 self.mtime_by_filename[filename] = mtime
69 self.md5_by_filename[filename] = md5
70 self.all_md5s.add(md5)
72 def apply(self, item: Any) -> bool:
74 mem_hash = hashlib.md5()
76 md5 = mem_hash.hexdigest()
77 return md5 not in self.all_md5s