5 from typing import Any, Optional
class DirectoryFileFilter(object):
    """A predicate that returns False when the content about to be
    written to a file is identical to what that file already contains,
    so that the caller can skip the write.

    MD5 digests are cached per path and recomputed only when the
    file's modification time differs from the one recorded alongside
    the cached digest.
    """

    def __init__(self, directory: str):
        """Index every regular file found directly inside *directory*.

        Args:
            directory: path of the directory to index.

        Raises:
            ValueError: if *directory* does not exist.
        """
        super().__init__()
        if not file_utils.does_directory_exist(directory):
            raise ValueError(directory)
        self.directory = directory
        self.md5_by_filename = {}  # path -> hex MD5 digest of the file
        self.mtime_by_filename = {}  # path -> mtime when digest was taken
        self._update()

    def _update(self):
        """Rescan the directory, refreshing the digest of changed files.

        NOTE(review): cache entries of files that have since been
        deleted from the directory are not purged here.
        """
        for direntry in os.scandir(self.directory):
            if direntry.is_file(follow_symlinks=True):
                mtime = direntry.stat(follow_symlinks=True).st_mtime
                # Keep the historical key format; apply() callers look
                # entries up by the path string they pass in.
                path = f'{self.directory}/{direntry.name}'
                self._update_file(path, mtime)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Refresh the cached digest of *filename* if its mtime changed.

        Args:
            filename: path of an existing file.
            mtime: the file's modification time when the caller already
                knows it; looked up via file_utils when omitted.
        """
        assert file_utils.does_file_exist(filename)
        # Only stat the file when the caller did not supply an mtime.
        if mtime is None:
            mtime = file_utils.get_file_raw_mtime(filename)
        # Test membership explicitly rather than defaulting to 0, so a
        # file whose mtime is exactly 0 still gets hashed the first time.
        if (
            filename not in self.mtime_by_filename
            or self.mtime_by_filename[filename] != mtime
        ):
            md5 = file_utils.get_file_md5(filename)
            self.mtime_by_filename[filename] = mtime
            self.md5_by_filename[filename] = md5

    def apply(self, item: Any, filename: str) -> bool:
        """Decide whether *item* should be written to *filename*.

        Args:
            item: the proposed file content; it is fed to hashlib.md5,
                so it must be a bytes-like object.
            filename: path of the existing file to compare against.

        Returns:
            True if the MD5 of *item* differs from the file's digest,
            False if they are identical.
        """
        self._update_file(filename)
        file_md5 = self.md5_by_filename.get(filename)
        return hashlib.md5(item).hexdigest() != file_md5


class DirectoryAllFilesFilter(DirectoryFileFilter):
    """A predicate that returns False when a file to-be-written to a
    particular directory is identical to any file already present in
    that same directory.
    """

    def __init__(self, directory: str):
        """Index *directory*, collecting the digests of all its files.

        Args:
            directory: path of the directory to index.

        Raises:
            ValueError: if *directory* does not exist.
        """
        # Must exist before the base initializer runs: that initializer
        # scans the directory through our _update_file override.
        self.all_md5s = set()
        super().__init__(directory)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Refresh *filename*'s digest and keep ``all_md5s`` in sync.

        Args:
            filename: path of an existing file.
            mtime: the file's modification time, if already known.
        """
        previous = self.md5_by_filename.get(filename)
        super()._update_file(filename, mtime)
        current = self.md5_by_filename.get(filename)
        if current == previous:
            return
        # Drop the superseded digest unless another file still has it;
        # otherwise content that no longer exists would keep matching.
        if previous is not None and previous not in self.md5_by_filename.values():
            self.all_md5s.discard(previous)
        if current is not None:
            self.all_md5s.add(current)

    def apply(self, item: Any) -> bool:
        """Decide whether *item* duplicates a file in the directory.

        Unlike the base class, no filename is taken: *item* is compared
        against every indexed file.

        Args:
            item: the proposed file content; it is fed to hashlib.md5,
                so it must be a bytes-like object.

        Returns:
            True if no indexed file has the same MD5 as *item*,
            False otherwise.
        """
        # Pick up files added or modified since the last scan.
        self._update()
        return hashlib.md5(item).hexdigest() not in self.all_md5s