5 from typing import Any, Optional
class DirectoryFileFilter(object):
    """A predicate that will return False if / when a proposed file's
    content to-be-written is identical to the contents of the file
    already on disk, so that calling code can skip the write.

    A digest is cached per file and is only recomputed when the file's
    mtime differs from the mtime recorded alongside the cached digest.
    """

    def __init__(self, directory: str):
        """Scan ``directory`` and cache a digest for every file in it.

        Args:
            directory: the directory whose files are tracked.

        Raises:
            ValueError: if ``file_utils.does_directory_exist`` rejects
                ``directory``.
        """
        if not file_utils.does_directory_exist(directory):
            raise ValueError(directory)
        self.directory = directory
        # path -> digest returned by file_utils.get_file_md5 for that path.
        self.md5_by_filename = {}
        # path -> mtime that was current when the digest above was computed.
        self.mtime_by_filename = {}
        self._update()

    def _update(self):
        """Refresh the cached digest of each file directly in the directory."""
        # The context manager closes the scandir iterator even when
        # _update_file raises part-way through the loop.
        with os.scandir(self.directory) as entries:
            for direntry in entries:
                if direntry.is_file(follow_symlinks=True):
                    mtime = direntry.stat(follow_symlinks=True).st_mtime
                    path = f'{self.directory}/{direntry.name}'
                    self._update_file(path, mtime)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Recompute the cached digest of ``filename`` if its mtime changed.

        Args:
            filename: path of the file to (re)hash; it must exist.
            mtime: the file's current mtime when the caller already has
                it; when None it is looked up with
                ``file_utils.get_file_raw_mtime``.

        Raises:
            AssertionError: if the file does not exist (only while
                assertions are enabled).
        """
        assert file_utils.does_file_exist(filename)
        # Only stat the file when the caller did not supply the mtime;
        # overwriting it unconditionally would discard the value already
        # obtained from the directory scan.
        if mtime is None:
            mtime = file_utils.get_file_raw_mtime(filename)
        # Test membership explicitly instead of defaulting to 0 so that a
        # never-seen file whose mtime happens to be 0 is still hashed.
        if (
            filename not in self.mtime_by_filename
            or self.mtime_by_filename[filename] != mtime
        ):
            md5 = file_utils.get_file_md5(filename)
            self.mtime_by_filename[filename] = mtime
            self.md5_by_filename[filename] = md5

    def apply(self, item: Any, filename: str) -> bool:
        """Decide whether writing ``item`` to ``filename`` would change it.

        Args:
            item: the bytes-like content that is about to be written.
            filename: path of the existing file that would be overwritten.

        Returns:
            True if the MD5 hex digest of ``item`` differs from the digest
            cached for ``filename`` (the write is needed), False if they
            are equal (the write can be skipped).

        Raises:
            AssertionError: if ``filename`` does not exist (only while
                assertions are enabled).
        """
        self._update_file(filename)
        file_md5 = self.md5_by_filename.get(filename)
        # The item must actually be fed into the hash; the digest of an
        # untouched md5 object is the same constant for every item.
        mem_hash = hashlib.md5()
        mem_hash.update(item)
        return mem_hash.hexdigest() != file_md5
class DirectoryAllFilesFilter(DirectoryFileFilter):
    """A predicate that will return False if a file to-be-written to a
    particular directory is identical to any other file in that same
    directory, regardless of the file's name.

    NOTE: ``apply`` consults the digests collected so far; it does not
    rescan the directory itself.
    """

    def __init__(self, directory: str):
        """Scan ``directory`` and collect the digest of every file in it.

        Args:
            directory: the directory whose files are tracked.

        Raises:
            ValueError: if the base class rejects ``directory``.
        """
        # Must exist before the base constructor runs: the base scans the
        # directory through _update_file, which adds to this set.
        self.all_md5s = set()
        super().__init__(directory)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Refresh the cached digest of ``filename`` and record it in the
        set of all known digests.

        Args:
            filename: path of the file to (re)hash; it must exist.
            mtime: the file's current mtime if already known, else None.
        """
        # Reuse the base-class caching logic rather than duplicating it.
        super()._update_file(filename, mtime)
        md5 = self.md5_by_filename.get(filename)
        if md5 is not None:
            # Re-adding an already known digest is a no-op for a set.
            self.all_md5s.add(md5)

    def apply(self, item: Any, filename: Optional[str] = None) -> bool:
        """Decide whether ``item`` differs from every known file.

        Args:
            item: the bytes-like content that is about to be written.
            filename: ignored; accepted only so that this predicate can be
                called with the same arguments as the base class.

        Returns:
            True if no recorded file digest equals the MD5 hex digest of
            ``item`` (the write is needed), False if some file already
            holds identical content.
        """
        # The item must actually be fed into the hash; the digest of an
        # untouched md5 object is the same constant for every item.
        mem_hash = hashlib.md5()
        mem_hash.update(item)
        return mem_hash.hexdigest() not in self.all_md5s