5 from typing import Any, Optional
class DirectoryFileFilter(predicate.Predicate):
    """A predicate that returns False when a proposed file's
    to-be-written content is identical to the current contents of that
    file on disk, i.e. when the write can be skipped.

    Checksums are cached per path and only recomputed when a file's
    mtime differs from the one recorded when it was last hashed.
    """

    def __init__(self, directory: str):
        """Index every regular file currently found in *directory*.

        Args:
            directory: path of the directory whose files are tracked.

        Raises:
            ValueError: if file_utils.does_directory_exist() rejects
                *directory*.
        """
        # NOTE(review): assumes predicate.Predicate's constructor takes
        # no arguments -- confirm against its definition.
        super().__init__()
        if not file_utils.does_directory_exist(directory):
            raise ValueError(directory)
        self.directory = directory
        # path -> checksum reported by file_utils.get_file_md5()
        self.md5_by_filename = {}
        # path -> mtime observed when that checksum was computed
        self.mtime_by_filename = {}
        self._update()

    def _update(self) -> None:
        """Rescan self.directory and refresh the cache entry of every
        regular file (symlinks are followed) found there.
        """
        # scandir holds an OS directory handle; the context manager
        # releases it promptly instead of waiting for finalization.
        with os.scandir(self.directory) as entries:
            for direntry in entries:
                if direntry.is_file(follow_symlinks=True):
                    mtime = direntry.stat(follow_symlinks=True).st_mtime
                    path = f'{self.directory}/{direntry.name}'
                    self._update_file(path, mtime)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Recompute and cache the checksum of *filename* if its mtime
        differs from the cached one.

        Args:
            filename: path of an existing file.
            mtime: the file's modification time if the caller already
                has it; when None it is looked up here.
        """
        assert file_utils.does_file_exist(filename)
        # Only stat the file when the caller did not supply an mtime;
        # overwriting it unconditionally made the parameter useless.
        if mtime is None:
            mtime = file_utils.get_file_raw_mtime(filename)
        if self.mtime_by_filename.get(filename, 0) != mtime:
            md5 = file_utils.get_file_md5(filename)
            self.mtime_by_filename[filename] = mtime
            self.md5_by_filename[filename] = md5

    def apply(self, item: Any, filename: str) -> bool:
        """Decide whether *item* needs to be written to *filename*.

        Args:
            item: the proposed file content; it is fed to
                hashlib.md5().update(), so it must be bytes-like.
            filename: path of the file that would be (over)written.

        Returns:
            False if the MD5 of *item* equals the cached checksum of
            *filename*; True otherwise, including when *filename* does
            not exist yet.
        """
        # A file that does not exist cannot already hold this content.
        if not file_utils.does_file_exist(filename):
            return True
        self._update_file(filename)
        file_md5 = self.md5_by_filename.get(filename, 0)
        mem_hash = hashlib.md5()
        # Without this the digest is always that of empty input.
        mem_hash.update(item)
        md5 = mem_hash.hexdigest()
        return md5 != file_md5


class DirectoryAllFilesFilter(DirectoryFileFilter):
    """A predicate that returns False if content to-be-written to a
    particular directory is identical to the contents of any file
    already tracked in that same directory.
    """

    def __init__(self, directory: str):
        """Index every regular file currently found in *directory*.

        Args:
            directory: path of the directory whose files are tracked.

        Raises:
            ValueError: propagated from DirectoryFileFilter.__init__.
        """
        # Must exist before super().__init__() runs: the initial scan
        # dispatches to this class's _update_file(), which records
        # checksums into it.
        self.all_md5s = set()
        super().__init__(directory)

    def _update_file(self, filename: str, mtime: Optional[float] = None):
        """Refresh the cache entry for *filename* and keep
        self.all_md5s in step with self.md5_by_filename.

        Args:
            filename: path of an existing file.
            mtime: the file's modification time if already known.
        """
        previous = self.md5_by_filename.get(filename)
        super()._update_file(filename, mtime)
        current = self.md5_by_filename.get(filename)
        if current is not None:
            self.all_md5s.add(current)
        # When a file's content changed, forget its old checksum unless
        # some other tracked file still has that same checksum.
        if (
            previous is not None
            and previous != current
            and previous not in self.md5_by_filename.values()
        ):
            self.all_md5s.discard(previous)

    def apply(self, item: Any) -> bool:
        """Decide whether *item* differs from every tracked file.

        Args:
            item: the proposed file content; it is fed to
                hashlib.md5().update(), so it must be bytes-like.

        Returns:
            False if the MD5 of *item* matches the checksum of any
            tracked file in the directory, True otherwise.
        """
        # Rescan so files added or modified since the last call count.
        self._update()
        mem_hash = hashlib.md5()
        mem_hash.update(item)
        md5 = mem_hash.hexdigest()
        return md5 not in self.all_md5s