3 """Utilities for working with files."""
5 from dataclasses import dataclass
14 from typing import Optional
16 from os.path import isfile, join, exists
17 from typing import List
18 from uuid import uuid4
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def remove(path: str) -> None:
    """Delete the file at path.

    An exception is raised when path names a directory or a file that
    does not exist.

    >>> import os
    >>> filename = '/tmp/file_utils_test_file'
    >>> os.system(f'touch {filename}')
    0
    >>> does_file_exist(filename)
    True
    >>> remove(filename)
    >>> does_file_exist(filename)
    False

    """
    os.remove(path)
def delete(path: str) -> None:
    """Deletes the file at path via os.remove."""
    os.remove(path)
def without_extension(path: str) -> str:
    """Strip a single (the last) extension from a filename or path.

    >>> without_extension('foobar.txt')
    'foobar'

    >>> without_extension('/home/scott/frapp.py')
    '/home/scott/frapp'

    >>> without_extension('a.b.c.tar.gz')
    'a.b.c.tar'

    >>> without_extension('foobar')
    'foobar'

    """
    root, _unused_ext = os.path.splitext(path)
    return root
def without_all_extensions(path: str) -> str:
    """Removes all extensions from a path; handles multiple extensions
    like foobar.tar.gz -> foobar.

    Extensions are peeled off one at a time with os.path.splitext until
    it reports none, so a dot in a directory name or the leading dot of
    a hidden file is never mistaken for an extension and the loop always
    terminates.

    >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
    '/home/scott/foobar'

    >>> without_all_extensions('/home/scott.d/foobar')
    '/home/scott.d/foobar'

    >>> without_all_extensions('.bashrc')
    '.bashrc'

    """
    while True:
        root, ext = os.path.splitext(path)
        if not ext:
            # Nothing left to strip; path is final.
            return path
        path = root
def get_extension(path: str) -> str:
    """Return the last extension (including its dot) of a file or path,
    or the empty string when there is none.

    >>> get_extension('this_is_a_test.txt')
    '.txt'

    >>> get_extension('/home/scott/test.py')
    '.py'

    >>> get_extension('foobar')
    ''

    """
    _unused_root, ext = os.path.splitext(path)
    return ext
def get_all_extensions(path: str) -> List[str]:
    """List every extension of a file or path, in left-to-right order.

    >>> get_all_extensions('/home/scott/foo.tar.gz.1')
    ['.tar', '.gz', '.1']

    """
    collected: List[str] = []
    remainder, suffix = os.path.splitext(path)
    # Extensions come off right-to-left; flip the list at the end.
    while suffix:
        collected.append(suffix)
        remainder, suffix = os.path.splitext(remainder)
    return collected[::-1]
def without_path(filespec: str) -> str:
    """Return only the final filename component, dropping any leading path.

    >>> without_path('/home/scott/foo.py')
    'foo.py'

    >>> without_path('foo.py')
    'foo.py'

    """
    _unused_head, tail = os.path.split(filespec)
    return tail
def get_path(filespec: str) -> str:
    """Return the directory portion of filespec, i.e. everything before
    the final filename component.

    >>> get_path('/home/scott/foobar.py')
    '/home/scott'

    >>> get_path('~scott/frapp.txt')
    '~scott'

    """
    head, _unused_tail = os.path.split(filespec)
    return head
def get_canonical_path(filespec: str) -> str:
    """Resolve filespec to a canonical absolute path via os.path.realpath.

    The example output below depends on the symlink layout of the host
    it was recorded on.

    >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')
    '/usr/home/scott/foo.txt'

    """
    resolved = os.path.realpath(filespec)
    return resolved
def create_path_if_not_exist(path, on_error=None):
    """
    Attempts to create path if it does not exist. If on_error is
    specified, it is called with an exception if one occurs, otherwise
    exception is rethrown.

    The process umask is cleared while the directory is created and
    chmod'ed to 0o777, and is always restored afterwards.  The only
    failure that is silently tolerated is "path already exists and is a
    directory"; in particular, a regular file already sitting at path is
    reported as an error rather than ignored.

    >>> import uuid
    >>> import os
    >>> path = os.path.join("/tmp", str(uuid.uuid4()), str(uuid.uuid4()))
    >>> os.path.exists(path)
    False
    >>> create_path_if_not_exist(path)
    >>> os.path.exists(path)
    True
    """
    logger.debug(f"Creating path {path}")
    previous_umask = os.umask(0)
    try:
        os.makedirs(path)
        os.chmod(path, 0o777)
    except OSError as ex:
        # Ignore only EEXIST on a real directory; everything else is an
        # error the caller needs to hear about.
        if ex.errno != errno.EEXIST or not os.path.isdir(path):
            if on_error is not None:
                on_error(path, ex)
            else:
                raise
    finally:
        os.umask(previous_umask)
def does_file_exist(filename: str) -> bool:
    """True when filename exists and is a regular file, else False.

    >>> does_file_exist(__file__)
    True

    """
    if not os.path.exists(filename):
        return False
    return os.path.isfile(filename)
def does_directory_exist(dirname: str) -> bool:
    """True when dirname exists and is a directory, else False.

    >>> does_directory_exist('/tmp')
    True

    """
    if not os.path.exists(dirname):
        return False
    return os.path.isdir(dirname)
def does_path_exist(pathname: str) -> bool:
    """Readable alias for os.path.exists: True when pathname exists."""
    found = os.path.exists(pathname)
    return found
def get_file_size(filename: str) -> int:
    """Return filename's size in bytes, as reported by stat."""
    return os.stat(filename).st_size
def is_normal_file(filename: str) -> bool:
    """Report whether filename refers to a regular file.

    >>> is_normal_file(__file__)
    True

    """
    regular = os.path.isfile(filename)
    return regular
def is_directory(filename: str) -> bool:
    """Report whether filename refers to a directory.

    >>> is_directory('/tmp')
    True

    """
    directory = os.path.isdir(filename)
    return directory
def is_symlink(filename: str) -> bool:
    """Report whether filename is a symbolic link.

    The second example reflects the host the doctest was written on,
    where /home is a symlink.

    >>> is_symlink('/tmp')
    False

    >>> is_symlink('/home')
    True

    """
    link = os.path.islink(filename)
    return link
def is_same_file(file1: str, file2: str) -> bool:
    """Report whether both paths refer to the same underlying file
    (per os.path.samefile).

    >>> is_same_file('/tmp', '/tmp/../tmp')
    True

    >>> is_same_file('/tmp', '/home')
    False

    """
    identical = os.path.samefile(file1, file2)
    return identical
def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
    """Stat filename and hand back the os.stat_result; on any error the
    exception is logged and None is returned instead."""
    try:
        stat_result = os.stat(filename)
    except Exception as e:
        logger.exception(e)
        return None
    return stat_result
def get_file_raw_timestamp(filename: str, extractor) -> Optional[float]:
    """Stat filename and return extractor(stat_result), or None when the
    stat result is unavailable."""
    stat_result = get_file_raw_timestamps(filename)
    if stat_result is None:
        return None
    return extractor(stat_result)
def get_file_raw_atime(filename: str) -> Optional[float]:
    """Return filename's st_atime, or None if it cannot be obtained."""

    def _atime(stat_result):
        return stat_result.st_atime

    return get_file_raw_timestamp(filename, _atime)
def get_file_raw_mtime(filename: str) -> Optional[float]:
    """Return filename's st_mtime, or None if it cannot be obtained."""

    def _mtime(stat_result):
        return stat_result.st_mtime

    return get_file_raw_timestamp(filename, _mtime)
def get_file_raw_ctime(filename: str) -> Optional[float]:
    """Return filename's st_ctime, or None if it cannot be obtained."""

    def _ctime(stat_result):
        return stat_result.st_ctime

    return get_file_raw_timestamp(filename, _ctime)
def get_file_md5(filename: str) -> str:
    """Compute the MD5 of filename's contents and return it as a hex string.

    The file is read in fixed-size binary chunks so large files are not
    loaded into memory at once.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
def set_file_raw_atime(filename: str, atime: float):
    """Set filename's access time to atime, keeping its current mtime.

    Args:
        filename: path of the file to update.
        atime: new access time, in seconds since the epoch.

    Raises:
        OSError: if filename cannot be stat'ed or its times cannot be set
            (e.g. FileNotFoundError when it does not exist).
    """
    # Stat directly so a missing/unreadable file surfaces as an OSError
    # here instead of a None being passed into the os.utime() tuple.
    mtime = os.stat(filename).st_mtime
    os.utime(filename, (atime, mtime))
def set_file_raw_mtime(filename: str, mtime: float):
    """Set filename's modification time to mtime, keeping its current atime.

    Args:
        filename: path of the file to update.
        mtime: new modification time, in seconds since the epoch.

    Raises:
        OSError: if filename cannot be stat'ed or its times cannot be set
            (e.g. FileNotFoundError when it does not exist).
    """
    # Stat directly so a missing/unreadable file surfaces as an OSError
    # here instead of a None being passed into the os.utime() tuple.
    atime = os.stat(filename).st_atime
    os.utime(filename, (atime, mtime))
def set_file_raw_atime_and_mtime(filename: str, ts: float = None):
    """Set both the access and modification time of filename to ts.

    When ts is None, os.utime is called with None, which stamps the file
    with the current time.
    """
    times = None if ts is None else (ts, ts)
    os.utime(filename, times)
def convert_file_timestamp_to_datetime(
    filename: str, producer
) -> Optional[datetime.datetime]:
    """Call producer(filename) to get a timestamp and convert it with
    datetime.datetime.fromtimestamp; returns None when producer does."""
    raw = producer(filename)
    if raw is None:
        return None
    return datetime.datetime.fromtimestamp(raw)
def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return filename's access time as a datetime, or None if the raw
    atime is unavailable."""
    producer = get_file_raw_atime
    return convert_file_timestamp_to_datetime(filename, producer)
def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return filename's modification time as a datetime, or None if the
    raw mtime is unavailable."""
    producer = get_file_raw_mtime
    return convert_file_timestamp_to_datetime(filename, producer)
def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return filename's st_ctime as a datetime, or None if the raw ctime
    is unavailable."""
    producer = get_file_raw_ctime
    return convert_file_timestamp_to_datetime(filename, producer)
def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
    """Return how many seconds ago the timestamp selected by extractor
    was, or None when filename's stat result is unavailable."""
    now = time.time()
    stat_result = get_file_raw_timestamps(filename)
    if stat_result is not None:
        return now - extractor(stat_result)
    return None
def get_file_atime_age_seconds(filename: str) -> Optional[int]:
    """Age of filename's st_atime, via get_file_timestamp_age_seconds."""

    def _atime(stat_result):
        return stat_result.st_atime

    return get_file_timestamp_age_seconds(filename, _atime)
def get_file_ctime_age_seconds(filename: str) -> Optional[int]:
    """Age of filename's st_ctime, via get_file_timestamp_age_seconds."""

    def _ctime(stat_result):
        return stat_result.st_ctime

    return get_file_timestamp_age_seconds(filename, _ctime)
def get_file_mtime_age_seconds(filename: str) -> Optional[int]:
    """Age of filename's st_mtime, via get_file_timestamp_age_seconds."""

    def _mtime(stat_result):
        return stat_result.st_mtime

    return get_file_timestamp_age_seconds(filename, _mtime)
def get_file_timestamp_timedelta(
    filename: str, extractor
) -> Optional[datetime.timedelta]:
    """Return the age of the timestamp selected by extractor as a
    datetime.timedelta, or None when the age is unavailable."""
    seconds = get_file_timestamp_age_seconds(filename, extractor)
    if seconds is None:
        return None
    return datetime.timedelta(seconds=float(seconds))
def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Age of filename's st_atime as a timedelta (None if unavailable)."""

    def _atime(stat_result):
        return stat_result.st_atime

    return get_file_timestamp_timedelta(filename, _atime)
def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Age of filename's st_ctime as a timedelta (None if unavailable)."""

    def _ctime(stat_result):
        return stat_result.st_ctime

    return get_file_timestamp_timedelta(filename, _ctime)
def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Age of filename's st_mtime as a timedelta (None if unavailable)."""

    def _mtime(stat_result):
        return stat_result.st_mtime

    return get_file_timestamp_timedelta(filename, _mtime)
def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
    """Describe the age of the timestamp selected by extractor as text.

    The age in seconds is rendered by datetime_utils.describe_duration,
    or by describe_duration_briefly when brief is true.  Returns None
    when the age is unavailable.
    """
    # Imported here rather than at module scope, as in the original.
    from datetime_utils import describe_duration, describe_duration_briefly

    age = get_file_timestamp_age_seconds(filename, extractor)
    if age is None:
        return None
    describe = describe_duration_briefly if brief else describe_duration
    return describe(age)
def describe_file_atime(filename: str, *, brief=False) -> Optional[str]:
    """Text description of the age of filename's st_atime, or None."""

    def _atime(stat_result):
        return stat_result.st_atime

    return describe_file_timestamp(filename, _atime, brief=brief)
def describe_file_ctime(filename: str, *, brief=False) -> Optional[str]:
    """Text description of the age of filename's st_ctime, or None."""

    def _ctime(stat_result):
        return stat_result.st_ctime

    return describe_file_timestamp(filename, _ctime, brief=brief)
def describe_file_mtime(filename: str, *, brief=False) -> Optional[str]:
    """Text description of the age of filename's st_mtime, or None."""

    def _mtime(stat_result):
        return stat_result.st_mtime

    return describe_file_timestamp(filename, _mtime, brief=brief)
def touch_file(filename: str, *, mode: Optional[int] = 0o666) -> bool:
    """Create filename if it is missing, otherwise update its timestamps
    to the current time.

    Args:
        filename: path to touch.
        mode: permission bits used if the file has to be created (subject
            to the process umask); None means the 0o666 default.

    Returns:
        True if filename exists once the touch has completed.

    Raises:
        OSError: if the file can be neither created nor updated.
    """
    if mode is None:
        mode = 0o666
    # mode belongs to touch(), not to the Path constructor.
    pathlib.Path(filename).touch(mode=mode)
    return os.path.exists(filename)
def expand_globs(in_filename: str):
    """Yield each path matched by the glob pattern in_filename."""
    matches = glob.glob(in_filename)
    yield from matches
def get_files(directory: str):
    """Yield the joined path of every regular file directly inside
    directory (non-recursive)."""
    for entry in os.listdir(directory):
        candidate = join(directory, entry)
        if not isfile(candidate):
            continue
        if not exists(candidate):
            continue
        yield candidate
def get_directories(directory: str):
    """Yield the joined path of every subdirectory directly inside
    directory (non-recursive).

    Only entries that really are directories are produced; other
    non-regular entries such as FIFOs, sockets and device nodes are
    skipped, so each yielded path can itself be listed.

    Raises:
        OSError: if directory cannot be listed.
    """
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isdir(full_path):
            yield full_path
def get_files_recursive(directory: str):
    """Yield every file under directory: first the files directly inside
    it, then, for each subdirectory in turn, that subdirectory's files
    recursively."""
    yield from get_files(directory)
    for child in get_directories(directory):
        yield from get_files_recursive(child)
class FileWriter(object):
    """Context manager that writes to a uniquely named temporary file next
    to the target and moves it over the target when the block exits.

    Entering yields a text-mode handle on '<filename>-<uuid>.tmp'.  On
    exit the handle is closed and the temporary file replaces filename
    (whether or not the block raised, as before).  The move uses
    os.replace rather than a shell command, so filenames containing
    spaces or shell metacharacters are handled correctly.
    """

    def __init__(self, filename: str) -> None:
        """Remember the target filename and pick the temporary name."""
        self.filename = filename
        uuid = uuid4()
        self.tempfile = f'{filename}-{uuid}.tmp'
        self.handle = None

    def __enter__(self) -> io.TextIOWrapper:
        """Create the temporary file and return a writable text handle.

        Raises:
            FileExistsError: if the temporary file already exists.
        """
        # "x" creates the file exclusively, so an existing temp file is
        # rejected even when Python runs with assertions disabled.
        self.handle = open(self.tempfile, mode="x")
        return self.handle

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Close the handle and move the temporary file over the target.

        Returns False so an exception raised inside the with-block is
        never suppressed.

        Raises:
            OSError: if the temporary file cannot be moved into place.
        """
        if self.handle is not None:
            self.handle.close()
            os.replace(self.tempfile, self.filename)
        return False
447 if __name__ == '__main__':