3 # © Copyright 2021-2022, Scott Gasch
5 """Utilities for working with files."""
17 from os.path import exists, isfile, join
18 from typing import Callable, List, Literal, Optional, TextIO
19 from uuid import uuid4
21 logger = logging.getLogger(__name__)
def remove_newlines(x: str) -> str:
    """Drop every newline character from a string.

    Meant to be passed as one of the ``line_transformers`` of
    :meth:`slurp_file` when file contents should carry no newlines.

    Args:
        x: the text to clean up

    Returns:
        x with all newline characters deleted.
    """
    pieces = x.split('\n')
    return ''.join(pieces)
def strip_whitespace(x: str) -> str:
    """Trim leading and trailing whitespace from a string.

    Meant to be passed as one of the ``line_transformers`` of
    :meth:`slurp_file` when lines should carry no surrounding
    whitespace.

    Args:
        x: the text to trim

    Returns:
        x without leading / trailing whitespace.

    >>> strip_whitespace('   hello  ')
    'hello'
    """
    trimmed = x.strip()
    return trimmed
def remove_hash_comments(x: str) -> str:
    """Cut a trailing ``#`` comment off a line of text.

    Meant to be passed as one of the ``line_transformers`` of
    :meth:`slurp_file`.  Everything from the first ``#`` up to the end
    of the line is removed; text in front of the ``#`` (including any
    whitespace) is kept.

    Args:
        x: the line to process

    Returns:
        x with the comment portion removed.

    >>> remove_hash_comments('value = 1  # the answer')
    'value = 1  '
    """
    comment_pattern = r'#.*$'
    return re.sub(comment_pattern, '', x)
46 skip_blank_lines=False,
47 line_transformers: Optional[List[Callable[[str], str]]] = None,
49 """Reads in a file's contents line-by-line to a memory buffer applying
50 each line transformation in turn.
53 filename: file to be read
54 skip_blank_lines: should reading skip blank lines?
55 line_transformers: little string->string transformations
60 if line_transformers is not None:
61 for x in line_transformers:
63 if not file_is_readable(filename):
64 raise Exception(f'{filename} can\'t be read.')
65 with open(filename) as rf:
67 for transformation in xforms:
68 line = transformation(line)
69 if skip_blank_lines and line == '':
def remove(path: str) -> None:
    """Delete a single file from the filesystem.

    This is a thin wrapper around :func:`os.remove`, so it raises when
    path cannot be removed as a file (for example when it names a
    directory).

    Args:
        path: the path of the file to delete

    >>> import os
    >>> filename = '/tmp/file_utils_test_file'
    >>> os.system(f'touch {filename}')
    0
    >>> does_file_exist(filename)
    True
    >>> remove(filename)
    >>> does_file_exist(filename)
    False
    """
    os.remove(path)
def delete(path: str) -> None:
    """Delete a single file; an easier-to-remember alias for os.remove.

    Args:
        path: the path of the file to delete
    """
    os.remove(path)
def without_extension(path: str) -> str:
    """Strip the final extension (and only the final one) from a path.

    Args:
        path: the file name or path to strip

    Returns:
        path minus its last extension; unchanged when it has none.

    >>> without_extension('foobar.txt')
    'foobar'

    >>> without_extension('/home/scott/frapp.py')
    '/home/scott/frapp'

    >>> f = 'a.b.c.tar.gz'
    >>> while '.' in f:
    ...     f = without_extension(f)
    ...     print(f)
    a.b.c.tar
    a.b.c
    a.b
    a

    >>> without_extension('foobar')
    'foobar'
    """
    root, _extension = os.path.splitext(path)
    return root
def without_all_extensions(path: str) -> str:
    """Strip every extension from a path, e.g. foobar.tar.gz -> foobar.

    Extensions are peeled off one at a time until the final path
    component has none left.  Termination is decided by whether an
    extension was actually found, not by whether the path still
    contains a dot, so dotted directory names and dotfiles
    (``/etc/conf.d/foo``, ``.bashrc``) are returned as-is instead of
    being looped over forever.

    Args:
        path: the path from which to remove all extensions

    Returns:
        the path with all extensions removed.

    >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
    '/home/scott/foobar'

    >>> without_all_extensions('/etc/conf.d/foo')
    '/etc/conf.d/foo'
    """
    while True:
        root, extension = os.path.splitext(path)
        if not extension:
            # Nothing more to strip from the last path component.
            return path
        path = root
def get_extension(path: str) -> str:
    """Return the final extension of a file name or path.

    Args:
        path: the path to inspect

    Returns:
        The last extension, including its leading dot, or an empty
        string when the path has no extension.

    >>> get_extension('this_is_a_test.txt')
    '.txt'

    >>> get_extension('/home/scott/test.py')
    '.py'

    >>> get_extension('foobar')
    ''
    """
    _root, extension = os.path.splitext(path)
    return extension
def get_all_extensions(path: str) -> List[str]:
    """List all extensions of a file name or path, left to right.

    Args:
        path: the path from which to extract all extensions.

    Returns:
        a list with one entry per extension (each including its
        leading dot), in the order they appear in the path.  The list
        is empty when the path has no extension.

    >>> get_all_extensions('/home/scott/foo.tar.gz.1')
    ['.tar', '.gz', '.1']

    >>> get_all_extensions('/home/scott/foobar')
    []
    """
    found: List[str] = []
    remainder, extension = os.path.splitext(path)
    while extension:
        found.append(extension)
        remainder, extension = os.path.splitext(remainder)
    # Extensions were collected right-to-left; flip to reading order.
    found.reverse()
    return found
def without_path(filespec: str) -> str:
    """Return only the file name portion of a path.

    Args:
        filespec: path to remove leading directories from

    Returns:
        the final component of filespec with no directory part.

    >>> without_path('/home/scott/foo.py')
    'foo.py'

    >>> without_path('foo.py')
    'foo.py'
    """
    return os.path.basename(filespec)
def get_path(filespec: str) -> str:
    """Return only the directory portion of a path.

    Args:
        filespec: path to remove filename / extension(s) from

    Returns:
        the leading directory components of filespec, with the file
        name and its extension(s) removed.

    >>> get_path('/home/scott/foobar.py')
    '/home/scott'

    >>> get_path('/home/scott/test.1.2.3.gz')
    '/home/scott'

    >>> get_path('~scott/frapp.txt')
    '~scott'
    """
    return os.path.dirname(filespec)
def get_canonical_path(filespec: str) -> str:
    """Resolve a path to its canonical absolute form.

    Delegates to :func:`os.path.realpath`, so symbolic links are
    followed and ``..`` / ``.`` components are collapsed.  The result
    therefore depends on the filesystem it runs on.

    Args:
        filespec: the path to canonicalize

    Returns:
        the canonicalized path

    >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')
    '/usr/home/scott/foo.txt'
    """
    canonical = os.path.realpath(filespec)
    return canonical
260 def create_path_if_not_exist(path, on_error=None) -> None:
262 Attempts to create path if it does not exist already.
266 Files are created with mode 0x0777 (i.e. world read/writeable).
269 path: the path to attempt to create
270 on_error: If True, it's invoked on error conditions. Otherwise
271 any exceptions are raised.
275 >>> path = os.path.join("/tmp", str(uuid.uuid4()), str(uuid.uuid4()))
276 >>> os.path.exists(path)
278 >>> create_path_if_not_exist(path)
279 >>> os.path.exists(path)
282 logger.debug("Creating path %s", path)
283 previous_umask = os.umask(0)
286 os.chmod(path, 0o777)
287 except OSError as ex:
288 if ex.errno != errno.EEXIST and not os.path.isdir(path):
289 if on_error is not None:
294 os.umask(previous_umask)
def does_file_exist(filename: str) -> bool:
    """Check whether a path names an existing regular file.

    Args:
        filename: filename to check

    Returns:
        True if filename exists and is a normal file, False otherwise.

    >>> does_file_exist(__file__)
    True
    >>> does_file_exist('/tmp/2492043r9203r9230r9230r49230r42390r4230')
    False
    """
    # isfile() is already False for paths that do not exist, so it
    # covers the existence check on its own.
    return os.path.isfile(filename)
def file_is_readable(filename: str) -> bool:
    """Check that a path is an existing regular file we may read.

    Args:
        filename: the filename to check for read access

    Returns:
        True if filename exists, is a normal file and is readable by
        the current process.  False otherwise.
    """
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.R_OK)
def file_is_writable(filename: str) -> bool:
    """Check that a path is an existing regular file we may write.

    Args:
        filename: the file to check for write access.

    Returns:
        True if filename exists, is a normal file and is writable by
        the current process.  False otherwise.
    """
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.W_OK)
def file_is_executable(filename: str) -> bool:
    """Check that a path is an existing regular file we may execute.

    Args:
        filename: the file to check for execute access.

    Returns:
        True if filename exists, is a normal file and is executable by
        the current process.  False otherwise.
    """
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.X_OK)
def does_directory_exist(dirname: str) -> bool:
    """Check whether a path names an existing directory.

    Args:
        dirname: the path to check

    Returns:
        True if dirname exists and is a directory, False otherwise.

    >>> does_directory_exist('/tmp')
    True
    >>> does_directory_exist('/xyzq/21341')
    False
    """
    # isdir() is already False for paths that do not exist.
    return os.path.isdir(dirname)
def does_path_exist(pathname: str) -> bool:
    """Check whether anything exists at a path.

    A more descriptively named wrapper around :func:`os.path.exists`.

    Args:
        pathname: the path to check

    Returns:
        True if pathname exists, False otherwise.
    """
    found = os.path.exists(pathname)
    return found
def get_file_size(filename: str) -> int:
    """Report how many bytes a file occupies.

    Args:
        filename: the filename to size

    Returns:
        size of filename in bytes.  Raises OSError if the file cannot
        be stat'ed (e.g. it does not exist).
    """
    return os.stat(filename).st_size
def is_normal_file(filename: str) -> bool:
    """Check whether a path names a regular file.

    Args:
        filename: the path to check

    Returns:
        True if filename is a normal file, False otherwise.

    >>> is_normal_file(__file__)
    True
    """
    regular = os.path.isfile(filename)
    return regular
def is_directory(filename: str) -> bool:
    """Check whether a path names a directory.

    Args:
        filename: the path to check

    Returns:
        True if filename is a directory, False otherwise.

    >>> is_directory('/tmp')
    True
    """
    directory = os.path.isdir(filename)
    return directory
def is_symlink(filename: str) -> bool:
    """Check whether a path names a symbolic link.

    Args:
        filename: the path to check

    Returns:
        True if filename is a symlink, False otherwise.

    >>> is_symlink('/tmp')
    False

    >>> is_symlink('/home')
    False
    """
    link = os.path.islink(filename)
    return link
def is_same_file(file1: str, file2: str) -> bool:
    """Check whether two paths refer to the same underlying file.

    Delegates to :func:`os.path.samefile`, which raises if either path
    cannot be stat'ed.

    Args:
        file1: the first path
        file2: the second path

    Returns:
        True if both paths resolve to the same inode, False otherwise.

    >>> is_same_file('/tmp', '/tmp/../tmp')
    True

    >>> is_same_file('/tmp', '/home')
    False
    """
    same = os.path.samefile(file1, file2)
    return same
def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
    """Stat a file, returning None instead of raising on failure.

    Args:
        filename: the file whose timestamps to fetch

    Returns:
        the os.stat_result or None to indicate an error occurred
    """
    try:
        stat_result = os.stat(filename)
    except Exception as e:
        # Best effort by design: report the problem and signal it to
        # the caller with None rather than propagating.
        logger.exception(e)
        return None
    return stat_result
def get_file_raw_timestamp(
    filename: str, extractor: Callable[[os.stat_result], Optional[float]]
) -> Optional[float]:
    """Stat a file and pull one value out of the result.

    See also :meth:`get_file_raw_atime`, :meth:`get_file_raw_mtime`,
    and :meth:`get_file_raw_ctime` which call this with an extractor
    for the respective field.

    Args:
        filename: the filename to stat
        extractor: Callable that takes a os.stat_result and produces
            the value of interest from it.

    Returns:
        whatever the extractor produced or None on error.
    """
    stat_result = get_file_raw_timestamps(filename)
    if stat_result is None:
        return None
    return extractor(stat_result)
def get_file_raw_atime(filename: str) -> Optional[float]:
    """Fetch a file's raw access time (``st_atime``).

    See also :meth:`get_file_atime_as_datetime`,
    :meth:`get_file_atime_timedelta`,
    and :meth:`get_file_atime_age_seconds`.

    Args:
        filename: the file to stat

    Returns:
        the raw access time or None on error.
    """

    def _atime(stat_result: os.stat_result) -> float:
        return stat_result.st_atime

    return get_file_raw_timestamp(filename, _atime)
def get_file_raw_mtime(filename: str) -> Optional[float]:
    """Fetch a file's raw modification time (``st_mtime``).

    See also :meth:`get_file_mtime_as_datetime`,
    :meth:`get_file_mtime_timedelta`,
    and :meth:`get_file_mtime_age_seconds`.

    Args:
        filename: the file to stat

    Returns:
        the raw modification time or None on error.
    """

    def _mtime(stat_result: os.stat_result) -> float:
        return stat_result.st_mtime

    return get_file_raw_timestamp(filename, _mtime)
def get_file_raw_ctime(filename: str) -> Optional[float]:
    """Fetch a file's raw ``st_ctime`` value.

    Note that ``st_ctime`` is platform dependent: Python documents it
    as the time of the most recent metadata change on Unix and as the
    creation time on Windows.

    See also :meth:`get_file_ctime_as_datetime`,
    :meth:`get_file_ctime_timedelta`,
    and :meth:`get_file_ctime_age_seconds`.

    Args:
        filename: the file to stat

    Returns:
        the raw ctime or None on error.
    """

    def _ctime(stat_result: os.stat_result) -> float:
        return stat_result.st_ctime

    return get_file_raw_timestamp(filename, _ctime)
def get_file_md5(filename: str) -> str:
    """Compute the MD5 digest of a file's on-disk contents.

    The file is read in fixed-size chunks so that large files are not
    loaded into memory all at once.

    Args:
        filename: the file whose contents to hash

    Returns:
        the hexadecimal MD5 digest of the file's contents.  Raises on
        errors (e.g. the file cannot be opened).
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps reading until read() returns b"".
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
def set_file_raw_atime(filename: str, atime: float):
    """Set a file's raw access time, leaving its mtime untouched.

    See also :meth:`get_file_atime_as_datetime`,
    :meth:`get_file_atime_timedelta`,
    :meth:`get_file_atime_age_seconds`,
    and :meth:`get_file_raw_atime`.

    Args:
        filename: the file whose access time to set
        atime: the new raw access time
    """
    # os.utime always writes both timestamps, so read back the current
    # modification time and re-apply it unchanged.
    current_mtime = get_file_raw_mtime(filename)
    assert current_mtime is not None
    new_times = (atime, current_mtime)
    os.utime(filename, new_times)
def set_file_raw_mtime(filename: str, mtime: float):
    """Set a file's raw modification time, leaving its atime untouched.

    See also :meth:`get_file_mtime_as_datetime`,
    :meth:`get_file_mtime_timedelta`,
    :meth:`get_file_mtime_age_seconds`,
    and :meth:`get_file_raw_mtime`.

    Args:
        filename: the file whose modification time to set
        mtime: the new raw modification time
    """
    # os.utime always writes both timestamps, so read back the current
    # access time and re-apply it unchanged.
    current_atime = get_file_raw_atime(filename)
    assert current_atime is not None
    new_times = (current_atime, mtime)
    os.utime(filename, new_times)
def set_file_raw_atime_and_mtime(filename: str, ts: Optional[float] = None):
    """Sets both a file's raw modification and access times.

    Args:
        filename: the file whose times to set
        ts: the raw time to set or None to indicate time should be
            set to the current time.

    Raises:
        OSError: if the timestamps cannot be updated (e.g. the file
            does not exist).
    """
    # os.utime treats times=None as "use the current time".
    times = None if ts is None else (ts, ts)
    os.utime(filename, times)
def convert_file_timestamp_to_datetime(filename: str, producer) -> Optional[datetime.datetime]:
    """Turn a raw file timestamp into a python datetime.

    Args:
        filename: the file whose timestamp is wanted
        producer: a callable that is given filename and returns a raw
            timestamp, or None when none is available

    Returns:
        the timestamp as a (local time, naive) datetime, or None when
        the producer returned None.
    """
    raw_timestamp = producer(filename)
    if raw_timestamp is None:
        return None
    return datetime.datetime.fromtimestamp(raw_timestamp)
def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return a file's access time as a python datetime.

    See also :meth:`get_file_atime_timedelta`,
    :meth:`get_file_atime_age_seconds`,
    :meth:`describe_file_atime`,
    and :meth:`get_file_raw_atime`.

    Args:
        filename: the file to inspect

    Returns:
        the access time as a datetime or None on error.
    """
    return convert_file_timestamp_to_datetime(
        filename,
        get_file_raw_atime,
    )
def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return a file's modification time as a python datetime.

    See also :meth:`get_file_mtime_timedelta`,
    :meth:`get_file_mtime_age_seconds`,
    and :meth:`get_file_raw_mtime`.

    Args:
        filename: the file to inspect

    Returns:
        the modification time as a datetime or None on error.
    """
    return convert_file_timestamp_to_datetime(
        filename,
        get_file_raw_mtime,
    )
def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Return a file's ctime as a python datetime.

    See also :meth:`get_file_ctime_timedelta`,
    :meth:`get_file_ctime_age_seconds`,
    and :meth:`get_file_raw_ctime`.

    Args:
        filename: the file to inspect

    Returns:
        the ctime as a datetime or None on error.
    """
    return convert_file_timestamp_to_datetime(
        filename,
        get_file_raw_ctime,
    )
585 def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
586 """~Internal helper"""
588 ts = get_file_raw_timestamps(filename)
591 result = extractor(ts)
def get_file_atime_age_seconds(filename: str) -> Optional[int]:
    """Report how many seconds ago a file was last accessed.

    See also :meth:`get_file_atime_as_datetime`,
    :meth:`get_file_atime_timedelta`,
    :meth:`describe_file_atime`,
    and :meth:`get_file_raw_atime`.

    Args:
        filename: the file to inspect

    Returns:
        the age of the access time in seconds or None on error.
    """

    def _atime(stat_result):
        return stat_result.st_atime

    return get_file_timestamp_age_seconds(filename, _atime)
def get_file_ctime_age_seconds(filename: str) -> Optional[int]:
    """Report how many seconds ago a file's ctime was.

    See also :meth:`get_file_ctime_as_datetime`,
    :meth:`get_file_ctime_timedelta`,
    and :meth:`get_file_raw_ctime`.

    Args:
        filename: the file to inspect

    Returns:
        the age of the ctime in seconds or None on error.
    """

    def _ctime(stat_result):
        return stat_result.st_ctime

    return get_file_timestamp_age_seconds(filename, _ctime)
def get_file_mtime_age_seconds(filename: str) -> Optional[int]:
    """Report how many seconds ago a file was last modified.

    See also :meth:`get_file_mtime_as_datetime`,
    :meth:`get_file_mtime_timedelta`,
    and :meth:`get_file_raw_mtime`.

    Args:
        filename: the file to inspect

    Returns:
        the age of the modification time in seconds or None on error.
    """

    def _mtime(stat_result):
        return stat_result.st_mtime

    return get_file_timestamp_age_seconds(filename, _mtime)
def get_file_timestamp_timedelta(filename: str, extractor) -> Optional[datetime.timedelta]:
    """~Internal helper: a timestamp's age expressed as a timedelta.

    Args:
        filename: the file to inspect
        extractor: callable selecting the timestamp of interest from
            an os.stat_result

    Returns:
        the age as a timedelta, or None when the age is unavailable.
    """
    age_seconds = get_file_timestamp_age_seconds(filename, extractor)
    if age_seconds is None:
        return None
    return datetime.timedelta(seconds=float(age_seconds))
def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Report how long ago a file was accessed, as a timedelta.

    See also :meth:`get_file_atime_as_datetime`,
    :meth:`get_file_atime_age_seconds`,
    :meth:`describe_file_atime`,
    and :meth:`get_file_raw_atime`.

    Args:
        filename: the file to inspect

    Returns:
        the time since last access or None on error.
    """

    def _atime(stat_result):
        return stat_result.st_atime

    return get_file_timestamp_timedelta(filename, _atime)
def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Report how long ago a file's ctime was, as a timedelta.

    See also :meth:`get_file_ctime_as_datetime`,
    :meth:`get_file_ctime_age_seconds`,
    and :meth:`get_file_raw_ctime`.

    Args:
        filename: the file to inspect

    Returns:
        the time since the file's ctime or None on error.
    """

    def _ctime(stat_result):
        return stat_result.st_ctime

    return get_file_timestamp_timedelta(filename, _ctime)
def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Report how long ago a file was modified, as a timedelta.

    See also :meth:`get_file_mtime_as_datetime`,
    :meth:`get_file_mtime_age_seconds`,
    and :meth:`get_file_raw_mtime`.

    Args:
        filename: the file to inspect

    Returns:
        the time since last modification or None on error.
    """

    def _mtime(stat_result):
        return stat_result.st_mtime

    return get_file_timestamp_timedelta(filename, _mtime)
def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
    """~Internal helper: describe a timestamp's age in words.

    Args:
        filename: the file to inspect
        extractor: callable selecting the timestamp of interest from
            an os.stat_result
        brief: use the brief duration format instead of the long one

    Returns:
        a description of the timestamp's age, or None when the age is
        unavailable.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from datetime_utils import describe_duration, describe_duration_briefly

    age_seconds = get_file_timestamp_age_seconds(filename, extractor)
    if age_seconds is None:
        return None
    describe = describe_duration_briefly if brief else describe_duration
    return describe(age_seconds)
def describe_file_atime(filename: str, *, brief=False) -> Optional[str]:
    """Describe in words how long ago a file was accessed.

    See also :meth:`get_file_atime_as_datetime`,
    :meth:`get_file_atime_timedelta`,
    :meth:`get_file_atime_age_seconds`,
    and :meth:`get_file_raw_atime`.

    Args:
        filename: the file to inspect
        brief: use the brief duration format instead of the long one

    Returns:
        the description or None on error.
    """

    def _atime(stat_result):
        return stat_result.st_atime

    return describe_file_timestamp(filename, _atime, brief=brief)
def describe_file_ctime(filename: str, *, brief=False) -> Optional[str]:
    """Describe in words how long ago a file's ctime was.

    See also :meth:`get_file_ctime_as_datetime`,
    :meth:`get_file_ctime_timedelta`,
    :meth:`get_file_ctime_age_seconds`,
    and :meth:`get_file_raw_ctime`.

    Args:
        filename: the file to inspect
        brief: use the brief duration format instead of the long one

    Returns:
        the description or None on error.
    """

    def _ctime(stat_result):
        return stat_result.st_ctime

    return describe_file_timestamp(filename, _ctime, brief=brief)
def describe_file_mtime(filename: str, *, brief=False) -> Optional[str]:
    """Describe in words how long ago a file was modified.

    See also :meth:`get_file_mtime_as_datetime`,
    :meth:`get_file_mtime_timedelta`,
    :meth:`get_file_mtime_age_seconds`,
    and :meth:`get_file_raw_mtime`.

    Args:
        filename: the file to inspect
        brief: use the brief duration format instead of the long one

    Returns:
        the description or None on error.
    """

    def _mtime(stat_result):
        return stat_result.st_mtime

    return describe_file_timestamp(filename, _mtime, brief=brief)
def touch_file(filename: str, *, mode: Optional[int] = 0o666):
    """Like unix "touch" command's semantics: update the timestamp
    of a file to the current time if the file exists.  Create the
    file if it doesn't exist.

    Args:
        filename: the filename
        mode: the mode to create the file with (subject to the
            process umask); None means use pathlib's default.

    Raises:
        OSError: if the file can be neither created nor updated.
    """
    target = pathlib.Path(filename)
    # The mode belongs to touch(), not to the Path constructor, which
    # does not accept it.
    if mode is None:
        target.touch()
    else:
        target.touch(mode=mode)
def expand_globs(in_filename: str):
    """Expand a shell glob (* and ? wildcards) into matching paths.

    Args:
        in_filename: the glob pattern to expand

    Yields:
        each path that matches the pattern.
    """
    yield from glob.glob(in_filename)
def get_files(directory: str):
    """Iterate over the regular files directly inside a directory.

    Args:
        directory: the directory to list (not recursive)

    Yields:
        the path (directory joined with the entry name) of each
        regular file found.
    """
    for entry in os.listdir(directory):
        candidate = join(directory, entry)
        if exists(candidate) and isfile(candidate):
            yield candidate
def get_directories(directory: str):
    """Iterate over the subdirectories directly inside a directory.

    Every existing entry that is not a regular file is reported.

    Args:
        directory: the directory to list (not recursive)

    Yields:
        the path (directory joined with the entry name) of each
        subdirectory found.
    """
    for entry in os.listdir(directory):
        candidate = join(directory, entry)
        if exists(candidate) and not isfile(candidate):
            yield candidate
def get_files_recursive(directory: str):
    """Walk a directory tree and iterate over what is found in it.

    The files directly inside directory are produced first, followed
    by the results of recursing into each of its subdirectories.

    Args:
        directory: the root of the tree to walk

    Yields:
        the paths produced by :meth:`get_files` for directory and for
        every directory beneath it.
    """
    yield from get_files(directory)
    for subdirectory in get_directories(directory):
        yield from get_files_recursive(subdirectory)
class FileWriter(contextlib.AbstractContextManager):
    """A helper that writes a file to a temporary location and then moves
    it atomically to its ultimate destination on close.

    The temporary file lives next to the destination (same directory,
    destination name plus a random suffix) so the final rename stays on
    one filesystem.

    Example::

        with FileWriter('/home/bob/foobar.txt') as w:
            print("This is a test!", file=w)
    """

    def __init__(self, filename: str) -> None:
        """
        Args:
            filename: the ultimate destination of the written data
        """
        self.filename = filename
        unique_suffix = uuid4()
        self.tempfile = f'{filename}-{unique_suffix}.tmp'
        self.handle: Optional[TextIO] = None

    def __enter__(self) -> TextIO:
        """Open the temporary file and return its writable handle."""
        assert not os.path.exists(self.tempfile)
        self.handle = open(self.tempfile, mode="w")
        return self.handle

    def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]:
        """Close the temporary file and move it onto the destination.

        Raises:
            OSError: if the temporary file cannot be moved into place.

        Returns:
            False, so an exception raised inside the with-body is
            never suppressed.
        """
        if self.handle is not None:
            self.handle.close()
            # os.replace renames in-process, overwriting any existing
            # destination.  No shell is involved, so paths containing
            # spaces or shell metacharacters are handled verbatim.
            os.replace(self.tempfile, self.filename)
        return False
790 if __name__ == '__main__':