X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=file_utils.py;h=7a64f9f3eef7f8073736863bc87d408db8f49695;hb=a9bdfd8fc9f84b7b2c09a57cd12ba32259e84d1c;hp=98e8c2670fd173fb11be51327fb6925a2b47f1eb;hpb=2f5b47c8b30d1b7d86443391332be2f3805cdafd;p=python_utils.git diff --git a/file_utils.py b/file_utils.py index 98e8c26..7a64f9f 100644 --- a/file_utils.py +++ b/file_utils.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +# © Copyright 2021-2022, Scott Gasch + """Utilities for working with files.""" import contextlib @@ -20,14 +22,21 @@ logger = logging.getLogger(__name__) def remove_newlines(x: str) -> str: + """Trivial function to be used as a line_transformer in + :meth:`slurp_file` for no newlines in file contents""" return x.replace('\n', '') def strip_whitespace(x: str) -> str: + """Trivial function to be used as a line_transformer in + :meth:`slurp_file` for no leading / trailing whitespace in + file contents""" return x.strip() def remove_hash_comments(x: str) -> str: + """Trivial function to be used as a line_transformer in + :meth:`slurp_file` for no # comments in file contents""" return re.sub(r'#.*$', '', x) @@ -37,14 +46,26 @@ def slurp_file( skip_blank_lines=False, line_transformers: Optional[List[Callable[[str], str]]] = None, ): + """Reads in a file's contents line-by-line to a memory buffer applying + each line transformation in turn. + + Args: + filename: file to be read + skip_blank_lines: should reading skip blank lines? + line_transformers: little string->string transformations + """ + ret = [] + xforms = [] + if line_transformers is not None: + for x in line_transformers: + xforms.append(x) if not file_is_readable(filename): raise Exception(f'{filename} can\'t be read.') with open(filename) as rf: for line in rf: - if line_transformers is not None: - for transformation in line_transformers: - line = transformation(line) + for transformation in xforms: + line = transformation(line) if skip_blank_lines and line == '': continue ret.append(line) @@ -55,6 +76,9 @@ def remove(path: str) -> None: """Deletes a file. Raises if path refers to a directory or a file that doesn't exist. + Args: + path: the path of the file to delete + >>> import os >>> filename = '/tmp/file_utils_test_file' >>> os.system(f'touch {filename}') @@ -64,17 +88,25 @@ def remove(path: str) -> None: >>> remove(filename) >>> does_file_exist(filename) False - """ os.remove(path) def delete(path: str) -> None: + """This is a convenience for my dumb ass who can't remember os.remove + sometimes. + """ os.remove(path) def without_extension(path: str) -> str: - """Remove one extension from a file or path. + """Remove one (the last) extension from a file or path. + + Args: + path: the path from which to remove an extension + + Returns: + the path with one extension removed. >>> without_extension('foobar.txt') 'foobar' @@ -82,8 +114,14 @@ def without_extension(path: str) -> str: >>> without_extension('/home/scott/frapp.py') '/home/scott/frapp' - >>> without_extension('a.b.c.tar.gz') - 'a.b.c.tar' + >>> f = 'a.b.c.tar.gz' + >>> while('.' in f): + ... f = without_extension(f) + ... print(f) + a.b.c.tar + a.b.c + a.b + a >>> without_extension('foobar') 'foobar' @@ -96,6 +134,12 @@ def without_all_extensions(path: str) -> str: """Removes all extensions from a path; handles multiple extensions like foobar.tar.gz -> foobar. + Args: + path: the path from which to remove all extensions + + Returns: + the path with all extensions removed. + >>> without_all_extensions('/home/scott/foobar.1.tar.gz') '/home/scott/foobar' @@ -106,7 +150,13 @@ def without_all_extensions(path: str) -> str: def get_extension(path: str) -> str: - """Extract and return one extension from a file or path. + """Extract and return one (the last) extension from a file or path. + + Args: + path: the path from which to extract an extension + + Returns: + The last extension from the file path. >>> get_extension('this_is_a_test.txt') '.txt' @@ -124,9 +174,18 @@ def get_extension(path: str) -> str: def get_all_extensions(path: str) -> List[str]: """Return the extensions of a file or path in order. + Args: + path: the path from which to extract all extensions. + + Returns: + a list containing each extension which may be empty. + >>> get_all_extensions('/home/scott/foo.tar.gz.1') ['.tar', '.gz', '.1'] + >>> get_all_extensions('/home/scott/foobar') + [] + """ ret = [] while True: @@ -142,6 +201,12 @@ def get_all_extensions(path: str) -> List[str]: def without_path(filespec: str) -> str: """Returns the base filename without any leading path. + Args: + filespec: path to remove leading directories from + + Returns: + filespec without leading dir components. + >>> without_path('/home/scott/foo.py') 'foo.py' @@ -156,9 +221,19 @@ def get_path(filespec: str) -> str: """Returns just the path of the filespec by removing the filename and extension. + Args: + filespec: path to remove filename / extension(s) from + + Returns: + filespec with just the leading directory components and no + filename or extension(s) + >>> get_path('/home/scott/foobar.py') '/home/scott' + >>> get_path('/home/scott/test.1.2.3.gz') + '/home/scott' + >>> get_path('~scott/frapp.txt') '~scott' @@ -169,6 +244,12 @@ def get_path(filespec: str) -> str: def get_canonical_path(filespec: str) -> str: """Returns a canonicalized absolute path. + Args: + filespec: the path to canonicalize + + Returns: + the canonicalized path + >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt') '/usr/home/scott/foo.txt' @@ -176,11 +257,18 @@ def get_canonical_path(filespec: str) -> str: return os.path.realpath(filespec) -def create_path_if_not_exist(path, on_error=None): +def create_path_if_not_exist(path, on_error=None) -> None: """ - Attempts to create path if it does not exist. If on_error is - specified, it is called with an exception if one occurs, otherwise - exception is rethrown. + Attempts to create path if it does not exist already. + + .. warning:: + + Files are created with mode 0x0777 (i.e. world read/writeable). + + Args: + path: the path to attempt to create + on_error: If True, it's invoked on error conditions. Otherwise + any exceptions are raised. >>> import uuid >>> import os @@ -209,21 +297,47 @@ def create_path_if_not_exist(path, on_error=None): def does_file_exist(filename: str) -> bool: """Returns True if a file exists and is a normal file. + Args: + filename: filename to check + + Returns: + True if filename exists and is a normal file. + >>> does_file_exist(__file__) True + >>> does_file_exist('/tmp/2492043r9203r9230r9230r49230r42390r4230') + False """ return os.path.exists(filename) and os.path.isfile(filename) def file_is_readable(filename: str) -> bool: + """True if file exists, is a normal file and is readable by the + current process. False otherwise. + + Args: + filename: the filename to check for read access + """ return does_file_exist(filename) and os.access(filename, os.R_OK) def file_is_writable(filename: str) -> bool: + """True if file exists, is a normal file and is writable by the + current process. False otherwise. + + Args: + filename: the file to check for write access. + """ return does_file_exist(filename) and os.access(filename, os.W_OK) def file_is_executable(filename: str) -> bool: + """True if file exists, is a normal file and is executable by the + current process. False otherwise. + + Args: + filename: the file to check for execute access. + """ return does_file_exist(filename) and os.access(filename, os.X_OK) @@ -232,6 +346,8 @@ def does_directory_exist(dirname: str) -> bool: >>> does_directory_exist('/tmp') True + >>> does_directory_exist('/xyzq/21341') + False """ return os.path.exists(dirname) and os.path.isdir(dirname) @@ -242,7 +358,14 @@ def does_path_exist(pathname: str) -> bool: def get_file_size(filename: str) -> int: - """Returns the size of a file in bytes.""" + """Returns the size of a file in bytes. + + Args: + filename: the filename to size + + Returns: + size of filename in bytes + """ return os.path.getsize(filename) @@ -291,7 +414,14 @@ def is_same_file(file1: str, file2: str) -> bool: def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]: - """Stats the file and returns an os.stat_result or None on error.""" + """Stats the file and returns an os.stat_result or None on error. + + Args: + filename: the file whose timestamps to fetch + + Returns: + the os.stat_result or None to indicate an error occurred + """ try: return os.stat(filename) except Exception as e: @@ -299,7 +429,23 @@ def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]: return None -def get_file_raw_timestamp(filename: str, extractor) -> Optional[float]: +def get_file_raw_timestamp( + filename: str, extractor: Callable[[os.stat_result], Optional[float]] +) -> Optional[float]: + """Stat a file and, if successful, use extractor to fetch some + subset of the information in the os.stat_result. See also + :meth:`get_file_raw_atime`, :meth:`get_file_raw_mtime`, and + :meth:`get_file_raw_ctime` which just call this with a lambda + extractor. + + Args: + filename: the filename to stat + extractor: Callable that takes a os.stat_result and produces + something useful(?) with it. + + Returns: + whatever the extractor produced or None on error. + """ tss = get_file_raw_timestamps(filename) if tss is not None: return extractor(tss) @@ -307,19 +453,44 @@ def get_file_raw_timestamp(filename: str, extractor) -> Optional[float]: def get_file_raw_atime(filename: str) -> Optional[float]: + """Get a file's raw access time or None on error. + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + and :meth:`get_file_atime_age_seconds`. + """ return get_file_raw_timestamp(filename, lambda x: x.st_atime) def get_file_raw_mtime(filename: str) -> Optional[float]: + """Get a file's raw modification time or None on error. + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + and :meth:`get_file_mtime_age_seconds`. + """ return get_file_raw_timestamp(filename, lambda x: x.st_mtime) def get_file_raw_ctime(filename: str) -> Optional[float]: + """Get a file's raw creation time or None on error. + + See also :meth:`get_file_ctime_as_datetime`, + :meth:`get_file_ctime_timedelta`, + and :meth:`get_file_ctime_age_seconds`. + """ return get_file_raw_timestamp(filename, lambda x: x.st_ctime) def get_file_md5(filename: str) -> str: - """Hashes filename's contents and returns an MD5.""" + """Hashes filename's disk contents and returns the MD5 digest. + + Args: + filename: the file whose contents to hash + + Returns: + the MD5 digest of the file's contents. Raises on errors. + """ file_hash = hashlib.md5() with open(filename, "rb") as f: chunk = f.read(8192) @@ -330,18 +501,39 @@ def get_file_md5(filename: str) -> str: def set_file_raw_atime(filename: str, atime: float): + """Sets a file's raw access time. + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + :meth:`get_file_atime_age_seconds`, + and :meth:`get_file_raw_atime`. + """ mtime = get_file_raw_mtime(filename) assert mtime is not None os.utime(filename, (atime, mtime)) def set_file_raw_mtime(filename: str, mtime: float): + """Sets a file's raw modification time. + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + :meth:`get_file_mtime_age_seconds`, + and :meth:`get_file_raw_mtime`. + """ atime = get_file_raw_atime(filename) assert atime is not None os.utime(filename, (atime, mtime)) def set_file_raw_atime_and_mtime(filename: str, ts: float = None): + """Sets both a file's raw modification and access times + + Args: + filename: the file whose times to set + ts: the raw time to set or None to indicate time should be + set to the current time. + """ if ts is not None: os.utime(filename, (ts, ts)) else: @@ -349,6 +541,7 @@ def set_file_raw_atime_and_mtime(filename: str, ts: float = None): def convert_file_timestamp_to_datetime(filename: str, producer) -> Optional[datetime.datetime]: + """Convert a raw file timestamp into a python datetime.""" ts = producer(filename) if ts is not None: return datetime.datetime.fromtimestamp(ts) @@ -356,18 +549,41 @@ def convert_file_timestamp_to_datetime(filename: str, producer) -> Optional[date def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]: + """Fetch a file's access time as a python datetime. + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + :meth:`get_file_atime_age_seconds`, + :meth:`describe_file_atime`, + and :meth:`get_file_raw_atime`. + """ return convert_file_timestamp_to_datetime(filename, get_file_raw_atime) def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]: + """Fetches a file's modification time as a python datetime. + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + :meth:`get_file_mtime_age_seconds`, + and :meth:`get_file_raw_mtime`. + """ return convert_file_timestamp_to_datetime(filename, get_file_raw_mtime) def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]: + """Fetches a file's creation time as a python datetime. + + See also :meth:`get_file_ctime_as_datetime`, + :meth:`get_file_ctime_timedelta`, + :meth:`get_file_ctime_age_seconds`, + and :meth:`get_file_raw_ctime`. + """ return convert_file_timestamp_to_datetime(filename, get_file_raw_ctime) def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]: + """~Internal helper""" now = time.time() ts = get_file_raw_timestamps(filename) if ts is None: @@ -377,18 +593,41 @@ def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]: def get_file_atime_age_seconds(filename: str) -> Optional[int]: + """Gets a file's access time as an age in seconds (ago). + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + :meth:`get_file_atime_age_seconds`, + :meth:`describe_file_atime`, + and :meth:`get_file_raw_atime`. + """ return get_file_timestamp_age_seconds(filename, lambda x: x.st_atime) def get_file_ctime_age_seconds(filename: str) -> Optional[int]: + """Gets a file's creation time as an age in seconds (ago). + + See also :meth:`get_file_ctime_as_datetime`, + :meth:`get_file_ctime_timedelta`, + :meth:`get_file_ctime_age_seconds`, + and :meth:`get_file_raw_ctime`. + """ return get_file_timestamp_age_seconds(filename, lambda x: x.st_ctime) def get_file_mtime_age_seconds(filename: str) -> Optional[int]: + """Gets a file's modification time as seconds (ago). + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + :meth:`get_file_mtime_age_seconds`, + and :meth:`get_file_raw_mtime`. + """ return get_file_timestamp_age_seconds(filename, lambda x: x.st_mtime) def get_file_timestamp_timedelta(filename: str, extractor) -> Optional[datetime.timedelta]: + """~Internal helper""" age = get_file_timestamp_age_seconds(filename, extractor) if age is not None: return datetime.timedelta(seconds=float(age)) @@ -396,18 +635,42 @@ def get_file_timestamp_timedelta(filename: str, extractor) -> Optional[datetime. def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]: + """How long ago was a file accessed as a timedelta? + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + :meth:`get_file_atime_age_seconds`, + :meth:`describe_file_atime`, + and :meth:`get_file_raw_atime`. + """ return get_file_timestamp_timedelta(filename, lambda x: x.st_atime) def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]: + """How long ago was a file created as a timedelta? + + See also :meth:`get_file_ctime_as_datetime`, + :meth:`get_file_ctime_timedelta`, + :meth:`get_file_ctime_age_seconds`, + and :meth:`get_file_raw_ctime`. + """ return get_file_timestamp_timedelta(filename, lambda x: x.st_ctime) def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]: + """ + Gets a file's modification time as a python timedelta. + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + :meth:`get_file_mtime_age_seconds`, + and :meth:`get_file_raw_mtime`. + """ return get_file_timestamp_timedelta(filename, lambda x: x.st_mtime) def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]: + """~Internal helper""" from datetime_utils import describe_duration, describe_duration_briefly age = get_file_timestamp_age_seconds(filename, extractor) @@ -420,27 +683,61 @@ def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optiona def describe_file_atime(filename: str, *, brief=False) -> Optional[str]: + """ + Describe how long ago a file was accessed. + + See also :meth:`get_file_atime_as_datetime`, + :meth:`get_file_atime_timedelta`, + :meth:`get_file_atime_age_seconds`, + :meth:`describe_file_atime`, + and :meth:`get_file_raw_atime`. + """ return describe_file_timestamp(filename, lambda x: x.st_atime, brief=brief) def describe_file_ctime(filename: str, *, brief=False) -> Optional[str]: + """Describes a file's creation time. + + See also :meth:`get_file_ctime_as_datetime`, + :meth:`get_file_ctime_timedelta`, + :meth:`get_file_ctime_age_seconds`, + and :meth:`get_file_raw_ctime`. + """ return describe_file_timestamp(filename, lambda x: x.st_ctime, brief=brief) def describe_file_mtime(filename: str, *, brief=False) -> Optional[str]: + """ + Describes how long ago a file was modified. + + See also :meth:`get_file_mtime_as_datetime`, + :meth:`get_file_mtime_timedelta`, + :meth:`get_file_mtime_age_seconds`, + and :meth:`get_file_raw_mtime`. + """ return describe_file_timestamp(filename, lambda x: x.st_mtime, brief=brief) def touch_file(filename: str, *, mode: Optional[int] = 0o666): + """Like unix "touch" command's semantics: update the timestamp + of a file to the current time if the file exists. Create the + file if it doesn't exist. + + Args: + filename: the filename + mode: the mode to create the file with + """ pathlib.Path(filename, mode=mode).touch() def expand_globs(in_filename: str): + """Expands shell globs (* and ? wildcards) to the matching files.""" for filename in glob.glob(in_filename): yield filename def get_files(directory: str): + """Returns the files in a directory as a generator.""" for filename in os.listdir(directory): full_path = join(directory, filename) if isfile(full_path) and exists(full_path): @@ -448,6 +745,7 @@ def get_files(directory: str): def get_directories(directory: str): + """Returns the subdirectories in a directory as a generator.""" for d in os.listdir(directory): full_path = join(directory, d) if not isfile(full_path) and exists(full_path): @@ -455,6 +753,7 @@ def get_directories(directory: str): def get_files_recursive(directory: str): + """Find the files and directories under a root recursively.""" for filename in get_files(directory): yield filename for subdir in get_directories(directory): @@ -465,7 +764,6 @@ def get_files_recursive(directory: str): class FileWriter(contextlib.AbstractContextManager): """A helper that writes a file to a temporary location and then moves it atomically to its ultimate destination on close. - """ def __init__(self, filename: str) -> None: