#!/usr/bin/env python3
+# © Copyright 2021-2022, Scott Gasch
+
"""Utilities for working with files."""
+import contextlib
import datetime
import errno
+import glob
import hashlib
import logging
import os
import pathlib
+import re
import time
-from typing import Optional
-import glob
-from os.path import isfile, join, exists
+from os.path import exists, isfile, join
+from typing import Callable, List, Literal, Optional, TextIO
+from uuid import uuid4
logger = logging.getLogger(__name__)
def remove_newlines(x: str) -> str:
    """Return x with every newline character deleted.

    >>> remove_newlines('one\\ntwo\\n')
    'onetwo'

    """
    pieces = x.split('\n')
    return ''.join(pieces)
+
+
def strip_whitespace(x: str) -> str:
    """Return x with leading and trailing whitespace removed.

    >>> strip_whitespace('  padded \\n')
    'padded'

    """
    left_trimmed = x.lstrip()
    return left_trimmed.rstrip()
+
+
def remove_hash_comments(x: str) -> str:
    """Strip a trailing '#' comment from a single line of text.

    Everything from the first '#' to the end of the line is removed;
    a trailing newline, if present, is left in place.

    >>> remove_hash_comments('x = 1  # set x')
    'x = 1  '

    """
    comment = re.compile(r'#.*$')
    return comment.sub('', x)
+
+
def slurp_file(
    filename: str,
    *,
    skip_blank_lines=False,
    line_transformers: Optional[List[Callable[[str], str]]] = None,
):
    """Read a text file and return its lines as a list of strings.

    Each line is passed through every callable in line_transformers,
    in order, before being collected.  When skip_blank_lines is set, a
    line is dropped only if it is exactly the empty string after the
    transformers have run; note that lines read from the file still
    carry their trailing newline unless a transformer removes it.

    Raises an Exception if file_is_readable(filename) is False.
    """
    collected = []
    if not file_is_readable(filename):
        raise Exception(f'{filename} can\'t be read.')
    pipeline = [] if line_transformers is None else line_transformers
    with open(filename) as rf:
        for text in rf:
            for step in pipeline:
                text = step(text)
            if text == '' and skip_blank_lines:
                continue
            collected.append(text)
    return collected
+
+
def remove(path: str) -> None:
    """Delete the file at path.

    An exception is raised when path names a directory or when
    nothing exists at path.

    >>> import os
    >>> filename = '/tmp/file_utils_test_file'
    >>> os.system(f'touch {filename}')
    0
    >>> does_file_exist(filename)
    True
    >>> remove(filename)
    >>> does_file_exist(filename)
    False

    """
    # os.unlink is the same operation as os.remove under its Unix name.
    os.unlink(path)
+
+
def delete(path: str) -> None:
    """Delete the file at path; raises if the file cannot be removed."""
    # os.unlink is the same operation as os.remove under its Unix name.
    os.unlink(path)
+
+
def without_extension(path: str) -> str:
    """Strip the final extension (if any) from a filename or path.

    >>> without_extension('foobar.txt')
    'foobar'

    >>> without_extension('/home/scott/frapp.py')
    '/home/scott/frapp'

    >>> without_extension('a.b.c.tar.gz')
    'a.b.c.tar'

    >>> without_extension('foobar')
    'foobar'

    """
    stem, _ext = os.path.splitext(path)
    return stem
+
+
def without_all_extensions(path: str) -> str:
    """Removes all extensions from a path; handles multiple extensions
    like foobar.tar.gz -> foobar.

    Only the final path component is affected: dots in directory names
    and the leading dot of a hidden file are left alone.

    >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
    '/home/scott/foobar'

    >>> without_all_extensions('/home/scott.d/foobar.tar.gz')
    '/home/scott.d/foobar'

    >>> without_all_extensions('.bashrc')
    '.bashrc'

    """
    # Loop on "was an extension actually removed?" rather than on
    # "does the path contain a dot?".  The latter never terminates for
    # inputs such as '/a.d/foo' or '.bashrc', where a dot remains but
    # os.path.splitext has nothing left to strip.
    while True:
        root, ext = os.path.splitext(path)
        if not ext:
            return path
        path = root
+
+
def get_extension(path: str) -> str:
    """Return the final extension of a filename or path, dot included,
    or the empty string when there is none.

    >>> get_extension('this_is_a_test.txt')
    '.txt'

    >>> get_extension('/home/scott/test.py')
    '.py'

    >>> get_extension('foobar')
    ''

    """
    _stem, ext = os.path.splitext(path)
    return ext
+
+
def get_all_extensions(path: str) -> List[str]:
    """List every extension of a filename or path, in the order they
    appear in the name.

    >>> get_all_extensions('/home/scott/foo.tar.gz.1')
    ['.tar', '.gz', '.1']

    """
    found: List[str] = []
    remaining = path
    while True:
        # Peel one extension off the right-hand end per iteration.
        remaining, ext = os.path.splitext(remaining)
        if not ext:
            break
        found.append(ext)
    # Extensions were gathered right-to-left; hand them back left-to-right.
    return found[::-1]
+
+
def without_path(filespec: str) -> str:
    """Return the final component of filespec, i.e. the filename with
    any leading directories removed.

    >>> without_path('/home/scott/foo.py')
    'foo.py'

    >>> without_path('foo.py')
    'foo.py'

    """
    return os.path.basename(filespec)
+
+
def get_path(filespec: str) -> str:
    """Return the directory portion of filespec: everything before the
    final path component.

    >>> get_path('/home/scott/foobar.py')
    '/home/scott'

    >>> get_path('~scott/frapp.txt')
    '~scott'

    """
    return os.path.dirname(filespec)
+
+
def get_canonical_path(filespec: str) -> str:
    """Return the result of os.path.realpath on filespec: a
    canonicalized absolute path.

    >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')
    '/usr/home/scott/foo.txt'

    """
    canonical = os.path.realpath(filespec)
    return canonical
+
+
def create_path_if_not_exist(path, on_error=None):
"""
Attempts to create path if it does not exist. If on_error is
>>> os.path.exists(path)
True
"""
- logger.debug(f"Creating path {path}")
+ logger.debug("Creating path %s", path)
previous_umask = os.umask(0)
try:
os.makedirs(path)
def does_file_exist(filename: str) -> bool:
    """Return True when filename exists and is a regular file, False
    otherwise.

    >>> does_file_exist(__file__)
    True
    """
    if not os.path.exists(filename):
        return False
    return os.path.isfile(filename)
def file_is_readable(filename: str) -> bool:
    """Return True when filename is an existing regular file for which
    os.access reports read permission."""
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.R_OK)
+
+
def file_is_writable(filename: str) -> bool:
    """Return True when filename is an existing regular file for which
    os.access reports write permission."""
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.W_OK)
+
+
def file_is_executable(filename: str) -> bool:
    """Return True when filename is an existing regular file for which
    os.access reports execute permission."""
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.X_OK)
+
+
def does_directory_exist(dirname: str) -> bool:
    """Return True when dirname exists and is a directory, False
    otherwise.

    >>> does_directory_exist('/tmp')
    True
    """
    if not os.path.exists(dirname):
        return False
    return os.path.isdir(dirname)
def does_path_exist(pathname: str) -> bool:
    """Return whatever os.path.exists reports for pathname; this is
    only a more descriptively named wrapper."""
    present = os.path.exists(pathname)
    return present
def get_file_size(filename: str) -> int:
    """Return the size of filename in bytes, as reported by stat."""
    info = os.stat(filename)
    return info.st_size
def is_normal_file(filename: str) -> bool:
    """Return True when os.path.isfile reports that filename is a
    regular file.

    >>> is_normal_file(__file__)
    True
    """
    regular = os.path.isfile(filename)
    return regular
def is_directory(filename: str) -> bool:
    """Return True when os.path.isdir reports that filename is a
    directory.

    >>> is_directory('/tmp')
    True
    """
    directory = os.path.isdir(filename)
    return directory
def is_symlink(filename: str) -> bool:
    """Return True when filename is a symbolic link and False when it
    is not.

    >>> is_symlink('/tmp')
    False

    >>> is_symlink('/home')
    True

    """
    link = os.path.islink(filename)
    return link
def is_same_file(file1: str, file2: str) -> bool:
    """Return True when both paths refer to the same underlying file
    (same inode), False otherwise.  Both paths are stat'ed, so an
    exception is raised if either cannot be.

    >>> is_same_file('/tmp', '/tmp/../tmp')
    True

    >>> is_same_file('/tmp', '/home')
    False

    """
    first = os.stat(file1)
    second = os.stat(file2)
    return os.path.samestat(first, second)
def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
+ """Stats the file and returns an os.stat_result or None on error."""
try:
return os.stat(filename)
except Exception as e:
def get_file_md5(filename: str) -> str:
+ """Hashes filename's contents and returns an MD5."""
file_hash = hashlib.md5()
with open(filename, "rb") as f:
chunk = f.read(8192)
def set_file_raw_atime(filename: str, atime: float):
    """Set filename's access time to atime while keeping the
    modification time that get_file_raw_mtime currently reports."""
    # os.utime always writes both timestamps, so the existing mtime is
    # read first and passed back unchanged.
    current_mtime = get_file_raw_mtime(filename)
    assert current_mtime is not None
    new_times = (atime, current_mtime)
    os.utime(filename, new_times)
def set_file_raw_mtime(filename: str, mtime: float):
    """Set filename's modification time to mtime while keeping the
    access time that get_file_raw_atime currently reports."""
    # os.utime always writes both timestamps, so the existing atime is
    # read first and passed back unchanged.
    current_atime = get_file_raw_atime(filename)
    assert current_atime is not None
    new_times = (current_atime, mtime)
    os.utime(filename, new_times)
os.utime(filename, None)
-def convert_file_timestamp_to_datetime(
- filename: str, producer
-) -> Optional[datetime.datetime]:
+def convert_file_timestamp_to_datetime(filename: str, producer) -> Optional[datetime.datetime]:
ts = producer(filename)
if ts is not None:
return datetime.datetime.fromtimestamp(ts)
return get_file_timestamp_age_seconds(filename, lambda x: x.st_mtime)
-def get_file_timestamp_timedelta(
- filename: str, extractor
-) -> Optional[datetime.timedelta]:
+def get_file_timestamp_timedelta(filename: str, extractor) -> Optional[datetime.timedelta]:
age = get_file_timestamp_age_seconds(filename, extractor)
if age is not None:
return datetime.timedelta(seconds=float(age))
return get_file_timestamp_timedelta(filename, lambda x: x.st_mtime)
-def describe_file_timestamp(
- filename: str, extractor, *, brief=False
-) -> Optional[str]:
+def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
from datetime_utils import describe_duration, describe_duration_briefly
+
age = get_file_timestamp_age_seconds(filename, extractor)
if age is None:
return None
return describe_file_timestamp(filename, lambda x: x.st_mtime, brief=brief)
def touch_file(filename: str, *, mode: Optional[int] = 0o666):
    """Create filename if it does not exist, or update its timestamps
    if it does.

    Args:
        filename: path of the file to touch.
        mode: permission bits requested when the file has to be
            created (the process umask still applies).  None means
            "use pathlib's default".
    """
    target = pathlib.Path(filename)
    # The mode belongs to touch(), not to the Path constructor, which
    # only takes path segments.
    if mode is None:
        target.touch()
    else:
        target.touch(mode=mode)
def expand_globs(in_filename: str):
for subdir in get_directories(directory):
for file_or_directory in get_files_recursive(subdir):
yield file_or_directory
+
+
class FileWriter(contextlib.AbstractContextManager):
    """A helper that writes a file to a temporary location and then moves
    it atomically to its ultimate destination on close.

    The temporary file is created next to the destination (same
    directory, with a random suffix).  Leaving the with-block closes
    the handle and renames the temporary file over the destination;
    this happens whether or not the block raised, and any exception
    from the block is propagated to the caller.
    """

    def __init__(self, filename: str) -> None:
        """Remember the destination and pick a unique temporary name."""
        self.filename = filename
        uuid = uuid4()
        self.tempfile = f'{filename}-{uuid}.tmp'
        self.handle: Optional[TextIO] = None

    def __enter__(self) -> TextIO:
        """Open the temporary file for writing and return its handle."""
        assert not os.path.exists(self.tempfile)
        self.handle = open(self.tempfile, mode="w")
        return self.handle

    def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]:
        """Close the temporary file and rename it over the destination.

        Raises OSError if the rename fails.  Always returns False so
        an exception raised inside the with-block is not suppressed.
        """
        if self.handle is not None:
            self.handle.close()
            # Rename in-process instead of building a '/bin/mv' shell
            # command: filenames containing spaces or shell
            # metacharacters are handled correctly and nothing is ever
            # interpreted by a shell.
            os.replace(self.tempfile, self.filename)
        return False
+
+
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod

    testmod()