3 # © Copyright 2021-2022, Scott Gasch
5 """Utilities for working with files."""
17 from os.path import exists, isfile, join
18 from typing import Callable, List, Literal, Optional, TextIO
19 from uuid import uuid4
21 logger = logging.getLogger(__name__)
def remove_newlines(x: str) -> str:
    r"""Return x with every newline character removed.

    Only '\n' is stripped; carriage returns and all other whitespace
    are left in place.

    >>> remove_newlines('one\ntwo\n')
    'onetwo'

    """
    return x.replace('\n', '')
28 def strip_whitespace(x: str) -> str:
def remove_hash_comments(x: str) -> str:
    r"""Strip a '#' comment (the '#' and everything after it) from a line.

    Whitespace preceding the '#' is kept.  The pattern is anchored with
    '$' and compiled without re.MULTILINE, so this is meant to be applied
    to one line at a time: a comment that is followed by further lines of
    text in the same string is not removed.

    >>> remove_hash_comments('x = 1  # set x')
    'x = 1  '

    >>> remove_hash_comments('no comment here')
    'no comment here'

    """
    return re.sub(r'#.*$', '', x)
39 skip_blank_lines=False,
40 line_transformers: Optional[List[Callable[[str], str]]] = None,
43 if not file_is_readable(filename):
44 raise Exception(f'{filename} can\'t be read.')
45 with open(filename) as rf:
47 if line_transformers is not None:
48 for transformation in line_transformers:
49 line = transformation(line)
50 if skip_blank_lines and line == '':
56 def remove(path: str) -> None:
57 """Deletes a file. Raises if path refers to a directory or a file
61 >>> filename = '/tmp/file_utils_test_file'
62 >>> os.system(f'touch {filename}')
64 >>> does_file_exist(filename)
67 >>> does_file_exist(filename)
74 def delete(path: str) -> None:
def without_extension(path: str) -> str:
    """Remove one extension (the last one) from a file or path.

    Any leading directory components are preserved.  A path with no
    extension is returned unchanged.

    >>> without_extension('foobar.txt')
    'foobar'

    >>> without_extension('/home/scott/frapp.py')
    '/home/scott/frapp'

    >>> without_extension('a.b.c.tar.gz')
    'a.b.c.tar'

    >>> without_extension('foobar')
    'foobar'

    """
    root, _ = os.path.splitext(path)
    return root
97 def without_all_extensions(path: str) -> str:
98 """Removes all extensions from a path; handles multiple extensions
99 like foobar.tar.gz -> foobar.
101 >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
106 path = without_extension(path)
def get_extension(path: str) -> str:
    """Extract and return one extension (the last one) from a file or path.

    The returned extension includes its leading dot.  An empty string is
    returned when the path has no extension.

    >>> get_extension('this_is_a_test.txt')
    '.txt'

    >>> get_extension('/home/scott/test.py')
    '.py'

    >>> get_extension('foobar')
    ''

    """
    _, extension = os.path.splitext(path)
    return extension
126 def get_all_extensions(path: str) -> List[str]:
127 """Return the extensions of a file or path in order.
129 >>> get_all_extensions('/home/scott/foo.tar.gz.1')
130 ['.tar', '.gz', '.1']
135 ext = get_extension(path)
136 path = without_extension(path)
def without_path(filespec: str) -> str:
    """Returns the base filename without any leading path.

    A filespec that has no directory component is returned unchanged.

    >>> without_path('/home/scott/foo.py')
    'foo.py'

    >>> without_path('foo.py')
    'foo.py'

    """
    _, tail = os.path.split(filespec)
    return tail
def get_path(filespec: str) -> str:
    """Returns just the directory part of filespec, dropping the filename.

    The split is purely textual: nothing is looked up on disk and a
    leading '~' is not expanded.

    >>> get_path('/home/scott/foobar.py')
    '/home/scott'

    >>> get_path('~scott/frapp.txt')
    '~scott'

    """
    head, _ = os.path.split(filespec)
    return head
def get_canonical_path(filespec: str) -> str:
    """Returns a canonicalized absolute path.

    This is a thin wrapper around os.path.realpath, so symbolic links
    along the way are resolved and '.' / '..' components are collapsed.
    Because symlinks are followed, the result depends on the host's
    filesystem layout; the first example below only holds on a machine
    where /home is a link to /usr/home and is therefore skipped.

    >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')  # doctest: +SKIP
    '/usr/home/scott/foo.txt'

    >>> get_canonical_path('/')
    '/'

    """
    return os.path.realpath(filespec)
181 def create_path_if_not_exist(path, on_error=None):
183 Attempts to create path if it does not exist. If on_error is
184 specified, it is called with an exception if one occurs, otherwise
185 exception is rethrown.
189 >>> path = os.path.join("/tmp", str(uuid.uuid4()), str(uuid.uuid4()))
190 >>> os.path.exists(path)
192 >>> create_path_if_not_exist(path)
193 >>> os.path.exists(path)
196 logger.debug("Creating path %s", path)
197 previous_umask = os.umask(0)
200 os.chmod(path, 0o777)
201 except OSError as ex:
202 if ex.errno != errno.EEXIST and not os.path.isdir(path):
203 if on_error is not None:
208 os.umask(previous_umask)
def does_file_exist(filename: str) -> bool:
    """Returns True if a file exists and is a normal file.

    Directories and nonexistent paths both yield False.

    >>> does_file_exist(__file__)
    True

    """
    # os.path.isfile is already False for paths that do not exist, so a
    # separate os.path.exists check adds nothing but an extra stat call.
    return os.path.isfile(filename)
def file_is_readable(filename: str) -> bool:
    """Returns True if filename exists, is a normal file and is readable.

    Readability is determined by os.access with os.R_OK.
    """
    if not does_file_exist(filename):
        return False
    return os.access(filename, os.R_OK)
def file_is_writable(filename: str) -> bool:
    """Returns True if filename exists, is a normal file and is writable.

    Writability is determined by os.access with os.W_OK.
    """
    if not does_file_exist(filename):
        return False
    return os.access(filename, os.W_OK)
def file_is_executable(filename: str) -> bool:
    """Returns True if filename exists, is a normal file and is executable.

    Executability is determined by os.access with os.X_OK.
    """
    if not does_file_exist(filename):
        return False
    return os.access(filename, os.X_OK)
def does_directory_exist(dirname: str) -> bool:
    """Returns True if dirname exists and is a directory.

    Regular files and nonexistent paths both yield False.

    >>> does_directory_exist('/tmp')
    True

    """
    # os.path.isdir is already False for paths that do not exist, so a
    # separate os.path.exists check adds nothing but an extra stat call.
    return os.path.isdir(dirname)
def does_path_exist(pathname: str) -> bool:
    """Just a more verbose wrapper around os.path.exists.

    True for anything that exists at pathname (file or directory alike),
    False otherwise.
    """
    return os.path.exists(pathname)
def get_file_size(filename: str) -> int:
    """Returns the size of a file in bytes.

    Thin wrapper around os.path.getsize, which raises OSError if the
    file does not exist or cannot be accessed.
    """
    return os.path.getsize(filename)
def is_normal_file(filename: str) -> bool:
    """Returns True if filename is a normal file.

    Directories and nonexistent paths both yield False.

    >>> is_normal_file(__file__)
    True

    """
    return os.path.isfile(filename)
def is_directory(filename: str) -> bool:
    """Returns True if filename is a directory.

    Regular files and nonexistent paths both yield False.

    >>> is_directory('/tmp')
    True

    """
    return os.path.isdir(filename)
def is_symlink(filename: str) -> bool:
    """True if filename is a symlink, False otherwise.

    The check is made on filename itself (via os.path.islink), not on
    whatever it points to.  Whether well-known paths such as /tmp or
    /home are symlinks varies from host to host, so the example below
    sticks to a path that does not exist, for which the answer is
    always False.

    >>> is_symlink('/this/path/does/not/exist')
    False

    """
    return os.path.islink(filename)
def is_same_file(file1: str, file2: str) -> bool:
    """Returns True if the two files are the same inode.

    Thin wrapper around os.path.samefile, which stats both paths and
    therefore raises OSError if either of them cannot be stat'ed.

    >>> is_same_file('/tmp', '/tmp/../tmp')
    True

    >>> is_same_file('/tmp', '/home')
    False

    """
    return os.path.samefile(file1, file2)
295 def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
296 """Stats the file and returns an os.stat_result or None on error."""
298 return os.stat(filename)
299 except Exception as e:
304 def get_file_raw_timestamp(filename: str, extractor) -> Optional[float]:
305 tss = get_file_raw_timestamps(filename)
307 return extractor(tss)
def get_file_raw_atime(filename: str) -> Optional[float]:
    """Returns filename's raw access time (the st_atime stat field).

    Delegates to get_file_raw_timestamp with an extractor that reads
    st_atime; whatever that helper returns is passed straight through.
    """
    return get_file_raw_timestamp(filename, lambda st: st.st_atime)
def get_file_raw_mtime(filename: str) -> Optional[float]:
    """Returns filename's raw modification time (the st_mtime stat field).

    Delegates to get_file_raw_timestamp with an extractor that reads
    st_mtime; whatever that helper returns is passed straight through.
    """
    return get_file_raw_timestamp(filename, lambda st: st.st_mtime)
def get_file_raw_ctime(filename: str) -> Optional[float]:
    """Returns filename's raw st_ctime stat field.

    Delegates to get_file_raw_timestamp with an extractor that reads
    st_ctime; whatever that helper returns is passed straight through.
    """
    return get_file_raw_timestamp(filename, lambda st: st.st_ctime)
323 def get_file_md5(filename: str) -> str:
324 """Hashes filename's contents and returns an MD5."""
325 file_hash = hashlib.md5()
326 with open(filename, "rb") as f:
329 file_hash.update(chunk)
331 return file_hash.hexdigest()
def set_file_raw_atime(filename: str, atime: float) -> None:
    """Sets filename's access time to atime, keeping its current mtime.

    os.utime always writes both timestamps, so the existing modification
    time is read first and written back unchanged.
    """
    mtime = get_file_raw_mtime(filename)
    # NOTE(review): this assert guards against a None mtime but is
    # stripped when Python runs with -O.
    assert mtime is not None
    os.utime(filename, (atime, mtime))
def set_file_raw_mtime(filename: str, mtime: float) -> None:
    """Sets filename's modification time to mtime, keeping its current atime.

    os.utime always writes both timestamps, so the existing access time
    is read first and written back unchanged.
    """
    atime = get_file_raw_atime(filename)
    # NOTE(review): this assert guards against a None atime but is
    # stripped when Python runs with -O.
    assert atime is not None
    os.utime(filename, (atime, mtime))
346 def set_file_raw_atime_and_mtime(filename: str, ts: float = None):
348 os.utime(filename, (ts, ts))
350 os.utime(filename, None)
353 def convert_file_timestamp_to_datetime(filename: str, producer) -> Optional[datetime.datetime]:
354 ts = producer(filename)
356 return datetime.datetime.fromtimestamp(ts)
def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's access time as a datetime.

    The raw value from get_file_raw_atime is converted by
    convert_file_timestamp_to_datetime, which builds the result with
    datetime.datetime.fromtimestamp.
    """
    return convert_file_timestamp_to_datetime(filename, get_file_raw_atime)
def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's modification time as a datetime.

    The raw value from get_file_raw_mtime is converted by
    convert_file_timestamp_to_datetime, which builds the result with
    datetime.datetime.fromtimestamp.
    """
    return convert_file_timestamp_to_datetime(filename, get_file_raw_mtime)
def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's st_ctime as a datetime.

    The raw value from get_file_raw_ctime is converted by
    convert_file_timestamp_to_datetime, which builds the result with
    datetime.datetime.fromtimestamp.
    """
    return convert_file_timestamp_to_datetime(filename, get_file_raw_ctime)
372 def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
374 ts = get_file_raw_timestamps(filename)
377 result = extractor(ts)
def get_file_atime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age, in seconds, of filename's access time.

    Delegates to get_file_timestamp_age_seconds with an extractor that
    reads st_atime from the stat result.
    """
    return get_file_timestamp_age_seconds(filename, lambda st: st.st_atime)
def get_file_ctime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age, in seconds, of filename's st_ctime.

    Delegates to get_file_timestamp_age_seconds with an extractor that
    reads st_ctime from the stat result.
    """
    return get_file_timestamp_age_seconds(filename, lambda st: st.st_ctime)
def get_file_mtime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age, in seconds, of filename's modification time.

    Delegates to get_file_timestamp_age_seconds with an extractor that
    reads st_mtime from the stat result.
    """
    return get_file_timestamp_age_seconds(filename, lambda st: st.st_mtime)
393 def get_file_timestamp_timedelta(filename: str, extractor) -> Optional[datetime.timedelta]:
394 age = get_file_timestamp_age_seconds(filename, extractor)
396 return datetime.timedelta(seconds=float(age))
def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's access time as a timedelta.

    Delegates to get_file_timestamp_timedelta with an extractor that
    reads st_atime from the stat result.
    """
    return get_file_timestamp_timedelta(filename, lambda st: st.st_atime)
def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's st_ctime as a timedelta.

    Delegates to get_file_timestamp_timedelta with an extractor that
    reads st_ctime from the stat result.
    """
    return get_file_timestamp_timedelta(filename, lambda st: st.st_ctime)
def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's modification time as a timedelta.

    Delegates to get_file_timestamp_timedelta with an extractor that
    reads st_mtime from the stat result.
    """
    return get_file_timestamp_timedelta(filename, lambda st: st.st_mtime)
412 def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
413 from datetime_utils import describe_duration, describe_duration_briefly
415 age = get_file_timestamp_age_seconds(filename, extractor)
419 return describe_duration_briefly(age)
421 return describe_duration(age)
def describe_file_atime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a human-readable description of the age of filename's atime.

    Delegates to describe_file_timestamp with an extractor that reads
    st_atime; brief is forwarded unchanged and selects that helper's
    shorter output format.
    """
    return describe_file_timestamp(filename, lambda st: st.st_atime, brief=brief)
def describe_file_ctime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a human-readable description of the age of filename's ctime.

    Delegates to describe_file_timestamp with an extractor that reads
    st_ctime; brief is forwarded unchanged and selects that helper's
    shorter output format.
    """
    return describe_file_timestamp(filename, lambda st: st.st_ctime, brief=brief)
def describe_file_mtime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a human-readable description of the age of filename's mtime.

    Delegates to describe_file_timestamp with an extractor that reads
    st_mtime; brief is forwarded unchanged and selects that helper's
    shorter output format.
    """
    return describe_file_timestamp(filename, lambda st: st.st_mtime, brief=brief)
def touch_file(filename: str, *, mode: Optional[int] = 0o666) -> None:
    """Creates filename if it does not exist, else updates its timestamps.

    Args:
        filename: path of the file to touch.
        mode: permission bits used if the file has to be created (the
            process umask still applies).  None means "use
            pathlib.Path.touch's own default".
    """
    target = pathlib.Path(filename)
    # mode belongs to touch(), not to the Path constructor: Path() never
    # applied it and newer Python versions reject the stray keyword.
    if mode is None:
        target.touch()
    else:
        target.touch(mode=mode)
440 def expand_globs(in_filename: str):
441 for filename in glob.glob(in_filename):
445 def get_files(directory: str):
446 for filename in os.listdir(directory):
447 full_path = join(directory, filename)
448 if isfile(full_path) and exists(full_path):
452 def get_directories(directory: str):
453 for d in os.listdir(directory):
454 full_path = join(directory, d)
455 if not isfile(full_path) and exists(full_path):
459 def get_files_recursive(directory: str):
460 for filename in get_files(directory):
462 for subdir in get_directories(directory):
463 for file_or_directory in get_files_recursive(subdir):
464 yield file_or_directory
467 class FileWriter(contextlib.AbstractContextManager):
468 """A helper that writes a file to a temporary location and then moves
469 it atomically to its ultimate destination on close.
473 def __init__(self, filename: str) -> None:
474 self.filename = filename
476 self.tempfile = f'{filename}-{uuid}.tmp'
477 self.handle: Optional[TextIO] = None
479 def __enter__(self) -> TextIO:
480 assert not does_path_exist(self.tempfile)
481 self.handle = open(self.tempfile, mode="w")
484 def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]:
485 if self.handle is not None:
487 cmd = f'/bin/mv -f {self.tempfile} {self.filename}'
490 raise Exception(f'{cmd} failed, exit value {ret>>8}!')
494 if __name__ == '__main__':