3 """Utilities for working with files."""
15 from os.path import exists, isfile, join
16 from typing import List, Optional, TextIO
17 from uuid import uuid4
# Module-level logger, named after this module via __name__.
19 logger = logging.getLogger(__name__)
def remove_newlines(x: str) -> str:
    """Return x with every newline character removed.

    >>> remove_newlines('a\\nb\\n')
    'ab'

    """
    return x.replace('\n', '')
# NOTE(review): only the signature is visible in this view; the body is not
# shown. Presumably it returns x with surrounding whitespace stripped --
# confirm against the full file.
26 def strip_whitespace(x):
def remove_hash_comments(x: str) -> str:
    """Return x with a trailing ``#`` comment removed.

    Everything from the first ``#`` up to the end of the line is dropped;
    whitespace before the ``#`` and a final newline are left in place.
    x is expected to be a single line: no regex flags are used, so ``$``
    only anchors at the end of the string.

    >>> remove_hash_comments('x = 1  # set x')
    'x = 1  '

    """
    return re.sub(r'#.*$', '', x)
# NOTE(review): fragment of a line-reading helper; its `def` line and parts
# of its body are not visible in this view, so only the visible steps are
# described. It raises Exception when file_is_readable(filename) is false,
# opens filename with open()'s default mode, passes a line through every
# callable in line_transformers in order, and tests for an empty line when
# skip_blank_lines is set (the action taken on that test is not visible).
37 skip_blank_lines=False,
41 if not file_is_readable(filename):
42 raise Exception(f'{filename} can\'t be read.')
43 with open(filename) as rf:
45 for transformation in line_transformers:
46 line = transformation(line)
47 if skip_blank_lines and line == '':
# Deletes the file at `path` (per its own docstring); annotated as returning
# None. NOTE(review): only the signature and part of the docstring are
# visible in this view -- the statement that performs the deletion is not
# shown, so confirm the exact behavior against the full file.
53 def remove(path: str) -> None:
54 """Deletes a file. Raises if path refers to a directory or a file
58 >>> filename = '/tmp/file_utils_test_file'
59 >>> os.system(f'touch {filename}')
61 >>> does_file_exist(filename)
64 >>> does_file_exist(filename)
# NOTE(review): only the signature is visible in this view. Judging by the
# name and matching signature this is presumably an alias for remove() --
# confirm against the full file.
71 def delete(path: str) -> None:
def without_extension(path: str) -> str:
    """Remove one extension from a file or path.

    Only the last extension is removed; a path with no extension is
    returned unchanged.

    >>> without_extension('foobar.txt')
    'foobar'

    >>> without_extension('/home/scott/frapp.py')
    '/home/scott/frapp'

    >>> without_extension('a.b.c.tar.gz')
    'a.b.c.tar'

    >>> without_extension('foobar')
    'foobar'

    """
    return os.path.splitext(path)[0]
# Strips every extension from `path` (per its docstring). The one visible
# statement reassigns path = without_extension(path), i.e. removes one
# extension per application.
# NOTE(review): the loop condition driving that statement and the return
# are not visible in this view -- confirm against the full file.
94 def without_all_extensions(path: str) -> str:
95 """Removes all extensions from a path; handles multiple extensions
96 like foobar.tar.gz -> foobar.
98 >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
103 path = without_extension(path)
def get_extension(path: str) -> str:
    """Extract and return one extension from a file or path.

    The returned extension includes its leading dot; an empty string is
    returned when the path has no extension.

    >>> get_extension('this_is_a_test.txt')
    '.txt'

    >>> get_extension('/home/scott/test.py')
    '.py'

    >>> get_extension('foobar')
    ''

    """
    return os.path.splitext(path)[1]
# Returns the extensions of `path` in order (per its docstring and example).
# The visible statements peel one extension at a time: get_extension(path)
# reads the last extension, then without_extension(path) removes it.
# NOTE(review): the surrounding loop, the list being built and the return
# statement are not visible in this view.
123 def get_all_extensions(path: str) -> List[str]:
124 """Return the extensions of a file or path in order.
126 >>> get_all_extensions('/home/scott/foo.tar.gz.1')
127 ['.tar', '.gz', '.1']
132 ext = get_extension(path)
133 path = without_extension(path)
def without_path(filespec: str) -> str:
    """Returns the base filename without any leading path.

    >>> without_path('/home/scott/foo.py')
    'foo.py'

    >>> without_path('foo.py')
    'foo.py'

    """
    return os.path.split(filespec)[1]
def get_path(filespec: str) -> str:
    """Returns just the path of the filespec by removing the filename.

    The path is not expanded or normalized; a bare filename yields an
    empty string.

    >>> get_path('/home/scott/foobar.py')
    '/home/scott'

    >>> get_path('~scott/frapp.txt')
    '~scott'

    """
    return os.path.split(filespec)[0]
def get_canonical_path(filespec: str) -> str:
    """Returns a canonicalized absolute path.

    Symbolic links and ``..`` components are resolved with
    os.path.realpath. The example below is host-specific: its expected
    value assumes /home is a symlink to /usr/home.

    >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')
    '/usr/home/scott/foo.txt'

    """
    return os.path.realpath(filespec)
# Creates `path` if it does not already exist (per its docstring).
# Visible steps: log at debug level, set the process umask to 0 while
# remembering the previous value, chmod the path to 0o777, and restore the
# previous umask at the end. For an OSError whose errno is not EEXIST while
# path is not a directory, on_error is consulted when it is not None; per
# the docstring the exception is rethrown otherwise.
# NOTE(review): the opening of the docstring, the `try:` line, the call that
# actually creates the directory, the on_error/raise lines and the `finally:`
# line are not visible in this view -- confirm against the full file.
178 def create_path_if_not_exist(path, on_error=None):
180 Attempts to create path if it does not exist. If on_error is
181 specified, it is called with an exception if one occurs, otherwise
182 exception is rethrown.
186 >>> path = os.path.join("/tmp", str(uuid.uuid4()), str(uuid.uuid4()))
187 >>> os.path.exists(path)
189 >>> create_path_if_not_exist(path)
190 >>> os.path.exists(path)
193 logger.debug(f"Creating path {path}")
194 previous_umask = os.umask(0)
197 os.chmod(path, 0o777)
198 except OSError as ex:
199 if ex.errno != errno.EEXIST and not os.path.isdir(path):
200 if on_error is not None:
205 os.umask(previous_umask)
def does_file_exist(filename: str) -> bool:
    """Returns True if a file exists and is a normal file.

    >>> does_file_exist(__file__)
    True

    """
    # isfile() is already False for a missing path, so a separate exists()
    # check adds nothing.
    return os.path.isfile(filename)
def file_is_readable(filename: str) -> bool:
    """Returns True if filename is an existing normal file that
    os.access reports as readable (os.R_OK)."""
    return does_file_exist(filename) and os.access(filename, os.R_OK)
def file_is_writable(filename: str) -> bool:
    """Returns True if filename is an existing normal file that
    os.access reports as writable (os.W_OK)."""
    return does_file_exist(filename) and os.access(filename, os.W_OK)
def file_is_executable(filename: str) -> bool:
    """Returns True if filename is an existing normal file that
    os.access reports as executable (os.X_OK)."""
    return does_file_exist(filename) and os.access(filename, os.X_OK)
def does_directory_exist(dirname: str) -> bool:
    """Returns True if a file exists and is a directory.

    >>> does_directory_exist('/tmp')
    True

    """
    # isdir() is already False for a missing path, so a separate exists()
    # check adds nothing.
    return os.path.isdir(dirname)
def does_path_exist(pathname: str) -> bool:
    """Just a more verbose wrapper around os.path.exists."""
    return os.path.exists(pathname)
def get_file_size(filename: str) -> int:
    """Returns the size of a file in bytes.

    Raises OSError (from os.path.getsize) if the file does not exist or
    cannot be accessed.
    """
    return os.path.getsize(filename)
def is_normal_file(filename: str) -> bool:
    """Returns True if filename is a normal file.

    >>> is_normal_file(__file__)
    True

    """
    return os.path.isfile(filename)
def is_directory(filename: str) -> bool:
    """Returns True if filename is a directory.

    >>> is_directory('/tmp')
    True

    """
    return os.path.isdir(filename)
def is_symlink(filename: str) -> bool:
    """True if filename is a symlink, False otherwise.

    The examples are host-specific: they assume /tmp is a real directory
    and /home is a symbolic link.

    >>> is_symlink('/tmp')
    False

    >>> is_symlink('/home')
    True

    """
    return os.path.islink(filename)
def is_same_file(file1: str, file2: str) -> bool:
    """Returns True if the two files are the same inode.

    Raises OSError (from os.path.samefile) if either path cannot be
    stat'ed.

    >>> is_same_file('/tmp', '/tmp/../tmp')
    True

    >>> is_same_file('/tmp', '/home')
    False

    """
    return os.path.samefile(file1, file2)
# Returns os.stat(filename); any Exception is caught and, per the docstring,
# turned into a None return.
# NOTE(review): the `try:` line and the body of the except clause are not
# visible in this view -- confirm against the full file.
292 def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
293 """Stats the file and returns an os.stat_result or None on error."""
295 return os.stat(filename)
296 except Exception as e:
# Stats `filename` via get_file_raw_timestamps and applies `extractor` to the
# resulting os.stat_result to pick out one timestamp.
# NOTE(review): the guard for a failed stat and the None return implied by
# the Optional annotation are not visible in this view.
301 def get_file_raw_timestamp(filename: str, extractor) -> Optional[float]:
302 tss = get_file_raw_timestamps(filename)
304 return extractor(tss)
def get_file_raw_atime(filename: str) -> Optional[float]:
    """Returns the st_atime (access time) of filename as reported by
    get_file_raw_timestamp, whose result may be None."""
    return get_file_raw_timestamp(filename, lambda x: x.st_atime)
def get_file_raw_mtime(filename: str) -> Optional[float]:
    """Returns the st_mtime (modification time) of filename as reported
    by get_file_raw_timestamp, whose result may be None."""
    return get_file_raw_timestamp(filename, lambda x: x.st_mtime)
def get_file_raw_ctime(filename: str) -> Optional[float]:
    """Returns the st_ctime of filename as reported by
    get_file_raw_timestamp, whose result may be None."""
    return get_file_raw_timestamp(filename, lambda x: x.st_ctime)
# Opens `filename` in binary mode, feeds its contents into a hashlib.md5
# object one `chunk` at a time, and returns the hex digest string.
# NOTE(review): the loop that reads each `chunk` is not visible in this
# view, so the chunk size and loop form cannot be confirmed here.
320 def get_file_md5(filename: str) -> str:
321 """Hashes filename's contents and returns an MD5."""
322 file_hash = hashlib.md5()
323 with open(filename, "rb") as f:
326 file_hash.update(chunk)
328 return file_hash.hexdigest()
def set_file_raw_atime(filename: str, atime: float) -> None:
    """Sets the access time of filename to atime, keeping its mtime.

    os.utime always writes both timestamps, so the current modification
    time is read first and written back unchanged.
    """
    mtime = get_file_raw_mtime(filename)
    assert mtime is not None
    os.utime(filename, (atime, mtime))
def set_file_raw_mtime(filename: str, mtime: float) -> None:
    """Sets the modification time of filename to mtime, keeping its atime.

    os.utime always writes both timestamps, so the current access time
    is read first and written back unchanged.
    """
    atime = get_file_raw_atime(filename)
    assert atime is not None
    os.utime(filename, (atime, mtime))
# Sets both the atime and mtime of `filename` through os.utime: one call
# passes (ts, ts), the other passes None, which makes os.utime use the
# current time.
# NOTE(review): the condition choosing between the two calls is not visible
# in this view -- presumably `ts is not None`; confirm.
343 def set_file_raw_atime_and_mtime(filename: str, ts: float = None):
345 os.utime(filename, (ts, ts))
347 os.utime(filename, None)
# Calls producer(filename) to obtain a raw timestamp and converts it with
# datetime.datetime.fromtimestamp (no tz argument is passed).
# NOTE(review): the guard for a missing timestamp and the None return
# implied by the Optional annotation are not visible in this view.
350 def convert_file_timestamp_to_datetime(
351 filename: str, producer
352 ) -> Optional[datetime.datetime]:
353 ts = producer(filename)
355 return datetime.datetime.fromtimestamp(ts)
def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's access time as a datetime, using
    convert_file_timestamp_to_datetime with get_file_raw_atime."""
    return convert_file_timestamp_to_datetime(filename, get_file_raw_atime)
def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's modification time as a datetime, using
    convert_file_timestamp_to_datetime with get_file_raw_mtime."""
    return convert_file_timestamp_to_datetime(filename, get_file_raw_mtime)
def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]:
    """Returns filename's st_ctime as a datetime, using
    convert_file_timestamp_to_datetime with get_file_raw_ctime."""
    return convert_file_timestamp_to_datetime(filename, get_file_raw_ctime)
# Stats `filename` via get_file_raw_timestamps and applies `extractor` to
# the os.stat_result to obtain one timestamp in `result`.
# NOTE(review): how `result` is turned into an age in seconds (presumably
# the current time minus result) and the None path implied by the Optional
# annotation are not visible in this view.
371 def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
373 ts = get_file_raw_timestamps(filename)
376 result = extractor(ts)
def get_file_atime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age in seconds of filename's access time, as computed
    by get_file_timestamp_age_seconds from st_atime."""
    return get_file_timestamp_age_seconds(filename, lambda x: x.st_atime)
def get_file_ctime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age in seconds of filename's st_ctime, as computed by
    get_file_timestamp_age_seconds."""
    return get_file_timestamp_age_seconds(filename, lambda x: x.st_ctime)
def get_file_mtime_age_seconds(filename: str) -> Optional[int]:
    """Returns the age in seconds of filename's modification time, as
    computed by get_file_timestamp_age_seconds from st_mtime."""
    return get_file_timestamp_age_seconds(filename, lambda x: x.st_mtime)
# Obtains an age in seconds from get_file_timestamp_age_seconds and wraps it
# in a datetime.timedelta (after converting it to float).
# NOTE(review): the guard for a None age and the None return implied by the
# Optional annotation are not visible in this view.
392 def get_file_timestamp_timedelta(
393 filename: str, extractor
394 ) -> Optional[datetime.timedelta]:
395 age = get_file_timestamp_age_seconds(filename, extractor)
397 return datetime.timedelta(seconds=float(age))
def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's access time as a timedelta, via
    get_file_timestamp_timedelta applied to st_atime."""
    return get_file_timestamp_timedelta(filename, lambda x: x.st_atime)
def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's st_ctime as a timedelta, via
    get_file_timestamp_timedelta."""
    return get_file_timestamp_timedelta(filename, lambda x: x.st_ctime)
def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]:
    """Returns the age of filename's modification time as a timedelta,
    via get_file_timestamp_timedelta applied to st_mtime."""
    return get_file_timestamp_timedelta(filename, lambda x: x.st_mtime)
# Describes the age of one of filename's timestamps as text: the age comes
# from get_file_timestamp_age_seconds and is passed to either
# describe_duration_briefly or describe_duration from datetime_utils.
# The datetime_utils import is local to the function (reason not shown here).
# NOTE(review): the guard for a None age and the test on `brief` that
# selects between the two describe calls are not visible in this view.
413 def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
414 from datetime_utils import describe_duration, describe_duration_briefly
416 age = get_file_timestamp_age_seconds(filename, extractor)
420 return describe_duration_briefly(age)
422 return describe_duration(age)
def describe_file_atime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a text description of the age of filename's access time,
    via describe_file_timestamp; brief is passed through unchanged."""
    return describe_file_timestamp(filename, lambda x: x.st_atime, brief=brief)
def describe_file_ctime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a text description of the age of filename's st_ctime, via
    describe_file_timestamp; brief is passed through unchanged."""
    return describe_file_timestamp(filename, lambda x: x.st_ctime, brief=brief)
def describe_file_mtime(filename: str, *, brief: bool = False) -> Optional[str]:
    """Returns a text description of the age of filename's modification
    time, via describe_file_timestamp; brief is passed through unchanged."""
    return describe_file_timestamp(filename, lambda x: x.st_mtime, brief=brief)
def touch_file(filename: str, *, mode: Optional[int] = 0o666):
    """Creates filename if it does not exist and updates its mtime.

    Args:
        filename: path of the file to touch.
        mode: permission bits applied if the file is created (combined
            with the process umask); None uses pathlib's default.
    """
    target = pathlib.Path(filename)
    # mode is a parameter of touch(), not of the Path constructor, where a
    # keyword argument was ignored (and is deprecated since Python 3.12).
    if mode is None:
        target.touch()
    else:
        target.touch(mode=mode)
# Iterates over the paths matched by glob.glob(in_filename).
# NOTE(review): the loop body is not visible in this view -- presumably it
# yields each filename; confirm against the full file.
441 def expand_globs(in_filename: str):
442 for filename in glob.glob(in_filename):
# Scans the entries of `directory` (os.listdir), joins each onto the
# directory path, and selects those that are existing regular files.
# NOTE(review): the statement under the `if` is not visible in this view --
# presumably it yields full_path; confirm against the full file.
446 def get_files(directory: str):
447 for filename in os.listdir(directory):
448 full_path = join(directory, filename)
449 if isfile(full_path) and exists(full_path):
# Scans the entries of `directory` (os.listdir), joins each onto the
# directory path, and selects those that exist and are NOT regular files.
# Note the test is "not a file", not "is a directory".
# NOTE(review): the statement under the `if` is not visible in this view --
# presumably it yields full_path; confirm against the full file.
453 def get_directories(directory: str):
454 for d in os.listdir(directory):
455 full_path = join(directory, d)
456 if not isfile(full_path) and exists(full_path):
# Generator over a directory tree: iterates get_files(directory), then
# recurses into each entry of get_directories(directory) and yields
# everything the recursive call produces.
# NOTE(review): the body of the first loop is not visible in this view --
# presumably it yields each filename; confirm against the full file.
460 def get_files_recursive(directory: str):
461 for filename in get_files(directory):
463 for subdir in get_directories(directory):
464 for file_or_directory in get_files_recursive(subdir):
465 yield file_or_directory
# Context manager for writing `filename` through a temporary file named
# '{filename}-{uuid}.tmp'. __enter__ asserts the temp path does not exist
# and opens it for writing; __exit__ builds a '/bin/mv -f' command that
# moves the temp file over `filename` and raises Exception reporting the
# exit value (ret >> 8) when that command fails.
# NOTE(review): several lines are not visible in this view -- where `uuid`
# is assigned, what __enter__ returns, how the handle is closed, how the
# command is run, and what __exit__ returns. Confirm against the full file.
# NOTE(review): the mv command is built by string interpolation; if it is
# run through a shell, paths containing spaces or shell metacharacters
# would be mis-parsed -- TODO confirm and consider os.replace().
468 class FileWriter(object):
469 def __init__(self, filename: str) -> None:
470 self.filename = filename
472 self.tempfile = f'{filename}-{uuid}.tmp'
473 self.handle: Optional[TextIO] = None
475 def __enter__(self) -> TextIO:
476 assert not does_path_exist(self.tempfile)
477 self.handle = open(self.tempfile, mode="w")
480 def __exit__(self, exc_type, exc_val, exc_tb) -> Optional[bool]:
481 if self.handle is not None:
483 cmd = f'/bin/mv -f {self.tempfile} {self.filename}'
486 raise Exception(f'{cmd} failed, exit value {ret>>8}')
# Script entry point. NOTE(review): the guarded body is not visible in this
# view (the docstrings above contain doctest-style examples, so it
# presumably runs them -- confirm against the full file).
490 if __name__ == '__main__':