3 """File-based locking helper."""
5 from __future__ import annotations
14 from dataclasses import dataclass
15 from typing import Literal, Optional
19 import decorator_utils
21 cfg = config.add_commandline_args(f'Lockfile ({__file__})', 'Args related to lockfiles')
23 '--lockfile_held_duration_warning_threshold_sec',
27 help='If a lock is held for longer than this threshold we log a warning',
29 logger = logging.getLogger(__name__)
32 class LockFileException(Exception):
33 """An exception related to lock files."""
39 class LockFileContents:
40 """The contents we'll write to each lock file."""
44 expiration_timestamp: Optional[float]
47 class LockFile(contextlib.AbstractContextManager):
48 """A file locking mechanism that has context-manager support so you
49 can use it in a with statement. e.g.
51 with LockFile('./foo.lock'):
52 # do a bunch of stuff... if the process dies we have a signal
53 # handler to do cleanup. Other code (in this process or another)
54 # that tries to take the same lockfile will block. There is also
55 # some logic for detecting stale locks.
63 do_signal_cleanup: bool = True,
64 expiration_timestamp: Optional[float] = None,
65 override_command: Optional[str] = None,
# Initial instance state (this excerpt does not show the enclosing `def` line;
# presumably this is the body of LockFile.__init__ -- confirm against the full file).
# is_locked starts False; lockfile stores the path passed in as lockfile_path.
67 self.is_locked: bool = False
68 self.lockfile: str = lockfile_path
# NOTE(review): annotated Optional[int], but elsewhere in this file locktime is
# assigned datetime.datetime.now().timestamp(), which is a float, and is then
# subtracted from another float timestamp. Optional[float] looks like the
# intended annotation.
69 self.locktime: Optional[int] = None
70 self.override_command: Optional[str] = override_command
# Route SIGINT and SIGTERM to self._signal. signal.signal() replaces any handler
# previously installed for these signals in this process and may only be called
# from the main thread.
# NOTE(review): presumably guarded by the do_signal_cleanup parameter on a line
# that is not visible here -- confirm.
72 signal.signal(signal.SIGINT, self._signal)
73 signal.signal(signal.SIGTERM, self._signal)
74 self.expiration_timestamp = expiration_timestamp
80 return not os.path.exists(self.lockfile)
# Make one attempt to take the lock by exclusively creating the lockfile and
# writing this process's lock contents into it. Annotated to return bool.
# NOTE(review): several lines of this method are missing from the excerpt (the
# try/except framing, the write of `contents`, and the return statements), so
# the exact success/failure paths should be confirmed against the full file.
82 def try_acquire_lock_once(self) -> bool:
83 logger.debug("Trying to acquire %s.", self.lockfile)
# O_CREAT | O_EXCL together make the create fail when the path already exists,
# which is what makes the file usable as a lock.
85 # Attempt to create the lockfile. These flags cause
86 # os.open to raise an OSError if the file already
88 fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
# Wrap the raw descriptor in a file object so it is closed when the with-block ends.
89 with os.fdopen(fd, "a") as f:
# The contents are the JSON string built by _get_lockfile_contents().
90 contents = self._get_lockfile_contents()
91 logger.debug(contents)
93 logger.debug('Success; I own %s.', self.lockfile)
# Failure path: logged at warning level (presumably reached from an except
# branch that is not visible here).
98 logger.warning('Couldn\'t acquire %s.', self.lockfile)
# Try to acquire the lock, retrying via decorator_utils.retry_if_false.
#
# Visible parameters:
#   initial_delay: passed to the retry decorator as delay_sec (default 1.0).
#   backoff_factor: passed to the retry decorator as backoff (default 2.0).
# A max_attempts value is also passed as tries=...; its declaration and default
# are on lines not visible in this excerpt.
#
# Returns whatever the decorated inner function returns (annotated bool).
# NOTE(review): the retry semantics (presumably "call again while the function
# returns False, sleeping delay_sec and multiplying by backoff") live in
# decorator_utils and are not shown here -- confirm there.
101 def acquire_with_retries(
104 initial_delay: float = 1.0,
105 backoff_factor: float = 2.0,
108 @decorator_utils.retry_if_false(
109 tries=max_attempts, delay_sec=initial_delay, backoff=backoff_factor
111 def _try_acquire_lock_with_retries() -> bool:
112 success = self.try_acquire_lock_once()
# After a failed attempt, if a lockfile is still present, check whether it is
# stale so that a later attempt has a chance to succeed.
113 if not success and os.path.exists(self.lockfile):
114 self._detect_stale_lockfile()
# Before the first attempt, run the same stale check on any existing lockfile.
117 if os.path.exists(self.lockfile):
118 self._detect_stale_lockfile()
119 return _try_acquire_lock_with_retries()
123 os.unlink(self.lockfile)
124 except Exception as e:
126 self.is_locked = False
129 if self.acquire_with_retries():
130 self.locktime = datetime.datetime.now().timestamp()
132 msg = f"Couldn't acquire {self.lockfile}; giving up."
134 raise LockFileException(msg)
# Context-manager exit hook. Measures how long the lock was held and warns when
# that duration meets or exceeds the configured
# 'lockfile_held_duration_warning_threshold_sec'. Annotated Literal[False].
# NOTE(review): the lines that release the lock and return are not visible in
# this excerpt -- confirm against the full file.
136 def __exit__(self, _, value, traceback) -> Literal[False]:
138 ts = datetime.datetime.now().timestamp()
# NOTE(review): self.locktime is initialised to None elsewhere in this class, and
# this subtraction would raise TypeError if it were still None. A guard is
# presumably on the line omitted just above -- confirm.
139 duration = ts - self.locktime
140 if duration >= config.config['lockfile_held_duration_warning_threshold_sec']:
141 # Note: describe duration briefly only does 1s granularity...
# Hence the int() truncation of the float duration before formatting.
142 str_duration = datetime_utils.describe_duration_briefly(int(duration))
143 msg = f'Held {self.lockfile} for {str_duration}'
# stacklevel=2 attributes the warning to the caller of __exit__ rather than to
# this line.
145 warnings.warn(msg, stacklevel=2)
153 def _signal(self, *args):
# Build the JSON string that describes this lock holder.
# Returns: json.dumps() of the __dict__ of a LockFileContents instance.
157 def _get_lockfile_contents(self) -> str:
# Prefer the caller-supplied override_command when it is set (truthy); otherwise
# use this process's argv joined with single spaces.
158 if self.override_command:
159 cmd = self.override_command
161 cmd = ' '.join(sys.argv)
# NOTE(review): only the expiration_timestamp argument is visible in this
# excerpt; the other arguments passed to LockFileContents (presumably including
# `cmd`) are on omitted lines -- confirm.
162 contents = LockFileContents(
165 expiration_timestamp=self.expiration_timestamp,
167 return json.dumps(contents.__dict__)
# Inspect an existing lockfile and decide whether it is stale. Two checks are
# visible: whether the pid recorded in the file still exists, and whether the
# recorded expiration timestamp has passed. Returns None.
# NOTE(review): the log messages below say "force acquiring", but the statements
# that actually do so (and the try/except framing) are not visible in this
# excerpt -- confirm against the full file.
169 def _detect_stale_lockfile(self) -> None:
171 with open(self.lockfile, 'r') as rf:
172 lines = rf.readlines()
# Each line is parsed as a JSON object whose keys must match LockFileContents'
# fields, since the dict is expanded as keyword arguments.
175 line_dict = json.loads(line)
176 contents = LockFileContents(**line_dict)
177 logger.debug('Blocking lock contents="%s"', contents)
# Signal number 0 is used as an existence probe: on POSIX, kill() with signal 0
# performs error checking without delivering a signal.
179 # Does the PID exist still?
181 os.kill(contents.pid, 0)
184 'Lockfile %s\'s pid (%d) is stale; force acquiring...',
# A None expiration means the lock never expires by time; only a set timestamp
# that is strictly earlier than now counts as expired.
190 # Has the lock expiration expired?
191 if contents.expiration_timestamp is not None:
192 now = datetime.datetime.now().timestamp()
193 if now > contents.expiration_timestamp:
195 'Lockfile %s\'s expiration time has passed; force acquiring',
200 pass # If the lockfile doesn't exist or disappears, good.