3 from dataclasses import dataclass
10 from typing import Optional
14 import decorator_utils
# Register this module's command-line flag group with the project's config
# module.  The threshold flag named below is read back in LockFile.__exit__
# through config.config['lockfile_held_duration_warning_threshold_sec'].
17 cfg = config.add_commandline_args(
18 f'Lockfile ({__file__})',
19 'Args related to lockfiles')
21 '--lockfile_held_duration_warning_threshold_sec',
25 help='If a lock is held for longer than this threshold we log a warning'
# Module-level logger named after this module.
27 logger = logging.getLogger(__name__)
# Error type raised by LockFile when the lock cannot be acquired (the
# "Couldn't acquire ...; giving up." path raises LockFileException(msg)).
30 class LockFileException(Exception):
# Record describing the holder of a lock.  _get_lockfile_contents turns it
# into JSON with json.dumps(contents.__dict__), and _detect_stale_lockfile
# rebuilds it with LockFileContents(**line_dict) from JSON parsed out of the
# lockfile.
35 class LockFileContents:
# Seconds-since-epoch value; it is compared against
# datetime.datetime.now().timestamp() when checking for expiry.
# NOTE(review): LockFile stores an Optional[float] expiration_timestamp and
# passes it here, and the stale check tests `is not None`, so this can be
# None in practice -- the annotation probably wants Optional[float]; confirm.
38 expiration_timestamp: float
# Lock represented by a file on disk: try_acquire_lock_once creates the file
# with os.O_CREAT | os.O_EXCL, and the file is later removed with os.unlink.
41 class LockFile(object):
42 """A file locking mechanism that has context-manager support so you
43 can use it in a with statement. e.g.
45 with LockFile('./foo.lock'):
46 # do a bunch of stuff... if the process dies we have a signal
47 # handler to do cleanup. Other code (in this process or another)
48 # that tries to take the same lockfile will block. There is also
49 # some logic for detecting stale locks.
56 do_signal_cleanup: bool = True,
57 expiration_timestamp: Optional[float] = None,
58 override_command: Optional[str] = None,
# A new instance starts out not holding the lock.
60 self.is_locked = False
# Path of the file whose existence represents the lock.
61 self.lockfile = lockfile_path
# When set, _get_lockfile_contents uses this instead of ' '.join(sys.argv).
62 self.override_command = override_command
# Route SIGINT and SIGTERM to self._signal.
# NOTE(review): presumably only meant to happen when do_signal_cleanup is
# true -- confirm against the guarding condition.
64 signal.signal(signal.SIGINT, self._signal)
65 signal.signal(signal.SIGTERM, self._signal)
# Optional seconds-since-epoch expiry; passed into LockFileContents by
# _get_lockfile_contents.
66 self.expiration_timestamp = expiration_timestamp
# True when nothing exists at the lockfile path.
72 return not os.path.exists(self.lockfile)
# Make a single attempt to take the lock by creating the lockfile.
# Returns a bool (per the annotation) reporting whether the attempt succeeded.
74 def try_acquire_lock_once(self) -> bool:
75 logger.debug(f"Trying to acquire {self.lockfile}.")
77 # Attempt to create the lockfile. These flags cause
78 # os.open to raise an OSError if the file already
80 fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
# Wrap the raw descriptor in a file object so the with-block closes it.
81 with os.fdopen(fd, "a") as f:
# JSON description of this process as the lock holder (debug-logged below).
82 contents = self._get_lockfile_contents()
83 logger.debug(contents)
85 logger.debug(f'Success; I own {self.lockfile}.')
90 logger.warning(f'Could not acquire {self.lockfile}.')
# Try to take the lock repeatedly.  The retry loop is supplied by
# decorator_utils.retry_if_false, configured from this method's arguments
# (attempt count, initial delay in seconds, backoff factor).
93 def acquire_with_retries(
96 initial_delay: float = 1.0,
97 backoff_factor: float = 2.0,
101 @decorator_utils.retry_if_false(tries = max_attempts,
102 delay_sec = initial_delay,
103 backoff = backoff_factor)
104 def _try_acquire_lock_with_retries() -> bool:
105 success = self.try_acquire_lock_once()
# The attempt failed and the lockfile is still there: check whether the
# existing lock is stale before the next retry.
106 if not success and os.path.exists(self.lockfile):
107 self._detect_stale_lockfile()
# Before the first attempt, run the same staleness check on any lockfile
# that already exists.
110 if os.path.exists(self.lockfile):
111 self._detect_stale_lockfile()
112 return _try_acquire_lock_with_retries()
# Remove the lockfile from disk.
116 os.unlink(self.lockfile)
117 except Exception as e:
# Record that this instance is not holding the lock.
119 self.is_locked = False
# Acquire the lock (with retries); if that fails, raise LockFileException.
122 if self.acquire_with_retries():
# Remember when the lock was taken; __exit__ subtracts this from the current
# time to compute how long the lock was held.
123 self.locktime = datetime.datetime.now().timestamp()
125 msg = f"Couldn't acquire {self.lockfile}; giving up."
127 raise LockFileException(msg)
# Context-manager exit.  Logs a warning when the lock was held for at least
# config.config['lockfile_held_duration_warning_threshold_sec'] seconds.
129 def __exit__(self, type, value, traceback):
# Seconds elapsed since self.locktime was recorded.
131 ts = datetime.datetime.now().timestamp()
132 duration = ts - self.locktime
133 if duration >= config.config['lockfile_held_duration_warning_threshold_sec']:
# Short human-readable form of the duration for the log message.
134 str_duration = datetime_utils.describe_duration_briefly(duration)
135 logger.warning(f'Held {self.lockfile} for {str_duration}')
# Handler registered with signal.signal() for SIGINT and SIGTERM; it accepts
# whatever positional arguments the signal machinery passes.
142 def _signal(self, *args):
# Build the JSON string that describes this process as the lock holder.
146 def _get_lockfile_contents(self) -> str:
# Prefer the caller-supplied command string; otherwise use this process's
# command line joined with spaces.
147 if self.override_command:
148 cmd = self.override_command
150 cmd = ' '.join(sys.argv)
152 contents = LockFileContents(
155 expiration_timestamp = self.expiration_timestamp,
# Serialize the record's attributes as a JSON object.
157 return json.dumps(contents.__dict__)
159 def _detect_stale_lockfile(self) -> None:
161 with open(self.lockfile, 'r') as rf:
162 lines = rf.readlines()
165 line_dict = json.loads(line)
166 contents = LockFileContents(**line_dict)
167 logger.debug(f'Blocking lock contents="{contents}"')
169 # Does the PID exist still?
171 os.kill(contents.pid, 0)
173 logger.warning(f'Lockfile {self.lockfile}\'s pid ({contents.pid}) is stale; ' +
177 # Has the lock expiration expired?
178 if contents.expiration_timestamp is not None:
179 now = datetime.datetime.now().timestamp()
180 if now > contents.expiration_datetime:
181 logger.warning(f'Lockfile {self.lockfile} expiration time has passed; ' +