Add another doctest.
[python_utils.git] / lockfile.py
index ee8c2559606b0fc9488d3c2bc5ed772ec70254a0..ca190df09f9245bca7153945f0fee61859daa1d7 100644 (file)
@@ -9,9 +9,20 @@ import signal
 import sys
 from typing import Optional
 
+import config
 import decorator_utils
 
 
+cfg = config.add_commandline_args(
+    f'Lockfile ({__file__})',
+    'Args related to lockfiles')
+cfg.add_argument(
+    '--lockfile_held_duration_warning_threshold_sec',
+    type=float,
+    default=10.0,
+    metavar='SECONDS',
+    help='If a lock is held for longer than this threshold we log a warning'
+)
 logger = logging.getLogger(__name__)
 
 
@@ -28,18 +39,26 @@ class LockFileContents:
 
 class LockFile(object):
     """A file locking mechanism that has context-manager support so you
-    can use it in a with statement.
-    """
+    can use it in a with statement.  e.g.
+
+    with LockFile('./foo.lock'):
+        # do a bunch of stuff... if the process dies we have a signal
+        # handler to do cleanup.  Other code (in this process or another)
+        # that tries to take the same lockfile will block.  There is also
+        # some logic for detecting stale locks.
 
+    """
     def __init__(
             self,
             lockfile_path: str,
             *,
             do_signal_cleanup: bool = True,
             expiration_timestamp: Optional[float] = None,
+            override_command: Optional[str] = None,
     ) -> None:
         self.is_locked = False
         self.lockfile = lockfile_path
+        self.override_command = override_command
         if do_signal_cleanup:
             signal.signal(signal.SIGINT, self._signal)
             signal.signal(signal.SIGTERM, self._signal)
@@ -67,7 +86,7 @@ class LockFile(object):
             return True
         except OSError:
             pass
-        logger.debug(f'Failed; I could not acquire {self.lockfile}.')
+        logger.warning(f'Could not acquire {self.lockfile}.')
         return False
 
     def acquire_with_retries(
@@ -100,12 +119,19 @@ class LockFile(object):
 
     def __enter__(self):
         if self.acquire_with_retries():
+            self.locktime = datetime.datetime.now().timestamp()
             return self
         msg = f"Couldn't acquire {self.lockfile}; giving up."
         logger.warning(msg)
         raise LockFileException(msg)
 
     def __exit__(self, type, value, traceback):
+        if self.locktime:
+            ts = datetime.datetime.now().timestamp()
+            duration = ts - self.locktime
+            if duration >= config.config['lockfile_held_duration_warning_threshold_sec']:
+                str_duration = datetime_utils.describe_duration_briefly(duration)
+                logger.warning(f'Held {self.lockfile} for {str_duration}')
         self.release()
 
     def __del__(self):
@@ -117,10 +143,15 @@ class LockFile(object):
             self.release()
 
     def _get_lockfile_contents(self) -> str:
+        if self.override_command:
+            cmd = self.override_command
+        else:
+            cmd = ' '.join(sys.argv)
+        print(cmd)
         contents = LockFileContents(
             pid = os.getpid(),
-            commandline = ' '.join(sys.argv),
-            expiration_timestamp = self.expiration_timestamp
+            commandline = cmd,
+            expiration_timestamp = self.expiration_timestamp,
         )
         return json.dumps(contents.__dict__)
 
@@ -138,15 +169,16 @@ class LockFile(object):
                     try:
                         os.kill(contents.pid, 0)
                     except OSError:
-                        logger.debug('The pid seems stale; killing the lock.')
+                        logger.warning(f'Lockfile {self.lockfile}\'s pid ({contents.pid}) is stale; ' +
+                                       'force acquiring')
                         self.release()
 
                     # Has the lock expiration expired?
                     if contents.expiration_timestamp is not None:
                         now = datetime.datetime.now().timestamp()
                         if now > contents.expiration_datetime:
-                            logger.debug('The expiration time has passed; ' +
-                                         'killing the lock')
+                            logger.warning(f'Lockfile {self.lockfile} expiration time has passed; ' +
+                                           'force acquiring')
                             self.release()
         except Exception:
             pass