Improve persistent after actually using it.
authorScott Gasch <[email protected]>
Wed, 22 Sep 2021 20:40:41 +0000 (13:40 -0700)
committerScott Gasch <[email protected]>
Wed, 22 Sep 2021 20:40:41 +0000 (13:40 -0700)
persistent.py

index 30e4ccbfcbed724ead5445b9674a2334d621497c..4c18c23f43be7b60fb2198158b7647bf12c8e1b2 100644 (file)
@@ -2,18 +2,23 @@
 
 from abc import ABC, abstractmethod
 import atexit
+import datetime
+import enum
 import functools
 import logging
+from typing import Callable, Optional
 
 import dill
 
 import file_utils
 
-
 logger = logging.getLogger(__name__)
 
 
 class Persistent(ABC):
+    """
+    A base class of an object with a load/save method.
+    """
     @abstractmethod
     def save(self):
         pass
@@ -23,10 +28,101 @@ class Persistent(ABC):
         pass
 
 
+def reuse_if_mtime_is_today() -> Callable[[datetime.datetime], bool]:
+    """
+    A helper that returns a lambda appropriate for use in the
+    persistent_autoload_singleton decorator's may_reuse_persisted
+    parameter that allows persisted state to be reused as long as it
+    was persisted on the same day as the load.
+
+    """
+    now = datetime.datetime.now()
+    return lambda dt: (
+        dt.month == now.month and
+        dt.day == now.day and
+        dt.year == now.year
+    )
+
+
+def reuse_if_mtime_less_than_limit(
+        limit_seconds: int
+) -> Callable[[datetime.datetime], bool]:
+    """
+    A helper that returns a lambda appropriate for use in the
+    persistent_autoload_singleton decorator's may_reuse_persisted
+    parameter that allows persisted state to be reused as long as it
+    was persisted within the past limit_seconds.
+
+    """
+    now = datetime.datetime.now()
+    return lambda dt: (now - dt).total_seconds() <= limit_seconds
+
+
+class PersistAtShutdown(enum.Enum):
+    """
+    An enum to describe the conditions under which state is persisted
+    to disk.  See details below.
+
+    """
+    NEVER = 0,
+    IF_NOT_INITIALIZED_FROM_DISK = 1,
+    ALWAYS = 2,
+
+
 class persistent_autoload_singleton(Persistent):
-    def __init__(self, filename: str, *, max_age_sec: int = 0):
+    """This class is meant to be used as a decorator around a class that:
+
+        1. Is a singleton; one global instance per python program.
+        2. Has a complex state that is initialized fully by __init__()
+        3. Would benefit from caching said state on disk and reloading
+           it on future invocations rather than recomputing and
+           reinitializing.
+
+    Here's an example usage pattern:
+
+        @persistent_autoload_singleton(
+            filename = "my_cache_file.bin",
+            may_reuse_persisted = reuse_if_mtime_less_than_limit(60),
+            persist_at_shutdown = PersistAtShutdown.NEVER
+        )
+        class MyComplexObject(object):
+            def __init__(self, ...):
+                # do a bunch of work to fully initialize this instance
+
+            def another_method(self, ...):
+                # use the state stored in this instance to do some work
+
+    What does this do, exactly?
+
+        1. Anytime you attempt to instantiate MyComplexObject you will
+           get the same instance.  This class is now a singleton.
+        2. The first time you attempt to instantiate MyComplexObject
+           the wrapper scaffolding will check "my_cache_file.bin".  If
+           it exists and any may_reuse_persisted predicate indicates
+           that reusing persisted state is allowed, we will skip the
+           call to __init__ and return an unpickled instance read from
+           the disk file.
+        3. If the file doesn't exist or the predicate indicates that
+           the persisted state cannot be reused, MyComplexObject's
+           __init__ will be invoked and will be expected to fully
+           initialize the instance.
+        4. At program exit time, depending on the value of the
+           persist_at_shutdown parameter, the state of MyComplexObject
+           will be written to disk using the same filename so that
+           future instances may potentially reuse saved state.  Note
+           that the state that is persisted is the state at program
+           exit time.
+
+    """
+    def __init__(
+            self,
+            filename: str,
+            *,
+            may_reuse_persisted: Optional[Callable[[datetime.datetime], bool]] = None,
+            persist_at_shutdown: PersistAtShutdown = PersistAtShutdown.NEVER):
         self.filename = filename
-        self.max_age_sec = max_age_sec
+        self.may_reuse_persisted = may_reuse_persisted
+        self.persist_at_shutdown = persist_at_shutdown
         self.instance = None
 
     def __call__(self, cls):
@@ -42,37 +138,52 @@ class persistent_autoload_singleton(Persistent):
                 )
                 return self.instance
 
-            if not self.load():
-                assert self.instance is None
+            was_loaded_from_disk = False
+            if file_utils.does_file_exist(self.filename):
+                cache_mtime_dt = file_utils.get_file_mtime_as_datetime(
+                    self.filename
+                )
+                now = datetime.datetime.now()
+                if (
+                        self.may_reuse_persisted is not None and
+                        self.may_reuse_persisted(cache_mtime_dt)
+                ):
+                    logger.debug(
+                        f'Attempting to load from persisted cache (mtime={cache_mtime_dt}, {now-cache_mtime_dt} ago)')
+                    if not self.load():
+                        logger.warning('Loading from cache failed?!')
+                        assert self.instance is None
+                    else:
+                        assert self.instance is not None
+                        was_loaded_from_disk = True
+
+            if self.instance is None:
                 logger.debug(
-                    f'Instantiating {cls.__name__} directly.'
+                    f'Attempting to instantiate {cls.__name__} directly.'
                 )
                 self.instance = cls(*args, **kwargs)
+                was_loaded_from_disk = False
 
-            # On program exit, save state to disk.
-            atexit.register(self.save)
             assert self.instance is not None
+            if (
+                    self.persist_at_shutdown is PersistAtShutdown.ALWAYS or
+                    (
+                        not was_loaded_from_disk and
+                        self.persist_at_shutdown is PersistAtShutdown.IF_NOT_INITIALIZED_FROM_DISK
+                    )
+            ):
+                atexit.register(self.save)
             return self.instance
         return _load
 
     def load(self) -> bool:
-        if (
-                file_utils.does_file_exist(self.filename)
-                and (
-                    self.max_age_sec == 0 or
-                    file_utils.get_file_mtime_age_seconds(self.filename) <= self.max_age_sec
-                )
-        ):
-            logger.debug(
-                f'Attempting to load from file {self.filename}'
-            )
-            try:
-                with open(self.filename, 'rb') as f:
-                    self.instance = dill.load(f)
-                    return True
-            except Exception:
-                self.instance = None
-                return False
+        try:
+            with open(self.filename, 'rb') as f:
+                self.instance = dill.load(f)
+                return True
+        except Exception:
+            self.instance = None
+            return False
         return False
 
     def save(self) -> bool: