From: Scott Gasch Date: Sat, 25 Sep 2021 04:17:58 +0000 (-0700) Subject: Oh my god, I fucking hate pickle, dill, cloudpickle and all that X-Git-Url: https://wannabe.guru.org/gitweb/?a=commitdiff_plain;h=e0d685fdfd930e72353e00fe3b750d4ebfbf5111;p=python_utils.git Oh my god, I fucking hate pickle, dill, cloudpickle and all that stuff. What a total pain in the ass. I kept getting pickle complaining about class not really being class and refusing to write if any other modules were imported between the creation and the save that I gave up and moved save and load to user defined things. Maybe a better design anyway. --- diff --git a/persistent.py b/persistent.py index 0ba9315..f6ca0a0 100644 --- a/persistent.py +++ b/persistent.py @@ -6,9 +6,7 @@ import datetime import enum import functools import logging -from typing import Callable, Optional - -import dill +from typing import Any import file_utils @@ -17,50 +15,76 @@ logger = logging.getLogger(__name__) class Persistent(ABC): """ - A base class of an object with a load/save method. + A base class of an object with a load/save method. Classes that are + decorated with @persistent_autoloaded_singleton should subclass this + and implement their save() and load() methods. + """ @abstractmethod - def save(self): + def save(self) -> bool: + """ + Save this thing somewhere that you'll remember when someone calls + load() later on in a way that makes sense to your code. + + """ pass + @classmethod @abstractmethod - def load(self): + def load(cls) -> Any: + """ + Load this thing from somewhere and give back an instance which + will become the global singleton and which will may (see + below) be save()d at program exit time. + + Oh, in case this is handy, here's how to write a factory + method that doesn't call the c'tor in python: + + @classmethod + def load_from_somewhere(cls, somewhere): + # Note: __new__ does not call __init__. + obj = cls.__new__(cls) + + # Don't forget to call any polymorphic base class initializers + super(MyClass, obj).__init__() + + # Load the piece(s) of obj that you want to from somewhere. + obj._state = load_from_somewhere(somewhere) + return obj + + """ pass -def reuse_if_mtime_is_today() -> Callable[[datetime.datetime], bool]: - """ - A helper that returns a lambda appropriate for use in the - persistent_autoloaded_singleton decorator's may_reuse_persisted - parameter that allows persisted state to be reused as long as it - was persisted on the same day as the load. +def was_file_written_today(filename: str) -> bool: + """Returns True if filename was written today.""" - """ + if not file_utils.does_file_exist(filename): + return False + + mtime = file_utils.get_file_mtime_as_datetime(filename) now = datetime.datetime.now() - return lambda dt: ( - dt.month == now.month and - dt.day == now.day and - dt.year == now.year + return ( + mtime.month == now.month and + mtime.day == now.day and + mtime.year == now.year ) -def reuse_if_mtime_less_than_limit_sec( - limit_seconds: int -) -> Callable[[datetime.datetime], bool]: - """ - A helper that returns a lambda appropriate for use in the - persistent_autoloaded_singleton decorator's may_reuse_persisted - parameter that allows persisted state to be reused as long as it - was persisted within the past limit_seconds. +def was_file_written_within_n_seconds( + filename: str, + limit_seconds: int, +) -> bool: + """Returns True if filename was written within the pas limit_seconds + seconds. """ - now = datetime.datetime.now() - return lambda dt: (now - dt).total_seconds() <= limit_seconds - + if not file_utils.does_file_exist(filename): + return False -def dont_reuse_persisted_state_force_refresh( -) -> Callable[[datetime.datetime], bool]: - return lambda dt: False + mtime = file_utils.get_file_mtime_as_datetime(filename) + now = datetime.datetime.now() + return (now - mtime).total_seconds() <= limit_seconds class PersistAtShutdown(enum.Enum): @@ -70,70 +94,38 @@ class PersistAtShutdown(enum.Enum): """ NEVER = 0, - IF_NOT_INITIALIZED_FROM_DISK = 1, + IF_NOT_LOADED = 1, ALWAYS = 2, -class persistent_autoloaded_singleton(Persistent): - """This class is meant to be used as a decorator around a class that: - - 1. Is a singleton; one global instance per python program. - 2. Has a complex state that is initialized fully by __init__() - 3. Would benefit from caching said state on disk and reloading - it on future invokations rather than recomputing and - reinitializing. - - Here's and example usage pattern: - - @persistent_autoloaded_singleton( - filename = "my_cache_file.bin", - may_reuse_persisted = reuse_if_mtime_less_than_limit_sec(60), - persist_at_shutdown = PersistAtShutdown.IF_NOT_INITIALIZED_FROM_DISK, - ) - class MyComplexObject(object): - def __init__(self, ...): - # do a bunch of work to fully initialize this instance - - def another_method(self, ...): - # use the state stored in this instance to do some work - - What does this do, exactly? - - 1. Anytime you attempt to instantiate MyComplexObject you will - get the same instance. This class is now a singleton. - 2. The first time you attempt to instantiate MyComplexObject - the wrapper scaffolding will check "my_cache_file.bin". If - it exists and any may_reuse_persisted predicate indicates - that reusing persisted state is allowed, we will skip the - call to __init__ and return an unpickled instance read from - the disk file. In the example above the predicate allows - reuse of saved state if it is <= 60s old. - 3. If the file doesn't exist or the predicate indicates that - the persisted state cannot be reused (e.g. too stale), - MyComplexObject's __init__ will be invoked and will be - expected to fully initialize the instance. - 4. At program exit time, depending on the value of the - persist_at_shutdown parameter, the state of MyComplexObject - will be written to disk using the same filename so that - future instances may potentially reuse saved state. Note - that the state that is persisted is the state at program - exit time. In the example above this parameter indicates - that we should persist state so long as we were not - initialized from cached state on disk. +class persistent_autoloaded_singleton(object): + """A decorator that can be applied to a Persistent subclass (i.e. a + class with a save() and load() method. It will intercept attempts + to instantiate the class via it's c'tor and, instead, invoke the + class' load() method to give it a chance to read state from + somewhere persistent. + + If load() fails (returns None), the c'tor is invoked with the + original args as a fallback. + + Based upon the value of the optional argument persist_at_shutdown, + (NEVER, IF_NOT_LOADED, ALWAYS), the save() method of the class will + be invoked just before program shutdown to give the class a chance + to save its state somewhere. + + The implementations of save() and load() and where the class + persists its state are details left to the Persistent + implementation. """ def __init__( self, - filename: str, *, - may_reuse_persisted: Optional[Callable[[datetime.datetime], bool]] = None, - persist_at_shutdown: PersistAtShutdown = PersistAtShutdown.NEVER): - self.filename = filename - self.may_reuse_persisted = may_reuse_persisted + persist_at_shutdown: PersistAtShutdown = PersistAtShutdown.IF_NOT_LOADED): self.persist_at_shutdown = persist_at_shutdown self.instance = None - def __call__(self, cls): + def __call__(self, cls: Persistent): @functools.wraps(cls) def _load(*args, **kwargs): @@ -146,63 +138,28 @@ class persistent_autoloaded_singleton(Persistent): ) return self.instance - was_loaded_from_disk = False - if file_utils.does_file_exist(self.filename): - cache_mtime_dt = file_utils.get_file_mtime_as_datetime( - self.filename - ) - now = datetime.datetime.now() - if ( - self.may_reuse_persisted is not None and - self.may_reuse_persisted(cache_mtime_dt) - ): - logger.debug( - f'Attempting to load from persisted cache (mtime={cache_mtime_dt}, {now-cache_mtime_dt} ago)') - if not self.load(): - logger.warning('Loading from cache failed?!') - assert self.instance is None - else: - assert self.instance is not None - was_loaded_from_disk = True - - if self.instance is None: - logger.debug( - f'Attempting to instantiate {cls.__name__} directly.' - ) + # Otherwise, try to load it from persisted state. + was_loaded = False + logger.debug(f'Attempting to load {cls.__name__} from persisted state.') + self.instance = cls.load() + if not self.instance: + logger.warning('Loading from cache failed.') + logger.debug(f'Attempting to instantiate {cls.__name__} directly.') self.instance = cls(*args, **kwargs) - was_loaded_from_disk = False + else: + logger.debug(f'Class {cls.__name__} was loaded from persisted state successfully.') + was_loaded = True assert self.instance is not None + if ( self.persist_at_shutdown is PersistAtShutdown.ALWAYS or ( - not was_loaded_from_disk and - self.persist_at_shutdown is PersistAtShutdown.IF_NOT_INITIALIZED_FROM_DISK + not was_loaded and + self.persist_at_shutdown is PersistAtShutdown.IF_NOT_LOADED ) ): - atexit.register(self.save) + logger.debug('Scheduling a deferred called to save at process shutdown time.') + atexit.register(self.instance.save) return self.instance return _load - - def load(self) -> bool: - try: - with open(self.filename, 'rb') as f: - self.instance = dill.load(f) - return True - except Exception: - self.instance = None - return False - return False - - def save(self) -> bool: - if self.instance is not None: - logger.debug( - f'Attempting to save {type(self.instance).__name__} to file {self.filename}' - ) - try: - with open(self.filename, 'wb') as f: - dill.dump(self.instance, f, dill.HIGHEST_PROTOCOL) - return True - except Exception: - return False - return False