Easier and more self documenting patterns for loading/saving Persistent
[python_utils.git] / datetime_utils.py
index 310ffe154d116348fa65e4c707a421c3c6721de0..10b166605570b14332fe7c7f5ebf74b645a05960 100644 (file)
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
 
-"""Utilities related to dates and times and datetimes."""
+# © Copyright 2021-2022, Scott Gasch
+
+"""Utilities related to dates, times, and datetimes."""
 
 import datetime
 import enum
 import logging
 import re
-from typing import Any, NewType, Tuple
+from typing import Any, NewType, Optional, Tuple
 
 import holidays  # type: ignore
 import pytz
@@ -17,8 +19,14 @@ logger = logging.getLogger(__name__)
 
 
 def is_timezone_aware(dt: datetime.datetime) -> bool:
-    """See: https://docs.python.org/3/library/datetime.html
-                               #determining-if-an-object-is-aware-or-naive
+    """Returns true if the datetime argument is timezone aware or
+    False if not.
+
+    See: https://docs.python.org/3/library/datetime.html
+    #determining-if-an-object-is-aware-or-naive
+
+    Args:
+        dt: The datetime object to check
 
     >>> is_timezone_aware(datetime.datetime.now())
     False
@@ -27,22 +35,110 @@ def is_timezone_aware(dt: datetime.datetime) -> bool:
     True
 
     """
-    return (
-        dt.tzinfo is not None and
-        dt.tzinfo.utcoffset(dt) is not None
-    )
+    return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
 
 
 def is_timezone_naive(dt: datetime.datetime) -> bool:
+    """Inverse of is_timezone_aware -- returns True if the dt argument
+    is timezone naive and False if it is timezone aware.
+
+    See: https://docs.python.org/3/library/datetime.html
+    #determining-if-an-object-is-aware-or-naive
+
+    Args:
+        dt: The datetime object to check
+
+    >>> is_timezone_naive(datetime.datetime.now())
+    True
+
+    >>> is_timezone_naive(now_pacific())
+    False
+
+    """
     return not is_timezone_aware(dt)
 
 
-def replace_timezone(dt: datetime.datetime,
-                     tz: datetime.tzinfo) -> datetime.datetime:
+def strip_timezone(dt: datetime.datetime) -> datetime.datetime:
+    """Remove the timezone from a datetime, leaving its fields as-is.
+
+    .. warning::
+
+        This does not change the hours, minutes, seconds, months,
+        days, years, etc... so the instant to which this timestamp
+        refers will change.  Naive datetimes are returned as-is.
+
+    Args:
+        dt: The datetime from which to remove the timezone
+
+    >>> now = now_pacific()
+    >>> now.tzinfo is None
+    False
+    >>> dt = strip_timezone(now)
+    >>> dt == now
+    False
+    >>> dt.tzinfo is None
+    True
+    >>> dt.hour == now.hour
+    True
+
+    """
+    if is_timezone_naive(dt):
+        return dt
+    # Drop tzinfo directly: replace_timezone would log a spurious warning here.
+    return dt.replace(tzinfo=None)
+
+
+def add_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.datetime:
+    """
+    Adds a timezone to a timezone naive datetime.  This does not
+    change the instant to which the timestamp refers.  See also:
+    replace_timezone.
+
+    Args:
+        dt: The timezone naive datetime to attach tz to
+        tz: The timezone to attach
+
+    Raises:
+        ValueError: dt is already aware with a tzinfo other than tz
+
+    >>> now = datetime.datetime.now()
+    >>> now_pacific = add_timezone(now, pytz.timezone('US/Pacific'))
+    >>> is_timezone_aware(now), is_timezone_aware(now_pacific)
+    (False, True)
+    >>> (now.hour, now.minute) == (now_pacific.hour, now_pacific.minute)
+    True
     """
-    Replaces the timezone on a datetime object directly (leaving
-    the year, month, day, hour, minute, second, micro, etc... alone).
-    Note: this changes the instant to which this dt refers.
+
+    # tz can only be attached directly to a timezone naive dt.  For an
+    # aware dt that is not already in tz the caller has two options:
+    #     1. Use strip_timezone and try again.
+    #     2. Replace the timezone on the dt object via replace_timezone.
+    #        Be aware that this changes the instant to which the dt refers
+    #        and, further, can introduce weirdness like UTC offsets that
+    #        are not an even multiple of an hour.
+    if is_timezone_aware(dt):
+        if dt.tzinfo == tz:
+            return dt
+        raise ValueError(
+            f'{dt} is already timezone aware; use replace_timezone or translate_timezone '
+            'depending on the semantics you want.  See the pydocs / code.'
+        )
+    return dt.replace(tzinfo=tz)
+
+
+def replace_timezone(dt: datetime.datetime, tz: Optional[datetime.tzinfo]) -> datetime.datetime:
+    """Replaces the timezone on a timezone aware datetime object directly
+    (leaving the year, month, day, hour, minute, second, micro,
+    etc... alone).
+
+    Works with timezone aware and timezone naive dts but for the
+    latter it is probably better to use add_timezone or just create it
+    with a tz parameter.  Using this can have weird side effects like
+    UTC offsets that are not an even multiple of an hour, etc...
+
+    .. warning::
+
+        This changes the instant to which this dt refers.
 
     >>> from pytz import UTC
     >>> d = now_pacific()
@@ -56,37 +152,75 @@ def replace_timezone(dt: datetime.datetime,
     True
 
     """
-    return datetime.datetime(
-        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond,
-        tzinfo=tz
-    )
+    if is_timezone_aware(dt):
+        logger.warning(
+            '%s already has a timezone; klobbering it anyway.\n  Be aware that this operation changed the instant to which the object refers.',
+            dt,
+        )
+        return datetime.datetime(
+            dt.year,
+            dt.month,
+            dt.day,
+            dt.hour,
+            dt.minute,
+            dt.second,
+            dt.microsecond,
+            tzinfo=tz,
+        )
+    else:
+        if tz:
+            return add_timezone(dt, tz)
+        else:
+            return dt
+
+
+def replace_time_timezone(t: datetime.time, tz: datetime.tzinfo) -> datetime.time:
+    """Replaces the timezone on a datetime.time directly without performing
+    any translation of the hour, minute, second, etc...
 
+    .. warning::
 
-def translate_timezone(dt: datetime.datetime,
-                       tz: datetime.tzinfo) -> datetime.datetime:
+        Because the fields are left untouched, this will change the
+        instant to which the time refers.
+
+    >>> t = datetime.time(8, 15, 12, 0, pytz.UTC)
+    >>> t.tzname()
+    'UTC'
+
+    >>> t = replace_time_timezone(t, pytz.timezone('US/Pacific'))
+    >>> t.tzname()
+    'US/Pacific'
+    """
+    return t.replace(tzinfo=tz)
+
+
+def translate_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.datetime:
     """
     Translates dt into a different timezone by adjusting the year, month,
     day, hour, minute, second, micro, etc... appropriately.  The returned
     dt is the same instant in another timezone.
 
-    >>> from pytz import UTC
+    >>> import pytz
     >>> d = now_pacific()
     >>> d.tzinfo.tzname(d)[0]     # Note: could be PST or PDT
     'P'
     >>> h = d.hour
-    >>> o = translate_timezone(d, UTC)
-    >>> o.tzinfo.tzname(o)
-    'UTC'
+    >>> o = translate_timezone(d, pytz.timezone('US/Eastern'))
+    >>> o.tzinfo.tzname(o)[0]     # Again, could be EST or EDT
+    'E'
     >>> o.hour == h
     False
-
+    >>> expected = h + 3          # Three hours later in E?T than P?T
+    >>> expected = expected % 24  # Handle edge case
+    >>> expected == o.hour
+    True
     """
-    return dt.replace(tzinfo=None).astimezone(tz=tz)
+    return dt.astimezone(tz=tz)
 
 
 def now() -> datetime.datetime:
     """
-    What time is it?  Result returned in UTC
+    What time is it?  Result is a timezone naive datetime (local time).
     """
     return datetime.datetime.now()
 
@@ -107,16 +241,45 @@ def date_to_datetime(date: datetime.date) -> datetime.datetime:
     datetime.datetime(2021, 12, 25, 0, 0)
 
     """
-    return datetime.datetime(
-        date.year,
-        date.month,
-        date.day,
-        0, 0, 0, 0
-    )
+    return datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
 
 
-def date_and_time_to_datetime(date: datetime.date,
-                              time: datetime.time) -> datetime.datetime:
+def time_to_datetime_today(time: datetime.time) -> datetime.datetime:
+    """
+    Combine a time with today's date (the date of now_pacific()) and
+    return the resulting datetime.  The result carries the same tzinfo
+    as the time passed in: timezone aware in, timezone aware out, and
+    timezone naive in, timezone naive out.
+
+    >>> t = datetime.time(13, 14, 0)
+    >>> d = now_pacific().date()
+    >>> dt = time_to_datetime_today(t)
+    >>> dt.date() == d
+    True
+
+    >>> dt.time() == t
+    True
+
+    >>> dt.tzinfo == t.tzinfo
+    True
+
+    >>> dt.tzinfo is None
+    True
+
+    >>> t = datetime.time(8, 15, 12, 0, pytz.UTC)
+    >>> t.tzinfo is None
+    False
+
+    >>> dt = time_to_datetime_today(t)
+    >>> dt.tzinfo is None
+    False
+
+    """
+    today = now_pacific().date()
+    return datetime.datetime.combine(today, time)
+
+
+def date_and_time_to_datetime(date: datetime.date, time: datetime.time) -> datetime.datetime:
     """
     Given a date and time, merge them and return a datetime.
 
@@ -139,9 +302,10 @@ def date_and_time_to_datetime(date: datetime.date,
 
 
 def datetime_to_date_and_time(
-        dt: datetime.datetime
+    dt: datetime.datetime,
 ) -> Tuple[datetime.date, datetime.time]:
-    """Return the component date and time objects of a datetime.
+    """Return the component date and time objects of a datetime in a
+    Tuple given a datetime.
 
     >>> import datetime
     >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
@@ -156,7 +320,7 @@ def datetime_to_date_and_time(
 
 
 def datetime_to_date(dt: datetime.datetime) -> datetime.date:
-    """Return the date part of a datetime.
+    """Return just the date part of a datetime.
 
     >>> import datetime
     >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
@@ -168,7 +332,7 @@ def datetime_to_date(dt: datetime.datetime) -> datetime.date:
 
 
 def datetime_to_time(dt: datetime.datetime) -> datetime.time:
-    """Return the time part of a datetime.
+    """Return just the time part of a datetime.
 
     >>> import datetime
     >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
@@ -179,8 +343,9 @@ def datetime_to_time(dt: datetime.datetime) -> datetime.time:
     return datetime_to_date_and_time(dt)[1]
 
 
-class TimeUnit(enum.Enum):
+class TimeUnit(enum.IntEnum):
     """An enum to represent units with which we can compute deltas."""
+
     MONDAYS = 0
     TUESDAYS = 1
     WEDNESDAYS = 2
@@ -199,22 +364,18 @@ class TimeUnit(enum.Enum):
 
     @classmethod
     def is_valid(cls, value: Any):
-        if type(value) is int:
-            return value in cls._value2member_map_
-        elif type(value) is TimeUnit:
-            return value.value in cls._value2member_map_
-        elif type(value) is str:
-            return value in cls._member_names_
+        if isinstance(value, int):
+            return cls(value) is not None
+        elif isinstance(value, TimeUnit):
+            return cls(value.value) is not None
+        elif isinstance(value, str):
+            return cls.__members__[value] is not None
         else:
             print(type(value))
             return False
 
 
-def n_timeunits_from_base(
-    count: int,
-    unit: TimeUnit,
-    base: datetime.datetime
-) -> datetime.datetime:
+def n_timeunits_from_base(count: int, unit: TimeUnit, base: datetime.datetime) -> datetime.datetime:
     """Return a datetime that is N units before/after a base datetime.
     e.g.  3 Wednesdays from base datetime, 2 weeks from base date, 10
     years before base datetime, 13 minutes after base datetime, etc...
@@ -255,6 +416,17 @@ def n_timeunits_from_base(
     >>> n_timeunits_from_base(50, TimeUnit.SECONDS, base)
     datetime.datetime(2021, 9, 10, 11, 25, 41, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
 
+    Next month corner case -- it will try to make Feb 31, 2022 then count
+    backwards.
+    >>> base = string_to_datetime("2022/01/31 11:24:51AM-0700")[0]
+    >>> n_timeunits_from_base(1, TimeUnit.MONTHS, base)
+    datetime.datetime(2022, 2, 28, 11, 24, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
+
+    Last month with the same corner case
+    >>> base = string_to_datetime("2022/03/31 11:24:51AM-0700")[0]
+    >>> n_timeunits_from_base(-1, TimeUnit.MONTHS, base)
+    datetime.datetime(2022, 2, 28, 11, 24, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
+
     """
     assert TimeUnit.is_valid(unit)
     if count == 0:
@@ -293,12 +465,7 @@ def n_timeunits_from_base(
             base += timedelta
             if base.year != old_year:
                 skips = holidays.US(years=base.year).keys()
-            if (
-                    base.weekday() < 5 and
-                    datetime.date(base.year,
-                                  base.month,
-                                  base.day) not in skips
-            ):
+            if base.weekday() < 5 and datetime.date(base.year, base.month, base.day) not in skips:
                 count -= 1
         return base
 
@@ -317,16 +484,23 @@ def n_timeunits_from_base(
             new_month %= 12
             year_term += 1
         new_year = base.year + year_term
-        return datetime.datetime(
-            new_year,
-            new_month,
-            base.day,
-            base.hour,
-            base.minute,
-            base.second,
-            base.microsecond,
-            base.tzinfo,
-        )
+        day = base.day
+        while True:
+            try:
+                ret = datetime.datetime(
+                    new_year,
+                    new_month,
+                    day,
+                    base.hour,
+                    base.minute,
+                    base.second,
+                    base.microsecond,
+                    base.tzinfo,
+                )
+                break
+            except ValueError:
+                day -= 1
+        return ret
 
     # N years from base
     elif unit == TimeUnit.YEARS:
@@ -342,13 +516,17 @@ def n_timeunits_from_base(
             base.tzinfo,
         )
 
-    if unit not in set([TimeUnit.MONDAYS,
-                        TimeUnit.TUESDAYS,
-                        TimeUnit.WEDNESDAYS,
-                        TimeUnit.THURSDAYS,
-                        TimeUnit.FRIDAYS,
-                        TimeUnit.SATURDAYS,
-                        TimeUnit.SUNDAYS]):
+    if unit not in set(
+        [
+            TimeUnit.MONDAYS,
+            TimeUnit.TUESDAYS,
+            TimeUnit.WEDNESDAYS,
+            TimeUnit.THURSDAYS,
+            TimeUnit.FRIDAYS,
+            TimeUnit.SATURDAYS,
+            TimeUnit.SUNDAYS,
+        ]
+    ):
         raise ValueError(unit)
 
     # N weekdays from base (e.g. 4 wednesdays from today)
@@ -366,14 +544,14 @@ def n_timeunits_from_base(
 
 
 def get_format_string(
-        *,
-        date_time_separator=" ",
-        include_timezone=True,
-        include_dayname=False,
-        use_month_abbrevs=False,
-        include_seconds=True,
-        include_fractional=False,
-        twelve_hour=True,
+    *,
+    date_time_separator=" ",
+    include_timezone=True,
+    include_dayname=False,
+    use_month_abbrevs=False,
+    include_seconds=True,
+    include_fractional=False,
+    twelve_hour=True,
 ) -> str:
     """
     Helper to return a format string without looking up the documentation
@@ -446,22 +624,24 @@ def datetime_to_string(
         date_time_separator=date_time_separator,
         include_timezone=include_timezone,
         include_dayname=include_dayname,
+        use_month_abbrevs=use_month_abbrevs,
         include_seconds=include_seconds,
         include_fractional=include_fractional,
-        twelve_hour=twelve_hour)
+        twelve_hour=twelve_hour,
+    )
     return dt.strftime(fstring).strip()
 
 
 def string_to_datetime(
-        txt: str,
-        *,
-        date_time_separator=" ",
-        include_timezone=True,
-        include_dayname=False,
-        use_month_abbrevs=False,
-        include_seconds=True,
-        include_fractional=False,
-        twelve_hour=True,
+    txt: str,
+    *,
+    date_time_separator=" ",
+    include_timezone=True,
+    include_dayname=False,
+    use_month_abbrevs=False,
+    include_seconds=True,
+    include_fractional=False,
+    twelve_hour=True,
 ) -> Tuple[datetime.datetime, str]:
     """A nice way to convert a string into a datetime.  Returns both the
     datetime and the format string used to parse it.  Also consider
@@ -478,13 +658,12 @@ def string_to_datetime(
         date_time_separator=date_time_separator,
         include_timezone=include_timezone,
         include_dayname=include_dayname,
+        use_month_abbrevs=use_month_abbrevs,
         include_seconds=include_seconds,
         include_fractional=include_fractional,
-        twelve_hour=twelve_hour)
-    return (
-        datetime.datetime.strptime(txt, fstring),
-        fstring
+        twelve_hour=twelve_hour,
     )
+    return (datetime.datetime.strptime(txt, fstring), fstring)
 
 
 def timestamp() -> str:
@@ -651,7 +830,7 @@ def parse_duration(duration: str) -> int:
     return seconds
 
 
-def describe_duration(seconds: int, *, include_seconds = False) -> str:
+def describe_duration(seconds: int, *, include_seconds=False) -> str:
     """
     Describe a duration represented as a count of seconds nicely.
 
@@ -712,7 +891,7 @@ def describe_timedelta(delta: datetime.timedelta) -> str:
     '1 day, and 10 minutes'
 
     """
-    return describe_duration(delta.total_seconds())
+    return describe_duration(int(delta.total_seconds()))  # Note: drops milliseconds
 
 
 def describe_duration_briefly(seconds: int, *, include_seconds=False) -> str:
@@ -757,9 +936,10 @@ def describe_timedelta_briefly(delta: datetime.timedelta) -> str:
     '1d 10m'
 
     """
-    return describe_duration_briefly(delta.total_seconds())
+    return describe_duration_briefly(int(delta.total_seconds()))  # Note: drops milliseconds
 
 
 if __name__ == '__main__':
     import doctest
+
     doctest.testmod()