More work to improve documentation generated by sphinx. Also fixes

author Scott Gasch <scott@gasch.org>

Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)

committer Scott Gasch <scott@gasch.org>

Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)
author Scott Gasch <scott@gasch.org>
Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)
committer Scott Gasch <scott@gasch.org>
Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)
diff --git a/docs/conf.py b/docs/conf.py

index 90fa6a7fabbacae59d2a85a2742bcd8130b1b59c..b619fa0183d5596e92d5a5155eb551c5e4a282d0 100644 (file)
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -55,6 +55,9 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
  # a list of builtin themes.
  #
  html_theme = 'sphinx_rtd_theme'
+html_theme_options = {
+    'navigation_depth': 5,
+}
  
  # Add any paths that contain custom static files (such as style sheets) here,
  # relative to this directory. They are copied after the builtin static files,
@@ -64,8 +67,6 @@ html_static_path = ['_static']
  
  # Don't skip __init__()!
  def skip(app, what, name, obj, would_skip, options):
-    if name == "__repr__":
-        return False
      return would_skip
  
  
diff --git a/docs/pyutils.collectionz.rst b/docs/pyutils.collectionz.rst

index 60518dcc9812e8b95e2a513839bc5c002ac0d984..0b0123797f7cfa159ea5b0f58aebed67830a3347 100644 (file)
--- a/docs/pyutils.collectionz.rst
+++ b/docs/pyutils.collectionz.rst
@@ -1,42 +1,18 @@
  pyutils.collectionz package
  ===========================
  
+This subpackage contains some homegrown collections that try to emulate
+:mod:`collections` included in the Python standard library.  It ends
+with a 'z' so as not to collide with the standard library package.
+
  Submodules
  ----------
  
  pyutils.collectionz.bidict module
  ---------------------------------
  
-The bidict.BiDict class is a subclass of :py:class:`dict` that
-implements a bidirectional dictionary.  That is, it maps each key to a
-value in constant time and each value back to the one or more keys it
-is associated with in constant time.  It does this by simply storing
-the data twice.
-
-Sample usage::
-
-    # Initialize with a normal dict...
-    third_party_wierdos = BiDict({
-        'prometheus-fastapi-instrumentator': 'prometheus_fastapi_instrumentator',
-        'scikit-learn': 'sklearn',
-        'antlr4-python3-runtime' : 'antlr4',
-        'python-dateutil': 'dateutil',
-        'speechrecognition': 'speech_recognition',
-        'beautifulsoup4': 'bs4',
-        'python-dateutil': 'dateutil',
-        'homeassistant-api': 'homeassistant_api',
-    })
-
-    # Use in one direction:
-    x = third_party_wierdos['scikit-learn']
-
-    # Use in opposite direction:
-    y = third_party_wierdos.inverse['python_dateutil']
-
-    # Note: type(y) is List since one value may map back to multiple keys.
-
-
  .. automodule:: pyutils.collectionz.bidict
+   :imported-members:
     :members:
     :undoc-members:
     :show-inheritance:
@@ -44,8 +20,6 @@ Sample usage::
  pyutils.collectionz.bst module
  ------------------------------
  
-The bst.BinarySearchTree class is a binary search tree container.
-
  .. automodule:: pyutils.collectionz.bst
     :members:
     :undoc-members:
@@ -70,13 +44,6 @@ size.
  pyutils.collectionz.trie module
  -------------------------------
  
-The trie.Trie class is a Trie or prefix tree.  It can be used with
-arbitrary sequences as keys and stores its values in a tree with paths
-determined by the sequence determined by each key.  Thus, it can
-determine whether a value is contained in the tree via a simple
-traversal in linear time and can also check whether a key-prefix is
-present in the tree in linear time.
-
  .. automodule:: pyutils.collectionz.trie
     :members:
     :undoc-members:
diff --git a/docs/pyutils.compress.rst b/docs/pyutils.compress.rst

index 7957494e773138987ff211aed2045132a97bb944..67a2eb827ed9f28b706995fb2c52bc915352dc6d 100644 (file)
--- a/docs/pyutils.compress.rst
+++ b/docs/pyutils.compress.rst
@@ -1,17 +1,14 @@
  pyutils.compress package
  ========================
  
+This subpackage includes code related to data compression.
+
  Submodules
  ----------
  
  pyutils.compress.letter\_compress module
  ----------------------------------------
  
-This is a simple, honestly, toy compression scheme that uses a custom
-alphabet of 32 characters which can each be represented in six bits
-instead of eight.  It therefore reduces the size of data composed of
-only those letters by 25% without loss.
-
  .. automodule:: pyutils.compress.letter_compress
     :members:
     :undoc-members:
diff --git a/docs/pyutils.datetimez.rst b/docs/pyutils.datetimez.rst

index fe7cdd18dd6944ec4b3faca97a38ebc2a7f3234e..302efedd8d20b2df7f8ec5ce0045cd29abfc6e8e 100644 (file)
--- a/docs/pyutils.datetimez.rst
+++ b/docs/pyutils.datetimez.rst
@@ -1,14 +1,14 @@
-yutils.datetimez package
+pyutils.datetimez package
  =========================
  
+This subpackage contains code for parsing and manipulating dates, times and datetimes.
+
  Submodules
  ----------
  
  pyutils.datetimez.constants module
  ----------------------------------
  
-A set of date and time related constants.
-
  .. automodule:: pyutils.datetimez.constants
     :members:
     :undoc-members:
@@ -17,20 +17,16 @@ A set of date and time related constants.
  pyutils.datetimez.dateparse\_utils module
  -----------------------------------------
  
-The dateparse\_utils.DateParser class uses an English language grammar
-(see dateparse\_utils.g4) to parse free form English text into a Python
-datetime.  It can handle somewhat complex constructs such as: "20 days
-from next Wed at 3pm", "last Christmas", and "The 2nd Sunday in May,
-2022".  See the dateparse_utils_test.py for more examples.
-
-This code is used by other code in the pyutils library; for example,
-when using argparse_utils.py to pass an argument of type datetime it
-allows the user to use free form english expressions.
-
  .. automodule:: pyutils.datetimez.dateparse_utils
     :members:
     :undoc-members:
-   :exclude-members: enterAmpm,
+   :exclude-members: PARSE_TYPE_BASE_AND_OFFSET_EXPR,
+                     PARSE_TYPE_BASE_AND_OFFSET_TIME_EXPR,
+                     PARSE_TYPE_SINGLE_DATE_EXPR,
+                     PARSE_TYPE_SINGLE_TIME_EXPR,
+                     RaisingErrorListener,
+                     debug_parse,
+                     enterAmpm,
                       enterBaseAndOffsetDateExpr,
                       enterBaseAndOffsetTimeExpr,
                       enterBaseDate,
@@ -147,32 +143,9 @@ allows the user to use free form english expressions.
                       visitErrorNode,
                       visitTerminal
  
-pyutils.datetimez.dateparse\_utilsLexer module
-----------------------------------------------
-
-This code is auto-generated by ANTLR from the dateparse\_utils.g4
-grammar.
-
-pyutils.datetimez.dateparse\_utilsListener module
--------------------------------------------------
-
-This code is auto-generated by ANTLR from the dateparse\_utils.g4
-grammar.
-
-pyutils.datetimez.dateparse\_utilsParser module
------------------------------------------------
-
-This code is auto-generated by ANTLR from the dateparse\_utils.g4
-grammar.
-
  pyutils.datetimez.datetime\_utils module
  ----------------------------------------
  
-This is a set of utilities for dealing with Python datetimes and
-dates.  It supports operations such as checking timezones,
-manipulating timezones, easy formatting, and using offsets with
-datetimes.
-
  .. automodule:: pyutils.datetimez.datetime_utils
     :members:
     :undoc-members:
@@ -181,8 +154,6 @@ datetimes.
  Module contents
  ---------------
  
-This module contains utilities for dealing with Python datetimes.
-
  .. automodule:: pyutils.datetimez
     :members:
     :undoc-members:
diff --git a/docs/pyutils.files.rst b/docs/pyutils.files.rst

index c1152d8e3ea17c8e9a9dbb803c3c34dfaa15ccd6..40ced78404b77c97e9573c5aca7d85ed9db8bf80 100644 (file)
--- a/docs/pyutils.files.rst
+++ b/docs/pyutils.files.rst
@@ -1,21 +1,14 @@
  pyutils.files package
  =====================
  
+This subpackage contains utilities for dealing with files on disk.
+
  Submodules
  ----------
  
  pyutils.files.directory\_filter module
  --------------------------------------
  
-This module contains two classes meant to help reduce unnecessary disk
-I/O operations:
-
-The first determines when the contents of a file held in memory are
-identical to the file copy already on disk.  The second is basically
-the same except for the caller need not indicate the name of the disk
-file because it will check the memory file's signature against a set
-of signatures of all files in a particular directory on disk.
-
  .. automodule:: pyutils.files.directory_filter
     :members:
     :undoc-members:
@@ -24,11 +17,6 @@ of signatures of all files in a particular directory on disk.
  pyutils.files.file\_utils module
  --------------------------------
  
-This is a grab bag of file-related utilities.  It has code to, for example,
-read files transforming the text as its read, normalize pathnames, strip
-extensions, read and manipulate atimes/mtimes/ctimes, compute a signature
-based on a file's contents, traverse the file system recursively, etc...
-
  .. automodule:: pyutils.files.file_utils
     :members:
     :undoc-members:
@@ -37,14 +25,6 @@ based on a file's contents, traverse the file system recursively, etc...
  pyutils.files.lockfile module
  -----------------------------
  
-This is a lockfile implementation I created for use with cronjobs on
-my machine to prevent multiple copies of a job from running in
-parallel.  When one job is running this code keeps a file on disk to
-indicate a lock is held.  Other copies will fail to start if they
-detect this lock until the lock is released.  There are provisions in
-the code for timing out locks, cleaning up a lock when a signal is
-received, gracefully retrying lock acquisition on failure, etc...
-
  .. automodule:: pyutils.files.lockfile
     :members:
     :undoc-members:
@@ -53,8 +33,6 @@ received, gracefully retrying lock acquisition on failure, etc...
  Module contents
  ---------------
  
-This module contains utilities for dealing with files on disk.
-
  .. automodule:: pyutils.files
     :members:
     :undoc-members:
diff --git a/docs/pyutils.parallelize.rst b/docs/pyutils.parallelize.rst

index 32cdc255a0753b79f53dc9d4128d9caba7ee60d0..722d4746c9def612643accf5758cc907adabcb51 100644 (file)
--- a/docs/pyutils.parallelize.rst
+++ b/docs/pyutils.parallelize.rst
@@ -1,29 +1,42 @@
  pyutils.parallelize package
  ===========================
  
+This package contains code related to parallelization including some
+utilities (:file:`thread_utils.py`) and a framework for simple
+parallelization (everything else).
+
  Submodules
  ----------
  
  pyutils.parallelize.deferred\_operand module
  --------------------------------------------
  
-DeferredOperand is the base class for SmartFuture.
-
  .. automodule:: pyutils.parallelize.deferred_operand
     :members:
     :undoc-members:
+   :exclude-members: __repr__,
+                     and_,
+                     bool,
+                     delitem,
+                     getitem,
+                     invert,
+                     is_,
+                     is_not,
+                     lshift,
+                     matmul,
+                     mod,
+                     neg,
+                     or_,
+                     pos,
+                     rshift,
+                     setitem,
+                     truth,
+                     xor
     :show-inheritance:
  
  pyutils.parallelize.executors module
  ------------------------------------
  
-This module defines three executors: one for threads in the same
-process, one for separate processes on the same machine and the third
-for separate processes on remote machines.  Each can be used via the
-@parallelize decorator.  These executor pools are automatically
-cleaned up at program exit.
-
-
  .. automodule:: pyutils.parallelize.executors
     :members:
     :undoc-members:
@@ -32,8 +45,6 @@ cleaned up at program exit.
  pyutils.parallelize.parallelize module
  --------------------------------------
  
-This module defines a decorator that can be used for simple parallelization.
-
  .. automodule:: pyutils.parallelize.parallelize
     :members:
     :undoc-members:
@@ -60,8 +71,6 @@ Also contains some utilility code for waiting for one/many futures.
  pyutils.parallelize.thread\_utils module
  ----------------------------------------
  
-Simple utils that deal with threads.
-
  .. automodule:: pyutils.parallelize.thread_utils
     :members:
     :undoc-members:
diff --git a/docs/pyutils.rst b/docs/pyutils.rst

index 38224ac443313c5bb963f98c63f7b0b1e3aecc8c..57a76f0a973c486c64631b936f0ef207ee0cb143 100644 (file)
--- a/docs/pyutils.rst
+++ b/docs/pyutils.rst
@@ -58,7 +58,7 @@ Subpackages
  -----------
  
  .. toctree::
-   :maxdepth: 4
+   :maxdepth: 5
     :name: mastertoc
  
     pyutils.collectionz
diff --git a/docs/pyutils.security.rst b/docs/pyutils.security.rst

index 16af04926d314afcd3afc17cf967bcb28763dda5..ba0a518d90e6a050d62cb93ad3d54dd663e5512e 100644 (file)
--- a/docs/pyutils.security.rst
+++ b/docs/pyutils.security.rst
@@ -1,81 +1,6 @@
  pyutils.security package
  ========================
  
-Right now this package only contains an implementation that allows you to
-define and evaluate Access Control Lists (ACLs) easily.  For example::
-
-        even = acl.SetBasedACL(
-            allow_set=set([2, 4, 6, 8, 10]),
-            deny_set=set([1, 3, 5, 7, 9]),
-            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
-            default_answer=False,
-        )
-        self.assertTrue(even(2))
-        self.assertFalse(even(3))
-        self.assertFalse(even(-4))
-
-ACLs can also be defined based on other criteria, for example::
-
-        a_or_b = acl.StringWildcardBasedACL(
-            allowed_patterns=['a*', 'b*'],
-            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
-            default_answer=False,
-        )
-        self.assertTrue(a_or_b('aardvark'))
-        self.assertTrue(a_or_b('baboon'))
-        self.assertFalse(a_or_b('cheetah'))
-
-Or::
-
-        weird = acl.StringREBasedACL(
-            denied_regexs=[re.compile('^a.*a$'), re.compile('^b.*b$')],
-            order_to_check_allow_deny=acl.Order.DENY_ALLOW,
-            default_answer=True,
-        )
-        self.assertTrue(weird('aardvark'))
-        self.assertFalse(weird('anaconda'))
-        self.assertFalse(weird('blackneb'))
-        self.assertTrue(weird('crow'))
-
-There are implementations for wildcards, sets, regular expressions,
-allow lists, deny lists, sequences of user defined predicates, etc...
-You can also just subclass the base :class:`SimpleACL` interface to
-define your own ACLs easily.  Its __call__ method simply needs to
-decide whether an item is allowed or denied.
-
-Once a :class:`SimpleACL` is defined, it can be used in :class:`CompoundACLs`::
-
-        a_b_c = acl.StringWildcardBasedACL(
-            allowed_patterns=['a*', 'b*', 'c*'],
-            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
-            default_answer=False,
-        )
-        c_d_e = acl.StringWildcardBasedACL(
-            allowed_patterns=['c*', 'd*', 'e*'],
-            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
-            default_answer=False,
-        )
-        conjunction = acl.AllCompoundACL(
-            subacls=[a_b_c, c_d_e],
-            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
-            default_answer=False,
-        )
-        self.assertFalse(conjunction('aardvark'))
-        self.assertTrue(conjunction('caribou'))
-        self.assertTrue(conjunction('condor'))
-        self.assertFalse(conjunction('eagle'))
-        self.assertFalse(conjunction('newt'))
-
-a :class:`CompoundACL` can also be used inside another :class:`CompoundACL`
-so this should be a flexible framework when defining complex access control
-requirements:
-
-There are two flavors of :class:`CompoundACLs`:
-:class:`AllCompoundACL` and :class:`AnyCompoundAcl`.  The former only
-admits an item if all of its sub-acls admit it and the latter will
-admit an item if any of its sub-acls admit it.:
-
-
  Submodules
  ----------
  
diff --git a/docs/pyutils.typez.rst b/docs/pyutils.typez.rst

index 5f33cd42a0b750000c5ff1b5169dde0c48ecb2fc..5957ef4055df42e81e271ace0133d2662bf8217d 100644 (file)
--- a/docs/pyutils.typez.rst
+++ b/docs/pyutils.typez.rst
@@ -10,37 +10,13 @@ pyutils.typez.centcount module
  .. automodule:: pyutils.typez.centcount
     :members:
     :undoc-members:
+   :exclude-members:  CENTCOUNT_RE,
+                      CURRENCY_RE
     :show-inheritance:
  
  pyutils.typez.histogram module
  ------------------------------
  
-A Histogram helper class.  Creates outputs like this::
-
-      [4..5): ▏                                                     ( 0.16% n=1)
-      [5..6): ██▍                                                   ( 0.64% n=4)
-      [6..7): ██████▏                                               ( 1.60% n=10)
-      [7..8): ████████████▍                                         ( 3.20% n=20)
-      [8..9): █████████████████████▊                                ( 5.60% n=35)
-     [9..10): ████████████████████████████████▍                     ( 8.32% n=52)
-    [10..11): ██████████████████████████████████████████▍           (10.88% n=68)
-    [11..12): █████████████████████████████████████████████████▉    (12.80% n=80)
-    [12..13): ████████████████████████████████████████████████████▉ (13.60% n=85)
-    [13..14): █████████████████████████████████████████████████▉    (12.80% n=80)
-    [14..15): ██████████████████████████████████████████▍           (10.88% n=68)
-    [15..16): ████████████████████████████████▍                     ( 8.32% n=52)
-    [16..17): █████████████████████▊                                ( 5.60% n=35)
-    [17..18): ████████████▍                                         ( 3.20% n=20)
-    [18..19): ██████▏                                               ( 1.60% n=10)
-    [19..20): ██▍                                                   ( 0.64% n=4)
-    [20..21): ▏                                                     ( 0.16% n=1)
-    --------------------------------------------------------------------------------
-     [4..21):                                                         pop(Σn)=625
-                                                                      mean(x̄)=12.000
-                                                                  median(p50)=12.000
-                                                                     mode(Mo)=12.000
-                                                                     stdev(σ)=0.113
-
  .. automodule:: pyutils.typez.histogram
     :members:
     :undoc-members:
@@ -52,6 +28,8 @@ pyutils.typez.money module
  .. automodule:: pyutils.typez.money
     :members:
     :undoc-members:
+   :exclude-members:  CENTCOUNT_RE,
+                      CURRENCY_RE
     :show-inheritance:
  
  pyutils.typez.rate module
diff --git a/src/pyutils/ansi.py b/src/pyutils/ansi.py

index b29278be09069134fa9f0acdbc29b7a5f78e988a..00323baaf8f71d9bcaff6b3bb86b4f59163fe2b3 100755 (executable)
--- a/src/pyutils/ansi.py
+++ b/src/pyutils/ansi.py
@@ -15,7 +15,7 @@ import logging
  import re
  import sys
  from abc import abstractmethod
-from typing import Any, Callable, Dict, Iterable, Literal, Optional, Tuple
+from typing import Callable, Dict, Iterable, Optional, Tuple
  
  from overrides import overrides
  
@@ -2255,10 +2255,9 @@ class _StdoutInterceptor(io.TextIOBase, contextlib.AbstractContextManager):
          sys.stdout = self
          return self
  
-    def __exit__(self, *args) -> Literal[False]:
+    def __exit__(self, *args) -> None:
          sys.stdout = self.saved_stdout
          print(self.buf)
-        return False
  
  
  class ProgrammableColorizer(_StdoutInterceptor):
@@ -2270,10 +2269,10 @@ class ProgrammableColorizer(_StdoutInterceptor):
      could just as easily insert escape sequences returned from
      :py:meth:`fg`, :py:meth:`bg`, and :py:meth:`reset`.
  
-    >>> def red(match: re.Match) -> str:
+    >>> def red(match: re.Match, s: str) -> str:
      ...     return '[RED]'
  
-    >>> def reset(match: re.Match) -> str:
+    >>> def reset(match: re.Match, s: str) -> str:
      ...     return '[RESET]'
  
      >>> with ProgrammableColorizer( [ (re.compile('^[^ ]+'), red),
@@ -2285,7 +2284,7 @@ class ProgrammableColorizer(_StdoutInterceptor):
  
      def __init__(
          self,
-        patterns: Iterable[Tuple[re.Pattern, Callable[[Any, re.Pattern], str]]],
+        patterns: Iterable[Tuple[re.Pattern, Callable[[re.Match[str], str], str]]],
      ):
          """
          Setup the programmable colorizing context; tell it how to operate.
diff --git a/src/pyutils/collectionz/bidict.py b/src/pyutils/collectionz/bidict.py

index 000fdb36f47f54929db81124f5001aee7762c18b..77d11e08daaecb8dd93b5c6f447f6f101c46a848 100644 (file)
--- a/src/pyutils/collectionz/bidict.py
+++ b/src/pyutils/collectionz/bidict.py
@@ -2,7 +2,36 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A bidirectional dictionary."""
+"""
+The :class:`pyutils.collectionz.bidict.BiDict` class is a subclass
+of :py:class:`dict` that implements a bidirectional dictionary.  That
+is, it maps each key to a value in constant time and each value back
+to the one or more keys it is associated with in constant time.  It
+does this by simply storing the data twice.
+
+Sample usage::
+
+    # Initialize with a normal dict...
+    third_party_wierdos = BiDict({
+        'prometheus-fastapi-instrumentator': 'prometheus_fastapi_instrumentator',
+        'scikit-learn': 'sklearn',
+        'antlr4-python3-runtime' : 'antlr4',
+        'python-dateutil': 'dateutil',
+        'speechrecognition': 'speech_recognition',
+        'beautifulsoup4': 'bs4',
+        'python-dateutil': 'dateutil',
+        'homeassistant-api': 'homeassistant_api',
+    })
+
+    # Use in one direction:
+    x = third_party_wierdos['scikit-learn']
+
+    # Use in opposite direction:
+    y = third_party_wierdos.inverse['dateutil']
+
+    # Note: type(y) is List since one value may map back to multiple keys.
+
+"""
  
  
  class BiDict(dict):
@@ -51,3 +80,9 @@ class BiDict(dict):
          if value in self.inverse and not self.inverse[value]:
              del self.inverse[value]
          super().__delitem__(key)
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/src/pyutils/collectionz/bst.py b/src/pyutils/collectionz/bst.py

index 2e5e3ce95599811aecb553fe5b18e2ebd97c1434..4c0bacdd051374a3f700ceba33b4beaad143b956 100644 (file)
--- a/src/pyutils/collectionz/bst.py
+++ b/src/pyutils/collectionz/bst.py
@@ -2,16 +2,20 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A binary search tree."""
+"""A binary search tree implementation."""
  
  from typing import Any, Generator, List, Optional
  
  
  class Node(object):
      def __init__(self, value: Any) -> None:
-        """Note that value can be anything as long as it is
-        comparable.  Check out @functools.total_ordering.
+        """
+        A BST node.  Note that value can be anything as long as it
+        is comparable.  Check out :func:`functools.total_ordering`
+        (https://docs.python.org/3/library/functools.html#functools.total_ordering)
  
+        Args:
+            value: a reference to the value of the node.
          """
          self.left: Optional[Node] = None
          self.right: Optional[Node] = None
@@ -25,14 +29,20 @@ class BinarySearchTree(object):
          self.traverse = None
  
      def get_root(self) -> Optional[Node]:
-        """:returns the root of the BST."""
+        """
+        Returns:
+            The root of the BST
+        """
  
          return self.root
  
-    def insert(self, value: Any):
+    def insert(self, value: Any) -> None:
          """
          Insert something into the tree.
  
+        Args:
+            value: the value to be inserted.
+
          >>> t = BinarySearchTree()
          >>> t.insert(10)
          >>> t.insert(20)
@@ -99,6 +109,7 @@ class BinarySearchTree(object):
      def _parent_path(
          self, current: Optional[Node], target: Node
      ) -> List[Optional[Node]]:
+        """Internal helper"""
          if current is None:
              return [None]
          ret: List[Optional[Node]] = [current]
@@ -113,11 +124,20 @@ class BinarySearchTree(object):
              return ret
  
      def parent_path(self, node: Node) -> List[Optional[Node]]:
-        """Return a list of nodes representing the path from
-        the tree's root to the node argument.  If the node does
-        not exist in the tree for some reason, the last element
-        on the path will be None but the path will indicate the
-        ancestor path of that node were it inserted.
+        """Get a node's parent path.
+
+        Args:
+            node: the node to check
+
+        Returns:
+            a list of nodes representing the path from
+            the tree's root to the node.
+
+        .. note::
+
+            If the node does not exist in the tree, the last element
+            on the path will be None but the path will indicate the
+            ancestor path of that node were it to be inserted.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -162,6 +182,13 @@ class BinarySearchTree(object):
          """
          Delete an item from the tree and preserve the BST property.
  
+        Args:
+            value: the value of the node to be deleted.
+
+        Returns:
+            True if the value was found and its associated node was
+            successfully deleted and False otherwise.
+
          >>> t = BinarySearchTree()
          >>> t.insert(50)
          >>> t.insert(75)
@@ -288,7 +315,8 @@ class BinarySearchTree(object):
  
      def __len__(self):
          """
-        Returns the count of items in the tree.
+        Returns:
+            The count of items in the tree.
  
          >>> t = BinarySearchTree()
          >>> len(t)
@@ -314,7 +342,8 @@ class BinarySearchTree(object):
  
      def __contains__(self, value: Any) -> bool:
          """
-        Returns True if the item is in the tree; False otherwise.
+        Returns:
+            True if the item is in the tree; False otherwise.
          """
          return self.__getitem__(value) is not None
  
@@ -341,7 +370,9 @@ class BinarySearchTree(object):
  
      def iterate_preorder(self):
          """
-        Yield the tree's items in a preorder traversal sequence.
+        Returns:
+            A Generator that yields the tree's items in a
+            preorder traversal sequence.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -366,7 +397,9 @@ class BinarySearchTree(object):
  
      def iterate_inorder(self):
          """
-        Yield the tree's items in a preorder traversal sequence.
+        Returns:
+            A Generator that yields the tree's items in an inorder
+            traversal sequence.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -401,7 +434,9 @@ class BinarySearchTree(object):
  
      def iterate_postorder(self):
          """
-        Yield the tree's items in a preorder traversal sequence.
+        Returns:
+            A Generator that yields the tree's items in a postorder
+            traversal sequence.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -434,7 +469,9 @@ class BinarySearchTree(object):
  
      def iterate_leaves(self):
          """
-        Iterate only the leaf nodes in the tree.
+        Returns:
+            A Generator that yields only the leaf nodes in the
+            tree.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -465,7 +502,12 @@ class BinarySearchTree(object):
  
      def iterate_nodes_by_depth(self, depth: int) -> Generator[Node, None, None]:
          """
-        Iterate only the leaf nodes in the tree.
+        Args:
+            depth: the desired depth
+
+        Returns:
+            A Generator that yields nodes at the prescribed depth in
+            the tree.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -490,7 +532,11 @@ class BinarySearchTree(object):
  
      def get_next_node(self, node: Node) -> Node:
          """
-        Given a tree node, get the next greater node in the tree.
+        Args:
+            node: the node whose next greater successor is desired
+
+        Returns:
+            The next greater node in the tree after the given node.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -547,8 +593,9 @@ class BinarySearchTree(object):
  
      def depth(self) -> int:
          """
-        Returns the max height (depth) of the tree in plies (edge distance
-        from root).
+        Returns:
+            The max height (depth) of the tree in plies (edge distance
+            from root).
  
          >>> t = BinarySearchTree()
          >>> t.depth()
@@ -609,7 +656,8 @@ class BinarySearchTree(object):
  
      def __repr__(self):
          """
-        Draw the tree in ASCII.
+        Returns:
+            An ASCII string representation of the tree.
  
          >>> t = BinarySearchTree()
          >>> t.insert(50)
@@ -643,3 +691,9 @@ class BinarySearchTree(object):
          )
          ret += self.repr_traverse('', pointer_right, self.root.right, False)
          return ret
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/src/pyutils/collectionz/trie.py b/src/pyutils/collectionz/trie.py

index 762ae3a992fcc860f69a1e2897d73ea1af17c4cb..0454ffa57cfa274faa5dd770b7e001478844407a 100644 (file)
--- a/src/pyutils/collectionz/trie.py
+++ b/src/pyutils/collectionz/trie.py
@@ -2,15 +2,23 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""This is a Trie class, see: https://en.wikipedia.org/wiki/Trie.
+"""This module contains the implementation of a Trie tree (or prefix
+tree).  See: https://en.wikipedia.org/wiki/Trie.
  
-It attempts to follow Pythonic container patterns.  See doctests
-for examples.
+It can be used with arbitrary sequences as keys and stores its values
+in a tree with paths determined by each key's sequence.  Thus, it can
+determine whether a given value is contained in the tree via a simple
+traversal in :math:`O(n)` time, where n is the number of steps in the
+item's sequence, and can also check whether a key-prefix is present
+in the tree in :math:`O(n)` time.
+
+Given a node in the trie, it is easy to determine all items that are
+stored beneath that node.  See examples below.
  
  """
  
  import logging
-from typing import Any, Generator, Sequence
+from typing import Any, Dict, Generator, Sequence
  
  logger = logging.getLogger(__name__)
  
@@ -21,25 +29,28 @@ class Trie(object):
  
      It attempts to follow Pythonic container patterns.  See doctests
      for examples.
-
      """
  
      def __init__(self):
+        """Create an empty trie."""
          self.root = {}
          self.end = "~END~"
          self.length = 0
          self.viz = ''
          self.content_generator: Generator[str] = None
  
-    def insert(self, item: Sequence[Any]):
+    def insert(self, item: Sequence[Any]) -> None:
          """
-        Insert an item.
+        Insert an item into the trie.  Items are represented by a :class:`Sequence`
+        where each item in the sequence describes a step in the path to the item.
+
+        Args:
+            item: the item to be inserted.
  
          >>> t = Trie()
          >>> t.insert('test')
          >>> t.__contains__('test')
          True
-
          """
          current = self.root
          for child in item:
@@ -49,7 +60,13 @@ class Trie(object):
  
      def __contains__(self, item: Sequence[Any]) -> bool:
          """
-        Check whether an item is in the Trie.
+        Checks whether an item is in the Trie.
+
+        Args:
+            item: the item whose presence is to be determined.
+
+        Returns:
+            True if `item` is present, False otherwise.
  
          >>> t = Trie()
          >>> t.insert('test')
@@ -59,7 +76,6 @@ class Trie(object):
          False
          >>> 'test' in t
          True
-
          """
          current = self.__traverse__(item)
          if current is None:
@@ -72,6 +88,12 @@ class Trie(object):
          Check whether a prefix is in the Trie.  The prefix may or may not
          be a full item.
  
+        Args:
+            item: the item describing the prefix to be checked.
+
+        Returns:
+            True if the prefix described by `item` is present, False otherwise.
+
          >>> t = Trie()
          >>> t.insert('tricycle')
          >>> t.contains_prefix('tri')
@@ -94,11 +116,24 @@ class Trie(object):
                  return None
          return current
  
-    def __getitem__(self, item: Sequence[Any]):
-        """Given an item, return its Trie node which contains all
+    def __getitem__(self, item: Sequence[Any]) -> Dict[Any, Any]:
+        """Given an item, return its trie node which contains all
          of the successor (child) node pointers.  If the item is not
          a node in the Trie, raise a KeyError.
  
+        Args:
+            item: the item whose node is to be retrieved
+
+        Returns:
+            A mapping that represents item in the trie.  If the
+            keyspace of the mapping includes '~END~' a valid item
+            ends at the node.  If the mapping contains other keys,
+            each key indicates the presence of one or more children
+            on the edge below the node.
+
+        Raises:
+            KeyError: if item is not present in the trie.
+
          >>> t = Trie()
          >>> t.insert('test')
          >>> t.insert('testicle')
@@ -113,26 +148,42 @@ class Trie(object):
              raise KeyError(f"Node '{item}' is not in the trie")
          return ret
  
-    def delete_recursively(self, node, item: Sequence[Any]) -> bool:
+    def delete_recursively(self, node: Dict[Any, Any], item: Sequence[Any]) -> bool:
+        """
+        Deletes an item from the trie by walking the path from root to where it
+        ends.
+
+        Args:
+            node: root under which to search for item
+            item: item whose node is the root of the recursive deletion operation
+
+        Returns:
+            True if the item was not the prefix of another item such that there
+            is nothing below item remaining anymore post delete and False if the
+            deleted item was a proper prefix of another item in the tree such that
+            there is still data below item remaining in the tree.
+        """
          if len(item) == 1:
              del node[item]
              if len(node) == 0 and node is not self.root:
                  del node
                  return True
-            else:
-                return False
+            return False
          else:
              car = item[0]
              cdr = item[1:]
              lower = node[car]
-            if self.delete_recursively(lower, cdr):
-                return self.delete_recursively(node, car)
-            return False
+            ret = self.delete_recursively(lower, cdr)
+            ret = ret and self.delete_recursively(node, car)
+            return ret
  
      def __delitem__(self, item: Sequence[Any]):
          """
          Delete an item from the Trie.
  
+        Args:
+            item: the item to be deleted.
+
          >>> t = Trie()
          >>> t.insert('test')
          >>> t.insert('tess')
@@ -161,12 +212,15 @@ class Trie(object):
          >>> t.__delitem__('tess')
          >>> len(t)
          0
+        >>> t.length
+        0
          >>> t.root
          {}
          >>> t.insert('testy')
+        >>> t.length
+        1
          >>> len(t)
          1
-
          """
          if item not in self:
              raise KeyError(f"Node '{item}' is not in the trie")
@@ -175,7 +229,8 @@ class Trie(object):
  
      def __len__(self):
          """
-        Returns a count of the Trie's item population.
+        Returns:
+            A count of the trie's item population.
  
          >>> t = Trie()
          >>> len(t)
@@ -183,10 +238,9 @@ class Trie(object):
          >>> t.insert('test')
          >>> len(t)
          1
-        >>> t.insert('testicle')
+        >>> t.insert('tree')
          >>> len(t)
          2
-
          """
          return self.length
  
@@ -196,7 +250,8 @@ class Trie(object):
  
      def generate_recursively(self, node, path: Sequence[Any]):
          """
-        Generate items in the trie one by one.
+        Returns:
+            A generator that yields the trie's items one at a time.
  
          >>> t = Trie()
          >>> t.insert('test')
@@ -233,7 +288,8 @@ class Trie(object):
  
      def successors(self, item: Sequence[Any]):
          """
-        Return a list of the successors of an item.
+        Returns:
+            A list of the successors of an item.
  
          >>> t = Trie()
          >>> t.insert('what')
@@ -248,7 +304,6 @@ class Trie(object):
          >>> u.insert(['this', 'is', 'a', 'walrus'])
          >>> u.successors(['this', 'is', 'a'])
          ['test', 'robbery', 'walrus']
-
          """
          node = self.__traverse__(item)
          if node is None:
@@ -263,7 +318,8 @@ class Trie(object):
          has_sibling: bool,
      ) -> str:
          """
-        Helper that return a fancy representation of the Trie:
+        Helper that returns a fancy representation of the Trie, used by
+        :meth:`__repr__`.
          """
          if node is None:
              return ''
@@ -294,10 +350,17 @@ class Trie(object):
                  ret += self._repr_fancy(padding, pointer, node[child], has_sibling)
          return ret
  
-    def repr_brief(self, node, delimiter):
+    def repr_brief(self, node: Dict[Any, Any], delimiter: str):
          """
          A friendly string representation of the contents of the Trie.
  
+        Args:
+            node: the root of the trie to represent.
+            delimiter: character or string to stuff between edges.
+
+        Returns:
+            A brief string representation of the trie.  See example.
+
          >>> t = Trie()
          >>> t.insert([10, 0, 0, 1])
          >>> t.insert([10, 0, 0, 2])
@@ -347,3 +410,9 @@ class Trie(object):
  
          """
          return self._repr_fancy('', '*', self.root, False)
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/src/pyutils/compress/letter_compress.py b/src/pyutils/compress/letter_compress.py

index c631803735b0562b105a9697997275180ee3df0f..21a15b728e94c8a0de44ec9c4a6c2de662ba2ad9 100644 (file)
--- a/src/pyutils/compress/letter_compress.py
+++ b/src/pyutils/compress/letter_compress.py
@@ -2,7 +2,12 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A simple toy compression helper for lowercase ascii text."""
+"""
+This is a simple (honestly, toy) compression scheme that uses a custom
+alphabet of 32 characters which can each be represented in five bits
+instead of eight.  It therefore reduces the size of data composed of
+only those letters by 37.5% without loss.
+"""
  
  import bitstring
  
@@ -24,6 +29,12 @@ def compress(uncompressed: str) -> bytes:
      form will be 5/8th the size of the original.  Words can be lower
      case letters or special_characters (above).
  
+    Args:
+        uncompressed: the uncompressed string to be compressed
+
+    Returns:
+        the compressed bytes
+
      >>> import binascii
      >>> binascii.hexlify(compress('this is a test'))
      b'a2133da67b0ee859d0'
@@ -51,11 +62,17 @@ def compress(uncompressed: str) -> bytes:
      return compressed.bytes
  
  
-def decompress(kompressed: bytes) -> str:
+def decompress(compressed: bytes) -> str:
      """
      Decompress a previously compressed stream of bytes back into
      its original form.
  
+    Args:
+        compressed: the compressed data to decompress
+
+    Returns:
+        The decompressed string
+
      >>> import binascii
      >>> decompress(binascii.unhexlify(b'a2133da67b0ee859d0'))
      'this is a test'
@@ -65,7 +82,7 @@ def decompress(kompressed: bytes) -> str:
  
      """
      decompressed = ''
-    compressed = bitstring.BitArray(kompressed)
+    kompressed = bitstring.BitArray(compressed)
  
      # There are compressed messages that legitimately end with the
      # byte 0x00.  The message "scott" is an example; compressed it is
@@ -90,9 +107,9 @@ def decompress(kompressed: bytes) -> str:
      # if the compressed message didn't end in 0x00), adding an extra
      # 0x00 is a no op because the codepoint 0b00000 is a "stop" message
      # so we'll ignore the extras.
-    compressed.append("uint:8=0")
+    kompressed.append("uint:8=0")
  
-    for chunk in compressed.cut(5):
+    for chunk in kompressed.cut(5):
          chunk = chunk.uint
          if chunk == 0:
              break
diff --git a/src/pyutils/datetimez/dateparse_utils.g4 b/src/pyutils/datetimez/dateparse_utils.g4

index 364aa0f985592f84deb1e41b5bcd09c3c7126873..75db1e81d1495496b0054002ad0915cfcf985c14 100644 (file)
--- a/src/pyutils/datetimez/dateparse_utils.g4
+++ b/src/pyutils/datetimez/dateparse_utils.g4
@@ -1,4 +1,4 @@
-// © Copyright 2021-2022, Scott Gasch
+//  © Copyright 2021-2022, Scott Gasch
  //
  // antlr4 -Dlanguage=Python3 ./dateparse_utils.g4
  //
@@ -6,7 +6,7 @@
  // for the lexer and those for the parser.  The former begin with a CAPITAL
  // whereas the latter begin with lowercase.  The order of the lexer symbols
  // is the order that the lexer will recognize them in.  There's a good tutorial
-// on this shit at:
+// on this stuff at:
  //
  //    https://tomassetti.me/antlr-mega-tutorial/
  //
diff --git a/src/pyutils/datetimez/dateparse_utils.py b/src/pyutils/datetimez/dateparse_utils.py

index 89112b4e29fb91a235f13c8f6dbdd74952068fe8..7e8b6d6d01f71fd8f2178b3a9c0d549371551807 100755 (executable)
--- a/src/pyutils/datetimez/dateparse_utils.py
+++ b/src/pyutils/datetimez/dateparse_utils.py
@@ -5,7 +5,88 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""Parse dates in a variety of formats."""
+"""
+Parse dates / datetimes in a variety of formats.  Some examples:
+
+    |    today
+    |    tomorrow
+    |    yesterday
+    |    21:30
+    |    12:01am
+    |    12:01pm
+    |    last Wednesday
+    |    this Wednesday
+    |    next Wed
+    |    this coming Tues
+    |    this past Mon
+    |    4 days ago
+    |    4 Mondays ago
+    |    4 months ago
+    |    3 days back
+    |    13 weeks from now
+    |    1 year from now
+    |    4 weeks from now
+    |    3 saturdays ago
+    |    4 months from today
+    |    5 years from yesterday
+    |    6 weeks from tomorrow
+    |    april 15, 2005
+    |    april 21
+    |    9:30am on last Wednesday
+    |    2005/apr/15
+    |    2005 apr 15
+    |    the 1st wednesday in may
+    |    the last sun of june
+    |    this easter
+    |    last xmas
+    |    Christmas, 1999
+    |    next MLK day
+    |    Halloween, 2020
+    |    5 work days after independence day
+    |    50 working days from last wed
+    |    25 working days before xmas
+    |    today +1 week
+    |    sunday -3 weeks
+    |    3 weeks before xmas, 1999
+    |    3 days before new years eve, 2000
+    |    july 4th
+    |    the ides of march
+    |    the nones of april
+    |    the kalends of may
+    |    4 sundays before veterans' day
+    |    xmas eve
+    |    this friday at 5pm
+    |    presidents day
+    |    memorial day, 1921
+    |    thanksgiving
+    |    2 sun in jun
+    |    easter -40 days
+    |    2 days before last xmas at 3:14:15.92a
+    |    3 weeks after xmas, 1995 at midday
+    |    4 months before easter, 1992 at midnight
+    |    5 months before halloween, 1995 at noon
+    |    4 days before last wednesday
+    |    44 months after today
+    |    44 years before today
+    |    44 weeks ago
+    |    15 minutes to 3am
+    |    quarter past 4pm
+    |    half past 9
+    |    4 seconds to midnight
+    |    4 seconds to midnight, tomorrow
+    |    2021/apr/15T21:30:44.55
+    |    2021/apr/15 at 21:30:44.55
+    |    2021/4/15 at 21:30:44.55
+    |    2021/04/15 at 21:30:44.55Z
+    |    2021/04/15 at 21:30:44.55EST
+    |    13 days after last memorial day at 12 seconds before 4pm
+
+This code is used by other code in the pyutils library; for example,
+when using :file:`argparse_utils.py` to pass an argument of type
+datetime it allows the user to use free form English expressions.
+
+See the `unittest <https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=tests/datetimez/dateparse_utils_test.py;h=93c7b96e4c19af217fbafcf1ed5dbde13ec599c5;hb=HEAD>`_ for more examples and the `grammar <https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=src/pyutils/datetimez/dateparse_utils.g4;hb=HEAD>`_ for more details.
+"""
  
  import datetime
  import functools
@@ -61,6 +142,10 @@ class ParseException(Exception):
      """An exception thrown during parsing because of unrecognized input."""
  
      def __init__(self, message: str) -> None:
+        """
+        Args:
+            message: parse error message description.
+        """
          super().__init__()
          self.message = message
  
@@ -100,15 +185,19 @@ class RaisingErrorListener(antlr4.DiagnosticErrorListener):
      ),
  )
  class DateParser(dateparse_utilsListener):
-    """A class to parse dates expressed in human language.  Example usage::
+    """A class to parse dates expressed in human language (English).
+    Example usage::
  
          d = DateParser()
          d.parse('next wednesday')
          dt = d.get_datetime()
+        print(dt)
+        Wednesday 2022/10/26 00:00:00.000000
  
      Note that the interface is somewhat klunky here because this class is
      conforming to interfaces auto-generated by ANTLR as it parses the grammar.
-    See also: string_utils.parse_date.
+    See also :meth:`pyutils.string_utils.to_date`.
+
      """
  
      PARSE_TYPE_SINGLE_DATE_EXPR = 1
@@ -117,10 +206,14 @@ class DateParser(dateparse_utilsListener):
      PARSE_TYPE_BASE_AND_OFFSET_TIME_EXPR = 4
  
      def __init__(self, *, override_now_for_test_purposes=None) -> None:
-        """C'tor.  Passing a value to override_now_for_test_purposes can be
-        used to force this instance to use a custom date/time for its
-        idea of "now" so that the code can be more easily unittested.
-        Leave as None for real use cases.
+        """Construct a parser.
+
+        Args:
+            override_now_for_test_purposes: passing a value to
+                override_now_for_test_purposes can be used to force
+                this parser instance to use a custom date/time for its
+                idea of "now" so that the code can be more easily
+                unittested.  Leave as None for real use cases.
          """
          self.month_name_to_number = {
              'jan': 1,
@@ -188,27 +281,44 @@ class DateParser(dateparse_utilsListener):
          self._reset()
  
      def parse(self, date_string: str) -> Optional[datetime.datetime]:
-        """Parse a date/time expression and return a timezone agnostic
-        datetime on success.  Also sets self.datetime, self.date and
-        self.time which can each be accessed other methods on the
-        class: get_datetime(), get_date() and get_time().  Raises a
-        ParseException with a helpful(?) message on parse error or
+        """
+        Parse a ~free form date/time expression and return a
+        timezone agnostic datetime on success.  Also sets
+        `self.datetime`, `self.date` and `self.time` which can each be
+        accessed via other methods on the class: :meth:`get_datetime`,
+        :meth:`get_date` and :meth:`get_time`.  Raises a
+        `ParseException` with a helpful(?) message on parse error or
          confusion.
  
+        This is the main entrypoint to this class for caller code.
+
          To get an idea of what expressions can be parsed, check out
          the unittest and the grammar.
  
-        Usage:
+        Args:
+            date_string: the string to parse
  
-        txt = '3 weeks before last tues at 9:15am'
-        dp = DateParser()
-        dt1 = dp.parse(txt)
-        dt2 = dp.get_datetime(tz=pytz.timezone('US/Pacific'))
+        Returns:
+            A datetime.datetime representing the parsed date/time or
+            None on error.
  
-        # dt1 and dt2 will be identical other than the fact that
-        # the latter's tzinfo will be set to PST/PDT.
+        .. note::
+
+            Parsed date expressions without any time part return
+            hours = minutes = seconds = microseconds = 0 (i.e. at
+            midnight that day).  Parsed time expressions without any
+            date part default to date = today.
+
+        Example usage::
+
+            txt = '3 weeks before last tues at 9:15am'
+            dp = DateParser()
+            dt1 = dp.parse(txt)
+            dt2 = dp.get_datetime(tz=pytz.timezone('US/Pacific'))
+
+            # dt1 and dt2 will be identical other than the fact that
+            # the latter's tzinfo will be set to PST/PDT.
  
-        This is the main entrypoint to this class for caller code.
          """
          date_string = date_string.strip()
          date_string = re.sub(r'\s+', ' ', date_string)
@@ -228,22 +338,40 @@ class DateParser(dateparse_utilsListener):
          return self.datetime
  
      def get_date(self) -> Optional[datetime.date]:
-        """Return the date part or None."""
+        """
+        Returns:
+            The date part of the last :meth:`parse` operation again
+            or None.
+        """
          return self.date
  
      def get_time(self) -> Optional[datetime.time]:
-        """Return the time part or None."""
+        """
+        Returns:
+            The time part of the last :meth:`parse` operation again
+            or None.
+        """
          return self.time
  
      def get_datetime(self, *, tz=None) -> Optional[datetime.datetime]:
-        """Return as a datetime.  Parsed date expressions without any time
-        part return hours = minutes = seconds = microseconds = 0 (i.e. at
-        midnight that day).  Parsed time expressions without any date part
-        default to date = today.
-
-        The optional tz param allows the caller to request the datetime be
-        timezone aware and sets the tzinfo to the indicated zone.  Defaults
-        to timezone naive (i.e. tzinfo = None).
+        """Get the datetime of the last :meth:`parse` operation again
+        or None.
+
+        Args:
+            tz: the timezone to set on output times.  By default we
+                return timezone-naive datetime objects.
+
+        Returns:
+            the same datetime that :meth:`parse` last did, optionally
+            overriding the timezone.  Returns None if no calls to
+            :meth:`parse` have yet been made.
+
+        .. note::
+
+            Parsed date expressions without any time part return
+            hours = minutes = seconds = microseconds = 0 (i.e. at
+            midnight that day).  Parsed time expressions without any
+            date part default to date = today.
          """
          dt = self.datetime
          if dt is not None:
diff --git a/src/pyutils/datetimez/datetime_utils.py b/src/pyutils/datetimez/datetime_utils.py

index c47d38c0aab88da0835cb6919434532c5288ad08..5ddf4b6014f66b171bb88bf3421ab0623cdf1eba 100644 (file)
--- a/src/pyutils/datetimez/datetime_utils.py
+++ b/src/pyutils/datetimez/datetime_utils.py
@@ -19,14 +19,16 @@ logger = logging.getLogger(__name__)
  
  
  def is_timezone_aware(dt: datetime.datetime) -> bool:
-    """Returns true if the datetime argument is timezone aware or
-    False if not.
-
-    See: https://docs.python.org/3/library/datetime.html
-    #determining-if-an-object-is-aware-or-naive
+    """
+    Checks whether a datetime is timezone aware or naive.
+    See: https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive
  
      Args:
-        dt: The datetime object to check
+        dt: the datetime to check for timezone awareness
+
+    Returns:
+        True if the datetime argument is timezone aware or
+        False if not.
  
      >>> is_timezone_aware(datetime.datetime.now())
      False
@@ -39,14 +41,14 @@ def is_timezone_aware(dt: datetime.datetime) -> bool:
  
  
  def is_timezone_naive(dt: datetime.datetime) -> bool:
-    """Inverse of is_timezone_aware -- returns true if the dt argument
-    is timezone naive.
-
-    See: https://docs.python.org/3/library/datetime.html
-    #determining-if-an-object-is-aware-or-naive
+    """Inverse of :meth:`is_timezone_aware`.
+    See: https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive
  
      Args:
-        dt: The datetime object to check
+        dt: the datetime to check
+
+    Returns:
+        True if the dt argument is timezone naive, False otherwise
  
      >>> is_timezone_naive(datetime.datetime.now())
      True
@@ -59,19 +61,33 @@ def is_timezone_naive(dt: datetime.datetime) -> bool:
  
  
  def strip_timezone(dt: datetime.datetime) -> datetime.datetime:
-    """Remove the timezone from a datetime.
+    """
+    Remove the timezone from a datetime.  Silently ignores datetimes
+    which are already timezone naive.
+
+    Args:
+        dt: the datetime to remove timezone from
+
+    Returns:
+        A datetime identical to dt, the input argument, except for
+        that the timezone has been removed.
+
+    See also :meth:`add_timezone`, :meth:`replace_timezone`, :meth:`translate_timezone`.
  
      .. warning::
  
          This does not change the hours, minutes, seconds,
-        months, days, years, etc... Thus the instant to which this
-        timestamp refers will change.  Silently ignores datetimes
-        which are already timezone naive.
+        months, days, years, etc... Thus, the instant to which this
+        timestamp refers will change when the timezone is removed.
+        See examples.
  
      >>> now = now_pacific()
      >>> now.tzinfo == None
      False
  
+    >>> "US/Pacific" in now.tzinfo.__repr__()
+    True
+
      >>> dt = strip_timezone(now)
      >>> dt == now
      False
@@ -81,7 +97,6 @@ def strip_timezone(dt: datetime.datetime) -> datetime.datetime:
  
      >>> dt.hour == now.hour
      True
-
      """
      if is_timezone_naive(dt):
          return dt
@@ -90,9 +105,24 @@ def strip_timezone(dt: datetime.datetime) -> datetime.datetime:
  
  def add_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.datetime:
      """
-    Adds a timezone to a timezone naive datetime.  This does not
-    change the instant to which the timestamp refers.  See also:
-    replace_timezone.
+    Adds a timezone to a timezone naive datetime.
+
+    Args:
+        dt: the datetime to insert a timezone onto
+        tz: the timezone to insert
+
+    See also :meth:`replace_timezone`, :meth:`strip_timezone`, :meth:`translate_timezone`.
+
+    Returns:
+        A datetime identical to dt, the input datetime, except for
+        that a timezone has been added.
+
+    .. warning::
+
+        This doesn't change the hour, minute, second, day, month, etc...
+        of the input datetime.  It simply adds a timezone to it.  Adding
+        a timezone this way will likely change the instant to which the
+        datetime refers.  See examples.
  
      >>> now = datetime.datetime.now()
      >>> is_timezone_aware(now)
@@ -102,6 +132,9 @@ def add_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.datetim
      >>> is_timezone_aware(now_pacific)
      True
  
+    >>> "US/Pacific" in now_pacific.tzinfo.__repr__()
+    True
+
      >>> now.hour == now_pacific.hour
      True
      >>> now.minute == now_pacific.minute
@@ -129,18 +162,36 @@ def add_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.datetim
  def replace_timezone(
      dt: datetime.datetime, tz: Optional[datetime.tzinfo]
  ) -> datetime.datetime:
-    """Replaces the timezone on a timezone aware datetime object directly
+    """
+    Replaces the timezone on a timezone aware datetime object directly
      (leaving the year, month, day, hour, minute, second, micro,
-    etc... alone).
+    etc... alone).  The same as calling :meth:`strip_timezone` followed
+    by :meth:`add_timezone`.
  
      Works with timezone aware and timezone naive dts but for the
-    latter it is probably better to use add_timezone or just create it
-    with a tz parameter.  Using this can have weird side effects like
-    UTC offsets that are not an even multiple of an hour, etc...
+    latter it is probably better to use :meth:`add_timezone` or just
+    create it with a `tz` parameter.  Using this can have weird side
+    effects like UTC offsets that are not an even multiple of an hour,
+    etc...
+
+    Args:
+        dt: the datetime whose timezone should be changed
+        tz: the new timezone
+
+    Returns:
+        The resulting datetime.  Hour, minute, second, etc... are unmodified.
+        See warning below.
+
+    See also :meth:`add_timezone`, :meth:`strip_timezone`, :meth:`translate_timezone`.
  
      .. warning::
  
-        This changes the instant to which this dt refers.
+        This code isn't changing the hour, minute, second, day, month, etc...
+        of the datetime.  It's just messing with the timezone.  Changing
+        the timezone without changing the time causes the instant to which
+        the datetime refers to change.  For example, if passed 7:01pm PST
+        and asked to make it EST, the result will be 7:01pm EST.  See
+        examples.
  
      >>> from pytz import UTC
      >>> d = now_pacific()
@@ -152,7 +203,6 @@ def replace_timezone(
      'UTC'
      >>> o.hour == h
      True
-
      """
      if is_timezone_aware(dt):
          logger.warning(
@@ -180,10 +230,22 @@ def replace_time_timezone(t: datetime.time, tz: datetime.tzinfo) -> datetime.tim
      """Replaces the timezone on a datetime.time directly without performing
      any translation.
  
+    Args:
+        t: the time to change the timezone on
+        tz: the new timezone desired
+
+    Returns:
+        A time with hour, minute, second, etc... identical to the input
+        time but with timezone replaced.
+
      .. warning::
  
-        Note that, as above, this will change the instant to
-        which the time refers.
+        This code isn't changing the hour, minute, second, etc...
+        of the time.  It's just messing with the timezone.  Changing
+        the timezone without changing the time causes the instant to which
+        the time refers to change.  For example, if passed 7:01pm PST
+        and asked to make it EST, the result will be 7:01pm EST.  See
+        examples.
  
      >>> t = datetime.time(8, 15, 12, 0, pytz.UTC)
      >>> t.tzname()
@@ -202,6 +264,20 @@ def translate_timezone(dt: datetime.datetime, tz: datetime.tzinfo) -> datetime.d
      day, hour, minute, second, micro, etc... appropriately.  The returned
      dt is the same instant in another timezone.
  
+    Args:
+        dt: the datetime whose timezone should be translated.
+        tz: the desired timezone
+
+    Returns:
+        A new datetime object that represents the same instant as the
+        input datetime but in the desired timezone.  Modifies hour, minute,
+        seconds, day, etc... as necessary for the instant to be preserved.
+        For example, if you pass 11:01pm PST in and ask for it to be
+        translated to EST you would get 2:01am the next day EST back
+        out.
+
+    See also :meth:`replace_timezone`, :meth:`strip_timezone`.
+
      >>> import pytz
      >>> d = now_pacific()
      >>> d.tzinfo.tzname(d)[0]     # Note: could be PST or PDT
@@ -238,10 +314,16 @@ def date_to_datetime(date: datetime.date) -> datetime.datetime:
      """
      Given a date, return a datetime with hour/min/sec zero (midnight)
  
+    Args:
+        date: the date desired
+
+    Returns:
+        A datetime with the same month, day, and year as the input
+        date and hours, minutes, seconds set to 12:00:00am.
+
      >>> import datetime
      >>> date_to_datetime(datetime.date(2021, 12, 25))
      datetime.datetime(2021, 12, 25, 0, 0)
-
      """
      return datetime.datetime(date.year, date.month, date.day, 0, 0, 0, 0)
  
@@ -253,6 +335,13 @@ def time_to_datetime_today(time: datetime.time) -> datetime.datetime:
      the resulting datetime will also be (and will use the same tzinfo).
      If the time is timezone naive, the datetime returned will be too.
  
+    Args:
+        time: the time desired
+
+    Returns:
+        datetime with hour, minute, second, timezone set to time and
+        day, month, year set to "today".
+
      >>> t = datetime.time(13, 14, 0)
      >>> d = now_pacific().date()
      >>> dt = time_to_datetime_today(t)
@@ -287,12 +376,19 @@ def date_and_time_to_datetime(
      """
      Given a date and time, merge them and return a datetime.
  
+    Args:
+        date: the date component
+        time: the time component
+
+    Returns:
+        A datetime with the time component set from time and the date
+        component set from date.
+
      >>> import datetime
      >>> d = datetime.date(2021, 12, 25)
      >>> t = datetime.time(12, 30, 0, 0)
      >>> date_and_time_to_datetime(d, t)
      datetime.datetime(2021, 12, 25, 12, 30)
-
      """
      return datetime.datetime(
          date.year,
@@ -311,6 +407,15 @@ def datetime_to_date_and_time(
      """Return the component date and time objects of a datetime in a
      Tuple given a datetime.
  
+    Args:
+        dt: the datetime to decompose
+
+    Returns:
+        A tuple whose first element contains a datetime.date that holds
+        the day, month, year, etc... from the input dt and whose second
+        element contains a datetime.time with hour, minute, second, micros,
+        and timezone set from the input dt.
+
      >>> import datetime
      >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
      >>> (d, t) = datetime_to_date_and_time(dt)
@@ -318,7 +423,6 @@ def datetime_to_date_and_time(
      datetime.date(2021, 12, 25)
      >>> t
      datetime.time(12, 30)
-
      """
      return (dt.date(), dt.timetz())
  
@@ -326,11 +430,16 @@ def datetime_to_date_and_time(
  def datetime_to_date(dt: datetime.datetime) -> datetime.date:
      """Return just the date part of a datetime.
  
+    Args:
+        dt: the datetime
+
+    Returns:
+        A datetime.date with month, day and year set from input dt.
+
      >>> import datetime
      >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
      >>> datetime_to_date(dt)
      datetime.date(2021, 12, 25)
-
      """
      return datetime_to_date_and_time(dt)[0]
  
@@ -338,11 +447,17 @@ def datetime_to_date(dt: datetime.datetime) -> datetime.date:
  def datetime_to_time(dt: datetime.datetime) -> datetime.time:
      """Return just the time part of a datetime.
  
+    Args:
+        dt: the datetime
+
+    Returns:
+        A datetime.time with hour, minute, second, micros, and
+        timezone set from the input dt.
+
      >>> import datetime
      >>> dt = datetime.datetime(2021, 12, 25, 12, 30)
      >>> datetime_to_time(dt)
      datetime.time(12, 30)
-
      """
      return datetime_to_date_and_time(dt)[1]
  
@@ -368,6 +483,13 @@ class TimeUnit(enum.IntEnum):
  
      @classmethod
      def is_valid(cls, value: Any):
+        """
+        Args:
+            value: a value to be checked
+
+        Returns:
+            True if input value is a valid TimeUnit, False otherwise.
+        """
          if isinstance(value, int):
              return cls(value) is not None
          elif isinstance(value, TimeUnit):
@@ -383,10 +505,26 @@ def n_timeunits_from_base(
      count: int, unit: TimeUnit, base: datetime.datetime
  ) -> datetime.datetime:
      """Return a datetime that is N units before/after a base datetime.
-    e.g.  3 Wednesdays from base datetime, 2 weeks from base date, 10
-    years before base datetime, 13 minutes after base datetime, etc...
-    Note: to indicate before/after the base date, use a positive or
-    negative count.
+    For example:
+
+        - 3 Wednesdays from base datetime,
+        - 2 weeks from base date,
+        - 10 years before base datetime,
+        - 13 minutes after base datetime, etc...
+
+    Args:
+        count: signed number that indicates N units before/after the base.
+        unit: the timeunit that we are counting by.
+        base: a datetime representing the base date the result should be
+            relative to.
+
+    Returns:
+        A datetime that is count units before or after the base datetime.
+
+    .. note::
+
+        To indicate before/after the base date, use a positive or
+        negative count.
  
      >>> base = string_to_datetime("2021/09/10 11:24:51AM-0700")[0]
  
@@ -442,7 +580,6 @@ def n_timeunits_from_base(
      >>> base = string_to_datetime("2022/03/31 11:24:51AM-0700")[0]
      >>> n_timeunits_from_base(-1, TimeUnit.MONTHS, base)
      datetime.datetime(2022, 2, 28, 11, 24, 51, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200)))
-
      """
      assert TimeUnit.is_valid(unit)
      if count == 0:
@@ -576,6 +713,22 @@ def get_format_string(
      Helper to return a format string without looking up the documentation
      for strftime.
  
+    Args:
+        date_time_separator: character or string to use between the date
+            and time outputs.
+        include_timezone: whether or not the result should include a timezone
+        include_dayname: whether or not the result should include the dayname
+            (e.g. Monday, Wednesday, etc...)
+        use_month_abbrevs: whether or not to abbreviate (e.g. Jan) or spell out
+            (e.g. January) month names.
+        include_seconds: whether or not to include seconds in time.
+        include_fractional: whether or not to include micros in time output.
+        twelve_hour: use twelve hour (with am/pm) or twenty four hour time format?
+
+    Returns:
+        The format string for use with strftime that follows the given
+        requirements.
+
      >>> get_format_string()
      '%Y/%m/%d %I:%M:%S%p%z'
  
@@ -628,6 +781,19 @@ def datetime_to_string(
      A nice way to convert a datetime into a string; arguably better than
      just printing it and relying on it __repr__().
  
+    Args:
+        dt: the datetime to represent
+        date_time_separator: the character or string to separate the date and time
+            pieces of the representation.
+        include_timezone: should we include a timezone in the representation?
+        include_dayname: should we include the dayname (e.g. Mon) in
+            the representation or omit it?
+        use_month_abbrevs: should we name the month briefly (e.g. Jan) or spell
+            it out fully (e.g. January) in the representation?
+        include_seconds: should we include seconds in the time?
+        include_fractional: should we include micros in the time?
+        twelve_hour: should we use twelve or twenty-four hour time format?
+
      >>> d = string_to_datetime(
      ...                        "2021/09/10 11:24:51AM-0700",
      ...                       )[0]
@@ -637,7 +803,6 @@ def datetime_to_string(
      '2021/09/10 11:24:51AM-0700'
      >>> datetime_to_string(d, include_dayname=True, include_seconds=False)
      'Fri/2021/09/10 11:24AM-0700'
-
      """
      fstring = get_format_string(
          date_time_separator=date_time_separator,
@@ -664,7 +829,23 @@ def string_to_datetime(
  ) -> Tuple[datetime.datetime, str]:
      """A nice way to convert a string into a datetime.  Returns both the
      datetime and the format string used to parse it.  Also consider
-    dateparse.dateparse_utils for a full parser alternative.
+    :mod:`pyutils.datetimez.dateparse_utils` for a full parser alternative.
+
+    Args:
+        txt: the string to be converted into a datetime
+        date_time_separator: the character or string between the time and date
+            portions.
+        include_timezone: does the string include a timezone?
+        include_dayname: does the string include a dayname?
+        use_month_abbrevs: is the month abbreviated in the string (e.g. Feb)
+            or spelled out completely (e.g. February)?
+        include_seconds: does the string's time include seconds?
+        include_fractional: does the string's time include micros?
+        twelve_hour: is the string's time in twelve or twenty-four hour format?
+
+    Returns:
+        A tuple containing the datetime parsed from string and the formatting
+        string used to parse it.
  
      >>> d = string_to_datetime(
      ...                        "2021/09/10 11:24:51AM-0700",
@@ -686,7 +867,10 @@ def string_to_datetime(
  
  
  def timestamp() -> str:
-    """Return a timestamp for right now in Pacific timezone."""
+    """
+    Returns:
+        A timestamp for right now in Pacific timezone.
+    """
      ts = datetime.datetime.now(tz=pytz.timezone("US/Pacific"))
      return datetime_to_string(ts, include_timezone=True)
  
@@ -700,7 +884,17 @@ def time_to_string(
      twelve_hour=True,
  ) -> str:
      """A nice way to convert a datetime into a time (only) string.
-    This ignores the date part of the datetime.
+    This ignores the date part of the datetime completely.
+
+    Args:
+        dt: the datetime whose time to represent
+        include_seconds: should seconds be included in the output?
+        include_fractional: should micros be included in the output?
+        include_timezone: should timezone be included in the output?
+        twelve_hour: use twelve or twenty-four hour format?
+
+    Returns:
+        A string representing the time of the input datetime.
  
      >>> d = string_to_datetime(
      ...                        "2021/09/10 11:24:51AM-0700",
@@ -736,7 +930,13 @@ def time_to_string(
  
  
  def seconds_to_timedelta(seconds: int) -> datetime.timedelta:
-    """Convert a delta in seconds into a timedelta."""
+    """
+    Args:
+        seconds: a count of seconds
+
+    Returns:
+        A datetime.timedelta representing that count of seconds.
+    """
      return datetime.timedelta(seconds=seconds)
  
  
@@ -745,7 +945,16 @@ MinuteOfDay = NewType("MinuteOfDay", int)
  
  def minute_number(hour: int, minute: int) -> MinuteOfDay:
      """
-    Convert hour:minute into minute number from start of day.
+    Convert hour:minute into minute number from start of day.  That is,
+    if you imagine a day as a sequence of minutes from minute #0 up
+    to minute #1439, what minute number is, e.g., 6:52am?
+
+    Args:
+        hour: the hour to convert (0 <= hour <= 23)
+        minute: the minute to convert (0 <= minute <= 59)
+
+    Returns:
+        The minute number requested.  Raises `ValueError` on bad input.
  
      >>> minute_number(0, 0)
      0
@@ -755,16 +964,26 @@ def minute_number(hour: int, minute: int) -> MinuteOfDay:
  
      >>> minute_number(23, 59)
      1439
-
      """
+    if hour < 0 or hour > 23:
+        raise ValueError(f'Bad hour: {hour}.  Expected 0 <= hour <= 23')
+    if minute < 0 or minute > 59:
+        raise ValueError(f'Bad minute: {minute}.  Expected 0 <= minute <= 59')
      return MinuteOfDay(hour * 60 + minute)
  
  
  def datetime_to_minute_number(dt: datetime.datetime) -> MinuteOfDay:
      """
-    Convert a datetime into a minute number (of the day).  Note that
-    this ignores the date part of the datetime and only uses the time
-    part.
+    Convert a datetime's time component into a minute number (of
+    the day).  Note that this ignores the date part of the datetime
+    and only uses the time part.
+
+    Args:
+        dt: the datetime whose time is to be converted
+
+    Returns:
+        The minute number (of the day) that represents the input datetime's
+        time.
  
      >>> d = string_to_datetime(
      ...                        "2021/09/10 11:24:51AM-0700",
@@ -772,7 +991,6 @@ def datetime_to_minute_number(dt: datetime.datetime) -> MinuteOfDay:
  
      >>> datetime_to_minute_number(d)
      684
-
      """
      return minute_number(dt.hour, dt.minute)
  
@@ -781,10 +999,15 @@ def time_to_minute_number(t: datetime.time) -> MinuteOfDay:
      """
      Convert a datetime.time into a minute number.
  
+    Args:
+        t: a datetime.time to convert into a minute number.
+
+    Returns:
+        The minute number (of the day) of the input time.
+
      >>> t = datetime.time(5, 15)
      >>> time_to_minute_number(t)
      315
-
      """
      return minute_number(t.hour, t.minute)
  
@@ -794,12 +1017,18 @@ def minute_number_to_time_string(minute_num: MinuteOfDay) -> str:
      Convert minute number from start of day into hour:minute am/pm
      string.
  
+    Args:
+        minute_num: the minute number to convert into a string
+
+    Returns:
+        A string of the format "HH:MM[a|p]" that represents the time
+        that the input minute_num refers to.
+
      >>> minute_number_to_time_string(315)
      ' 5:15a'
  
      >>> minute_number_to_time_string(684)
      '11:24a'
-
      """
      hour = minute_num // 60
      minute = minute_num % 60
@@ -818,6 +1047,14 @@ def parse_duration(duration: str, raise_on_error=False) -> int:
      """
      Parse a duration in string form into a delta seconds.
  
+    Args:
+        duration: a string form duration, see examples.
+        raise_on_error: should we raise on invalid input or just
+            return a zero duration?
+
+    Returns:
+        A count of seconds represented by the input string.
+
      >>> parse_duration('15 days, 2 hours')
      1303200
  
@@ -837,7 +1074,6 @@ def parse_duration(duration: str, raise_on_error=False) -> int:
      Traceback (most recent call last):
      ...
      ValueError: recent is not a valid duration.
-
      """
      if duration.isdigit():
          return int(duration)
@@ -869,6 +1105,16 @@ def describe_duration(seconds: int, *, include_seconds=False) -> str:
      """
      Describe a duration represented as a count of seconds nicely.
  
+    Args:
+        seconds: the number of seconds in the duration to be represented.
+        include_seconds: should we include or drop the seconds part in
+            the representation?
+
+    .. note::
+
+        Of course if we drop the seconds part the result is not precise.
+        See examples.
+
      >>> describe_duration(182)
      '3 minutes'
  
@@ -880,7 +1126,6 @@ def describe_duration(seconds: int, *, include_seconds=False) -> str:
  
      describe_duration(1303200)
      '15 days, 2 hours'
-
      """
      days = divmod(seconds, constants.SECONDS_PER_DAY)
      hours = divmod(days[1], constants.SECONDS_PER_HOUR)
@@ -921,10 +1166,22 @@ def describe_timedelta(delta: datetime.timedelta) -> str:
      """
      Describe a duration represented by a timedelta object.
  
+    Args:
+        delta: the timedelta object that represents the duration to describe.
+
+    Returns:
+        A string representation of the input duration.
+
+    .. warning::
+
+        Milliseconds are never included in the string representation of
+        durations even though they may be represented by an input
+        `datetime.timedelta`.  Not for use when this level of precision
+        is needed.
+
      >>> d = datetime.timedelta(1, 600)
      >>> describe_timedelta(d)
      '1 day, and 10 minutes'
-
      """
      return describe_duration(int(delta.total_seconds()))  # Note: drops milliseconds
  
@@ -933,6 +1190,18 @@ def describe_duration_briefly(seconds: int, *, include_seconds=False) -> str:
      """
      Describe a duration briefly.
  
+    Args:
+        seconds: the number of seconds in the duration to describe.
+        include_seconds: should we include seconds in our description or omit?
+
+    Returns:
+        A string describing the duration represented by the input seconds briefly.
+
+    .. note::
+
+        Of course if we drop the seconds part the result is not precise.
+        See examples.
+
      >>> describe_duration_briefly(182)
      '3m'
  
@@ -962,17 +1231,32 @@ def describe_duration_briefly(seconds: int, *, include_seconds=False) -> str:
      return descr.strip()
  
  
-def describe_timedelta_briefly(delta: datetime.timedelta) -> str:
+def describe_timedelta_briefly(
+    delta: datetime.timedelta, *, include_seconds=False
+) -> str:
      """
      Describe a duration represented by a timedelta object.
  
+    Args:
+        delta: the timedelta to describe briefly
+
+    Returns:
+        A string description of the input timedelta object.
+
+    .. warning::
+
+        Milliseconds are never included in the string representation of
+        durations even though they may be represented by an input
+        `datetime.timedelta`.  Not for use when this level of precision
+        is needed.
+
      >>> d = datetime.timedelta(1, 600)
      >>> describe_timedelta_briefly(d)
      '1d 10m'
-
      """
      return describe_duration_briefly(
-        int(delta.total_seconds())
+        int(delta.total_seconds()),
+        include_seconds=include_seconds,
      )  # Note: drops milliseconds
  
  
diff --git a/src/pyutils/files/directory_filter.py b/src/pyutils/files/directory_filter.py

index 3d0522b7feab95d7df25c30bd1dec23b99ae743f..d7499a2622960449673f283c9426a20d7d3bb251 100644 (file)
--- a/src/pyutils/files/directory_filter.py
+++ b/src/pyutils/files/directory_filter.py
@@ -2,10 +2,19 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""Two predicates that can help avoid unnecessary disk I/O by
-detecting if a particular file is identical to the contents about to
-be written or if a particular directory already contains a file that
-is identical to the one about to be written.  See examples below.
+"""This module contains two classes meant to help reduce unnecessary disk
+I/O operations:
+
+The first, :class:`DirectoryFileFilter`, determines when the contents
+of a file held in memory are identical to the file copy already on
+disk.
+
+The second, :class:`DirectoryAllFilesFilter`, is basically the same
+except that the caller need not indicate the name of the disk file
+because it will check the memory file's signature against *all file
+signatures* in a particular directory on disk.
+
+See examples below.
  """
  
  import hashlib
@@ -36,12 +45,10 @@ class DirectoryFileFilter(object):
      True
  
      >>> os.remove(testfile)
-
      """
  
      def __init__(self, directory: str):
-        """C'tor.
-
+        """
          Args:
              directory: the directory we're filtering accesses to
          """
@@ -117,11 +124,11 @@ class DirectoryAllFilesFilter(DirectoryFileFilter):
      directory (regardless of its name).
  
      i.e. this is the same as :class:`DirectoryFileFilter` except that
-    our apply() method will return true not only if the contents to be
-    written are identical to the contents of filename on the disk but
-    also it returns true if there exists some other file sitting in
-    the same directory which already contains those identical
-    contents.
+    our :meth:`apply` method will return true not only if the contents
+    to be written are identical to the contents of filename on the
+    disk but also it returns true if there exists some other file
+    sitting in the same directory which already contains those
+    identical contents.
  
      >>> testfile = '/tmp/directory_filter_text_f39e5b58-c260-40da-9448-ad1c3b2a69c3.txt'
  
@@ -143,8 +150,7 @@ class DirectoryAllFilesFilter(DirectoryFileFilter):
      """
  
      def __init__(self, directory: str):
-        """C'tor.
-
+        """
          Args:
              directory: the directory we're watching
          """
diff --git a/src/pyutils/files/file_utils.py b/src/pyutils/files/file_utils.py

index dd6cf16e1f71d32e375cc3f2bdc061df52ae38ac..bddc63101b648de8e4d45431f4757276dee71d63 100644 (file)
--- a/src/pyutils/files/file_utils.py
+++ b/src/pyutils/files/file_utils.py
@@ -2,7 +2,12 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""Utilities for working with files."""
+"""
+This is a grab bag of file-related utilities.  It has code to, for example,
+read files transforming the text as it's read, normalize pathnames, strip
+extensions, read and manipulate atimes/mtimes/ctimes, compute a signature
+based on a file's contents, traverse the file system recursively, etc...
+"""
  
  import contextlib
  import datetime
@@ -54,6 +59,9 @@ def slurp_file(
          filename: file to be read
          skip_blank_lines: should reading skip blank lines?
          line_transformers: little string->string transformations
+
+    Returns:
+        A list of lines from the read and transformed file contents.
      """
  
      ret = []
@@ -126,6 +134,8 @@ def without_extension(path: str) -> str:
      Returns:
          the path with one extension removed.
  
+    See also :meth:`without_all_extensions`.
+
      >>> without_extension('foobar.txt')
      'foobar'
  
@@ -158,6 +168,8 @@ def without_all_extensions(path: str) -> str:
      Returns:
          the path with all extensions removed.
  
+    See also :meth:`without_extension`
+
      >>> without_all_extensions('/home/scott/foobar.1.tar.gz')
      '/home/scott/foobar'
  
@@ -176,6 +188,9 @@ def get_extension(path: str) -> str:
      Returns:
          The last extension from the file path.
  
+    See also :meth:`without_extension`, :meth:`without_all_extensions`,
+    :meth:`get_all_extensions`.
+
      >>> get_extension('this_is_a_test.txt')
      '.txt'
  
@@ -198,6 +213,9 @@ def get_all_extensions(path: str) -> List[str]:
      Returns:
          a list containing each extension which may be empty.
  
+    See also :meth:`without_extension`, :meth:`without_all_extensions`,
+    :meth:`get_extension`.
+
      >>> get_all_extensions('/home/scott/foo.tar.gz.1')
      ['.tar', '.gz', '.1']
  
@@ -225,6 +243,8 @@ def without_path(filespec: str) -> str:
      Returns:
          filespec without leading dir components.
  
+    See also :meth:`get_path`, :meth:`get_canonical_path`.
+
      >>> without_path('/home/scott/foo.py')
      'foo.py'
  
@@ -246,6 +266,8 @@ def get_path(filespec: str) -> str:
          filespec with just the leading directory components and no
              filename or extension(s)
  
+    See also :meth:`without_path`, :meth:`get_canonical_path`.
+
      >>> get_path('/home/scott/foobar.py')
      '/home/scott'
  
@@ -268,6 +290,8 @@ def get_canonical_path(filespec: str) -> str:
      Returns:
          the canonicalized path
  
+    See also :meth:`get_path`, :meth:`without_path`.
+
      >>> get_canonical_path('/home/scott/../../home/lynn/../scott/foo.txt')
      '/usr/home/scott/foo.txt'
  
@@ -279,15 +303,17 @@ def create_path_if_not_exist(path, on_error=None) -> None:
      """
      Attempts to create path if it does not exist already.
  
-    .. warning::
-
-        Files are created with mode 0x0777 (i.e. world read/writeable).
-
      Args:
          path: the path to attempt to create
          on_error: If True, it's invoked on error conditions.  Otherwise
              any exceptions are raised.
  
+    See also :meth:`does_file_exist`.
+
+    .. warning::
+
+        Files are created with mode 0o777 (i.e. world read/writeable).
+
      >>> import uuid
      >>> import os
      >>> path = os.path.join("/tmp", str(uuid.uuid4()), str(uuid.uuid4()))
@@ -321,6 +347,8 @@ def does_file_exist(filename: str) -> bool:
      Returns:
          True if filename exists and is a normal file.
  
+    See also :meth:`create_path_if_not_exist`, :meth:`file_is_readable`.
+
      >>> does_file_exist(__file__)
      True
      >>> does_file_exist('/tmp/2492043r9203r9230r9230r49230r42390r4230')
@@ -330,37 +358,62 @@ def does_file_exist(filename: str) -> bool:
  
  
  def file_is_readable(filename: str) -> bool:
-    """True if file exists, is a normal file and is readable by the
-    current process.  False otherwise.
+    """Is the file readable?
  
      Args:
          filename: the filename to check for read access
+
+    Returns:
+        True if the file exists, is a normal file, and is readable
+        by the current process.  False otherwise.
+
+    See also :meth:`does_file_exist`, :meth:`file_is_writable`,
+    :meth:`file_is_executable`.
      """
      return does_file_exist(filename) and os.access(filename, os.R_OK)
  
  
  def file_is_writable(filename: str) -> bool:
-    """True if file exists, is a normal file and is writable by the
-    current process.  False otherwise.
+    """Is the file writable?
  
      Args:
          filename: the file to check for write access.
+
+    Returns:
+        True if file exists, is a normal file and is writable by the
+        current process.  False otherwise.
+
+    See also :meth:`file_is_readable`, :meth:`does_file_exist`.
      """
      return does_file_exist(filename) and os.access(filename, os.W_OK)
  
  
  def file_is_executable(filename: str) -> bool:
-    """True if file exists, is a normal file and is executable by the
-    current process.  False otherwise.
+    """Is the file executable?
  
      Args:
          filename: the file to check for execute access.
+
+    Returns:
+        True if file exists, is a normal file and is executable by the
+        current process.  False otherwise.
+
+    See also :meth:`does_file_exist`, :meth:`file_is_readable`,
+    :meth:`file_is_writable`.
      """
      return does_file_exist(filename) and os.access(filename, os.X_OK)
  
  
  def does_directory_exist(dirname: str) -> bool:
-    """Returns True if a file exists and is a directory.
+    """Does the given directory exist?
+
+    Args:
+        dirname: the name of the directory to check
+
+    Returns:
+        True if a path exists and is a directory, not a regular file.
+
+    See also :meth:`does_file_exist`.
  
      >>> does_directory_exist('/tmp')
      True
@@ -388,7 +441,15 @@ def get_file_size(filename: str) -> int:
  
  
  def is_normal_file(filename: str) -> bool:
-    """Returns True if filename is a normal file.
+    """Is that file normal (not a directory or some special file?)
+
+    Args:
+        filename: the path of the file to check
+
+    Returns:
+        True if filename is a normal file.
+
+    See also :meth:`is_directory`, :meth:`does_file_exist`, :meth:`is_symlink`.
  
      >>> is_normal_file(__file__)
      True
@@ -397,7 +458,16 @@ def is_normal_file(filename: str) -> bool:
  
  
  def is_directory(filename: str) -> bool:
-    """Returns True if filename is a directory.
+    """Is that path a directory (not a normal file?)
+
+    Args:
+        filename: the path of the file to check
+
+    Returns:
+        True if filename is a directory
+
+    See also :meth:`does_directory_exist`, :meth:`is_normal_file`,
+    :meth:`is_symlink`.
  
      >>> is_directory('/tmp')
      True
@@ -406,39 +476,60 @@ def is_directory(filename: str) -> bool:
  
  
  def is_symlink(filename: str) -> bool:
-    """True if filename is a symlink, False otherwise.
+    """Is that path a symlink?
+
+    Args:
+        filename: the path of the file to check
+
+    Returns:
+        True if filename is a symlink, False otherwise.
+
+    See also :meth:`is_directory`, :meth:`is_normal_file`.
  
      >>> is_symlink('/tmp')
      False
  
      >>> is_symlink('/home')
      True
-
      """
      return os.path.islink(filename)
  
  
  def is_same_file(file1: str, file2: str) -> bool:
-    """Returns True if the two files are the same inode.
+    """Determine if two paths reference the same inode.
+
+    Args:
+        file1: the first file
+        file2: the second file
+
+    Returns:
+        True if the two files are the same file.
+
+    See also :meth:`is_symlink`, :meth:`is_normal_file`.
  
      >>> is_same_file('/tmp', '/tmp/../tmp')
      True
  
      >>> is_same_file('/tmp', '/home')
      False
-
      """
      return os.path.samefile(file1, file2)
  
  
  def get_file_raw_timestamps(filename: str) -> Optional[os.stat_result]:
-    """Stats the file and returns an os.stat_result or None on error.
+    """Stats the file and returns an `os.stat_result` or None on error.
  
      Args:
          filename: the file whose timestamps to fetch
  
      Returns:
          the os.stat_result or None to indicate an error occurred
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamp`
      """
      try:
          return os.stat(filename)
@@ -451,10 +542,7 @@ def get_file_raw_timestamp(
      filename: str, extractor: Callable[[os.stat_result], Optional[float]]
  ) -> Optional[float]:
      """Stat a file and, if successful, use extractor to fetch some
-    subset of the information in the os.stat_result.  See also
-    :meth:`get_file_raw_atime`, :meth:`get_file_raw_mtime`, and
-    :meth:`get_file_raw_ctime` which just call this with a lambda
-    extractor.
+    subset of the information in the `os.stat_result`.
  
      Args:
          filename: the filename to stat
@@ -463,6 +551,12 @@ def get_file_raw_timestamp(
  
      Returns:
          whatever the extractor produced or None on error.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
      tss = get_file_raw_timestamps(filename)
      if tss is not None:
@@ -471,31 +565,64 @@ def get_file_raw_timestamp(
  
  
  def get_file_raw_atime(filename: str) -> Optional[float]:
-    """Get a file's raw access time or None on error.
+    """Get a file's raw access time.
  
-    See also :meth:`get_file_atime_as_datetime`,
+    Args:
+        filename: the path to the file to stat
+
+    Returns:
+        The file's raw atime (seconds since the Epoch) or
+        None on error.
+
+    See also
+    :meth:`get_file_atime_age_seconds`,
+    :meth:`get_file_atime_as_datetime`,
      :meth:`get_file_atime_timedelta`,
-    and :meth:`get_file_atime_age_seconds`.
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
      return get_file_raw_timestamp(filename, lambda x: x.st_atime)
  
  
  def get_file_raw_mtime(filename: str) -> Optional[float]:
-    """Get a file's raw modification time or None on error.
+    """Get a file's raw modification time.
  
-    See also :meth:`get_file_mtime_as_datetime`,
+    Args:
+        filename: the path to the file to stat
+
+    Returns:
+        The file's raw mtime (seconds since the Epoch) or
+        None on error.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_mtime_age_seconds`,
+    :meth:`get_file_mtime_as_datetime`,
      :meth:`get_file_mtime_timedelta`,
-    and :meth:`get_file_mtime_age_seconds`.
+    :meth:`get_file_raw_timestamps`
      """
      return get_file_raw_timestamp(filename, lambda x: x.st_mtime)
  
  
  def get_file_raw_ctime(filename: str) -> Optional[float]:
-    """Get a file's raw creation time or None on error.
+    """Get a file's raw creation time.
+
+    Args:
+        filename: the path to the file to stat
  
-    See also :meth:`get_file_ctime_as_datetime`,
+    Returns:
+        The file's raw ctime (seconds since the Epoch) or
+        None on error.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_ctime_age_seconds`,
+    :meth:`get_file_ctime_as_datetime`,
      :meth:`get_file_ctime_timedelta`,
-    and :meth:`get_file_ctime_age_seconds`.
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
      return get_file_raw_timestamp(filename, lambda x: x.st_ctime)
  
@@ -507,7 +634,7 @@ def get_file_md5(filename: str) -> str:
          filename: the file whose contents to hash
  
      Returns:
-        the MD5 digest of the file's contents.  Raises on errors.
+        the MD5 digest of the file's contents.  Raises on error.
      """
      file_hash = hashlib.md5()
      with open(filename, "rb") as f:
@@ -518,13 +645,22 @@ def get_file_md5(filename: str) -> str:
      return file_hash.hexdigest()
  
  
-def set_file_raw_atime(filename: str, atime: float):
+def set_file_raw_atime(filename: str, atime: float) -> None:
      """Sets a file's raw access time.
  
-    See also :meth:`get_file_atime_as_datetime`,
-    :meth:`get_file_atime_timedelta`,
+    Args:
+        filename: the file whose atime should be set
+        atime: raw atime as number of seconds since the Epoch to set
+
+    See also
+    :meth:`get_file_raw_atime`,
      :meth:`get_file_atime_age_seconds`,
-    and :meth:`get_file_raw_atime`.
+    :meth:`get_file_atime_as_datetime`,
+    :meth:`get_file_atime_timedelta`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_mtime`,
+    :meth:`set_file_raw_atime_and_mtime`,
+    :meth:`touch_file`
      """
      mtime = get_file_raw_mtime(filename)
      assert mtime is not None
@@ -534,10 +670,19 @@ def set_file_raw_atime(filename: str, atime: float):
  def set_file_raw_mtime(filename: str, mtime: float):
      """Sets a file's raw modification time.
  
-    See also :meth:`get_file_mtime_as_datetime`,
-    :meth:`get_file_mtime_timedelta`,
+    Args:
+        filename: the file whose mtime should be set
+        mtime: the raw mtime as number of seconds since the Epoch to set
+
+    See also
+    :meth:`get_file_raw_mtime`,
      :meth:`get_file_mtime_age_seconds`,
-    and :meth:`get_file_raw_mtime`.
+    :meth:`get_file_mtime_as_datetime`,
+    :meth:`get_file_mtime_timedelta`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`,
+    :meth:`touch_file`
      """
      atime = get_file_raw_atime(filename)
      assert atime is not None
@@ -545,12 +690,19 @@ def set_file_raw_mtime(filename: str, mtime: float):
  
  
  def set_file_raw_atime_and_mtime(filename: str, ts: float = None):
-    """Sets both a file's raw modification and access times
+    """Sets both a file's raw modification and access times.
  
      Args:
          filename: the file whose times to set
          ts: the raw time to set or None to indicate time should be
              set to the current time.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_mtime`
      """
      if ts is not None:
          os.utime(filename, (ts, ts))
@@ -558,10 +710,19 @@ def set_file_raw_atime_and_mtime(filename: str, ts: float = None):
          os.utime(filename, None)
  
  
-def convert_file_timestamp_to_datetime(
+def _convert_file_timestamp_to_datetime(
      filename: str, producer
  ) -> Optional[datetime.datetime]:
-    """Convert a raw file timestamp into a python datetime."""
+    """
+    Converts a raw file timestamp into a Python datetime.
+
+    Args:
+        filename: file whose timestamps should be converted.
+        producer: source of the timestamp.
+
+    Returns:
+        The datetime.
+    """
      ts = producer(filename)
      if ts is not None:
          return datetime.datetime.fromtimestamp(ts)
@@ -569,40 +730,70 @@ def convert_file_timestamp_to_datetime(
  
  
  def get_file_atime_as_datetime(filename: str) -> Optional[datetime.datetime]:
-    """Fetch a file's access time as a python datetime.
+    """Fetch a file's access time as a Python datetime.
  
-    See also :meth:`get_file_atime_as_datetime`,
-    :meth:`get_file_atime_timedelta`,
+    Args:
+        filename: the file whose atime should be fetched.
+
+    Returns:
+        The file's atime as a Python :class:`datetime.datetime`.
+
+    See also
+    :meth:`get_file_raw_atime`,
      :meth:`get_file_atime_age_seconds`,
-    :meth:`describe_file_atime`,
-    and :meth:`get_file_raw_atime`.
+    :meth:`get_file_atime_timedelta`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return convert_file_timestamp_to_datetime(filename, get_file_raw_atime)
+    return _convert_file_timestamp_to_datetime(filename, get_file_raw_atime)
  
  
  def get_file_mtime_as_datetime(filename: str) -> Optional[datetime.datetime]:
-    """Fetches a file's modification time as a python datetime.
+    """Fetch a file's modification time as a Python datetime.
  
-    See also :meth:`get_file_mtime_as_datetime`,
-    :meth:`get_file_mtime_timedelta`,
+    Args:
+        filename: the file whose mtime should be fetched.
+
+    Returns:
+        The file's mtime as a Python :class:`datetime.datetime`.
+
+    See also
+    :meth:`get_file_raw_mtime`,
      :meth:`get_file_mtime_age_seconds`,
-    and :meth:`get_file_raw_mtime`.
+    :meth:`get_file_mtime_timedelta`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return convert_file_timestamp_to_datetime(filename, get_file_raw_mtime)
+    return _convert_file_timestamp_to_datetime(filename, get_file_raw_mtime)
  
  
  def get_file_ctime_as_datetime(filename: str) -> Optional[datetime.datetime]:
-    """Fetches a file's creation time as a python datetime.
+    """Fetches a file's creation time as a Python datetime.
  
-    See also :meth:`get_file_ctime_as_datetime`,
-    :meth:`get_file_ctime_timedelta`,
+    Args:
+        filename: the file whose ctime should be fetched.
+
+    Returns:
+        The file's ctime as a Python :class:`datetime.datetime`.
+
+    See also
+    :meth:`get_file_raw_ctime`,
      :meth:`get_file_ctime_age_seconds`,
-    and :meth:`get_file_raw_ctime`.
+    :meth:`get_file_ctime_timedelta`,
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
-    return convert_file_timestamp_to_datetime(filename, get_file_raw_ctime)
+    return _convert_file_timestamp_to_datetime(filename, get_file_raw_ctime)
  
  
-def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
+def _get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
      """~Internal helper"""
      now = time.time()
      ts = get_file_raw_timestamps(filename)
@@ -615,42 +806,73 @@ def get_file_timestamp_age_seconds(filename: str, extractor) -> Optional[int]:
  def get_file_atime_age_seconds(filename: str) -> Optional[int]:
      """Gets a file's access time as an age in seconds (ago).
  
-    See also :meth:`get_file_atime_as_datetime`,
+    Args:
+        filename: file whose atime should be checked.
+
+    Returns:
+        The number of seconds ago that filename was last accessed.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_atime_as_datetime`,
      :meth:`get_file_atime_timedelta`,
-    :meth:`get_file_atime_age_seconds`,
-    :meth:`describe_file_atime`,
-    and :meth:`get_file_raw_atime`.
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return get_file_timestamp_age_seconds(filename, lambda x: x.st_atime)
+    return _get_file_timestamp_age_seconds(filename, lambda x: x.st_atime)
  
  
  def get_file_ctime_age_seconds(filename: str) -> Optional[int]:
      """Gets a file's creation time as an age in seconds (ago).
  
-    See also :meth:`get_file_ctime_as_datetime`,
-    :meth:`get_file_ctime_timedelta`,
+    Args:
+        filename: file whose ctime should be checked.
+
+    Returns:
+        The number of seconds ago that filename was created.
+
+    See also
+    :meth:`get_file_raw_ctime`,
      :meth:`get_file_ctime_age_seconds`,
-    and :meth:`get_file_raw_ctime`.
+    :meth:`get_file_ctime_as_datetime`,
+    :meth:`get_file_ctime_timedelta`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_timestamps`
      """
-    return get_file_timestamp_age_seconds(filename, lambda x: x.st_ctime)
+    return _get_file_timestamp_age_seconds(filename, lambda x: x.st_ctime)
  
  
  def get_file_mtime_age_seconds(filename: str) -> Optional[int]:
      """Gets a file's modification time as seconds (ago).
  
-    See also :meth:`get_file_mtime_as_datetime`,
+    Args:
+        filename: file whose mtime should be checked.
+
+    Returns:
+        The number of seconds ago that filename was last modified.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_mtime_as_datetime`,
      :meth:`get_file_mtime_timedelta`,
-    :meth:`get_file_mtime_age_seconds`,
-    and :meth:`get_file_raw_mtime`.
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return get_file_timestamp_age_seconds(filename, lambda x: x.st_mtime)
+    return _get_file_timestamp_age_seconds(filename, lambda x: x.st_mtime)
  
  
-def get_file_timestamp_timedelta(
+def _get_file_timestamp_timedelta(
      filename: str, extractor
  ) -> Optional[datetime.timedelta]:
      """~Internal helper"""
-    age = get_file_timestamp_age_seconds(filename, extractor)
+    age = _get_file_timestamp_age_seconds(filename, extractor)
      if age is not None:
          return datetime.timedelta(seconds=float(age))
      return None
@@ -659,36 +881,69 @@ def get_file_timestamp_timedelta(
  def get_file_atime_timedelta(filename: str) -> Optional[datetime.timedelta]:
      """How long ago was a file accessed as a timedelta?
  
-    See also :meth:`get_file_atime_as_datetime`,
-    :meth:`get_file_atime_timedelta`,
+    Args:
+        filename: the file whose atime should be checked.
+
+    Returns:
+        A Python :class:`datetime.timedelta` representing how long
+        ago filename was last accessed.
+
+    See also
+    :meth:`get_file_raw_atime`,
      :meth:`get_file_atime_age_seconds`,
-    :meth:`describe_file_atime`,
-    and :meth:`get_file_raw_atime`.
+    :meth:`get_file_atime_as_datetime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return get_file_timestamp_timedelta(filename, lambda x: x.st_atime)
+    return _get_file_timestamp_timedelta(filename, lambda x: x.st_atime)
  
  
  def get_file_ctime_timedelta(filename: str) -> Optional[datetime.timedelta]:
      """How long ago was a file created as a timedelta?
  
-    See also :meth:`get_file_ctime_as_datetime`,
-    :meth:`get_file_ctime_timedelta`,
+    Args:
+        filename: the file whose ctime should be checked.
+
+    Returns:
+        A Python :class:`datetime.timedelta` representing how long
+        ago filename was created.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
      :meth:`get_file_ctime_age_seconds`,
-    and :meth:`get_file_raw_ctime`.
+    :meth:`get_file_ctime_as_datetime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
-    return get_file_timestamp_timedelta(filename, lambda x: x.st_ctime)
+    return _get_file_timestamp_timedelta(filename, lambda x: x.st_ctime)
  
  
  def get_file_mtime_timedelta(filename: str) -> Optional[datetime.timedelta]:
      """
-    Gets a file's modification time as a python timedelta.
+    Gets a file's modification time as a Python timedelta.
  
-    See also :meth:`get_file_mtime_as_datetime`,
-    :meth:`get_file_mtime_timedelta`,
+    Args:
+        filename: the file whose mtime should be checked.
+
+    Returns:
+        A Python :class:`datetime.timedelta` representing how long
+        ago filename was last modified.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
      :meth:`get_file_mtime_age_seconds`,
-    and :meth:`get_file_raw_mtime`.
+    :meth:`get_file_mtime_as_datetime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
-    return get_file_timestamp_timedelta(filename, lambda x: x.st_mtime)
+    return _get_file_timestamp_timedelta(filename, lambda x: x.st_mtime)
  
  
  def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optional[str]:
@@ -698,7 +953,7 @@ def describe_file_timestamp(filename: str, extractor, *, brief=False) -> Optiona
          describe_duration_briefly,
      )
  
-    age = get_file_timestamp_age_seconds(filename, extractor)
+    age = _get_file_timestamp_age_seconds(filename, extractor)
      if age is None:
          return None
      if brief:
@@ -711,11 +966,25 @@ def describe_file_atime(filename: str, *, brief=False) -> Optional[str]:
      """
      Describe how long ago a file was accessed.
  
-    See also :meth:`get_file_atime_as_datetime`,
-    :meth:`get_file_atime_timedelta`,
+    Args:
+        filename: the file whose atime should be described.
+        brief: if True, describe atime briefly.
+
+    Returns:
+        A string that represents how long ago filename was last
+        accessed.  The description will be verbose or brief depending
+        on the brief argument.
+
+    See also
+    :meth:`get_file_raw_atime`,
      :meth:`get_file_atime_age_seconds`,
-    :meth:`describe_file_atime`,
-    and :meth:`get_file_raw_atime`.
+    :meth:`get_file_atime_as_datetime`,
+    :meth:`get_file_atime_timedelta`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
      return describe_file_timestamp(filename, lambda x: x.st_atime, brief=brief)
  
@@ -723,22 +992,49 @@ def describe_file_atime(filename: str, *, brief=False) -> Optional[str]:
  def describe_file_ctime(filename: str, *, brief=False) -> Optional[str]:
      """Describes a file's creation time.
  
-    See also :meth:`get_file_ctime_as_datetime`,
-    :meth:`get_file_ctime_timedelta`,
+    Args:
+        filename: the file whose ctime should be described.
+        brief: if True, describe ctime briefly.
+
+    Returns:
+        A string that represents how long ago filename was created.
+        The description will be verbose or brief depending
+        on the brief argument.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
      :meth:`get_file_ctime_age_seconds`,
-    and :meth:`get_file_raw_ctime`.
+    :meth:`get_file_ctime_as_datetime`,
+    :meth:`get_file_ctime_timedelta`,
+    :meth:`get_file_raw_mtime`,
+    :meth:`get_file_raw_timestamps`
      """
      return describe_file_timestamp(filename, lambda x: x.st_ctime, brief=brief)
  
  
  def describe_file_mtime(filename: str, *, brief=False) -> Optional[str]:
-    """
-    Describes how long ago a file was modified.
+    """Describes how long ago a file was modified.
  
-    See also :meth:`get_file_mtime_as_datetime`,
-    :meth:`get_file_mtime_timedelta`,
+    Args:
+        filename: the file whose mtime should be described.
+        brief: if True, describe mtime briefly.
+
+    Returns:
+        A string that represents how long ago filename was last
+        modified.  The description will be verbose or brief depending
+        on the brief argument.
+
+    See also
+    :meth:`get_file_raw_atime`,
+    :meth:`get_file_raw_ctime`,
+    :meth:`get_file_raw_mtime`,
      :meth:`get_file_mtime_age_seconds`,
-    and :meth:`get_file_raw_mtime`.
+    :meth:`get_file_mtime_as_datetime`,
+    :meth:`get_file_mtime_timedelta`,
+    :meth:`get_file_raw_timestamps`,
+    :meth:`set_file_raw_atime`,
+    :meth:`set_file_raw_atime_and_mtime`
      """
      return describe_file_timestamp(filename, lambda x: x.st_mtime, brief=brief)
  
@@ -751,18 +1047,48 @@ def touch_file(filename: str, *, mode: Optional[int] = 0o666):
      Args:
          filename: the filename
          mode: the mode to create the file with
+
+    .. warning::
+
+        The default creation mode is 0o666 which is world readable
+        and writable.  Override this by passing in your own mode
+        parameter if desired.
+
+    See also :meth:`set_file_raw_atime`, :meth:`set_file_raw_atime_and_mtime`,
+    :meth:`set_file_raw_mtime`, :meth:`create_path_if_not_exist`
      """
      pathlib.Path(filename, mode=mode).touch()
  
  
  def expand_globs(in_filename: str):
-    """Expands shell globs (* and ? wildcards) to the matching files."""
+    """
+    Expands shell globs (* and ? wildcards) to the matching files.
+
+    Args:
+        in_filename: the filepath to be expanded.  May contain '*' and '?'
+            globbing characters.
+
+    Returns:
+        A Generator that yields filenames that match the input pattern.
+
+    See also :meth:`get_files`, :meth:`get_files_recursive`.
+    """
      for filename in glob.glob(in_filename):
          yield filename
  
  
  def get_files(directory: str):
-    """Returns the files in a directory as a generator."""
+    """Returns the files in a directory as a generator.
+
+    Args:
+        directory: the directory to list files under.
+
+    Returns:
+        A generator that yields all files in the input directory.
+
+    See also :meth:`expand_globs`, :meth:`get_files_recursive`,
+    :meth:`get_matching_files`.
+    """
      for filename in os.listdir(directory):
          full_path = join(directory, filename)
          if isfile(full_path) and exists(full_path):
@@ -770,14 +1096,38 @@ def get_files(directory: str):
  
  
  def get_matching_files(directory: str, glob: str):
-    """Returns the subset of files whose name matches a glob."""
+    """
+    Returns the subset of files whose name matches a glob.
+
+    Args:
+        directory: the directory to match files within.
+        glob: the globbing pattern (may include '*' and '?') to
+            use when matching files.
+
+    Returns:
+        A generator that yields filenames in directory that match
+        the given glob pattern.
+
+    See also :meth:`get_files`, :meth:`expand_globs`.
+    """
      for filename in get_files(directory):
          if fnmatch.fnmatch(filename, glob):
              yield filename
  
  
  def get_directories(directory: str):
-    """Returns the subdirectories in a directory as a generator."""
+    """
+    Returns the subdirectories in a directory as a generator.
+
+    Args:
+        directory: the directory to list subdirectories within.
+
+    Returns:
+        A generator that yields all subdirectories within the given
+        input directory.
+
+    See also :meth:`get_files`, :meth:`get_files_recursive`.
+    """
      for d in os.listdir(directory):
          full_path = join(directory, d)
          if not isfile(full_path) and exists(full_path):
@@ -785,7 +1135,20 @@ def get_directories(directory: str):
  
  
  def get_files_recursive(directory: str):
-    """Find the files and directories under a root recursively."""
+    """
+    Find the files and directories under a root recursively.
+
+    Args:
+        directory: the root directory under which to list subdirectories
+            and file contents.
+
+    Returns:
+        A generator that yields all directories and files beneath the input
+        root directory.
+
+    See also :meth:`get_files`, :meth:`get_matching_files`,
+    :meth:`get_matching_files_recursive`
+    """
      for filename in get_files(directory):
          yield filename
      for subdir in get_directories(directory):
@@ -794,18 +1157,50 @@ def get_files_recursive(directory: str):
  
  
  def get_matching_files_recursive(directory: str, glob: str):
-    """Returns the subset of files whose name matches a glob under a root recursively."""
+    """
+    Returns the subset of files whose name matches a glob under a root recursively.
+
+    Args:
+        directory: the root under which to search
+        glob: a globbing pattern that describes the subset of files and directories
+            to return.  May contain '?' and '*'.
+
+    Returns:
+        A generator that yields all files and directories under the given root
+        directory that match the given globbing pattern.
+
+    See also :meth:`get_files_recursive`.
+    """
      for filename in get_files_recursive(directory):
          if fnmatch.fnmatch(filename, glob):
              yield filename
  
  
  class FileWriter(contextlib.AbstractContextManager):
-    """A helper that writes a file to a temporary location and then moves
-    it atomically to its ultimate destination on close.
+    """A helper that writes a file to a temporary location and then
+    moves it atomically to its ultimate destination on close.
+
+    Example usage.  Creates a temporary file that is populated by the
+    print statements within the context.  Until the context is exited,
+    the true destination file does not exist so no reader of it can
+    see partial writes due to buffering or code timing.  Once the
+    context is exited, the file is moved from its temporary location
+    to its permanent location by a call to `/bin/mv` which should be
+    atomic::
+
+        with FileWriter('/home/bob/foobar.txt') as w:
+            print("This is a test!", file=w)
+            time.sleep(2)
+            print("This is only a test...", file=w)
+
      """
  
      def __init__(self, filename: str) -> None:
+        """
+        Args:
+            filename: the ultimate destination file we want to populate.
+                On exit, the file will be atomically created.
+        """
          self.filename = filename
          uuid = uuid4()
          self.tempfile = f'{filename}-{uuid}.tmp'
diff --git a/src/pyutils/files/lockfile.py b/src/pyutils/files/lockfile.py

index ee7346bf1042e0a0b85e36273c0ef8182c05f7b3..0febca6bf5b754682f5e91f8604c70e566fd8a0d 100644 (file)
--- a/src/pyutils/files/lockfile.py
+++ b/src/pyutils/files/lockfile.py
@@ -2,7 +2,14 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""File-based locking helper."""
+"""This is a lockfile implementation I created for use with cronjobs
+on my machine to prevent multiple copies of a job from running in
+parallel.  When one job is running this code keeps a file on disk to
+indicate a lock is held.  Other copies will fail to start if they
+detect this lock until the lock is released.  There are provisions in
+the code for timing out locks, cleaning up a lock when a signal is
+received, gracefully retrying lock acquisition on failure, etc...
+"""
  
  from __future__ import annotations
  
@@ -91,11 +98,11 @@ class LockFile(contextlib.AbstractContextManager):
              signal.signal(signal.SIGTERM, self._signal)
          self.expiration_timestamp = expiration_timestamp
  
-    def locked(self):
+    def locked(self) -> bool:
          """Is it locked currently?"""
          return self.is_locked
  
-    def available(self):
+    def available(self) -> bool:
          """Is it available currently?"""
          return not os.path.exists(self.lockfile)
  
@@ -157,7 +164,7 @@ class LockFile(contextlib.AbstractContextManager):
              self._detect_stale_lockfile()
          return _try_acquire_lock_with_retries()
  
-    def release(self):
+    def release(self) -> None:
          """Release the lock"""
          try:
              os.unlink(self.lockfile)
diff --git a/src/pyutils/function_utils.py b/src/pyutils/function_utils.py

index ecfd4c32c9b4e8372749da2ad9b4a21f83795513..f4202d38ce03bf97bb837044fbfa01ec4d06160a 100644 (file)
--- a/src/pyutils/function_utils.py
+++ b/src/pyutils/function_utils.py
@@ -38,3 +38,9 @@ def function_identifier(f: Callable) -> str:
          return f'{module}:{f.__name__}'
      else:
          return f'{f.__module__}:{f.__name__}'
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/src/pyutils/logging_utils.py b/src/pyutils/logging_utils.py

index 94fe5a31c4fd463c8c58f3343ecb8df69639d745..71b2a6e5460833263fe131cc07a9c6dc90e21c7e 100644 (file)
--- a/src/pyutils/logging_utils.py
+++ b/src/pyutils/logging_utils.py
@@ -621,9 +621,10 @@ def initialize_logging(logger=None) -> logging.Logger:
  
      # Global default logging level (--logging_level); messages below
      # this level will be silenced.
-    default_logging_level = getattr(
-        logging, config.config['logging_level'].upper(), None
-    )
+    logging_level = config.config['logging_level']
+    assert logging_level
+    logging_level = logging_level.upper()
+    default_logging_level = getattr(logging, logging_level, None)
      if not isinstance(default_logging_level, int):
          raise ValueError(f'Invalid level: {config.config["logging_level"]}')
  
@@ -667,10 +668,14 @@ def initialize_logging(logger=None) -> logging.Logger:
      # --logging_filename_maxsize) set up logging to a file on the
      # filesystem with automatic rotation when it gets too big.
      if config.config['logging_filename']:
+        max_bytes = config.config['logging_filename_maxsize']
+        assert max_bytes and type(max_bytes) == int
+        backup_count = config.config['logging_filename_count']
+        assert backup_count and type(backup_count) == int
          handler = RotatingFileHandler(
              config.config['logging_filename'],
-            maxBytes=config.config['logging_filename_maxsize'],
-            backupCount=config.config['logging_filename_count'],
+            maxBytes=max_bytes,
+            backupCount=backup_count,
          )
          handler.setFormatter(
              MillisecondAwareFormatter(
diff --git a/src/pyutils/parallelize/deferred_operand.py b/src/pyutils/parallelize/deferred_operand.py

index 884967091607ee5a57376c83548fd324b349def6..c234c0574e7a6ba29e810210229cb08db9966c27 100644 (file)
--- a/src/pyutils/parallelize/deferred_operand.py
+++ b/src/pyutils/parallelize/deferred_operand.py
@@ -2,11 +2,20 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""This is the base class of :class:`SmartFuture`.  It is essentially
-a class that tries to have every Python __dunder__ method defined
-reasonably for it such that, when it is used in a manner that requires
-its value to be known, it calls a `resolve` method to wait for the
-data it represents.
+"""This is the base class of
+:class:`pyutils.parallelize.smart_future.SmartFuture`, which is a
+piece of the simple parallelization framework.
+
+This base class essentially tries to have every Python `__dunder__`
+method defined with a reasonable default implementation so that, when
+it is used in a manner that requires the value to be known, it calls
+:meth:`DeferredOperand.resolve` and either gets the requisite value or
+blocks until the data necessary to resolve the value is ready.  This
+is meant to enable more transparent :class:`Future` objects that can
+be just used directly.
+
+See :class:`pyutils.parallelize.smart_future.SmartFuture` for more
+information.
  
  """
  
@@ -21,8 +30,10 @@ T = TypeVar('T')
  
  class DeferredOperand(ABC, Generic[T]):
      """A wrapper around an operand whose value is deferred until it is
-    needed (i.e. accessed).  See the subclass :class:`SmartFuture` for
-    an example usage and/or a more useful patten.
+    needed (i.e. accessed).  See the subclass
+    :class:`pyutils.parallelize.smart_future.SmartFuture` for an
+    example usage and/or a more useful pattern.
+
      """
  
      @abstractmethod
@@ -31,6 +42,18 @@ class DeferredOperand(ABC, Generic[T]):
  
      @staticmethod
      def resolve(x: Any) -> Any:
+        """
+        When this object is used in a manner that requires it to know
+        its value, this method is called to either return the value or
+        block until it can do so.
+
+        Args:
+            x: the object whose value is required
+
+        Returns:
+            The value of x... immediately if possible, eventually if
+            not possible.
+        """
          while isinstance(x, DeferredOperand):
              x = x._resolve()
          return x
diff --git a/src/pyutils/parallelize/executors.py b/src/pyutils/parallelize/executors.py

index fd70e327b75b81a25e6e20a470de3c36a296d125..fe00b5427c0876b04fba223241c2072ae6a1de4f 100644 (file)
--- a/src/pyutils/parallelize/executors.py
+++ b/src/pyutils/parallelize/executors.py
@@ -3,13 +3,38 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""Defines three executors: a thread executor for doing work using a
-threadpool, a process executor for doing work in other processes on
-the same machine and a remote executor for farming out work to other
-machines.
-
-Also defines DefaultExecutors which is a container for references to
-global executors / worker pools with automatic shutdown semantics."""
+"""
+This module defines a :class:`BaseExecutor` interface and three
+implementations:
+
+    - :class:`ThreadExecutor`
+    - :class:`ProcessExecutor`
+    - :class:`RemoteExecutor`
+
+The :class:`ThreadExecutor` is used to dispatch work to background
+threads in the same Python process for parallelized work.  Of course,
+until the Global Interpreter Lock (GIL) bottleneck is resolved, this
+is not terribly useful for compute-bound code.  But it's good for
+work that is mostly I/O bound.
+
+The :class:`ProcessExecutor` is used to dispatch work to other
+processes on the same machine and is more useful for compute-bound
+workloads.
+
+The :class:`RemoteExecutor` is used in conjunction with `ssh`,
+the `cloudpickle` dependency, and `remote_worker.py <https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=src/pyutils/remote_worker.py;hb=HEAD>`_ file
+to dispatch work to a set of remote worker machines on your
+network.  You can configure this pool via a JSON configuration file,
+an example of which `can be found in examples <https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=examples/parallelize_config/.remote_worker_records;hb=HEAD>`_.
+
+Finally, this file defines a :class:`DefaultExecutors` pool that
+contains a pre-created and ready instance of each of the three
+executors discussed.  It has the added benefit of being automatically
+cleaned up at process termination time.
+
+See instructions in :mod:`pyutils.parallelize.parallelize` for
+setting up and using the framework.
+"""
  
  from __future__ import annotations
  
@@ -76,6 +101,13 @@ parser.add_argument(
      help='Path of the remote worker records file (JSON)',
      default=f'{os.environ.get("HOME", ".")}/.remote_worker_records',
  )
+parser.add_argument(
+    '--remote_worker_helper_path',
+    type=str,
+    metavar='PATH_TO_REMOTE_WORKER_PY',
+    help='Path to remote_worker.py on remote machines',
+    default='source py39-venv/bin/activate && /home/scott/lib/release/pyutils/src/pyutils/remote_worker.py',
+)
  
  
  SSH = '/usr/bin/ssh -oForwardX11=no'
@@ -95,6 +127,10 @@ class BaseExecutor(ABC):
      """
  
      def __init__(self, *, title=''):
+        """
+        Args:
+            title: the name of this executor.
+        """
          self.title = title
          self.histogram = hist.SimpleHistogram(
              hist.SimpleHistogram.n_evenly_spaced_buckets(int(0), int(500), 50)
@@ -103,10 +139,27 @@ class BaseExecutor(ABC):
  
      @abstractmethod
      def submit(self, function: Callable, *args, **kwargs) -> fut.Future:
+        """Submit work for the executor to do.
+
+        Args:
+            function: the Callable to be executed.
+            *args: the positional arguments to pass to function
+            **kwargs: the keyword arguments to pass to function
+
+        Returns:
+            A concurrent :class:`Future` representing the result of the
+            work.
+        """
          pass
  
      @abstractmethod
      def shutdown(self, *, wait: bool = True, quiet: bool = False) -> None:
+        """Shutdown the executor.
+
+        Args:
+            wait: wait for the shutdown to complete before returning?
+            quiet: keep it quiet, please.
+        """
          pass
  
      def shutdown_if_idle(self, *, quiet: bool = False) -> bool:
@@ -115,6 +168,12 @@ class BaseExecutor(ABC):
          otherwise.  Note: this should only be called by the launcher
          process.
  
+        Args:
+            quiet: keep it quiet, please.
+
+        Returns:
+            True if the executor could be shut down because it has no
+            pending work, False otherwise.
          """
          if self.task_count == 0:
              self.shutdown(wait=True, quiet=quiet)
@@ -126,6 +185,8 @@ class BaseExecutor(ABC):
          worker, it should only be called by the launcher process /
          thread / machine.
  
+        Args:
+            delta: the delta value by which to adjust task count.
          """
          self.task_count += delta
          logger.debug('Adjusted task count by %d to %d.', delta, self.task_count)
@@ -135,12 +196,14 @@ class BaseExecutor(ABC):
          worker, it should only be called by the launcher process /
          thread / machine.
  
+        Returns:
+            The executor's current task count.
          """
          return self.task_count
  
  
  class ThreadExecutor(BaseExecutor):
-    """A threadpool executor.  This executor uses python threads to
+    """A threadpool executor.  This executor uses Python threads to
      schedule tasks.  Note that, at least as of python3.10, because of
      the global lock in the interpreter itself, these do not
      parallelize very well so this class is useful mostly for non-CPU
@@ -150,6 +213,10 @@ class ThreadExecutor(BaseExecutor):
      """
  
      def __init__(self, max_workers: Optional[int] = None):
+        """
+        Args:
+            max_workers: maximum number of threads to create in the pool.
+        """
          super().__init__()
          workers = None
          if max_workers is not None:
@@ -167,7 +234,7 @@ class ThreadExecutor(BaseExecutor):
  
      # This is run on a different thread; do not adjust task count here.
      @staticmethod
-    def run_local_bundle(fun, *args, **kwargs):
+    def _run_local_bundle(fun, *args, **kwargs):
          logger.debug("Running local bundle at %s", fun.__name__)
          result = fun(*args, **kwargs)
          return result
@@ -183,7 +250,7 @@ class ThreadExecutor(BaseExecutor):
              newargs.append(arg)
          start = time.time()
          result = self._thread_pool_executor.submit(
-            ThreadExecutor.run_local_bundle, *newargs, **kwargs
+            ThreadExecutor._run_local_bundle, *newargs, **kwargs
          )
          result.add_done_callback(lambda _: self.histogram.add_item(time.time() - start))
          result.add_done_callback(lambda _: self.adjust_task_count(-1))
@@ -206,6 +273,10 @@ class ProcessExecutor(BaseExecutor):
      """
  
      def __init__(self, max_workers=None):
+        """
+        Args:
+            max_workers: the max number of worker processes to create.
+        """
          super().__init__()
          workers = None
          if max_workers is not None:
@@ -223,7 +294,7 @@ class ProcessExecutor(BaseExecutor):
  
      # This is run in another process; do not adjust task count here.
      @staticmethod
-    def run_cloud_pickle(pickle):
+    def _run_cloud_pickle(pickle):
          fun, args, kwargs = cloudpickle.loads(pickle)
          logger.debug("Running pickled bundle at %s", fun.__name__)
          result = fun(*args, **kwargs)
@@ -236,7 +307,9 @@ class ProcessExecutor(BaseExecutor):
          start = time.time()
          self.adjust_task_count(+1)
          pickle = _make_cloud_pickle(function, *args, **kwargs)
-        result = self._process_executor.submit(ProcessExecutor.run_cloud_pickle, pickle)
+        result = self._process_executor.submit(
+            ProcessExecutor._run_cloud_pickle, pickle
+        )
          result.add_done_callback(lambda _: self.histogram.add_item(time.time() - start))
          result.add_done_callback(lambda _: self.adjust_task_count(-1))
          return result
@@ -395,11 +468,9 @@ class RemoteExecutorStatus:
      """
  
      def __init__(self, total_worker_count: int) -> None:
-        """C'tor.
-
+        """
          Args:
              total_worker_count: number of workers in the pool
-
          """
          self.worker_count: int = total_worker_count
          self.known_workers: Set[RemoteWorkerRecord] = set()
@@ -688,8 +759,10 @@ class RemoteExecutor(BaseExecutor):
      executed.  Each bundle is assigned a remote worker based on some policy
      heuristics.  Once assigned to a remote worker, a local subprocess is
      created.  It copies the pickled code to the remote machine via ssh/scp
-    and then starts up work on the remote machine again using ssh.  When
-    the work is complete it copies the results back to the local machine.
+    and then starts up work on the remote machine again using ssh to invoke
+    the :file:`remote_worker.py` (`--remote_worker_helper_path`).  When
+    the work is complete, the local subprocess copies the results back to
+    the local machine via ssh/scp.
  
      So there is essentially one "controller" machine (which may also be
      in the remote executor pool and therefore do task work in addition to
@@ -709,6 +782,9 @@ class RemoteExecutor(BaseExecutor):
          computationally expensive tasks such as jobs that will execute
          for ~30 seconds or longer.
  
+    Instructions for how to set this up are provided in
+    :mod:`pyutils.parallelize.parallelize`.
+
      See also :class:`ProcessExecutor` and :class:`ThreadExecutor`.
      """
  
@@ -717,8 +793,7 @@ class RemoteExecutor(BaseExecutor):
          workers: List[RemoteWorkerRecord],
          policy: RemoteWorkerSelectionPolicy,
      ) -> None:
-        """C'tor.
-
+        """
          Args:
              workers: A list of remote workers we can call on to do tasks.
              policy: A policy for selecting remote workers for tasks.
@@ -1040,11 +1115,10 @@ class RemoteExecutor(BaseExecutor):
          # Kick off the work.  Note that if this fails we let
          # _wait_for_process deal with it.
          self.status.record_processing_began(uuid)
+        helper_path = config.config['remote_worker_helper_path']
          cmd = (
              f'{SSH} {bundle.username}@{bundle.machine} '
-            f'"source py39-venv/bin/activate &&'
-            f' /home/scott/lib/python_modules/remote_worker.py'
-            f' --code_file {bundle.code_file} --result_file {bundle.result_file}"'
+            f'"{helper_path} --code_file {bundle.code_file} --result_file {bundle.result_file}"'
          )
          logger.debug(
              '%s: Executing %s in the background to kick off work...', bundle, cmd
diff --git a/src/pyutils/parallelize/parallelize.py b/src/pyutils/parallelize/parallelize.py

index 6d31174424c2b415866a0b539a6cb4d64579a7fc..41d9093735d0c2e5b8d69195e19fa2c31c154b49 100644 (file)
--- a/src/pyutils/parallelize/parallelize.py
+++ b/src/pyutils/parallelize/parallelize.py
@@ -2,21 +2,58 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A decorator to help with dead simple parallelization.  See usage
-below.
-
-This will just work with `Method.THREAD` and `Method.PROCESS` but to
-use `Method.REMOTE` you need to do some setup work.  You need to
-configure a pool of workers.  Each worker should run the same version
-of Python, ideally in identically configured virtual environments.
-And you need to be able to ssh into each machine using key-based
-authentication (i.e. non-iteractively) and run python.  List machines
-in the location set by `--remote_worker_records_file` (see
-:file:executors.h for flag and an example JSON file under examples).
+"""A decorator to help with dead simple parallelization.  When decorated
+functions are invoked they execute on a background thread, process or
+remote machine depending on the style of decoration::
+
+    @parallelize    # defaults to thread-mode
+    def my_function(a, b, c) -> int:
+        ...do some slow / expensive work, e.g., an http request
+
+    @parallelize(method=Method.PROCESS)
+    def my_other_function(d, e, f) -> str:
+        ...do more really expensive work, e.g., a network read
+
+    @parallelize(method=Method.REMOTE)
+    def my_other_other_function(g, h) -> int:
+        ...this work will be distributed to a remote machine pool
+
+This will just work out of the box with `Method.THREAD` (the default)
+and `Method.PROCESS` but in order to use `Method.REMOTE` you need to
+do some setup work:
+
+    1. To use this stuff you need to hook into :mod:`pyutils.config`
+       so that this code can see commandline arguments.
+
+    2. You need to create and configure a pool of worker machines.
+       All of these machines should run the same version of Python,
+       ideally in a virtual environment (venv) with the same
+       Python dependencies installed.  Different versions of code
+       or of the interpreter itself can cause issues with running
+       cloudpickled code.
+
+    3. You need an account that can ssh into any / all of these
+       machines non-interactively and run Python in the aforementioned
+       virtual environment.  This likely means setting up ssh with
+       key-based authentication.
+
+    4. You need to tell this parallelization framework about the pool
+       of machines where it can dispatch work by creating a JSON based
+       configuration file.  The location of this file defaults to
+       :file:`.remote_worker_records` in your home directory but can
+       be overridden via the `--remote_worker_records_file`
+       commandline argument.  An example JSON configuration `can be
+       found under examples
+       <https://wannabe.guru.org/gitweb/?p=pyutils.git;a=blob_plain;f=examples/parallelize_config/.remote_worker_records;hb=HEAD>`_.
+
+    5. Finally, you will also need to tell the
+       :class:`executors.RemoteExecutor` how to invoke the
+       :file:`remote_worker.py` on remote machines by passing its path
+       on remote worker machines in your setup via the
+       `--remote_worker_helper_path` commandline flag.
  
  """
  
-
  import atexit
  import functools
  import typing
@@ -35,7 +72,7 @@ def parallelize(
      _funct: typing.Optional[typing.Callable] = None, *, method: Method = Method.THREAD
  ) -> typing.Callable:
      """This is a decorator that was created to make multi-threading,
-    multi-processing and remote machine parallelism simple in python.
+    multi-processing and remote machine parallelism simple in Python.
  
      Sample usage::
  
@@ -59,10 +96,15 @@ def parallelize(
  
      The wrapped function returns immediately with a value that is
      wrapped in a :class:`SmartFuture`.  This value will block if it is
-    either read directly (via a call to :meth:`_resolve`) or indirectly
-    (by using the result in an expression, printing it, hashing it,
-    passing it a function argument, etc...).  See comments on
-    :class:`SmartFuture` for details.
+    either read directly (via a call to :meth:`_resolve`) or
+    indirectly (by using the result in an expression, printing it,
+    hashing it, passing it a function argument, etc...).  See comments
+    on :class:`SmartFuture` for details.  The value can be safely
+    stored (without hashing) or passed as an argument without causing
+    it to block waiting on a result.  There are some convenience
+    methods for dealing with collections of :class:`SmartFuture`
+    objects defined in :file:`smart_future.py`, namely
+    :meth:`smart_future.wait_any` and :meth:`smart_future.wait_all`.
  
      .. warning::
          You may stack @parallelized methods and it will "work".
@@ -73,11 +115,12 @@ def parallelize(
          beyond the control mechanisms built into one instance of the pool.
          Be careful.
  
-    .. note::
+    .. warning::
          There is non-trivial overhead of pickling code and
          copying it over the network when you use :code:`Method.REMOTE`.  There's
          a smaller but still considerable cost of creating a new process
          and passing code to/from it when you use :code:`Method.PROCESS`.
+
      """
  
      def wrapper(funct: typing.Callable):
diff --git a/src/pyutils/parallelize/smart_future.py b/src/pyutils/parallelize/smart_future.py

index c29124d83646185ea2599ee4eda5c8e987c433e5..310560db21e06459c728380a402e583571aebd0c 100644 (file)
--- a/src/pyutils/parallelize/smart_future.py
+++ b/src/pyutils/parallelize/smart_future.py
@@ -2,10 +2,14 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A :class:`Future` that can be treated as a substutute for the result
+"""
+A :class:`Future` that can be treated as a substitute for the result
  that it contains and will not block until it is used.  At that point,
  if the underlying value is not yet available yet, it will block until
  the internal result actually becomes available.
+
+Results from :class:`parallelize.parallelize` are returned wrapped
+in :class:`SmartFuture` instances.
  """
  
  from __future__ import annotations
@@ -47,6 +51,10 @@ def wait_any(
              silently ignore then?
          timeout: invoke callback with a periodicity of timeout while
              awaiting futures
+
+    Returns:
+        A :class:`SmartFuture` from the futures list with a result
+        available without blocking.
      """
  
      real_futures = []
@@ -96,6 +104,10 @@ def wait_all(
          log_exceptions: Should we log (warning + exception) any
              underlying exceptions raised during future processing or
              silently ignore then?
+
+    Returns:
+        Only when all futures in the input list are ready.  Blocks
+        until such time.
      """
  
      real_futures = []
@@ -130,14 +142,28 @@ class SmartFuture(DeferredOperand):
      """
  
      def __init__(self, wrapped_future: fut.Future) -> None:
+        """
+        Args:
+            wrapped_future: a normal Python :class:`concurrent.futures.Future`
+                object that we are wrapping.
+        """
          assert isinstance(wrapped_future, fut.Future)
          self.wrapped_future = wrapped_future
          self.id = id_generator.get("smart_future_id")
  
      def get_id(self) -> int:
+        """
+        Returns:
+            A unique identifier for this instance.
+        """
          return self.id
  
      def is_ready(self) -> bool:
+        """
+        Returns:
+            True if the wrapped future is ready without blocking, False
+            otherwise.
+        """
          return self.wrapped_future.done()
  
      # You shouldn't have to call this; instead, have a look at defining a
diff --git a/src/pyutils/parallelize/thread_utils.py b/src/pyutils/parallelize/thread_utils.py

index aaef13bf6ea8d53a788699f37c45f02bef47ccbf..e3747fd51231474d04e3d83f62c70b25ce2eb6e4 100644 (file)
--- a/src/pyutils/parallelize/thread_utils.py
+++ b/src/pyutils/parallelize/thread_utils.py
@@ -72,7 +72,11 @@ def background_thread(
  ) -> Callable[..., Tuple[threading.Thread, threading.Event]]:
      """A function decorator to create a background thread.
  
-    Usage::
+    Args:
+        _funct: The function being wrapped such that it is invoked
+            on a background thread.
+
+    Example usage::
  
          @background_thread
          def random(a: int, b: str, stop_event: threading.Event) -> None:
diff --git a/src/pyutils/search/logical_search.py b/src/pyutils/search/logical_search.py

index 2b52864049848f39048eafcb058a7aa7821f5a52..194590068b5819f28a3316efb0c6cd21e0fcd7f2 100644 (file)
--- a/src/pyutils/search/logical_search.py
+++ b/src/pyutils/search/logical_search.py
@@ -4,6 +4,8 @@
  
  """This is a module concerned with the creation of and searching of a
  corpus of documents.  The corpus and index are held in memory.
+The query language contains AND, OR, NOT, and parentheses to support
+flexible search semantics.
  """
  
  from __future__ import annotations
diff --git a/src/pyutils/security/acl.py b/src/pyutils/security/acl.py

index 0d223f2756142b7a16cecb9fc081d1c9d1da7fb7..12ba89135c6f4f0e63c84db9548f056cc63235c5 100644 (file)
--- a/src/pyutils/security/acl.py
+++ b/src/pyutils/security/acl.py
@@ -2,7 +2,81 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""This module defines various flavors of Access Control Lists."""
+"""Right now this package only contains an implementation that allows you to
+define and evaluate Access Control Lists (ACLs) easily.  For example::
+
+        even = acl.SetBasedACL(
+            allow_set=set([2, 4, 6, 8, 10]),
+            deny_set=set([1, 3, 5, 7, 9]),
+            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
+            default_answer=False,
+        )
+        self.assertTrue(even(2))
+        self.assertFalse(even(3))
+        self.assertFalse(even(-4))
+
+ACLs can also be defined based on other criteria, for example::
+
+        a_or_b = acl.StringWildcardBasedACL(
+            allowed_patterns=['a*', 'b*'],
+            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
+            default_answer=False,
+        )
+        self.assertTrue(a_or_b('aardvark'))
+        self.assertTrue(a_or_b('baboon'))
+        self.assertFalse(a_or_b('cheetah'))
+
+Or::
+
+        weird = acl.StringREBasedACL(
+            denied_regexs=[re.compile('^a.*a$'), re.compile('^b.*b$')],
+            order_to_check_allow_deny=acl.Order.DENY_ALLOW,
+            default_answer=True,
+        )
+        self.assertTrue(weird('aardvark'))
+        self.assertFalse(weird('anaconda'))
+        self.assertFalse(weird('blackneb'))
+        self.assertTrue(weird('crow'))
+
+There are implementations for wildcards, sets, regular expressions,
+allow lists, deny lists, sequences of user defined predicates, etc...
+You can also just subclass the base :class:`SimpleACL` interface to
+define your own ACLs easily.  Its :meth:`__call__` simply needs to
+decide whether an item is allowed or denied.
+
+Once a :class:`SimpleACL` is defined, it can be used within a
+:class:`CompoundACL`::
+
+        a_b_c = acl.StringWildcardBasedACL(
+            allowed_patterns=['a*', 'b*', 'c*'],
+            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
+            default_answer=False,
+        )
+        c_d_e = acl.StringWildcardBasedACL(
+            allowed_patterns=['c*', 'd*', 'e*'],
+            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
+            default_answer=False,
+        )
+        conjunction = acl.AllCompoundACL(
+            subacls=[a_b_c, c_d_e],
+            order_to_check_allow_deny=acl.Order.ALLOW_DENY,
+            default_answer=False,
+        )
+        self.assertFalse(conjunction('aardvark'))
+        self.assertTrue(conjunction('caribou'))
+        self.assertTrue(conjunction('condor'))
+        self.assertFalse(conjunction('eagle'))
+        self.assertFalse(conjunction('newt'))
+
+A :class:`CompoundACL` can also be used inside another :class:`CompoundACL`
+so this should be a flexible framework when defining complex access control
+requirements.
+
+There are two flavors of :class:`CompoundACL`:
+:class:`AllCompoundACL` and :class:`AnyCompoundACL`.  The former only
+admits an item if all of its sub-acls admit it and the latter will
+admit an item if any of its sub-acls admit it.
+"""
  
  import enum
  import fnmatch
@@ -33,6 +107,23 @@ class SimpleACL(ABC):
      """A simple Access Control List interface."""
  
      def __init__(self, *, order_to_check_allow_deny: Order, default_answer: bool):
+        """
+        Args:
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          if order_to_check_allow_deny not in (
              Order.ALLOW_DENY,
              Order.DENY_ALLOW,
@@ -45,7 +136,10 @@ class SimpleACL(ABC):
          self.default_answer = default_answer
  
      def __call__(self, x: Any) -> bool:
-        """Returns True if x is allowed, False otherwise."""
+        """
+        Returns:
+            True if x is allowed, False otherwise.
+        """
          logger.debug('SimpleACL checking %s', x)
          if self.order_to_check_allow_deny == Order.ALLOW_DENY:
              logger.debug('Checking allowed first...')
@@ -73,12 +167,23 @@ class SimpleACL(ABC):
  
      @abstractmethod
      def check_allowed(self, x: Any) -> bool:
-        """Return True if x is explicitly allowed, False otherwise."""
+        """
+        Args:
+            x: the object being tested.
+
+        Returns:
+            True if x is explicitly allowed, False otherwise.
+        """
          pass
  
      @abstractmethod
      def check_denied(self, x: Any) -> bool:
-        """Return True if x is explicitly denied, False otherwise."""
+        """
+        Args:
+            x: the object being tested.
+
+        Returns:
+            True if x is explicitly denied, False otherwise."""
          pass
  
  
@@ -93,6 +198,25 @@ class SetBasedACL(SimpleACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            allow_set: the set of items that are allowed.
+            deny_set: the set of items that are denied.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          super().__init__(
              order_to_check_allow_deny=order_to_check_allow_deny,
              default_answer=default_answer,
@@ -119,6 +243,10 @@ class AllowListACL(SetBasedACL):
      """
  
      def __init__(self, *, allow_set: Optional[Set[Any]]) -> None:
+        """
+        Args:
+            allow_set: a set containing the items that are allowed.
+        """
          super().__init__(
              allow_set=allow_set,
              order_to_check_allow_deny=Order.ALLOW_DENY,
@@ -132,9 +260,13 @@ class DenyListACL(SetBasedACL):
      """
  
      def __init__(self, *, deny_set: Optional[Set[Any]]) -> None:
+        """
+        Args:
+            deny_set: a set containing the items that are denied.
+        """
          super().__init__(
              deny_set=deny_set,
-            order_to_check_allow_deny=Order.ALLOW_DENY,
+            order_to_check_allow_deny=Order.DENY_ALLOW,
              default_answer=True,
          )
  
@@ -145,9 +277,13 @@ class BlockListACL(SetBasedACL):
      """
  
      def __init__(self, *, deny_set: Optional[Set[Any]]) -> None:
+        """
+        Args:
+            deny_set: a set containing the items that are denied.
+        """
          super().__init__(
              deny_set=deny_set,
-            order_to_check_allow_deny=Order.ALLOW_DENY,
+            order_to_check_allow_deny=Order.DENY_ALLOW,
              default_answer=True,
          )
  
@@ -163,6 +299,27 @@ class PredicateListBasedACL(SimpleACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            allow_predicate_list: a list of callables that indicate that
+                an item should be allowed if they return True.
+            deny_predicate_list: a list of callables that indicate that an
+                item should be denied if they return True.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          super().__init__(
              order_to_check_allow_deny=order_to_check_allow_deny,
              default_answer=default_answer,
@@ -196,6 +353,29 @@ class StringWildcardBasedACL(PredicateListBasedACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            allowed_patterns: a list of strings, optionally containing glob-style
+                wildcards, that, if they match an item, indicate it should be
+                allowed.
+            denied_patterns: a list of strings, optionally containing glob-style
+                wildcards, that, if they match an item, indicate it should be
+                denied.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          allow_predicates = []
          if allowed_patterns is not None:
              for pattern in allowed_patterns:
@@ -229,6 +409,27 @@ class StringREBasedACL(PredicateListBasedACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            allowed_regexs: a list of regular expressions that, if they match an
+                item, indicate that the item should be allowed.
+            denied_regexs: a list of regular expressions that, if they match an
+                item, indicate that the item should be denied.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          allow_predicates = None
          if allowed_regexs is not None:
              allow_predicates = []
@@ -261,6 +462,25 @@ class AnyCompoundACL(SimpleACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            subacls: a list of sub-ACLs we will consult for each item.  If
+                *any* of these sub-ACLs allow the item we will also allow it.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          super().__init__(
              order_to_check_allow_deny=order_to_check_allow_deny,
              default_answer=default_answer,
@@ -290,6 +510,25 @@ class AllCompoundACL(SimpleACL):
          order_to_check_allow_deny: Order,
          default_answer: bool,
      ) -> None:
+        """
+        Args:
+            subacls: a list of sub-ACLs that we will consult for each item.  *All*
+                sub-ACLs must allow an item for us to also allow that item.
+            order_to_check_allow_deny: set this argument to indicate what
+                order to check items for allow and deny.  Pass either
+                `Order.ALLOW_DENY` to check allow first or `Order.DENY_ALLOW`
+                to check deny first.
+            default_answer: pass this argument to provide the ACL with a
+                default answer.
+
+        .. note::
+
+            By using `order_to_check_allow_deny` and `default_answer` you
+            can create both *allow lists* and *deny lists*.  The former
+            uses `Order.ALLOW_DENY` with a default answer of False whereas
+            the latter uses `Order.DENY_ALLOW` with a default answer of
+            True.
+        """
          super().__init__(
              order_to_check_allow_deny=order_to_check_allow_deny,
              default_answer=default_answer,
diff --git a/src/pyutils/string_utils.py b/src/pyutils/string_utils.py

index dbe3c1f1c4fd43aa487118201dc184f450671f5a..f6056d0f69e5097355bc018d62c9640c8a2377c5 100644 (file)
--- a/src/pyutils/string_utils.py
+++ b/src/pyutils/string_utils.py
@@ -569,7 +569,7 @@ def number_string_to_integer(in_str: str) -> int:
      ValueError: Unknown word: xyzzy
      """
      if type(in_str) == int:
-        return in_str
+        return int(in_str)
  
      current = result = 0
      in_str = in_str.replace('-', ' ')
diff --git a/src/pyutils/typez/centcount.py b/src/pyutils/typez/centcount.py

index b37898f341239c41a3542d8dd9d8719cd6e909db..c0c841823d8ec7b1e9b3fa92bb599c625308f9d6 100644 (file)
--- a/src/pyutils/typez/centcount.py
+++ b/src/pyutils/typez/centcount.py
@@ -2,13 +2,52 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""An amount of money (USD) represented as an integral count of
-cents."""
+"""An amount of money represented as an integral count of cents so as
+to avoid floating point artifacts.  Multiplication and division are
+performed using floating point arithmetic but the quotient is cast
+back to an integer number thus truncating the result and
+avoiding floating point arithmetic artifacts.  See details below.
  
-import re
-from typing import Optional, Tuple
+The type guards against inadvertent aggregation of instances with
+non-matching currencies, the division of one CentCount by another, and
+has a strict mode which disallows comparison or aggregation with
+non-CentCount operands (i.e. no comparison or aggregation with literal
+numbers).
+
+.. note::
+
+    Multiplication and division are performed by converting the
+    `CentCount` into a float and operating on two floating point
+    numbers.  The result is then cast back to an int which loses
+    precision beyond the 1-cent granularity in order to avoid floating
+    point representation artifacts.
  
-from pyutils import math_utils
+    This can cause "problems" such as the one illustrated
+    below::
+
+        >>> c = CentCount(100.00)
+        >>> c
+        100.00 USD
+        >>> c = c * 2
+        >>> c
+        200.00 USD
+        >>> c = c / 3
+        >>> c
+        66.66 USD
+
+    Two-thirds of $100.00 is $66.66666... which might be
+    expected to round upwards to $66.67 but it does not
+    because the `int` cast truncates the result.  Be aware
+    of this and decide whether it's suitable for your
+    application.
+
+See also the :class:`pyutils.typez.Money` class which uses Python
+Decimals (see: https://docs.python.org/3/library/decimal.html) to
+represent monetary amounts.
+"""
+
+import re
+from typing import Optional, Tuple, Union
  
  
  class CentCount(object):
@@ -17,7 +56,25 @@ class CentCount(object):
      issues by treating amount as a simple integral count of cents.
      """
  
-    def __init__(self, centcount, currency: str = 'USD', *, strict_mode=False):
+    def __init__(
+        self,
+        centcount: Union[int, float, str, 'CentCount'] = 0,
+        currency: str = 'USD',
+        *,
+        strict_mode=False,
+    ):
+        """
+        Args:
+            centcount: the amount of money being represented; this can be
+                a float, int, CentCount or str.
+            currency: optionally declare the currency being represented by
+                this instance.  If provided it will guard against operations
+                such as attempting to add it to non-matching currencies.
+            strict_mode: if True, the instance created will object if you
+                compare or aggregate it with non-CentCount objects; that is,
+                strict_mode disallows comparison with literal numbers or
+                aggregation with literal numbers.
+        """
          self.strict_mode = strict_mode
          if isinstance(centcount, str):
              ret = CentCount._parse(centcount)
@@ -36,10 +93,9 @@ class CentCount(object):
              self.currency = currency
  
      def __repr__(self):
-        a = float(self.centcount)
-        a /= 100
-        a = round(a, 2)
-        s = f'{a:,.2f}'
+        w = self.centcount // 100
+        p = self.centcount % 100
+        s = f'{w}.{p:02d}'
          if self.currency is not None:
              return f'{s} {self.currency}'
          else:
@@ -82,6 +138,30 @@ class CentCount(object):
                  return self.__sub__(CentCount(other, self.currency))
  
      def __mul__(self, other):
+        """
+        .. note::
+
+            Multiplication and division are performed by converting the
+            CentCount into a float and operating on two floating point
+            numbers.  But the result is then cast back to an int which
+            loses precision beyond the 1-cent granularity in order to
+            avoid floating point representation artifacts.
+
+            This can cause "problems" such as the one illustrated
+            below::
+
+                >>> c = CentCount(100.00)
+                >>> c = c * 2
+                >>> c = c / 3
+                >>> c
+                66.66 USD
+
+            Two-thirds of $100.00 is $66.66666... which might be
+            expected to round upwards to $66.67 but it does not
+            because the int cast truncates the result.  Be aware
+            of this and decide whether it's suitable for your
+            application.
+        """
          if isinstance(other, CentCount):
              raise TypeError('can not multiply monetary quantities')
          else:
@@ -91,6 +171,30 @@ class CentCount(object):
              )
  
      def __truediv__(self, other):
+        """
+        .. note::
+
+            Multiplication and division are performed by converting the
+            CentCount into a float and operating on two floating point
+            numbers.  But the result is then cast back to an int which
+            loses precision beyond the 1-cent granularity in order to
+            avoid floating point representation artifacts.
+
+            This can cause "problems" such as the one illustrated
+            below::
+
+                >>> c = CentCount(100.00)
+                >>> c = c * 2
+                >>> c = c / 3
+                >>> c
+                66.66 USD
+
+            Two-thirds of $100.00 is $66.66666... which might be
+            expected to round upwards to $66.67 but it does not
+            because the int cast truncates the result.  Be aware
+            of this and decide whether it's suitable for your
+            application.
+        """
          if isinstance(other, CentCount):
              raise TypeError('can not divide monetary quantities')
          else:
@@ -105,16 +209,6 @@ class CentCount(object):
      def __float__(self):
          return self.centcount.__float__() / 100.0
  
-    def truncate_fractional_cents(self):
-        x = int(self)
-        self.centcount = int(math_utils.truncate_float(x))
-        return self.centcount
-
-    def round_fractional_cents(self):
-        x = int(self)
-        self.centcount = int(round(x, 2))
-        return self.centcount
-
      __radd__ = __add__
  
      def __rsub__(self, other):
@@ -214,7 +308,19 @@ class CentCount(object):
  
      @classmethod
      def parse(cls, s: str) -> 'CentCount':
+        """Parses a string format monetary amount and returns a CentCount
+        if possible.
+
+        Args:
+            s: the string to be parsed
+        """
          chunks = CentCount._parse(s)
          if chunks is not None:
              return CentCount(chunks[0], chunks[1])
          raise Exception(f'Unable to parse money string "{s}"')
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/src/pyutils/typez/histogram.py b/src/pyutils/typez/histogram.py

index 2887525b053991fcec299bc8f2fcf9b8a91fefa4..d0a755b3b572d46f0dbf407b4d87673db6bbc466 100644 (file)
--- a/src/pyutils/typez/histogram.py
+++ b/src/pyutils/typez/histogram.py
@@ -3,7 +3,36 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A text-based simple histogram helper class."""
+"""
+This is a text-based histogram helper class.
+
+It creates output like this::
+
+      [4..5): ▏                                                     ( 0.16% n=1)
+      [5..6): ██▍                                                   ( 0.64% n=4)
+      [6..7): ██████▏                                               ( 1.60% n=10)
+      [7..8): ████████████▍                                         ( 3.20% n=20)
+      [8..9): █████████████████████▊                                ( 5.60% n=35)
+     [9..10): ████████████████████████████████▍                     ( 8.32% n=52)
+    [10..11): ██████████████████████████████████████████▍           (10.88% n=68)
+    [11..12): █████████████████████████████████████████████████▉    (12.80% n=80)
+    [12..13): ████████████████████████████████████████████████████▉ (13.60% n=85)
+    [13..14): █████████████████████████████████████████████████▉    (12.80% n=80)
+    [14..15): ██████████████████████████████████████████▍           (10.88% n=68)
+    [15..16): ████████████████████████████████▍                     ( 8.32% n=52)
+    [16..17): █████████████████████▊                                ( 5.60% n=35)
+    [17..18): ████████████▍                                         ( 3.20% n=20)
+    [18..19): ██████▏                                               ( 1.60% n=10)
+    [19..20): ██▍                                                   ( 0.64% n=4)
+    [20..21): ▏                                                     ( 0.16% n=1)
+    --------------------------------------------------------------------------------
+     [4..21):                                                         pop(Σn)=625
+                                                                      mean(x̄)=12.000
+                                                                  median(p50)=12.000
+                                                                     mode(Mo)=12.000
+                                                                     stdev(σ)=0.113
+
+"""
  
  import math
  from dataclasses import dataclass
diff --git a/src/pyutils/typez/money.py b/src/pyutils/typez/money.py

index 47c0a8e200ba8021a20849d4c6e5c43af83dfe9c..d208f7d3b79ce549f07fcd0e0b62191139835ff9 100644 (file)
--- a/src/pyutils/typez/money.py
+++ b/src/pyutils/typez/money.py
@@ -2,13 +2,23 @@
  
  # © Copyright 2021-2022, Scott Gasch
  
-"""A class to represent money.  See also centcount.py"""
+"""A class to represent money.  This class represents monetary amounts as Python Decimals
+(see https://docs.python.org/3/library/decimal.html) internally.
  
-import re
-from decimal import Decimal
-from typing import Optional, Tuple
+The type guards against inadvertent aggregation of instances with
+non-matching currencies, the division of one Money by another, and has
+a strict mode which disallows comparison or aggregation with
+non-Money operands (i.e. no comparison or aggregation with literal
+numbers).
+
+See also :class:`pyutils.typez.centcount.CentCount` which represents monetary
+amounts as an integral number of cents.
+
+"""
  
-from pyutils import math_utils
+import re
+from decimal import ROUND_FLOOR, ROUND_HALF_DOWN, Decimal
+from typing import Optional, Tuple, Union
  
  
  class Money(object):
@@ -18,11 +28,23 @@ class Money(object):
  
      def __init__(
          self,
-        amount: Decimal = Decimal("0"),
+        amount: Union[Decimal, str, float, int, 'Money'] = Decimal("0"),
          currency: str = 'USD',
          *,
          strict_mode=False,
      ):
+        """
+        Args:
+            amount: the initial monetary amount to be represented; can be a
+                Money, int, float, Decimal, str, etc...
+            currency: if provided, indicates what currency this amount is
+                units of and guards against operations such as attempting
+                to aggregate Money instances with non-matching currencies
+                directly.
+            strict_mode: if True, disallows comparison or arithmetic operations
+                between Money instances and any non-Money types (e.g. literal
+                numbers).
+        """
          self.strict_mode = strict_mode
          if isinstance(amount, str):
              ret = Money._parse(amount)
@@ -39,13 +61,28 @@ class Money(object):
              self.currency = currency
  
      def __repr__(self):
-        a = float(self.amount)
-        a = round(a, 2)
-        s = f'{a:,.2f}'
-        if self.currency is not None:
-            return f'{s} {self.currency}'
+        q = Decimal(10) ** -2
+        sign, digits, exp = self.amount.quantize(q).as_tuple()
+        result = []
+        digits = list(map(str, digits))
+        build, next = result.append, digits.pop
+        for i in range(2):
+            build(next() if digits else '0')
+        build('.')
+        if not digits:
+            build('0')
+        i = 0
+        while digits:
+            build(next())
+            i += 1
+            if i == 3 and digits:
+                i = 0
+        if sign:
+            build('-')
+        if self.currency:
+            return ''.join(reversed(result)) + ' ' + self.currency
          else:
-            return f'${s}'
+            return '$' + ''.join(reversed(result))
  
      def __pos__(self):
          return Money(amount=self.amount, currency=self.currency)
@@ -105,13 +142,71 @@ class Money(object):
          return self.amount.__float__()
  
      def truncate_fractional_cents(self):
-        x = float(self)
-        self.amount = Decimal(math_utils.truncate_float(x))
+        """
+        Truncates fractional cents being represented.  e.g.
+
+        >>> m = Money(100.00)
+        >>> m *= 2
+        >>> m /= 3
+
+        At this point the internal representation of `m` is a long
+        `Decimal`:
+
+        >>> m.amount
+        Decimal('66.66666666666666666666666667')
+
+        It will be rendered by `__repr__` reasonably:
+
+        >>> m
+        66.67 USD
+
+        If you want to drop the fractional cents (this rounds toward
+        negative infinity, i.e. `ROUND_FLOOR`), this method will do that:
+
+        >>> m.truncate_fractional_cents()
+        Decimal('66.66')
+        >>> m.amount
+        Decimal('66.66')
+        >>> m
+        66.66 USD
+
+        See also :meth:`round_fractional_cents`
+        """
+        self.amount = self.amount.quantize(Decimal('.01'), rounding=ROUND_FLOOR)
          return self.amount
  
      def round_fractional_cents(self):
-        x = float(self)
-        self.amount = Decimal(round(x, 2))
+        """
+        Rounds fractional cents being represented.  e.g.
+
+        >>> m = Money(100.00)
+        >>> m *= 2
+        >>> m /= 3
+
+        At this point the internal representation of `m` is a long
+        `Decimal`:
+
+        >>> m.amount
+        Decimal('66.66666666666666666666666667')
+
+        It will be rendered by `__repr__` reasonably:
+
+        >>> m
+        66.67 USD
+
+        If you want to round to whole cents (exact half-cent ties go
+        toward zero, i.e. `ROUND_HALF_DOWN`), this method will do that:
+
+        >>> m.round_fractional_cents()
+        Decimal('66.67')
+        >>> m.amount
+        Decimal('66.67')
+        >>> m
+        66.67 USD
+
+        See also :meth:`truncate_fractional_cents`
+        """
+        self.amount = self.amount.quantize(Decimal('.01'), rounding=ROUND_HALF_DOWN)
          return self.amount
  
      __radd__ = __add__
@@ -210,7 +305,18 @@ class Money(object):
  
      @classmethod
      def parse(cls, s: str) -> 'Money':
+        """Parses a string and attempts to create a Money instance.
+
+        Args:
+            s: the string to parse
+        """
          chunks = Money._parse(s)
          if chunks is not None:
              return Money(chunks[0], chunks[1])
          raise Exception(f'Unable to parse money string "{s}"')
+
+
+if __name__ == '__main__':
+    import doctest
+
+    doctest.testmod()
diff --git a/tests/typez/centcount_test.py b/tests/typez/centcount_test.py

index 5ba60b147895ab9ed3aa0ba354177cc7c11792d4..2bb6d3e8e3b21b6cdb02d985ae74db35dc231547 100755 (executable)
--- a/tests/typez/centcount_test.py
+++ b/tests/typez/centcount_test.py
@@ -51,6 +51,8 @@ class TestCentCount(unittest.TestCase):
          amount = CentCount(10.00)
          x = amount / 5.0
          self.assertEqual(CentCount(2.00), x)
+        y = amount / 1.9999999999
+        self.assertEqual(CentCount(5.00), y)
          with self.assertRaises(TypeError):
              another = CentCount(1.33)
              amount /= another
@@ -94,12 +96,6 @@ class TestCentCount(unittest.TestCase):
              print(two > 1.0)
          self.assertTrue(two > one)
  
-    def test_truncate_and_round(self):
-        ten = CentCount(10.0)
-        x = ten * 2 / 3
-        x.truncate_fractional_cents()
-        self.assertEqual(CentCount(6.66), x)
-
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/tests/typez/money_test.py b/tests/typez/money_test.py

index ee1e3923bc5bdaec7adac3df74c59b703b032a1e..e296e5154a02f242c44e21e9c1be3df4243b45e9 100755 (executable)
--- a/tests/typez/money_test.py
+++ b/tests/typez/money_test.py
@@ -5,6 +5,7 @@
  """money unittest."""
  
  import unittest
+from decimal import Decimal
  
  from pyutils import unittest_utils
  from pyutils.typez.money import Money
@@ -97,9 +98,13 @@ class TestMoney(unittest.TestCase):
      def test_truncate_and_round(self):
          ten = Money(10.0)
          x = ten * 2 / 3
-        self.assertEqual(6.66, x.truncate_fractional_cents())
+        expected = Decimal(6.66)
+        expected = expected.quantize(Decimal('.01'))
+        self.assertEqual(expected, x.truncate_fractional_cents())
          x = ten * 2 / 3
-        self.assertEqual(6.67, x.round_fractional_cents())
+        expected = Decimal(6.67)
+        expected = expected.quantize(Decimal('.01'))
+        self.assertEqual(expected, x.round_fractional_cents())
  
  
  if __name__ == '__main__':
author	Scott Gasch <scott@gasch.org>
	Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)
committer	Scott Gasch <scott@gasch.org>
	Sun, 16 Oct 2022 18:48:07 +0000 (11:48 -0700)
docs/conf.py		patch \| blob \| history
docs/pyutils.collectionz.rst		patch \| blob \| history
docs/pyutils.compress.rst		patch \| blob \| history
docs/pyutils.datetimez.rst		patch \| blob \| history
docs/pyutils.files.rst		patch \| blob \| history
docs/pyutils.parallelize.rst		patch \| blob \| history
docs/pyutils.rst		patch \| blob \| history
docs/pyutils.security.rst		patch \| blob \| history
docs/pyutils.typez.rst		patch \| blob \| history
src/pyutils/ansi.py		patch \| blob \| history
src/pyutils/collectionz/bidict.py		patch \| blob \| history
src/pyutils/collectionz/bst.py		patch \| blob \| history
src/pyutils/collectionz/trie.py		patch \| blob \| history
src/pyutils/compress/letter_compress.py		patch \| blob \| history
src/pyutils/datetimez/dateparse_utils.g4		patch \| blob \| history
src/pyutils/datetimez/dateparse_utils.py		patch \| blob \| history
src/pyutils/datetimez/datetime_utils.py		patch \| blob \| history
src/pyutils/files/directory_filter.py		patch \| blob \| history
src/pyutils/files/file_utils.py		patch \| blob \| history
src/pyutils/files/lockfile.py		patch \| blob \| history
src/pyutils/function_utils.py		patch \| blob \| history
src/pyutils/logging_utils.py		patch \| blob \| history
src/pyutils/parallelize/deferred_operand.py		patch \| blob \| history
src/pyutils/parallelize/executors.py		patch \| blob \| history
src/pyutils/parallelize/parallelize.py		patch \| blob \| history
src/pyutils/parallelize/smart_future.py		patch \| blob \| history
src/pyutils/parallelize/thread_utils.py		patch \| blob \| history
src/pyutils/search/logical_search.py		patch \| blob \| history
src/pyutils/security/acl.py		patch \| blob \| history
src/pyutils/string_utils.py		patch \| blob \| history
src/pyutils/typez/centcount.py		patch \| blob \| history
src/pyutils/typez/histogram.py		patch \| blob \| history
src/pyutils/typez/money.py		patch \| blob \| history
tests/typez/centcount_test.py		patch \| blob \| history
tests/typez/money_test.py		patch \| blob \| history