From 5bbcfd4dfccd411df0afd03f9790f67b4c852b9c Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Fri, 14 Oct 2022 15:01:24 -0700 Subject: [PATCH] More documentation changes but includes a change to config.py that removes the ability to set flags via environment variables. It was hacky and I don't use it. --- docs/pyutils.rst | 93 ++++++++--------- src/pyutils/bootstrap.py | 20 +--- src/pyutils/config.py | 209 +++++++++++++++++++-------------------- 3 files changed, 148 insertions(+), 174 deletions(-) diff --git a/docs/pyutils.rst b/docs/pyutils.rst index 8fc96f7..97a2221 100644 --- a/docs/pyutils.rst +++ b/docs/pyutils.rst @@ -7,7 +7,8 @@ library grew into pyutils: a set of collections, helpers and utilities that I find useful and hope you will too. Code is under `src/pyutils/`. Most code includes documentation and inline -doctests. +doctests. I've tried to organize it into logical packages based on the +code's functionality. Unit and integration tests are under `tests/*`. To run all tests:: @@ -15,27 +16,28 @@ Unit and integration tests are under `tests/*`. To run all tests:: ./run_tests.py --all [--coverage] See the README under `tests/` and the code of `run_tests.py` for more -options / information. +options / information about running tests. This package generates Sphinx docs which are available at: https://wannabe.guru.org/pydocs/pyutils/pyutils.html Package code is checked into a local git server and available to clone -from git at https://wannabe.guru.org/git/pyutils.git or from a web browser -at: +from git at https://wannabe.guru.org/git/pyutils.git or to view in a +web browser at: https://wannabe.guru.org/gitweb/?p=pyutils.git;a=summary For a long time this was just a local library on my machine that my -tools imported but I've now decided to release it on PyPi. Early +tools imported but I've now decided to release it on PyPi. Earlier development happened in a different git repo: https://wannabe.guru.org/gitweb/?p=python_utils.git;a=summary -I hope you find this useful. LICENSE and NOTICE describe reusing this -code and where everything came from. Drop me a line if you are using -this, find a bug, have a question, or have a suggestion: +The LICENSE and NOTICE files at the root of the project describe +reusing this code and where everything came from. Drop me a line +if you are using this, find a bug, have a question, or have a +suggestion: --Scott Gasch (scott.gasch@gmail.com) @@ -45,6 +47,7 @@ Subpackages .. toctree:: :maxdepth: 4 + :name: mastertoc pyutils.collectionz pyutils.compress @@ -62,14 +65,21 @@ pyutils.ansi module ------------------- This file mainly contains code for changing the nature of text printed -to the console via ANSI escape sequences. e.g. it can be used to emit -text that is bolded, underlined, italicised, colorized, etc... +to the console via ANSI escape sequences. For example, it can be used +to emit text that is bolded, underlined, italicised, colorized, etc... -It also contains a colorizing context that will apply color patterns -based on regular expressions to any data emitted to stdout that may be -useful in adding color to other programs' outputs, for instance. +It does *not* contain ANSI escape sequences that do things like move +the cursor or record/restore its position. It is focused on text style +only. -Here are the predefined color names it knows: +The file includes a colorizing context that will apply color patterns +based on regular expressions / callables to any data emitted to stdout +that may be useful in adding color to other programs' outputs, for +instance. + +In addition, it contains a mapping from color name to RGB value that it +uses to enable friendlier color names in most of its functions. Here +is the list of predefined color names it knows: .. raw:: html @@ -2135,7 +2145,7 @@ Here are the predefined color names it knows: .. note:: You can also use raw RGB values with this module so you do not have to use - the predefined color names. + these predefined color names unless you want to. --- @@ -2147,15 +2157,9 @@ Here are the predefined color names it knows: pyutils.argparse\_utils module ------------------------------ -I use the Python internal :py:mod:`argparse` module for commandline argument -parsing but found it lacking in some ways. This module contains code to -fill those gaps. It include stuff like: - - - An :class:`argparse.Action` to create pairs of flags such as - `--feature` and `--no_feature`. - - A helper to parse and validate bools, IP addresses, MAC - addresses, filenames, percentages, dates, datetimes, and - durations passed as flags. +I use and love the Python internal :py:mod:`argparse` module for +commandline argument parsing but found it lacking in some ways. This +module contains code to fill those gaps. See also :py:mod:`pyutils.config`. --- @@ -2167,16 +2171,10 @@ fill those gaps. It include stuff like: pyutils.bootstrap module ------------------------ -Bootstrap module defines a decorator meant to wrap your main function. -This decorator will do several things for you: - - - If your code uses the :file:`config.py` module (see below), it invokes - :py:meth:`parse` automatically to initialize `config.config` from commandline - flags, environment variables, or other sources. - - It will optionally break into pdb in response to an unhandled - Exception at the top level of your code. - - It initializes logging for your program (see :file:`logging.py`). - - It can optionally run a code and/or memory profiler on your code. +The bootstrap module defines a decorator meant to wrap your main function. +This is optional, of course: you can use this library without using the +bootstrap decorator on your main. If you choose to use it, though, it will +do some work for you automatically. --- @@ -2188,22 +2186,17 @@ This decorator will do several things for you: pyutils.config module --------------------- -This module reads the program's configuration parameters from: the -commandline (using :py:mod:`argparse`), environment variables and/or a -shared zookeeper-based configuration. It stores this configuration -state in a dict-like structure that can be queried by your code at -runtime. - -It handles creating a nice `--help` message for your code. - -It can optionally react to dynamic config changes and change state at -runtime (if and only if you name your flag with the *dynamic_* prefix -and are using zookeeper-based configs). - -It can optionally save and retrieve sets of arguments from files on -the local disk or on Zookeeper. - -All of my examples use this as does the pyutils library itself. +The config module is an opinionated way to set up input parameters +to your program. It is enabled by using the :py:mod:`pyutils.bootstrap` +decorator or by simply calling :py:meth:`pyutils.config.parse` early in main +(which is what :py:meth:`pyutils.bootstrap.initialize` does for you). + +If you use this module, input parameters to your program can come from +the commandline (and are configured using Python's :py:mod:`argparse`). +But they can also be be augmented or replaced using saved configuration +files stored either on the local filesystem or on Apache Zookeeper. +There is a provision for enabling dynamic arguments (i.e. that can change +during runtime) via Zookeeper. --- diff --git a/src/pyutils/bootstrap.py b/src/pyutils/bootstrap.py index 8dc4a0c..3ee5f1f 100644 --- a/src/pyutils/bootstrap.py +++ b/src/pyutils/bootstrap.py @@ -271,9 +271,8 @@ def dump_all_objects() -> None: def initialize(entry_point): """ - Remember to initialize config, initialize logging, set/log a random - seed, etc... before running main. If you use this decorator around - your main, like this:: + Do whole program setup and instrumentation. See module comments for + details. To use:: from pyutils import bootstrap @@ -283,21 +282,6 @@ def initialize(entry_point): if __name__ == '__main__': main() - - You get: - - * The ability to break into pdb on unhandled exceptions, - * automatic support for :file:`config.py` (argument parsing) - * automatic logging support for :file:`logging.py`, - * the ability to enable code profiling, - * the ability to enable module import auditing, - * optional memory profiling for your program, - * ability to set random seed via commandline, - * automatic program timing and reporting, - * more verbose error handling and reporting, - - Most of these are enabled and/or configured via commandline flags - (see below). """ @functools.wraps(entry_point) diff --git a/src/pyutils/config.py b/src/pyutils/config.py index e173e71..be11a77 100644 --- a/src/pyutils/config.py +++ b/src/pyutils/config.py @@ -2,20 +2,26 @@ # © Copyright 2021-2022, Scott Gasch -"""Global configuration driven by commandline arguments, environment variables, -saved configuration files, and zookeeper-based dynamic configurations. This -works across several modules. +"""Global program configuration driven by commandline arguments and, +optionally, from saved (local or Zookeeper) configuration files... with +optional support for dynamic arguments (i.e. that can change during runtime). -Example usage: +Let's start with an example of how to use :py:mod:`pyutils.config`. It's +pretty easy for normal commandline arguments because it uses :py:mod:`argparse`: In your file.py:: from pyutils import config + # Call add_commandline_args to get an argparse.ArgumentParser + # for file.py. Each file uses a separate ArgumentParser + # chained off the main namespace. parser = config.add_commandline_args( "Module", "Args related to module doing the thing.", ) + + # Then simply add argparse-style arguments to it, as usual. parser.add_argument( "--module_do_the_thing", type=bool, @@ -27,6 +33,7 @@ Example usage: from pyutils import config + # main.py may have some arguments of its own, so add them. parser = config.add_commandline_args( "Main", "A program that does the thing.", @@ -39,13 +46,17 @@ Example usage: ) def main() -> None: - config.parse() # Very important, this must be invoked! + config.parse() # Then remember to call config.parse() early on. + + If you set this up and remember to invoke :py:meth:`pyutils.config.parse`, + all commandline arguments will play nicely together across all modules / files + in your program automatically. Argparse help messages will group flags by + the file they affect. - If you set this up and remember to invoke config.parse(), all commandline - arguments will play nicely together. This is done automatically for you - if you're using the :meth:`bootstrap.initialize` decorator on - your program's entry point. See :meth:`python_modules.bootstrap.initialize` - for more details.:: + If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can + optionally wrap your program's entry point, it will remember to call + :py:meth:`pyutils.config.parse` for you so you can omit the last part. + That looks like this:: from pyutils import bootstrap @@ -56,7 +67,8 @@ Example usage: if __name__ == '__main__': main() - Either way, you'll get this behavior from the commandline:: + Either way, you'll get an aggregated usage message along with flags broken + down per file in help:: % main.py -h usage: main.py [-h] @@ -75,11 +87,39 @@ Example usage: --dry_run Should we really do the thing? - Arguments themselves should be accessed via - :code:`config.config['arg_name']`. e.g.:: + Once :py:meth:`pyutils.config.parse` has been called (either automatically + by :py:mod:`puytils.bootstrap` or manually, the program configuration + state is ready in a dict-like object called `config.config`. For example, + to check the state of the `--dry_run` flag:: if not config.config['dry_run']: module.do_the_thing() + + Using :py:mod:`pyutils.config` allows you to "save" and "load" whole + sets of commandline arguments using the `--config_savefile` and the + `--config_loadfile` arguments. The former saves all arguments (other than + itself) to an ascii file whose path you provide. The latter reads all + arguments from an ascii file whose path you provide. + + Saving and loading sets of arguments can make complex operations easier + to set up. They also allows for dynamic arguments. + + If you use Apache Zookeeper, you can prefix paths to + `--config_savefile` and `--config_loadfile` with the string "zk:" + to cause the path to be interpreted as a Zookeeper path rather + than one on the local filesystem. When loading arguments from + Zookeeker, the :py:mod:`pyutils.config` code registers a listener + to be notified on state change (e.g. when some other instance + overwrites your Zookeeper based configuration). Listeners then + dynamically update the value of any flag in the `config.config` + dict whose name contains the string "dynamic". So, for example, + the `--dynamic_database_connect_string` argument would be + modifiable at runtime when using Zookeeper based configurations. + Flags that do not contain the string "dynamic" will not change. + And nothing is dynamic unless we're reading configuration from + Zookeeper. + + For more information about Zookeeper, see https://zookeeper.apache.org/. """ import argparse @@ -88,7 +128,7 @@ import os import pprint import re import sys -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. @@ -175,7 +215,12 @@ GROUP.add_argument( type=str, metavar='FILENAME', default=None, - help='Populate a config file (compatible with --config_loadfile) with the given path for later use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:path) at startup time will see their configuration dynamically updated; flags with "dynamic" in their names (e.g. --my_dynamic_flag) may have their values changed. You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.', + help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).', +) +GROUP.add_argument( + '--config_allow_dynamic_updates', + default=False, + help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.', ) GROUP.add_argument( '--config_rejects_unrecognized_arguments', @@ -193,15 +238,23 @@ GROUP.add_argument( class Config: """ + .. warning:: + + Do not instantiate this class directly; it is meant to be a + global singleton called `pyutils.config.CONFIG`. Instead, use + :py:meth:`pyutils.config.add_commandline_args` to get an + `ArgumentGroup` and add your arguments to it. Then call + :py:meth:`pyutils.config.parse` to parse global configuration + from your main program entry point. + Everything in the config module used to be module-level functions and variables but it made the code ugly and harder to maintain. Now, this class does the heavy lifting. We still rely on some globals, though: - ARGS and GROUP to interface with argparse - PROGRAM_NAME stores argv[0] close to program invocation - ORIG_ARGV stores the original argv list close to program invocation - CONFIG and config: hold the (singleton) instance of this class. - + - ARGS and GROUP to interface with argparse + - PROGRAM_NAME stores argv[0] close to program invocation + - ORIG_ARGV stores the original argv list close to program invocation + - CONFIG and config: hold the (singleton) instance of this class. """ def __init__(self): @@ -242,7 +295,8 @@ class Config: def add_commandline_args( title: str, description: str = "" ) -> argparse._ArgumentGroup: - """Create a new context for arguments and return a handle. + """Create a new context for arguments and return an ArgumentGroup + to the caller for module-level population. Args: title: A title for your module's commandline arguments group. @@ -265,8 +319,10 @@ class Config: @staticmethod def is_flag_already_in_argv(var: str) -> bool: - """Returns true if a particular flag is passed on the commandline - and false otherwise. + """ + Returns: + True if a particular flag is passed on the commandline + and False otherwise. Args: var: The flag to search for. @@ -306,30 +362,6 @@ class Config: reordered_action_groups.insert(0, grp) return reordered_action_groups - @staticmethod - def _parse_arg_into_env(arg: str) -> Optional[Tuple[str, str, List[str]]]: - """Internal helper to parse commandling args into environment vars.""" - arg = arg.strip() - if not arg.startswith('['): - return None - arg = arg.strip('[') - if not arg.endswith(']'): - return None - arg = arg.strip(']') - - chunks = arg.split() - if len(chunks) > 1: - var = chunks[0] - else: - var = arg - - # Environment vars the same as flag names without - # the initial -'s and in UPPERCASE. - env = var.upper() - while env[0] == '-': - env = env[1:] - return var, env, chunks - @staticmethod def _to_bool(in_str: str) -> bool: """ @@ -370,49 +402,6 @@ class Config: """ return in_str.lower() in ("true", "1", "yes", "y", "t", "on") - def _augment_sys_argv_from_environment_variables(self): - """Internal. Look at the system environment for variables that match - commandline arg names. This is done via some munging such that: - - :code:`--argument_to_match` - - ...is matched by: - - :code:`ARGUMENT_TO_MATCH` - - This allows users to set args via shell environment variables - in lieu of passing them on the cmdline. - - """ - usage_message = Config.usage() - optional = False - arg = '' - - # Foreach valid optional commandline option (chunk) generate - # its analogous environment variable. - for chunk in usage_message.split(): - if chunk[0] == '[': - optional = True - if optional: - arg += f'{chunk} ' - if chunk[-1] == ']': - optional = False - _ = Config._parse_arg_into_env(arg) - if _: - var, env, chunks = _ - if env in os.environ: - if not Config.is_flag_already_in_argv(var): - value = os.environ[env] - self.saved_messages.append( - f'Initialized from environment: {var} = {value}' - ) - if len(chunks) == 1 and Config._to_bool(value): - sys.argv.append(var) - elif len(chunks) > 1: - sys.argv.append(var) - sys.argv.append(value) - arg = '' - def _process_dynamic_args(self, event): """Invoked as a callback when a zk-based config changed.""" @@ -441,7 +430,7 @@ class Config: # 'dynamic' if we are going to allow them to change at # runtime as a signal that the programmer is expecting # this. - if 'dynamic' in arg: + if 'dynamic' in arg and config.config['config_allow_dynamic_updates']: temp_argv.append(arg) logger.info("Updating %s from zookeeper async config change.", arg) @@ -581,10 +570,19 @@ class Config: self.saved_messages.append(f'Saved config to zookeeper in {zkpath}') def parse(self, entry_module: Optional[str]) -> Dict[str, Any]: - """Main program should call this early in main(). Note that the - :code:`bootstrap.initialize` wrapper takes care of this automatically. + """Main program should invoke this early in main(). Note that the + :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this automatically. This should only be called once per program invocation. + Args: + entry_module: Optional string to ensure we understand which module + contains the program entry point. Determined heuristically if not + provided. + + Returns: + A dict containing the parsed program configuration. Note that this can + be safely ignored since it is also saved in `config.config` and may + be used directly using that identifier. """ if self.config_parse_called: return self.config @@ -592,7 +590,7 @@ class Config: # If we're about to do the usage message dump, put the main # module's argument group last in the list (if possible) so that # when the user passes -h or --help, it will be visible on the - # screen w/o scrolling. + # screen w/o scrolling. This just makes for a nicer --help screen. for arg in sys.argv: if arg in ('--help', '-h'): if entry_module is not None: @@ -601,14 +599,8 @@ class Config: entry_module ) - # Examine the environment for variables that match known flags. - # For a flag called --example_flag the corresponding environment - # variable would be called EXAMPLE_FLAG. If found, hackily add - # these into sys.argv to be parsed. - self._augment_sys_argv_from_environment_variables() - - # Look for loadfile and read/parse it if present. This also - # works by jamming these values onto sys.argv. + # Look for --config_loadfile argument and, if found, read/parse + # Note that this works by jamming values onto sys.argv; kinda ugly. self._augment_sys_argv_from_loadfile() # Parse (possibly augmented, possibly completely overwritten) @@ -616,10 +608,12 @@ class Config: known, unknown = ARGS.parse_known_args() self.config.update(vars(known)) - # Reconstruct the argv with unrecognized flags for the benefit of - # future argument parsers. For example, unittest_main in python - # has some of its own flags. If we didn't recognize it, maybe - # someone else will. + # Reconstruct the sys.argv with unrecognized flags for the + # benefit of future argument parsers. For example, + # unittest_main in python has some of its own flags. If we + # didn't recognize it, maybe someone else will. Or, if + # --config_rejects_unrecognized_arguments was passed, die + # if we have unknown arguments. if len(unknown) > 0: if config['config_rejects_unrecognized_arguments']: raise Exception( @@ -643,6 +637,9 @@ class Config: if config['config_dump']: self.dump_config() + # Finally, maybe exit now if the user passed + # --config_exit_after_parse indicating they want to just + # update a config file and halt. self.config_parse_called = True if config['config_exit_after_parse']: print("Exiting because of --config_exit_after_parse.") -- 2.45.2