#!/usr/bin/env python3
-"""Global configuration driven by commandline arguments (even across
-different modules). Usage:
-
- module.py:
- ----------
- import config
-
- parser = config.add_commandline_args(
- "Module",
- "Args related to module doing the thing.",
- )
- parser.add_argument(
- "--module_do_the_thing",
- type=bool,
- default=True,
- help="Should the module do the thing?"
- )
-
- main.py:
- --------
- import config
-
- def main() -> None:
+# © Copyright 2021-2022, Scott Gasch
+
+"""Global configuration driven by commandline arguments, environment variables
+and saved configuration files. This works across several modules.
+
+Usage:
+
+ In your file.py::
+
+ import config
+
parser = config.add_commandline_args(
- "Main",
- "A program that does the thing.",
+ "Module",
+ "Args related to module doing the thing.",
)
parser.add_argument(
- "--dry_run",
+ "--module_do_the_thing",
type=bool,
- default=False,
- help="Should we really do the thing?"
+ default=True,
+ help="Should the module do the thing?"
)
- config.parse() # Very important, this must be invoked!
+
+ In your main.py::
+
+ import config
+
+ def main() -> None:
+ parser = config.add_commandline_args(
+ "Main",
+ "A program that does the thing.",
+ )
+ parser.add_argument(
+ "--dry_run",
+ type=bool,
+ default=False,
+ help="Should we really do the thing?"
+ )
+ config.parse() # Very important, this must be invoked!
If you set this up and remember to invoke config.parse(), all commandline
- arguments will play nicely together:
+ arguments will play nicely together. This is done automatically for you
+ if you're using the :meth:`bootstrap.initialize` decorator on
+ your program's entry point. See :meth:`python_modules.bootstrap.initialize`
+ for more details.::
- % main.py -h
- usage: main.py [-h]
- [--module_do_the_thing MODULE_DO_THE_THING]
- [--dry_run DRY_RUN]
+ import bootstrap
- Module:
- Args related to module doing the thing.
+ @bootstrap.initialize
+ def main():
+ whatever
- --module_do_the_thing MODULE_DO_THE_THING
- Should the module do the thing?
+ if __name__ == '__main__':
+ main()
- Main:
- A program that does the thing
+ Either way, you'll get this behavior from the commandline::
- --dry_run
- Should we really do the thing?
+ % main.py -h
+ usage: main.py [-h]
+ [--module_do_the_thing MODULE_DO_THE_THING]
+ [--dry_run DRY_RUN]
+
+ Module:
+ Args related to module doing the thing.
+
+ --module_do_the_thing MODULE_DO_THE_THING
+ Should the module do the thing?
+
+ Main:
+ A program that does the thing
+
+ --dry_run
+ Should we really do the thing?
Arguments themselves should be accessed via
- config.config['arg_name']. e.g.
+ :code:`config.config['arg_name']`. e.g.::
- if not config.config['dry_run']:
- module.do_the_thing()
+ if not config.config['dry_run']:
+ module.do_the_thing()
"""
import pprint
import re
import sys
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
+
+from kazoo.client import KazooClient
+
+import scott_secrets
# This module is commonly used by others in here and should avoid
# taking any unnecessary dependencies back on them.
-# Note: at this point in time, logging hasn't been configured and
-# anything we log will come out the root logger.
-
-
-class LoadFromFile(argparse.Action):
- """Helper to load a config file into argparse."""
- def __call__ (self, parser, namespace, values, option_string = None):
- with values as f:
- buf = f.read()
- argv = []
- for line in buf.split(','):
- line = line.strip()
- line = line.strip('{')
- line = line.strip('}')
- m = re.match(r"^'([a-zA-Z_\-]+)'\s*:\s*(.*)$", line)
- if m:
- key = m.group(1)
- value = m.group(2)
- value = value.strip("'")
- if value not in ('None', 'True', 'False'):
- argv.append(f'--{key}')
- argv.append(value)
- parser.parse_args(argv, namespace)
# Messages generated before logging has been configured are parked here
# and emitted later by late_logging().
SAVED_MESSAGES: List[str] = []

# Snapshot the program name and the untouched argument vector before
# anything in this module starts appending to sys.argv.
PROGRAM_NAME: str = os.path.basename(sys.argv[0])
ORIG_ARGV: List[str] = list(sys.argv)

# Zookeeper client handle.  It is created lazily so that programs that
# never read or write their config in zookeeper do not pay the latency
# of connecting to it.
ZK: Optional[KazooClient] = None
+
+
class OptionalRawFormatter(argparse.HelpFormatter):
    """Help formatter that leaves "RAW|"-prefixed help text unwrapped.

    For ordinary help strings this behaves exactly like the stock
    argparse help formatter.  When an argument's help text begins with
    "RAW|", the prefix is dropped and the remaining text is emitted
    with its own line breaks instead of being re-wrapped.

    Use it when the layout of a help string matters, e.g.::

        args.add_argument(
            '--mode',
            type=str,
            default='PLAY',
            choices=['CHEAT', 'PLAY'],
            metavar='MODE',
            help='''RAW|Our mode of operation. One of:

            PLAY = play wordle with me!

            CHEAT = given a --template, return the best guess word.
            ''',
        )

    """

    def _split_lines(self, text, width):
        raw_marker = 'RAW|'
        if not text.startswith(raw_marker):
            # Normal help text: defer to the standard wrapping logic.
            return argparse.HelpFormatter._split_lines(self, text, width)
        # Raw help text: drop the marker and honor the author's newlines.
        return text[len(raw_marker):].splitlines()
# The single, process-wide parser into which every module registers
# its own argument group.
ARGS = argparse.ArgumentParser(
    description=None,
    epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
    formatter_class=OptionalRawFormatter,
    fromfile_prefix_chars="@",
    # When loaded by sphinx the same module is sometimes imported more
    # than once, which redefines the arguments it registers at module
    # level.  The cause is not fully understood; tolerate redefinition
    # only when the running program is 'sphinx-build' and keep the
    # strict default everywhere else.
    conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
)

# Set to True once parse() has completed; parse() checks it so that
# repeated calls simply return the existing config.
CONFIG_PARSE_CALLED = False


# The global configuration dictionary.  parse() fills it with the
# parsed arguments and other modules read their settings from it; this
# is the result callers actually care about.
config: Dict[str, Any] = {}
-# Defer logging messages until later when logging has been initialized.
-saved_messages: List[str] = []
+# It would be really nice if this worked from interactive python.
+
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Register a new, titled argument group on the global parser.

    Args:
        title: The title of the argument group for the calling module.
        description: Optional text describing that group.

    Returns:
        The newly created argparse._ArgumentGroup; the caller adds its
        own flags to it.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
group = add_commandline_args(
)
# Flags that control the config module itself.
group.add_argument(
    '--config_loadfile',
    default=None,
    metavar='FILENAME',
    help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to commandline.',
)
group.add_argument(
    '--config_dump',
    action='store_true',
    default=False,
    help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup.',
)
group.add_argument(
    '--config_savefile',
    type=str,
    default=None,
    metavar='FILENAME',
    help='Populate config file compatible with --config_loadfile to save global config for later use.',
)
group.add_argument(
    '--config_rejects_unrecognized_arguments',
    action='store_true',
    default=False,
    help=(
        "If present, config will raise an exception if it doesn't recognize an argument. The "
        "default behavior is to ignore this so as to allow interoperability with programs that "
        "want to use their own argparse calls to parse their own, separate commandline args."
    ),
)
-def is_flag_already_in_argv(var: str):
- """Is a particular flag passed on the commandline?"""
def overwrite_argparse_epilog(msg: str) -> None:
    """Replace the epilog of the global argument parser.

    Args:
        msg: The text to use in place of the default epilog that was
            set when the global parser was created.
    """
    ARGS.epilog = msg
+
+
def is_flag_already_in_argv(var: str) -> bool:
    """Report whether a flag appears anywhere on the current commandline.

    The check is a plain substring test against every element of
    sys.argv, so '--flag' also matches an argument such as
    '--flag=value'.

    Args:
        var: The flag text to search for.

    Returns:
        True if any element of sys.argv contains var, False otherwise.
    """
    return any(var in arg for arg in sys.argv)
-def parse() -> Dict[str, Any]:
- import string_utils
+def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
+ """Internal. Used to reorder the arguments before dumping out a
+ generated help string such that the main program's arguments come
+ last.
+
+ """
+ reordered_action_groups = []
+ for grp in ARGS._action_groups:
+ if entry_module is not None and entry_module in grp.title: # type: ignore
+ reordered_action_groups.append(grp)
+ elif PROGRAM_NAME in group.title: # type: ignore
+ reordered_action_groups.append(grp)
+ else:
+ reordered_action_groups.insert(0, grp)
+ return reordered_action_groups
+
+
def print_usage() -> None:
    """Print the global parser's help message by calling ARGS.print_help()."""
    ARGS.print_help()
+
- """Main program should call this early in main()"""
- global config_parse_called
- if config_parse_called:
- return
- config_parse_called = True
- global saved_messages
def usage() -> str:
    """Build the usage text of the global argument parser.

    Returns:
        The string produced by ARGS.format_usage().
    """
    usage_text = ARGS.format_usage()
    return usage_text
- # Examine the environment variables to settings that match
- # known flags.
- usage_message = args.format_usage()
+
+def _augment_sys_argv_from_environment_variables():
+ """Internal. Look at the system environment for variables that match
+ arg names. This is done via some munging such that:
+
+ :code:`--argument_to_match`
+
+ ...is matched by:
+
+ :code:`ARGUMENT_TO_MATCH`
+
+ This allows programmers to set args via shell environment variables
+ in lieu of passing them on the cmdline.
+
+ """
+
+ usage_message = usage()
optional = False
var = ''
for x in usage_message.split():
if env in os.environ:
if not is_flag_already_in_argv(var):
value = os.environ[env]
- saved_messages.append(
- f'Initialized from environment: {var} = {value}'
- )
- if len(chunks) == 1 and string_utils.to_bool(value):
+ SAVED_MESSAGES.append(f'Initialized from environment: {var} = {value}')
+ from string_utils import to_bool
+
+ if len(chunks) == 1 and to_bool(value):
sys.argv.append(var)
elif len(chunks) > 1:
sys.argv.append(var)
sys.argv.append(value)
var = ''
env = ''
- else:
- next
- # Parse (possibly augmented) commandline args with argparse normally.
- #config.update(vars(args.parse_args()))
- known, unknown = args.parse_known_args()
+
+def _augment_sys_argv_from_loadfile():
+ """Internal. Augment with arguments persisted in a saved file."""
+
+ global ZK
+ loadfile = None
+ saw_other_args = False
+ grab_next_arg = False
+ for arg in sys.argv[1:]:
+ if 'config_loadfile' in arg:
+ pieces = arg.split('=')
+ if len(pieces) > 1:
+ loadfile = pieces[1]
+ else:
+ grab_next_arg = True
+ elif grab_next_arg:
+ loadfile = arg
+ else:
+ saw_other_args = True
+
+ if loadfile is not None:
+ zkpath = None
+ if loadfile[:3] == 'zk:':
+ try:
+ if ZK is None:
+ ZK = KazooClient(
+ hosts=scott_secrets.ZOOKEEPER_NODES,
+ use_ssl=True,
+ verify_certs=False,
+ keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
+ keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
+ certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
+ )
+ ZK.start()
+ zkpath = loadfile[3:]
+ if not zkpath.startswith('/config/'):
+ zkpath = '/config/' + zkpath
+ zkpath = re.sub(r'//+', '/', zkpath)
+ if not ZK.exists(zkpath):
+ raise Exception(
+ f'ERROR: --config_loadfile argument must be a file, {loadfile} not found (in zookeeper)'
+ )
+ except Exception as e:
+ raise Exception(
+ f'ERROR: Error talking with zookeeper while looking for {loadfile}'
+ ) from e
+ elif not os.path.exists(loadfile):
+ raise Exception(
+ f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.'
+ )
+
+ if saw_other_args:
+ msg = f'Augmenting commandline arguments with those from {loadfile}.'
+ else:
+ msg = f'Reading commandline arguments from {loadfile}.'
+ print(msg, file=sys.stderr)
+ SAVED_MESSAGES.append(msg)
+
+ newargs = []
+ if zkpath:
+ try:
+ assert ZK
+ contents = ZK.get(zkpath)[0]
+ contents = contents.decode()
+ newargs = [
+ arg.strip('\n') for arg in contents.split('\n') if 'config_savefile' not in arg
+ ]
+ size = sys.getsizeof(newargs)
+ if size > 1024 * 1024:
+ raise Exception(f'Saved args are too large! ({size} bytes)')
+ except Exception as e:
+ raise Exception(f'Error reading {zkpath} from zookeeper.') from e
+ SAVED_MESSAGES.append(f'Loaded config from zookeeper from {zkpath}')
+ else:
+ with open(loadfile, 'r') as rf:
+ newargs = rf.readlines()
+ newargs = [arg.strip('\n') for arg in newargs if 'config_savefile' not in arg]
+ sys.argv += newargs
+
+
def _strip_savefile_flag(argv: List[str]) -> List[str]:
    """Internal. Return a copy of argv without --config_savefile and its value."""
    kept = []
    skip_value = False
    for arg in argv:
        if skip_value:
            # This is the FILENAME that followed a bare --config_savefile.
            skip_value = False
        elif arg == '--config_savefile':
            skip_value = True
        elif not arg.startswith('--config_savefile='):
            kept.append(arg)
    return kept


def _write_savefile(savefile: str, data: str) -> None:
    """Internal. Persist data to savefile: a zookeeper node under /config/
    when savefile starts with 'zk:', otherwise a local file."""
    global ZK
    if savefile.startswith('zk:'):
        zkpath = savefile[3:]
        if not zkpath.startswith('/config/'):
            zkpath = '/config/' + zkpath
        zkpath = re.sub(r'//+', '/', zkpath)
        try:
            if not ZK:
                client = KazooClient(
                    hosts=scott_secrets.ZOOKEEPER_NODES,
                    use_ssl=True,
                    verify_certs=False,
                    keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
                    keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
                    certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
                )
                client.start()
                ZK = client
            if not ZK.exists(zkpath):
                ZK.create(zkpath, data.encode())
            else:
                ZK.set(zkpath, data.encode())
        except Exception as e:
            raise Exception(f'Failed to create zookeeper path {zkpath}') from e
        SAVED_MESSAGES.append(f'Saved config to zookeeper in {zkpath}')
    else:
        with open(savefile, 'w') as wf:
            wf.write(data)


def parse(entry_module: Optional[str] = None) -> Dict[str, Any]:
    """Main program should call this early in main(). Note that the
    :code:`bootstrap.initialize` wrapper takes care of this automatically.
    This should only be called once per program invocation; once a call
    has completed, later calls return the existing config unchanged.

    Args:
        entry_module: Optional name (or path) of the program's entry
            module.  It is only used to list that module's argument
            group last in the help output.

    Returns:
        The global config dictionary, populated with the parsed arguments.

    Raises:
        Exception: if unrecognized arguments are present and
            --config_rejects_unrecognized_arguments was given, or if
            loading / saving the config fails.
    """
    global CONFIG_PARSE_CALLED
    if CONFIG_PARSE_CALLED:
        return config

    # If we're about to do the usage message dump, put the main
    # module's argument group last in the list (if possible) so that
    # when the user passes -h or --help, it will be visible on the
    # screen w/o scrolling.  Do this once, however many help flags
    # were passed.
    if any(arg in ('--help', '-h') for arg in sys.argv):
        if entry_module is not None:
            entry_module = os.path.basename(entry_module)
        ARGS._action_groups = _reorder_arg_action_groups_before_help(entry_module)

    # Examine the environment for variables that match known flags.
    # For a flag called --example_flag the corresponding environment
    # variable would be called EXAMPLE_FLAG.  If found, these are
    # added to sys.argv to be parsed.
    _augment_sys_argv_from_environment_variables()

    # Look for loadfile and read/parse it if present.  This also
    # works by appending the values to sys.argv.
    _augment_sys_argv_from_loadfile()

    # Parse (possibly augmented) commandline args with argparse
    # normally and populate config.
    known, unknown = ARGS.parse_known_args()
    config.update(vars(known))

    # Reconstruct the argv with unrecognized flags for the benefit of
    # future argument parsers.  For example, unittest_main in python
    # has some of its own flags.  If we didn't recognize it, maybe
    # someone else will.
    if unknown:
        if config['config_rejects_unrecognized_arguments']:
            raise Exception(
                f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
            )
        SAVED_MESSAGES.append(f'Config encountered unrecognized commandline arguments: {unknown}')
    sys.argv = sys.argv[:1] + unknown

    # Check for savefile and populate it if requested.  The savefile
    # flag and its value are left out of the saved data: the loader
    # discards the flag anyway, and a leftover value would be replayed
    # as a stray argument.
    savefile = config['config_savefile']
    if savefile:
        _write_savefile(savefile, '\n'.join(_strip_savefile_flag(ORIG_ARGV[1:])))

    # Also dump the config on stderr if requested.
    if config['config_dump']:
        dump_config()

    CONFIG_PARSE_CALLED = True
    return config
def has_been_parsed() -> bool:
    """Tell whether the global config has already been parsed.

    Returns:
        The current value of the module-level CONFIG_PARSE_CALLED flag.
    """
    return CONFIG_PARSE_CALLED
def dump_config() -> None:
    """Print the current global configuration to stderr.

    Writes a header line, a pretty-printed rendering of the config
    dictionary and a trailing blank line.  Everything goes to
    sys.stderr so that the program's stdout is left untouched.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(config, stream=sys.stderr)
    # Keep the separator line on the same stream as the dump itself.
    print(file=sys.stderr)
def late_logging():
    """Emit, at debug level, the messages that were deferred until
    logging was initialized.

    The original commandline is logged first, followed by every entry
    of SAVED_MESSAGES in the order it was recorded.
    """
    log = logging.getLogger(__name__)
    log.debug('Original commandline was: %s', ORIG_ARGV)
    for deferred_message in SAVED_MESSAGES:
        log.debug(deferred_message)