3 # © Copyright 2021-2022, Scott Gasch
5 """Global configuration driven by commandline arguments, environment variables,
6 saved configuration files, and zookeeper-based dynamic configurations. This
7 works across several modules.
15 parser = config.add_commandline_args(
17 "Args related to module doing the thing.",
20 "--module_do_the_thing",
23 help="Should the module do the thing?"
30 parser = config.add_commandline_args(
32 "A program that does the thing.",
38 help="Should we really do the thing?"
42 config.parse() # Very important, this must be invoked!
44 If you set this up and remember to invoke config.parse(), all commandline
45 arguments will play nicely together. This is done automatically for you
46 if you're using the :meth:`bootstrap.initialize` decorator on
47 your program's entry point. See :meth:`python_modules.bootstrap.initialize`
56 if __name__ == '__main__':
59 Either way, you'll get this behavior from the commandline::
63 [--module_do_the_thing MODULE_DO_THE_THING]
67 Args related to module doing the thing.
69 --module_do_the_thing MODULE_DO_THE_THING
70 Should the module do the thing?
73 A program that does the thing
76 Should we really do the thing?
78 Arguments themselves should be accessed via
79 :code:`config.config['arg_name']`. e.g.::
81 if not config.config['dry_run']:
91 from typing import Any, Dict, List, Optional, Tuple
95 # This module is commonly used by others in here and should avoid
96 # taking any unnecessary dependencies back on them.
# Snapshot the original program arguments, and the program name derived
# from them, as soon as this module is loaded.
ORIG_ARGV: List[str] = list(sys.argv)
PROGRAM_NAME: str = os.path.basename(ORIG_ARGV[0])
103 class OptionalRawFormatter(argparse.HelpFormatter):
104 """This formatter has the same behavior as the normal argparse text
105 formatter except when the help text of an argument begins with
106 "RAW|". In that case, the line breaks are preserved and the text
109 Use this, for example, when you need the helptext of an argument
110 to have its spacing preserved exactly, e.g.::
116 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
118 help='''RAW|Our mode of operation. One of:
120 PLAY = play wordle with me! Pick a random solution or
121 specify a solution with --template.
123 CHEAT = given a --template and, optionally, --letters_in_word
124 and/or --letters_to_avoid, return the best guess word;
126 AUTOPLAY = given a complete word in --template, guess it step
127 by step showing work;
129 SELFTEST = autoplay every possible solution keeping track of
130 wins/losses and average number of guesses;
132 PRECOMPUTE = populate hash table with optimal guesses.
def _split_lines(self, text, width):
    """Break help text into display lines, honoring the 'RAW|' marker.

    Help text that begins with 'RAW|' has the marker removed and is
    split only at its own line breaks; any other text is handed to the
    stock argparse.HelpFormatter implementation unchanged.
    """
    raw_marker = 'RAW|'
    if not text.startswith(raw_marker):
        return argparse.HelpFormatter._split_lines(self, text, width)
    return text[len(raw_marker):].splitlines()
143 # A global argparser that we will collect arguments in. Each module (including
144 # us) will add arguments to a separate argument group.
145 ARGS = argparse.ArgumentParser(
147 formatter_class=OptionalRawFormatter,
148 fromfile_prefix_chars="@",
149 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
150 # I don't fully understand why but when loaded by sphinx sometimes
151 # the same module is loaded many times causing any arguments it
152 # registers via module-level code to be redefined. Work around
153 # this iff the program is 'sphinx-build'
154 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
157 # Arguments specific to config.py. Other users should get their own group by
158 # invoking config.add_commandline_args.
159 GROUP = ARGS.add_argument_group(
160 f'Global Config ({__file__})',
161 'Args that control the global config itself; how meta!',
167 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
173 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
180 help='Populate a config file (compatible with --config_loadfile) with the given path for later use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:path) at startup time will see their configuration dynamically updated; flags with "dynamic" in their names (e.g. --my_dynamic_flag) may have their values changed. You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
183 '--config_rejects_unrecognized_arguments',
186 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
189 '--config_exit_after_parse',
192 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
198 Everything in the config module used to be module-level functions and
199 variables but it made the code ugly and harder to maintain. Now, this
200 class does the heavy lifting. We still rely on some globals, though:
202 ARGS and GROUP to interface with argparse
203 PROGRAM_NAME stores argv[0] close to program invocation
204 ORIG_ARGV stores the original argv list close to program invocation
205 CONFIG and config: hold the (singleton) instance of this class.
210 # Has our parse() method been invoked yet?
211 self.config_parse_called = False
213 # A configuration dictionary that will contain parsed
214 # arguments. This is the data that is most interesting to our
215 # callers as it will hold the configuration result.
216 self.config: Dict[str, Any] = {}
218 # Defer logging messages until later when logging has been
220 self.saved_messages: List[str] = []
222 # A zookeeper client that is lazily created so as to not incur
223 # the latency of connecting to zookeeper for programs that are
224 # not reading or writing their config data into zookeeper.
225 self.zk: Optional[Any] = None
227 # Per known zk file, what is the max version we have seen?
228 self.max_version: Dict[str, int] = {}
def __getitem__(self, key: str) -> Optional[Any]:
    """Support config['name'] lookups by delegating to self.config.

    Returns the stored value, or None when the key is absent.
    """
    stored = self.config
    return stored[key] if key in stored else None
def __setitem__(self, key: str, value: Any) -> None:
    """Support config['name'] = value by writing through to self.config."""
    self.config.update({key: value})
def __contains__(self, key: str) -> bool:
    """Support 'name' in config by checking the keys of self.config."""
    known_keys = self.config.keys()
    return key in known_keys
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Return the value stored under key in self.config, or default
    when the key is not present."""
    try:
        return self.config[key]
    except KeyError:
        return default
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Register a new argument group on the global parser and return it.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
257 def overwrite_argparse_epilog(msg: str) -> None:
258 """Allows your code to override the default epilog created by
262 msg: The epilog message to substitute for the default.
267 def is_flag_already_in_argv(var: str) -> bool:
268 """Returns true if a particular flag is passed on the commandline
272 var: The flag to search for.
280 def print_usage() -> None:
281 """Prints the normal help usage message out."""
288 program usage help text as a string.
290 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal. Compute an ordering of the parser's argument groups for
    help output in which the main program's groups come last.

    Args:
        entry_module: The name of the program's entry module, or None
            when it is not known.

    Returns:
        A new list holding every group in ARGS._action_groups.  Groups
        whose title mentions entry_module or PROGRAM_NAME are appended
        at the end in their original relative order; every other group
        is inserted at the front of the list.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # argparse permits groups without a title; treat a missing title
        # as one that matches nothing rather than raising a TypeError.
        title = grp.title or ''
        # Each group must be judged by its *own* title.  Testing
        # GROUP.title (the config module's own group) here gives the
        # same answer for every group regardless of which one is being
        # placed, so the program's groups were never recognized.
        if (entry_module is not None and entry_module in title) or PROGRAM_NAME in title:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
310 def _parse_arg_into_env(arg: str) -> Optional[Tuple[str, str, List[str]]]:
311 """Internal helper to parse commandline args into environment vars."""
313 if not arg.startswith('['):
316 if not arg.endswith(']'):
326 # Environment vars the same as flag names without
327 # the initial -'s and in UPPERCASE.
331 return var, env, chunks
333 def _augment_sys_argv_from_environment_variables(self):
334 """Internal. Look at the system environment for variables that match
335 commandline arg names. This is done via some munging such that:
337 :code:`--argument_to_match`
341 :code:`ARGUMENT_TO_MATCH`
343 This allows users to set args via shell environment variables
344 in lieu of passing them on the cmdline.
347 usage_message = Config.usage()
351 # Foreach valid optional commandline option (chunk) generate
352 # its analogous environment variable.
353 for chunk in usage_message.split():
360 _ = Config._parse_arg_into_env(arg)
363 if env in os.environ:
364 if not Config.is_flag_already_in_argv(var):
365 value = os.environ[env]
366 self.saved_messages.append(
367 f'Initialized from environment: {var} = {value}'
369 from string_utils import to_bool
371 if len(chunks) == 1 and to_bool(value):
373 elif len(chunks) > 1:
375 sys.argv.append(value)
378 def _process_dynamic_args(self, event):
380 logger = logging.getLogger(__name__)
381 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
382 logger.debug('Update for %s at version=%d.', event.path, meta.version)
384 'Max known version for %s is %d.', event.path, self.max_version.get(event.path, 0)
386 if meta.version > self.max_version.get(event.path, 0):
387 self.max_version[event.path] = meta.version
388 contents = contents.decode()
390 for arg in contents.split():
392 temp_argv.append(arg)
393 logger.info("Updating %s from zookeeper async config change.", arg)
394 if len(temp_argv) > 0:
397 known, _ = ARGS.parse_known_args()
399 self.config.update(vars(known))
401 def _augment_sys_argv_from_loadfile(self):
402 """Internal. Augment with arguments persisted in a saved file."""
405 saw_other_args = False
406 grab_next_arg = False
407 for arg in sys.argv[1:]:
408 if 'config_loadfile' in arg:
409 pieces = arg.split('=')
417 saw_other_args = True
419 if loadfile is not None:
421 if loadfile[:3] == 'zk:':
422 from kazoo.client import KazooClient
426 self.zk = KazooClient(
427 hosts=scott_secrets.ZOOKEEPER_NODES,
430 keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
431 keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
432 certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
435 zkpath = loadfile[3:]
436 if not zkpath.startswith('/config/'):
437 zkpath = '/config/' + zkpath
438 zkpath = re.sub(r'//+', '/', zkpath)
439 if not self.zk.exists(zkpath):
441 f'ERROR: --config_loadfile argument must be a file, {loadfile} not found (in zookeeper)'
443 except Exception as e:
445 f'ERROR: Error talking with zookeeper while looking for {loadfile}'
447 elif not os.path.exists(loadfile):
449 f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.'
453 msg = f'Augmenting commandline arguments with those from {loadfile}.'
455 msg = f'Reading commandline arguments from {loadfile}.'
456 print(msg, file=sys.stderr)
457 self.saved_messages.append(msg)
463 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
464 contents = contents.decode()
467 for arg in contents.split('\n')
468 if 'config_savefile' not in arg
470 self.saved_messages.append(f'Setting {zkpath}\'s max_version to {meta.version}')
471 self.max_version[zkpath] = meta.version
472 except Exception as e:
473 raise Exception(f'Error reading {zkpath} from zookeeper.') from e
474 self.saved_messages.append(f'Loaded config from zookeeper from {zkpath}')
476 with open(loadfile, 'r') as rf:
477 newargs = rf.readlines()
478 newargs = [arg.strip('\n') for arg in newargs if 'config_savefile' not in arg]
481 def dump_config(self):
482 """Print the current config to stderr."""
483 print("Global Configuration:", file=sys.stderr)
484 pprint.pprint(self.config, stream=sys.stderr)
487 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
488 """Main program should call this early in main(). Note that the
489 :code:`bootstrap.initialize` wrapper takes care of this automatically.
490 This should only be called once per program invocation.
493 if self.config_parse_called:
496 # If we're about to do the usage message dump, put the main
497 # module's argument group last in the list (if possible) so that
498 # when the user passes -h or --help, it will be visible on the
499 # screen w/o scrolling.
501 if arg in ('--help', '-h'):
502 if entry_module is not None:
503 entry_module = os.path.basename(entry_module)
504 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(entry_module)
506 # Examine the environment for variables that match known flags.
507 # For a flag called --example_flag the corresponding environment
508 # variable would be called EXAMPLE_FLAG. If found, hackily add
509 # these into sys.argv to be parsed.
510 self._augment_sys_argv_from_environment_variables()
512 # Look for loadfile and read/parse it if present. This also
513 # works by jamming these values onto sys.argv.
514 self._augment_sys_argv_from_loadfile()
516 # Parse (possibly augmented, possibly completely overwritten)
517 # commandline args with argparse normally and populate config.
518 known, unknown = ARGS.parse_known_args()
519 self.config.update(vars(known))
521 # Reconstruct the argv with unrecognized flags for the benefit of
522 # future argument parsers. For example, unittest_main in python
523 # has some of its own flags. If we didn't recognize it, maybe
526 if config['config_rejects_unrecognized_arguments']:
528 f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
530 self.saved_messages.append(
531 f'Config encountered unrecognized commandline arguments: {unknown}'
533 sys.argv = sys.argv[:1] + unknown
535 # Check for savefile and populate it if requested.
536 savefile = config['config_savefile']
537 if savefile and len(savefile) > 0:
538 data = '\n'.join(ORIG_ARGV[1:])
539 if savefile[:3] == 'zk:':
540 zkpath = savefile[3:]
541 if not zkpath.startswith('/config/'):
542 zkpath = '/config/' + zkpath
543 zkpath = re.sub(r'//+', '/', zkpath)
546 from kazoo.client import KazooClient
548 self.zk = KazooClient(
549 hosts=scott_secrets.ZOOKEEPER_NODES,
552 keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
553 keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
554 certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
558 if len(data) > 1024 * 1024:
559 raise Exception(f'Saved args are too large! ({len(data)} bytes)')
560 if not self.zk.exists(zkpath):
561 self.zk.create(zkpath, data)
562 self.saved_messages.append(
563 f'Just created {zkpath}; setting its max_version to 0'
565 self.max_version[zkpath] = 0
567 meta = self.zk.set(zkpath, data)
568 self.saved_messages.append(
569 f'Setting {zkpath}\'s max_version to {meta.version}'
571 self.max_version[zkpath] = meta.version
572 except Exception as e:
573 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
574 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
576 with open(savefile, 'w') as wf:
579 # Also dump the config on stderr if requested.
580 if config['config_dump']:
583 self.config_parse_called = True
584 if config['config_exit_after_parse']:
585 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Report the value of this instance's config_parse_called flag,
    i.e. whether the global config has already been parsed."""
    already_parsed = self.config_parse_called
    return already_parsed
595 def late_logging(self):
596 """Log messages saved earlier now that logging has been initialized."""
597 logger = logging.getLogger(__name__)
598 logger.debug('Original commandline was: %s', ORIG_ARGV)
599 for _ in self.saved_messages:
603 # A global singleton instance of the Config class.
606 # A lot of client code uses config.config['whatever'] to lookup
607 # configuration so to preserve this we make this, config.config, with
608 # a __getitem__ method on it.
611 # Config didn't use to be a class; it was a mess of module-level
612 # functions and data. The functions below preserve the old interface
613 # so that existing clients do not need to be changed. As you can see,
614 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Module-level alias for config.config.add_commandline_args: create
    a new context for arguments and hand back a handle to it.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    group = CONFIG.add_commandline_args(title, description)
    return group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Module-level entry point that forwards to the CONFIG singleton's
    parse method.

    The main program should call this early in main(); the
    :code:`bootstrap.initialize` wrapper takes care of this
    automatically.  It should only be called once per program
    invocation.  Subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Args:
        entry_module: Passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed = CONFIG.parse(entry_module)
    return parsed
def has_been_parsed() -> bool:
    """Tell the caller whether the global config has already been
    parsed, as reported by the CONFIG singleton."""
    parsed_already = CONFIG.has_been_parsed()
    return parsed_already
def late_logging() -> None:
    """Now that logging has been initialized, have the CONFIG singleton
    log the messages it saved earlier."""
    replay_saved_messages = CONFIG.late_logging
    replay_saved_messages()
651 def dump_config() -> None:
652 """Print the current config to stdout."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Let calling code replace the default help epilog; forwards to
    Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    replace_epilog = Config.overwrite_argparse_epilog
    replace_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Tell whether a particular flag is passed on the commandline;
    forwards to Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        Whatever Config.is_flag_already_in_argv reports for var.
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
676 def print_usage() -> None:
677 """Prints the normal help usage message out."""
684 program usage help text as a string.
686 return Config.usage()