3 # © Copyright 2021-2022, Scott Gasch
5 """Global configuration driven by commandline arguments, environment variables,
6 saved configuration files, and zookeeper-based dynamic configurations. This
7 works across several modules.
15 parser = config.add_commandline_args(
17 "Args related to module doing the thing.",
20 "--module_do_the_thing",
23 help="Should the module do the thing?"
30 parser = config.add_commandline_args(
32 "A program that does the thing.",
38 help="Should we really do the thing?"
42 config.parse() # Very important, this must be invoked!
44 If you set this up and remember to invoke config.parse(), all commandline
45 arguments will play nicely together. This is done automatically for you
46 if you're using the :meth:`bootstrap.initialize` decorator on
47 your program's entry point. See :meth:`python_modules.bootstrap.initialize`
56 if __name__ == '__main__':
59 Either way, you'll get this behavior from the commandline::
63 [--module_do_the_thing MODULE_DO_THE_THING]
67 Args related to module doing the thing.
69 --module_do_the_thing MODULE_DO_THE_THING
70 Should the module do the thing?
73 A program that does the thing
76 Should we really do the thing?
78 Arguments themselves should be accessed via
79 :code:`config.config['arg_name']`. e.g.::
81 if not config.config['dry_run']:
91 from typing import Any, Dict, List, Optional, Tuple
93 from kazoo.client import KazooClient
94 from kazoo.protocol.states import WatchedEvent
98 # This module is commonly used by others in here and should avoid
99 # taking any unnecessary dependencies back on them.
# Snapshot the argument vector (and the program name derived from it) as soon
# as this module is loaded, before anything else gets a chance to alter
# sys.argv.
ORIG_ARGV: List[str] = list(sys.argv)
PROGRAM_NAME: str = os.path.basename(ORIG_ARGV[0])
106 class OptionalRawFormatter(argparse.HelpFormatter):
107 """This formatter has the same behavior as the normal argparse text
108 formatter except when the help text of an argument begins with
109 "RAW|". In that case, the line breaks are preserved and the text
112 Use this, for example, when you need the helptext of an argument
113 to have its spacing preserved exactly, e.g.::
119 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
121 help='''RAW|Our mode of operation. One of:
123 PLAY = play wordle with me! Pick a random solution or
124 specify a solution with --template.
126 CHEAT = given a --template and, optionally, --letters_in_word
127 and/or --letters_to_avoid, return the best guess word;
129 AUTOPLAY = given a complete word in --template, guess it step
130 by step showing work;
132 SELFTEST = autoplay every possible solution keeping track of
133 wins/losses and average number of guesses;
135 PRECOMPUTE = populate hash table with optimal guesses.
def _split_lines(self, text, width):
    """Break help text into display lines, honoring the "RAW|" marker.

    Args:
        text: The help text of one argument.
        width: The wrap width; only used for text without the marker.

    Returns:
        For text beginning with "RAW|", the remainder split on its own
        line breaks (no re-wrapping).  Otherwise whatever the stock
        argparse.HelpFormatter produces for the same text and width.
    """
    raw_marker = 'RAW|'
    if not text.startswith(raw_marker):
        return argparse.HelpFormatter._split_lines(self, text, width)
    return text[len(raw_marker):].splitlines()
146 # A global argparser that we will collect arguments in. Each module (including
147 # us) will add arguments to a separate argument group.
148 ARGS = argparse.ArgumentParser(
150 formatter_class=OptionalRawFormatter,
151 fromfile_prefix_chars="@",
152 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
153 # I don't fully understand why but when loaded by sphinx sometimes
154 # the same module is loaded many times causing any arguments it
155 # registers via module-level code to be redefined. Work around
156 # this iff the program is 'sphinx-build'
157 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
160 # Arguments specific to config.py. Other users should get their own group by
161 # invoking config.add_commandline_args.
162 GROUP = ARGS.add_argument_group(
163 f'Global Config ({__file__})',
164 'Args that control the global config itself; how meta!',
170 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
176 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
183 help='Populate a config file (compatible with --config_loadfile) with the given path for later use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:path) at startup time will see their configuration dynamically updated; flags with "dynamic" in their names (e.g. --my_dynamic_flag) may have their values changed. You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
186 '--config_rejects_unrecognized_arguments',
189 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
192 '--config_exit_after_parse',
195 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
201 Everything in the config module used to be module-level functions and
202 variables but it made the code ugly and harder to maintain. Now, this
203 class does the heavy lifting. We still rely on some globals, though:
205 ARGS and GROUP to interface with argparse
206 PROGRAM_NAME stores argv[0] close to program invocation
207 ORIG_ARGV stores the original argv list close to program invocation
208 CONFIG and config: hold the (singleton) instance of this class.
213 # Has our parse() method been invoked yet?
214 self.config_parse_called = False
216 # A configuration dictionary that will contain parsed
217 # arguments. This is the data that is most interesting to our
218 # callers as it will hold the configuration result.
219 self.config: Dict[str, Any] = {}
221 # Defer logging messages until later when logging has been
223 self.saved_messages: List[str] = []
225 # A zookeeper client that is lazily created so as to not incur
226 # the latency of connecting to zookeeper for programs that are
227 # not reading or writing their config data into zookeeper.
228 self.zk: Optional[KazooClient] = None
230 # Per known zk file, what is the max version we have seen?
231 self.max_version: Dict[str, int] = {}
def __getitem__(self, key: str) -> Optional[Any]:
    """Support subscript reads by delegating to the self.config dict.

    Args:
        key: The name of the configuration entry to read.

    Returns:
        The value stored under key, or None when key is not present
        (a missing key does not raise).
    """
    return self.config.get(key)
def __setitem__(self, key: str, value: Any) -> None:
    """Support subscript assignment by writing into the self.config dict.

    Args:
        key: The name of the configuration entry to write.
        value: The value to store under key (replaces any existing value).
    """
    settings = self.config
    settings[key] = value
def __contains__(self, key: str) -> bool:
    """Support the `in` operator by testing membership in self.config.

    Args:
        key: The name of the configuration entry to look for.

    Returns:
        True if key is present in self.config, False otherwise.
    """
    present = key in self.config
    return present
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Read one entry from self.config, with a fallback for missing keys.

    Args:
        key: The name of the configuration entry to read.
        default: The value to hand back when key is not present.

    Returns:
        The value stored under key, or default when there is none.
    """
    try:
        return self.config[key]
    except KeyError:
        return default
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Open a fresh argument group on the global parser and return it.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
260 def overwrite_argparse_epilog(msg: str) -> None:
261 """Allows your code to override the default epilog created by
265 msg: The epilog message to substitute for the default.
270 def is_flag_already_in_argv(var: str) -> bool:
271 """Returns true if a particular flag is passed on the commandline
275 var: The flag to search for.
283 def print_usage() -> None:
284 """Prints the normal help usage message out."""
291 program usage help text as a string.
293 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Reorder the parser's argument groups before a help dump.

    Groups whose title mentions the entry module or the program name are
    kept at the end of the returned list so that the main program's
    arguments are printed last; every other group is pushed onto the front.

    Args:
        entry_module: The (base)name of the program's entry module, or
            None if it is not known.

    Returns:
        A new list holding every group in ARGS._action_groups, reordered.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # A group can be created without a title; treat that as "no match"
        # rather than letting the `in` test raise on None.
        title = grp.title or ''
        # Test the title of the group being visited.  Checking the fixed
        # GROUP.title here would give the same answer for every group.
        belongs_to_main = (
            entry_module is not None and entry_module in title
        ) or PROGRAM_NAME in title
        if belongs_to_main:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
313 def _parse_arg_into_env(arg: str) -> Optional[Tuple[str, str, List[str]]]:
314 """Internal helper to parse commandline args into environment vars."""
316 if not arg.startswith('['):
319 if not arg.endswith(']'):
329 # Environment vars the same as flag names without
330 # the initial -'s and in UPPERCASE.
334 return var, env, chunks
336 def _augment_sys_argv_from_environment_variables(self):
337 """Internal. Look at the system environment for variables that match
338 commandline arg names. This is done via some munging such that:
340 :code:`--argument_to_match`
344 :code:`ARGUMENT_TO_MATCH`
346 This allows users to set args via shell environment variables
347 in lieu of passing them on the cmdline.
350 usage_message = Config.usage()
354 # Foreach valid optional commandline option (chunk) generate
355 # its analogous environment variable.
356 for chunk in usage_message.split():
363 _ = Config._parse_arg_into_env(arg)
366 if env in os.environ:
367 if not Config.is_flag_already_in_argv(var):
368 value = os.environ[env]
369 self.saved_messages.append(
370 f'Initialized from environment: {var} = {value}'
372 from string_utils import to_bool
374 if len(chunks) == 1 and to_bool(value):
376 elif len(chunks) > 1:
378 sys.argv.append(value)
381 def _process_dynamic_args(self, event: WatchedEvent):
383 logger = logging.getLogger(__name__)
384 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
385 logger.debug('Update for %s at version=%d.', event.path, meta.version)
387 'Max known version for %s is %d.', event.path, self.max_version.get(event.path, 0)
389 if meta.version > self.max_version.get(event.path, 0):
390 self.max_version[event.path] = meta.version
391 contents = contents.decode()
393 for arg in contents.split():
395 temp_argv.append(arg)
396 logger.info("Updating %s from zookeeper async config change.", arg)
397 if len(temp_argv) > 0:
400 known, _ = ARGS.parse_known_args()
402 self.config.update(vars(known))
404 def _augment_sys_argv_from_loadfile(self):
405 """Internal. Augment with arguments persisted in a saved file."""
408 saw_other_args = False
409 grab_next_arg = False
410 for arg in sys.argv[1:]:
411 if 'config_loadfile' in arg:
412 pieces = arg.split('=')
420 saw_other_args = True
422 if loadfile is not None:
424 if loadfile[:3] == 'zk:':
427 self.zk = KazooClient(
428 hosts=scott_secrets.ZOOKEEPER_NODES,
431 keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
432 keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
433 certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
436 zkpath = loadfile[3:]
437 if not zkpath.startswith('/config/'):
438 zkpath = '/config/' + zkpath
439 zkpath = re.sub(r'//+', '/', zkpath)
440 if not self.zk.exists(zkpath):
442 f'ERROR: --config_loadfile argument must be a file, {loadfile} not found (in zookeeper)'
444 except Exception as e:
446 f'ERROR: Error talking with zookeeper while looking for {loadfile}'
448 elif not os.path.exists(loadfile):
450 f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.'
454 msg = f'Augmenting commandline arguments with those from {loadfile}.'
456 msg = f'Reading commandline arguments from {loadfile}.'
457 print(msg, file=sys.stderr)
458 self.saved_messages.append(msg)
464 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
465 contents = contents.decode()
468 for arg in contents.split('\n')
469 if 'config_savefile' not in arg
471 self.saved_messages.append(f'Setting {zkpath}\'s max_version to {meta.version}')
472 self.max_version[zkpath] = meta.version
473 except Exception as e:
474 raise Exception(f'Error reading {zkpath} from zookeeper.') from e
475 self.saved_messages.append(f'Loaded config from zookeeper from {zkpath}')
477 with open(loadfile, 'r') as rf:
478 newargs = rf.readlines()
479 newargs = [arg.strip('\n') for arg in newargs if 'config_savefile' not in arg]
def dump_config(self):
    """Print the current config to stderr.

    Writes a "Global Configuration:" header line followed by a
    pretty-printed rendering of self.config.  Both go to sys.stderr,
    not stdout.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(self.config, stream=sys.stderr)
488 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
489 """Main program should call this early in main(). Note that the
490 :code:`bootstrap.initialize` wrapper takes care of this automatically.
491 This should only be called once per program invocation.
494 if self.config_parse_called:
497 # If we're about to do the usage message dump, put the main
498 # module's argument group last in the list (if possible) so that
499 # when the user passes -h or --help, it will be visible on the
500 # screen w/o scrolling.
502 if arg in ('--help', '-h'):
503 if entry_module is not None:
504 entry_module = os.path.basename(entry_module)
505 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(entry_module)
507 # Examine the environment for variables that match known flags.
508 # For a flag called --example_flag the corresponding environment
509 # variable would be called EXAMPLE_FLAG. If found, hackily add
510 # these into sys.argv to be parsed.
511 self._augment_sys_argv_from_environment_variables()
513 # Look for loadfile and read/parse it if present. This also
514 # works by jamming these values onto sys.argv.
515 self._augment_sys_argv_from_loadfile()
517 # Parse (possibly augmented, possibly completely overwritten)
518 # commandline args with argparse normally and populate config.
519 known, unknown = ARGS.parse_known_args()
520 self.config.update(vars(known))
522 # Reconstruct the argv with unrecognized flags for the benefit of
523 # future argument parsers. For example, unittest_main in python
524 # has some of its own flags. If we didn't recognize it, maybe
527 if config['config_rejects_unrecognized_arguments']:
529 f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
531 self.saved_messages.append(
532 f'Config encountered unrecognized commandline arguments: {unknown}'
534 sys.argv = sys.argv[:1] + unknown
536 # Check for savefile and populate it if requested.
537 savefile = config['config_savefile']
538 if savefile and len(savefile) > 0:
539 data = '\n'.join(ORIG_ARGV[1:])
540 if savefile[:3] == 'zk:':
541 zkpath = savefile[3:]
542 if not zkpath.startswith('/config/'):
543 zkpath = '/config/' + zkpath
544 zkpath = re.sub(r'//+', '/', zkpath)
547 self.zk = KazooClient(
548 hosts=scott_secrets.ZOOKEEPER_NODES,
551 keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
552 keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS,
553 certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT,
557 if len(data) > 1024 * 1024:
558 raise Exception(f'Saved args are too large! ({len(data)} bytes)')
559 if not self.zk.exists(zkpath):
560 self.zk.create(zkpath, data)
561 self.saved_messages.append(
562 f'Just created {zkpath}; setting its max_version to 0'
564 self.max_version[zkpath] = 0
566 meta = self.zk.set(zkpath, data)
567 self.saved_messages.append(
568 f'Setting {zkpath}\'s max_version to {meta.version}'
570 self.max_version[zkpath] = meta.version
571 except Exception as e:
572 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
573 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
575 with open(savefile, 'w') as wf:
578 # Also dump the config on stderr if requested.
579 if config['config_dump']:
582 self.config_parse_called = True
583 if config['config_exit_after_parse']:
584 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Report whether this config's parse() has been invoked yet.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
594 def late_logging(self):
595 """Log messages saved earlier now that logging has been initialized."""
596 logger = logging.getLogger(__name__)
597 logger.debug('Original commandline was: %s', ORIG_ARGV)
598 for _ in self.saved_messages:
602 # A global singleton instance of the Config class.
605 # A lot of client code uses config.config['whatever'] to lookup
606 # configuration so to preserve this we make this, config.config, with
607 # a __getitem__ method on it.
610 # Config didn't use to be a class; it was a mess of module-level
611 # functions and data. The functions below preserve the old interface
612 # so that existing clients do not need to be changed. As you can see,
613 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Open a fresh argument group for the caller and return it.

    Module-level convenience wrapper that forwards to
    CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = CONFIG.add_commandline_args(title, description)
    return new_group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the global configuration; call this early in main().

    The :code:`bootstrap.initialize` wrapper takes care of this
    automatically.  This should only be called once per program
    invocation; subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Module-level convenience wrapper that forwards to CONFIG.parse.

    Args:
        entry_module: Passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed = CONFIG.parse(entry_module)
    return parsed
def has_been_parsed() -> bool:
    """Report whether the global config has already been parsed.

    Module-level convenience wrapper that forwards to
    CONFIG.has_been_parsed.

    Returns:
        True iff the global config has already been parsed.
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Emit the messages that were held back until logging was initialized.

    Module-level convenience wrapper that forwards to CONFIG.late_logging.
    """
    CONFIG.late_logging()
650 def dump_config() -> None:
651 """Print the current config to stderr."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Let calling code substitute its own epilog for the default one.

    Module-level convenience wrapper that forwards to
    Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Tell whether a particular flag was passed on the commandline.

    Module-level convenience wrapper that forwards to
    Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        Whatever Config.is_flag_already_in_argv reports for var.
    """
    found = Config.is_flag_already_in_argv(var)
    return found
675 def print_usage() -> None:
676 """Prints the normal help usage message out."""
683 program usage help text as a string.
685 return Config.usage()