3 # © Copyright 2021-2022, Scott Gasch
5 """Global configuration driven by commandline arguments, environment variables,
6 saved configuration files, and zookeeper-based dynamic configurations. This
7 works across several modules.
13 from pyutils import config
15 parser = config.add_commandline_args(
17 "Args related to module doing the thing.",
20 "--module_do_the_thing",
23 help="Should the module do the thing?"
28 from pyutils import config
30 parser = config.add_commandline_args(
32 "A program that does the thing.",
38 help="Should we really do the thing?"
42 config.parse() # Very important, this must be invoked!
44 If you set this up and remember to invoke config.parse(), all commandline
45 arguments will play nicely together. This is done automatically for you
46 if you're using the :meth:`bootstrap.initialize` decorator on
47 your program's entry point. See :meth:`pyutils.bootstrap.initialize`
50 from pyutils import bootstrap
56 if __name__ == '__main__':
59 Either way, you'll get this behavior from the commandline::
63 [--module_do_the_thing MODULE_DO_THE_THING]
67 Args related to module doing the thing.
69 --module_do_the_thing MODULE_DO_THE_THING
70 Should the module do the thing?
73 A program that does the thing
76 Should we really do the thing?
78 Arguments themselves should be accessed via
79 :code:`config.config['arg_name']`. e.g.::
81 if not config.config['dry_run']:
91 from typing import Any, Dict, List, Optional, Tuple
93 # This module is commonly used by others in here and should avoid
94 # taking any unnecessary dependencies back on them.
96 # Make a copy of the original program arguments immediately upon module load.
97 PROGRAM_NAME: str = os.path.basename(sys.argv[0])
98 ORIG_ARGV: List[str] = sys.argv.copy()
101 class OptionalRawFormatter(argparse.HelpFormatter):
102 """This formatter has the same behavior as the normal argparse text
103 formatter except when the help text of an argument begins with
104 "RAW|". In that case, the line breaks are preserved and the text
107 Use this, for example, when you need the helptext of an argument
108 to have its spacing preserved exactly, e.g.::
114 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
116 help='''RAW|Our mode of operation. One of:
118 PLAY = play wordle with me! Pick a random solution or
119 specify a solution with --template.
121 CHEAT = given a --template and, optionally, --letters_in_word
122 and/or --letters_to_avoid, return the best guess word;
124 AUTOPLAY = given a complete word in --template, guess it step
125 by step showing work;
127 SELFTEST = autoplay every possible solution keeping track of
128 wins/losses and average number of guesses;
130 PRECOMPUTE = populate hash table with optimal guesses.
def _split_lines(self, text, width):
    """Break an argument's help text into display lines.

    Args:
        text: the help text of one argument.
        width: the width forwarded to the stock argparse line splitter.

    Returns:
        For text that begins with "RAW|": the rest of the text split on
        its own line breaks, with no re-wrapping.  For any other text:
        whatever argparse.HelpFormatter._split_lines returns.
    """
    raw_marker = 'RAW|'
    if not text.startswith(raw_marker):
        # Ordinary help text: defer to the stock argparse behavior.
        return argparse.HelpFormatter._split_lines(self, text, width)
    # Drop the marker and keep the author's own line breaks.
    return text[len(raw_marker):].splitlines()
141 # A global argparser that we will collect arguments in. Each module (including
142 # us) will add arguments to a separate argument group.
143 ARGS = argparse.ArgumentParser(
145 formatter_class=OptionalRawFormatter,
146 fromfile_prefix_chars="@",
147 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
148 # I don't fully understand why but when loaded by sphinx sometimes
149 # the same module is loaded many times causing any arguments it
150 # registers via module-level code to be redefined. Work around
151 # this iff the program is 'sphinx-build'
152 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
155 # Arguments specific to config.py. Other users should get their own group by
156 # invoking config.add_commandline_args.
157 GROUP = ARGS.add_argument_group(
158 f'Global Config ({__file__})',
159 'Args that control the global config itself; how meta!',
165 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
171 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
178 help='Populate a config file (compatible with --config_loadfile) with the given path for later use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:path) at startup time will see their configuration dynamically updated; flags with "dynamic" in their names (e.g. --my_dynamic_flag) may have their values changed. You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
181 '--config_rejects_unrecognized_arguments',
184 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
187 '--config_exit_after_parse',
190 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
196 Everything in the config module used to be module-level functions and
197 variables but it made the code ugly and harder to maintain. Now, this
198 class does the heavy lifting. We still rely on some globals, though:
200 ARGS and GROUP to interface with argparse
201 PROGRAM_NAME stores argv[0] close to program invocation
202 ORIG_ARGV stores the original argv list close to program invocation
203 CONFIG and config: hold the (singleton) instance of this class.
208 # Has our parse() method been invoked yet?
209 self.config_parse_called = False
211 # A configuration dictionary that will contain parsed
212 # arguments. This is the data that is most interesting to our
213 # callers as it will hold the configuration result.
214 self.config: Dict[str, Any] = {}
216 # Defer logging messages until later when logging has been
218 self.saved_messages: List[str] = []
220 # A zookeeper client that is lazily created so as to not incur
221 # the latency of connecting to zookeeper for programs that are
222 # not reading or writing their config data into zookeeper.
223 self.zk: Optional[Any] = None
225 # Per known zk file, what is the max version we have seen?
226 self.max_version: Dict[str, int] = {}
def __getitem__(self, key: str) -> Optional[Any]:
    """Support square-bracket lookups by consulting self.config.

    Args:
        key: the name of the setting to look up.

    Returns:
        The value stored under key, or None when key is not present.
    """
    settings = self.config
    return settings.get(key)
def __setitem__(self, key: str, value: Any) -> None:
    """Support square-bracket assignment by storing into self.config.

    Args:
        key: the name of the setting to store.
        value: the value to associate with key; any existing value for
            key is replaced.
    """
    self.config.update({key: value})
def __contains__(self, key: str) -> bool:
    """Support the ``in`` operator by testing membership in self.config.

    Args:
        key: the name of the setting to test for.

    Returns:
        True if key is present in self.config, False otherwise.
    """
    is_present = key in self.config
    return is_present
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Look up a setting in self.config with a fallback value.

    Args:
        key: the name of the setting to look up.
        default: the value to return when key is not present.

    Returns:
        The value stored under key, or default when key is absent.
    """
    settings = self.config
    if key in settings:
        return settings[key]
    return default
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Open a new argument group on the global parser and hand it back.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
257 def overwrite_argparse_epilog(msg: str) -> None:
258 """Allows your code to override the default epilog created by
262 msg: The epilog message to substitute for the default.
267 def is_flag_already_in_argv(var: str) -> bool:
268 """Returns true if a particular flag is passed on the commandline
272 var: The flag to search for.
280 def print_usage() -> None:
281 """Prints the normal help usage message out."""
288 program usage help text as a string.
290 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Used to reorder the argument groups before dumping out
    a generated help string such that the main program's arguments come
    last.

    Args:
        entry_module: the name of the program's entry module, or None
            if it is not known.

    Returns:
        A new list holding every group in ARGS._action_groups.  Groups
        whose title mentions entry_module or PROGRAM_NAME are appended
        (so they end up last, in their original relative order); every
        other group is inserted at the front of the list.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # A group may have no title; treat that as "mentions nothing"
        # rather than failing on the substring tests below.
        title = grp.title or ''
        # Test the title of the group being visited.  The previous code
        # tested GROUP.title (config.py's own group) for the program
        # name, which gave the same answer for every group.
        if entry_module is not None and entry_module in title:
            reordered_action_groups.append(grp)
        elif PROGRAM_NAME in title:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
310 def _parse_arg_into_env(arg: str) -> Optional[Tuple[str, str, List[str]]]:
311 """Internal helper to parse commandline args into environment vars."""
313 if not arg.startswith('['):
316 if not arg.endswith(']'):
326 # Environment vars the same as flag names without
327 # the initial -'s and in UPPERCASE.
331 return var, env, chunks
def _to_bool(in_str: str) -> bool:
    """Interpret a string as a boolean.

    Args:
        in_str: the string to convert to boolean

    Returns:
        True if in_str, compared case insensitively, is one of "true",
        "1", "yes", "y", "t" or "on".  Otherwise False is returned.
    """
    truthy_spellings = ("true", "1", "yes", "y", "t", "on")
    normalized = in_str.lower()
    return any(normalized == spelling for spelling in truthy_spellings)
373 def _augment_sys_argv_from_environment_variables(self):
374 """Internal. Look at the system environment for variables that match
375 commandline arg names. This is done via some munging such that:
377 :code:`--argument_to_match`
381 :code:`ARGUMENT_TO_MATCH`
383 This allows users to set args via shell environment variables
384 in lieu of passing them on the cmdline.
387 usage_message = Config.usage()
391 # Foreach valid optional commandline option (chunk) generate
392 # its analogous environment variable.
393 for chunk in usage_message.split():
400 _ = Config._parse_arg_into_env(arg)
403 if env in os.environ:
404 if not Config.is_flag_already_in_argv(var):
405 value = os.environ[env]
406 self.saved_messages.append(
407 f'Initialized from environment: {var} = {value}'
409 if len(chunks) == 1 and Config._to_bool(value):
411 elif len(chunks) > 1:
413 sys.argv.append(value)
416 def _process_dynamic_args(self, event):
417 """Invoked as a callback when a zk-based config changed."""
421 logger = logging.getLogger(__name__)
423 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
424 logger.debug('Update for %s at version=%d.', event.path, meta.version)
426 'Max known version for %s is %d.',
428 self.max_version.get(event.path, 0),
430 except Exception as e:
431 raise Exception('Error reading data from zookeeper') from e
433 # Make sure we process changes in order.
434 if meta.version > self.max_version.get(event.path, 0):
435 self.max_version[event.path] = meta.version
436 contents = contents.decode()
438 for arg in contents.split():
440 # Our rule is that arguments must contain the word
441 # 'dynamic' if we are going to allow them to change at
442 # runtime as a signal that the programmer is expecting
445 temp_argv.append(arg)
446 logger.info("Updating %s from zookeeper async config change.", arg)
448 if len(temp_argv) > 0:
451 known, _ = ARGS.parse_known_args()
453 self.config.update(vars(known))
455 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
456 from pyutils import zookeeper
458 if not zkpath.startswith('/config/'):
459 zkpath = '/config/' + zkpath
460 zkpath = re.sub(r'//+', '/', zkpath)
464 self.zk = zookeeper.get_started_zk_client()
465 if not self.zk.exists(zkpath):
468 # Note: we're putting a watch on this config file. Our
469 # _process_dynamic_args routine will be called to reparse
470 # args when/if they change.
471 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
472 contents = contents.decode()
473 self.saved_messages.append(
474 f'Setting {zkpath}\'s max_version to {meta.version}'
476 self.max_version[zkpath] = meta.version
477 self.saved_messages.append(f'Read config from zookeeper {zkpath}.')
479 except Exception as e:
480 self.saved_messages.append(
481 f'Failed to read {zkpath} from zookeeper: exception {e}'
485 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
486 if not os.path.exists(filepath):
488 with open(filepath, 'r') as rf:
489 self.saved_messages.append(f'Read config from disk file {filepath}')
492 def _augment_sys_argv_from_loadfile(self):
493 """Internal. Augment with arguments persisted in a saved file."""
495 # Check for --config_loadfile in the args manually; argparse isn't
496 # invoked yet and can't be yet.
498 saw_other_args = False
499 grab_next_arg = False
500 for arg in sys.argv[1:]:
501 if 'config_loadfile' in arg:
502 pieces = arg.split('=')
510 saw_other_args = True
512 if not loadfile or len(loadfile) == 0:
515 # Get contents from wherever.
517 if loadfile[:3] == 'zk:':
518 contents = self._read_config_from_zookeeper(loadfile[3:])
520 contents = self._read_config_from_disk(loadfile)
524 msg = f'Augmenting commandline arguments with those from {loadfile}.'
526 msg = f'Reading commandline arguments from {loadfile}.'
527 print(msg, file=sys.stderr)
528 self.saved_messages.append(msg)
530 msg = f'Failed to read/parse contents from {loadfile}'
531 print(msg, file=sys.stderr)
532 self.saved_messages.append(msg)
535 # Augment args with new ones.
538 for arg in contents.split('\n')
539 if 'config_savefile' not in arg
def dump_config(self):
    """Print the current config to stderr.

    Writes a "Global Configuration:" header line followed by a
    pretty-printed rendering of self.config.
    """
    err_stream = sys.stderr
    print("Global Configuration:", file=err_stream)
    pprint.pprint(self.config, stream=err_stream)
549 def _write_config_to_disk(self, data: str, filepath: str) -> None:
550 with open(filepath, 'w') as wf:
553 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
554 if not zkpath.startswith('/config/'):
555 zkpath = '/config/' + zkpath
556 zkpath = re.sub(r'//+', '/', zkpath)
559 from pyutils import zookeeper
561 self.zk = zookeeper.get_started_zk_client()
562 encoded_data = data.encode()
563 if len(encoded_data) > 1024 * 1024:
565 f'Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)'
567 if not self.zk.exists(zkpath):
568 self.zk.create(zkpath, encoded_data)
569 self.saved_messages.append(
570 f'Just created {zkpath}; setting its max_version to 0'
572 self.max_version[zkpath] = 0
574 meta = self.zk.set(zkpath, encoded_data)
575 self.saved_messages.append(
576 f'Setting {zkpath}\'s max_version to {meta.version}'
578 self.max_version[zkpath] = meta.version
579 except Exception as e:
580 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
581 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
583 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
584 """Main program should call this early in main(). Note that the
585 :code:`bootstrap.initialize` wrapper takes care of this automatically.
586 This should only be called once per program invocation.
589 if self.config_parse_called:
592 # If we're about to do the usage message dump, put the main
593 # module's argument group last in the list (if possible) so that
594 # when the user passes -h or --help, it will be visible on the
595 # screen w/o scrolling.
597 if arg in ('--help', '-h'):
598 if entry_module is not None:
599 entry_module = os.path.basename(entry_module)
600 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
604 # Examine the environment for variables that match known flags.
605 # For a flag called --example_flag the corresponding environment
606 # variable would be called EXAMPLE_FLAG. If found, hackily add
607 # these into sys.argv to be parsed.
608 self._augment_sys_argv_from_environment_variables()
610 # Look for loadfile and read/parse it if present. This also
611 # works by jamming these values onto sys.argv.
612 self._augment_sys_argv_from_loadfile()
614 # Parse (possibly augmented, possibly completely overwritten)
615 # commandline args with argparse normally and populate config.
616 known, unknown = ARGS.parse_known_args()
617 self.config.update(vars(known))
619 # Reconstruct the argv with unrecognized flags for the benefit of
620 # future argument parsers. For example, unittest_main in python
621 # has some of its own flags. If we didn't recognize it, maybe
624 if config['config_rejects_unrecognized_arguments']:
626 f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
628 self.saved_messages.append(
629 f'Config encountered unrecognized commandline arguments: {unknown}'
631 sys.argv = sys.argv[:1] + unknown
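633 # Check for savefile and populate it if requested. NOTE(review): the two calls below pass (path, data) but _write_config_to_zookeeper and _write_config_to_disk declare (data, path); the arguments look swapped.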
634 savefile = config['config_savefile']
635 if savefile and len(savefile) > 0:
636 data = '\n'.join(ORIG_ARGV[1:])
637 if savefile[:3] == 'zk:':
638 self._write_config_to_zookeeper(savefile[3:], data)
640 self._write_config_to_disk(savefile, data)
642 # Also dump the config on stderr if requested.
643 if config['config_dump']:
646 self.config_parse_called = True
647 if config['config_exit_after_parse']:
648 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Report whether the global config has already been parsed.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
658 def late_logging(self):
659 """Log messages saved earlier now that logging has been initialized."""
660 logger = logging.getLogger(__name__)
661 logger.debug('Original commandline was: %s', ORIG_ARGV)
662 for _ in self.saved_messages:
666 # A global singleton instance of the Config class.
669 # A lot of client code uses config.config['whatever'] to lookup
670 # configuration so to preserve this we make this, config.config, with
671 # a __getitem__ method on it.
674 # Config didn't use to be a class; it was a mess of module-level
675 # functions and data. The functions below preserve the old interface
676 # so that existing clients do not need to be changed. As you can see,
677 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Open a new argument group on behalf of the calling module.

    Module-level alias that forwards to CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    group = CONFIG.add_commandline_args(title, description)
    return group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Main program should call this early in main().  Note that the
    :code:`bootstrap.initialize` wrapper takes care of this automatically.
    This should only be called once per program invocation.  Subsequent
    calls do not reparse the configuration settings but rather just
    return the current state.

    Args:
        entry_module: forwarded unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed_config = CONFIG.parse(entry_module)
    return parsed_config
def has_been_parsed() -> bool:
    """Report whether the global config has already been parsed.

    Returns:
        Whatever CONFIG.has_been_parsed() returns.
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Log messages saved earlier now that logging has been initialized.

    Module-level alias that forwards to CONFIG.late_logging.
    """
    singleton = CONFIG
    singleton.late_logging()
714 def dump_config() -> None:
715 """Print the current config to stderr."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Allows your code to override the default epilog.

    Module-level alias that forwards to Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    replace_epilog = Config.overwrite_argparse_epilog
    replace_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Report whether a particular flag is passed on the commandline.

    Module-level alias that forwards to Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        Whatever Config.is_flag_already_in_argv returns for var.
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
739 def print_usage() -> None:
740 """Prints the normal help usage message out."""
747 program usage help text as a string.
749 return Config.usage()