3 # © Copyright 2021-2022, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it wraps :py:mod:`argparse`
11 (see https://docs.python.org/3/library/argparse.html):
15 from pyutils import config
17 # Call add_commandline_args to get an argparse.ArgumentParser
18 # for file.py. Each file uses a separate ArgumentParser
19 # chained off the main namespace.
20 parser = config.add_commandline_args(
22 "Args related to module doing the thing.",
25 # Then simply add argparse-style arguments to it, as usual.
27 "--module_do_the_thing",
30 help="Should the module do the thing?"
35 from pyutils import config
37 # main.py may have some arguments of its own, so add them.
38 parser = config.add_commandline_args(
40 "A program that does the thing.",
46 help="Should we really do the thing?"
50 config.parse() # Then remember to call config.parse() early on.
52 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
53 all commandline arguments will play nicely together across all modules / files
54 in your program automatically. Argparse help messages will group flags by
57 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
58 optionally wrap your program's entry point, it will remember to call
59 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
60 That looks like this::
62 from pyutils import bootstrap
68 if __name__ == '__main__':
71 Either way, you'll get an aggregated usage message along with flags broken
72 down per file in help::
76 [--module_do_the_thing MODULE_DO_THE_THING]
80 Args related to module doing the thing.
82 --module_do_the_thing MODULE_DO_THE_THING
83 Should the module do the thing?
86 A program that does the thing
89 Should we really do the thing?
91 Once :py:meth:`pyutils.config.parse` has been called (either automatically
by :py:mod:`pyutils.bootstrap` or manually), the program configuration
93 state is ready in a dict-like object called `config.config`. For example,
94 to check the state of the `--dry_run` flag::
96 if not config.config['dry_run']:
99 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
100 sets of commandline arguments using the `--config_savefile` and the
101 `--config_loadfile` arguments. The former saves all arguments (other than
102 itself) to an ascii file whose path you provide. The latter reads all
103 arguments from an ascii file whose path you provide.
105 Saving and loading sets of arguments can make complex operations easier
to set up.  They also allow for dynamic arguments.
108 If you use Apache Zookeeper, you can prefix paths to
109 `--config_savefile` and `--config_loadfile` with the string "zk:"
110 to cause the path to be interpreted as a Zookeeper path rather
111 than one on the local filesystem. When loading arguments from
Zookeeper, the :py:mod:`pyutils.config` code registers a listener
113 to be notified on state change (e.g. when some other instance
114 overwrites your Zookeeper based configuration). Listeners then
115 dynamically update the value of any flag in the `config.config`
116 dict whose name contains the string "dynamic". So, for example,
117 the `--dynamic_database_connect_string` argument would be
118 modifiable at runtime when using Zookeeper based configurations.
119 Flags that do not contain the string "dynamic" will not change.
120 And nothing is dynamic unless we're reading configuration from
123 For more information about Zookeeper, see https://zookeeper.apache.org/.
132 from typing import Any, Dict, List, Optional
134 # This module is commonly used by others in here and should avoid
135 # taking any unnecessary dependencies back on them.
# Snapshot the invocation as soon as this module is imported; parse()
# later rewrites sys.argv, so this is the only record of the original.
ORIG_ARGV: List[str] = list(sys.argv)
PROGRAM_NAME: str = os.path.basename(ORIG_ARGV[0])
class OptionalRawFormatter(argparse.HelpFormatter):
    """Help formatter that leaves pre-formatted help text alone.

    Behaves exactly like the normal argparse text formatter except
    for arguments whose help text begins with "RAW|".  For those, the
    marker is dropped and the rest of the text keeps its own line
    breaks instead of being re-wrapped.  It is enabled automatically
    if you use :py:mod:`pyutils.config`.

    Prepend "RAW|" to a help message to signal that it is already
    laid out the way you want it.  For example::

        parser.add_argument(
            '--mode',
            choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
            help='''RAW|Our mode of operation.  One of:

                PLAY = play wordle with me!  Pick a random solution or
                       specify a solution with --template.

                PRECOMPUTE = populate hash table with optimal guesses.
            ''',
        )
    """

    def _split_lines(self, text, width):
        """Split help text into lines, honoring the "RAW|" opt-out."""
        marker = "RAW|"
        if not text.startswith(marker):
            # Ordinary help text: defer to argparse's own wrapping.
            return argparse.HelpFormatter._split_lines(self, text, width)
        # Pre-formatted text: drop the marker, keep the author's breaks.
        return text[len(marker):].splitlines()
185 # A global argparser that we will collect arguments in. Each module (including
186 # us) will add arguments to a separate argument group.
187 ARGS = argparse.ArgumentParser(
189 formatter_class=OptionalRawFormatter,
190 fromfile_prefix_chars="@",
191 epilog=f"{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.",
192 # I don't fully understand why but when loaded by sphinx sometimes
193 # the same module is loaded many times causing any arguments it
194 # registers via module-level code to be redefined. Work around
195 # this iff the program is 'sphinx-build'
196 conflict_handler="resolve" if PROGRAM_NAME == "sphinx-build" else "error",
199 # Arguments specific to config.py. Other users should get their own group by
200 # invoking config.add_commandline_args.
201 GROUP = ARGS.add_argument_group(
202 f"Global Config ({__file__})",
203 "Args that control the global config itself; how meta!",
209 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
215 help="Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.",
222 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
225 "--config_allow_dynamic_updates",
228 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
231 "--config_rejects_unrecognized_arguments",
234 help="If present, config will raise an exception if it doesn't recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.",
237 "--config_exit_after_parse",
240 help="If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.",
248 Do not instantiate this class directly; it is meant to be a
249 global singleton called `pyutils.config.CONFIG`. Instead, use
250 :py:meth:`pyutils.config.add_commandline_args` to get an
251 `ArgumentGroup` and add your arguments to it. Then call
252 :py:meth:`pyutils.config.parse` to parse global configuration
253 from your main program entry point.
255 Everything in the config module used to be module-level functions and
256 variables but it made the code ugly and harder to maintain. Now, this
257 class does the heavy lifting. We still rely on some globals, though:
259 - ARGS and GROUP to interface with argparse
260 - PROGRAM_NAME stores argv[0] close to program invocation
261 - ORIG_ARGV stores the original argv list close to program invocation
262 - CONFIG and config: hold the (singleton) instance of this class.
266 # Has our parse() method been invoked yet?
267 self.config_parse_called = False
269 # A configuration dictionary that will contain parsed
270 # arguments. This is the data that is most interesting to our
271 # callers as it will hold the configuration result.
272 self.config: Dict[str, Any] = {}
274 # Defer logging messages until later when logging has been
276 self.saved_messages: List[str] = []
278 # A zookeeper client that is lazily created so as to not incur
279 # the latency of connecting to zookeeper for programs that are
280 # not reading or writing their config data into zookeeper.
281 self.zk: Optional[Any] = None
283 # Per known zk file, what is the max version we have seen?
284 self.max_version: Dict[str, int] = {}
286 # The argv after parsing known args.
287 self.parsed_argv: Optional[List[str]] = None
289 def __getitem__(self, key: str) -> Optional[Any]:
290 """If someone uses []'s on us, pass it onto self.config."""
291 return self.config.get(key, None)
293 def __setitem__(self, key: str, value: Any) -> None:
294 self.config[key] = value
296 def __contains__(self, key: str) -> bool:
297 return key in self.config
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Return self.config[key] if the key is present, else ``default``."""
    if key in self.config:
        return self.config[key]
    return default
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Open a new argument group on the shared ARGS parser.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
319 def overwrite_argparse_epilog(msg: str) -> None:
320 """Allows your code to override the default epilog created by
324 msg: The epilog message to substitute for the default.
329 def is_flag_already_in_argv(var: str) -> bool:
332 True if a particular flag is passed on the commandline
336 var: The flag to search for.
344 def print_usage() -> None:
345 """Prints the normal help usage message out."""
352 program usage help text as a string.
354 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Order ARGS' argument groups for a help dump so that
    the main program's groups come last.

    A group counts as belonging to the main program when its title
    contains ``entry_module`` (if one was given) or PROGRAM_NAME.

    Args:
        entry_module: name of the module holding the program entry
            point, or None if unknown.

    Returns:
        A new list of ARGS' action groups: every other group first (in
        reverse of their original order), then the main program's
        groups in their original order.
    """
    main_groups = []
    other_groups = []
    for grp in ARGS._action_groups:
        # argparse allows a group without a title; treat it as "".
        title = grp.title or ""
        # Test this group's own title; comparing against the module's
        # GROUP title gave the same answer for every group.
        if (entry_module is not None and entry_module in title) or (
            PROGRAM_NAME in title
        ):
            main_groups.append(grp)
        else:
            other_groups.append(grp)
    # Each non-main group used to be inserted at index 0, which leaves
    # them in reverse order; keep that ordering.
    other_groups.reverse()
    return other_groups + main_groups
374 def _to_bool(in_str: str) -> bool:
377 in_str: the string to convert to boolean
380 A boolean equivalent of the original string based on its contents.
381 All conversion is case insensitive. A positive boolean (True) is
382 returned if the string value is any of the following:
391 Otherwise False is returned.
411 return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
413 def _process_dynamic_args(self, event):
414 """Invoked as a callback when a zk-based config changed."""
418 logger = logging.getLogger(__name__)
420 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
421 logger.debug("Update for %s at version=%d.", event.path, meta.version)
423 "Max known version for %s is %d.",
425 self.max_version.get(event.path, 0),
427 except Exception as e:
428 raise Exception("Error reading data from zookeeper") from e
430 # Make sure we process changes in order.
431 if meta.version > self.max_version.get(event.path, 0):
432 self.max_version[event.path] = meta.version
433 contents = contents.decode()
435 for arg in contents.split():
437 # Our rule is that arguments must contain the word
438 # 'dynamic' if we are going to allow them to change at
439 # runtime as a signal that the programmer is expecting
441 if "dynamic" in arg and config.config["config_allow_dynamic_updates"]:
442 temp_argv.append(arg)
443 logger.info("Updating %s from zookeeper async config change.", arg)
445 if len(temp_argv) > 0:
448 known, _ = ARGS.parse_known_args()
450 self.config.update(vars(known))
452 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
453 from pyutils import zookeeper
455 if not zkpath.startswith("/config/"):
456 zkpath = "/config/" + zkpath
457 zkpath = re.sub(r"//+", "/", zkpath)
461 self.zk = zookeeper.get_started_zk_client()
462 if not self.zk.exists(zkpath):
465 # Note: we're putting a watch on this config file. Our
466 # _process_dynamic_args routine will be called to reparse
467 # args when/if they change.
468 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
469 contents = contents.decode()
470 self.saved_messages.append(
471 f"Setting {zkpath}'s max_version to {meta.version}"
473 self.max_version[zkpath] = meta.version
474 self.saved_messages.append(f"Read config from zookeeper {zkpath}.")
476 except Exception as e:
477 self.saved_messages.append(
478 f"Failed to read {zkpath} from zookeeper: exception {e}"
482 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
483 if not os.path.exists(filepath):
485 with open(filepath, "r") as rf:
486 self.saved_messages.append(f"Read config from disk file {filepath}")
489 def _augment_sys_argv_from_loadfile(self):
490 """Internal. Augment with arguments persisted in a saved file."""
492 # Check for --config_loadfile in the args manually; argparse isn't
493 # invoked yet and can't be yet.
495 saw_other_args = False
496 grab_next_arg = False
497 for arg in sys.argv[1:]:
498 if "config_loadfile" in arg:
499 pieces = arg.split("=")
507 saw_other_args = True
509 if not loadfile or len(loadfile) == 0:
512 # Get contents from wherever.
514 if loadfile[:3] == "zk:":
515 contents = self._read_config_from_zookeeper(loadfile[3:])
517 contents = self._read_config_from_disk(loadfile)
521 msg = f"Augmenting commandline arguments with those from {loadfile}."
523 msg = f"Reading commandline arguments from {loadfile}."
524 print(msg, file=sys.stderr)
525 self.saved_messages.append(msg)
527 msg = f"Failed to read/parse contents from {loadfile}"
528 print(msg, file=sys.stderr)
529 self.saved_messages.append(msg)
532 # Augment args with new ones.
535 for arg in contents.split("\n")
536 if "config_savefile" not in arg
def dump_config(self) -> None:
    """Print the current config to stderr.

    Writes a "Global Configuration:" header line followed by a
    pretty-printed rendering of self.config.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(self.config, stream=sys.stderr)
546 def _write_config_to_disk(self, data: str, filepath: str) -> None:
547 with open(filepath, "w") as wf:
550 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
551 if not zkpath.startswith("/config/"):
552 zkpath = "/config/" + zkpath
553 zkpath = re.sub(r"//+", "/", zkpath)
556 from pyutils import zookeeper
558 self.zk = zookeeper.get_started_zk_client()
559 encoded_data = data.encode()
560 if len(encoded_data) > 1024 * 1024:
562 f"Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)"
564 if not self.zk.exists(zkpath):
565 self.zk.create(zkpath, encoded_data)
566 self.saved_messages.append(
567 f"Just created {zkpath}; setting its max_version to 0"
569 self.max_version[zkpath] = 0
571 meta = self.zk.set(zkpath, encoded_data)
572 self.saved_messages.append(
573 f"Setting {zkpath}'s max_version to {meta.version}"
575 self.max_version[zkpath] = meta.version
576 except Exception as e:
577 raise Exception(f"Failed to create zookeeper path {zkpath}") from e
578 self.saved_messages.append(f"Saved config to zookeeper in {zkpath}")
def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the commandline (plus any --config_loadfile contents).

    Main program should invoke this early in main().  Note that the
    :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this
    automatically.  Only the first call does any work; subsequent
    calls just return the current configuration.

    Args:
        entry_module: Optional string naming the module that contains
            the program entry point.  Only its basename is used, and
            only to order the groups shown by -h / --help.

    Returns:
        A dict containing the parsed program configuration.  Note that
        this can be safely ignored since it is also saved in
        `config.config` and may be used directly using that identifier.

    Raises:
        Exception: unrecognized arguments were present and
            --config_rejects_unrecognized_arguments was enabled.
        SystemExit: --config_exit_after_parse was passed.
    """
    if self.config_parse_called:
        return self.config

    # If we're about to do the usage message dump, put the main
    # module's argument group last in the list (if possible) so that
    # when the user passes -h or --help, it will be visible on the
    # screen w/o scrolling.  This just makes for a nicer --help screen.
    if any(arg in ("--help", "-h") for arg in sys.argv):
        if entry_module is not None:
            entry_module = os.path.basename(entry_module)
        ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
            entry_module
        )

    # Look for a --config_loadfile argument and, if found, read it.
    # Note that this works by jamming values onto sys.argv; kinda ugly.
    self._augment_sys_argv_from_loadfile()

    # Parse (possibly augmented, possibly completely overwritten)
    # commandline args with argparse normally and populate config.
    known, unknown = ARGS.parse_known_args()
    self.config.update(vars(known))

    # Either die on unrecognized flags (when asked to) or remember
    # them so they can be reported once logging is initialized.
    if unknown:
        if self.config.get("config_rejects_unrecognized_arguments"):
            raise Exception(
                f"Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting."
            )
        self.saved_messages.append(
            f"Config encountered unrecognized commandline arguments: {unknown}"
        )

    # Reconstruct sys.argv with only the unrecognized flags for the
    # benefit of future argument parsers (e.g. unittest's); if we
    # didn't recognize a flag, maybe someone else will.
    sys.argv = sys.argv[:1] + unknown
    self.parsed_argv = sys.argv[:1] + unknown

    # Check for savefile and populate it if requested.  Both writers
    # take (data, path) in that order.
    savefile = self.config.get("config_savefile")
    if savefile:
        data = "\n".join(ORIG_ARGV[1:])
        if savefile.startswith("zk:"):
            self._write_config_to_zookeeper(data, savefile[3:])
        else:
            self._write_config_to_disk(data, savefile)

    # Also dump the config on stderr if requested.
    if self.config.get("config_dump"):
        self.dump_config()

    # Finally, maybe exit now if the user passed
    # --config_exit_after_parse indicating they want to just
    # update a config file and halt.
    self.config_parse_called = True
    if self.config.get("config_exit_after_parse"):
        print("Exiting because of --config_exit_after_parse.")
        sys.exit(0)
    return self.config
def has_been_parsed(self) -> bool:
    """Report whether parse() has already run on this object.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
664 def late_logging(self):
665 """Log messages saved earlier now that logging has been initialized."""
666 logger = logging.getLogger(__name__)
667 logger.debug("Original commandline was: %s", ORIG_ARGV)
668 for _ in self.saved_messages:
672 # A global singleton instance of the Config class.
675 # A lot of client code uses config.config['whatever'] to lookup
676 # configuration so to preserve this we make this, config.config, with
677 # a __getitem__ method on it.
680 # Config didn't use to be a class; it was a mess of module-level
681 # functions and data. The functions below preserve the old interface
682 # so that existing clients do not need to be changed. As you can see,
683 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Create a new context for arguments and return a handle.

    Module-level convenience that forwards to the CONFIG singleton.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    return CONFIG.add_commandline_args(title=title, description=description)
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the global configuration via the CONFIG singleton.

    Main program should call this early in main().  Note that the
    :code:`bootstrap.initialize` wrapper takes care of this
    automatically.  This should only be called once per program
    invocation; subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Args:
        entry_module: passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed_config = CONFIG.parse(entry_module)
    return parsed_config
710 def error(message: str, exit_code: int = 1) -> None:
712 Convenience method for indicating a configuration error.
714 logging.error(message)
715 print(message, file=sys.stderr)
def has_been_parsed() -> bool:
    """Returns True iff the global config has already been parsed.

    Module-level convenience that asks the CONFIG singleton.
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Log messages saved earlier, now that logging has been initialized.

    Module-level convenience that forwards to the CONFIG singleton.
    """
    singleton = CONFIG
    singleton.late_logging()
729 def dump_config() -> None:
    """Print the current config to stderr."""
def argv_after_parse() -> Optional[List[str]]:
    """Return the argv with all known arguments removed.

    Returns:
        CONFIG.parsed_argv once the global config has been parsed;
        None before that.
    """
    # has_been_parsed is a method and must be called; the bare
    # attribute is a bound method object, which is always truthy.
    if CONFIG.has_been_parsed():
        return CONFIG.parsed_argv
    return None
def overwrite_argparse_epilog(msg: str) -> None:
    """Allows your code to override the default argparse epilog.

    Module-level convenience that forwards to
    Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg=msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Returns true if a particular flag is passed on the commandline.

    Module-level convenience that forwards to
    Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
761 def print_usage() -> None:
762 """Prints the normal help usage message out."""
769 program usage help text as a string.
771 return Config.usage()