3 # © Copyright 2021-2023, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it wraps :py:mod:`argparse`
11 (see https://docs.python.org/3/library/argparse.html):
15 from pyutils import config
17 # Call add_commandline_args to get an argparse.ArgumentParser
18 # for file.py. Each file uses a separate ArgumentParser
19 # chained off the main namespace.
20 parser = config.add_commandline_args(
22 "Args related to module doing the thing.",
25 # Then simply add argparse-style arguments to it, as usual.
27 "--module_do_the_thing",
30 help="Should the module do the thing?"
35 from pyutils import config
37 # main.py may have some arguments of its own, so add them.
38 parser = config.add_commandline_args(
40 "A program that does the thing.",
46 help="Should we really do the thing?"
50 config.parse() # Then remember to call config.parse() early on.
52 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
53 all commandline arguments will play nicely together across all modules / files
54 in your program automatically. Argparse help messages will group flags by
57 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
58 optionally wrap your program's entry point, it will remember to call
59 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
60 That looks like this::
62 from pyutils import bootstrap
68 if __name__ == '__main__':
71 Either way, you'll get an aggregated usage message along with flags broken
72 down per file in help::
76 [--module_do_the_thing MODULE_DO_THE_THING]
80 Args related to module doing the thing.
82 --module_do_the_thing MODULE_DO_THE_THING
83 Should the module do the thing?
86 A program that does the thing
89 Should we really do the thing?
91 Once :py:meth:`pyutils.config.parse` has been called (either automatically
92 by :py:mod:`pyutils.bootstrap` or manually), the program configuration
93 state is ready in a dict-like object called `config.config`. For example,
94 to check the state of the `--dry_run` flag::
96 if not config.config['dry_run']:
99 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
100 sets of commandline arguments using the `--config_savefile` and the
101 `--config_loadfile` arguments. The former saves all arguments (other than
102 itself) to an ascii file whose path you provide. The latter reads all
103 arguments from an ascii file whose path you provide.
105 Saving and loading sets of arguments can make complex operations easier
106 to set up. Doing so also allows for dynamic arguments.
108 If you use Apache Zookeeper, you can prefix paths to
109 `--config_savefile` and `--config_loadfile` with the string "zk:"
110 to cause the path to be interpreted as a Zookeeper path rather
111 than one on the local filesystem. When loading arguments from
112 Zookeeper, the :py:mod:`pyutils.config` code registers a listener
113 to be notified on state change (e.g. when some other instance
114 overwrites your Zookeeper based configuration). Listeners then
115 dynamically update the value of any flag in the `config.config`
116 dict whose name contains the string "dynamic". So, for example,
117 the `--dynamic_database_connect_string` argument would be
118 modifiable at runtime when using Zookeeper based configurations.
119 Flags that do not contain the string "dynamic" will not change.
120 And nothing is dynamic unless we're reading configuration from
123 For more information about Zookeeper, see https://zookeeper.apache.org/.
132 from typing import Any, Dict, List, Optional
134 # This module is commonly used by others in here and should avoid
135 # taking any unnecessary dependencies back on them.
# Snapshot the invocation details immediately upon module load: sys.argv is
# rewritten later on (parsing strips the arguments it recognizes), so this is
# the only reliable record of how the program was originally invoked.
PROGRAM_NAME: str = os.path.basename(sys.argv[0])
ORIG_ARGV: List[str] = list(sys.argv)
142 class OptionalRawFormatter(argparse.HelpFormatter):
143 """This formatter has the same behavior as the normal argparse
144 text formatter except when the help text of an argument begins
145 with "RAW|". In that case, the line breaks are preserved and the
146 text is not wrapped. It is enabled automatically if you use
147 :py:mod:`pyutils.config`.
149 Use this by prepending "RAW|" in your help message to disable
150 word wrapping and indicate that the help message is already
151 formatted and should be preserved. Here's an example usage::
157 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
159 help='''RAW|Our mode of operation. One of:
161 PLAY = play wordle with me! Pick a random solution or
162 specify a solution with --template.
164 CHEAT = given a --template and, optionally, --letters_in_word
165 and/or --letters_to_avoid, return the best guess word;
167 AUTOPLAY = given a complete word in --template, guess it step
168 by step showing work;
170 SELFTEST = autoplay every possible solution keeping track of
171 wins/losses and average number of guesses;
173 PRECOMPUTE = populate hash table with optimal guesses.
def _split_lines(self, text: str, width: int) -> List[str]:
    """Split one argument's help text into the lines to be displayed.

    Args:
        text: the help string of an argument.
        width: the width handed over by argparse for wrapping help text.

    Returns:
        If text begins with "RAW|", the remainder of the text split at
        its own line breaks (no re-wrapping).  Otherwise, whatever the
        stock argparse.HelpFormatter produces for the same text/width.
    """
    raw_marker = "RAW|"
    if text.startswith(raw_marker):
        # The message is declared pre-formatted; keep its line breaks.
        return text[len(raw_marker):].splitlines()
    return argparse.HelpFormatter._split_lines(self, text, width)
185 # A global argparser that we will collect arguments in. Each module (including
186 # us) will add arguments to a separate argument group.
187 ARGS = argparse.ArgumentParser(
189 formatter_class=OptionalRawFormatter,
190 fromfile_prefix_chars="@",
191 epilog=f"{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.",
192 # I don't fully understand why but when loaded by sphinx sometimes
193 # the same module is loaded many times causing any arguments it
194 # registers via module-level code to be redefined. Work around
195 # this iff the program is 'sphinx-build'
196 conflict_handler="resolve" if PROGRAM_NAME == "sphinx-build" else "error",
199 # Arguments specific to config.py. Other users should get their own group by
200 # invoking config.add_commandline_args.
201 GROUP = ARGS.add_argument_group(
202 f"Global Config ({__file__})",
203 "Args that control the global config itself; how meta!",
209 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
215 help="Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.",
222 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
225 "--config_allow_dynamic_updates",
228 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
231 "--config_rejects_unrecognized_arguments",
234 help="If present, config will raise an exception if it doesn't recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.",
237 "--config_exit_after_parse",
240 help="If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.",
248 Do not instantiate this class directly; it is meant to be a
249 global singleton called `pyutils.config.CONFIG`. Instead, use
250 :py:meth:`pyutils.config.add_commandline_args` to get an
251 `ArgumentGroup` and add your arguments to it. Then call
252 :py:meth:`pyutils.config.parse` to parse global configuration
253 from your main program entry point.
255 Everything in the config module used to be module-level functions and
256 variables but it made the code ugly and harder to maintain. Now, this
257 class does the heavy lifting. We still rely on some globals, though:
259 - ARGS and GROUP to interface with argparse
260 - PROGRAM_NAME stores argv[0] close to program invocation
261 - ORIG_ARGV stores the original argv list close to program invocation
262 - CONFIG and config: hold the (singleton) instance of this class.
266 # Has our parse() method been invoked yet?
267 self.config_parse_called = False
269 # A configuration dictionary that will contain parsed
270 # arguments. This is the data that is most interesting to our
271 # callers as it will hold the configuration result.
272 self.config: Dict[str, Any] = {}
274 # Defer logging messages until later when logging has been
276 self.saved_messages: List[str] = []
278 # A zookeeper client that is lazily created so as to not incur
279 # the latency of connecting to zookeeper for programs that are
280 # not reading or writing their config data into zookeeper.
281 self.zk: Optional[Any] = None
283 # Per known zk file, what is the max version we have seen?
284 self.max_version: Dict[str, int] = {}
286 # The argv after parsing known args.
287 self.parsed_argv: Optional[List[str]] = None
def __getitem__(self, key: str) -> Optional[Any]:
    """If someone uses []'s on us, pass it onto self.config.

    Args:
        key: the name of the configuration entry to look up.

    Returns:
        The value stored under key, or None if there is no such entry
        (a missing key does not raise KeyError).
    """
    return self.config.get(key)
def __setitem__(self, key: str, value: Any) -> None:
    """Support ``obj[key] = value`` by storing the pair in self.config.

    Args:
        key: the name of the configuration entry to create or replace.
        value: the value to store under that name.
    """
    self.config[key] = value
def __contains__(self, key: str) -> bool:
    """Support ``key in obj`` by testing membership in self.config.

    Args:
        key: the name of the configuration entry to test for.

    Returns:
        True if self.config has an entry named key, False otherwise.
    """
    return key in self.config
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Dict-style lookup with a fallback, delegated to self.config.

    Args:
        key: the name of the configuration entry to look up.
        default: the value to hand back when there is no such entry.

    Returns:
        The value stored under key, or default if key is not present.
    """
    return self.config.get(key, default)
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Create a new context for arguments and return an ArgumentGroup
    to the caller for module-level population.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    return ARGS.add_argument_group(title, description)
319 def overwrite_argparse_epilog(msg: str) -> None:
320 """Allows your code to override the default epilog created by
324 msg: The epilog message to substitute for the default.
329 def is_flag_already_in_argv(var: str) -> bool:
332 True if a particular flag is passed on the commandline
336 var: The flag to search for.
347 full program usage help text as a string.
349 return ARGS.format_help()
def short_usage() -> str:
    """
    Returns:
        program short usage text as a string.
    """
    return ARGS.format_usage()
def print_usage() -> None:
    """Prints the full help usage message out."""
    print(config.usage())
def print_short_usage() -> None:
    """Prints a short usage/help message."""
    print(config.short_usage())
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Used to reorder the arguments before dumping out a
    generated help string such that the main program's arguments come
    last.

    Args:
        entry_module: name of the module containing the program entry
            point, or None if it is not known.

    Returns:
        A list holding every action group of the global parser.  Groups
        whose title mentions entry_module or PROGRAM_NAME are appended
        (keeping their relative order); every other group is inserted at
        the front of the list.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # argparse allows a group without a title; treat that as empty
        # rather than failing on the substring tests below.
        title = grp.title or ""
        # Test the title of the group being visited.  Checking the
        # Global Config group's title here instead would give the same
        # answer for every group in the loop.
        belongs_to_main = (
            entry_module is not None and entry_module in title
        ) or PROGRAM_NAME in title
        if belongs_to_main:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
def _to_bool(in_str: str) -> bool:
    """Convert a string into a boolean.

    Args:
        in_str: the string to convert to boolean

    Returns:
        A boolean equivalent of the original string based on its contents.
        All conversion is case insensitive.  A positive boolean (True) is
        returned if the string value is any of the following:

        * "true"
        * "t"
        * "1"
        * "yes"
        * "y"
        * "on"

        Otherwise False is returned.  Note that the string is compared
        as given; surrounding whitespace is not stripped.
    """
    return in_str.lower() in {"true", "1", "yes", "y", "t", "on"}
429 def _process_dynamic_args(self, event) -> None:
430 """Invoked as a callback when a zk-based config changed."""
434 logger = logging.getLogger(__name__)
436 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
437 logger.debug("Update for %s at version=%d.", event.path, meta.version)
439 "Max known version for %s is %d.",
441 self.max_version.get(event.path, 0),
443 except Exception as e:
444 raise Exception("Error reading data from zookeeper") from e
446 # Make sure we process changes in order.
447 if meta.version > self.max_version.get(event.path, 0):
448 self.max_version[event.path] = meta.version
449 contents = contents.decode()
451 for arg in contents.split():
453 # Our rule is that arguments must contain the word
454 # 'dynamic' if we are going to allow them to change at
455 # runtime as a signal that the programmer is expecting
457 if "dynamic" in arg and config.config["config_allow_dynamic_updates"]:
458 temp_argv.append(arg)
459 logger.info("Updating %s from zookeeper async config change.", arg)
464 known, _ = ARGS.parse_known_args()
466 self.config.update(vars(known))
468 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
469 from pyutils import zookeeper
471 if not zkpath.startswith("/config/"):
472 zkpath = "/config/" + zkpath
473 zkpath = re.sub(r"//+", "/", zkpath)
477 self.zk = zookeeper.get_started_zk_client()
478 if not self.zk.exists(zkpath):
481 # Note: we're putting a watch on this config file. Our
482 # _process_dynamic_args routine will be called to reparse
483 # args when/if they change.
484 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
485 contents = contents.decode()
486 self.saved_messages.append(
487 f"Setting {zkpath}'s max_version to {meta.version}"
489 self.max_version[zkpath] = meta.version
490 self.saved_messages.append(f"Read config from zookeeper {zkpath}.")
492 except Exception as e:
493 self.saved_messages.append(
494 f"Failed to read {zkpath} from zookeeper: exception {e}"
498 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
499 if not os.path.exists(filepath):
501 with open(filepath, "r") as rf:
502 self.saved_messages.append(f"Read config from disk file {filepath}")
505 def _augment_sys_argv_from_loadfile(self):
506 """Internal. Augment with arguments persisted in a saved file."""
508 # Check for --config_loadfile in the args manually; argparse isn't
509 # invoked yet and can't be yet.
511 saw_other_args = False
512 grab_next_arg = False
513 for arg in sys.argv[1:]:
514 if "config_loadfile" in arg:
515 pieces = arg.split("=")
523 saw_other_args = True
528 # Get contents from wherever.
530 if loadfile[:3] == "zk:":
531 contents = self._read_config_from_zookeeper(loadfile[3:])
533 contents = self._read_config_from_disk(loadfile)
537 msg = f"Augmenting commandline arguments with those from {loadfile}."
539 msg = f"Reading commandline arguments from {loadfile}."
540 print(msg, file=sys.stderr)
541 self.saved_messages.append(msg)
543 msg = f"Failed to read/parse contents from {loadfile}"
544 print(msg, file=sys.stderr)
545 self.saved_messages.append(msg)
548 # Augment args with new ones.
551 for arg in contents.split("\n")
552 if "config_savefile" not in arg
def dump_config(self) -> None:
    """Print the current config to stderr.

    Writes a "Global Configuration:" header line followed by a
    pretty-printed rendering of self.config, both on sys.stderr.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(self.config, stream=sys.stderr)
562 def _write_config_to_disk(self, data: str, filepath: str) -> None:
563 with open(filepath, "w") as wf:
566 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
567 if not zkpath.startswith("/config/"):
568 zkpath = "/config/" + zkpath
569 zkpath = re.sub(r"//+", "/", zkpath)
572 from pyutils import zookeeper
574 self.zk = zookeeper.get_started_zk_client()
575 encoded_data = data.encode()
576 if len(encoded_data) > 1024 * 1024:
578 f"Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)"
580 if not self.zk.exists(zkpath):
581 self.zk.create(zkpath, encoded_data)
582 self.saved_messages.append(
583 f"Just created {zkpath}; setting its max_version to 0"
585 self.max_version[zkpath] = 0
587 meta = self.zk.set(zkpath, encoded_data)
588 self.saved_messages.append(
589 f"Setting {zkpath}'s max_version to {meta.version}"
591 self.max_version[zkpath] = meta.version
592 except Exception as e:
593 raise Exception(f"Failed to create zookeeper path {zkpath}") from e
594 self.saved_messages.append(f"Saved config to zookeeper in {zkpath}")
596 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
597 """Main program should invoke this early in main(). Note that the
598 :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this automatically.
599 This should only be called once per program invocation.
602 entry_module: Optional string to ensure we understand which module
603 contains the program entry point. Determined heuristically if not
607 A dict containing the parsed program configuration. Note that this can
608 be safely ignored since it is also saved in `config.config` and may
609 be used directly using that identifier.
612 Exception: if unrecognized config argument(s) are detected and the
613 --config_rejects_unrecognized_arguments argument is enabled.
615 if self.config_parse_called:
618 # If we're about to do the usage message dump, put the main
619 # module's argument group last in the list (if possible) so that
620 # when the user passes -h or --help, it will be visible on the
621 # screen w/o scrolling. This just makes for a nicer --help screen.
623 if arg in {"--help", "-h"}:
624 if entry_module is not None:
625 entry_module = os.path.basename(entry_module)
626 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
630 # Look for --config_loadfile argument and, if found, read/parse it.
631 # Note that this works by jamming values onto sys.argv; kinda ugly.
632 self._augment_sys_argv_from_loadfile()
634 # Parse (possibly augmented, possibly completely overwritten)
635 # commandline args with argparse normally and populate config.
636 known, unknown = ARGS.parse_known_args()
637 self.config.update(vars(known))
639 # Reconstruct the sys.argv with unrecognized flags for the
640 # benefit of future argument parsers. For example,
641 # unittest_main in python has some of its own flags. If we
642 # didn't recognize it, maybe someone else will. Or, if
643 # --config_rejects_unrecognized_arguments was passed, die
644 # if we have unknown arguments.
646 if config["config_rejects_unrecognized_arguments"]:
648 f"Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting."
650 self.saved_messages.append(
651 f"Config encountered unrecognized commandline arguments: {unknown}"
653 sys.argv = sys.argv[:1] + unknown
654 self.parsed_argv = sys.argv[:1] + unknown
656 # Check for savefile and populate it if requested.
657 savefile = config["config_savefile"]
659 data = "\n".join(ORIG_ARGV[1:])
660 if savefile[:3] == "zk:":
661 self._write_config_to_zookeeper(savefile[3:], data)
663 self._write_config_to_disk(savefile, data)
665 # Also dump the config on stderr if requested.
666 if config["config_dump"]:
669 # Finally, maybe exit now if the user passed
670 # --config_exit_after_parse indicating they want to just
671 # update a config file and halt.
672 self.config_parse_called = True
673 if config["config_exit_after_parse"]:
674 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Returns True iff the global config has already been parsed.

    This simply reports the config_parse_called flag of this instance.
    """
    return self.config_parse_called
684 def late_logging(self):
685 """Log messages saved earlier now that logging has been initialized."""
686 logger = logging.getLogger(__name__)
687 logger.debug("Invocation commandline: %s", ORIG_ARGV)
688 for _ in self.saved_messages:
692 # A global singleton instance of the Config class.
695 # A lot of client code uses config.config['whatever'] to lookup
696 # configuration so to preserve this we make this, config.config, with
697 # a __getitem__ method on it.
700 # Config didn't use to be a class; it was a mess of module-level
701 # functions and data. The functions below preserve the old interface
702 # so that existing clients do not need to be changed. As you can see,
703 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Create a new context for arguments and return a handle.  An alias
    for config.config.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    return CONFIG.add_commandline_args(title, description)
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Main program should call this early in main().  Note that the
    :code:`bootstrap.initialize` wrapper takes care of this automatically.
    This should only be called once per program invocation.  Subsequent
    calls do not reparse the configuration settings but rather just
    return the current state.

    Args:
        entry_module: passed through unchanged to the parse method of
            the global Config instance.

    Returns:
        Whatever the parse method of the global Config instance returns.
    """
    return CONFIG.parse(entry_module)
730 def error(message: str, exit_code: int = 1) -> None:
732 Convenience method for indicating a configuration error.
734 logging.error(message)
735 print(message, file=sys.stderr)
def has_been_parsed() -> bool:
    """Returns True iff the global config has already been parsed.

    A thunk into the has_been_parsed method of the global Config instance.
    """
    return CONFIG.has_been_parsed()
def late_logging() -> None:
    """Log messages saved earlier now that logging has been initialized.

    A thunk into the late_logging method of the global Config instance.
    """
    CONFIG.late_logging()
749 def dump_config() -> None:
750 """Print the current config to stderr."""
754 def argv_after_parse() -> Optional[List[str]]:
755 """Return the argv with all known arguments removed."""
756 if CONFIG.has_been_parsed():
757 return CONFIG.parsed_argv
def overwrite_argparse_epilog(msg: str) -> None:
    """Allows your code to override the default epilog created by
    this module.  A thunk into Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Returns true if a particular flag is passed on the commandline.
    A thunk into Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        The answer given by Config.is_flag_already_in_argv for var.
    """
    return Config.is_flag_already_in_argv(var)
781 def print_usage() -> None:
782 """Prints the normal help usage message out."""
def print_short_usage() -> None:
    """Prints a short usage/help message.

    A thunk into Config.print_short_usage.
    """
    Config.print_short_usage()
793 program usage help text as a string.
795 return Config.usage()
def short_usage() -> str:
    """
    Returns:
        program short usage help text as a string.
    """
    return Config.short_usage()