3 # © Copyright 2021-2023, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it wraps :py:mod:`argparse`
11 (see https://docs.python.org/3/library/argparse.html):
15 from pyutils import config
17 # Call add_commandline_args to get an argparse.ArgumentParser
18 # for file.py. Each file uses a separate ArgumentParser
19 # chained off the main namespace.
20 parser = config.add_commandline_args(
22 "Args related to module doing the thing.",
25 # Then simply add argparse-style arguments to it, as usual.
27 "--module_do_the_thing",
30 help="Should the module do the thing?"
35 from pyutils import config
37 # main.py may have some arguments of its own, so add them.
38 parser = config.add_commandline_args(
40 "A program that does the thing.",
46 help="Should we really do the thing?"
50 config.parse() # Then remember to call config.parse() early on.
52 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
53 all commandline arguments will play nicely together across all modules / files
54 in your program automatically. Argparse help messages will group flags by
57 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
58 optionally wrap your program's entry point, it will remember to call
59 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
60 That looks like this::
62 from pyutils import bootstrap
68 if __name__ == '__main__':
71 Either way, you'll get an aggregated usage message along with flags broken
72 down per file in help::
76 [--module_do_the_thing MODULE_DO_THE_THING]
80 Args related to module doing the thing.
82 --module_do_the_thing MODULE_DO_THE_THING
83 Should the module do the thing?
86 A program that does the thing
89 Should we really do the thing?
91 Once :py:meth:`pyutils.config.parse` has been called (either automatically
92 by :py:mod:`pyutils.bootstrap` or manually), the program configuration
93 state is ready in a dict-like object called `config.config`. For example,
94 to check the state of the `--dry_run` flag::
96 if not config.config['dry_run']:
99 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
100 sets of commandline arguments using the `--config_savefile` and the
101 `--config_loadfile` arguments. The former saves all arguments (other than
102 itself) to an ascii file whose path you provide. The latter reads all
103 arguments from an ascii file whose path you provide.
105 Saving and loading sets of arguments can make complex operations easier
106 to set up. They also allow for dynamic arguments.
108 If you use Apache Zookeeper, you can prefix paths to
109 `--config_savefile` and `--config_loadfile` with the string "zk:"
110 to cause the path to be interpreted as a Zookeeper path rather
111 than one on the local filesystem. When loading arguments from
112 Zookeeper, the :py:mod:`pyutils.config` code registers a listener
113 to be notified on state change (e.g. when some other instance
114 overwrites your Zookeeper based configuration). Listeners then
115 dynamically update the value of any flag in the `config.config`
116 dict whose name contains the string "dynamic". So, for example,
117 the `--dynamic_database_connect_string` argument would be
118 modifiable at runtime when using Zookeeper based configurations.
119 Flags that do not contain the string "dynamic" will not change.
120 And nothing is dynamic unless we're reading configuration from
123 For more information about Zookeeper, see https://zookeeper.apache.org/.
132 from typing import Any, Dict, List, Optional
134 # This module is commonly used by others in here and should avoid
135 # taking any unnecessary dependencies back on them.
# Snapshot the original program arguments as soon as this module loads;
# the program name is derived from the first entry of that snapshot.
ORIG_ARGV: List[str] = list(sys.argv)
PROGRAM_NAME: str = os.path.basename(ORIG_ARGV[0])
142 class OptionalRawFormatter(argparse.HelpFormatter):
143 """This formatter has the same behavior as the normal argparse
144 text formatter except when the help text of an argument begins
145 with "RAW|". In that case, the line breaks are preserved and the
146 text is not wrapped. It is enabled automatically if you use
147 :py:mod:`pyutils.config`.
149 Use this by prepending "RAW|" in your help message to disable
150 word wrapping and indicate that the help message is already
151 formatted and should be preserved. Here's an example usage::
157 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
159 help='''RAW|Our mode of operation. One of:
161 PLAY = play wordle with me! Pick a random solution or
162 specify a solution with --template.
164 CHEAT = given a --template and, optionally, --letters_in_word
165 and/or --letters_to_avoid, return the best guess word;
167 AUTOPLAY = given a complete word in --template, guess it step
168 by step showing work;
170 SELFTEST = autoplay every possible solution keeping track of
171 wins/losses and average number of guesses;
173 PRECOMPUTE = populate hash table with optimal guesses.
179 def _split_lines(self, text, width):
180 if text.startswith("RAW|"):
181 return text[4:].splitlines()
182 return argparse.HelpFormatter._split_lines(self, text, width)
185 # A global argparser that we will collect arguments in. Each module (including
186 # us) will add arguments to a separate argument group.
187 ARGS = argparse.ArgumentParser(
189 formatter_class=OptionalRawFormatter,
190 fromfile_prefix_chars="@",
191 epilog=f"{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.",
192 # I don't fully understand why but when loaded by sphinx sometimes
193 # the same module is loaded many times causing any arguments it
194 # registers via module-level code to be redefined. Work around
195 # this iff the program is 'sphinx-build'
196 conflict_handler="resolve" if PROGRAM_NAME == "sphinx-build" else "error",
199 # Arguments specific to config.py. Other users should get their own group by
200 # invoking config.add_commandline_args.
201 GROUP = ARGS.add_argument_group(
202 f"Global Config ({__file__})",
203 "Args that control the global config itself; how meta!",
209 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
215 help="Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.",
222 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
225 "--config_allow_dynamic_updates",
228 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
231 "--config_rejects_unrecognized_arguments",
234 help="If present, config will raise an exception if it doesn't recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.",
237 "--config_exit_after_parse",
240 help="If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.",
248 Do not instantiate this class directly; it is meant to be a
249 global singleton called `pyutils.config.CONFIG`. Instead, use
250 :py:meth:`pyutils.config.add_commandline_args` to get an
251 `ArgumentGroup` and add your arguments to it. Then call
252 :py:meth:`pyutils.config.parse` to parse global configuration
253 from your main program entry point.
255 Everything in the config module used to be module-level functions and
256 variables but it made the code ugly and harder to maintain. Now, this
257 class does the heavy lifting. We still rely on some globals, though:
259 - ARGS and GROUP to interface with argparse
260 - PROGRAM_NAME stores argv[0] close to program invocation
261 - ORIG_ARGV stores the original argv list close to program invocation
262 - CONFIG and config: hold the (singleton) instance of this class.
266 # Has our parse() method been invoked yet?
267 self.config_parse_called = False
269 # A configuration dictionary that will contain parsed
270 # arguments. This is the data that is most interesting to our
271 # callers as it will hold the configuration result.
272 self.config: Dict[str, Any] = {}
274 # Defer logging messages until later when logging has been
276 self.saved_messages: List[str] = []
278 # A zookeeper client that is lazily created so as to not incur
279 # the latency of connecting to zookeeper for programs that are
280 # not reading or writing their config data into zookeeper.
281 self.zk: Optional[Any] = None
283 # Per known zk file, what is the max version we have seen?
284 self.max_version: Dict[str, int] = {}
286 # The argv after parsing known args.
287 self.parsed_argv: Optional[List[str]] = None
289 def __getitem__(self, key: str) -> Optional[Any]:
290 """If someone uses []'s on us, pass it onto self.config."""
291 return self.config.get(key, None)
293 def __setitem__(self, key: str, value: Any) -> None:
294 self.config[key] = value
296 def __contains__(self, key: str) -> bool:
297 return key in self.config
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Look up key in self.config with a fallback, like dict.get.

    Args:
        key: the name of the setting to look up.
        default: the value to return when key is absent.

    Returns:
        The value stored under key, or default when key is absent.
    """
    return self.config.get(key, default)
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Create a new argument group on the global parser (ARGS) and
    return it to the caller for module-level population.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
319 def overwrite_argparse_epilog(msg: str) -> None:
320 """Allows your code to override the default epilog created by
324 msg: The epilog message to substitute for the default.
329 def is_flag_already_in_argv(var: str) -> bool:
332 True if a particular flag is passed on the commandline
336 var: The flag to search for.
347 full program usage help text as a string.
349 return ARGS.format_help()
def short_usage() -> str:
    """Format the short usage text of the global parser (ARGS).

    Returns:
        program short usage text as a string.
    """
    usage_text = ARGS.format_usage()
    return usage_text
def print_usage() -> None:
    """Prints the full help usage message out."""
    full_help = config.usage()
    print(full_help)
def print_short_usage() -> None:
    """Prints a short usage/help message."""
    brief_help = config.short_usage()
    print(brief_help)
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Compute a help-friendly ordering of ARGS' argument groups.

    Groups whose title mentions entry_module or PROGRAM_NAME are placed
    at the end of the returned list so that the main program's arguments
    come last in a generated help string.  Every other group is inserted
    at the front, so those groups come out in the reverse of the order
    in which they appear in ARGS._action_groups.

    Args:
        entry_module: name of the module that contains the program entry
            point, or None when it is not known.

    Returns:
        A new list holding every group in ARGS._action_groups, reordered.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # A group may have no title; treat that as an empty title rather
        # than failing the substring tests below.
        title = grp.title or ""
        if entry_module is not None and entry_module in title:
            reordered_action_groups.append(grp)
        elif PROGRAM_NAME in title:
            # Test this group's own title, not the module-level GROUP's
            # title (which is the same on every iteration).
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
387 def _to_bool(in_str: str) -> bool:
390 in_str: the string to convert to boolean
393 A boolean equivalent of the original string based on its contents.
394 All conversion is case insensitive. A positive boolean (True) is
395 returned if the string value is any of the following:
404 Otherwise False is returned.
424 return in_str.lower() in {"true", "1", "yes", "y", "t", "on"}
426 def _process_dynamic_args(self, event) -> None:
427 """Invoked as a callback when a zk-based config changed."""
431 logger = logging.getLogger(__name__)
433 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
434 logger.debug("Update for %s at version=%d.", event.path, meta.version)
436 "Max known version for %s is %d.",
438 self.max_version.get(event.path, 0),
440 except Exception as e:
441 raise Exception("Error reading data from zookeeper") from e
443 # Make sure we process changes in order.
444 if meta.version > self.max_version.get(event.path, 0):
445 self.max_version[event.path] = meta.version
446 contents = contents.decode()
448 for arg in contents.split():
450 # Our rule is that arguments must contain the word
451 # 'dynamic' if we are going to allow them to change at
452 # runtime as a signal that the programmer is expecting
454 if "dynamic" in arg and config.config["config_allow_dynamic_updates"]:
455 temp_argv.append(arg)
456 logger.info("Updating %s from zookeeper async config change.", arg)
461 known, _ = ARGS.parse_known_args()
463 self.config.update(vars(known))
465 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
466 from pyutils import zookeeper
468 if not zkpath.startswith("/config/"):
469 zkpath = "/config/" + zkpath
470 zkpath = re.sub(r"//+", "/", zkpath)
474 self.zk = zookeeper.get_started_zk_client()
475 if not self.zk.exists(zkpath):
478 # Note: we're putting a watch on this config file. Our
479 # _process_dynamic_args routine will be called to reparse
480 # args when/if they change.
481 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
482 contents = contents.decode()
483 self.saved_messages.append(
484 f"Setting {zkpath}'s max_version to {meta.version}"
486 self.max_version[zkpath] = meta.version
487 self.saved_messages.append(f"Read config from zookeeper {zkpath}.")
489 except Exception as e:
490 self.saved_messages.append(
491 f"Failed to read {zkpath} from zookeeper: exception {e}"
495 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
496 if not os.path.exists(filepath):
498 with open(filepath, "r") as rf:
499 self.saved_messages.append(f"Read config from disk file {filepath}")
502 def _augment_sys_argv_from_loadfile(self):
503 """Internal. Augment with arguments persisted in a saved file."""
505 # Check for --config_loadfile in the args manually; argparse isn't
506 # invoked yet and can't be yet.
508 saw_other_args = False
509 grab_next_arg = False
510 for arg in sys.argv[1:]:
511 if "config_loadfile" in arg:
512 pieces = arg.split("=")
520 saw_other_args = True
525 # Get contents from wherever.
527 if loadfile[:3] == "zk:":
528 contents = self._read_config_from_zookeeper(loadfile[3:])
530 contents = self._read_config_from_disk(loadfile)
534 msg = f"Augmenting commandline arguments with those from {loadfile}."
536 msg = f"Reading commandline arguments from {loadfile}."
537 print(msg, file=sys.stderr)
538 self.saved_messages.append(msg)
540 msg = f"Failed to read/parse contents from {loadfile}"
541 print(msg, file=sys.stderr)
542 self.saved_messages.append(msg)
545 # Augment args with new ones.
548 for arg in contents.split("\n")
549 if "config_savefile" not in arg
def dump_config(self):
    """Print the current config to stderr.

    Writes a "Global Configuration:" header line followed by a
    pretty-printed rendering of self.config, both to sys.stderr.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(self.config, stream=sys.stderr)
559 def _write_config_to_disk(self, data: str, filepath: str) -> None:
560 with open(filepath, "w") as wf:
563 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
564 if not zkpath.startswith("/config/"):
565 zkpath = "/config/" + zkpath
566 zkpath = re.sub(r"//+", "/", zkpath)
569 from pyutils import zookeeper
571 self.zk = zookeeper.get_started_zk_client()
572 encoded_data = data.encode()
573 if len(encoded_data) > 1024 * 1024:
575 f"Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)"
577 if not self.zk.exists(zkpath):
578 self.zk.create(zkpath, encoded_data)
579 self.saved_messages.append(
580 f"Just created {zkpath}; setting its max_version to 0"
582 self.max_version[zkpath] = 0
584 meta = self.zk.set(zkpath, encoded_data)
585 self.saved_messages.append(
586 f"Setting {zkpath}'s max_version to {meta.version}"
588 self.max_version[zkpath] = meta.version
589 except Exception as e:
590 raise Exception(f"Failed to create zookeeper path {zkpath}") from e
591 self.saved_messages.append(f"Saved config to zookeeper in {zkpath}")
593 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
594 """Main program should invoke this early in main(). Note that the
595 :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this automatically.
596 This should only be called once per program invocation.
599 entry_module: Optional string to ensure we understand which module
600 contains the program entry point. Determined heuristically if not
604 A dict containing the parsed program configuration. Note that this can
605 be safely ignored since it is also saved in `config.config` and may
606 be used directly using that identifier.
608 if self.config_parse_called:
611 # If we're about to do the usage message dump, put the main
612 # module's argument group last in the list (if possible) so that
613 # when the user passes -h or --help, it will be visible on the
614 # screen w/o scrolling. This just makes for a nicer --help screen.
616 if arg in {"--help", "-h"}:
617 if entry_module is not None:
618 entry_module = os.path.basename(entry_module)
619 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
623 # Look for --config_loadfile argument and, if found, read/parse
624 # Note that this works by jamming values onto sys.argv; kinda ugly.
625 self._augment_sys_argv_from_loadfile()
627 # Parse (possibly augmented, possibly completely overwritten)
628 # commandline args with argparse normally and populate config.
629 known, unknown = ARGS.parse_known_args()
630 self.config.update(vars(known))
632 # Reconstruct the sys.argv with unrecognized flags for the
633 # benefit of future argument parsers. For example,
634 # unittest_main in python has some of its own flags. If we
635 # didn't recognize it, maybe someone else will. Or, if
636 # --config_rejects_unrecognized_arguments was passed, die
637 # if we have unknown arguments.
639 if config["config_rejects_unrecognized_arguments"]:
641 f"Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting."
643 self.saved_messages.append(
644 f"Config encountered unrecognized commandline arguments: {unknown}"
646 sys.argv = sys.argv[:1] + unknown
647 self.parsed_argv = sys.argv[:1] + unknown
649 # Check for savefile and populate it if requested.
650 savefile = config["config_savefile"]
652 data = "\n".join(ORIG_ARGV[1:])
653 if savefile[:3] == "zk:":
654 self._write_config_to_zookeeper(savefile[3:], data)
656 self._write_config_to_disk(savefile, data)
658 # Also dump the config on stderr if requested.
659 if config["config_dump"]:
662 # Finally, maybe exit now if the user passed
663 # --config_exit_after_parse indicating they want to just
664 # update a config file and halt.
665 self.config_parse_called = True
666 if config["config_exit_after_parse"]:
667 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Report whether the global config has already been parsed.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
677 def late_logging(self):
678 """Log messages saved earlier now that logging has been initialized."""
679 logger = logging.getLogger(__name__)
680 logger.debug("Invocation commandline: %s", ORIG_ARGV)
681 for _ in self.saved_messages:
685 # A global singleton instance of the Config class.
688 # A lot of client code uses config.config['whatever'] to lookup
689 # configuration so to preserve this we make this, config.config, with
690 # a __getitem__ method on it.
693 # Config didn't use to be a class; it was a mess of module-level
694 # functions and data. The functions below preserve the old interface
695 # so that existing clients do not need to be changed. As you can see,
696 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Create a new context for arguments and return a handle.  This
    simply forwards to CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = CONFIG.add_commandline_args(title, description)
    return new_group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the global configuration; the main program should call
    this early in main().

    Note that the :code:`bootstrap.initialize` wrapper takes care of
    this automatically.  This should only be called once per program
    invocation.  Subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Args:
        entry_module: passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns for entry_module.
    """
    parsed = CONFIG.parse(entry_module)
    return parsed
723 def error(message: str, exit_code: int = 1) -> None:
725 Convenience method for indicating a configuration error.
727 logging.error(message)
728 print(message, file=sys.stderr)
def has_been_parsed() -> bool:
    """Returns True iff the global config has already been parsed.

    This forwards to CONFIG.has_been_parsed().
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Log messages saved earlier now that logging has been initialized.

    This forwards to CONFIG.late_logging().
    """
    CONFIG.late_logging()
742 def dump_config() -> None:
743 """Print the current config to stdout."""
747 def argv_after_parse() -> Optional[List[str]]:
748 """Return the argv with all known arguments removed."""
749 if CONFIG.has_been_parsed():
750 return CONFIG.parsed_argv
def overwrite_argparse_epilog(msg: str) -> None:
    """Allows your code to override the default epilog.  This forwards
    to Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Returns true if a particular flag is passed on the commandline.
    This forwards to Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
774 def print_usage() -> None:
775 """Prints the normal help usage message out."""
def print_short_usage() -> None:
    """Prints a short usage/help message by forwarding to
    Config.print_short_usage().
    """
    Config.print_short_usage()
786 program usage help text as a string.
788 return Config.usage()
def short_usage() -> str:
    """Fetch the short usage text by forwarding to Config.short_usage().

    Returns:
        program short usage help text as a string.
    """
    usage_text = Config.short_usage()
    return usage_text