3 # © Copyright 2021-2022, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it wraps :py:mod:`argparse`
11 (see https://docs.python.org/3/library/argparse.html):
15 from pyutils import config
17 # Call add_commandline_args to get an argparse.ArgumentParser
18 # for file.py. Each file uses a separate ArgumentParser
19 # chained off the main namespace.
20 parser = config.add_commandline_args(
22 "Args related to module doing the thing.",
25 # Then simply add argparse-style arguments to it, as usual.
27 "--module_do_the_thing",
30 help="Should the module do the thing?"
35 from pyutils import config
37 # main.py may have some arguments of its own, so add them.
38 parser = config.add_commandline_args(
40 "A program that does the thing.",
46 help="Should we really do the thing?"
50 config.parse() # Then remember to call config.parse() early on.
52 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
53 all commandline arguments will play nicely together across all modules / files
54 in your program automatically. Argparse help messages will group flags by
57 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
58 optionally wrap your program's entry point, it will remember to call
59 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
60 That looks like this::
62 from pyutils import bootstrap
68 if __name__ == '__main__':
71 Either way, you'll get an aggregated usage message along with flags broken
72 down per file in help::
76 [--module_do_the_thing MODULE_DO_THE_THING]
80 Args related to module doing the thing.
82 --module_do_the_thing MODULE_DO_THE_THING
83 Should the module do the thing?
86 A program that does the thing
89 Should we really do the thing?
91 Once :py:meth:`pyutils.config.parse` has been called (either automatically
92 by :py:mod:`pyutils.bootstrap` or manually), the program configuration
93 state is ready in a dict-like object called `config.config`. For example,
94 to check the state of the `--dry_run` flag::
96 if not config.config['dry_run']:
99 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
100 sets of commandline arguments using the `--config_savefile` and the
101 `--config_loadfile` arguments. The former saves all arguments (other than
102 itself) to an ascii file whose path you provide. The latter reads all
103 arguments from an ascii file whose path you provide.
105 Saving and loading sets of arguments can make complex operations easier
106 to set up. They also allow for dynamic arguments.
108 If you use Apache Zookeeper, you can prefix paths to
109 `--config_savefile` and `--config_loadfile` with the string "zk:"
110 to cause the path to be interpreted as a Zookeeper path rather
111 than one on the local filesystem. When loading arguments from
112 Zookeeper, the :py:mod:`pyutils.config` code registers a listener
113 to be notified on state change (e.g. when some other instance
114 overwrites your Zookeeper based configuration). Listeners then
115 dynamically update the value of any flag in the `config.config`
116 dict whose name contains the string "dynamic". So, for example,
117 the `--dynamic_database_connect_string` argument would be
118 modifiable at runtime when using Zookeeper based configurations.
119 Flags that do not contain the string "dynamic" will not change.
120 And nothing is dynamic unless we're reading configuration from
123 For more information about Zookeeper, see https://zookeeper.apache.org/.
132 from typing import Any, Dict, List, Optional
134 # This module is commonly used by others in here and should avoid
135 # taking any unnecessary dependencies back on them.
137 # Make a copy of the original program arguments immediately upon module load.
# Snapshot the argument vector first, before anything else gets a chance to
# rewrite sys.argv, then derive the program's short name from that snapshot.
ORIG_ARGV: List[str] = list(sys.argv)
PROGRAM_NAME: str = os.path.basename(ORIG_ARGV[0])
142 class OptionalRawFormatter(argparse.HelpFormatter):
143 """This formatter has the same behavior as the normal argparse
144 text formatter except when the help text of an argument begins
145 with "RAW|". In that case, the line breaks are preserved and the
146 text is not wrapped. It is enabled automatically if you use
147 :py:mod:`pyutils.config`.
149 Use this by prepending "RAW|" in your help message to disable
150 word wrapping and indicate that the help message is already
151 formatted and should be preserved. Here's an example usage::
157 choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
159 help='''RAW|Our mode of operation. One of:
161 PLAY = play wordle with me! Pick a random solution or
162 specify a solution with --template.
164 CHEAT = given a --template and, optionally, --letters_in_word
165 and/or --letters_to_avoid, return the best guess word;
167 AUTOPLAY = given a complete word in --template, guess it step
168 by step showing work;
170 SELFTEST = autoplay every possible solution keeping track of
171 wins/losses and average number of guesses;
173 PRECOMPUTE = populate hash table with optimal guesses.
179 def _split_lines(self, text, width):
180 if text.startswith('RAW|'):
181 return text[4:].splitlines()
182 return argparse.HelpFormatter._split_lines(self, text, width)
185 # A global argparser that we will collect arguments in. Each module (including
186 # us) will add arguments to a separate argument group.
187 ARGS = argparse.ArgumentParser(
189 formatter_class=OptionalRawFormatter,
190 fromfile_prefix_chars="@",
191 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
192 # I don't fully understand why but when loaded by sphinx sometimes
193 # the same module is loaded many times causing any arguments it
194 # registers via module-level code to be redefined. Work around
195 # this iff the program is 'sphinx-build'
196 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
199 # Arguments specific to config.py. Other users should get their own group by
200 # invoking config.add_commandline_args.
201 GROUP = ARGS.add_argument_group(
202 f'Global Config ({__file__})',
203 'Args that control the global config itself; how meta!',
209 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
215 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
222 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
225 '--config_allow_dynamic_updates',
227 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
230 '--config_rejects_unrecognized_arguments',
233 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
236 '--config_exit_after_parse',
239 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
247 Do not instantiate this class directly; it is meant to be a
248 global singleton called `pyutils.config.CONFIG`. Instead, use
249 :py:meth:`pyutils.config.add_commandline_args` to get an
250 `ArgumentGroup` and add your arguments to it. Then call
251 :py:meth:`pyutils.config.parse` to parse global configuration
252 from your main program entry point.
254 Everything in the config module used to be module-level functions and
255 variables but it made the code ugly and harder to maintain. Now, this
256 class does the heavy lifting. We still rely on some globals, though:
258 - ARGS and GROUP to interface with argparse
259 - PROGRAM_NAME stores argv[0] close to program invocation
260 - ORIG_ARGV stores the original argv list close to program invocation
261 - CONFIG and config: hold the (singleton) instance of this class.
265 # Has our parse() method been invoked yet?
266 self.config_parse_called = False
268 # A configuration dictionary that will contain parsed
269 # arguments. This is the data that is most interesting to our
270 # callers as it will hold the configuration result.
271 self.config: Dict[str, Any] = {}
273 # Defer logging messages until later when logging has been
275 self.saved_messages: List[str] = []
277 # A zookeeper client that is lazily created so as to not incur
278 # the latency of connecting to zookeeper for programs that are
279 # not reading or writing their config data into zookeeper.
280 self.zk: Optional[Any] = None
282 # Per known zk file, what is the max version we have seen?
283 self.max_version: Dict[str, int] = {}
285 def __getitem__(self, key: str) -> Optional[Any]:
286 """If someone uses []'s on us, pass it onto self.config."""
287 return self.config.get(key, None)
289 def __setitem__(self, key: str, value: Any) -> None:
290 self.config[key] = value
292 def __contains__(self, key: str) -> bool:
293 return key in self.config
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Look up key in self.config, returning default when it is absent.

    Args:
        key: the name of the config entry to look up.
        default: the value to hand back if key is not in self.config.

    Returns:
        The stored value for key, or default if there is none.
    """
    settings = self.config
    if key in settings:
        return settings[key]
    return default
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Register a new argument group on the global parser (ARGS) and
    return it so the calling module can populate it.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title=title, description=description)
    return new_group
315 def overwrite_argparse_epilog(msg: str) -> None:
316 """Allows your code to override the default epilog created by
320 msg: The epilog message to substitute for the default.
325 def is_flag_already_in_argv(var: str) -> bool:
328 True if a particular flag is passed on the commandline
332 var: The flag to search for.
340 def print_usage() -> None:
341 """Prints the normal help usage message out."""
348 program usage help text as a string.
350 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Build the argument-group ordering used when dumping out
    a generated help string.

    Every group registered on ARGS appears in the returned list.  Groups
    whose title mentions the entry module or the program name are appended
    at the tail; every other group is pushed onto the front of the list.

    Args:
        entry_module: name of the module containing the program entry
            point, or None if it is not known.

    Returns:
        A new list holding the same groups as ARGS._action_groups in the
        order described above.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # A group created without a title carries title=None; treat that as
        # an empty title so the substring tests below cannot raise.
        title = grp.title or ""
        mentions_entry_module = entry_module is not None and entry_module in title
        # Test *this* group's title for the program name.  Checking the
        # global config GROUP's title instead would give the same answer on
        # every iteration and never single out the main program's group.
        mentions_program = PROGRAM_NAME in title
        if mentions_entry_module or mentions_program:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
370 def _to_bool(in_str: str) -> bool:
373 in_str: the string to convert to boolean
376 A boolean equivalent of the original string based on its contents.
377 All conversion is case insensitive. A positive boolean (True) is
378 returned if the string value is any of the following:
387 Otherwise False is returned.
407 return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
409 def _process_dynamic_args(self, event):
410 """Invoked as a callback when a zk-based config changed."""
414 logger = logging.getLogger(__name__)
416 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
417 logger.debug('Update for %s at version=%d.', event.path, meta.version)
419 'Max known version for %s is %d.',
421 self.max_version.get(event.path, 0),
423 except Exception as e:
424 raise Exception('Error reading data from zookeeper') from e
426 # Make sure we process changes in order.
427 if meta.version > self.max_version.get(event.path, 0):
428 self.max_version[event.path] = meta.version
429 contents = contents.decode()
431 for arg in contents.split():
433 # Our rule is that arguments must contain the word
434 # 'dynamic' if we are going to allow them to change at
435 # runtime as a signal that the programmer is expecting
437 if 'dynamic' in arg and config.config['config_allow_dynamic_updates']:
438 temp_argv.append(arg)
439 logger.info("Updating %s from zookeeper async config change.", arg)
441 if len(temp_argv) > 0:
444 known, _ = ARGS.parse_known_args()
446 self.config.update(vars(known))
448 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
449 from pyutils import zookeeper
451 if not zkpath.startswith('/config/'):
452 zkpath = '/config/' + zkpath
453 zkpath = re.sub(r'//+', '/', zkpath)
457 self.zk = zookeeper.get_started_zk_client()
458 if not self.zk.exists(zkpath):
461 # Note: we're putting a watch on this config file. Our
462 # _process_dynamic_args routine will be called to reparse
463 # args when/if they change.
464 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
465 contents = contents.decode()
466 self.saved_messages.append(
467 f'Setting {zkpath}\'s max_version to {meta.version}'
469 self.max_version[zkpath] = meta.version
470 self.saved_messages.append(f'Read config from zookeeper {zkpath}.')
472 except Exception as e:
473 self.saved_messages.append(
474 f'Failed to read {zkpath} from zookeeper: exception {e}'
478 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
479 if not os.path.exists(filepath):
481 with open(filepath, 'r') as rf:
482 self.saved_messages.append(f'Read config from disk file {filepath}')
485 def _augment_sys_argv_from_loadfile(self):
486 """Internal. Augment with arguments persisted in a saved file."""
488 # Check for --config_loadfile in the args manually; argparse isn't
489 # invoked yet and can't be yet.
491 saw_other_args = False
492 grab_next_arg = False
493 for arg in sys.argv[1:]:
494 if 'config_loadfile' in arg:
495 pieces = arg.split('=')
503 saw_other_args = True
505 if not loadfile or len(loadfile) == 0:
508 # Get contents from wherever.
510 if loadfile[:3] == 'zk:':
511 contents = self._read_config_from_zookeeper(loadfile[3:])
513 contents = self._read_config_from_disk(loadfile)
517 msg = f'Augmenting commandline arguments with those from {loadfile}.'
519 msg = f'Reading commandline arguments from {loadfile}.'
520 print(msg, file=sys.stderr)
521 self.saved_messages.append(msg)
523 msg = f'Failed to read/parse contents from {loadfile}'
524 print(msg, file=sys.stderr)
525 self.saved_messages.append(msg)
528 # Augment args with new ones.
531 for arg in contents.split('\n')
532 if 'config_savefile' not in arg
def dump_config(self):
    """Pretty-print the current config dict to stderr under a header line."""
    err_stream = sys.stderr
    print("Global Configuration:", file=err_stream)
    pprint.pprint(self.config, stream=err_stream)
542 def _write_config_to_disk(self, data: str, filepath: str) -> None:
543 with open(filepath, 'w') as wf:
546 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
547 if not zkpath.startswith('/config/'):
548 zkpath = '/config/' + zkpath
549 zkpath = re.sub(r'//+', '/', zkpath)
552 from pyutils import zookeeper
554 self.zk = zookeeper.get_started_zk_client()
555 encoded_data = data.encode()
556 if len(encoded_data) > 1024 * 1024:
558 f'Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)'
560 if not self.zk.exists(zkpath):
561 self.zk.create(zkpath, encoded_data)
562 self.saved_messages.append(
563 f'Just created {zkpath}; setting its max_version to 0'
565 self.max_version[zkpath] = 0
567 meta = self.zk.set(zkpath, encoded_data)
568 self.saved_messages.append(
569 f'Setting {zkpath}\'s max_version to {meta.version}'
571 self.max_version[zkpath] = meta.version
572 except Exception as e:
573 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
574 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
576 def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
577 """Main program should invoke this early in main(). Note that the
578 :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this automatically.
579 This should only be called once per program invocation.
582 entry_module: Optional string to ensure we understand which module
583 contains the program entry point. Determined heuristically if not
587 A dict containing the parsed program configuration. Note that this can
588 be safely ignored since it is also saved in `config.config` and may
589 be used directly using that identifier.
591 if self.config_parse_called:
594 # If we're about to do the usage message dump, put the main
595 # module's argument group last in the list (if possible) so that
596 # when the user passes -h or --help, it will be visible on the
597 # screen w/o scrolling. This just makes for a nicer --help screen.
599 if arg in ('--help', '-h'):
600 if entry_module is not None:
601 entry_module = os.path.basename(entry_module)
602 ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
606 # Look for --config_loadfile argument and, if found, read/parse
607 # Note that this works by jamming values onto sys.argv; kinda ugly.
608 self._augment_sys_argv_from_loadfile()
610 # Parse (possibly augmented, possibly completely overwritten)
611 # commandline args with argparse normally and populate config.
612 known, unknown = ARGS.parse_known_args()
613 self.config.update(vars(known))
615 # Reconstruct the sys.argv with unrecognized flags for the
616 # benefit of future argument parsers. For example,
617 # unittest_main in python has some of its own flags. If we
618 # didn't recognize it, maybe someone else will. Or, if
619 # --config_rejects_unrecognized_arguments was passed, die
620 # if we have unknown arguments.
622 if config['config_rejects_unrecognized_arguments']:
624 f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
626 self.saved_messages.append(
627 f'Config encountered unrecognized commandline arguments: {unknown}'
629 sys.argv = sys.argv[:1] + unknown
631 # Check for savefile and populate it if requested.
632 savefile = config['config_savefile']
633 if savefile and len(savefile) > 0:
634 data = '\n'.join(ORIG_ARGV[1:])
635 if savefile[:3] == 'zk:':
636 self._write_config_to_zookeeper(savefile[3:], data)
638 self._write_config_to_disk(savefile, data)
640 # Also dump the config on stderr if requested.
641 if config['config_dump']:
644 # Finally, maybe exit now if the user passed
645 # --config_exit_after_parse indicating they want to just
646 # update a config file and halt.
647 self.config_parse_called = True
648 if config['config_exit_after_parse']:
649 print("Exiting because of --config_exit_after_parse.")
def has_been_parsed(self) -> bool:
    """Report whether this instance's config has already been parsed.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
659 def late_logging(self):
660 """Log messages saved earlier now that logging has been initialized."""
661 logger = logging.getLogger(__name__)
662 logger.debug('Original commandline was: %s', ORIG_ARGV)
663 for _ in self.saved_messages:
667 # A global singleton instance of the Config class.
670 # A lot of client code uses config.config['whatever'] to lookup
671 # configuration so to preserve this we make this, config.config, with
672 # a __getitem__ method on it.
675 # Config didn't use to be a class; it was a mess of module-level
676 # functions and data. The functions below preserve the old interface
677 # so that existing clients do not need to be changed. As you can see,
678 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Create a new context for arguments and return a handle to it.

    Module-level convenience wrapper that forwards to
    CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    arg_group = CONFIG.add_commandline_args(title, description)
    return arg_group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the global configuration; call this early in main().

    Forwards to CONFIG.parse.  The :code:`bootstrap.initialize` wrapper
    takes care of this automatically.  This should only be called once per
    program invocation; subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Args:
        entry_module: passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed_config = CONFIG.parse(entry_module)
    return parsed_config
def has_been_parsed() -> bool:
    """Report whether the global config has already been parsed.

    Returns:
        The result of CONFIG.has_been_parsed().
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Emit the log messages that were saved earlier.

    Intended to be called once logging has been initialized.  Forwards to
    CONFIG.late_logging().
    """
    CONFIG.late_logging()
715 def dump_config() -> None:
716 """Print the current config to stdout."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Let calling code override the default argparse epilog.

    Forwards to Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Tell whether a particular flag was passed on the commandline.

    Forwards to Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        The result of Config.is_flag_already_in_argv(var).
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
740 def print_usage() -> None:
741 """Prints the normal help usage message out."""
748 program usage help text as a string.
750 return Config.usage()