3 # © Copyright 2021-2022, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it wraps :py:mod:`argparse`
11 (see https://docs.python.org/3/library/argparse.html):
15 from pyutils import config
17 # Call add_commandline_args to get an argparse.ArgumentParser
18 # for file.py. Each file uses a separate ArgumentParser
19 # chained off the main namespace.
20 parser = config.add_commandline_args(
22 "Args related to module doing the thing.",
25 # Then simply add argparse-style arguments to it, as usual.
27 "--module_do_the_thing",
30 help="Should the module do the thing?"
35 from pyutils import config
37 # main.py may have some arguments of its own, so add them.
38 parser = config.add_commandline_args(
40 "A program that does the thing.",
46 help="Should we really do the thing?"
50 config.parse() # Then remember to call config.parse() early on.
52 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
53 all commandline arguments will play nicely together across all modules / files
54 in your program automatically. Argparse help messages will group flags by
57 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
58 optionally wrap your program's entry point, it will remember to call
59 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
60 That looks like this::
62 from pyutils import bootstrap
68 if __name__ == '__main__':
71 Either way, you'll get an aggregated usage message along with flags broken
72 down per file in help::
76 [--module_do_the_thing MODULE_DO_THE_THING]
80 Args related to module doing the thing.
82 --module_do_the_thing MODULE_DO_THE_THING
83 Should the module do the thing?
86 A program that does the thing
89 Should we really do the thing?
91 Once :py:meth:`pyutils.config.parse` has been called (either automatically
92 by :py:mod:`pyutils.bootstrap` or manually), the program configuration
93 state is ready in a dict-like object called `config.config`. For example,
94 to check the state of the `--dry_run` flag::
96 if not config.config['dry_run']:
99 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
100 sets of commandline arguments using the `--config_savefile` and the
101 `--config_loadfile` arguments. The former saves all arguments (other than
102 itself) to an ascii file whose path you provide. The latter reads all
103 arguments from an ascii file whose path you provide.
105 Saving and loading sets of arguments can make complex operations easier
106 to set up. They also allow for dynamic arguments.
108 If you use Apache Zookeeper, you can prefix paths to
109 `--config_savefile` and `--config_loadfile` with the string "zk:"
110 to cause the path to be interpreted as a Zookeeper path rather
111 than one on the local filesystem. When loading arguments from
112 Zookeeper, the :py:mod:`pyutils.config` code registers a listener
113 to be notified on state change (e.g. when some other instance
114 overwrites your Zookeeper based configuration). Listeners then
115 dynamically update the value of any flag in the `config.config`
116 dict whose name contains the string "dynamic". So, for example,
117 the `--dynamic_database_connect_string` argument would be
118 modifiable at runtime when using Zookeeper based configurations.
119 Flags that do not contain the string "dynamic" will not change.
120 And nothing is dynamic unless we're reading configuration from
123 For more information about Zookeeper, see https://zookeeper.apache.org/.
132 from typing import Any, Dict, List, Optional
134 # This module is commonly used by others in here and should avoid
135 # taking any unnecessary dependencies back on them.
# Snapshot the invocation details as soon as this module is imported;
# sys.argv may be rewritten later, so these preserve the original values.
PROGRAM_NAME: str = os.path.basename(sys.argv[0])
ORIG_ARGV: List[str] = list(sys.argv)
class OptionalRawFormatter(argparse.HelpFormatter):
    """Help formatter that leaves pre-formatted help text alone.

    This formatter has the same behavior as the normal argparse text
    formatter except when the help text of an argument begins with
    "RAW|".  In that case the marker is removed and the remaining text
    is split only at its own line breaks; it is not re-wrapped.  It is
    enabled automatically if you use :py:mod:`pyutils.config`.

    Prepend "RAW|" to a help message to indicate that it is already
    formatted and should be preserved.  Here's an example usage::

        parser.add_argument(
            '--mode',
            choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
            help='''RAW|Our mode of operation.  One of:

                PLAY = play wordle with me!  Pick a random solution or
                       specify a solution with --template.

                CHEAT = given a --template and, optionally, --letters_in_word
                        and/or --letters_to_avoid, return the best guess word;

                AUTOPLAY = given a complete word in --template, guess it step
                           by step showing work;

                SELFTEST = autoplay every possible solution keeping track of
                           wins/losses and average number of guesses;

                PRECOMPUTE = populate hash table with optimal guesses.
            ''',
        )
    """

    def _split_lines(self, text, width):
        """Split help text into display lines.

        Args:
            text: the help text of one argument.
            width: the maximum line width; ignored for "RAW|" text.

        Returns:
            A list of lines.  Text marked with "RAW|" is split at its
            newlines only; anything else is wrapped by the base class.
        """
        marker = 'RAW|'
        if not text.startswith(marker):
            return super()._split_lines(text, width)
        return text[len(marker):].splitlines()
185 # A global argparser that we will collect arguments in. Each module (including
186 # us) will add arguments to a separate argument group.
187 ARGS = argparse.ArgumentParser(
189 formatter_class=OptionalRawFormatter,
190 fromfile_prefix_chars="@",
191 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
192 # I don't fully understand why but when loaded by sphinx sometimes
193 # the same module is loaded many times causing any arguments it
194 # registers via module-level code to be redefined. Work around
195 # this iff the program is 'sphinx-build'
196 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
199 # Arguments specific to config.py. Other users should get their own group by
200 # invoking config.add_commandline_args.
201 GROUP = ARGS.add_argument_group(
202 f'Global Config ({__file__})',
203 'Args that control the global config itself; how meta!',
209 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
215 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
222 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
225 '--config_allow_dynamic_updates',
228 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
231 '--config_rejects_unrecognized_arguments',
234 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
237 '--config_exit_after_parse',
240 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
248 Do not instantiate this class directly; it is meant to be a
249 global singleton called `pyutils.config.CONFIG`. Instead, use
250 :py:meth:`pyutils.config.add_commandline_args` to get an
251 `ArgumentGroup` and add your arguments to it. Then call
252 :py:meth:`pyutils.config.parse` to parse global configuration
253 from your main program entry point.
255 Everything in the config module used to be module-level functions and
256 variables but it made the code ugly and harder to maintain. Now, this
257 class does the heavy lifting. We still rely on some globals, though:
259 - ARGS and GROUP to interface with argparse
260 - PROGRAM_NAME stores argv[0] close to program invocation
261 - ORIG_ARGV stores the original argv list close to program invocation
262 - CONFIG and config: hold the (singleton) instance of this class.
266 # Has our parse() method been invoked yet?
267 self.config_parse_called = False
269 # A configuration dictionary that will contain parsed
270 # arguments. This is the data that is most interesting to our
271 # callers as it will hold the configuration result.
272 self.config: Dict[str, Any] = {}
274 # Defer logging messages until later when logging has been
276 self.saved_messages: List[str] = []
278 # A zookeeper client that is lazily created so as to not incur
279 # the latency of connecting to zookeeper for programs that are
280 # not reading or writing their config data into zookeeper.
281 self.zk: Optional[Any] = None
283 # Per known zk file, what is the max version we have seen?
284 self.max_version: Dict[str, int] = {}
286 def __getitem__(self, key: str) -> Optional[Any]:
287 """If someone uses []'s on us, pass it onto self.config."""
288 return self.config.get(key, None)
290 def __setitem__(self, key: str, value: Any) -> None:
291 self.config[key] = value
293 def __contains__(self, key: str) -> bool:
294 return key in self.config
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Return the configured value for key, or default if it is absent.

    Args:
        key: the configuration key to look up in self.config.
        default: the value to hand back when key is not present.

    Returns:
        The stored value, or default.
    """
    try:
        return self.config[key]
    except KeyError:
        return default
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Open a fresh argument group on the global parser for one module.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        The argparse._ArgumentGroup created on ARGS, ready to be
        populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
316 def overwrite_argparse_epilog(msg: str) -> None:
317 """Allows your code to override the default epilog created by
321 msg: The epilog message to substitute for the default.
326 def is_flag_already_in_argv(var: str) -> bool:
329 True if a particular flag is passed on the commandline
333 var: The flag to search for.
341 def print_usage() -> None:
342 """Prints the normal help usage message out."""
349 program usage help text as a string.
351 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(entry_module: Optional[str]):
    """Internal.  Compute the argument group order used for a help dump.

    Groups belonging to the main program are moved to the end of the
    list.  A group counts as belonging to the main program when its
    title contains entry_module (if one was given) or PROGRAM_NAME.

    Args:
        entry_module: name of the module holding the program entry
            point, or None if it is not known.

    Returns:
        A new list containing every group from ARGS._action_groups.
        Main-program groups keep their relative order at the end; every
        other group is pushed onto the front, which reverses the
        relative order of those groups.
    """
    reordered_action_groups = []
    for grp in ARGS._action_groups:
        # A group may have been created without a title; treat that as a
        # title that matches nothing instead of failing on `in None`.
        title = grp.title or ""
        # Test this group's own title.  Testing GROUP.title (this
        # module's group) gave the same answer for every group.
        belongs_to_main = (
            entry_module is not None and entry_module in title
        ) or PROGRAM_NAME in title
        if belongs_to_main:
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
371 def _to_bool(in_str: str) -> bool:
374 in_str: the string to convert to boolean
377 A boolean equivalent of the original string based on its contents.
378 All conversion is case insensitive. A positive boolean (True) is
379 returned if the string value is any of the following:
388 Otherwise False is returned.
408 return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
410 def _process_dynamic_args(self, event):
411 """Invoked as a callback when a zk-based config changed."""
415 logger = logging.getLogger(__name__)
417 contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
418 logger.debug('Update for %s at version=%d.', event.path, meta.version)
420 'Max known version for %s is %d.',
422 self.max_version.get(event.path, 0),
424 except Exception as e:
425 raise Exception('Error reading data from zookeeper') from e
427 # Make sure we process changes in order.
428 if meta.version > self.max_version.get(event.path, 0):
429 self.max_version[event.path] = meta.version
430 contents = contents.decode()
432 for arg in contents.split():
434 # Our rule is that arguments must contain the word
435 # 'dynamic' if we are going to allow them to change at
436 # runtime as a signal that the programmer is expecting
438 if 'dynamic' in arg and config.config['config_allow_dynamic_updates']:
439 temp_argv.append(arg)
440 logger.info("Updating %s from zookeeper async config change.", arg)
442 if len(temp_argv) > 0:
445 known, _ = ARGS.parse_known_args()
447 self.config.update(vars(known))
449 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
450 from pyutils import zookeeper
452 if not zkpath.startswith('/config/'):
453 zkpath = '/config/' + zkpath
454 zkpath = re.sub(r'//+', '/', zkpath)
458 self.zk = zookeeper.get_started_zk_client()
459 if not self.zk.exists(zkpath):
462 # Note: we're putting a watch on this config file. Our
463 # _process_dynamic_args routine will be called to reparse
464 # args when/if they change.
465 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
466 contents = contents.decode()
467 self.saved_messages.append(
468 f'Setting {zkpath}\'s max_version to {meta.version}'
470 self.max_version[zkpath] = meta.version
471 self.saved_messages.append(f'Read config from zookeeper {zkpath}.')
473 except Exception as e:
474 self.saved_messages.append(
475 f'Failed to read {zkpath} from zookeeper: exception {e}'
479 def _read_config_from_disk(self, filepath: str) -> Optional[str]:
480 if not os.path.exists(filepath):
482 with open(filepath, 'r') as rf:
483 self.saved_messages.append(f'Read config from disk file {filepath}')
486 def _augment_sys_argv_from_loadfile(self):
487 """Internal. Augment with arguments persisted in a saved file."""
489 # Check for --config_loadfile in the args manually; argparse isn't
490 # invoked yet and can't be yet.
492 saw_other_args = False
493 grab_next_arg = False
494 for arg in sys.argv[1:]:
495 if 'config_loadfile' in arg:
496 pieces = arg.split('=')
504 saw_other_args = True
506 if not loadfile or len(loadfile) == 0:
509 # Get contents from wherever.
511 if loadfile[:3] == 'zk:':
512 contents = self._read_config_from_zookeeper(loadfile[3:])
514 contents = self._read_config_from_disk(loadfile)
518 msg = f'Augmenting commandline arguments with those from {loadfile}.'
520 msg = f'Reading commandline arguments from {loadfile}.'
521 print(msg, file=sys.stderr)
522 self.saved_messages.append(msg)
524 msg = f'Failed to read/parse contents from {loadfile}'
525 print(msg, file=sys.stderr)
526 self.saved_messages.append(msg)
529 # Augment args with new ones.
532 for arg in contents.split('\n')
533 if 'config_savefile' not in arg
def dump_config(self):
    """Print the current config to stderr.

    Writes a "Global Configuration:" header followed by a
    pretty-printed rendering of self.config.  Nothing is written to
    stdout.
    """
    print("Global Configuration:", file=sys.stderr)
    pprint.pprint(self.config, stream=sys.stderr)
543 def _write_config_to_disk(self, data: str, filepath: str) -> None:
544 with open(filepath, 'w') as wf:
547 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
548 if not zkpath.startswith('/config/'):
549 zkpath = '/config/' + zkpath
550 zkpath = re.sub(r'//+', '/', zkpath)
553 from pyutils import zookeeper
555 self.zk = zookeeper.get_started_zk_client()
556 encoded_data = data.encode()
557 if len(encoded_data) > 1024 * 1024:
559 f'Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)'
561 if not self.zk.exists(zkpath):
562 self.zk.create(zkpath, encoded_data)
563 self.saved_messages.append(
564 f'Just created {zkpath}; setting its max_version to 0'
566 self.max_version[zkpath] = 0
568 meta = self.zk.set(zkpath, encoded_data)
569 self.saved_messages.append(
570 f'Setting {zkpath}\'s max_version to {meta.version}'
572 self.max_version[zkpath] = meta.version
573 except Exception as e:
574 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
575 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
    """Main program should invoke this early in main().  Note that the
    :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this
    automatically.  This should only be called once per program
    invocation; once parsing has happened, later calls simply return
    the existing configuration.

    Args:
        entry_module: Optional string naming the module that contains
            the program entry point.  It is only used to order the
            argument groups when a help dump was requested.

    Returns:
        A dict containing the parsed program configuration.  Note that
        this can be safely ignored since it is also saved in
        `config.config` and may be used directly using that identifier.

    Raises:
        Exception: unrecognized arguments were present and
            --config_rejects_unrecognized_arguments is enabled.
    """
    if self.config_parse_called:
        return self.config

    # If we're about to do the usage message dump, put the main
    # module's argument group last in the list (if possible) so that
    # when the user passes -h or --help, it will be visible on the
    # screen w/o scrolling.  This just makes for a nicer --help screen.
    for arg in sys.argv:
        if arg in ('--help', '-h'):
            if entry_module is not None:
                entry_module = os.path.basename(entry_module)
            ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
                entry_module
            )

    # Look for --config_loadfile argument and, if found, read/parse
    # Note that this works by jamming values onto sys.argv; kinda ugly.
    self._augment_sys_argv_from_loadfile()

    # Parse (possibly augmented, possibly completely overwritten)
    # commandline args with argparse normally and populate config.
    known, unknown = ARGS.parse_known_args()
    self.config.update(vars(known))

    # Reconstruct the sys.argv with unrecognized flags for the
    # benefit of future argument parsers.  For example,
    # unittest_main in python has some of its own flags.  If we
    # didn't recognize it, maybe someone else will.  Or, if
    # --config_rejects_unrecognized_arguments was passed, die
    # if we have unknown arguments.
    #
    # Flags are read from self.config rather than via the module-level
    # singleton so that this method only consults its own instance.
    if unknown:
        if self.config.get('config_rejects_unrecognized_arguments'):
            raise Exception(
                f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
            )
        self.saved_messages.append(
            f'Config encountered unrecognized commandline arguments: {unknown}'
        )
    sys.argv = sys.argv[:1] + unknown

    # Check for savefile and populate it if requested.
    savefile = self.config.get('config_savefile')
    if savefile:
        data = '\n'.join(ORIG_ARGV[1:])
        # Both writers take (data, path).  Pass by keyword so the
        # payload and the destination cannot be transposed.
        if savefile.startswith('zk:'):
            self._write_config_to_zookeeper(data=data, zkpath=savefile[3:])
        else:
            self._write_config_to_disk(data=data, filepath=savefile)

    # Also dump the config on stderr if requested.
    if self.config.get('config_dump'):
        self.dump_config()

    # Finally, maybe exit now if the user passed
    # --config_exit_after_parse indicating they want to just
    # update a config file and halt.
    self.config_parse_called = True
    if self.config.get('config_exit_after_parse'):
        print("Exiting because of --config_exit_after_parse.")
        # NOTE(review): if a zookeeper client was opened (self.zk), it
        # may need to be shut down before exiting -- confirm.
        sys.exit(0)
    return self.config
def has_been_parsed(self) -> bool:
    """Report whether this config object has already been parsed.

    Returns:
        The current value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
660 def late_logging(self):
661 """Log messages saved earlier now that logging has been initialized."""
662 logger = logging.getLogger(__name__)
663 logger.debug('Original commandline was: %s', ORIG_ARGV)
664 for _ in self.saved_messages:
668 # A global singleton instance of the Config class.
671 # A lot of client code uses config.config['whatever'] to lookup
672 # configuration so to preserve this we make this, config.config, with
673 # a __getitem__ method on it.
676 # Config didn't use to be a class; it was a mess of module-level
677 # functions and data. The functions below preserve the old interface
678 # so that existing clients do not need to be changed. As you can see,
679 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Create a new context for arguments and return a handle.

    Module-level alias that forwards to CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    argument_group = CONFIG.add_commandline_args(title, description)
    return argument_group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Parse the global configuration; call this early in main().

    Note that the :code:`bootstrap.initialize` wrapper takes care of
    this automatically.  This should only be called once per program
    invocation.  Subsequent calls do not reparse the configuration
    settings but rather just return the current state.

    Args:
        entry_module: passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns.
    """
    parsed_config = CONFIG.parse(entry_module)
    return parsed_config
706 def error(message: str, exit_code: int = 1) -> None:
708 Convenience method for indicating a configuration error.
710 logging.error(message)
711 print(message, file=sys.stderr)
def has_been_parsed() -> bool:
    """Report whether the global config has already been parsed.

    Returns:
        The answer given by CONFIG.has_been_parsed().
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Log messages saved earlier now that logging has been initialized.

    Forwards to the late_logging method of the CONFIG singleton.
    """
    singleton = CONFIG
    singleton.late_logging()
725 def dump_config() -> None:
726 """Print the current config to stdout."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Allow your code to override the default help epilog.

    Forwards to Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    replace_epilog = Config.overwrite_argparse_epilog
    replace_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Return true if a particular flag is passed on the commandline.

    Forwards to Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        The answer given by Config.is_flag_already_in_argv(var).
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
750 def print_usage() -> None:
751 """Prints the normal help usage message out."""
758 program usage help text as a string.
760 return Config.usage()