3 # © Copyright 2021-2022, Scott Gasch
5 """Global program configuration driven by commandline arguments and,
6 optionally, from saved (local or Zookeeper) configuration files... with
7 optional support for dynamic arguments (i.e. that can change during runtime).
9 Let's start with an example of how to use :py:mod:`pyutils.config`. It's
10 pretty easy for normal commandline arguments because it uses :py:mod:`argparse`:
14 from pyutils import config
16 # Call add_commandline_args to get an argparse.ArgumentParser
17 # for file.py. Each file uses a separate ArgumentParser
18 # chained off the main namespace.
19 parser = config.add_commandline_args(
21 "Args related to module doing the thing.",
24 # Then simply add argparse-style arguments to it, as usual.
26 "--module_do_the_thing",
29 help="Should the module do the thing?"
34 from pyutils import config
36 # main.py may have some arguments of its own, so add them.
37 parser = config.add_commandline_args(
39 "A program that does the thing.",
45 help="Should we really do the thing?"
49 config.parse() # Then remember to call config.parse() early on.
51 If you set this up and remember to invoke :py:meth:`pyutils.config.parse`,
52 all commandline arguments will play nicely together across all modules / files
53 in your program automatically. Argparse help messages will group flags by
56 If you use :py:meth:`pyutils.bootstrap.initialize`, a decorator that can
57 optionally wrap your program's entry point, it will remember to call
58 :py:meth:`pyutils.config.parse` for you so you can omit the last part.
59 That looks like this::
61 from pyutils import bootstrap
67 if __name__ == '__main__':
70 Either way, you'll get an aggregated usage message along with flags broken
71 down per file in help::
75 [--module_do_the_thing MODULE_DO_THE_THING]
79 Args related to module doing the thing.
81 --module_do_the_thing MODULE_DO_THE_THING
82 Should the module do the thing?
85 A program that does the thing
88 Should we really do the thing?
90 Once :py:meth:`pyutils.config.parse` has been called (either automatically
91 by :py:mod:`pyutils.bootstrap` or manually), the program configuration
92 state is ready in a dict-like object called `config.config`. For example,
93 to check the state of the `--dry_run` flag::
95 if not config.config['dry_run']:
98 Using :py:mod:`pyutils.config` allows you to "save" and "load" whole
99 sets of commandline arguments using the `--config_savefile` and the
100 `--config_loadfile` arguments. The former saves all arguments (other than
101 itself) to an ascii file whose path you provide. The latter reads all
102 arguments from an ascii file whose path you provide.
104 Saving and loading sets of arguments can make complex operations easier
105 to set up. They also allow for dynamic arguments.
107 If you use Apache Zookeeper, you can prefix paths to
108 `--config_savefile` and `--config_loadfile` with the string "zk:"
109 to cause the path to be interpreted as a Zookeeper path rather
110 than one on the local filesystem. When loading arguments from
111 Zookeeper, the :py:mod:`pyutils.config` code registers a listener
112 to be notified on state change (e.g. when some other instance
113 overwrites your Zookeeper based configuration). Listeners then
114 dynamically update the value of any flag in the `config.config`
115 dict whose name contains the string "dynamic". So, for example,
116 the `--dynamic_database_connect_string` argument would be
117 modifiable at runtime when using Zookeeper based configurations.
118 Flags that do not contain the string "dynamic" will not change.
119 And nothing is dynamic unless we're reading configuration from
122 For more information about Zookeeper, see https://zookeeper.apache.org/.
131 from typing import Any, Dict, List, Optional
133 # This module is commonly used by others in here and should avoid
134 # taking any unnecessary dependencies back on them.
# Snapshot the program name and the full argument vector the moment this
# module is imported; sys.argv may be rewritten later on, so these keep the
# values the program was actually started with.
PROGRAM_NAME: str = os.path.basename(sys.argv[0])
ORIG_ARGV: List[str] = list(sys.argv)
class OptionalRawFormatter(argparse.HelpFormatter):
    """An argparse help formatter that can leave selected help text unwrapped.

    Help strings are wrapped exactly as :py:class:`argparse.HelpFormatter`
    wraps them unless they begin with the marker ``RAW|``.  For a marked
    string the marker is removed and the remainder is emitted one output
    line per source line, so the author's own line breaks and spacing are
    kept.

    Use it when an argument's help needs hand-formatted layout, e.g.::

        parser.add_argument(
            '--mode',
            choices=['CHEAT', 'AUTOPLAY', 'SELFTEST', 'PRECOMPUTE', 'PLAY'],
            help='''RAW|Our mode of operation.  One of:

                PLAY     = play wordle with me!
                CHEAT    = return the best guess word.
            ''',
        )
    """

    def _split_lines(self, text, width):
        raw_marker = 'RAW|'
        if not text.startswith(raw_marker):
            # Unmarked help text gets the stock argparse wrapping.
            return argparse.HelpFormatter._split_lines(self, text, width)
        # Marked text: drop the marker and keep the author's line breaks.
        return text[len(raw_marker):].splitlines()
181 # A global argparser that we will collect arguments in. Each module (including
182 # us) will add arguments to a separate argument group.
183 ARGS = argparse.ArgumentParser(
185 formatter_class=OptionalRawFormatter,
186 fromfile_prefix_chars="@",
187 epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.',
188 # I don't fully understand why but when loaded by sphinx sometimes
189 # the same module is loaded many times causing any arguments it
190 # registers via module-level code to be redefined. Work around
191 # this iff the program is 'sphinx-build'
192 conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error',
195 # Arguments specific to config.py. Other users should get their own group by
196 # invoking config.add_commandline_args.
197 GROUP = ARGS.add_argument_group(
198 f'Global Config ({__file__})',
199 'Args that control the global config itself; how meta!',
205 help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.',
211 help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.',
218 help='Populate a config file (compatible with --config_loadfile) and write it at the given path for later [re]use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a local filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:<path>) will see the update. Those that also enabled --config_allow_dynamic_updates will change the value of any flags with the string "dynamic" in their names (e.g. --my_dynamic_flag or --dynamic_database_connect_string).',
221 '--config_allow_dynamic_updates',
223 help='If enabled, allow config flags with the string "dynamic" in their names to change at runtime when a new Zookeeper based configuration is created. See the --config_savefile help message for more information about this option.',
226 '--config_rejects_unrecognized_arguments',
229 help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.',
232 '--config_exit_after_parse',
235 help='If present, halt the program after parsing config. Useful, for example, to write a --config_savefile and then terminate.',
243 Do not instantiate this class directly; it is meant to be a
244 global singleton called `pyutils.config.CONFIG`. Instead, use
245 :py:meth:`pyutils.config.add_commandline_args` to get an
246 `ArgumentGroup` and add your arguments to it. Then call
247 :py:meth:`pyutils.config.parse` to parse global configuration
248 from your main program entry point.
250 Everything in the config module used to be module-level functions and
251 variables but it made the code ugly and harder to maintain. Now, this
252 class does the heavy lifting. We still rely on some globals, though:
254 - ARGS and GROUP to interface with argparse
255 - PROGRAM_NAME stores argv[0] close to program invocation
256 - ORIG_ARGV stores the original argv list close to program invocation
257 - CONFIG and config: hold the (singleton) instance of this class.
261 # Has our parse() method been invoked yet?
262 self.config_parse_called = False
264 # A configuration dictionary that will contain parsed
265 # arguments. This is the data that is most interesting to our
266 # callers as it will hold the configuration result.
267 self.config: Dict[str, Any] = {}
269 # Defer logging messages until later when logging has been
271 self.saved_messages: List[str] = []
273 # A zookeeper client that is lazily created so as to not incur
274 # the latency of connecting to zookeeper for programs that are
275 # not reading or writing their config data into zookeeper.
276 self.zk: Optional[Any] = None
278 # Per known zk file, what is the max version we have seen?
279 self.max_version: Dict[str, int] = {}
def __getitem__(self, key: str) -> Optional[Any]:
    """Support ``obj[key]`` lookups by consulting self.config.

    Args:
        key: name of the configuration setting to look up.

    Returns:
        The stored value, or None when self.config has no entry for
        `key`; unlike a plain dict, a missing key does not raise KeyError.
    """
    settings = self.config
    return settings.get(key, None)
def __setitem__(self, key: str, value: Any) -> None:
    """Support ``obj[key] = value`` by storing the value in self.config.

    Args:
        key: name of the configuration setting to create or overwrite.
        value: the value to store under `key`.
    """
    settings = self.config
    settings[key] = value
def __contains__(self, key: str) -> bool:
    """Support ``key in obj``: True iff self.config has an entry for `key`."""
    settings = self.config
    return key in settings
def get(self, key: str, default: Any = None) -> Optional[Any]:
    """Dict-style lookup with a fallback value.

    Args:
        key: name of the configuration setting to look up.
        default: value to return when self.config has no entry for `key`.

    Returns:
        The stored value for `key`, or `default` if there is none.
    """
    settings = self.config
    return settings.get(key, default)
def add_commandline_args(
    title: str, description: str = ""
) -> argparse._ArgumentGroup:
    """Open a new argument group on the global parser (ARGS) and hand it
    back so the calling module can register its own flags on it.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = ARGS.add_argument_group(title, description)
    return new_group
311 def overwrite_argparse_epilog(msg: str) -> None:
312 """Allows your code to override the default epilog created by
316 msg: The epilog message to substitute for the default.
321 def is_flag_already_in_argv(var: str) -> bool:
324 True if a particular flag is passed on the commandline
328 var: The flag to search for.
336 def print_usage() -> None:
337 """Prints the normal help usage message out."""
344 program usage help text as a string.
346 return ARGS.format_usage()
def _reorder_arg_action_groups_before_help(
    entry_module: Optional[str],
) -> List[argparse._ArgumentGroup]:
    """Internal.  Compute the order in which ARGS' argument groups should
    be listed in generated help text so that the main program's own groups
    come last (i.e. nearest the bottom of the --help output).

    A group counts as belonging to the main program when its title
    contains `entry_module` (if one was given) or contains PROGRAM_NAME.

    Args:
        entry_module: name of the module holding the program's entry
            point, or None when it is not known.

    Returns:
        A new list holding every group from ARGS._action_groups: the main
        program's groups at the end in their original relative order, all
        other groups ahead of them in reverse of their original order.
        ARGS itself is not modified.
    """
    reordered_action_groups: List[argparse._ArgumentGroup] = []
    for grp in ARGS._action_groups:
        # argparse allows untitled groups; treat a missing title as empty
        # so the substring tests below cannot raise TypeError.
        title = grp.title or ""
        if entry_module is not None and entry_module in title:
            reordered_action_groups.append(grp)
        elif PROGRAM_NAME in title:
            # Test the title of the group being examined (not the fixed
            # title of this module's own GROUP) so the decision is made
            # per group.
            reordered_action_groups.append(grp)
        else:
            reordered_action_groups.insert(0, grp)
    return reordered_action_groups
def _to_bool(in_str: str) -> bool:
    """Interpret a string as a boolean, ignoring letter case.

    Args:
        in_str: the string to convert to boolean

    Returns:
        True when `in_str`, lowercased, is exactly one of "true", "1",
        "yes", "y", "t" or "on".  Every other string (including the empty
        string and strings with surrounding whitespace) yields False.
    """
    truthy_spellings = ("true", "1", "yes", "y", "t", "on")
    normalized = in_str.lower()
    return normalized in truthy_spellings
def _process_dynamic_args(self, event):
    """Invoked as a callback when a zk-based config changed.

    Re-reads the zookeeper node named by `event.path`, passing this method
    as the watch again so that later changes are reported too.  If the
    node's version is newer than the newest version already processed for
    that path, every whitespace-separated argument in the node whose text
    contains "dynamic" is parsed and applied to self.config.  Arguments
    are only applied when --config_allow_dynamic_updates is enabled, and
    flags that are not part of the update keep their current values.

    Args:
        event: the zookeeper watch event; only its `path` attribute is
            used.

    Raises:
        Exception: if the node cannot be read from zookeeper.
    """
    if self.zk is None:
        # No zookeeper client, so there is nothing to re-read.
        return
    logger = logging.getLogger(__name__)
    try:
        contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args)
        logger.debug('Update for %s at version=%d.', event.path, meta.version)
        logger.debug(
            'Max known version for %s is %d.',
            event.path,
            self.max_version.get(event.path, 0),
        )
    except Exception as e:
        raise Exception('Error reading data from zookeeper') from e

    # Make sure we process changes in order: ignore anything that is not
    # newer than what we have already seen for this path.
    if meta.version <= self.max_version.get(event.path, 0):
        return
    self.max_version[event.path] = meta.version

    if not self.config.get('config_allow_dynamic_updates'):
        return

    # Our rule is that arguments must contain the word 'dynamic' if we
    # are going to allow them to change at runtime, as a signal that the
    # programmer is expecting this.
    dynamic_args = []
    for arg in contents.decode().split():
        if 'dynamic' in arg:
            dynamic_args.append(arg)
            logger.info("Updating %s from zookeeper async config change.", arg)
    if not dynamic_args:
        return

    # Hand the arguments to argparse explicitly instead of going through
    # sys.argv: argparse skips the first element of sys.argv as the program
    # name, and sys.argv is shared, process-wide state.  Seeding the
    # namespace with the current configuration means argparse only
    # overrides what the update actually mentions; everything else keeps
    # its present value rather than being reset to its default.
    current = argparse.Namespace(**self.config)
    known, _ = ARGS.parse_known_args(dynamic_args, namespace=current)
    self.config.update(vars(known))
444 def _read_config_from_zookeeper(self, zkpath: str) -> Optional[str]:
445 from pyutils import zookeeper
447 if not zkpath.startswith('/config/'):
448 zkpath = '/config/' + zkpath
449 zkpath = re.sub(r'//+', '/', zkpath)
453 self.zk = zookeeper.get_started_zk_client()
454 if not self.zk.exists(zkpath):
457 # Note: we're putting a watch on this config file. Our
458 # _process_dynamic_args routine will be called to reparse
459 # args when/if they change.
460 contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args)
461 contents = contents.decode()
462 self.saved_messages.append(
463 f'Setting {zkpath}\'s max_version to {meta.version}'
465 self.max_version[zkpath] = meta.version
466 self.saved_messages.append(f'Read config from zookeeper {zkpath}.')
468 except Exception as e:
469 self.saved_messages.append(
470 f'Failed to read {zkpath} from zookeeper: exception {e}'
def _read_config_from_disk(self, filepath: str) -> Optional[str]:
    """Read saved arguments from a file on the local filesystem.

    Args:
        filepath: path of the file to read.

    Returns:
        The file's entire contents, or None if nothing exists at
        `filepath`.  A note about a successful open is queued on
        self.saved_messages.
    """
    if os.path.exists(filepath):
        with open(filepath, 'r') as rf:
            self.saved_messages.append(f'Read config from disk file {filepath}')
            contents = rf.read()
        return contents
    return None
481 def _augment_sys_argv_from_loadfile(self):
482 """Internal. Augment with arguments persisted in a saved file."""
484 # Check for --config_loadfile in the args manually; argparse isn't
485 # invoked yet and can't be yet.
487 saw_other_args = False
488 grab_next_arg = False
489 for arg in sys.argv[1:]:
490 if 'config_loadfile' in arg:
491 pieces = arg.split('=')
499 saw_other_args = True
501 if not loadfile or len(loadfile) == 0:
504 # Get contents from wherever.
506 if loadfile[:3] == 'zk:':
507 contents = self._read_config_from_zookeeper(loadfile[3:])
509 contents = self._read_config_from_disk(loadfile)
513 msg = f'Augmenting commandline arguments with those from {loadfile}.'
515 msg = f'Reading commandline arguments from {loadfile}.'
516 print(msg, file=sys.stderr)
517 self.saved_messages.append(msg)
519 msg = f'Failed to read/parse contents from {loadfile}'
520 print(msg, file=sys.stderr)
521 self.saved_messages.append(msg)
524 # Augment args with new ones.
527 for arg in contents.split('\n')
528 if 'config_savefile' not in arg
532 def dump_config(self):
533 """Print the current config to stderr."""
534 print("Global Configuration:", file=sys.stderr)
535 pprint.pprint(self.config, stream=sys.stderr)
def _write_config_to_disk(self, data: str, filepath: str) -> None:
    """Write saved arguments to a file on the local filesystem.

    Args:
        data: the text to store.
        filepath: path of the file to write; it is opened in 'w' mode, so
            existing content is replaced.
    """
    with open(filepath, 'w') as out_file:
        out_file.write(data)
542 def _write_config_to_zookeeper(self, data: str, zkpath: str) -> None:
543 if not zkpath.startswith('/config/'):
544 zkpath = '/config/' + zkpath
545 zkpath = re.sub(r'//+', '/', zkpath)
548 from pyutils import zookeeper
550 self.zk = zookeeper.get_started_zk_client()
551 encoded_data = data.encode()
552 if len(encoded_data) > 1024 * 1024:
554 f'Saved args are too large ({len(encoded_data)} bytes exceeds zk limit)'
556 if not self.zk.exists(zkpath):
557 self.zk.create(zkpath, encoded_data)
558 self.saved_messages.append(
559 f'Just created {zkpath}; setting its max_version to 0'
561 self.max_version[zkpath] = 0
563 meta = self.zk.set(zkpath, encoded_data)
564 self.saved_messages.append(
565 f'Setting {zkpath}\'s max_version to {meta.version}'
567 self.max_version[zkpath] = meta.version
568 except Exception as e:
569 raise Exception(f'Failed to create zookeeper path {zkpath}') from e
570 self.saved_messages.append(f'Saved config to zookeeper in {zkpath}')
def parse(self, entry_module: Optional[str]) -> Dict[str, Any]:
    """Main program should invoke this early in main().  Note that the
    :py:meth:`pyutils.bootstrap.initialize` wrapper takes care of this
    automatically.  This should only be called once per program
    invocation; later calls return the already-parsed configuration
    without parsing again.

    Args:
        entry_module: Optional string naming the module that contains the
            program entry point.  Only its basename is used, and only to
            decide how argument groups are ordered in --help output.

    Returns:
        A dict containing the parsed program configuration.  Note that
        this can be safely ignored since it is also saved in
        `config.config` and may be used directly using that identifier.

    Raises:
        Exception: if unrecognized arguments are present and
            --config_rejects_unrecognized_arguments was passed.
        SystemExit: if --config_exit_after_parse was passed.
    """
    if self.config_parse_called:
        return self.config

    # If we're about to do the usage message dump, put the main
    # module's argument group last in the list (if possible) so that
    # when the user passes -h or --help, it will be visible on the
    # screen w/o scrolling.  This just makes for a nicer --help screen.
    # Reorder once, however many help flags were passed.
    if any(arg in ('--help', '-h') for arg in sys.argv):
        if entry_module is not None:
            entry_module = os.path.basename(entry_module)
        ARGS._action_groups = Config._reorder_arg_action_groups_before_help(
            entry_module
        )

    # Look for --config_loadfile argument and, if found, read/parse it.
    # Note that this works by jamming values onto sys.argv; kinda ugly.
    self._augment_sys_argv_from_loadfile()

    # Parse (possibly augmented, possibly completely overwritten)
    # commandline args with argparse normally and populate config.
    known, unknown = ARGS.parse_known_args()
    self.config.update(vars(known))

    # Reconstruct the sys.argv with unrecognized flags for the
    # benefit of future argument parsers.  For example,
    # unittest_main in python has some of its own flags.  If we
    # didn't recognize it, maybe someone else will.  Or, if
    # --config_rejects_unrecognized_arguments was passed, die
    # if we have unknown arguments.
    if unknown:
        if self.config.get('config_rejects_unrecognized_arguments'):
            raise Exception(
                f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.'
            )
        self.saved_messages.append(
            f'Config encountered unrecognized commandline arguments: {unknown}'
        )
    sys.argv = sys.argv[:1] + unknown

    # Check for savefile and populate it if requested.
    savefile = self.config.get('config_savefile')
    if savefile:
        data = '\n'.join(ORIG_ARGV[1:])
        # Both writers are declared as (data, path).  Pass by keyword so
        # the argument text and the destination cannot be transposed.
        if savefile.startswith('zk:'):
            self._write_config_to_zookeeper(data=data, zkpath=savefile[3:])
        else:
            self._write_config_to_disk(data=data, filepath=savefile)

    # Also dump the config on stderr if requested.
    if self.config.get('config_dump'):
        self.dump_config()

    # Finally, maybe exit now if the user passed
    # --config_exit_after_parse indicating they want to just
    # update a config file and halt.
    self.config_parse_called = True
    if self.config.get('config_exit_after_parse'):
        print("Exiting because of --config_exit_after_parse.")
        if self.zk:
            # Shut down the zookeeper client, if one was created, before
            # leaving.
            self.zk.stop()
        sys.exit(0)
    return self.config
def has_been_parsed(self) -> bool:
    """Report whether parse() has already run to completion.

    Returns:
        The value of self.config_parse_called.
    """
    already_parsed = self.config_parse_called
    return already_parsed
655 def late_logging(self):
656 """Log messages saved earlier now that logging has been initialized."""
657 logger = logging.getLogger(__name__)
658 logger.debug('Original commandline was: %s', ORIG_ARGV)
659 for _ in self.saved_messages:
663 # A global singleton instance of the Config class.
666 # A lot of client code uses config.config['whatever'] to lookup
667 # configuration so to preserve this we make this, config.config, with
668 # a __getitem__ method on it.
671 # Config didn't use to be a class; it was a mess of module-level
672 # functions and data. The functions below preserve the old interface
673 # so that existing clients do not need to be changed. As you can see,
674 # they mostly just thunk into the config class.
def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup:
    """Module-level convenience wrapper: create a new argument group for
    the calling module by delegating to CONFIG.add_commandline_args.

    Args:
        title: A title for your module's commandline arguments group.
        description: A helpful description of your module.

    Returns:
        An argparse._ArgumentGroup to be populated by the caller.
    """
    new_group = CONFIG.add_commandline_args(title, description)
    return new_group
def parse(entry_module: Optional[str]) -> Dict[str, Any]:
    """Module-level convenience wrapper around CONFIG.parse.

    The main program should call this early in main(); the
    :code:`bootstrap.initialize` wrapper takes care of this automatically.
    It is meant to be called once per program invocation.  Subsequent
    calls do not reparse the configuration settings but rather just
    return the current state.

    Args:
        entry_module: passed through unchanged to CONFIG.parse.

    Returns:
        Whatever CONFIG.parse returns: the parsed configuration dict.
    """
    parsed_config = CONFIG.parse(entry_module)
    return parsed_config
def has_been_parsed() -> bool:
    """Module-level convenience wrapper around CONFIG.has_been_parsed.

    Returns:
        True iff the global config has already been parsed.
    """
    already_parsed = CONFIG.has_been_parsed()
    return already_parsed
def late_logging() -> None:
    """Module-level convenience wrapper around CONFIG.late_logging.

    Call it once logging has been initialized so that messages saved
    earlier can be logged.
    """
    CONFIG.late_logging()
711 def dump_config() -> None:
712 """Print the current config to stderr."""
def overwrite_argparse_epilog(msg: str) -> None:
    """Module-level convenience wrapper: replace the default argparse
    epilog by delegating to Config.overwrite_argparse_epilog.

    Args:
        msg: The epilog message to substitute for the default.
    """
    Config.overwrite_argparse_epilog(msg)
def is_flag_already_in_argv(var: str) -> bool:
    """Module-level convenience wrapper around
    Config.is_flag_already_in_argv.

    Args:
        var: The flag to search for.

    Returns:
        Whatever Config.is_flag_already_in_argv returns: True if the flag
        is passed on the commandline.
    """
    flag_present = Config.is_flag_already_in_argv(var)
    return flag_present
736 def print_usage() -> None:
737 """Prints the normal help usage message out."""
744 program usage help text as a string.
746 return Config.usage()