From: Scott Gasch Date: Thu, 18 Aug 2022 00:23:44 +0000 (-0700) Subject: Cleanup config in preparation for zookeeper-based dynamic configs. X-Git-Url: https://wannabe.guru.org/gitweb/?a=commitdiff_plain;h=2fe7a541ab5bc08fc9d88ebde74b6e88951c56e6;p=python_utils.git Cleanup config in preparation for zookeeper-based dynamic configs. --- diff --git a/config.py b/config.py index 81bd7d1..935bb79 100644 --- a/config.py +++ b/config.py @@ -78,7 +78,6 @@ Usage: if not config.config['dry_run']: module.do_the_thing() - """ import argparse @@ -96,18 +95,10 @@ import scott_secrets # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. -# Defer logging messages until later when logging has been initialized. -SAVED_MESSAGES: List[str] = [] - # Make a copy of the original program arguments. PROGRAM_NAME: str = os.path.basename(sys.argv[0]) ORIG_ARGV: List[str] = sys.argv.copy() -# A zookeeper client that is lazily created so as to not incur the -# latency of connecting to zookeeper for programs that are not reading -# or writing their config data into zookeeper. -ZK: Optional[KazooClient] = None - class OptionalRawFormatter(argparse.HelpFormatter): """This formatter has the same bahavior as the normal argparse text @@ -141,7 +132,6 @@ class OptionalRawFormatter(argparse.HelpFormatter): PRECOMPUTE = populate hash table with optimal guesses. ''', ) - """ def _split_lines(self, text, width): @@ -162,352 +152,466 @@ ARGS = argparse.ArgumentParser( # this iff the program is 'sphinx-build' conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error', ) - -# Keep track of if we've been called and prevent being called more -# than once. -CONFIG_PARSE_CALLED = False - - -# A global configuration dictionary that will contain parsed arguments. -# It is also this variable that modules use to access parsed arguments. -# This is the data that is most interesting to our callers; it will hold -# the configuration result. -config: Dict[str, Any] = {} - -# It would be really nice if this shit worked from interactive python - - -def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup: - """Create a new context for arguments and return a handle. - - Args: - title: A title for your module's commandline arguments group. - description: A helpful description of your module. - - Returns: - An argparse._ArgumentGroup to be populated by the caller. - """ - return ARGS.add_argument_group(title, description) - - -group = add_commandline_args( +GROUP = ARGS.add_argument_group( f'Global Config ({__file__})', 'Args that control the global config itself; how meta!', ) -group.add_argument( +GROUP.add_argument( '--config_loadfile', metavar='FILENAME', default=None, - help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to commandline.', + help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to commandline. Note that if this begins with zk: the path is interpreted as a zookeeper path instead of a filesystem path.', ) -group.add_argument( +GROUP.add_argument( '--config_dump', default=False, action='store_true', help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup.', ) -group.add_argument( +GROUP.add_argument( '--config_savefile', type=str, metavar='FILENAME', default=None, - help='Populate config file compatible with --config_loadfile to save global config for later use.', + help='Populate config file compatible with --config_loadfile to save global config for later use. Note that if this begins with zk: the path is interpreted as a zookeeper oath instead of a filesystem path.', ) -group.add_argument( +GROUP.add_argument( '--config_rejects_unrecognized_arguments', default=False, action='store_true', - help=( - 'If present, config will raise an exception if it doesn\'t recognize an argument. The ' - + 'default behavior is to ignore this so as to allow interoperability with programs that ' - + 'want to use their own argparse calls to parse their own, separate commandline args.' - ), + help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore this so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.', ) -def overwrite_argparse_epilog(msg: str) -> None: - """Allows your code to override the default epilog created by - argparse. - - Args: - msg: The epilog message to substitute for the default. +class Config: """ - ARGS.epilog = msg - + Everything in the config module used to be module-level functions and + variables but it made the code ugly and harder to maintain. Now, this + class does the heavy lifting. We still rely on some globals, though: -def is_flag_already_in_argv(var: str) -> bool: - """Returns true if a particular flag is passed on the commandline? + ARGS and GROUP to interface with argparse + PROGRAM_NAME stores argv[0] close to program invocation + ORIG_ARGV stores the original argv list close to program invocation + CONFIG and config: hold the (singleton) instance of this class. - Args: - var: The flag to search for. """ - for _ in sys.argv: - if var in _: - return True - return False + def __init__(self): + # Has our parse() method been invoked yet? + self.config_parse_called = False + + # A configuration dictionary that will contain parsed + # arguments. This is the data that is most interesting to our + # callers as it will hold the configuration result. + self.config: Dict[str, Any] = {} + + # Defer logging messages until later when logging has been + # initialized. + self.saved_messages: List[str] = [] + + # A zookeeper client that is lazily created so as to not incur + # the latency of connecting to zookeeper for programs that are + # not reading or writing their config data into zookeeper. + self.zk: Optional[KazooClient] = None + + def __getitem__(self, key: str) -> Optional[Any]: + """If someone uses []'s on us, pass it onto self.config.""" + return self.config.get(key, None) + + def __setitem__(self, key: str, value: Any) -> None: + self.config[key] = value + + def __contains__(self, key: str) -> bool: + return key in self.config + + @staticmethod + def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup: + """Create a new context for arguments and return a handle. + + Args: + title: A title for your module's commandline arguments group. + description: A helpful description of your module. + + Returns: + An argparse._ArgumentGroup to be populated by the caller. + """ + return ARGS.add_argument_group(title, description) + + @staticmethod + def overwrite_argparse_epilog(msg: str) -> None: + """Allows your code to override the default epilog created by + argparse. + + Args: + msg: The epilog message to substitute for the default. + """ + ARGS.epilog = msg + + @staticmethod + def is_flag_already_in_argv(var: str) -> bool: + """Returns true if a particular flag is passed on the commandline + and false otherwise. + + Args: + var: The flag to search for. + """ + for _ in sys.argv: + if var in _: + return True + return False + + @staticmethod + def print_usage() -> None: + """Prints the normal help usage message out.""" + ARGS.print_help() + + @staticmethod + def usage() -> str: + """ + Returns: + program usage help text as a string. + """ + return ARGS.format_usage() + + @staticmethod + def _reorder_arg_action_groups_before_help(entry_module: Optional[str]): + """Internal. Used to reorder the arguments before dumping out a + generated help string such that the main program's arguments come + last. + + """ + reordered_action_groups = [] + for grp in ARGS._action_groups: + if entry_module is not None and entry_module in grp.title: # type: ignore + reordered_action_groups.append(grp) + elif PROGRAM_NAME in GROUP.title: # type: ignore + reordered_action_groups.append(grp) + else: + reordered_action_groups.insert(0, grp) + return reordered_action_groups + + def _augment_sys_argv_from_environment_variables(self): + """Internal. Look at the system environment for variables that match + arg names. This is done via some munging such that: + + :code:`--argument_to_match` + + ...is matched by: + + :code:`ARGUMENT_TO_MATCH` + + This allows programmers to set args via shell environment variables + in lieu of passing them on the cmdline. + + """ + usage_message = Config.usage() + optional = False + var = '' + + # Foreach valid optional commandline option (line) generate + # its analogous environment variable. + for line in usage_message.split(): + if line[0] == '[': + optional = True + if optional: + var += f'{line} ' + if line[-1] == ']': + optional = False + var = var.strip() + var = var.strip('[') + var = var.strip(']') + chunks = var.split() + if len(chunks) > 1: + var = var.split()[0] + + # Environment vars the same as flag names without + # the initial -'s and in UPPERCASE. + env = var.upper() + while env[0] == '-': + env = env[1:] + + # Do we see that environment varaible? + if env in os.environ: + if not Config.is_flag_already_in_argv(var): + value = os.environ[env] + self.saved_messages.append( + f'Initialized from environment: {var} = {value}' + ) + from string_utils import to_bool + + if len(chunks) == 1 and to_bool(value): + sys.argv.append(var) + elif len(chunks) > 1: + sys.argv.append(var) + sys.argv.append(value) + var = '' + env = '' + chunks = [] + + def _augment_sys_argv_from_loadfile(self): + """Internal. Augment with arguments persisted in a saved file.""" + + loadfile = None + saw_other_args = False + grab_next_arg = False + for arg in sys.argv[1:]: + if 'config_loadfile' in arg: + pieces = arg.split('=') + if len(pieces) > 1: + loadfile = pieces[1] + else: + grab_next_arg = True + elif grab_next_arg: + loadfile = arg + else: + saw_other_args = True + + if loadfile is not None: + zkpath = None + if loadfile[:3] == 'zk:': + try: + if self.zk is None: + self.zk = KazooClient( + hosts=scott_secrets.ZOOKEEPER_NODES, + use_ssl=True, + verify_certs=False, + keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, + keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS, + certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, + ) + self.zk.start() + zkpath = loadfile[3:] + if not zkpath.startswith('/config/'): + zkpath = '/config/' + zkpath + zkpath = re.sub(r'//+', '/', zkpath) + if not self.zk.exists(zkpath): + raise Exception( + f'ERROR: --config_loadfile argument must be a file, {loadfile} not found (in zookeeper)' + ) + except Exception as e: + raise Exception( + f'ERROR: Error talking with zookeeper while looking for {loadfile}' + ) from e + elif not os.path.exists(loadfile): + raise Exception( + f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.' + ) -def _reorder_arg_action_groups_before_help(entry_module: Optional[str]): - """Internal. Used to reorder the arguments before dumping out a - generated help string such that the main program's arguments come - last. + if saw_other_args: + msg = f'Augmenting commandline arguments with those from {loadfile}.' + else: + msg = f'Reading commandline arguments from {loadfile}.' + print(msg, file=sys.stderr) + self.saved_messages.append(msg) + + newargs = [] + if zkpath: + try: + assert self.zk + contents = self.zk.get(zkpath)[0] + contents = contents.decode() + newargs = [ + arg.strip('\n') + for arg in contents.split('\n') + if 'config_savefile' not in arg + ] + size = sys.getsizeof(newargs) + if size > 1024 * 1024: + raise Exception(f'Saved args are too large! ({size} bytes)') + except Exception as e: + raise Exception(f'Error reading {zkpath} from zookeeper.') from e + self.saved_messages.append(f'Loaded config from zookeeper from {zkpath}') + else: + with open(loadfile, 'r') as rf: + newargs = rf.readlines() + newargs = [arg.strip('\n') for arg in newargs if 'config_savefile' not in arg] + sys.argv += newargs + + def dump_config(self): + """Print the current config to stdout.""" + print("Global Configuration:", file=sys.stderr) + pprint.pprint(self.config, stream=sys.stderr) + print() + + def parse(self, entry_module: Optional[str]) -> Dict[str, Any]: + """Main program should call this early in main(). Note that the + :code:`bootstrap.initialize` wrapper takes care of this automatically. + This should only be called once per program invocation. + + """ + if self.config_parse_called: + return self.config + + # If we're about to do the usage message dump, put the main + # module's argument group last in the list (if possible) so that + # when the user passes -h or --help, it will be visible on the + # screen w/o scrolling. + for arg in sys.argv: + if arg in ('--help', '-h'): + if entry_module is not None: + entry_module = os.path.basename(entry_module) + ARGS._action_groups = Config._reorder_arg_action_groups_before_help(entry_module) + + # Examine the environment for variables that match known flags. + # For a flag called --example_flag the corresponding environment + # variable would be called EXAMPLE_FLAG. If found, hackily add + # these into sys.argv to be parsed. + self._augment_sys_argv_from_environment_variables() + + # Look for loadfile and read/parse it if present. This also + # works by jamming these values onto sys.argv. + self._augment_sys_argv_from_loadfile() + + # Parse (possibly augmented, possibly completely overwritten) + # commandline args with argparse normally and populate config. + known, unknown = ARGS.parse_known_args() + self.config.update(vars(known)) + + # Reconstruct the argv with unrecognized flags for the benefit of + # future argument parsers. For example, unittest_main in python + # has some of its own flags. If we didn't recognize it, maybe + # someone else will. + if len(unknown) > 0: + if config['config_rejects_unrecognized_arguments']: + raise Exception( + f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.' + ) + self.saved_messages.append( + f'Config encountered unrecognized commandline arguments: {unknown}' + ) + sys.argv = sys.argv[:1] + unknown + + # Check for savefile and populate it if requested. + savefile = config['config_savefile'] + if savefile and len(savefile) > 0: + data = '\n'.join(ORIG_ARGV[1:]) + if savefile[:3] == 'zk:': + zkpath = savefile[3:] + if not zkpath.startswith('/config/'): + zkpath = '/config/' + zkpath + zkpath = re.sub(r'//+', '/', zkpath) + try: + if not self.zk: + self.zk = KazooClient( + hosts=scott_secrets.ZOOKEEPER_NODES, + use_ssl=True, + verify_certs=False, + keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, + keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS, + certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, + ) + self.zk.start() + if not self.zk.exists(zkpath): + self.zk.create(zkpath, data.encode()) + else: + self.zk.set(zkpath, data.encode()) + except Exception as e: + raise Exception(f'Failed to create zookeeper path {zkpath}') from e + self.saved_messages.append(f'Saved config to zookeeper in {zkpath}') + else: + with open(savefile, 'w') as wf: + wf.write(data) - """ - reordered_action_groups = [] - for grp in ARGS._action_groups: - if entry_module is not None and entry_module in grp.title: # type: ignore - reordered_action_groups.append(grp) - elif PROGRAM_NAME in group.title: # type: ignore - reordered_action_groups.append(grp) - else: - reordered_action_groups.insert(0, grp) - return reordered_action_groups + # Also dump the config on stderr if requested. + if config['config_dump']: + self.dump_config() + self.config_parse_called = True + return self.config -def print_usage() -> None: - """Prints the normal help usage message out.""" - ARGS.print_help() + def has_been_parsed(self) -> bool: + """Returns True iff the global config has already been parsed""" + return self.config_parse_called + def late_logging(self): + """Log messages saved earlier now that logging has been initialized.""" + logger = logging.getLogger(__name__) + logger.debug('Original commandline was: %s', ORIG_ARGV) + for _ in self.saved_messages: + logger.debug(_) -def usage() -> str: - """ - Returns: - program usage help text as a string. - """ - return ARGS.format_usage() +# A global singleton instance of the Config class. +CONFIG = Config() -def _augment_sys_argv_from_environment_variables(): - """Internal. Look at the system environment for variables that match - arg names. This is done via some munging such that: +# A lot of client code uses config.config['whatever'] to lookup +# configuration so to preserve this we make this, config.config, with +# a __getitem__ method on it. +config = CONFIG - :code:`--argument_to_match` +# Config didn't use to be a class; it was a mess of module-level +# functions and data. The functions below preserve the old interface +# so that existing clients do not need to be changed. As you can see, +# they mostly just thunk into the config class. - ...is matched by: - :code:`ARGUMENT_TO_MATCH` +def add_commandline_args(title: str, description: str = "") -> argparse._ArgumentGroup: + """Create a new context for arguments and return a handle. An alias + for config.config.add_commandline_args. - This allows programmers to set args via shell environment variables - in lieu of passing them on the cmdline. + Args: + title: A title for your module's commandline arguments group. + description: A helpful description of your module. + Returns: + An argparse._ArgumentGroup to be populated by the caller. """ - - usage_message = usage() - optional = False - var = '' - for x in usage_message.split(): - if x[0] == '[': - optional = True - if optional: - var += f'{x} ' - if x[-1] == ']': - optional = False - var = var.strip() - var = var.strip('[') - var = var.strip(']') - chunks = var.split() - if len(chunks) > 1: - var = var.split()[0] - - # Environment vars the same as flag names without - # the initial -'s and in UPPERCASE. - env = var.strip('-').upper() - if env in os.environ: - if not is_flag_already_in_argv(var): - value = os.environ[env] - SAVED_MESSAGES.append(f'Initialized from environment: {var} = {value}') - from string_utils import to_bool - - if len(chunks) == 1 and to_bool(value): - sys.argv.append(var) - elif len(chunks) > 1: - sys.argv.append(var) - sys.argv.append(value) - var = '' - env = '' - - -def _augment_sys_argv_from_loadfile(): - """Internal. Augment with arguments persisted in a saved file.""" - - global ZK - loadfile = None - saw_other_args = False - grab_next_arg = False - for arg in sys.argv[1:]: - if 'config_loadfile' in arg: - pieces = arg.split('=') - if len(pieces) > 1: - loadfile = pieces[1] - else: - grab_next_arg = True - elif grab_next_arg: - loadfile = arg - else: - saw_other_args = True - - if loadfile is not None: - zkpath = None - if loadfile[:3] == 'zk:': - try: - if ZK is None: - ZK = KazooClient( - hosts=scott_secrets.ZOOKEEPER_NODES, - use_ssl=True, - verify_certs=False, - keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, - keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS, - certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, - ) - ZK.start() - zkpath = loadfile[3:] - if not zkpath.startswith('/config/'): - zkpath = '/config/' + zkpath - zkpath = re.sub(r'//+', '/', zkpath) - if not ZK.exists(zkpath): - raise Exception( - f'ERROR: --config_loadfile argument must be a file, {loadfile} not found (in zookeeper)' - ) - except Exception as e: - raise Exception( - f'ERROR: Error talking with zookeeper while looking for {loadfile}' - ) from e - elif not os.path.exists(loadfile): - raise Exception( - f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.' - ) - - if saw_other_args: - msg = f'Augmenting commandline arguments with those from {loadfile}.' - else: - msg = f'Reading commandline arguments from {loadfile}.' - print(msg, file=sys.stderr) - SAVED_MESSAGES.append(msg) - - newargs = [] - if zkpath: - try: - assert ZK - contents = ZK.get(zkpath)[0] - contents = contents.decode() - newargs = [ - arg.strip('\n') for arg in contents.split('\n') if 'config_savefile' not in arg - ] - size = sys.getsizeof(newargs) - if size > 1024 * 1024: - raise Exception(f'Saved args are too large! ({size} bytes)') - except Exception as e: - raise Exception(f'Error reading {zkpath} from zookeeper.') from e - SAVED_MESSAGES.append(f'Loaded config from zookeeper from {zkpath}') - else: - with open(loadfile, 'r') as rf: - newargs = rf.readlines() - newargs = [arg.strip('\n') for arg in newargs if 'config_savefile' not in arg] - sys.argv += newargs + return CONFIG.add_commandline_args(title, description) def parse(entry_module: Optional[str]) -> Dict[str, Any]: """Main program should call this early in main(). Note that the :code:`bootstrap.initialize` wrapper takes care of this automatically. - This should only be called once per program invocation. - + This should only be called once per program invocation. Subsequent + calls do not reparse the configuration settings but rather just + return the current state. """ - global CONFIG_PARSE_CALLED - if CONFIG_PARSE_CALLED: - return config - global ZK - - # If we're about to do the usage message dump, put the main - # module's argument group last in the list (if possible) so that - # when the user passes -h or --help, it will be visible on the - # screen w/o scrolling. - for arg in sys.argv: - if arg in ('--help', '-h'): - if entry_module is not None: - entry_module = os.path.basename(entry_module) - ARGS._action_groups = _reorder_arg_action_groups_before_help(entry_module) - - # Examine the environment for variables that match known flags. - # For a flag called --example_flag the corresponding environment - # variable would be called EXAMPLE_FLAG. If found, hackily add - # these into sys.argv to be parsed. - _augment_sys_argv_from_environment_variables() - - # Look for loadfile and read/parse it if present. This also - # works by jamming these values onto sys.argv. - _augment_sys_argv_from_loadfile() - - # Parse (possibly augmented, possibly completely overwritten) - # commandline args with argparse normally and populate config. - known, unknown = ARGS.parse_known_args() - config.update(vars(known)) - - # Reconstruct the argv with unrecognized flags for the benefit of - # future argument parsers. For example, unittest_main in python - # has some of its own flags. If we didn't recognize it, maybe - # someone else will. - if len(unknown) > 0: - if config['config_rejects_unrecognized_arguments']: - raise Exception( - f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.' - ) - SAVED_MESSAGES.append(f'Config encountered unrecognized commandline arguments: {unknown}') - sys.argv = sys.argv[:1] + unknown - - # Check for savefile and populate it if requested. - savefile = config['config_savefile'] - if savefile and len(savefile) > 0: - data = '\n'.join(ORIG_ARGV[1:]) - if savefile[:3] == 'zk:': - zkpath = savefile[3:] - if not zkpath.startswith('/config/'): - zkpath = '/config/' + zkpath - zkpath = re.sub(r'//+', '/', zkpath) - try: - if not ZK: - ZK = KazooClient( - hosts=scott_secrets.ZOOKEEPER_NODES, - use_ssl=True, - verify_certs=False, - keyfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, - keyfile_password=scott_secrets.ZOOKEEPER_CLIENT_PASS, - certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, - ) - ZK.start() - if not ZK.exists(zkpath): - ZK.create(zkpath, data.encode()) - else: - ZK.set(zkpath, data.encode()) - except Exception as e: - raise Exception(f'Failed to create zookeeper path {zkpath}') from e - SAVED_MESSAGES.append(f'Saved config to zookeeper in {zkpath}') - else: - with open(savefile, 'w') as wf: - wf.write(data) - - # Also dump the config on stderr if requested. - if config['config_dump']: - dump_config() - - CONFIG_PARSE_CALLED = True - return config + return CONFIG.parse(entry_module) def has_been_parsed() -> bool: """Returns True iff the global config has already been parsed""" - return CONFIG_PARSE_CALLED + return CONFIG.has_been_parsed() + + +def late_logging() -> None: + """Log messages saved earlier now that logging has been initialized.""" + CONFIG.late_logging() -def dump_config(): +def dump_config() -> None: """Print the current config to stdout.""" - print("Global Configuration:", file=sys.stderr) - pprint.pprint(config, stream=sys.stderr) - print() + CONFIG.dump_config() -def late_logging(): - """Log messages saved earlier now that logging has been initialized.""" - logger = logging.getLogger(__name__) - logger.debug('Original commandline was: %s', ORIG_ARGV) - for _ in SAVED_MESSAGES: - logger.debug(_) +def overwrite_argparse_epilog(msg: str) -> None: + """Allows your code to override the default epilog created by + argparse. + + Args: + msg: The epilog message to substitute for the default. + """ + Config.overwrite_argparse_epilog(msg) + + +def is_flag_already_in_argv(var: str) -> bool: + """Returns true if a particular flag is passed on the commandline + and false otherwise. + + Args: + var: The flag to search for. + """ + return Config.is_flag_already_in_argv(var) + + +def print_usage() -> None: + """Prints the normal help usage message out.""" + Config.print_usage() + + +def usage() -> str: + """ + Returns: + program usage help text as a string. + """ + return Config.usage()