From aafec9ed642bcbd6fa8ba14656111eb98ebf0b29 Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Wed, 17 Aug 2022 19:12:33 -0700 Subject: [PATCH] Dynamic config arguments stab #1. --- config.py | 183 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 64 deletions(-) diff --git a/config.py b/config.py index 935bb79..0c40166 100644 --- a/config.py +++ b/config.py @@ -2,10 +2,11 @@ # © Copyright 2021-2022, Scott Gasch -"""Global configuration driven by commandline arguments, environment variables -and saved configuration files. This works across several modules. +"""Global configuration driven by commandline arguments, environment variables, +saved configuration files, and zookeeper-based dynamic configurations. This +works across several modules. -Usage: +Example usage: In your file.py:: @@ -26,17 +27,18 @@ Usage: import config + parser = config.add_commandline_args( + "Main", + "A program that does the thing.", + ) + parser.add_argument( + "--dry_run", + type=bool, + default=False, + help="Should we really do the thing?" + ) + def main() -> None: - parser = config.add_commandline_args( - "Main", - "A program that does the thing.", - ) - parser.add_argument( - "--dry_run", - type=bool, - default=False, - help="Should we really do the thing?" - ) config.parse() # Very important, this must be invoked! If you set this up and remember to invoke config.parse(), all commandline @@ -86,16 +88,17 @@ import os import pprint import re import sys -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from kazoo.client import KazooClient +from kazoo.protocol.states import WatchedEvent import scott_secrets # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. -# Make a copy of the original program arguments. +# Make a copy of the original program arguments immediately upon module load. 
PROGRAM_NAME: str = os.path.basename(sys.argv[0]) ORIG_ARGV: List[str] = sys.argv.copy() @@ -140,7 +143,8 @@ class OptionalRawFormatter(argparse.HelpFormatter): return argparse.HelpFormatter._split_lines(self, text, width) -# A global parser that we will collect arguments into. +# A global argparser that we will collect arguments in. Each module (including +# us) will add arguments to a separate argument group. ARGS = argparse.ArgumentParser( description=None, formatter_class=OptionalRawFormatter, @@ -152,6 +156,9 @@ ARGS = argparse.ArgumentParser( # this iff the program is 'sphinx-build' conflict_handler='resolve' if PROGRAM_NAME == 'sphinx-build' else 'error', ) + +# Arguments specific to config.py. Other users should get their own group by +# invoking config.add_commandline_args. GROUP = ARGS.add_argument_group( f'Global Config ({__file__})', 'Args that control the global config itself; how meta!', @@ -160,26 +167,26 @@ GROUP.add_argument( '--config_loadfile', metavar='FILENAME', default=None, - help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to commandline. Note that if this begins with zk: the path is interpreted as a zookeeper path instead of a filesystem path.', + help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to those passed via the commandline. Note that if the given path begins with "zk:" then it is interpreted as a zookeeper path instead of as a filesystem path. When loading config from zookeeper, any argument with the string "dynamic" in the name (e.g. --module_dynamic_url) may be modified at runtime by changes made to zookeeper (using --config_savefile=zk:path). 
You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.', ) GROUP.add_argument( '--config_dump', default=False, action='store_true', - help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup.', + help='Display the global configuration (possibly derived from multiple sources) on STDERR at program startup time.', ) GROUP.add_argument( '--config_savefile', type=str, metavar='FILENAME', default=None, - help='Populate config file compatible with --config_loadfile to save global config for later use. Note that if this begins with zk: the path is interpreted as a zookeeper oath instead of a filesystem path.', + help='Populate a config file (compatible with --config_loadfile) with the given path for later use. If the given path begins with "zk:" it is interpreted as a zookeeper path instead of a filesystem path. When updating zookeeper-based configs, all running programs that read their configuration from zookeeper (via --config_loadfile=zk:path) at startup time will see their configuration dynamically updated; flags with "dynamic" in their names (e.g. --my_dynamic_flag) may have their values changed. You should therefore either write your code to handle dynamic argument changes or avoid naming arguments "dynamic" if you use zookeeper configuration paths.', ) GROUP.add_argument( '--config_rejects_unrecognized_arguments', default=False, action='store_true', - help='If present, config will raise an exception if it doesn\'t recognize an argument. The default behavior is to ignore this so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.', + help='If present, config will raise an exception if it doesn\'t recognize an argument. 
The default behavior is to ignore unknown arguments so as to allow interoperability with programs that want to use their own argparse calls to parse their own, separate commandline args.', ) @@ -214,6 +221,9 @@ class Config: # not reading or writing their config data into zookeeper. self.zk: Optional[KazooClient] = None + # Per known zk file, what is the max version we have seen? + self.max_version: Dict[str, int] = {} + def __getitem__(self, key: str) -> Optional[Any]: """If someone uses []'s on us, pass it onto self.config.""" return self.config.get(key, None) @@ -290,9 +300,34 @@ class Config: reordered_action_groups.insert(0, grp) return reordered_action_groups + @staticmethod + def _parse_arg_into_env(arg: str) -> Optional[Tuple[str, str, List[str]]]: + """Internal helper to parse commandline args into environment vars.""" + + arg = arg.strip() + if not arg.startswith('['): + return None + arg = arg.strip('[') + if not arg.endswith(']'): + return None + arg = arg.strip(']') + + chunks = arg.split() + if len(chunks) > 1: + var = arg.split()[0] + else: + var = arg + + # Environment vars the same as flag names without + # the initial -'s and in UPPERCASE. + env = var.upper() + while env[0] == '-': + env = env[1:] + return arg, env, chunks + def _augment_sys_argv_from_environment_variables(self): """Internal. Look at the system environment for variables that match - arg names. This is done via some munging such that: + commandline arg names. This is done via some munging such that: :code:`--argument_to_match` @@ -300,53 +335,63 @@ class Config: :code:`ARGUMENT_TO_MATCH` - This allows programmers to set args via shell environment variables + This allows users to set args via shell environment variables in lieu of passing them on the cmdline. 
""" usage_message = Config.usage() optional = False - var = '' + arg = '' - # Foreach valid optional commandline option (line) generate + # Foreach valid optional commandline option (chunk) generate # its analogous environment variable. - for line in usage_message.split(): - if line[0] == '[': + for chunk in usage_message.split(): + if chunk[0] == '[': optional = True if optional: - var += f'{line} ' - if line[-1] == ']': + arg += f'{chunk} ' + if chunk[-1] == ']': optional = False - var = var.strip() - var = var.strip('[') - var = var.strip(']') - chunks = var.split() - if len(chunks) > 1: - var = var.split()[0] - - # Environment vars the same as flag names without - # the initial -'s and in UPPERCASE. - env = var.upper() - while env[0] == '-': - env = env[1:] - - # Do we see that environment varaible? - if env in os.environ: - if not Config.is_flag_already_in_argv(var): - value = os.environ[env] - self.saved_messages.append( - f'Initialized from environment: {var} = {value}' - ) - from string_utils import to_bool - - if len(chunks) == 1 and to_bool(value): - sys.argv.append(var) - elif len(chunks) > 1: - sys.argv.append(var) - sys.argv.append(value) - var = '' - env = '' - chunks = [] + _ = Config._parse_arg_into_env(arg) + if _: + var, env, chunks = _ + if env in os.environ: + if not Config.is_flag_already_in_argv(var): + value = os.environ[env] + self.saved_messages.append( + f'Initialized from environment: {var} = {value}' + ) + from string_utils import to_bool + + if len(chunks) == 1 and to_bool(value): + sys.argv.append(arg) + elif len(chunks) > 1: + sys.argv.append(arg) + sys.argv.append(value) + arg = '' + + def _process_dynamic_args(self, event: WatchedEvent): + assert self.zk + logger = logging.getLogger(__name__) + contents, meta = self.zk.get(event.path, watch=self._process_dynamic_args) + logger.debug('Update for %s at version=%d.', event.path, meta.version) + logger.debug( + 'Max known version for %s is %d.', event.path, 
self.max_version.get(event.path, 0) + ) + if meta.version > self.max_version.get(event.path, 0): + self.max_version[event.path] = meta.version + contents = contents.decode() + temp_argv = [] + for arg in contents.split(): + if 'dynamic' in arg: + temp_argv.append(arg) + logger.info("Updating %s from zookeeper async config change.", arg) + if len(temp_argv) > 0: + old_argv = sys.argv + sys.argv = temp_argv + known, _ = ARGS.parse_known_args() + sys.argv = old_argv + self.config.update(vars(known)) def _augment_sys_argv_from_loadfile(self): """Internal. Augment with arguments persisted in a saved file.""" @@ -408,16 +453,15 @@ class Config: if zkpath: try: assert self.zk - contents = self.zk.get(zkpath)[0] + contents, meta = self.zk.get(zkpath, watch=self._process_dynamic_args) contents = contents.decode() newargs = [ arg.strip('\n') for arg in contents.split('\n') if 'config_savefile' not in arg ] - size = sys.getsizeof(newargs) - if size > 1024 * 1024: - raise Exception(f'Saved args are too large! ({size} bytes)') + self.saved_messages.append(f'Setting {zkpath}\'s max_version to {meta.version}') + self.max_version[zkpath] = meta.version except Exception as e: raise Exception(f'Error reading {zkpath} from zookeeper.') from e self.saved_messages.append(f'Loaded config from zookeeper from {zkpath}') @@ -501,10 +545,21 @@ class Config: certfile=scott_secrets.ZOOKEEPER_CLIENT_CERT, ) self.zk.start() + data = data.encode() + if len(data) > 1024 * 1024: + raise Exception(f'Saved args are too large! 
({len(data)} bytes)') if not self.zk.exists(zkpath): - self.zk.create(zkpath, data.encode()) + self.zk.create(zkpath, data) + self.saved_messages.append( + f'Just created {zkpath}; setting its max_version to 0' + ) + self.max_version[zkpath] = 0 else: - self.zk.set(zkpath, data.encode()) + meta = self.zk.set(zkpath, data) + self.saved_messages.append( + f'Setting {zkpath}\'s max_version to {meta.version}' + ) + self.max_version[zkpath] = meta.version except Exception as e: raise Exception(f'Failed to create zookeeper path {zkpath}') from e self.saved_messages.append(f'Saved config to zookeeper in {zkpath}') -- 2.47.1