From c974b8cde11f133df7680967b759772c624007e9 Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Wed, 9 Feb 2022 22:18:59 -0800 Subject: [PATCH] More spring cleaning. --- arper.py | 19 +++++++++++---- base_presence.py | 24 +++++++++---------- bootstrap.py | 31 +++++++++++++++++++----- config.py | 61 ++++++++++++++++++++++------------------------- logical_search.py | 2 +- 5 files changed, 81 insertions(+), 56 deletions(-) diff --git a/arper.py b/arper.py index a665137..2ef2676 100644 --- a/arper.py +++ b/arper.py @@ -55,6 +55,17 @@ cfg.add_argument( @persistent.persistent_autoloaded_singleton() # type: ignore class Arper(persistent.Persistent): + """A caching layer around the kernel's network mapping between IPs and + MACs. This class restores persisted state that expires + periodically (see --arper_cache_max_staleness) at program startup + time. If it's unable to use the file's contents, it queries the + kernel (via arp) and uses an auxiliary utility called arp-scan to + query the network. If it has to do this there's a latency hit but + it persists the collected data in the cache file. Either way, the + class behaves as a global singleton hosting this data thereafter. + + """ + def __init__( self, cached_local_state: Optional[BiDict] = None, @@ -118,10 +129,10 @@ class Arper(persistent.Persistent): @classmethod def load_state( - cls, - cache_file: str, - freshness_threshold_sec: int, - state: BiDict, + cls, + cache_file: str, + freshness_threshold_sec: int, + state: BiDict, ): if not file_utils.file_is_readable(cache_file): logger.debug('Can\'t read %s', cache_file) diff --git a/base_presence.py b/base_presence.py index f18b870..8be4d93 100755 --- a/base_presence.py +++ b/base_presence.py @@ -1,5 +1,10 @@ #!/usr/bin/env python3 +"""This is a module dealing with trying to guess a person's location +based on the location of certain devices (e.g. phones, laptops) +belonging to that person. 
It works with networks I run that log +device MAC addresses active.""" + import datetime import logging import re @@ -38,6 +43,9 @@ cfg.add_argument( class PresenceDetection(object): + """See above. This is a base class for determining a person's + location on networks I administer.""" + def __init__(self) -> None: # Note: list most important devices first. self.devices_by_person: Dict[Person, List[str]] = { @@ -219,17 +227,11 @@ class PresenceDetection(object): if mac not in self.names_by_mac: continue mac_name = self.names_by_mac[mac] - logger.debug( - 'Looking for %s... check for mac %s (%s)', - name, mac, mac_name - ) + logger.debug('Looking for %s... check for mac %s (%s)', name, mac, mac_name) for location in self.location_ts_by_mac: if mac in self.location_ts_by_mac[location]: ts = (self.location_ts_by_mac[location])[mac] - logger.debug( - 'Seen %s (%s) at %s since %s', - mac, mac_name, location, ts - ) + logger.debug('Seen %s (%s) at %s since %s', mac, mac_name, location, ts) tiebreaks[location] = ts ( @@ -257,10 +259,8 @@ def main() -> None: for person in Person: print(f'{person} => {p.where_is_person_now(person)}') print() - - -# for location in Location: -# print(f'{location} => {p.is_anyone_in_location_now(location)}') + for location in Location: + print(f'{location} => {p.is_anyone_in_location_now(location)}') if __name__ == '__main__': diff --git a/bootstrap.py b/bootstrap.py index 3574786..50af844 100644 --- a/bootstrap.py +++ b/bootstrap.py @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +"""This is a module for wrapping around python programs and doing some +minor setup and tear down work for them. 
With it, you can break into +pdb on unhandled top level exceptions, profile your code by passing a +commandline argument in, audit module import events, examine where +memory is being used in your program, and so on.""" + import functools import importlib import logging @@ -101,6 +107,15 @@ def handle_uncaught_exception(exc_type, exc_value, exc_tb): class ImportInterceptor(importlib.abc.MetaPathFinder): + """An interceptor that always allows module load events but dumps a + record into the log and onto stdout when modules are loaded and + produces an audit of who imported what at the end of the run. It + can't see any load events that happen before it, though, so move + bootstrap up in your __main__'s import list just temporarily to + get a good view. + + """ + def __init__(self): import collect.trie @@ -111,9 +126,9 @@ class ImportInterceptor(importlib.abc.MetaPathFinder): def repopulate_modules_by_filename(self): self.module_by_filename_cache.clear() - for mod in sys.modules: - if hasattr(sys.modules[mod], '__file__'): - fname = getattr(sys.modules[mod], '__file__') + for _, mod in sys.modules.copy().items(): # copy here because modules is volatile + if hasattr(mod, '__file__'): + fname = getattr(mod, '__file__') else: fname = 'unknown' self.module_by_filename_cache[fname] = mod @@ -243,8 +258,7 @@ def initialize(entry_point): # Maybe log some info about the python interpreter itself. 
logger.debug( - 'Platform: %s, maxint=0x%x, byteorder=%s', - sys.platform, sys.maxsize, sys.byteorder + 'Platform: %s, maxint=0x%x, byteorder=%s', sys.platform, sys.maxsize, sys.byteorder ) logger.debug('Python interpreter version: %s', sys.version) logger.debug('Python implementation: %s', sys.implementation) @@ -320,7 +334,12 @@ def initialize(entry_point): 'child system: %.4fs\n' 'machine uptime: %.4fs\n' 'walltime: %.4fs', - utime, stime, cutime, cstime, elapsed_time, walltime + utime, + stime, + cutime, + cstime, + elapsed_time, + walltime, ) # If it doesn't return cleanly, call attention to the return value. diff --git a/config.py b/config.py index 1ac5cff..b2e62c0 100644 --- a/config.py +++ b/config.py @@ -77,11 +77,11 @@ from typing import Any, Dict, List, Optional # taking any unnecessary dependencies back on them. # Defer logging messages until later when logging has been initialized. -saved_messages: List[str] = [] +SAVED_MESSAGES: List[str] = [] # Make a copy of the original program arguments. -program_name: str = os.path.basename(sys.argv[0]) -original_argv: List[str] = [arg for arg in sys.argv] +PROGRAM_NAME: str = os.path.basename(sys.argv[0]) +ORIG_ARGV: List[str] = sys.argv.copy() class OptionalRawFormatter(argparse.HelpFormatter): @@ -99,16 +99,16 @@ class OptionalRawFormatter(argparse.HelpFormatter): # A global parser that we will collect arguments into. -args = argparse.ArgumentParser( +ARGS = argparse.ArgumentParser( description=None, formatter_class=OptionalRawFormatter, fromfile_prefix_chars="@", - epilog=f'{program_name} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.', + epilog=f'{PROGRAM_NAME} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.', ) # Keep track of if we've been called and prevent being called more # than once. -config_parse_called = False +CONFIG_PARSE_CALLED = False # A global configuration dictionary that will contain parsed arguments. 
@@ -116,12 +116,13 @@ config_parse_called = False # This is the data that is most interesting to our callers; it will hold # the configuration result. config: Dict[str, Any] = {} + # It would be really nice if this shit worked from interactive python def add_commandline_args(title: str, description: str = ""): """Create a new context for arguments and return a handle.""" - return args.add_argument_group(title, description) + return ARGS.add_argument_group(title, description) group = add_commandline_args( @@ -160,7 +161,7 @@ group.add_argument( def overwrite_argparse_epilog(msg: str) -> None: - args.epilog = msg + ARGS.epilog = msg def is_flag_already_in_argv(var: str): @@ -172,21 +173,19 @@ def is_flag_already_in_argv(var: str): def reorder_arg_action_groups(entry_module: Optional[str]): - global program_name, args reordered_action_groups = [] - for group in args._action_groups: - if entry_module is not None and entry_module in group.title: # type: ignore - reordered_action_groups.append(group) - elif program_name in group.title: # type: ignore - reordered_action_groups.append(group) + for grp in ARGS._action_groups: + if entry_module is not None and entry_module in grp.title: # type: ignore + reordered_action_groups.append(grp) + elif PROGRAM_NAME in grp.title: # type: ignore + reordered_action_groups.append(grp) else: - reordered_action_groups.insert(0, group) + reordered_action_groups.insert(0, grp) return reordered_action_groups def augment_sys_argv_from_environment_variables(): - global saved_messages - usage_message = args.format_usage() + usage_message = ARGS.format_usage() optional = False var = '' for x in usage_message.split(): @@ -209,7 +208,7 @@ def augment_sys_argv_from_environment_variables(): if env in os.environ: if not is_flag_already_in_argv(var): value = os.environ[env] - saved_messages.append(f'Initialized from environment: {var} = {value}') + SAVED_MESSAGES.append(f'Initialized from environment: {var} = {value}') from string_utils import 
to_bool if len(chunks) == 1 and to_bool(value): @@ -222,7 +221,6 @@ def augment_sys_argv_from_environment_variables(): def augment_sys_argv_from_loadfile(): - global saved_messages loadfile = None saw_other_args = False grab_next_arg = False @@ -248,7 +246,7 @@ def augment_sys_argv_from_loadfile(): else: msg = f'Reading commandline arguments from {loadfile}.' print(msg, file=sys.stderr) - saved_messages.append(msg) + SAVED_MESSAGES.append(msg) with open(loadfile, 'r') as rf: newargs = rf.readlines() @@ -261,18 +259,17 @@ def parse(entry_module: Optional[str]) -> Dict[str, Any]: bootstrap.initialize wrapper takes care of this automatically. """ - global config_parse_called - if config_parse_called: + global CONFIG_PARSE_CALLED + if CONFIG_PARSE_CALLED: return config - global saved_messages # If we're about to do the usage message dump, put the main # module's argument group last in the list (if possible) so that # when the user passes -h or --help, it will be visible on the # screen w/o scrolling. for arg in sys.argv: - if arg == '--help' or arg == '-h': - args._action_groups = reorder_arg_action_groups(entry_module) + if arg in ('--help', '-h'): + ARGS._action_groups = reorder_arg_action_groups(entry_module) # Examine the environment for variables that match known flags. # For a flag called --example_flag the corresponding environment @@ -286,7 +283,7 @@ def parse(entry_module: Optional[str]) -> Dict[str, Any]: # Parse (possibly augmented, possibly completely overwritten) # commandline args with argparse normally and populate config. - known, unknown = args.parse_known_args() + known, unknown = ARGS.parse_known_args() config.update(vars(known)) # Reconstruct the argv with unrecognized flags for the benefit of @@ -298,27 +295,26 @@ def parse(entry_module: Optional[str]) -> Dict[str, Any]: raise Exception( f'Encountered unrecognized config argument(s) {unknown} with --config_rejects_unrecognized_arguments enabled; halting.' 
) - saved_messages.append(f'Config encountered unrecognized commandline arguments: {unknown}') + SAVED_MESSAGES.append(f'Config encountered unrecognized commandline arguments: {unknown}') sys.argv = sys.argv[:1] + unknown # Check for savefile and populate it if requested. savefile = config['config_savefile'] if savefile and len(savefile) > 0: with open(savefile, 'w') as wf: - wf.write("\n".join(original_argv[1:])) + wf.write("\n".join(ORIG_ARGV[1:])) # Also dump the config on stderr if requested. if config['config_dump']: dump_config() - config_parse_called = True + CONFIG_PARSE_CALLED = True return config def has_been_parsed() -> bool: """Has the global config been parsed yet?""" - global config_parse_called - return config_parse_called + return CONFIG_PARSE_CALLED def dump_config(): @@ -331,6 +327,5 @@ def dump_config(): def late_logging(): """Log messages saved earlier now that logging has been initialized.""" logger = logging.getLogger(__name__) - global saved_messages - for _ in saved_messages: + for _ in SAVED_MESSAGES: logger.debug(_) diff --git a/logical_search.py b/logical_search.py index 41ed729..c85b262 100644 --- a/logical_search.py +++ b/logical_search.py @@ -183,7 +183,7 @@ class Corpus(object): def invert_docid_set(self, original: Set[str]) -> Set[str]: """Invert a set of docids.""" - return set([docid for docid in self.documents_by_docid.keys() if docid not in original]) + return {docid for docid in self.documents_by_docid if docid not in original} def get_doc(self, docid: str) -> Optional[Document]: """Given a docid, retrieve the previously added Document.""" -- 2.47.1