bootstrap.py

   1 #!/usr/bin/env python3
   2
   3 import functools
   4 import importlib
   5 import logging
   6 import os
   7 import sys
   8 from inspect import stack
   9 from typing import List
  10
  11 import config
  12 import logging_utils
  13 from argparse_utils import ActionNoYes
  14
  15 # This module is commonly used by others in here and should avoid
  16 # taking any unnecessary dependencies back on them.
  17
  18
  19 logger = logging.getLogger(__name__)
  20
  21 args = config.add_commandline_args(
  22     f'Bootstrap ({__file__})',
  23     'Args related to python program bootstrapper and Swiss army knife',
  24 )
  25 args.add_argument(
  26     '--debug_unhandled_exceptions',
  27     action=ActionNoYes,
  28     default=False,
  29     help='Break into pdb on top level unhandled exceptions.',
  30 )
  31 args.add_argument(
  32     '--show_random_seed',
  33     action=ActionNoYes,
  34     default=False,
  35     help='Should we display (and log.debug) the global random seed?',
  36 )
  37 args.add_argument(
  38     '--set_random_seed',
  39     type=int,
  40     nargs=1,
  41     default=None,
  42     metavar='SEED_INT',
  43     help='Override the global random seed with a particular number.',
  44 )
  45 args.add_argument(
  46     '--dump_all_objects',
  47     action=ActionNoYes,
  48     default=False,
  49     help='Should we dump the Python import tree before main?',
  50 )
  51 args.add_argument(
  52     '--audit_import_events',
  53     action=ActionNoYes,
  54     default=False,
  55     help='Should we audit all import events?',
  56 )
  57 args.add_argument(
  58     '--run_profiler',
  59     action=ActionNoYes,
  60     default=False,
  61     help='Should we run cProfile on this code?',
  62 )
  63 args.add_argument(
  64     '--trace_memory',
  65     action=ActionNoYes,
  66     default=False,
  67     help='Should we record/report on memory utilization?',
  68 )
  69
  70 original_hook = sys.excepthook
  71
  72
  73 def handle_uncaught_exception(exc_type, exc_value, exc_tb):
  74     """
  75     Top-level exception handler for exceptions that make it past any exception
  76     handlers in the python code being run.  Logs the error and stacktrace then
  77     maybe attaches a debugger.
  78
  79     """
  80     global original_hook
  81     msg = f'Unhandled top level exception {exc_type}'
  82     logger.exception(msg)
  83     print(msg, file=sys.stderr)
  84     if issubclass(exc_type, KeyboardInterrupt):
  85         sys.__excepthook__(exc_type, exc_value, exc_tb)
  86         return
  87     else:
  88         if not sys.stderr.isatty() or not sys.stdin.isatty():
  89             # stdin or stderr is redirected, just do the normal thing
  90             original_hook(exc_type, exc_value, exc_tb)
  91         else:
  92             # a terminal is attached and stderr is not redirected, maybe debug.
  93             import traceback
  94
  95             traceback.print_exception(exc_type, exc_value, exc_tb)
  96             if config.config['debug_unhandled_exceptions']:
  97                 import pdb
  98
  99                 logger.info("Invoking the debugger...")
 100                 pdb.pm()
 101             else:
 102                 original_hook(exc_type, exc_value, exc_tb)
 103
 104
 105 class ImportInterceptor(importlib.abc.MetaPathFinder):
 106     def __init__(self):
 107         import collect.trie
 108
 109         self.module_by_filename_cache = {}
 110         self.repopulate_modules_by_filename()
 111         self.tree = collect.trie.Trie()
 112         self.tree_node_by_module = {}
 113
 114     def repopulate_modules_by_filename(self):
 115         self.module_by_filename_cache.clear()
 116         for mod in sys.modules:
 117             if hasattr(sys.modules[mod], '__file__'):
 118                 fname = getattr(sys.modules[mod], '__file__')
 119             else:
 120                 fname = 'unknown'
 121             self.module_by_filename_cache[fname] = mod
 122
 123     def should_ignore_filename(self, filename: str) -> bool:
 124         return 'importlib' in filename or 'six.py' in filename
 125
 126     def find_module(self, fullname, path):
 127         raise Exception(
 128             "This method has been deprecated since Python 3.4, please upgrade."
 129         )
 130
 131     def find_spec(self, loaded_module, path=None, target=None):
 132         s = stack()
 133         for x in range(3, len(s)):
 134             filename = s[x].filename
 135             if self.should_ignore_filename(filename):
 136                 continue
 137
 138             loading_function = s[x].function
 139             if filename in self.module_by_filename_cache:
 140                 loading_module = self.module_by_filename_cache[filename]
 141             else:
 142                 self.repopulate_modules_by_filename()
 143                 loading_module = self.module_by_filename_cache.get(filename, 'unknown')
 144
 145             path = self.tree_node_by_module.get(loading_module, [])
 146             path.extend([loaded_module])
 147             self.tree.insert(path)
 148             self.tree_node_by_module[loading_module] = path
 149
 150             msg = f'*** Import {loaded_module} from {filename}:{s[x].lineno} in {loading_module}::{loading_function}'
 151             logger.debug(msg)
 152             print(msg)
 153             return
 154         msg = f'*** Import {loaded_module} from ?????'
 155         logger.debug(msg)
 156         print(msg)
 157
 158     def invalidate_caches(self):
 159         pass
 160
 161     def find_importer(self, module: str):
 162         if module in self.tree_node_by_module:
 163             node = self.tree_node_by_module[module]
 164             return node
 165         return []
 166
 167
 168 # Audit import events?  Note: this runs early in the lifetime of the
 169 # process (assuming that import bootstrap happens early); config has
 170 # (probably) not yet been loaded or parsed the commandline.  Also,
 171 # some things have probably already been imported while we weren't
 172 # watching so this information may be incomplete.
 173 #
 174 # Also note: move bootstrap up in the global import list to catch
 175 # more import events and have a more complete record.
 176 import_interceptor = None
 177 for arg in sys.argv:
 178     if arg == '--audit_import_events':
 179         import_interceptor = ImportInterceptor()
 180         sys.meta_path.insert(0, import_interceptor)
 181
 182
 183 def dump_all_objects() -> None:
 184     global import_interceptor
 185     messages = {}
 186     all_modules = sys.modules
 187     for obj in object.__subclasses__():
 188         if not hasattr(obj, '__name__'):
 189             continue
 190         klass = obj.__name__
 191         if not hasattr(obj, '__module__'):
 192             continue
 193         class_mod_name = obj.__module__
 194         if class_mod_name in all_modules:
 195             mod = all_modules[class_mod_name]
 196             if not hasattr(mod, '__name__'):
 197                 mod_name = class_mod_name
 198             else:
 199                 mod_name = mod.__name__
 200             if hasattr(mod, '__file__'):
 201                 mod_file = mod.__file__
 202             else:
 203                 mod_file = 'unknown'
 204             if import_interceptor is not None:
 205                 import_path = import_interceptor.find_importer(mod_name)
 206             else:
 207                 import_path = 'unknown'
 208             msg = f'{class_mod_name}::{klass} ({mod_file})'
 209             if import_path != 'unknown' and len(import_path) > 0:
 210                 msg += f' imported by {import_path}'
 211             messages[f'{class_mod_name}::{klass}'] = msg
 212     for x in sorted(messages.keys()):
 213         logger.debug(messages[x])
 214         print(messages[x])
 215
 216
 217 def initialize(entry_point):
 218     """
 219     Remember to initialize config, initialize logging, set/log a random
 220     seed, etc... before running main.
 221
 222     """
 223
 224     @functools.wraps(entry_point)
 225     def initialize_wrapper(*args, **kwargs):
 226         # Hook top level unhandled exceptions, maybe invoke debugger.
 227         if sys.excepthook == sys.__excepthook__:
 228             sys.excepthook = handle_uncaught_exception
 229
 230         # Try to figure out the name of the program entry point.  Then
 231         # parse configuration (based on cmdline flags, environment vars
 232         # etc...)
 233         if (
 234             '__globals__' in entry_point.__dict__
 235             and '__file__' in entry_point.__globals__
 236         ):
 237             config.parse(entry_point.__globals__['__file__'])
 238         else:
 239             config.parse(None)
 240
 241         if config.config['trace_memory']:
 242             import tracemalloc
 243
 244             tracemalloc.start()
 245
 246         # Initialize logging... and log some remembered messages from
 247         # config module.
 248         logging_utils.initialize_logging(logging.getLogger())
 249         config.late_logging()
 250
 251         # Maybe log some info about the python interpreter itself.
 252         logger.debug(
 253             f'Platform: {sys.platform}, maxint=0x{sys.maxsize:x}, byteorder={sys.byteorder}'
 254         )
 255         logger.debug(f'Python interpreter version: {sys.version}')
 256         logger.debug(f'Python implementation: {sys.implementation}')
 257         logger.debug(f'Python C API version: {sys.api_version}')
 258         logger.debug(f'Python path: {sys.path}')
 259
 260         # Allow programs that don't bother to override the random seed
 261         # to be replayed via the commandline.
 262         import random
 263
 264         random_seed = config.config['set_random_seed']
 265         if random_seed is not None:
 266             random_seed = random_seed[0]
 267         else:
 268             random_seed = int.from_bytes(os.urandom(4), 'little')
 269
 270         if config.config['show_random_seed']:
 271             msg = f'Global random seed is: {random_seed}'
 272             logger.debug(msg)
 273             print(msg)
 274         random.seed(random_seed)
 275
 276         # Do it, invoke the user's code.  Pay attention to how long it takes.
 277         logger.debug(f'Starting {entry_point.__name__} (program entry point)')
 278         ret = None
 279         import stopwatch
 280
 281         if config.config['run_profiler']:
 282             import cProfile
 283             from pstats import SortKey
 284
 285             with stopwatch.Timer() as t:
 286                 cProfile.runctx(
 287                     "ret = entry_point(*args, **kwargs)",
 288                     globals(),
 289                     locals(),
 290                     None,
 291                     SortKey.CUMULATIVE,
 292                 )
 293         else:
 294             with stopwatch.Timer() as t:
 295                 ret = entry_point(*args, **kwargs)
 296
 297         logger.debug(f'{entry_point.__name__} (program entry point) returned {ret}.')
 298
 299         if config.config['trace_memory']:
 300             snapshot = tracemalloc.take_snapshot()
 301             top_stats = snapshot.statistics('lineno')
 302             print()
 303             print("--trace_memory's top 10 memory using files:")
 304             for stat in top_stats[:10]:
 305                 print(stat)
 306
 307         if config.config['dump_all_objects']:
 308             dump_all_objects()
 309
 310         if config.config['audit_import_events']:
 311             global import_interceptor
 312             if import_interceptor is not None:
 313                 print(import_interceptor.tree)
 314
 315         walltime = t()
 316         (utime, stime, cutime, cstime, elapsed_time) = os.times()
 317         logger.debug(
 318             '\n'
 319             f'user: {utime}s\n'
 320             f'system: {stime}s\n'
 321             f'child user: {cutime}s\n'
 322             f'child system: {cstime}s\n'
 323             f'machine uptime: {elapsed_time}s\n'
 324             f'walltime: {walltime}s'
 325         )
 326
 327         # If it doesn't return cleanly, call attention to the return value.
 328         if ret is not None and ret != 0:
 329             logger.error(f'Exit {ret}')
 330         else:
 331             logger.debug(f'Exit {ret}')
 332         sys.exit(ret)
 333
 334     return initialize_wrapper