3 # © Copyright 2021-2023, Scott Gasch
6 If you decorate your main method (i.e. program entry point) like this::
14 * automatic support for :py:mod:`pyutils.config` (argument parsing, see
15 that module for details),
16 * The ability to break into pdb on unhandled exceptions (which is
17 enabled/disabled via the commandline flag :code:`--debug_unhandled_exceptions`),
18 * automatic logging support from :py:mod:`pyutils.logging_utils` controllable
19 via several commandline flags,
20 * the ability to optionally enable whole-program code profiling and reporting
21 when you run your code using commandline flag :code:`--run_profiler`,
22 * the ability to optionally enable import auditing via the commandline flag
23 :code:`--audit_import_events`. This logs a message whenever a module is imported
24 *after* the bootstrap module itself is loaded. Note that other modules may
25 already be loaded when bootstrap is loaded and these imports will not be
26 logged. If you're trying to debug import events or dependency problems,
27 I suggest putting bootstrap very early in your import list and using this
29 * optional memory profiling for your program set via the commandline flag
30 :code:`--trace_memory`. This provides a report of python memory utilization
31 at program termination time.
32 * the ability to set the global random seed via commandline flag for
33 reproducable runs (as long as subsequent code doesn't reset the seed)
34 using the :code:`--set_random_seed` flag,
35 * automatic program timing and reporting logged to the INFO log,
36 * more verbose error handling and reporting.
47 from inspect import stack
48 from typing import NoReturn
50 from pyutils import config, logging_utils
51 from pyutils.argparse_utils import ActionNoYes
53 # This module is commonly used by others in here and should avoid
54 # taking any unnecessary dependencies back on them.
57 logger = logging.getLogger(__name__)
59 cfg = config.add_commandline_args(
60 f"Bootstrap ({__file__})",
61 "Args related to python program bootstrapper and Swiss army knife",
64 "--debug_unhandled_exceptions",
67 help="Break into pdb on top level unhandled exceptions.",
73 help="Should we display (and log.debug) the global random seed?",
81 help="Override the global random seed with a particular number.",
87 help="Should we dump the Python import tree before main?",
90 "--audit_import_events",
93 help="Should we audit all import events?",
99 help="Should we run cProfile on this code?",
105 help="Should we record/report on memory utilization?",
108 ORIGINAL_EXCEPTION_HOOK = sys.excepthook
111 def handle_uncaught_exception(exc_type, exc_value, exc_tb):
113 Top-level exception handler for exceptions that make it past any exception
114 handlers in the python code being run. Logs the error and stacktrace then
115 maybe attaches a debugger.
118 msg = f"Unhandled top level exception {exc_type}"
119 logger.exception(msg)
120 print(msg, file=sys.stderr)
121 logging_utils.unhandled_top_level_exception(exc_type, exc_value, exc_tb)
122 if issubclass(exc_type, KeyboardInterrupt):
123 sys.__excepthook__(exc_type, exc_value, exc_tb)
129 tb_output = io.StringIO()
130 traceback.print_tb(exc_tb, None, tb_output)
131 print(tb_output.getvalue(), file=sys.stderr)
132 logger.error(tb_output.getvalue())
135 # stdin or stderr is redirected, just do the normal thing
136 if not sys.stderr.isatty() or not sys.stdin.isatty():
137 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
139 else: # a terminal is attached and stderr isn't redirected, maybe debug.
140 if config.config["debug_unhandled_exceptions"]:
141 logger.info("Invoking the debugger...")
146 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
149 class ImportInterceptor(importlib.abc.MetaPathFinder):
150 """An interceptor that always allows module load events but dumps a
151 record into the log and onto stdout when modules are loaded and
152 produces an audit of who imported what at the end of the run. It
153 can't see any load events that happen before it, though, so move
154 bootstrap up in your __main__'s import list just temporarily to
160 from pyutils.collectionz.trie import Trie
162 self.module_by_filename_cache = {}
163 self.repopulate_modules_by_filename()
165 self.tree_node_by_module = {}
167 def repopulate_modules_by_filename(self):
168 self.module_by_filename_cache.clear()
172 ) in sys.modules.copy().items(): # copy here because modules is volatile
173 if hasattr(mod, "__file__"):
174 fname = getattr(mod, "__file__")
177 self.module_by_filename_cache[fname] = mod
180 def should_ignore_filename(filename: str) -> bool:
181 return "importlib" in filename or "six.py" in filename
183 def find_module(self, fullname, path) -> NoReturn:
185 "This method has been deprecated since Python 3.4, please upgrade."
188 def find_spec(self, loaded_module, path=None, _=None):
190 for x in range(3, len(s)):
191 filename = s[x].filename
192 if ImportInterceptor.should_ignore_filename(filename):
195 loading_function = s[x].function
196 if filename in self.module_by_filename_cache:
197 loading_module = self.module_by_filename_cache[filename]
199 self.repopulate_modules_by_filename()
200 loading_module = self.module_by_filename_cache.get(filename, "unknown")
202 path = self.tree_node_by_module.get(loading_module, [])
203 path.extend([loaded_module])
204 self.tree.insert(path)
205 self.tree_node_by_module[loading_module] = path
207 msg = f"*** Import {loaded_module} from {filename}:{s[x].lineno} in {loading_module}::{loading_function}"
211 msg = f"*** Import {loaded_module} from ?????"
215 def invalidate_caches(self):
218 def find_importer(self, module: str):
219 if module in self.tree_node_by_module:
220 node = self.tree_node_by_module[module]
225 # Audit import events? Note: this runs early in the lifetime of the
226 # process (assuming that import bootstrap happens early); config has
227 # (probably) not yet been loaded or parsed the commandline. Also,
228 # some things have probably already been imported while we weren't
229 # watching so this information may be incomplete.
231 # Also note: move bootstrap up in the global import list to catch
232 # more import events and have a more complete record.
233 IMPORT_INTERCEPTOR = None
235 if arg == "--audit_import_events":
236 IMPORT_INTERCEPTOR = ImportInterceptor()
237 sys.meta_path.insert(0, IMPORT_INTERCEPTOR)
240 def dump_all_objects() -> None:
241 """Helper code to dump all known python objects."""
244 all_modules = sys.modules
245 for obj in object.__subclasses__():
246 if not hasattr(obj, "__name__"):
249 if not hasattr(obj, "__module__"):
251 class_mod_name = obj.__module__
252 if class_mod_name in all_modules:
253 mod = all_modules[class_mod_name]
254 if not hasattr(mod, "__name__"):
255 mod_name = class_mod_name
257 mod_name = mod.__name__
258 if hasattr(mod, "__file__"):
259 mod_file = mod.__file__
262 if IMPORT_INTERCEPTOR is not None:
263 import_path = IMPORT_INTERCEPTOR.find_importer(mod_name)
265 import_path = "unknown"
266 msg = f"{class_mod_name}::{klass} ({mod_file})"
267 if import_path != "unknown" and len(import_path) > 0:
268 msg += f" imported by {import_path}"
269 messages[f"{class_mod_name}::{klass}"] = msg
270 for x in sorted(messages.keys()):
271 logger.debug(messages[x])
275 def initialize(entry_point):
277 Do whole program setup and instrumentation. See module comments for
280 from pyutils import bootstrap
282 @bootstrap.initialize
286 if __name__ == '__main__':
290 @functools.wraps(entry_point)
291 def initialize_wrapper(*args, **kwargs):
292 # Hook top level unhandled exceptions, maybe invoke debugger.
293 if sys.excepthook == sys.__excepthook__:
294 sys.excepthook = handle_uncaught_exception
296 # Try to figure out the name of the program entry point. Then
297 # parse configuration (based on cmdline flags, environment vars
299 entry_filename = None
302 entry_filename = entry_point.__code__.co_filename
303 entry_descr = repr(entry_point.__code__)
306 "__globals__" in entry_point.__dict__
307 and "__file__" in entry_point.__globals__
309 entry_filename = entry_point.__globals__["__file__"]
310 entry_descr = entry_filename
311 if not entry_filename:
312 entry_filename = 'UNKNOWN'
313 config.parse(entry_filename)
315 if config.config["trace_memory"]:
320 # Initialize logging... and log some remembered messages from
321 # config module. Also logs about the logging config if we're
323 logging_utils.initialize_logging(logging.getLogger())
324 config.late_logging()
326 # Log some info about the python interpreter itself if we're
329 "Platform: %s, maxint=0x%x, byteorder=%s",
334 logger.debug("Python interpreter path: %s", sys.executable)
335 logger.debug("Python interpreter version: %s", sys.version)
336 logger.debug("Python implementation: %s", sys.implementation)
337 logger.debug("Python C API version: %s", sys.api_version)
339 logger.debug("Python interpreter running in __debug__ mode.")
341 logger.debug("Python interpreter running in optimized mode.")
342 logger.debug("PYTHONPATH: %s", sys.path)
344 # Dump some info about the physical machine we're running on
345 # if we're ing debug mode.
346 if "SC_PAGE_SIZE" in os.sysconf_names and "SC_PHYS_PAGES" in os.sysconf_names:
348 "Physical memory: %.1fGb",
349 os.sysconf("SC_PAGE_SIZE")
350 * os.sysconf("SC_PHYS_PAGES")
353 logger.debug("Logical processors: %s", os.cpu_count())
355 # Allow programs that don't bother to override the random seed
356 # to be replayed via the commandline.
359 random_seed = config.config["set_random_seed"]
360 if random_seed is not None:
361 random_seed = random_seed[0]
363 random_seed = int.from_bytes(os.urandom(4), "little")
364 if config.config["show_random_seed"]:
365 msg = f"Global random seed is: {random_seed}"
368 random.seed(random_seed)
370 # Give each run a unique identifier if we're in debug mode.
371 logger.debug("This run's UUID: %s", str(uuid.uuid4()))
373 # Do it, invoke the user's code. Pay attention to how long it takes.
375 "Starting %s (program entry point) ---------------------- ", entry_descr
378 from pyutils import stopwatch
380 if config.config["run_profiler"]:
382 from pstats import SortKey
384 with stopwatch.Timer() as t:
386 "ret = entry_point(*args, **kwargs)",
393 with stopwatch.Timer() as t:
394 ret = entry_point(*args, **kwargs)
396 logger.debug("%s (program entry point) returned %s.", entry_descr, ret)
398 if config.config["trace_memory"]:
399 snapshot = tracemalloc.take_snapshot()
400 top_stats = snapshot.statistics("lineno")
402 print("--trace_memory's top 10 memory using files:")
403 for stat in top_stats[:10]:
406 if config.config["dump_all_objects"]:
409 if config.config["audit_import_events"]:
410 if IMPORT_INTERCEPTOR is not None:
411 print(IMPORT_INTERCEPTOR.tree)
414 (utime, stime, cutime, cstime, elapsed_time) = os.times()
419 "child user: %.4fs\n"
420 "child system: %.4fs\n"
421 "machine uptime: %.4fs\n"
431 # If it doesn't return cleanly, call attention to the return value.
432 base_filename = os.path.basename(entry_filename)
433 if ret is not None and ret != 0:
434 if not logging_utils.non_zero_return_value(ret):
435 logger.error("%s: Exit %s", base_filename, ret)
437 logger.debug("%s: Exit %s", base_filename, ret)
440 return initialize_wrapper