3 # © Copyright 2021-2023, Scott Gasch
6 If you decorate your main method (i.e. program entry point) like this::
14 * automatic support for :py:mod:`pyutils.config` (argument parsing, see
15 that module for details),
16 * The ability to break into pdb on unhandled exceptions (which is
17 enabled/disabled via the commandline flag :code:`--debug_unhandled_exceptions`),
18 * automatic logging support from :py:mod:`pyutils.logging_utils` controllable
19 via several commandline flags,
20 * the ability to optionally enable whole-program code profiling and reporting
21 when you run your code using commandline flag :code:`--run_profiler`,
22 * the ability to optionally enable import auditing via the commandline flag
23 :code:`--audit_import_events`. This logs a message whenever a module is imported
24 *after* the bootstrap module itself is loaded. Note that other modules may
25 already be loaded when bootstrap is loaded and these imports will not be
26 logged. If you're trying to debug import events or dependency problems,
27 I suggest putting bootstrap very early in your import list and using this
29 * optional memory profiling for your program set via the commandline flag
30 :code:`--trace_memory`. This provides a report of python memory utilization
31 at program termination time.
32 * the ability to set the global random seed via commandline flag for
33 reproducable runs (as long as subsequent code doesn't reset the seed)
34 using the :code:`--set_random_seed` flag,
35 * automatic program timing and reporting logged to the INFO log,
36 * more verbose error handling and reporting.
47 from inspect import stack
48 from typing import NoReturn
50 from pyutils import config, logging_utils
51 from pyutils.argparse_utils import ActionNoYes
53 # This module is commonly used by others in here and should avoid
54 # taking any unnecessary dependencies back on them.
57 logger = logging.getLogger(__name__)
59 cfg = config.add_commandline_args(
60 f"Bootstrap ({__file__})",
61 "Args related to python program bootstrapper and Swiss army knife",
64 "--debug_unhandled_exceptions",
67 help="Break into pdb on top level unhandled exceptions.",
73 help="Should we display (and log.debug) the global random seed?",
81 help="Override the global random seed with a particular number.",
87 help="Should we dump the Python import tree before main?",
90 "--audit_import_events",
93 help="Should we audit all import events?",
99 help="Should we run cProfile on this code?",
105 help="Should we record/report on memory utilization?",
108 ORIGINAL_EXCEPTION_HOOK = sys.excepthook
111 def handle_uncaught_exception(exc_type, exc_value, exc_tb):
113 Top-level exception handler for exceptions that make it past any exception
114 handlers in the python code being run. Logs the error and stacktrace then
115 maybe attaches a debugger.
118 msg = f"Unhandled top level exception {exc_type}"
119 logger.exception(msg)
120 print(msg, file=sys.stderr)
121 if issubclass(exc_type, KeyboardInterrupt):
122 sys.__excepthook__(exc_type, exc_value, exc_tb)
128 tb_output = io.StringIO()
129 traceback.print_tb(exc_tb, None, tb_output)
130 print(tb_output.getvalue(), file=sys.stderr)
131 logger.error(tb_output.getvalue())
134 # stdin or stderr is redirected, just do the normal thing
135 if not sys.stderr.isatty() or not sys.stdin.isatty():
136 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
138 else: # a terminal is attached and stderr isn't redirected, maybe debug.
139 if config.config["debug_unhandled_exceptions"]:
140 logger.info("Invoking the debugger...")
145 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
148 class ImportInterceptor(importlib.abc.MetaPathFinder):
149 """An interceptor that always allows module load events but dumps a
150 record into the log and onto stdout when modules are loaded and
151 produces an audit of who imported what at the end of the run. It
152 can't see any load events that happen before it, though, so move
153 bootstrap up in your __main__'s import list just temporarily to
159 from pyutils.collectionz.trie import Trie
161 self.module_by_filename_cache = {}
162 self.repopulate_modules_by_filename()
164 self.tree_node_by_module = {}
166 def repopulate_modules_by_filename(self):
167 self.module_by_filename_cache.clear()
171 ) in sys.modules.copy().items(): # copy here because modules is volatile
172 if hasattr(mod, "__file__"):
173 fname = getattr(mod, "__file__")
176 self.module_by_filename_cache[fname] = mod
179 def should_ignore_filename(filename: str) -> bool:
180 return "importlib" in filename or "six.py" in filename
182 def find_module(self, fullname, path) -> NoReturn:
184 "This method has been deprecated since Python 3.4, please upgrade."
187 def find_spec(self, loaded_module, path=None, _=None):
189 for x in range(3, len(s)):
190 filename = s[x].filename
191 if ImportInterceptor.should_ignore_filename(filename):
194 loading_function = s[x].function
195 if filename in self.module_by_filename_cache:
196 loading_module = self.module_by_filename_cache[filename]
198 self.repopulate_modules_by_filename()
199 loading_module = self.module_by_filename_cache.get(filename, "unknown")
201 path = self.tree_node_by_module.get(loading_module, [])
202 path.extend([loaded_module])
203 self.tree.insert(path)
204 self.tree_node_by_module[loading_module] = path
206 msg = f"*** Import {loaded_module} from {filename}:{s[x].lineno} in {loading_module}::{loading_function}"
210 msg = f"*** Import {loaded_module} from ?????"
214 def invalidate_caches(self):
217 def find_importer(self, module: str):
218 if module in self.tree_node_by_module:
219 node = self.tree_node_by_module[module]
224 # Audit import events? Note: this runs early in the lifetime of the
225 # process (assuming that import bootstrap happens early); config has
226 # (probably) not yet been loaded or parsed the commandline. Also,
227 # some things have probably already been imported while we weren't
228 # watching so this information may be incomplete.
230 # Also note: move bootstrap up in the global import list to catch
231 # more import events and have a more complete record.
232 IMPORT_INTERCEPTOR = None
234 if arg == "--audit_import_events":
235 IMPORT_INTERCEPTOR = ImportInterceptor()
236 sys.meta_path.insert(0, IMPORT_INTERCEPTOR)
239 def dump_all_objects() -> None:
240 """Helper code to dump all known python objects."""
243 all_modules = sys.modules
244 for obj in object.__subclasses__():
245 if not hasattr(obj, "__name__"):
248 if not hasattr(obj, "__module__"):
250 class_mod_name = obj.__module__
251 if class_mod_name in all_modules:
252 mod = all_modules[class_mod_name]
253 if not hasattr(mod, "__name__"):
254 mod_name = class_mod_name
256 mod_name = mod.__name__
257 if hasattr(mod, "__file__"):
258 mod_file = mod.__file__
261 if IMPORT_INTERCEPTOR is not None:
262 import_path = IMPORT_INTERCEPTOR.find_importer(mod_name)
264 import_path = "unknown"
265 msg = f"{class_mod_name}::{klass} ({mod_file})"
266 if import_path != "unknown" and len(import_path) > 0:
267 msg += f" imported by {import_path}"
268 messages[f"{class_mod_name}::{klass}"] = msg
269 for x in sorted(messages.keys()):
270 logger.debug(messages[x])
274 def initialize(entry_point):
276 Do whole program setup and instrumentation. See module comments for
279 from pyutils import bootstrap
281 @bootstrap.initialize
285 if __name__ == '__main__':
289 @functools.wraps(entry_point)
290 def initialize_wrapper(*args, **kwargs):
291 # Hook top level unhandled exceptions, maybe invoke debugger.
292 if sys.excepthook == sys.__excepthook__:
293 sys.excepthook = handle_uncaught_exception
295 # Try to figure out the name of the program entry point. Then
296 # parse configuration (based on cmdline flags, environment vars
298 entry_filename = None
301 entry_filename = entry_point.__code__.co_filename
302 entry_descr = repr(entry_point.__code__)
305 "__globals__" in entry_point.__dict__
306 and "__file__" in entry_point.__globals__
308 entry_filename = entry_point.__globals__["__file__"]
309 entry_descr = entry_filename
310 config.parse(entry_filename)
312 if config.config["trace_memory"]:
317 # Initialize logging... and log some remembered messages from
318 # config module. Also logs about the logging config if we're
320 logging_utils.initialize_logging(logging.getLogger())
321 config.late_logging()
323 # Log some info about the python interpreter itself if we're
326 "Platform: %s, maxint=0x%x, byteorder=%s",
331 logger.debug("Python interpreter path: %s", sys.executable)
332 logger.debug("Python interpreter version: %s", sys.version)
333 logger.debug("Python implementation: %s", sys.implementation)
334 logger.debug("Python C API version: %s", sys.api_version)
336 logger.debug("Python interpreter running in __debug__ mode.")
338 logger.debug("Python interpreter running in optimized mode.")
339 logger.debug("PYTHONPATH: %s", sys.path)
341 # Dump some info about the physical machine we're running on
342 # if we're ing debug mode.
343 if "SC_PAGE_SIZE" in os.sysconf_names and "SC_PHYS_PAGES" in os.sysconf_names:
345 "Physical memory: %.1fGb",
346 os.sysconf("SC_PAGE_SIZE")
347 * os.sysconf("SC_PHYS_PAGES")
350 logger.debug("Logical processors: %s", os.cpu_count())
352 # Allow programs that don't bother to override the random seed
353 # to be replayed via the commandline.
356 random_seed = config.config["set_random_seed"]
357 if random_seed is not None:
358 random_seed = random_seed[0]
360 random_seed = int.from_bytes(os.urandom(4), "little")
361 if config.config["show_random_seed"]:
362 msg = f"Global random seed is: {random_seed}"
365 random.seed(random_seed)
367 # Give each run a unique identifier if we're in debug mode.
368 logger.debug("This run's UUID: %s", str(uuid.uuid4()))
370 # Do it, invoke the user's code. Pay attention to how long it takes.
372 "Starting %s (program entry point) ---------------------- ", entry_descr
375 from pyutils import stopwatch
377 if config.config["run_profiler"]:
379 from pstats import SortKey
381 with stopwatch.Timer() as t:
383 "ret = entry_point(*args, **kwargs)",
390 with stopwatch.Timer() as t:
391 ret = entry_point(*args, **kwargs)
393 logger.debug("%s (program entry point) returned %s.", entry_descr, ret)
395 if config.config["trace_memory"]:
396 snapshot = tracemalloc.take_snapshot()
397 top_stats = snapshot.statistics("lineno")
399 print("--trace_memory's top 10 memory using files:")
400 for stat in top_stats[:10]:
403 if config.config["dump_all_objects"]:
406 if config.config["audit_import_events"]:
407 if IMPORT_INTERCEPTOR is not None:
408 print(IMPORT_INTERCEPTOR.tree)
411 (utime, stime, cutime, cstime, elapsed_time) = os.times()
416 "child user: %.4fs\n"
417 "child system: %.4fs\n"
418 "machine uptime: %.4fs\n"
428 # If it doesn't return cleanly, call attention to the return value.
429 if ret is not None and ret != 0:
430 logger.error("Exit %s", ret)
432 logger.debug("Exit %s", ret)
435 return initialize_wrapper