3 # © Copyright 2021-2023, Scott Gasch
6 If you decorate your main method (i.e. program entry point) like this::
14 * automatic support for :py:mod:`pyutils.config` (argument parsing, see
15 that module for details),
16 * the ability to break into pdb on unhandled exceptions (which is
17 enabled/disabled via the commandline flag :code:`--debug_unhandled_exceptions`),
18 * automatic logging support from :py:mod:`pyutils.logging_utils` controllable
19 via several commandline flags,
20 * the ability to optionally enable whole-program code profiling and reporting
21 when you run your code using commandline flag :code:`--run_profiler`,
22 * the ability to optionally enable import auditing via the commandline flag
23 :code:`--audit_import_events`. This logs a message whenever a module is imported
24 *after* the bootstrap module itself is loaded. Note that other modules may
25 already be loaded when bootstrap is loaded and these imports will not be
26 logged. If you're trying to debug import events or dependency problems,
27 I suggest putting bootstrap very early in your import list and using this
29 * optional memory profiling for your program set via the commandline flag
30 :code:`--trace_memory`. This provides a report of python memory utilization
31 at program termination time.
32 * the ability to set the global random seed via commandline flag for
33 reproducible runs (as long as subsequent code doesn't reset the seed)
34 using the :code:`--set_random_seed` flag,
35 * automatic program timing and reporting logged to the INFO log,
36 * more verbose error handling and reporting.
47 from inspect import stack
49 from pyutils import config, logging_utils
50 from pyutils.argparse_utils import ActionNoYes
52 # This module is commonly used by others in here and should avoid
53 # taking any unnecessary dependencies back on them.
56 logger = logging.getLogger(__name__)
58 cfg = config.add_commandline_args(
59 f"Bootstrap ({__file__})",
60 "Args related to python program bootstrapper and Swiss army knife",
63 "--debug_unhandled_exceptions",
66 help="Break into pdb on top level unhandled exceptions.",
72 help="Should we display (and log.debug) the global random seed?",
80 help="Override the global random seed with a particular number.",
86 help="Should we dump the Python import tree before main?",
89 "--audit_import_events",
92 help="Should we audit all import events?",
98 help="Should we run cProfile on this code?",
104 help="Should we record/report on memory utilization?",
107 ORIGINAL_EXCEPTION_HOOK = sys.excepthook
110 def handle_uncaught_exception(exc_type, exc_value, exc_tb):
112 Top-level exception handler for exceptions that make it past any exception
113 handlers in the python code being run. Logs the error and stacktrace then
114 maybe attaches a debugger.
117 msg = f"Unhandled top level exception {exc_type}"
118 logger.exception(msg)
119 print(msg, file=sys.stderr)
120 if issubclass(exc_type, KeyboardInterrupt):
121 sys.__excepthook__(exc_type, exc_value, exc_tb)
127 tb_output = io.StringIO()
128 traceback.print_tb(exc_tb, None, tb_output)
129 print(tb_output.getvalue(), file=sys.stderr)
130 logger.error(tb_output.getvalue())
133 # stdin or stderr is redirected, just do the normal thing
134 if not sys.stderr.isatty() or not sys.stdin.isatty():
135 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
137 else: # a terminal is attached and stderr isn't redirected, maybe debug.
138 if config.config["debug_unhandled_exceptions"]:
139 logger.info("Invoking the debugger...")
144 ORIGINAL_EXCEPTION_HOOK(exc_type, exc_value, exc_tb)
147 class ImportInterceptor(importlib.abc.MetaPathFinder):
148 """An interceptor that always allows module load events but dumps a
149 record into the log and onto stdout when modules are loaded and
150 produces an audit of who imported what at the end of the run. It
151 can't see any load events that happen before it, though, so move
152 bootstrap up in your __main__'s import list just temporarily to
158 from pyutils.collectionz.trie import Trie
160 self.module_by_filename_cache = {}
161 self.repopulate_modules_by_filename()
163 self.tree_node_by_module = {}
165 def repopulate_modules_by_filename(self):
166 self.module_by_filename_cache.clear()
170 ) in sys.modules.copy().items(): # copy here because modules is volatile
171 if hasattr(mod, "__file__"):
172 fname = getattr(mod, "__file__")
175 self.module_by_filename_cache[fname] = mod
def should_ignore_filename(filename: str) -> bool:
    """Tell whether *filename* names a file that should be ignored.

    Args:
        filename: the file name to test.

    Returns:
        True if *filename* contains the substring ``importlib`` or the
        substring ``six.py``; False otherwise.
    """
    # Plain substring matching: any occurrence anywhere in the name counts.
    ignored_markers = ("importlib", "six.py")
    return any(marker in filename for marker in ignored_markers)
181 def find_module(self, fullname, path):
183 "This method has been deprecated since Python 3.4, please upgrade."
186 def find_spec(self, loaded_module, path=None, _=None):
188 for x in range(3, len(s)):
189 filename = s[x].filename
190 if ImportInterceptor.should_ignore_filename(filename):
193 loading_function = s[x].function
194 if filename in self.module_by_filename_cache:
195 loading_module = self.module_by_filename_cache[filename]
197 self.repopulate_modules_by_filename()
198 loading_module = self.module_by_filename_cache.get(filename, "unknown")
200 path = self.tree_node_by_module.get(loading_module, [])
201 path.extend([loaded_module])
202 self.tree.insert(path)
203 self.tree_node_by_module[loading_module] = path
205 msg = f"*** Import {loaded_module} from {filename}:{s[x].lineno} in {loading_module}::{loading_function}"
209 msg = f"*** Import {loaded_module} from ?????"
213 def invalidate_caches(self):
216 def find_importer(self, module: str):
217 if module in self.tree_node_by_module:
218 node = self.tree_node_by_module[module]
223 # Audit import events? Note: this runs early in the lifetime of the
224 # process (assuming that import bootstrap happens early); config has
225 # (probably) not yet been loaded or parsed the commandline. Also,
226 # some things have probably already been imported while we weren't
227 # watching so this information may be incomplete.
229 # Also note: move bootstrap up in the global import list to catch
230 # more import events and have a more complete record.
231 IMPORT_INTERCEPTOR = None
233 if arg == "--audit_import_events":
234 IMPORT_INTERCEPTOR = ImportInterceptor()
235 sys.meta_path.insert(0, IMPORT_INTERCEPTOR)
238 def dump_all_objects() -> None:
239 """Helper code to dump all known python objects."""
242 all_modules = sys.modules
243 for obj in object.__subclasses__():
244 if not hasattr(obj, "__name__"):
247 if not hasattr(obj, "__module__"):
249 class_mod_name = obj.__module__
250 if class_mod_name in all_modules:
251 mod = all_modules[class_mod_name]
252 if not hasattr(mod, "__name__"):
253 mod_name = class_mod_name
255 mod_name = mod.__name__
256 if hasattr(mod, "__file__"):
257 mod_file = mod.__file__
260 if IMPORT_INTERCEPTOR is not None:
261 import_path = IMPORT_INTERCEPTOR.find_importer(mod_name)
263 import_path = "unknown"
264 msg = f"{class_mod_name}::{klass} ({mod_file})"
265 if import_path != "unknown" and len(import_path) > 0:
266 msg += f" imported by {import_path}"
267 messages[f"{class_mod_name}::{klass}"] = msg
268 for x in sorted(messages.keys()):
269 logger.debug(messages[x])
273 def initialize(entry_point):
275 Do whole program setup and instrumentation. See module comments for
278 from pyutils import bootstrap
280 @bootstrap.initialize
284 if __name__ == '__main__':
288 @functools.wraps(entry_point)
289 def initialize_wrapper(*args, **kwargs):
290 # Hook top level unhandled exceptions, maybe invoke debugger.
291 if sys.excepthook == sys.__excepthook__:
292 sys.excepthook = handle_uncaught_exception
294 # Try to figure out the name of the program entry point. Then
295 # parse configuration (based on cmdline flags, environment vars
297 entry_filename = None
300 entry_filename = entry_point.__code__.co_filename
301 entry_descr = repr(entry_point.__code__)
304 "__globals__" in entry_point.__dict__
305 and "__file__" in entry_point.__globals__
307 entry_filename = entry_point.__globals__["__file__"]
308 entry_descr = entry_filename
309 config.parse(entry_filename)
311 if config.config["trace_memory"]:
316 # Initialize logging... and log some remembered messages from
317 # config module. Also logs about the logging config if we're
319 logging_utils.initialize_logging(logging.getLogger())
320 config.late_logging()
322 # Log some info about the python interpreter itself if we're
325 "Platform: %s, maxint=0x%x, byteorder=%s",
330 logger.debug("Python interpreter version: %s", sys.version)
331 logger.debug("Python implementation: %s", sys.implementation)
332 logger.debug("Python C API version: %s", sys.api_version)
334 logger.debug("Python interpreter running in __debug__ mode.")
336 logger.debug("Python interpreter running in optimized mode.")
337 logger.debug("Python path: %s", sys.path)
339 # Dump some info about the physical machine we're running on
340 # if we're in debug mode.
341 if "SC_PAGE_SIZE" in os.sysconf_names and "SC_PHYS_PAGES" in os.sysconf_names:
343 "Physical memory: %.1fGb",
344 os.sysconf("SC_PAGE_SIZE")
345 * os.sysconf("SC_PHYS_PAGES")
348 logger.debug("Logical processors: %s", os.cpu_count())
350 # Allow programs that don't bother to override the random seed
351 # to be replayed via the commandline.
354 random_seed = config.config["set_random_seed"]
355 if random_seed is not None:
356 random_seed = random_seed[0]
358 random_seed = int.from_bytes(os.urandom(4), "little")
359 if config.config["show_random_seed"]:
360 msg = f"Global random seed is: {random_seed}"
363 random.seed(random_seed)
365 # Give each run a unique identifier if we're in debug mode.
366 logger.debug("This run's UUID: %s", str(uuid.uuid4()))
368 # Do it, invoke the user's code. Pay attention to how long it takes.
370 "Starting %s (program entry point) ---------------------- ", entry_descr
373 from pyutils import stopwatch
375 if config.config["run_profiler"]:
377 from pstats import SortKey
379 with stopwatch.Timer() as t:
381 "ret = entry_point(*args, **kwargs)",
388 with stopwatch.Timer() as t:
389 ret = entry_point(*args, **kwargs)
391 logger.debug("%s (program entry point) returned %s.", entry_descr, ret)
393 if config.config["trace_memory"]:
394 snapshot = tracemalloc.take_snapshot()
395 top_stats = snapshot.statistics("lineno")
397 print("--trace_memory's top 10 memory using files:")
398 for stat in top_stats[:10]:
401 if config.config["dump_all_objects"]:
404 if config.config["audit_import_events"]:
405 if IMPORT_INTERCEPTOR is not None:
406 print(IMPORT_INTERCEPTOR.tree)
409 (utime, stime, cutime, cstime, elapsed_time) = os.times()
414 "child user: %.4fs\n"
415 "child system: %.4fs\n"
416 "machine uptime: %.4fs\n"
426 # If it doesn't return cleanly, call attention to the return value.
427 if ret is not None and ret != 0:
428 logger.error("Exit %s", ret)
430 logger.debug("Exit %s", ret)
433 return initialize_wrapper