2 # -*- coding: utf-8 -*-
4 # © Copyright 2021-2022, Scott Gasch
6 """Utilities related to logging. To use it you must invoke
7 :meth:`initialize_logging`. If you use the
8 :meth:`bootstrap.initialize` decorator on your program's entry point,
9 it will call this for you. See :meth:`python_modules.bootstrap.initialize`
10 for more details. If you use this you get:
12 * Ability to set logging level,
13 * ability to define the logging format,
14 * ability to tee all logging on stderr,
15 * ability to tee all logging into a file,
16 * ability to rotate said file as it grows,
17 * ability to tee all logging into the system log (syslog) and
18 define the facility and level used to do so,
19 * easy automatic pid/tid stamp on logging for debugging threads,
20 * ability to squelch repeated log messages,
21 * ability to log probabilistically in code,
* ability to only see log messages from a particular module or
  function within a module, and
24 * ability to clear logging handlers added by earlier loaded modules.
26 All of these are controlled via commandline arguments to your program,
27 see the code below for details.
39 from logging.config import fileConfig
40 from logging.handlers import RotatingFileHandler, SysLogHandler
41 from typing import Any, Callable, Dict, Iterable, List, Optional
44 from overrides import overrides
46 # This module is commonly used by others in here and should avoid
47 # taking any unnecessary dependencies back on them.
48 from pyutils import argparse_utils, config
# Command-line flags, registered at import time, that control every
# aspect of this module's behavior.  See the module docstring for an
# overview of what each knob does.
cfg = config.add_commandline_args(f'Logging ({__file__})', 'Args related to logging')
cfg.add_argument(
    '--logging_config_file',
    type=argparse_utils.valid_filename,
    default=None,
    metavar='FILENAME',
    help='Config file containing the logging setup, see: https://docs.python.org/3/howto/logging.html#logging-advanced-tutorial',
)
cfg.add_argument(
    '--logging_level',
    type=str,
    default='INFO',
    choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    metavar='LEVEL',
    help='The global default level below which to squelch log messages; see also --lmodule',
)
cfg.add_argument(
    '--logging_format',
    type=str,
    default=None,
    help='The format for lines logged via the logger module. See: https://docs.python.org/3/library/logging.html#formatter-objects',
)
cfg.add_argument(
    '--logging_date_format',
    type=str,
    default='%Y/%m/%dT%H:%M:%S.%f%z',
    metavar='DATEFMT',
    help='The format of any dates in --logging_format.',
)
cfg.add_argument(
    '--logging_console',
    action=argparse_utils.ActionNoYes,
    default=True,
    help='Should we log to the console (stderr)',
)
cfg.add_argument(
    '--logging_filename',
    type=str,
    default=None,
    metavar='FILENAME',
    help='The filename of the logfile to write.',
)
cfg.add_argument(
    '--logging_filename_maxsize',
    type=int,
    default=(1024 * 1024),
    metavar='#BYTES',
    help='The maximum size (in bytes) to write to the logging_filename.',
)
cfg.add_argument(
    '--logging_filename_count',
    type=int,
    default=7,
    metavar='COUNT',
    help='The number of logging_filename copies to keep before deleting.',
)
cfg.add_argument(
    '--logging_syslog',
    action=argparse_utils.ActionNoYes,
    default=False,
    help='Should we log to localhost\'s syslog.',
)
cfg.add_argument(
    '--logging_syslog_facility',
    type=str,
    default='USER',
    # NOTE(review): names below are the SysLogHandler LOG_* facilities
    # with the LOG_ prefix stripped (it is re-added at use time) --
    # confirm this list matches the original flag's choices.
    choices=[
        'NOTSET', 'AUTH', 'AUTHPRIV', 'CRON', 'DAEMON', 'FTP', 'KERN',
        'LPR', 'MAIL', 'NEWS', 'SYSLOG', 'USER', 'UUCP', 'LOCAL0',
        'LOCAL1', 'LOCAL2', 'LOCAL3', 'LOCAL4', 'LOCAL5', 'LOCAL6',
        'LOCAL7',
    ],
    metavar='SYSLOG_FACILITY_LIST',
    help='The default syslog message facility identifier',
)
cfg.add_argument(
    '--logging_debug_threads',
    action=argparse_utils.ActionNoYes,
    default=False,
    help='Should we prepend pid/tid data to all log messages?',
)
cfg.add_argument(
    '--logging_debug_modules',
    action=argparse_utils.ActionNoYes,
    default=False,
    help='Should we prepend module/function data to all log messages?',
)
cfg.add_argument(
    '--logging_info_is_print',
    action=argparse_utils.ActionNoYes,
    default=False,
    help='logging.info also prints to stdout.',
)
cfg.add_argument(
    '--logging_squelch_repeats',
    action=argparse_utils.ActionNoYes,
    default=True,
    help='Do we allow code to indicate that it wants to squelch repeated logging messages or should we always log?',
)
cfg.add_argument(
    '--logging_probabilistically',
    action=argparse_utils.ActionNoYes,
    default=True,
    help='Do we allow probabilistic logging (for code that wants it) or should we always log?',
)
# See also: OutputMultiplexer
cfg.add_argument(
    '--logging_captures_prints',
    action=argparse_utils.ActionNoYes,
    default=False,
    help='When calling print, also log.info automatically.',
)
cfg.add_argument(
    '--lmodule',
    type=str,
    metavar='<SCOPE>=<LEVEL>[,<SCOPE>=<LEVEL>...]',
    help=(
        'Allows per-scope logging levels which override the global level set with --logging-level.'
        + 'Pass a space separated list of <scope>=<level> where <scope> is one of: module, '
        + 'module:function, or :function and <level> is a logging level (e.g. INFO, DEBUG...)'
    ),
)
cfg.add_argument(
    '--logging_clear_preexisting_handlers',
    action=argparse_utils.ActionNoYes,
    default=True,
    help=(
        'Should logging code clear preexisting global logging handlers and thus insist that it '
        + 'alone can add handlers. Use this to work around annoying modules that insert global '
        + 'handlers with formats and logging levels you might not want. Caveat emptor, this may '
        + 'cause you to miss logging messages.'
    ),
)

# Saved so that --logging_captures_prints can monkeypatch
# builtins.print and still reach the real implementation.
BUILT_IN_PRINT = print

# Guard so that initialize_logging() does its setup at most once per
# process.
LOGGING_INITIALIZED = False

# A map from logging_callsite_id -> count of logged messages.
squelched_logging_counts: Dict[str, int] = {}
def squelch_repeated_log_messages(squelch_after_n_repeats: int) -> Callable:
    """
    A decorator that marks a function as interested in having the logging
    messages that it produces be squelched (ignored) after it logs the
    same message more than N times.

    .. note::
        This decorator affects *ALL* logging messages produced
        within the decorated function.  That said, messages must be
        identical in order to be squelched.  For example, if the same line
        of code produces different messages (because of, e.g., a format
        string), the messages are considered to be different.

    Args:
        squelch_after_n_repeats: the number of times an identical message
            may be logged before subsequent repeats are dropped.

    Returns:
        A decorator that registers the wrapped function in
        squelched_logging_counts and returns it unchanged.
    """

    def squelch_logging_wrapper(f: Callable):
        from pyutils import function_utils

        identifier = function_utils.function_identifier(f)
        # Register the threshold; SquelchRepeatedMessagesFilter consults
        # this map at filter time.
        squelched_logging_counts[identifier] = squelch_after_n_repeats
        return f

    return squelch_logging_wrapper
class SquelchRepeatedMessagesFilter(logging.Filter):
    """A filter that only logs messages from a given site with the same
    (exact) message at the same logging level N times and ignores
    subsequent attempts to log.

    This filter only affects logging messages that repeat more than a
    threshold number of times from functions that are tagged with the
    @logging_utils.squelch_repeated_log_messages decorator (see above);
    all others are passed through unconditionally.

    This functionality is enabled by default but can be disabled via
    the :code:`--no_logging_squelch_repeats` commandline flag.
    """

    def __init__(self) -> None:
        super().__init__()
        # Per-logsite count of identical messages seen so far.
        self.counters: collections.Counter = collections.Counter()

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the record should be logged, False to squelch."""
        # Only functions registered via @squelch_repeated_log_messages
        # are subject to squelching at all.
        id1 = f'{record.module}:{record.funcName}'
        if id1 not in squelched_logging_counts:
            return True
        threshold = squelched_logging_counts[id1]
        # The logsite key includes the message text so that distinct
        # messages from the same line are counted separately.
        logsite = f'{record.pathname}+{record.lineno}+{record.levelno}+{record.msg}'
        count = self.counters[logsite]
        self.counters[logsite] += 1
        return count < threshold
class DynamicPerScopeLoggingLevelFilter(logging.Filter):
    """This filter only allows logging messages from an allow list of
    module names or module:function names.  Blocks all others.  The
    allow list and its per-scope minimum levels come from the --lmodule
    commandline flag.
    """

    @staticmethod
    def level_name_to_level(name: str) -> int:
        """Convert a level name (e.g. 'INFO') to its numeric value.

        Raises:
            ValueError: if name is not a known logging level.
        """
        numeric_level = getattr(logging, name, None)
        if not isinstance(numeric_level, int):
            raise ValueError(f'Invalid level: {name}')
        return numeric_level

    def __init__(
        self,
        default_logging_level: int,
        per_scope_logging_levels: str,
    ) -> None:
        """
        Args:
            default_logging_level: the global minimum level (--logging_level)
                used when no per-scope directive matches a record.
            per_scope_logging_levels: the raw --lmodule string, a comma
                separated list of <scope>=<LEVEL> directives (may be None).
        """
        super().__init__()
        self.valid_levels = set(
            ['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
        )
        self.default_logging_level = default_logging_level
        self.level_by_scope = {}
        if per_scope_logging_levels is not None:
            # Malformed directives are reported on stderr and skipped
            # rather than aborting logging setup.
            for chunk in per_scope_logging_levels.split(','):
                if '=' not in chunk:
                    print(
                        f'Malformed lmodule directive: "{chunk}", missing "=". Ignored.',
                        file=sys.stderr,
                    )
                    continue
                try:
                    (scope, level) = chunk.split('=')
                except ValueError:
                    print(
                        f'Malformed lmodule directive: "{chunk}". Ignored.',
                        file=sys.stderr,
                    )
                    continue
                scope = scope.strip()
                level = level.strip().upper()
                if level not in self.valid_levels:
                    print(
                        f'Malformed lmodule directive: "{chunk}", bad level. Ignored.',
                        file=sys.stderr,
                    )
                    continue
                self.level_by_scope[
                    scope
                ] = DynamicPerScopeLoggingLevelFilter.level_name_to_level(level)

    def filter(self, record: logging.LogRecord) -> bool:
        """Decides whether or not to log based on an allow list."""

        # First try to find a logging level by scope (--lmodule); of the
        # scopes that match this record, the most permissive (lowest)
        # level wins.
        if len(self.level_by_scope) > 0:
            min_level = None
            for scope in (
                record.module,
                f'{record.module}:{record.funcName}',
                f':{record.funcName}',
            ):
                level = self.level_by_scope.get(scope, None)
                if level is not None:
                    if min_level is None or level < min_level:
                        min_level = level

            # If we found one, use it instead of the global default level.
            if min_level is not None:
                return record.levelno >= min_level

        # Otherwise, use the global logging level (--logging_level)
        return record.levelno >= self.default_logging_level
# A map from function_identifier -> probability of logging (0.0%..100.0%)
probabilistic_logging_levels: Dict[str, float] = {}


def logging_is_probabilistic(probability_of_logging: float) -> Callable:
    """A decorator that indicates that all logging statements within the
    scope of a particular (marked) function are not deterministic
    (i.e. they do not always unconditionally log) but rather are
    probabilistic (i.e. they log N% of the time, randomly).

    .. note::
        This affects *ALL* logging statements within the marked function.

    This functionality can be disabled (forcing all logged
    messages to produce output) via the
    :code:`--no_logging_probabilistically` cmdline argument.

    Args:
        probability_of_logging: the percent chance (0.0..100.0) that any
            given message inside the decorated function is actually logged.

    Returns:
        A decorator that registers the wrapped function in
        probabilistic_logging_levels and returns it unchanged.
    """

    def probabilistic_logging_wrapper(f: Callable):
        from pyutils import function_utils

        identifier = function_utils.function_identifier(f)
        # Register the probability; ProbabilisticFilter consults this
        # map at filter time.
        probabilistic_logging_levels[identifier] = probability_of_logging
        return f

    return probabilistic_logging_wrapper
class ProbabilisticFilter(logging.Filter):
    """
    A filter that logs messages probabilistically (i.e. randomly at some
    percent chance).

    This filter only affects logging messages from functions that have
    been tagged with the @logging_utils.logging_is_probabilistic
    decorator (see above); all others are passed through unconditionally.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the message should be logged this time."""
        id1 = f'{record.module}:{record.funcName}'
        # Untagged callsites always log.
        if id1 not in probabilistic_logging_levels:
            return True
        threshold = probabilistic_logging_levels[id1]
        # threshold is a percentage in 0.0..100.0.
        return (random.random() * 100.0) <= threshold
class OnlyInfoFilter(logging.Filter):
    """A logging filter that passes only INFO-level records.  Used by
    the ::code`--logging_info_is_print` commandline option to pick out
    the subset of the logging stream that should be echoed to a stdout
    handler.
    """

    def filter(self, record: logging.LogRecord):
        """Return True iff the record was logged at exactly INFO level."""
        return record.levelno == logging.INFO
class MillisecondAwareFormatter(logging.Formatter):
    """
    A formatter for adding milliseconds to log messages which, for
    whatever reason, the default python logger doesn't do.
    """

    # NOTE(review): timezone is hardcoded to US/Pacific -- confirm this
    # is intended for all deployments.
    converter = datetime.datetime.fromtimestamp  # type: ignore

    def formatTime(self, record, datefmt=None):
        """Format the record's creation time; when no datefmt is given,
        append the milliseconds to a default timestamp."""
        ct = MillisecondAwareFormatter.converter(
            record.created, pytz.timezone("US/Pacific")
        )
        if datefmt:
            s = ct.strftime(datefmt)
        else:
            t = ct.strftime("%Y-%m-%d %H:%M:%S")
            # Bugfix: "%03d" is not a valid f-string format spec for a
            # float -- f"{record.msecs:%03d}" raised ValueError.  Zero-pad
            # the (truncated) millisecond count instead.
            s = f"{t},{int(record.msecs):03d}"
        return s
def log_about_logging(
    logger,
    default_logging_level,
    preexisting_handlers_count,
    fmt,
    facility_name,
):
    """Some of the initial messages in the debug log are about how we
    have set up logging itself.

    Args:
        logger: the configured logger to emit the messages through.
        default_logging_level: the numeric global level (--logging_level).
        preexisting_handlers_count: how many handlers were cleared.
        fmt: the final logging format string in use.
        facility_name: the syslog facility in use (or None).
    """
    level_name = logging._levelToName.get(
        default_logging_level, str(default_logging_level)
    )
    logger.debug('Initialized global logging; default logging level is %s.', level_name)
    if (
        config.config['logging_clear_preexisting_handlers']
        and preexisting_handlers_count > 0
    ):
        logger.debug(
            'Logging cleared %d global handlers (--logging_clear_preexisting_handlers)',
            preexisting_handlers_count,
        )
    logger.debug('Logging format specification is "%s"', fmt)
    if config.config['logging_debug_threads']:
        logger.debug(
            '...Logging format spec captures tid/pid. (--logging_debug_threads)'
        )
    if config.config['logging_debug_modules']:
        logger.debug(
            '...Logging format spec captures files/functions/lineno. (--logging_debug_modules)'
        )
    if config.config['logging_syslog']:
        logger.debug(
            'Logging to syslog as %s with priority mapping based on level. (--logging_syslog)',
            facility_name,
        )
    if config.config['logging_filename']:
        logger.debug(
            'Logging to file "%s". (--logging_filename)',
            config.config["logging_filename"],
        )
        logger.debug(
            '...with %d bytes max file size. (--logging_filename_maxsize)',
            config.config["logging_filename_maxsize"],
        )
        logger.debug(
            '...and %d rotating backup file count. (--logging_filename_count)',
            config.config["logging_filename_count"],
        )
    if config.config['logging_console']:
        logger.debug('Logging to the console (stderr). (--logging_console)')
    if config.config['logging_info_is_print']:
        logger.debug(
            'Logging logger.info messages will be repeated on stdout. (--logging_info_is_print)'
        )
    if config.config['logging_squelch_repeats']:
        logger.debug(
            'Logging code allowed to request repeated messages be squelched. (--logging_squelch_repeats)'
        )
    else:
        logger.debug(
            'Logging code forbidden to request messages be squelched; all messages logged. (--no_logging_squelch_repeats)'
        )
    if config.config['logging_probabilistically']:
        logger.debug(
            'Logging code is allowed to request probabilistic logging. (--logging_probabilistically)'
        )
    else:
        logger.debug(
            'Logging code is forbidden to request probabilistic logging; messages always logged. (--no_logging_probabilistically)'
        )
    if config.config['lmodule']:
        # Idiom fix: use lazy %-args rather than an eagerly-built f-string.
        logger.debug(
            'Logging dynamic per-module logging enabled. (--lmodule=%s)',
            config.config['lmodule'],
        )
    if config.config['logging_captures_prints']:
        logger.debug(
            'Logging will capture printed data as logger.info messages. (--logging_captures_prints)'
        )
def initialize_logging(logger=None) -> logging.Logger:
    """Initialize logging for the program.  This must be called if you want
    to use any of the functionality provided by this module such as:

    * Ability to set logging level,
    * ability to define the logging format,
    * ability to tee all logging on stderr,
    * ability to tee all logging into a file,
    * ability to rotate said file as it grows,
    * ability to tee all logging into the system log (syslog) and
      define the facility and level used to do so,
    * easy automatic pid/tid stamp on logging for debugging threads,
    * ability to squelch repeated log messages,
    * ability to log probabilistically in code,
    * ability to only see log messages from a particular module or
      function, and
    * ability to clear logging handlers added by earlier loaded modules.

    All of these are controlled via commandline arguments to your program,
    see the code below for details.  If you use the
    :meth:`bootstrap.initialize` decorator on your program's entry point,
    it will call this for you.  See :meth:`python_modules.bootstrap.initialize`
    for more details.

    Args:
        logger: the logger to configure; when None, the root logger is used.

    Returns:
        The configured logger.
    """
    global LOGGING_INITIALIZED
    if LOGGING_INITIALIZED:
        return logging.getLogger()
    LOGGING_INITIALIZED = True

    if logger is None:
        logger = logging.getLogger()

    # --logging_clear_preexisting_handlers removes logging handlers
    # that were registered by global statements during imported module
    # setup.
    preexisting_handlers_count = 0
    assert config.has_been_parsed()
    if config.config['logging_clear_preexisting_handlers']:
        while logger.hasHandlers():
            logger.removeHandler(logger.handlers[0])
            preexisting_handlers_count += 1

    # --logging_config_file pulls logging settings from a config file
    # skipping the rest of this setup.
    if config.config['logging_config_file'] is not None:
        fileConfig(config.config['logging_config_file'])
        return logger

    handlers: List[logging.Handler] = []
    handler: Optional[logging.Handler] = None

    # Global default logging level (--logging_level); messages below
    # this level will be silenced.
    default_logging_level = getattr(
        logging, config.config['logging_level'].upper(), None
    )
    if not isinstance(default_logging_level, int):
        raise ValueError(f'Invalid level: {config.config["logging_level"]}')

    # Custom or default --logging_format?
    if config.config['logging_format']:
        fmt = config.config['logging_format']
    elif config.config['logging_syslog']:
        # syslogd adds its own timestamp; skip asctime.
        fmt = '%(levelname).1s:%(filename)s[%(process)d]: %(message)s'
    else:
        fmt = '%(levelname).1s:%(asctime)s: %(message)s'

    # --logging_debug_threads and --logging_debug_modules both affect
    # the format by prepending information about the pid/tid or
    # file/function/line to every message.
    if config.config['logging_debug_threads']:
        fmt = f'%(process)d.%(thread)d|{fmt}'
    if config.config['logging_debug_modules']:
        fmt = f'%(filename)s:%(funcName)s:%(lineno)s|{fmt}'

    # --logging_syslog (optionally with --logging_syslog_facility)
    # sets up for logging to use the standard system syslogd as a
    # sink.
    facility_name = None
    if config.config['logging_syslog']:
        if sys.platform not in ('win32', 'cygwin'):
            if config.config['logging_syslog_facility']:
                facility_name = 'LOG_' + config.config['logging_syslog_facility']
            facility = SysLogHandler.__dict__.get(facility_name, SysLogHandler.LOG_USER)  # type: ignore
            assert facility is not None
            handler = SysLogHandler(facility=facility, address='/dev/log')
            handler.setFormatter(
                MillisecondAwareFormatter(
                    fmt=fmt,
                    datefmt=config.config['logging_date_format'],
                )
            )
            handlers.append(handler)

    # --logging_filename (with friends --logging_filename_count and
    # --logging_filename_maxsize) set up logging to a file on the
    # filesystem with automatic rotation when it gets too big.
    if config.config['logging_filename']:
        handler = RotatingFileHandler(
            config.config['logging_filename'],
            maxBytes=config.config['logging_filename_maxsize'],
            backupCount=config.config['logging_filename_count'],
        )
        handler.setFormatter(
            MillisecondAwareFormatter(
                fmt=fmt,
                datefmt=config.config['logging_date_format'],
            )
        )
        handlers.append(handler)

    # --logging_console is, ahem, logging to the console.
    if config.config['logging_console']:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(
            MillisecondAwareFormatter(
                fmt=fmt,
                datefmt=config.config['logging_date_format'],
            )
        )
        handlers.append(handler)

    if len(handlers) == 0:
        handlers.append(logging.NullHandler())
    for handler in handlers:
        logger.addHandler(handler)

    # --logging_info_is_print echoes any message to logger.info(x) as
    # a print statement on stdout.
    if config.config['logging_info_is_print']:
        handler = logging.StreamHandler(sys.stdout)
        handler.addFilter(OnlyInfoFilter())
        logger.addHandler(handler)

    # --logging_squelch_repeats allows code to request repeat logging
    # messages (identical log site and message contents) to be
    # silenced.  Logging code must request this explicitly, it isn't
    # automatic.  This option just allows the silencing to happen.
    if config.config['logging_squelch_repeats']:
        for handler in handlers:
            handler.addFilter(SquelchRepeatedMessagesFilter())

    # --logging_probabilistically allows code to request
    # non-deterministic logging where messages have some probability
    # of being produced.  Logging code must request this explicitly.
    # This option just allows the non-deterministic behavior to
    # happen.  Disabling it will cause every log message to be
    # produced.
    if config.config['logging_probabilistically']:
        for handler in handlers:
            handler.addFilter(ProbabilisticFilter())

    # --lmodule is a way to have a special logging level for just one
    # module or one set of modules that is different than the one set
    # globally via --logging_level.
    for handler in handlers:
        handler.addFilter(
            DynamicPerScopeLoggingLevelFilter(
                default_logging_level,
                config.config['lmodule'],
            )
        )
    # Level decisions are delegated to the filters above, so the logger
    # itself passes everything through.
    logger.setLevel(0)
    logger.propagate = False

    # --logging_captures_prints, if set, will capture and log.info
    # anything printed on stdout.
    if config.config['logging_captures_prints']:
        import builtins

        def print_and_also_log(*arg, **kwarg):
            f = kwarg.get('file', None)
            if f == sys.stderr:
                logger.warning(*arg)
            else:
                logger.info(*arg)
            # BUILT_IN_PRINT preserves the original print behavior.
            BUILT_IN_PRINT(*arg, **kwarg)

        builtins.print = print_and_also_log

    # At this point the logger is ready, handlers are set up,
    # etc... so log about the logging configuration.
    log_about_logging(
        logger,
        default_logging_level,
        preexisting_handlers_count,
        fmt,
        facility_name,
    )
    return logger
def get_logger(name: str = ""):
    """Get the named logger (the root logger by default), making sure
    global logging has been initialized first."""
    return initialize_logging(logging.getLogger(name))
def tprint(*args, **kwargs) -> None:
    """Legacy helper that prints a message prefixed with the current
    thread id; still needed by some code.  New code should prefer
    normal logging with --logging_debug_threads, which accomplishes
    the same thing.
    """
    debug_threads = config.config['logging_debug_threads']
    if debug_threads:
        from pyutils.parallelize.thread_utils import current_thread_id

        print(f'{current_thread_id()}', end="")
    print(*args, **kwargs)
def dprint(*args, **kwargs) -> None:
    """Legacy helper that prints to stderr; still needed by some code.
    New code should just use normal logging with --logging_console,
    which accomplishes the same thing.
    """
    print(*args, file=sys.stderr, **kwargs)
class OutputMultiplexer(object):
    """A class that broadcasts printed messages to several sinks
    (including various logging levels, different files, different file
    handles, the house log, etc...).  See also
    :class:`OutputMultiplexerContext` for an easy usage pattern.
    """

    class Destination(enum.IntEnum):
        """Bits in the destination_bitv bitvector.  Used to indicate the
        output destination."""

        # fmt: off
        LOG_DEBUG = 0x01         # ⎫
        LOG_INFO = 0x02          # ⎪
        LOG_WARNING = 0x04       # ⎬ Must provide logger to the c'tor.
        LOG_ERROR = 0x08         # ⎪
        LOG_CRITICAL = 0x10      # ⎭
        FILENAMES = 0x20         # Must provide a filename to the c'tor.
        FILEHANDLES = 0x40       # Must provide a handle to the c'tor.
        HLOG = 0x80              # The house log (see hlog, below).
        ALL_LOG_DESTINATIONS = (
            LOG_DEBUG | LOG_INFO | LOG_WARNING | LOG_ERROR | LOG_CRITICAL
        )
        # NOTE(review): 0x8F = DEBUG|INFO|WARNING|ERROR|HLOG; it omits
        # LOG_CRITICAL, FILENAMES and FILEHANDLES -- confirm intent.
        ALL_OUTPUT_DESTINATIONS = 0x8F
        # fmt: on

    def __init__(
        self,
        destination_bitv: int,
        *,
        logger=None,
        filenames: Optional[Iterable[str]] = None,
        handles: Optional[Iterable[io.TextIOWrapper]] = None,
    ):
        """
        Constructs the OutputMultiplexer instance.

        Args:
            destination_bitv: a bitvector where each bit represents an
                output destination.  Multiple bits may be set.
            logger: if LOG_* bits are set, you must pass a logger here.
            filenames: if FILENAMES bit is set, this should be a list of
                files you'd like to output into.  This code handles opening
                and closing said files.
            handles: if FILEHANDLES bit is set, this should be a list of
                already opened filehandles you'd like to output into.  The
                handles will remain open after the scope of the multiplexer.

        Raises:
            ValueError: a FILENAMES/FILEHANDLES bit is set without the
                corresponding argument.
        """
        if logger is None:
            logger = logging.getLogger(None)
        self.logger = logger

        self.f: Optional[List[Any]] = None
        if filenames is not None:
            # We own these files: open them now, close them in close().
            self.f = [open(filename, 'wb', buffering=0) for filename in filenames]
        else:
            if destination_bitv & OutputMultiplexer.Destination.FILENAMES:
                raise ValueError("Filenames argument is required if bitv & FILENAMES")

        self.h: Optional[List[Any]] = None
        if handles is not None:
            # We borrow these handles: they remain open after close().
            self.h = list(handles)
        else:
            if destination_bitv & OutputMultiplexer.Destination.FILEHANDLES:
                raise ValueError("Handle argument is required if bitv & FILEHANDLES")

        self.set_destination_bitv(destination_bitv)

    def get_destination_bitv(self):
        """Where are we outputting?"""
        return self.destination_bitv

    def set_destination_bitv(self, destination_bitv: int):
        """Change the output destination_bitv to the one provided.

        Raises:
            ValueError: a FILENAMES/FILEHANDLES bit is set but the
                corresponding sink was not given to the constructor.
        """
        if destination_bitv & self.Destination.FILENAMES and self.f is None:
            raise ValueError("Filename argument is required if bitv & FILENAMES")
        if destination_bitv & self.Destination.FILEHANDLES and self.h is None:
            raise ValueError("Handle argument is required if bitv & FILEHANDLES")
        self.destination_bitv = destination_bitv

    def print(self, *args, **kwargs):
        """Produce some output to all sinks.  Accepts the same end= and
        sep= keyword arguments as the builtin print."""
        from pyutils.string_utils import sprintf, strip_escape_sequences

        end = kwargs.pop("end", None)
        if end is not None:
            if not isinstance(end, str):
                raise TypeError("end must be None or a string")
        sep = kwargs.pop("sep", None)
        if sep is not None:
            if not isinstance(sep, str):
                raise TypeError("sep must be None or a string")
        if kwargs:
            raise TypeError("invalid keyword arguments to print()")
        buf = sprintf(*args, end="", sep=sep)
        if sep is None:
            sep = " "
        if end is None:
            end = "\n"
        if end == '\n':
            buf += '\n'
        if self.destination_bitv & self.Destination.FILENAMES and self.f is not None:
            for _ in self.f:
                _.write(buf.encode('utf-8'))
                _.flush()
        if self.destination_bitv & self.Destination.FILEHANDLES and self.h is not None:
            for _ in self.h:
                _.write(buf)
                _.flush()
        # Strip terminal escape sequences before logging / hlogging.
        buf = strip_escape_sequences(buf)
        if self.logger is not None:
            if self.destination_bitv & self.Destination.LOG_DEBUG:
                self.logger.debug(buf)
            if self.destination_bitv & self.Destination.LOG_INFO:
                self.logger.info(buf)
            if self.destination_bitv & self.Destination.LOG_WARNING:
                self.logger.warning(buf)
            if self.destination_bitv & self.Destination.LOG_ERROR:
                self.logger.error(buf)
            if self.destination_bitv & self.Destination.LOG_CRITICAL:
                self.logger.critical(buf)
            if self.destination_bitv & self.Destination.HLOG:
                hlog(buf)

    def close(self):
        """Close all open files."""
        if self.f is not None:
            for _ in self.f:
                _.close()
class OutputMultiplexerContext(OutputMultiplexer, contextlib.ContextDecorator):
    """
    A context that uses an :class:`OutputMultiplexer`.  e.g.::

        with OutputMultiplexerContext(
                OutputMultiplexer.LOG_INFO |
                OutputMultiplexer.LOG_DEBUG |
                OutputMultiplexer.FILENAMES |
                OutputMultiplexer.FILEHANDLES,
                filenames = [ '/tmp/foo.log', '/var/log/bar.log' ],
                handles = [ f, g ]
            ) as mplex:
                mplex.print("This is a log message!")
    """

    def __init__(
        self,
        destination_bitv: OutputMultiplexer.Destination,
        *,
        logger=None,
        filenames=None,
        handles=None,
    ):
        """Takes the same arguments as :meth:`OutputMultiplexer.__init__`."""
        super().__init__(
            destination_bitv,
            logger=logger,
            filenames=filenames,
            handles=handles,
        )

    def __enter__(self):
        return self

    def __exit__(self, etype, value, traceback) -> bool:
        """Close owned files and re-raise any in-context exception."""
        super().close()
        if etype is not None:
            raise etype(value)
        return False
def hlog(message: str) -> None:
    """Write a message to the house log (syslog facility local7 priority
    info) by invoking /usr/bin/logger.  This is pretty hacky but used
    by a bunch of code.  Another way to do this would be to use
    :code:`--logging_syslog` and :code:`--logging_syslog_facility` but
    I can't actually say that's easier.

    Args:
        message: the message to write to the house log.
    """
    import subprocess

    # Security hardening: pass the message as a single argv element
    # with no shell, so shell metacharacters in the message can't be
    # interpreted (the old os.system + quote-escaping dance was
    # injection-prone and fragile).
    try:
        subprocess.run(
            ['/usr/bin/logger', '-p', 'local7.info', '--', message],
            check=False,
        )
    except FileNotFoundError:
        # Match the old os.system best-effort behavior: never raise if
        # /usr/bin/logger is missing.
        pass
910 if __name__ == '__main__':