3 """Utilities related to logging."""
from logging.handlers import RotatingFileHandler, SysLogHandler
from typing import Callable, Dict, Iterable, Mapping, Optional

from overrides import overrides
21 # This module is commonly used by others in here and should avoid
22 # taking any unnecessary dependencies back on them.
# Register this module's commandline flags with the shared config module so
# that any program importing this file automatically picks up the
# --logging_* options.  (NB: parts of the cfg.add_argument(...) calls below
# are elided in this view; only selected argument lines are shown.)
cfg = config.add_commandline_args(
    f'Logging ({__file__})',
    'Args related to logging')
# Source of configuration, global level, and line/date formats.
    '--logging_config_file',
    type=argparse_utils.valid_filename,
    help='Config file containing the logging setup, see: https://docs.python.org/3/howto/logging.html#logging-advanced-tutorial',
    choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    help='The global default level below which to squelch log messages; see also --lmodule',
    help='The format for lines logged via the logger module. See: https://docs.python.org/3/library/logging.html#formatter-objects'
    '--logging_date_format',
    default='%Y/%m/%dT%H:%M:%S.%f%z',
    help='The format of any dates in --logging_format.'
# Output sinks: console (stderr), rotating logfile, and syslog.
    action=argparse_utils.ActionNoYes,
    help='Should we log to the console (stderr)',
    help='The filename of the logfile to write.'
    '--logging_filename_maxsize',
    help='The maximum size (in bytes) to write to the logging_filename.'
    '--logging_filename_count',
    help='The number of logging_filename copies to keep before deleting.'
    action=argparse_utils.ActionNoYes,
    help='Should we log to localhost\'s syslog.'
    '--logging_syslog_facility',
    choices=['NOTSET', 'AUTH', 'AUTH_PRIV', 'CRON', 'DAEMON', 'FTP', 'KERN', 'LPR', 'MAIL', 'NEWS',
             'SYSLOG', 'USER', 'UUCP', 'LOCAL0', 'LOCAL1', 'LOCAL2', 'LOCAL3', 'LOCAL4', 'LOCAL5',
    metavar='SYSLOG_FACILITY_LIST',
    help='The default syslog message facility identifier',
# Per-message decorations and opt-in behavior toggles (see the filters
# defined later in this file).
    '--logging_debug_threads',
    action=argparse_utils.ActionNoYes,
    help='Should we prepend pid/tid data to all log messages?'
    '--logging_debug_modules',
    action=argparse_utils.ActionNoYes,
    help='Should we prepend module/function data to all log messages?'
    '--logging_info_is_print',
    action=argparse_utils.ActionNoYes,
    help='logging.info also prints to stdout.'
    '--logging_squelch_repeats',
    action=argparse_utils.ActionNoYes,
    help='Do we allow code to indicate that it wants to squelch repeated logging messages or should we always log?'
    '--logging_probabilistically',
    action=argparse_utils.ActionNoYes,
    help='Do we allow probabilistic logging (for code that wants it) or should we always log?'
# See also: OutputMultiplexer
    '--logging_captures_prints',
    action=argparse_utils.ActionNoYes,
    help='When calling print, also log.info automatically.'
    metavar='<SCOPE>=<LEVEL>[,<SCOPE>=<LEVEL>...]',
    'Allows per-scope logging levels which override the global level set with --logging-level.' +
    'Pass a space separated list of <scope>=<level> where <scope> is one of: module, ' +
    'module:function, or :function and <level> is a logging level (e.g. INFO, DEBUG...)'
    '--logging_clear_preexisting_handlers',
    action=argparse_utils.ActionNoYes,
    'Should logging code clear preexisting global logging handlers and thus insist that is ' +
    'alone can add handlers. Use this to work around annoying modules that insert global ' +
    'handlers with formats and logging levels you might now want. Caveat emptor, this may ' +
    'cause you to miss logging messages.'

# Saved reference to the real builtin print; used by the
# print_and_also_log wrapper installed by initialize_logging() when
# --logging_captures_prints is enabled.
built_in_print = print

# Guard so that initialize_logging() performs its setup at most once per
# process.
logging_initialized = False
def function_identifier(f: Callable) -> str:
    """
    Given a callable function, return a string that identifies it.
    Usually that string is just __module__:__name__ but there's a
    corner case: when __module__ is __main__ (i.e. the callable is
    defined in the same module as __main__).  In this case,
    f.__module__ returns "__main__" instead of the file that it is
    defined in.  Work around this using pathlib.Path (see below).

    For example, function_identifier(function_identifier) evaluated in
    this module run as a program returns 'logging_utils:function_identifier'.
    """
    if f.__module__ == '__main__':
        # "__main__" is useless as an identifier; recover the stem of
        # the file that actually defines the callable.  Bug fix: the
        # __main__ module must be imported before reading its __file__.
        import __main__
        from pathlib import Path
        module = __main__.__file__
        module = Path(module).stem
        return f'{module}:{f.__name__}'
    return f'{f.__module__}:{f.__name__}'
# A map from logging_callsite_id -> the threshold number of logged
# messages allowed before squelching kicks in.  Populated by the
# @squelch_repeated_log_messages decorator and consulted by
# SquelchRepeatedMessagesFilter.  Annotated Dict (not Mapping) because
# it is mutated.
squelched_logging_counts: Dict[str, int] = {}
def squelch_repeated_log_messages(squelch_after_n_repeats: int) -> Callable:
    """
    A decorator that marks a function as interested in having the logging
    messages that it produces be squelched (ignored) after it logs the
    same message more than N times.

    Note: this decorator affects *ALL* logging messages produced
    within the decorated function.  That said, messages must be
    identical in order to be squelched.  For example, if the same line
    of code produces different messages (because of, e.g., a format
    string), the messages are considered to be different.
    """
    def squelch_logging_wrapper(f: Callable):
        # Record this function's squelch threshold so that
        # SquelchRepeatedMessagesFilter can consult it at log time.
        identifier = function_identifier(f)
        squelched_logging_counts[identifier] = squelch_after_n_repeats
        # Bug fix: the wrapper must hand back the function, otherwise
        # decorated callables are silently replaced by None.
        return f
    return squelch_logging_wrapper
class SquelchRepeatedMessagesFilter(logging.Filter):
    """
    A filter that only logs messages from a given site with the same
    (exact) message at the same logging level N times and ignores
    subsequent attempts to log.

    This filter only affects logging messages that repeat more than
    a threshold number of times from functions that are tagged with
    the @logging_utils.squelch_repeated_log_messages decorator; others
    are passed through unaffected.

    This functionality is enabled by default but can be disabled via
    the --no_logging_squelch_repeats commandline flag.
    """

    def __init__(self) -> None:
        super().__init__()
        # Number of times each unique (callsite, level, message) has
        # been logged so far.
        self.counters: collections.Counter = collections.Counter()

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the record should be logged."""
        id1 = f'{record.module}:{record.funcName}'
        # Bug fix: functions that never opted in must be logged
        # unconditionally; without this early return the threshold
        # lookup below raises KeyError.
        if id1 not in squelched_logging_counts:
            return True
        threshold = squelched_logging_counts[id1]
        logsite = f'{record.pathname}+{record.lineno}+{record.levelno}+{record.msg}'
        count = self.counters[logsite]
        self.counters[logsite] += 1
        return count < threshold
class DynamicPerScopeLoggingLevelFilter(logging.Filter):
    """This filter only allows logging messages from an allow list of
    module names or module:function names.  Blocks others.

    Scopes and levels come from the --lmodule flag whose value looks
    like "module=LEVEL,module:function=LEVEL,:function=LEVEL".  Records
    that match no scope fall back to the global default level.
    """

    @staticmethod
    def level_name_to_level(name: str) -> int:
        """Map a logging level name (e.g. 'INFO') to its numeric value.

        Raises:
            ValueError: if name is not a valid logging level name.
        """
        numeric_level = getattr(logging, name, None)
        if not isinstance(numeric_level, int):
            # Bug fix: this message was missing its f-prefix and logged
            # the literal text "{name}" instead of the bad level name.
            raise ValueError(f'Invalid level: {name}')
        return numeric_level

    def __init__(
        self,
        default_logging_level: int,
        per_scope_logging_levels: str,
    ) -> None:
        """Args:
            default_logging_level: numeric level used when no per-scope
                directive matches a record.
            per_scope_logging_levels: the raw --lmodule directive string
                (comma separated <scope>=<LEVEL> chunks), or None.
        """
        super().__init__()
        self.valid_levels = set(['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])
        self.default_logging_level = default_logging_level
        self.level_by_scope = {}
        if per_scope_logging_levels is not None:
            for chunk in per_scope_logging_levels.split(','):
                # Malformed chunks are reported on stderr and skipped.
                if '=' not in chunk:
                    print(
                        f'Malformed lmodule directive: "{chunk}", missing "=". Ignored.',
                        file=sys.stderr,
                    )
                    continue
                try:
                    (scope, level) = chunk.split('=')
                except ValueError:
                    print(
                        f'Malformed lmodule directive: "{chunk}". Ignored.',
                        file=sys.stderr,
                    )
                    continue
                scope = scope.strip()
                level = level.strip().upper()
                if level not in self.valid_levels:
                    print(
                        f'Malformed lmodule directive: "{chunk}", bad level. Ignored.',
                        file=sys.stderr,
                    )
                    continue
                self.level_by_scope[scope] = (
                    DynamicPerScopeLoggingLevelFilter.level_name_to_level(level)
                )

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the record passes its per-scope (or, failing
        that, the global default) logging level."""
        # First try to find a logging level by scope (--lmodule)
        if len(self.level_by_scope) > 0:
            min_level = None
            # Scopes that describe this record, general to specific.
            for scope in (
                record.module,
                f'{record.module}:{record.funcName}',
                f':{record.funcName}',
            ):
                level = self.level_by_scope.get(scope, None)
                if level is not None:
                    if min_level is None or level < min_level:
                        min_level = level
            # If we found one, use it instead of the global default level.
            if min_level is not None:
                return record.levelno >= min_level
        # Otherwise, use the global logging level (--logging_level)
        return record.levelno >= self.default_logging_level
# A map from function_identifier -> probability that logging happens
# (0.0%..100.0%).  Populated by the @logging_is_probabilistic decorator
# and consulted by ProbabilisticFilter.  Annotated Dict (not Mapping)
# because it is mutated.
probabilistic_logging_levels: Dict[str, float] = {}
def logging_is_probabilistic(probability_of_logging: float) -> Callable:
    """
    A decorator that indicates that all logging statements within the
    scope of a particular (marked) function are not deterministic
    (i.e. they do not always unconditionally log) but rather are
    probabilistic (i.e. they log N% of the time randomly).

    Note that this functionality can be disabled (forcing all logged
    messages to produce output) via the --no_logging_probabilistically
    commandline flag.

    This affects *ALL* logging statements within the marked function.
    """
    def probabilistic_logging_wrapper(f: Callable):
        # Record this function's logging probability so that
        # ProbabilisticFilter can consult it at log time.
        identifier = function_identifier(f)
        probabilistic_logging_levels[identifier] = probability_of_logging
        # Bug fix: the wrapper must hand back the function, otherwise
        # decorated callables are silently replaced by None.
        return f
    return probabilistic_logging_wrapper
class ProbabilisticFilter(logging.Filter):
    """
    A filter that logs messages probabilistically (i.e. randomly at some
    percentage chance per message).

    This filter only affects logging messages from functions that have
    been tagged with the @logging_utils.logging_is_probabilistic
    decorator; others pass through unaffected.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the record should be logged."""
        id1 = f'{record.module}:{record.funcName}'
        # Bug fix: callsites that never opted in must always be logged;
        # without this early return the lookup below raises KeyError.
        if id1 not in probabilistic_logging_levels:
            return True
        threshold = probabilistic_logging_levels[id1]
        # threshold is a percentage in [0.0, 100.0].
        return (random.random() * 100.0) <= threshold
class OnlyInfoFilter(logging.Filter):
    """
    A filter that passes only messages logged at exactly the INFO
    level.  Used by the --logging_info_is_print commandline option to
    pick out the subset of the logging stream that should also be sent
    to stdout.
    """

    def filter(self, record: logging.LogRecord):
        # Everything above or below INFO is rejected.
        is_info = record.levelno == logging.INFO
        return is_info
class MillisecondAwareFormatter(logging.Formatter):
    """
    A formatter for adding milliseconds to log messages which, for
    whatever reason, the default python logger doesn't do.
    """

    # Turn the record's epoch timestamp into a datetime.
    converter = datetime.datetime.fromtimestamp

    def formatTime(self, record, datefmt=None):
        """Format record.created, honoring datefmt when given;
        otherwise fall back to "YYYY-mm-dd HH:MM:SS,mmm".

        Fixes the fall-through in the previous version: both branches
        computed s but the value was never returned.
        """
        # NOTE(review): the timezone is hardcoded to US/Pacific rather
        # than using the local zone or a flag -- confirm this is intended.
        ct = MillisecondAwareFormatter.converter(
            record.created, pytz.timezone("US/Pacific")
        )
        if datefmt:
            s = ct.strftime(datefmt)
        else:
            t = ct.strftime("%Y-%m-%d %H:%M:%S")
            s = "%s,%03d" % (t, record.msecs)
        return s
def initialize_logging(logger=None) -> logging.Logger:
    """Set up global logging (handlers, formatters, filters) from the
    --logging_* commandline flags and return the configured logger
    (the root logger when none is passed).  Only the first call per
    process does any work.  (NB: several statements in this function
    are elided in this view.)
    """
    global logging_initialized
    if logging_initialized:
    logging_initialized = True
        logger = logging.getLogger()

    # Optionally clear handlers installed by other modules before us,
    # counting them so we can report about it below.
    preexisting_handlers_count = 0
    assert config.has_been_parsed()
    if config.config['logging_clear_preexisting_handlers']:
        while logger.hasHandlers():
            logger.removeHandler(logger.handlers[0])
            preexisting_handlers_count += 1

    if config.config['logging_config_file'] is not None:
        # NOTE(review): this reads the hardcoded path 'logging.conf'
        # instead of config.config['logging_config_file'] -- looks like
        # a bug; confirm and use the configured filename.
        logging.config.fileConfig('logging.conf')

    # Global default logging level (--logging_level)
    default_logging_level = getattr(
        config.config['logging_level'].upper(),
    if not isinstance(default_logging_level, int):
        raise ValueError('Invalid level: %s' % config.config['logging_level'])

    # Choose the log line format: an explicit --logging_format wins;
    # otherwise a terse default (the syslog variant omits asctime since
    # syslogd adds its own timestamp).
    if config.config['logging_format']:
        fmt = config.config['logging_format']
        if config.config['logging_syslog']:
            fmt = '%(levelname).1s:%(filename)s[%(process)d]: %(message)s'
            fmt = '%(levelname).1s:%(asctime)s: %(message)s'
    if config.config['logging_debug_threads']:
        fmt = f'%(process)d.%(thread)d|{fmt}'
    if config.config['logging_debug_modules']:
        fmt = f'%(filename)s:%(funcName)s:%(lineno)s|{fmt}'

    # Syslog handler (skipped on platforms without /dev/log).
    if config.config['logging_syslog']:
        if sys.platform not in ('win32', 'cygwin'):
            if config.config['logging_syslog_facility']:
                facility_name = 'LOG_' + config.config['logging_syslog_facility']
            # Unknown facility names fall back to LOG_USER.
            facility = SysLogHandler.__dict__.get(facility_name, SysLogHandler.LOG_USER)
            handler = SysLogHandler(facility=facility, address='/dev/log')
            handler.setFormatter(
                MillisecondAwareFormatter(
                    datefmt=config.config['logging_date_format'],
            handlers.append(handler)

    # Rotating logfile handler.
    if config.config['logging_filename']:
        handler = RotatingFileHandler(
            config.config['logging_filename'],
            maxBytes = config.config['logging_filename_maxsize'],
            backupCount = config.config['logging_filename_count'],
        handler.setFormatter(
            MillisecondAwareFormatter(
                datefmt=config.config['logging_date_format'],
        handlers.append(handler)

    # Console (stderr) handler.
    if config.config['logging_console']:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(
            MillisecondAwareFormatter(
                datefmt=config.config['logging_date_format'],
        handlers.append(handler)

    # Always install at least one handler.
    if len(handlers) == 0:
        handlers.append(logging.NullHandler())
    for handler in handlers:
        logger.addHandler(handler)

    # Optionally mirror INFO-level messages to stdout.
    if config.config['logging_info_is_print']:
        handler = logging.StreamHandler(sys.stdout)
        handler.addFilter(OnlyInfoFilter())
        logger.addHandler(handler)

    # Install the opt-in filters on every handler.
    if config.config['logging_squelch_repeats']:
        for handler in handlers:
            handler.addFilter(SquelchRepeatedMessagesFilter())
    if config.config['logging_probabilistically']:
        for handler in handlers:
            handler.addFilter(ProbabilisticFilter())
    for handler in handlers:
            DynamicPerScopeLoggingLevelFilter(
                default_logging_level,
                config.config['lmodule'],
    logger.propagate = False

    # Optionally wrap builtins.print so printed data is also logged.
    if config.config['logging_captures_prints']:
        global built_in_print

        def print_and_also_log(*arg, **kwarg):
            f = kwarg.get('file', None)
            built_in_print(*arg, **kwarg)
        builtins.print = print_and_also_log

    # At this point the logger is ready, handlers are set up,
    # etc... so log about the logging configuration.
    level_name = logging._levelToName.get(default_logging_level, str(default_logging_level))
        f'Initialized global logging; default logging level is {level_name}.'
    if config.config['logging_clear_preexisting_handlers'] and preexisting_handlers_count > 0:
        # NOTE(review): missing f-prefix below -- the placeholder is
        # never interpolated; this should be an f-string.
        msg = 'Logging cleared {preexisting_handlers_count} global handlers (--logging_clear_preexisting_handlers)'
    logger.debug(f'Logging format specification is "{fmt}"')
    if config.config['logging_debug_threads']:
        logger.debug('...Logging format spec captures tid/pid (--logging_debug_threads)')
    if config.config['logging_debug_modules']:
        logger.debug('...Logging format spec captures files/functions/lineno (--logging_debug_modules)')
    if config.config['logging_syslog']:
        logger.debug(f'Logging to syslog as {facility_name} with priority mapping based on level')
    if config.config['logging_filename']:
        logger.debug(f'Logging to filename {config.config["logging_filename"]}')
        logger.debug(f'...with {config.config["logging_filename_maxsize"]} bytes max file size.')
        logger.debug(f'...and {config.config["logging_filename_count"]} rotating backup file count.')
    if config.config['logging_console']:
        logger.debug('Logging to the console (stderr).')
    if config.config['logging_info_is_print']:
        'Logging logger.info messages will be repeated on stdout (--logging_info_is_print)'
    if config.config['logging_squelch_repeats']:
        'Logging code allowed to request repeated messages be squelched (--logging_squelch_repeats)'
        'Logging code forbidden to request messages be squelched; all messages logged (--no_logging_squelch_repeats)'
    if config.config['logging_probabilistically']:
        'Logging code is allowed to request probabilistic logging (--logging_probabilistically)'
        'Logging code is forbidden to request probabilistic logging; messages always logged (--no_logging_probabilistically)'
    if config.config['lmodule']:
        # NOTE(review): missing f-prefix below; the
        # {config.config["lmodule"]} placeholder is never interpolated.
        'Logging dynamic per-module logging enabled (--lmodule={config.config["lmodule"]})'
    if config.config['logging_captures_prints']:
        logger.debug('Logging will capture printed data as logger.info messages (--logging_captures_prints)')
def get_logger(name: str = ""):
    """Fetch the logger called *name* (the root logger by default),
    making sure global logging has been initialized first.
    """
    underlying = logging.getLogger(name)
    return initialize_logging(underlying)
def tprint(*args, **kwargs) -> None:
    """Legacy function for printing a message augmented with thread id
    still needed by some code.  Please use --logging_debug_threads in
    new code instead.
    """
    if config.config['logging_debug_threads']:
        from thread_utils import current_thread_id
        # Prefix the printed message with the current thread's id.
        print(f'{current_thread_id()}', end="")
        print(*args, **kwargs)
    # NOTE(review): when --logging_debug_threads is disabled nothing is
    # printed at all -- confirm callers expect this.
def dprint(*args, **kwargs) -> None:
    """Legacy helper that prints to stderr; still needed by some code.
    In new code please just use normal logging with --logging_console,
    which accomplishes the same thing.
    """
    # Identical to print() except the output stream is forced to stderr.
    print(*args, file=sys.stderr, **kwargs)
class OutputMultiplexer(object):
    """
    A class that broadcasts printed messages to several sinks (including
    various logging levels, different files, different file handles,
    the house log, etc...). See also OutputMultiplexerContext for an
    easier-to-use context manager wrapper.  (NB: parts of this class are
    elided in this view.)
    """

    class Destination(enum.IntEnum):
        """Bits in the destination_bitv bitvector. Used to indicate the
        output destination."""
        LOG_WARNING = 0x04  # ⎬ Must provide logger to the c'tor.
        LOG_CRITICAL = 0x10  # ⎭
        FILENAMES = 0x20  # Must provide a filename to the c'tor.
        FILEHANDLES = 0x40  # Must provide a handle to the c'tor.
        # Convenience mask covering every LOG_* bit.
        ALL_LOG_DESTINATIONS = (
            LOG_DEBUG | LOG_INFO | LOG_WARNING | LOG_ERROR | LOG_CRITICAL
        ALL_OUTPUT_DESTINATIONS = 0x8F

        # (constructor signature partially elided in this view)
        destination_bitv: int,
        filenames: Optional[Iterable[str]] = None,
        handles: Optional[Iterable[io.TextIOWrapper]] = None):
        logger = logging.getLogger(None)
        # Open every named file in unbuffered binary mode so writes land
        # immediately.
        if filenames is not None:
                open(filename, 'wb', buffering=0) for filename in filenames
            if destination_bitv & OutputMultiplexer.FILENAMES:
                # NOTE(review): elsewhere this check is spelled
                # OutputMultiplexer.Destination.FILEHANDLES (see below);
                # OutputMultiplexer.FILENAMES is inconsistent -- confirm
                # it should be OutputMultiplexer.Destination.FILENAMES.
                "Filenames argument is required if bitv & FILENAMES"
        if handles is not None:
            self.h = [handle for handle in handles]
            if destination_bitv & OutputMultiplexer.Destination.FILEHANDLES:
                "Handle argument is required if bitv & FILEHANDLES"
        # Validate and remember the destination bits.
        self.set_destination_bitv(destination_bitv)

    def get_destination_bitv(self):
        """Return the current destination bitvector."""
        return self.destination_bitv

    def set_destination_bitv(self, destination_bitv: int):
        """Change where output goes, validating that the sinks each bit
        requires were provided to the constructor."""
        if destination_bitv & self.Destination.FILENAMES and self.f is None:
            "Filename argument is required if bitv & FILENAMES"
        if destination_bitv & self.Destination.FILEHANDLES and self.h is None:
            "Handle argument is required if bitv & FILEHANDLES"
        self.destination_bitv = destination_bitv

    def print(self, *args, **kwargs):
        """print()-alike that fans the rendered message out to every
        destination enabled in self.destination_bitv."""
        from string_utils import sprintf, strip_escape_sequences
        end = kwargs.pop("end", None)
        if not isinstance(end, str):
            raise TypeError("end must be None or a string")
        sep = kwargs.pop("sep", None)
        if not isinstance(sep, str):
            raise TypeError("sep must be None or a string")
            raise TypeError("invalid keyword arguments to print()")
        # Render the arguments once; sinks below reuse buf.
        buf = sprintf(*args, end="", sep=sep)
        # Raw bytes to files opened by filename...
            self.destination_bitv & self.Destination.FILENAMES and
            _.write(buf.encode('utf-8'))
        # ...and to caller-provided handles.
            self.destination_bitv & self.Destination.FILEHANDLES and
        # Strip ANSI escape sequences before handing the message to the
        # logger / house log.
        buf = strip_escape_sequences(buf)
        if self.logger is not None:
            if self.destination_bitv & self.Destination.LOG_DEBUG:
                self.logger.debug(buf)
            if self.destination_bitv & self.Destination.LOG_INFO:
                self.logger.info(buf)
            if self.destination_bitv & self.Destination.LOG_WARNING:
                self.logger.warning(buf)
            if self.destination_bitv & self.Destination.LOG_ERROR:
                self.logger.error(buf)
            if self.destination_bitv & self.Destination.LOG_CRITICAL:
                self.logger.critical(buf)
        if self.destination_bitv & self.Destination.HLOG:

    # (remainder of the class elided in this view; the fragment below
    # guards cleanup of the files opened by filename)
        if self.f is not None:
class OutputMultiplexerContext(OutputMultiplexer, contextlib.ContextDecorator):
    """
    A context manager wrapper around OutputMultiplexer. e.g.

        with OutputMultiplexerContext(
            OutputMultiplexer.LOG_INFO |
            OutputMultiplexer.LOG_DEBUG |
            OutputMultiplexer.FILENAMES |
            OutputMultiplexer.FILEHANDLES,
            filenames = [ '/tmp/foo.log', '/var/log/bar.log' ],
        ) as mplex:
            mplex.print("This is a log message!")
    """
        # (constructor signature partially elided in this view)
        destination_bitv: OutputMultiplexer.Destination,

    def __exit__(self, etype, value, traceback) -> bool:
        # Clean up via the multiplexer, then decide whether any
        # in-flight exception should propagate.
        if etype is not None:
def hlog(message: str) -> None:
    """Write a message to the house log (syslog facility local7 priority
    info) by calling /usr/bin/logger.  This is pretty hacky but used
    by a bunch of code.  Another way to do this would be to use
    --logging_syslog and --logging_syslog_facility but I can't
    actually say that's easier.
    """
    # Security/robustness: invoke logger with an argv list and no shell
    # so the message is passed through verbatim; this replaces the old
    # os.system() call that depended on manual single-quote escaping.
    import subprocess
    subprocess.run(
        ['/usr/bin/logger', '-p', 'local7.info', '--', message],
        check=False,
    )
775 if __name__ == '__main__':