3 """Utilities related to logging."""
11 from logging.handlers import RotatingFileHandler, SysLogHandler
15 from typing import Callable, Iterable, Mapping, Optional
17 from overrides import overrides
20 # This module is commonly used by others in here and should avoid
21 # taking any unnecessary dependencies back on them.
cfg = config.add_commandline_args(
    f'Logging ({__file__})',
    'Args related to logging')
cfg.add_argument(
    '--logging_config_file',
    type=argparse_utils.valid_filename,
    help='Config file containing the logging setup; see: https://docs.python.org/3/howto/logging.html#logging-advanced-tutorial',
)
cfg.add_argument(
    '--logging_level',
    choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
    help='The global default level below which to squelch log messages; see also --lmodule',
)
cfg.add_argument(
    '--logging_format',
    help='The format for lines logged via the logger module.  See: https://docs.python.org/3/library/logging.html#formatter-objects',
)
cfg.add_argument(
    '--logging_date_format',
    default='%Y/%m/%dT%H:%M:%S.%f%z',
    help='The format of any dates in --logging_format.',
)
cfg.add_argument(
    '--logging_console',
    action=argparse_utils.ActionNoYes,
    help='Should we log to the console (stderr)?',
)
cfg.add_argument(
    '--logging_filename',
    help='The filename of the logfile to write.',
)
cfg.add_argument(
    '--logging_filename_maxsize',
    help='The maximum size (in bytes) to write to the logging_filename.',
)
cfg.add_argument(
    '--logging_filename_count',
    help='The number of logging_filename copies to keep before deleting.',
)
cfg.add_argument(
    '--logging_syslog',
    action=argparse_utils.ActionNoYes,
    help="Should we log to localhost's syslog?",
)
cfg.add_argument(
    '--logging_syslog_facility',
    choices=['NOTSET', 'AUTH', 'AUTH_PRIV', 'CRON', 'DAEMON', 'FTP', 'KERN', 'LPR', 'MAIL', 'NEWS',
             'SYSLOG', 'USER', 'UUCP', 'LOCAL0', 'LOCAL1', 'LOCAL2', 'LOCAL3', 'LOCAL4', 'LOCAL5',
             'LOCAL6', 'LOCAL7'],
    metavar='SYSLOG_FACILITY_LIST',
    help='The default syslog message facility identifier.',
)
cfg.add_argument(
    '--logging_debug_threads',
    action=argparse_utils.ActionNoYes,
    help='Should we prepend pid/tid data to all log messages?',
)
cfg.add_argument(
    '--logging_debug_modules',
    action=argparse_utils.ActionNoYes,
    help='Should we prepend module/function data to all log messages?',
)
cfg.add_argument(
    '--logging_info_is_print',
    action=argparse_utils.ActionNoYes,
    help='logging.info also prints to stdout.',
)
cfg.add_argument(
    '--logging_squelch_repeats',
    action=argparse_utils.ActionNoYes,
    help='Do we allow code to indicate that it wants to squelch repeated logging messages or should we always log?',
)
cfg.add_argument(
    '--logging_probabilistically',
    action=argparse_utils.ActionNoYes,
    help='Do we allow probabilistic logging (for code that wants it) or should we always log?',
)
# See also: OutputMultiplexer
cfg.add_argument(
    '--logging_captures_prints',
    action=argparse_utils.ActionNoYes,
    help='When calling print, also log.info automatically.',
)
cfg.add_argument(
    '--lmodule',
    metavar='<SCOPE>=<LEVEL>[,<SCOPE>=<LEVEL>...]',
    help='Allows per-scope logging levels which override the global level set with --logging_level.  ' +
         'Pass a comma separated list of <scope>=<level> where <scope> is one of: module, ' +
         'module:function, or :function and <level> is a logging level (e.g. INFO, DEBUG...)',
)
cfg.add_argument(
    '--logging_clear_preexisting_handlers',
    action=argparse_utils.ActionNoYes,
    help='Should logging code clear preexisting global logging handlers and thus insist that it ' +
         'alone can add handlers?  Use this to work around annoying modules that insert global ' +
         'handlers with formats and logging levels you might not want.  Caveat emptor, this may ' +
         'cause you to miss logging messages.',
)
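# A usage sketch (illustrative only; "my_program.py" is hypothetical and the
# config.parse() entry point is an assumption based on config.has_been_parsed()
# below):
#
#   $ python my_program.py \
#         --logging_level=DEBUG \
#         --logging_filename=/tmp/my_program.log \
#         --logging_debug_threads \
#         --lmodule='my_module:do_work=INFO'
#
# Any program that parses its flags via the config module and then calls
# logging_utils.initialize_logging() picks these arguments up automatically,
# because they were registered above with config.add_commandline_args().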
built_in_print = print
logging_initialized = False
def function_identifier(f: Callable) -> str:
    """
    Given a callable function, return a string that identifies it.
    Usually that string is just __module__:__name__ but there's a
    corner case: when __module__ is __main__ (i.e. the callable is
    defined in the same module as __main__).  In this case,
    f.__module__ returns "__main__" instead of the file that it is
    defined in.  Work around this using pathlib.Path (see below).

    >>> function_identifier(function_identifier)
    'logging_utils:function_identifier'

    """
    if f.__module__ == '__main__':
        from pathlib import Path

        import __main__

        module = __main__.__file__
        module = Path(module).stem
        return f'{module}:{f.__name__}'
    else:
        return f'{f.__module__}:{f.__name__}'
# A map from logging_callsite_id -> count of logged messages.
squelched_logging_counts: Mapping[str, int] = {}
def squelch_repeated_log_messages(squelch_after_n_repeats: int) -> Callable:
    """
    A decorator that marks a function as interested in having the logging
    messages that it produces be squelched (ignored) after it logs the
    same message more than N times.

    Note: this decorator affects *ALL* logging messages produced
    within the decorated function.  That said, messages must be
    identical in order to be squelched.  For example, if the same line
    of code produces different messages (because of, e.g., a format
    string), the messages are considered to be different.

    """
    def squelch_logging_wrapper(f: Callable):
        identifier = function_identifier(f)
        squelched_logging_counts[identifier] = squelch_after_n_repeats
        return f
    return squelch_logging_wrapper
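# Example usage (a sketch; "poll_for_completion" and its logger are hypothetical):
#
#   logger = logging.getLogger(__name__)
#
#   @squelch_repeated_log_messages(squelch_after_n_repeats=5)
#   def poll_for_completion():
#       logger.info('Still waiting...')   # Logged at most 5 times, then squelched.
#
# The actual squelching is done by SquelchRepeatedMessagesFilter (below), which
# initialize_logging() attaches to each handler unless
# --no_logging_squelch_repeats is given.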
class SquelchRepeatedMessagesFilter(logging.Filter):
    """
    A filter that only logs messages from a given site with the same
    (exact) message at the same logging level N times and ignores
    subsequent attempts to log.

    This filter only affects logging messages that repeat more than
    a threshold number of times from functions that are tagged with
    the @logging_utils.squelch_repeated_log_messages decorator; others
    are passed through unaffected.

    This functionality is enabled by default but can be disabled via
    the --no_logging_squelch_repeats commandline flag.

    """
    def __init__(self) -> None:
        super().__init__()
        self.counters = collections.Counter()

    @overrides
    def filter(self, record: logging.LogRecord) -> bool:
        id1 = f'{record.module}:{record.funcName}'
        if id1 not in squelched_logging_counts:
            return True
        threshold = squelched_logging_counts[id1]
        logsite = f'{record.pathname}+{record.lineno}+{record.levelno}+{record.msg}'
        count = self.counters[logsite]
        self.counters[logsite] += 1
        return count < threshold
class DynamicPerScopeLoggingLevelFilter(logging.Filter):
    """This filter only allows logging messages from an allow list of
    module names or module:function names.  Blocks others.

    """

    @staticmethod
    def level_name_to_level(name: str) -> int:
        numeric_level = getattr(logging, name, None)
        if not isinstance(numeric_level, int):
            raise ValueError(f'Invalid level: {name}')
        return numeric_level

    def __init__(
        self,
        default_logging_level: int,
        per_scope_logging_levels: str,
    ) -> None:
        super().__init__()
        self.valid_levels = set(['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])
        self.default_logging_level = default_logging_level
        self.level_by_scope = {}
        if per_scope_logging_levels is not None:
            for chunk in per_scope_logging_levels.split(','):
                if '=' not in chunk:
                    print(
                        f'Malformed lmodule directive: "{chunk}", missing "=".  Ignored.',
                        file=sys.stderr,
                    )
                    continue
                try:
                    (scope, level) = chunk.split('=')
                except ValueError:
                    print(
                        f'Malformed lmodule directive: "{chunk}".  Ignored.',
                        file=sys.stderr,
                    )
                    continue
                scope = scope.strip()
                level = level.strip().upper()
                if level not in self.valid_levels:
                    print(
                        f'Malformed lmodule directive: "{chunk}", bad level.  Ignored.',
                        file=sys.stderr,
                    )
                    continue
                self.level_by_scope[scope] = (
                    DynamicPerScopeLoggingLevelFilter.level_name_to_level(level)
                )

    @overrides
    def filter(self, record: logging.LogRecord) -> bool:
        # First try to find a logging level by scope (--lmodule)
        if len(self.level_by_scope) > 0:
            min_level = None
            for scope in (
                record.module,
                f'{record.module}:{record.funcName}',
                f':{record.funcName}',
            ):
                level = self.level_by_scope.get(scope, None)
                if level is not None:
                    if min_level is None or level < min_level:
                        min_level = level

            # If we found one, use it instead of the global default level.
            if min_level is not None:
                return record.levelno >= min_level

        # Otherwise, use the global logging level (--logging_level)
        return record.levelno >= self.default_logging_level
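# Example (a sketch with hypothetical module/function names): with a global
# --logging_level=WARNING, passing
#
#   --lmodule='subsystem=INFO,subsystem:busy_func=DEBUG,:main=DEBUG'
#
# leaves most messages squelched below WARNING but allows INFO+ from anywhere
# in the module "subsystem", DEBUG+ from subsystem.busy_func, and DEBUG+ from
# any function named "main" regardless of its module.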
# A map from function_identifier -> probability of logging (0.0%..100.0%)
probabilistic_logging_levels: Mapping[str, float] = {}
def logging_is_probabilistic(probability_of_logging: float) -> Callable:
    """
    A decorator that indicates that all logging statements within the
    scope of a particular (marked) function are not deterministic
    (i.e. they do not always unconditionally log) but rather are
    probabilistic (i.e. they log N% of the time, randomly).

    Note that this functionality can be disabled (forcing all logged
    messages to produce output) via the --no_logging_probabilistically
    commandline flag.

    This affects *ALL* logging statements within the marked function.

    """
    def probabilistic_logging_wrapper(f: Callable):
        identifier = function_identifier(f)
        probabilistic_logging_levels[identifier] = probability_of_logging
        return f
    return probabilistic_logging_wrapper
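# Example usage (a sketch; "heartbeat" and its logger are hypothetical):
#
#   @logging_is_probabilistic(5.0)
#   def heartbeat():
#       logger.info('thump')   # Emitted roughly 5% of the time, at random.
#
# ProbabilisticFilter (below) does the actual coin flipping; passing
# --no_logging_probabilistically restores unconditional logging.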
class ProbabilisticFilter(logging.Filter):
    """
    A filter that logs messages probabilistically (i.e. randomly at some
    percent chance).

    This filter only affects logging messages from functions that have
    been tagged with the @logging_utils.logging_is_probabilistic decorator.

    """
    @overrides
    def filter(self, record: logging.LogRecord) -> bool:
        id1 = f'{record.module}:{record.funcName}'
        if id1 not in probabilistic_logging_levels:
            return True
        threshold = probabilistic_logging_levels[id1]
        return (random.random() * 100.0) <= threshold
class OnlyInfoFilter(logging.Filter):
    """
    A filter that only logs messages produced at the INFO logging
    level.  This is used by the --logging_info_is_print commandline
    option to select a subset of the logging stream to send to a
    stdout handler.

    """
    @overrides
    def filter(self, record: logging.LogRecord):
        return record.levelno == logging.INFO
class MillisecondAwareFormatter(logging.Formatter):
    """
    A formatter for adding milliseconds to log messages which, for
    whatever reason, the default Python logger doesn't do.

    """
    converter = datetime.datetime.fromtimestamp

    @overrides
    def formatTime(self, record, datefmt=None):
        ct = MillisecondAwareFormatter.converter(
            record.created, pytz.timezone("US/Pacific")
        )
        if datefmt:
            s = ct.strftime(datefmt)
        else:
            t = ct.strftime("%Y-%m-%d %H:%M:%S")
            s = "%s,%03d" % (t, record.msecs)
        return s
def initialize_logging(logger=None) -> logging.Logger:
    global logging_initialized
    if logging_initialized:
        return logging.getLogger()
    logging_initialized = True

    if logger is None:
        logger = logging.getLogger()

    preexisting_handlers_count = 0
    assert config.has_been_parsed()
    if config.config['logging_clear_preexisting_handlers']:
        while logger.hasHandlers():
            logger.removeHandler(logger.handlers[0])
            preexisting_handlers_count += 1

    if config.config['logging_config_file'] is not None:
        logging.config.fileConfig(config.config['logging_config_file'])
        return logger

    handlers = []
    # Global default logging level (--logging_level)
    default_logging_level = getattr(
        logging,
        config.config['logging_level'].upper(),
        None
    )
    if not isinstance(default_logging_level, int):
        raise ValueError('Invalid level: %s' % config.config['logging_level'])

    if config.config['logging_format']:
        fmt = config.config['logging_format']
    else:
        if config.config['logging_syslog']:
            fmt = '%(levelname).1s:%(filename)s[%(process)d]: %(message)s'
        else:
            fmt = '%(levelname).1s:%(asctime)s: %(message)s'
    if config.config['logging_debug_threads']:
        fmt = f'%(process)d.%(thread)d|{fmt}'
    if config.config['logging_debug_modules']:
        fmt = f'%(filename)s:%(funcName)s:%(lineno)s|{fmt}'
    if config.config['logging_syslog']:
        if sys.platform not in ('win32', 'cygwin'):
            if config.config['logging_syslog_facility']:
                facility_name = 'LOG_' + config.config['logging_syslog_facility']
            facility = SysLogHandler.__dict__.get(facility_name, SysLogHandler.LOG_USER)
            handler = SysLogHandler(facility=facility, address='/dev/log')
            handler.setFormatter(
                MillisecondAwareFormatter(
                    fmt=fmt,
                    datefmt=config.config['logging_date_format'],
                )
            )
            handlers.append(handler)

    if config.config['logging_filename']:
        handler = RotatingFileHandler(
            config.config['logging_filename'],
            maxBytes=config.config['logging_filename_maxsize'],
            backupCount=config.config['logging_filename_count'],
        )
        handler.setFormatter(
            MillisecondAwareFormatter(
                fmt=fmt,
                datefmt=config.config['logging_date_format'],
            )
        )
        handlers.append(handler)

    if config.config['logging_console']:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(
            MillisecondAwareFormatter(
                fmt=fmt,
                datefmt=config.config['logging_date_format'],
            )
        )
        handlers.append(handler)

    if len(handlers) == 0:
        handlers.append(logging.NullHandler())

    for handler in handlers:
        logger.addHandler(handler)
    if config.config['logging_info_is_print']:
        handler = logging.StreamHandler(sys.stdout)
        handler.addFilter(OnlyInfoFilter())
        logger.addHandler(handler)

    if config.config['logging_squelch_repeats']:
        for handler in handlers:
            handler.addFilter(SquelchRepeatedMessagesFilter())

    if config.config['logging_probabilistically']:
        for handler in handlers:
            handler.addFilter(ProbabilisticFilter())

    for handler in handlers:
        handler.addFilter(
            DynamicPerScopeLoggingLevelFilter(
                default_logging_level,
                config.config['lmodule'],
            )
        )
    logger.setLevel(0)
    logger.propagate = False
    if config.config['logging_captures_prints']:
        global built_in_print

        def print_and_also_log(*arg, **kwarg):
            f = kwarg.get('file', None)
            if f == sys.stderr:
                logger.warning(*arg)
            else:
                logger.info(*arg)
            built_in_print(*arg, **kwarg)
        builtins.print = print_and_also_log
    # At this point the logger is ready, handlers are set up,
    # etc... so log about the logging configuration.
    level_name = logging._levelToName.get(default_logging_level, str(default_logging_level))
    logger.debug(
        f'Initialized global logging; default logging level is {level_name}.'
    )
    if config.config['logging_clear_preexisting_handlers'] and preexisting_handlers_count > 0:
        msg = f'Logging cleared {preexisting_handlers_count} global handlers (--logging_clear_preexisting_handlers)'
        logger.warning(msg)
    logger.debug(f'Logging format specification is "{fmt}"')
    if config.config['logging_debug_threads']:
        logger.debug('...Logging format spec captures tid/pid (--logging_debug_threads)')
    if config.config['logging_debug_modules']:
        logger.debug('...Logging format spec captures files/functions/lineno (--logging_debug_modules)')
    if config.config['logging_syslog']:
        logger.debug(f'Logging to syslog as {facility_name} with priority mapping based on level')
    if config.config['logging_filename']:
        logger.debug(f'Logging to filename {config.config["logging_filename"]}')
        logger.debug(f'...with {config.config["logging_filename_maxsize"]} bytes max file size.')
        logger.debug(f'...and {config.config["logging_filename_count"]} rotating backup file count.')
    if config.config['logging_console']:
        logger.debug('Logging to the console (stderr).')
    if config.config['logging_info_is_print']:
        logger.debug(
            'Logging logger.info messages will be repeated on stdout (--logging_info_is_print)'
        )
    if config.config['logging_squelch_repeats']:
        logger.debug(
            'Logging code allowed to request repeated messages be squelched (--logging_squelch_repeats)'
        )
    else:
        logger.debug(
            'Logging code forbidden to request messages be squelched; all messages logged (--no_logging_squelch_repeats)'
        )
    if config.config['logging_probabilistically']:
        logger.debug(
            'Logging code is allowed to request probabilistic logging (--logging_probabilistically)'
        )
    else:
        logger.debug(
            'Logging code is forbidden to request probabilistic logging; messages always logged (--no_logging_probabilistically)'
        )
    if config.config['lmodule']:
        logger.debug(
            f'Logging dynamic per-module logging enabled (--lmodule={config.config["lmodule"]})'
        )
    if config.config['logging_captures_prints']:
        logger.debug('Logging will capture printed data as logger.info messages (--logging_captures_prints)')
    return logger
def get_logger(name: str = ""):
    logger = logging.getLogger(name)
    return initialize_logging(logger)
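# Typical program setup (a sketch; assumes the companion config module exposes
# a parse() entry point, per the config.has_been_parsed() assertion above):
#
#   import config
#   import logging_utils
#
#   def main() -> None:
#       logger = logging_utils.get_logger(__name__)   # Also initializes handlers.
#       logger.debug('Hello from main.')
#
#   if __name__ == '__main__':
#       config.parse()   # Must happen before initialize_logging().
#       main()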
def tprint(*args, **kwargs) -> None:
    """Legacy function for printing a message augmented with thread id
    still needed by some code.  Please use --logging_debug_threads in
    new code.

    """
    if config.config['logging_debug_threads']:
        from thread_utils import current_thread_id

        print(f'{current_thread_id()}', end="")
        print(*args, **kwargs)
def dprint(*args, **kwargs) -> None:
    """Legacy function used to print to stderr still needed by some code.
    Please just use normal logging with --logging_console which
    accomplishes the same thing in new code.

    """
    print(*args, file=sys.stderr, **kwargs)
class OutputMultiplexer(object):
    """
    A class that broadcasts printed messages to several sinks (including
    various logging levels, different files, different file handles,
    the house log, etc...).  See also OutputMultiplexerContext for an
    easy usage pattern.

    """
    class Destination(enum.IntEnum):
        """Bits in the destination_bitv bitvector.  Used to indicate the
        output destination."""
        LOG_DEBUG = 0x01         # ⎫
        LOG_INFO = 0x02          # ⎪
        LOG_WARNING = 0x04       # ⎬ Must provide logger to the c'tor.
        LOG_ERROR = 0x08         # ⎪
        LOG_CRITICAL = 0x10      # ⎭
        FILENAMES = 0x20         # Must provide a filename to the c'tor.
        FILEHANDLES = 0x40       # Must provide a handle to the c'tor.
        HLOG = 0x80
        ALL_LOG_DESTINATIONS = (
            LOG_DEBUG | LOG_INFO | LOG_WARNING | LOG_ERROR | LOG_CRITICAL
        )
        ALL_OUTPUT_DESTINATIONS = 0x8F

    def __init__(self,
                 destination_bitv: int,
                 logger=None,
                 filenames: Optional[Iterable[str]] = None,
                 handles: Optional[Iterable[io.TextIOWrapper]] = None):
        if logger is None:
            logger = logging.getLogger(None)
        self.logger = logger

        if filenames is not None:
            self.f = [
                open(filename, 'wb', buffering=0) for filename in filenames
            ]
        else:
            if destination_bitv & OutputMultiplexer.Destination.FILENAMES:
                raise ValueError(
                    "Filenames argument is required if bitv & FILENAMES"
                )
            self.f = None

        if handles is not None:
            self.h = [handle for handle in handles]
        else:
            if destination_bitv & OutputMultiplexer.Destination.FILEHANDLES:
                raise ValueError(
                    "Handle argument is required if bitv & FILEHANDLES"
                )
            self.h = None

        self.set_destination_bitv(destination_bitv)
    def get_destination_bitv(self):
        return self.destination_bitv

    def set_destination_bitv(self, destination_bitv: int):
        if destination_bitv & self.Destination.FILENAMES and self.f is None:
            raise ValueError(
                "Filename argument is required if bitv & FILENAMES"
            )
        if destination_bitv & self.Destination.FILEHANDLES and self.h is None:
            raise ValueError(
                "Handle argument is required if bitv & FILEHANDLES"
            )
        self.destination_bitv = destination_bitv
    def print(self, *args, **kwargs):
        from string_utils import sprintf, strip_escape_sequences

        end = kwargs.pop("end", None)
        if end is not None:
            if not isinstance(end, str):
                raise TypeError("end must be None or a string")
        sep = kwargs.pop("sep", None)
        if sep is not None:
            if not isinstance(sep, str):
                raise TypeError("sep must be None or a string")
        if kwargs:
            raise TypeError("invalid keyword arguments to print()")
        buf = sprintf(*args, end="", sep=sep)
        if end is None or end == '\n':
            buf += '\n'
        if (
            self.destination_bitv & self.Destination.FILENAMES and
            self.f is not None
        ):
            for _ in self.f:
                _.write(buf.encode('utf-8'))
                _.flush()

        if (
            self.destination_bitv & self.Destination.FILEHANDLES and
            self.h is not None
        ):
            for handle in self.h:
                handle.write(buf)
                handle.flush()

        buf = strip_escape_sequences(buf)
        if self.logger is not None:
            if self.destination_bitv & self.Destination.LOG_DEBUG:
                self.logger.debug(buf)
            if self.destination_bitv & self.Destination.LOG_INFO:
                self.logger.info(buf)
            if self.destination_bitv & self.Destination.LOG_WARNING:
                self.logger.warning(buf)
            if self.destination_bitv & self.Destination.LOG_ERROR:
                self.logger.error(buf)
            if self.destination_bitv & self.Destination.LOG_CRITICAL:
                self.logger.critical(buf)
        if self.destination_bitv & self.Destination.HLOG:
            hlog(buf)

    def close(self):
        if self.f is not None:
            for _ in self.f:
                _.close()
class OutputMultiplexerContext(OutputMultiplexer, contextlib.ContextDecorator):
    """
    A context that uses an OutputMultiplexer.  e.g.

        with OutputMultiplexerContext(
                OutputMultiplexer.Destination.LOG_INFO |
                OutputMultiplexer.Destination.LOG_DEBUG |
                OutputMultiplexer.Destination.FILENAMES |
                OutputMultiplexer.Destination.FILEHANDLES,
                filenames = [ '/tmp/foo.log', '/var/log/bar.log' ],
                handles = [ sys.stdout ],
        ) as mplex:
            mplex.print("This is a log message!")

    """
    def __init__(self, destination_bitv: OutputMultiplexer.Destination,
                 logger=None, filenames=None, handles=None):
        super().__init__(destination_bitv, logger=logger,
                         filenames=filenames, handles=handles)

    def __enter__(self):
        return self

    def __exit__(self, etype, value, traceback) -> bool:
        super().close()
        if etype is not None:
            return False
        return True
def hlog(message: str) -> None:
    """Write a message to the house log (syslog facility local7 priority
    info) by calling /usr/bin/logger.  This is pretty hacky but used
    by a bunch of code.  Another way to do this would be to use
    --logging_syslog and --logging_syslog_facility but I can't
    actually say that's easier.

    """
    message = message.replace("'", "'\"'\"'")
    os.system(f"/usr/bin/logger -p local7.info -- '{message}'")
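# For example (an illustrative sketch), hlog("it's done") ends up running roughly:
#
#   /usr/bin/logger -p local7.info -- 'it'"'"'s done'
#
# i.e. the replace() above re-quotes embedded single quotes so the message
# survives intact inside the single-quoted shell argument.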
if __name__ == '__main__':