3 """Utilities related to logging."""
11 from logging.handlers import RotatingFileHandler, SysLogHandler
from typing import Callable, Dict, Iterable, Mapping, Optional
17 from overrides import overrides
20 # This module is commonly used by others in here and should avoid
21 # taking any unnecessary dependencies back on them.
25 cfg = config.add_commandline_args(
26 f'Logging ({__file__})',
27 'Args related to logging')
29 '--logging_config_file',
30 type=argparse_utils.valid_filename,
33 help='Config file containing the logging setup, see: https://docs.python.org/3/howto/logging.html#logging-advanced-tutorial',
39 choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
41 help='The global default level below which to squelch log messages; see also --lmodule',
47 help='The format for lines logged via the logger module. See: https://docs.python.org/3/library/logging.html#formatter-objects'
50 '--logging_date_format',
52 default='%Y/%m/%dT%H:%M:%S.%f%z',
54 help='The format of any dates in --logging_format.'
58 action=argparse_utils.ActionNoYes,
60 help='Should we log to the console (stderr)',
67 help='The filename of the logfile to write.'
70 '--logging_filename_maxsize',
74 help='The maximum size (in bytes) to write to the logging_filename.'
77 '--logging_filename_count',
81 help='The number of logging_filename copies to keep before deleting.'
85 action=argparse_utils.ActionNoYes,
87 help='Should we log to localhost\'s syslog.'
90 '--logging_syslog_facility',
93 choices=['NOTSET', 'AUTH', 'AUTH_PRIV', 'CRON', 'DAEMON', 'FTP', 'KERN', 'LPR', 'MAIL', 'NEWS',
94 'SYSLOG', 'USER', 'UUCP', 'LOCAL0', 'LOCAL1', 'LOCAL2', 'LOCAL3', 'LOCAL4', 'LOCAL5',
96 metavar='SYSLOG_FACILITY_LIST',
97 help='The default syslog message facility identifier',
100 '--logging_debug_threads',
101 action=argparse_utils.ActionNoYes,
103 help='Should we prepend pid/tid data to all log messages?'
106 '--logging_debug_modules',
107 action=argparse_utils.ActionNoYes,
109 help='Should we prepend module/function data to all log messages?'
112 '--logging_info_is_print',
113 action=argparse_utils.ActionNoYes,
115 help='logging.info also prints to stdout.'
118 '--logging_squelch_repeats',
119 action=argparse_utils.ActionNoYes,
121 help='Do we allow code to indicate that it wants to squelch repeated logging messages or should we always log?'
124 '--logging_probabilistically',
125 action=argparse_utils.ActionNoYes,
127 help='Do we allow probabilistic logging (for code that wants it) or should we always log?'
129 # See also: OutputMultiplexer
131 '--logging_captures_prints',
132 action=argparse_utils.ActionNoYes,
134 help='When calling print, also log.info automatically.'
139 metavar='<SCOPE>=<LEVEL>[,<SCOPE>=<LEVEL>...]',
141 'Allows per-scope logging levels which override the global level set with --logging-level.' +
142 'Pass a space separated list of <scope>=<level> where <scope> is one of: module, ' +
143 'module:function, or :function and <level> is a logging level (e.g. INFO, DEBUG...)'
147 '--logging_clear_preexisting_handlers',
148 action=argparse_utils.ActionNoYes,
151 'Should logging code clear preexisting global logging handlers and thus insist that is ' +
152 'alone can add handlers. Use this to work around annoying modules that insert global ' +
153 'handlers with formats and logging levels you might now want. Caveat emptor, this may ' +
154 'cause you to miss logging messages.'
158 built_in_print = print
159 logging_initialized = False
162 def function_identifier(f: Callable) -> str:
164 Given a callable function, return a string that identifies it.
165 Usually that string is just __module__:__name__ but there's a
166 corner case: when __module__ is __main__ (i.e. the callable is
167 defined in the same module as __main__). In this case,
168 f.__module__ returns "__main__" instead of the file that it is
169 defined in. Work around this using pathlib.Path (see below).
171 >>> function_identifier(function_identifier)
172 'logging_utils:function_identifier'
175 if f.__module__ == '__main__':
176 from pathlib import Path
178 module = __main__.__file__
179 module = Path(module).stem
180 return f'{module}:{f.__name__}'
182 return f'{f.__module__}:{f.__name__}'
185 # A map from logging_callsite_id -> count of logged messages.
186 squelched_logging_counts: Mapping[str, int] = {}
189 def squelch_repeated_log_messages(squelch_after_n_repeats: int) -> Callable:
191 A decorator that marks a function as interested in having the logging
192 messages that it produces be squelched (ignored) after it logs the
193 same message more than N times.
195 Note: this decorator affects *ALL* logging messages produced
196 within the decorated function. That said, messages must be
197 identical in order to be squelched. For example, if the same line
198 of code produces different messages (because of, e.g., a format
199 string), the messages are considered to be different.
202 def squelch_logging_wrapper(f: Callable):
203 identifier = function_identifier(f)
204 squelched_logging_counts[identifier] = squelch_after_n_repeats
206 return squelch_logging_wrapper
209 class SquelchRepeatedMessagesFilter(logging.Filter):
211 A filter that only logs messages from a given site with the same
212 (exact) message at the same logging level N times and ignores
213 subsequent attempts to log.
215 This filter only affects logging messages that repeat more than
216 a threshold number of times from functions that are tagged with
217 the @logging_utils.squelched_logging_ok decorator; others are
220 This functionality is enabled by default but can be disabled via
221 the --no_logging_squelch_repeats commandline flag.
224 def __init__(self) -> None:
225 self.counters = collections.Counter()
229 def filter(self, record: logging.LogRecord) -> bool:
230 id1 = f'{record.module}:{record.funcName}'
231 if id1 not in squelched_logging_counts:
233 threshold = squelched_logging_counts[id1]
234 logsite = f'{record.pathname}+{record.lineno}+{record.levelno}+{record.msg}'
235 count = self.counters[logsite]
236 self.counters[logsite] += 1
237 return count < threshold
240 class DynamicPerScopeLoggingLevelFilter(logging.Filter):
241 """This filter only allows logging messages from an allow list of
242 module names or module:function names. Blocks others.
246 def level_name_to_level(name: str) -> int:
247 numeric_level = getattr(
252 if not isinstance(numeric_level, int):
253 raise ValueError('Invalid level: {name}')
258 default_logging_level: int,
259 per_scope_logging_levels: str,
262 self.valid_levels = set(['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'])
263 self.default_logging_level = default_logging_level
264 self.level_by_scope = {}
265 if per_scope_logging_levels is not None:
266 for chunk in per_scope_logging_levels.split(','):
269 f'Malformed lmodule directive: "{chunk}", missing "=". Ignored.',
274 (scope, level) = chunk.split('=')
277 f'Malformed lmodule directive: "{chunk}". Ignored.',
281 scope = scope.strip()
282 level = level.strip().upper()
283 if level not in self.valid_levels:
285 f'Malformed lmodule directive: "{chunk}", bad level. Ignored.',
289 self.level_by_scope[scope] = (
290 DynamicPerScopeLoggingLevelFilter.level_name_to_level(
296 def filter(self, record: logging.LogRecord) -> bool:
297 # First try to find a logging level by scope (--lmodule)
298 if len(self.level_by_scope) > 0:
302 f'{record.module}:{record.funcName}',
303 f':{record.funcName}'
305 level = self.level_by_scope.get(scope, None)
306 if level is not None:
307 if min_level is None or level < min_level:
310 # If we found one, use it instead of the global default level.
311 if min_level is not None:
312 return record.levelno >= min_level
314 # Otherwise, use the global logging level (--logging_level)
315 return record.levelno >= self.default_logging_level
318 # A map from function_identifier -> probability of logging (0.0%..100.0%)
319 probabilistic_logging_levels: Mapping[str, float] = {}
322 def logging_is_probabilistic(probability_of_logging: float) -> Callable:
324 A decorator that indicates that all logging statements within the
325 scope of a particular (marked) function are not deterministic
326 (i.e. they do not always unconditionally log) but rather are
327 probabilistic (i.e. they log N% of the time randomly).
329 Note that this functionality can be disabled (forcing all logged
330 messages to produce output) via the --no_logging_probabilistically
333 This affects *ALL* logging statements within the marked function.
336 def probabilistic_logging_wrapper(f: Callable):
337 identifier = function_identifier(f)
338 probabilistic_logging_levels[identifier] = probability_of_logging
340 return probabilistic_logging_wrapper
343 class ProbabilisticFilter(logging.Filter):
345 A filter that logs messages probabilistically (i.e. randomly at some
348 This filter only affects logging messages from functions that have
349 been tagged with the @logging_utils.probabilistic_logging decorator.
353 def filter(self, record: logging.LogRecord) -> bool:
354 id1 = f'{record.module}:{record.funcName}'
355 if id1 not in probabilistic_logging_levels:
357 threshold = probabilistic_logging_levels[id1]
358 return (random.random() * 100.0) <= threshold
361 class OnlyInfoFilter(logging.Filter):
363 A filter that only logs messages produced at the INFO logging
364 level. This is used by the logging_info_is_print commandline
365 option to select a subset of the logging stream to send to a
370 def filter(self, record: logging.LogRecord):
371 return record.levelno == logging.INFO
374 class MillisecondAwareFormatter(logging.Formatter):
376 A formatter for adding milliseconds to log messages which, for
377 whatever reason, the default python logger doesn't do.
380 converter = datetime.datetime.fromtimestamp
383 def formatTime(self, record, datefmt=None):
384 ct = MillisecondAwareFormatter.converter(
385 record.created, pytz.timezone("US/Pacific")
388 s = ct.strftime(datefmt)
390 t = ct.strftime("%Y-%m-%d %H:%M:%S")
391 s = "%s,%03d" % (t, record.msecs)
395 def initialize_logging(logger=None) -> logging.Logger:
396 global logging_initialized
397 if logging_initialized:
399 logging_initialized = True
402 logger = logging.getLogger()
404 preexisting_handlers_count = 0
405 assert config.has_been_parsed()
406 if config.config['logging_clear_preexisting_handlers']:
407 while logger.hasHandlers():
408 logger.removeHandler(logger.handlers[0])
409 preexisting_handlers_count += 1
411 if config.config['logging_config_file'] is not None:
412 logging.config.fileConfig('logging.conf')
417 # Global default logging level (--logging_level)
418 default_logging_level = getattr(
420 config.config['logging_level'].upper(),
423 if not isinstance(default_logging_level, int):
424 raise ValueError('Invalid level: %s' % config.config['logging_level'])
426 if config.config['logging_format']:
427 fmt = config.config['logging_format']
429 if config.config['logging_syslog']:
430 fmt = '%(levelname).1s:%(filename)s[%(process)d]: %(message)s'
432 fmt = '%(levelname).1s:%(asctime)s: %(message)s'
433 if config.config['logging_debug_threads']:
434 fmt = f'%(process)d.%(thread)d|{fmt}'
435 if config.config['logging_debug_modules']:
436 fmt = f'%(filename)s:%(funcName)s:%(lineno)s|{fmt}'
438 if config.config['logging_syslog']:
439 if sys.platform not in ('win32', 'cygwin'):
440 if config.config['logging_syslog_facility']:
441 facility_name = 'LOG_' + config.config['logging_syslog_facility']
442 facility = SysLogHandler.__dict__.get(facility_name, SysLogHandler.LOG_USER)
443 handler = SysLogHandler(facility=facility, address='/dev/log')
444 handler.setFormatter(
445 MillisecondAwareFormatter(
447 datefmt=config.config['logging_date_format'],
450 handlers.append(handler)
452 if config.config['logging_filename']:
453 handler = RotatingFileHandler(
454 config.config['logging_filename'],
455 maxBytes = config.config['logging_filename_maxsize'],
456 backupCount = config.config['logging_filename_count'],
458 handler.setFormatter(
459 MillisecondAwareFormatter(
461 datefmt=config.config['logging_date_format'],
464 handlers.append(handler)
466 if config.config['logging_console']:
467 handler = logging.StreamHandler(sys.stderr)
468 handler.setFormatter(
469 MillisecondAwareFormatter(
471 datefmt=config.config['logging_date_format'],
474 handlers.append(handler)
476 if len(handlers) == 0:
477 handlers.append(logging.NullHandler())
479 for handler in handlers:
480 logger.addHandler(handler)
482 if config.config['logging_info_is_print']:
483 handler = logging.StreamHandler(sys.stdout)
484 handler.addFilter(OnlyInfoFilter())
485 logger.addHandler(handler)
487 if config.config['logging_squelch_repeats']:
488 for handler in handlers:
489 handler.addFilter(SquelchRepeatedMessagesFilter())
491 if config.config['logging_probabilistically']:
492 for handler in handlers:
493 handler.addFilter(ProbabilisticFilter())
495 for handler in handlers:
497 DynamicPerScopeLoggingLevelFilter(
498 default_logging_level,
499 config.config['lmodule'],
503 logger.propagate = False
505 if config.config['logging_captures_prints']:
507 global built_in_print
509 def print_and_also_log(*arg, **kwarg):
510 f = kwarg.get('file', None)
515 built_in_print(*arg, **kwarg)
516 builtins.print = print_and_also_log
518 # At this point the logger is ready, handlers are set up,
519 # etc... so log about the logging configuration.
521 level_name = logging._levelToName.get(default_logging_level, str(default_logging_level))
523 f'Initialized global logging; default logging level is {level_name}.'
525 if config.config['logging_clear_preexisting_handlers'] and preexisting_handlers_count > 0:
527 'Logging cleared {preexisting_handlers_count} global handlers (--logging_clear_preexisting_handlers)'
529 logger.debug(f'Logging format specification is "{fmt}"')
530 if config.config['logging_debug_threads']:
531 logger.debug('...Logging format spec captures tid/pid (--logging_debug_threads)')
532 if config.config['logging_debug_modules']:
533 logger.debug('...Logging format spec captures files/functions/lineno (--logging_debug_modules)')
534 if config.config['logging_syslog']:
535 logger.debug(f'Logging to syslog as {facility_name} with priority mapping based on level')
536 if config.config['logging_filename']:
537 logger.debug(f'Logging to filename {config.config["logging_filename"]}')
538 logger.debug(f'...with {config.config["logging_filename_maxsize"]} bytes max file size.')
539 logger.debug(f'...and {config.config["logging_filename_count"]} rotating backup file count.')
540 if config.config['logging_console']:
541 logger.debug('Logging to the console (stderr).')
542 if config.config['logging_info_is_print']:
544 'Logging logger.info messages will be repeated on stdout (--logging_info_is_print)'
546 if config.config['logging_squelch_repeats']:
548 'Logging code allowed to request repeated messages be squelched (--logging_squelch_repeats)'
552 'Logging code forbidden to request messages be squelched; all messages logged (--no_logging_squelch_repeats)'
554 if config.config['logging_probabilistically']:
556 'Logging code is allowed to request probabilistic logging (--logging_probabilistically)'
560 'Logging code is forbidden to request probabilistic logging; messages always logged (--no_logging_probabilistically)'
562 if config.config['lmodule']:
564 'Logging dynamic per-module logging enabled (--lmodule={config.config["lmodule"]})'
566 if config.config['logging_captures_prints']:
567 logger.debug('Logging will capture printed data as logger.info messages (--logging_captures_prints)')
571 def get_logger(name: str = ""):
572 logger = logging.getLogger(name)
573 return initialize_logging(logger)
576 def tprint(*args, **kwargs) -> None:
577 """Legacy function for printing a message augmented with thread id
578 still needed by some code. Please use --logging_debug_threads in
582 if config.config['logging_debug_threads']:
583 from thread_utils import current_thread_id
584 print(f'{current_thread_id()}', end="")
585 print(*args, **kwargs)
590 def dprint(*args, **kwargs) -> None:
591 """Legacy function used to print to stderr still needed by some code.
592 Please just use normal logging with --logging_console which
593 accomplishes the same thing in new code.
596 print(*args, file=sys.stderr, **kwargs)
599 class OutputMultiplexer(object):
601 A class that broadcasts printed messages to several sinks (including
602 various logging levels, different files, different file handles,
603 the house log, etc...). See also OutputMultiplexerContext for an
607 class Destination(enum.IntEnum):
608 """Bits in the destination_bitv bitvector. Used to indicate the
609 output destination."""
612 LOG_WARNING = 0x04 # ⎬ Must provide logger to the c'tor.
614 LOG_CRITICAL = 0x10 # ⎭
615 FILENAMES = 0x20 # Must provide a filename to the c'tor.
616 FILEHANDLES = 0x40 # Must provide a handle to the c'tor.
618 ALL_LOG_DESTINATIONS = (
619 LOG_DEBUG | LOG_INFO | LOG_WARNING | LOG_ERROR | LOG_CRITICAL
621 ALL_OUTPUT_DESTINATIONS = 0x8F
624 destination_bitv: int,
627 filenames: Optional[Iterable[str]] = None,
628 handles: Optional[Iterable[io.TextIOWrapper]] = None):
630 logger = logging.getLogger(None)
633 if filenames is not None:
635 open(filename, 'wb', buffering=0) for filename in filenames
638 if destination_bitv & OutputMultiplexer.FILENAMES:
640 "Filenames argument is required if bitv & FILENAMES"
644 if handles is not None:
645 self.h = [handle for handle in handles]
647 if destination_bitv & OutputMultiplexer.Destination.FILEHANDLES:
649 "Handle argument is required if bitv & FILEHANDLES"
653 self.set_destination_bitv(destination_bitv)
655 def get_destination_bitv(self):
656 return self.destination_bitv
658 def set_destination_bitv(self, destination_bitv: int):
659 if destination_bitv & self.Destination.FILENAMES and self.f is None:
661 "Filename argument is required if bitv & FILENAMES"
663 if destination_bitv & self.Destination.FILEHANDLES and self.h is None:
665 "Handle argument is required if bitv & FILEHANDLES"
667 self.destination_bitv = destination_bitv
669 def print(self, *args, **kwargs):
670 from string_utils import sprintf, strip_escape_sequences
671 end = kwargs.pop("end", None)
673 if not isinstance(end, str):
674 raise TypeError("end must be None or a string")
675 sep = kwargs.pop("sep", None)
677 if not isinstance(sep, str):
678 raise TypeError("sep must be None or a string")
680 raise TypeError("invalid keyword arguments to print()")
681 buf = sprintf(*args, end="", sep=sep)
689 self.destination_bitv & self.Destination.FILENAMES and
693 _.write(buf.encode('utf-8'))
697 self.destination_bitv & self.Destination.FILEHANDLES and
704 buf = strip_escape_sequences(buf)
705 if self.logger is not None:
706 if self.destination_bitv & self.Destination.LOG_DEBUG:
707 self.logger.debug(buf)
708 if self.destination_bitv & self.Destination.LOG_INFO:
709 self.logger.info(buf)
710 if self.destination_bitv & self.Destination.LOG_WARNING:
711 self.logger.warning(buf)
712 if self.destination_bitv & self.Destination.LOG_ERROR:
713 self.logger.error(buf)
714 if self.destination_bitv & self.Destination.LOG_CRITICAL:
715 self.logger.critical(buf)
716 if self.destination_bitv & self.Destination.HLOG:
720 if self.f is not None:
725 class OutputMultiplexerContext(OutputMultiplexer, contextlib.ContextDecorator):
727 A context that uses an OutputMultiplexer. e.g.
729 with OutputMultiplexerContext(
730 OutputMultiplexer.LOG_INFO |
731 OutputMultiplexer.LOG_DEBUG |
732 OutputMultiplexer.FILENAMES |
733 OutputMultiplexer.FILEHANDLES,
734 filenames = [ '/tmp/foo.log', '/var/log/bar.log' ],
737 mplex.print("This is a log message!")
741 destination_bitv: OutputMultiplexer.Destination,
755 def __exit__(self, etype, value, traceback) -> bool:
757 if etype is not None:
762 def hlog(message: str) -> None:
763 """Write a message to the house log (syslog facility local7 priority
764 info) by calling /usr/bin/logger. This is pretty hacky but used
765 by a bunch of code. Another way to do this would be to use
766 --logging_syslog and --logging_syslog_facility but I can't
767 actually say that's easier.
770 message = message.replace("'", "'\"'\"'")
771 os.system(f"/usr/bin/logger -p local7.info -- '{message}'")
774 if __name__ == '__main__':