X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=logging_utils.py;h=c04d76d610708b5ff731c9ca4cd5a4f988c35867;hb=28034f6b1df572a3a87c3bc2181f806c4e47b2eb;hp=183e1f0109eb8b8dc243b23e9cceb431b01073e0;hpb=1fa049c0ce2af2027e362e368321636c75c987de;p=python_utils.git diff --git a/logging_utils.py b/logging_utils.py index 183e1f0..c04d76d 100644 --- a/logging_utils.py +++ b/logging_utils.py @@ -10,11 +10,13 @@ import io import logging from logging.handlers import RotatingFileHandler, SysLogHandler import os -import pytz import random import sys from typing import Callable, Iterable, Mapping, Optional +from overrides import overrides +import pytz + # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. import argparse_utils @@ -36,13 +38,13 @@ cfg.add_argument( default='INFO', choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], metavar='LEVEL', - help='The level below which to squelch log messages.', + help='The global default level below which to squelch log messages; see also --lmodule', ) cfg.add_argument( '--logging_format', type=str, - default='%(levelname).1s:%(asctime)s: %(message)s', - help='The format for lines logged via the logger module.' + default=None, + help='The format for lines logged via the logger module. See: https://docs.python.org/3/library/logging.html#formatter-objects' ) cfg.add_argument( '--logging_date_format', @@ -74,7 +76,7 @@ cfg.add_argument( cfg.add_argument( '--logging_filename_count', type=int, - default=2, + default=7, metavar='COUNT', help='The number of logging_filename copies to keep before deleting.' ) @@ -84,12 +86,28 @@ cfg.add_argument( default=False, help='Should we log to localhost\'s syslog.' ) +cfg.add_argument( + '--logging_syslog_facility', + type=str, + default = 'USER', + choices=['NOTSET', 'AUTH', 'AUTH_PRIV', 'CRON', 'DAEMON', 'FTP', 'KERN', 'LPR', 'MAIL', 'NEWS', + 'SYSLOG', 'USER', 'UUCP', 'LOCAL0', 'LOCAL1', 'LOCAL2', 'LOCAL3', 'LOCAL4', 'LOCAL5', + 'LOCAL6', 'LOCAL7'], + metavar='SYSLOG_FACILITY_LIST', + help='The default syslog message facility identifier', +) cfg.add_argument( '--logging_debug_threads', action=argparse_utils.ActionNoYes, default=False, help='Should we prepend pid/tid data to all log messages?' ) +cfg.add_argument( + '--logging_debug_modules', + action=argparse_utils.ActionNoYes, + default=False, + help='Should we prepend module/function data to all log messages?' +) cfg.add_argument( '--logging_info_is_print', action=argparse_utils.ActionNoYes, @@ -97,13 +115,13 @@ cfg.add_argument( help='logging.info also prints to stdout.' ) cfg.add_argument( - '--logging_squelch_repeats_enabled', + '--logging_squelch_repeats', action=argparse_utils.ActionNoYes, default=True, help='Do we allow code to indicate that it wants to squelch repeated logging messages or should we always log?' ) cfg.add_argument( - '--logging_probabilistically_enabled', + '--logging_probabilistically', action=argparse_utils.ActionNoYes, default=True, help='Do we allow probabilistic logging (for code that wants it) or should we always log?' @@ -115,31 +133,30 @@ cfg.add_argument( default=False, help='When calling print, also log.info automatically.' ) +cfg.add_argument( + '--lmodule', + type=str, + metavar='=[,=...]', + help=( + 'Allows per-scope logging levels which override the global level set with --logging-level.' + + 'Pass a space separated list of = where is one of: module, ' + + 'module:function, or :function and is a logging level (e.g. INFO, DEBUG...)' + ) +) +cfg.add_argument( + '--logging_clear_preexisting_handlers', + action=argparse_utils.ActionNoYes, + default=True, + help=( + 'Should logging code clear preexisting global logging handlers and thus insist that is ' + + 'alone can add handlers. Use this to work around annoying modules that insert global ' + + 'handlers with formats and logging levels you might now want. Caveat emptor, this may ' + + 'cause you to miss logging messages.' + ) +) built_in_print = print - - -def function_identifier(f: Callable) -> str: - """ - Given a callable function, return a string that identifies it. - Usually that string is just __module__:__name__ but there's a - corner case: when __module__ is __main__ (i.e. the callable is - defined in the same module as __main__). In this case, - f.__module__ returns "__main__" instead of the file that it is - defined in. Work around this using pathlib.Path (see below). - - >>> function_identifier(function_identifier) - 'logging_utils:function_identifier' - - """ - if f.__module__ == '__main__': - from pathlib import Path - import __main__ - module = __main__.__file__ - module = Path(module).stem - return f'{module}:{f.__name__}' - else: - return f'{f.__module__}:{f.__name__}' +logging_initialized = False # A map from logging_callsite_id -> count of logged messages. @@ -160,7 +177,8 @@ def squelch_repeated_log_messages(squelch_after_n_repeats: int) -> Callable: """ def squelch_logging_wrapper(f: Callable): - identifier = function_identifier(f) + import function_utils + identifier = function_utils.function_identifier(f) squelched_logging_counts[identifier] = squelch_after_n_repeats return f return squelch_logging_wrapper @@ -174,13 +192,18 @@ class SquelchRepeatedMessagesFilter(logging.Filter): This filter only affects logging messages that repeat more than a threshold number of times from functions that are tagged with - the @logging_utils.squelched_logging_ok decorator. + the @logging_utils.squelched_logging_ok decorator; others are + ignored. + + This functionality is enabled by default but can be disabled via + the --no_logging_squelch_repeats commandline flag. """ def __init__(self) -> None: self.counters = collections.Counter() super().__init__() + @overrides def filter(self, record: logging.LogRecord) -> bool: id1 = f'{record.module}:{record.funcName}' if id1 not in squelched_logging_counts: @@ -192,6 +215,84 @@ class SquelchRepeatedMessagesFilter(logging.Filter): return count < threshold +class DynamicPerScopeLoggingLevelFilter(logging.Filter): + """This filter only allows logging messages from an allow list of + module names or module:function names. Blocks others. + + """ + @staticmethod + def level_name_to_level(name: str) -> int: + numeric_level = getattr( + logging, + name, + None + ) + if not isinstance(numeric_level, int): + raise ValueError('Invalid level: {name}') + return numeric_level + + def __init__( + self, + default_logging_level: int, + per_scope_logging_levels: str, + ) -> None: + super().__init__() + self.valid_levels = set(['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']) + self.default_logging_level = default_logging_level + self.level_by_scope = {} + if per_scope_logging_levels is not None: + for chunk in per_scope_logging_levels.split(','): + if '=' not in chunk: + print( + f'Malformed lmodule directive: "{chunk}", missing "=". Ignored.', + file=sys.stderr + ) + continue + try: + (scope, level) = chunk.split('=') + except ValueError: + print( + f'Malformed lmodule directive: "{chunk}". Ignored.', + file=sys.stderr + ) + continue + scope = scope.strip() + level = level.strip().upper() + if level not in self.valid_levels: + print( + f'Malformed lmodule directive: "{chunk}", bad level. Ignored.', + file=sys.stderr + ) + continue + self.level_by_scope[scope] = ( + DynamicPerScopeLoggingLevelFilter.level_name_to_level( + level + ) + ) + + @overrides + def filter(self, record: logging.LogRecord) -> bool: + # First try to find a logging level by scope (--lmodule) + if len(self.level_by_scope) > 0: + min_level = None + for scope in ( + record.module, + f'{record.module}:{record.funcName}', + f':{record.funcName}' + ): + level = self.level_by_scope.get(scope, None) + if level is not None: + if min_level is None or level < min_level: + min_level = level + + # If we found one, use it instead of the global default level. + if min_level is not None: + return record.levelno >= min_level + + # Otherwise, use the global logging level (--logging_level) + return record.levelno >= self.default_logging_level + + # A map from function_identifier -> probability of logging (0.0%..100.0%) probabilistic_logging_levels: Mapping[str, float] = {} @@ -203,11 +304,16 @@ def logging_is_probabilistic(probability_of_logging: float) -> Callable: (i.e. they do not always unconditionally log) but rather are probabilistic (i.e. they log N% of the time randomly). + Note that this functionality can be disabled (forcing all logged + messages to produce output) via the --no_logging_probabilistically + cmdline argument. + This affects *ALL* logging statements within the marked function. """ def probabilistic_logging_wrapper(f: Callable): - identifier = function_identifier(f) + import function_utils + identifier = function_utils.function_identifier(f) probabilistic_logging_levels[identifier] = probability_of_logging return f return probabilistic_logging_wrapper @@ -222,6 +328,7 @@ class ProbabilisticFilter(logging.Filter): been tagged with the @logging_utils.probabilistic_logging decorator. """ + @overrides def filter(self, record: logging.LogRecord) -> bool: id1 = f'{record.module}:{record.funcName}' if id1 not in probabilistic_logging_levels: @@ -238,17 +345,20 @@ class OnlyInfoFilter(logging.Filter): stdout handler. """ - def filter(self, record): + @overrides + def filter(self, record: logging.LogRecord): return record.levelno == logging.INFO class MillisecondAwareFormatter(logging.Formatter): """ - A formatter for adding milliseconds to log messages. + A formatter for adding milliseconds to log messages which, for + whatever reason, the default python logger doesn't do. """ converter = datetime.datetime.fromtimestamp + @overrides def formatTime(self, record, datefmt=None): ct = MillisecondAwareFormatter.converter( record.created, pytz.timezone("US/Pacific") @@ -262,39 +372,60 @@ class MillisecondAwareFormatter(logging.Formatter): def initialize_logging(logger=None) -> logging.Logger: - assert config.has_been_parsed() + global logging_initialized + if logging_initialized: + return + logging_initialized = True + if logger is None: - logger = logging.getLogger() # Root logger + logger = logging.getLogger() + + preexisting_handlers_count = 0 + assert config.has_been_parsed() + if config.config['logging_clear_preexisting_handlers']: + while logger.hasHandlers(): + logger.removeHandler(logger.handlers[0]) + preexisting_handlers_count += 1 if config.config['logging_config_file'] is not None: logging.config.fileConfig('logging.conf') return logger handlers = [] - numeric_level = getattr( + + # Global default logging level (--logging_level) + default_logging_level = getattr( logging, config.config['logging_level'].upper(), None ) - if not isinstance(numeric_level, int): + if not isinstance(default_logging_level, int): raise ValueError('Invalid level: %s' % config.config['logging_level']) - fmt = config.config['logging_format'] + if config.config['logging_format']: + fmt = config.config['logging_format'] + else: + if config.config['logging_syslog']: + fmt = '%(levelname).1s:%(filename)s[%(process)d]: %(message)s' + else: + fmt = '%(levelname).1s:%(asctime)s: %(message)s' if config.config['logging_debug_threads']: fmt = f'%(process)d.%(thread)d|{fmt}' + if config.config['logging_debug_modules']: + fmt = f'%(filename)s:%(funcName)s:%(lineno)s|{fmt}' if config.config['logging_syslog']: if sys.platform not in ('win32', 'cygwin'): - handler = SysLogHandler() -# for k, v in encoded_priorities.items(): -# handler.encodePriority(k, v) + if config.config['logging_syslog_facility']: + facility_name = 'LOG_' + config.config['logging_syslog_facility'] + facility = SysLogHandler.__dict__.get(facility_name, SysLogHandler.LOG_USER) + handler = SysLogHandler(facility=facility, address='/dev/log') handler.setFormatter( MillisecondAwareFormatter( fmt=fmt, datefmt=config.config['logging_date_format'], ) ) - handler.setLevel(numeric_level) handlers.append(handler) if config.config['logging_filename']: @@ -303,7 +434,6 @@ def initialize_logging(logger=None) -> logging.Logger: maxBytes = config.config['logging_filename_maxsize'], backupCount = config.config['logging_filename_count'], ) - handler.setLevel(numeric_level) handler.setFormatter( MillisecondAwareFormatter( fmt=fmt, @@ -314,7 +444,6 @@ def initialize_logging(logger=None) -> logging.Logger: if config.config['logging_console']: handler = logging.StreamHandler(sys.stderr) - handler.setLevel(numeric_level) handler.setFormatter( MillisecondAwareFormatter( fmt=fmt, @@ -334,15 +463,22 @@ def initialize_logging(logger=None) -> logging.Logger: handler.addFilter(OnlyInfoFilter()) logger.addHandler(handler) - if config.config['logging_squelch_repeats_enabled']: + if config.config['logging_squelch_repeats']: for handler in handlers: handler.addFilter(SquelchRepeatedMessagesFilter()) - if config.config['logging_probabilistically_enabled']: + if config.config['logging_probabilistically']: for handler in handlers: handler.addFilter(ProbabilisticFilter()) - logger.setLevel(numeric_level) + for handler in handlers: + handler.addFilter( + DynamicPerScopeLoggingLevelFilter( + default_logging_level, + config.config['lmodule'], + ) + ) + logger.setLevel(0) logger.propagate = False if config.config['logging_captures_prints']: @@ -358,6 +494,55 @@ def initialize_logging(logger=None) -> logging.Logger: built_in_print(*arg, **kwarg) builtins.print = print_and_also_log + # At this point the logger is ready, handlers are set up, + # etc... so log about the logging configuration. + + level_name = logging._levelToName.get(default_logging_level, str(default_logging_level)) + logger.debug( + f'Initialized global logging; default logging level is {level_name}.' + ) + if config.config['logging_clear_preexisting_handlers'] and preexisting_handlers_count > 0: + msg = f'Logging cleared {preexisting_handlers_count} global handlers (--logging_clear_preexisting_handlers)' + logger.warning(msg) + logger.debug(f'Logging format specification is "{fmt}"') + if config.config['logging_debug_threads']: + logger.debug('...Logging format spec captures tid/pid (--logging_debug_threads)') + if config.config['logging_debug_modules']: + logger.debug('...Logging format spec captures files/functions/lineno (--logging_debug_modules)') + if config.config['logging_syslog']: + logger.debug(f'Logging to syslog as {facility_name} with priority mapping based on level') + if config.config['logging_filename']: + logger.debug(f'Logging to filename {config.config["logging_filename"]}') + logger.debug(f'...with {config.config["logging_filename_maxsize"]} bytes max file size.') + logger.debug(f'...and {config.config["logging_filename_count"]} rotating backup file count.') + if config.config['logging_console']: + logger.debug('Logging to the console (stderr).') + if config.config['logging_info_is_print']: + logger.debug( + 'Logging logger.info messages will be repeated on stdout (--logging_info_is_print)' + ) + if config.config['logging_squelch_repeats']: + logger.debug( + 'Logging code allowed to request repeated messages be squelched (--logging_squelch_repeats)' + ) + else: + logger.debug( + 'Logging code forbidden to request messages be squelched; all messages logged (--no_logging_squelch_repeats)' + ) + if config.config['logging_probabilistically']: + logger.debug( + 'Logging code is allowed to request probabilistic logging (--logging_probabilistically)' + ) + else: + logger.debug( + 'Logging code is forbidden to request probabilistic logging; messages always logged (--no_logging_probabilistically)' + ) + if config.config['lmodule']: + logger.debug( + 'Logging dynamic per-module logging enabled (--lmodule={config.config["lmodule"]})' + ) + if config.config['logging_captures_prints']: + logger.debug('Logging will capture printed data as logger.info messages (--logging_captures_prints)') return logger @@ -367,8 +552,11 @@ def get_logger(name: str = ""): def tprint(*args, **kwargs) -> None: - """Legacy function for printing a message augmented with thread id.""" + """Legacy function for printing a message augmented with thread id + still needed by some code. Please use --logging_debug_threads in + new code. + """ if config.config['logging_debug_threads']: from thread_utils import current_thread_id print(f'{current_thread_id()}', end="") @@ -378,8 +566,11 @@ def tprint(*args, **kwargs) -> None: def dprint(*args, **kwargs) -> None: - """Legacy function used to print to stderr.""" + """Legacy function used to print to stderr still needed by some code. + Please just use normal logging with --logging_console which + accomplishes the same thing in new code. + """ print(*args, file=sys.stderr, **kwargs) @@ -387,7 +578,8 @@ class OutputMultiplexer(object): """ A class that broadcasts printed messages to several sinks (including various logging levels, different files, different file handles, - the house log, etc...) + the house log, etc...). See also OutputMultiplexerContext for an + easy usage pattern. """ class Destination(enum.IntEnum): @@ -546,8 +738,13 @@ class OutputMultiplexerContext(OutputMultiplexer, contextlib.ContextDecorator): def hlog(message: str) -> None: - """Write a message to the house log.""" + """Write a message to the house log (syslog facility local7 priority + info) by calling /usr/bin/logger. This is pretty hacky but used + by a bunch of code. Another way to do this would be to use + --logging_syslog and --logging_syslog_facility but I can't + actually say that's easier. + """ message = message.replace("'", "'\"'\"'") os.system(f"/usr/bin/logger -p local7.info -- '{message}'")