From: Scott Gasch Date: Wed, 15 Sep 2021 16:32:08 +0000 (-0700) Subject: Adding doctests. Also added a logging filter. X-Git-Url: https://wannabe.guru.org/gitweb/?a=commitdiff_plain;h=4c315e387f18010ba0b5661744ad3c792f21d2d1;p=python_utils.git Adding doctests. Also added a logging filter. --- diff --git a/argparse_utils.py b/argparse_utils.py index 3799a47..0ee2be9 100644 --- a/argparse_utils.py +++ b/argparse_utils.py @@ -4,6 +4,7 @@ import argparse import datetime import logging import os +from typing import Any # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. @@ -56,14 +57,54 @@ class ActionNoYes(argparse.Action): setattr(namespace, self.dest, True) -def valid_bool(v): +def valid_bool(v: Any) -> bool: + """ + If the string is a valid bool, return its value. + + >>> valid_bool(True) + True + + >>> valid_bool("true") + True + + >>> valid_bool("yes") + True + + >>> valid_bool("on") + True + + >>> valid_bool("1") + True + + >>> valid_bool(12345) + Traceback (most recent call last): + ... + argparse.ArgumentTypeError: 12345 + + """ if isinstance(v, bool): return v from string_utils import to_bool - return to_bool(v) + try: + return to_bool(v) + except Exception: + raise argparse.ArgumentTypeError(v) def valid_ip(ip: str) -> str: + """ + If the string is a valid IPv4 address, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_ip("1.2.3.4") + '1.2.3.4' + + >>> valid_ip("localhost") + Traceback (most recent call last): + ... + argparse.ArgumentTypeError: localhost is an invalid IP address + + """ from string_utils import extract_ip_v4 s = extract_ip_v4(ip.strip()) if s is not None: @@ -74,6 +115,22 @@ def valid_ip(ip: str) -> str: def valid_mac(mac: str) -> str: + """ + If the string is a valid MAC address, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_mac('12:23:3A:4F:55:66') + '12:23:3A:4F:55:66' + + >>> valid_mac('12-23-3A-4F-55-66') + '12-23-3A-4F-55-66' + + >>> valid_mac('big') + Traceback (most recent call last): + ... + argparse.ArgumentTypeError: big is an invalid MAC address + + """ from string_utils import extract_mac_address s = extract_mac_address(mac) if s is not None: @@ -84,6 +141,22 @@ def valid_mac(mac: str) -> str: def valid_percentage(num: str) -> float: + """ + If the string is a valid percentage, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_percentage("15%") + 15.0 + + >>> valid_percentage('40') + 40.0 + + >>> valid_percentage('115') + Traceback (most recent call last): + ... + argparse.ArgumentTypeError: 115 is an invalid percentage; expected 0 <= n <= 100.0 + + """ num = num.strip('%') n = float(num) if 0.0 <= n <= 100.0: @@ -94,6 +167,19 @@ def valid_percentage(num: str) -> float: def valid_filename(filename: str) -> str: + """ + If the string is a valid filename, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_filename('/tmp') + '/tmp' + + >>> valid_filename('wfwefwefwefwefwefwefwefwef') + Traceback (most recent call last): + ... + argparse.ArgumentTypeError: wfwefwefwefwefwefwefwefwef was not found and is therefore invalid. + + """ s = filename.strip() if os.path.exists(s): return s @@ -103,6 +189,18 @@ def valid_filename(filename: str) -> str: def valid_date(txt: str) -> datetime.date: + """If the string is a valid date, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_date('6/5/2021') + datetime.date(2021, 6, 5) + + # Note: dates like 'next wednesday' work fine, they are just + # hard to test for without knowing when the testcase will be + # executed... + >>> valid_date('next wednesday') # doctest: +ELLIPSIS + -ANYTHING- + """ from string_utils import to_date date = to_date(txt) if date is not None: @@ -113,6 +211,18 @@ def valid_date(txt: str) -> datetime.date: def valid_datetime(txt: str) -> datetime.datetime: + """If the string is a valid datetime, return it. Otherwise raise + an ArgumentTypeError. + + >>> valid_datetime('6/5/2021 3:01:02') + datetime.datetime(2021, 6, 5, 3, 1, 2) + + # Again, these types of expressions work fine but are + # difficult to test with doctests because the answer is + # relative to the time the doctest is executed. + >>> valid_datetime('next christmas at 4:15am') # doctest: +ELLIPSIS + -ANYTHING- + """ from string_utils import to_datetime dt = to_datetime(txt) if dt is not None: @@ -120,3 +230,9 @@ def valid_datetime(txt: str) -> datetime.datetime: msg = f'Cannot parse argument as datetime: {txt}' logger.warning(msg) raise argparse.ArgumentTypeError(msg) + + +if __name__ == '__main__': + import doctest + doctest.ELLIPSIS_MARKER = '-ANYTHING-' + doctest.testmod() diff --git a/bootstrap.py b/bootstrap.py index 7f63dbb..bf5d91f 100644 --- a/bootstrap.py +++ b/bootstrap.py @@ -42,6 +42,11 @@ original_hook = sys.excepthook def handle_uncaught_exception(exc_type, exc_value, exc_tb): + """ + Top-level exception handler for exceptions that make it past any exception + handlers in the python code being run. Logs the error and stacktrace then + maybe attaches a debugger. + """ global original_hook msg = f'Unhandled top level exception {exc_type}' logger.exception(msg) @@ -57,11 +62,11 @@ def handle_uncaught_exception(exc_type, exc_value, exc_tb): # stdin or stderr is redirected, just do the normal thing original_hook(exc_type, exc_value, exc_tb) else: - # a terminal is attached and stderr is not redirected, debug. + # a terminal is attached and stderr is not redirected, maybe debug. + import traceback + traceback.print_exception(exc_type, exc_value, exc_tb) if config.config['debug_unhandled_exceptions']: - import traceback import pdb - traceback.print_exception(exc_type, exc_value, exc_tb) logger.info("Invoking the debugger...") pdb.pm() else: @@ -69,12 +74,21 @@ def handle_uncaught_exception(exc_type, exc_value, exc_tb): def initialize(entry_point): + """ + Remember to initialize config, initialize logging, set/log a random + seed, etc... before running main. - """Remember to initialize config and logging before running main.""" + """ @functools.wraps(entry_point) def initialize_wrapper(*args, **kwargs): + + # Hook top level unhandled exceptions, maybe invoke debugger. if sys.excepthook == sys.__excepthook__: sys.excepthook = handle_uncaught_exception + + # Try to figure out the name of the program entry point. Then + # parse configuration (based on cmdline flags, environment vars + # etc...) if ( '__globals__' in entry_point.__dict__ and '__file__' in entry_point.__globals__ @@ -83,8 +97,9 @@ def initialize(entry_point): else: config.parse(None) + # Initialize logging... and log some remembered messages from + # config module. logging_utils.initialize_logging(logging.getLogger()) - config.late_logging() # Allow programs that don't bother to override the random seed @@ -102,8 +117,8 @@ def initialize(entry_point): logger.debug(msg) random.seed(random_seed) + # Do it, invoke the user's code. Pay attention to how long it takes. logger.debug(f'Starting {entry_point.__name__} (program entry point)') - ret = None import stopwatch with stopwatch.Timer() as t: @@ -121,6 +136,8 @@ def initialize(entry_point): f'child system: {cstime}s\n' f'machine uptime: {elapsed_time}s\n' f'walltime: {walltime}s') + + # If it doesn't return cleanly, call attention to the return value. if ret is not None and ret != 0: logger.error(f'Exit {ret}') else: diff --git a/config.py b/config.py index 574449f..1e690bc 100644 --- a/config.py +++ b/config.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 -"""Global configuration driven by commandline arguments (even across -different modules). Usage: +"""Global configuration driven by commandline arguments, environment variables +and saved configuration files. This works across several modules. + +Usage: module.py: ---------- @@ -36,7 +38,8 @@ different modules). Usage: config.parse() # Very important, this must be invoked! If you set this up and remember to invoke config.parse(), all commandline - arguments will play nicely together: + arguments will play nicely together. This is done automatically for you + if you're using the bootstrap module's initialize wrapper. % main.py -h usage: main.py [-h] diff --git a/dict_utils.py b/dict_utils.py index 6dd79f3..92fd1e0 100644 --- a/dict_utils.py +++ b/dict_utils.py @@ -25,6 +25,7 @@ def init_or_inc( False >>> d {'test': 2, 'ing': 1} + """ if key in d.keys(): d[key] = inc_function(d[key]) @@ -71,6 +72,7 @@ def coalesce( >>> c = {'c': 1, 'd': 2} >>> coalesce([a, b, c]) {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]} + """ out: Dict[Any, Any] = {} for d in inputs: @@ -93,6 +95,7 @@ def item_with_max_value(d: Dict[Any, Any]) -> Tuple[Any, Any]: Traceback (most recent call last): ... ValueError: max() arg is an empty sequence + """ return max(d.items(), key=lambda _: _[1]) @@ -103,6 +106,7 @@ def item_with_min_value(d: Dict[Any, Any]) -> Tuple[Any, Any]: >>> d = {'a': 1, 'b': 2, 'c': 3} >>> item_with_min_value(d) ('a', 1) + """ return min(d.items(), key=lambda _: _[1]) @@ -113,6 +117,7 @@ def key_with_max_value(d: Dict[Any, Any]) -> Any: >>> d = {'a': 1, 'b': 2, 'c': 3} >>> key_with_max_value(d) 'c' + """ return item_with_max_value(d)[0] @@ -123,6 +128,7 @@ def key_with_min_value(d: Dict[Any, Any]) -> Any: >>> d = {'a': 1, 'b': 2, 'c': 3} >>> key_with_min_value(d) 'a' + """ return item_with_min_value(d)[0] @@ -133,6 +139,7 @@ def max_value(d: Dict[Any, Any]) -> Any: >>> d = {'a': 1, 'b': 2, 'c': 3} >>> max_value(d) 3 + """ return item_with_max_value(d)[1] @@ -143,6 +150,7 @@ def min_value(d: Dict[Any, Any]) -> Any: >>> d = {'a': 1, 'b': 2, 'c': 3} >>> min_value(d) 1 + """ return item_with_min_value(d)[1] @@ -153,6 +161,7 @@ def max_key(d: Dict[Any, Any]) -> Any: >>> d = {'a': 3, 'b': 2, 'c': 1} >>> max_key(d) 'c' + """ return max(d.keys()) @@ -163,6 +172,7 @@ def min_key(d: Dict[Any, Any]) -> Any: >>> d = {'a': 3, 'b': 2, 'c': 1} >>> min_key(d) 'a' + """ return min(d.keys()) diff --git a/input_utils.py b/input_utils.py index 648ee30..153641b 100644 --- a/input_utils.py +++ b/input_utils.py @@ -17,6 +17,8 @@ def single_keystroke_response( default_response: str = None, timeout_seconds: int = None, ) -> str: + """Get a single keystroke response to a prompt.""" + def _handle_timeout(signum, frame) -> None: raise exceptions.TimeoutError() @@ -56,12 +58,16 @@ def single_keystroke_response( def yn_response(prompt: str = None, *, timeout_seconds=None) -> str: + """Get a Y/N response to a prompt.""" + return single_keystroke_response( ["y", "n", "Y", "N"], prompt=prompt, timeout_seconds=timeout_seconds ).lower() def keystroke_helper() -> None: + """Misc util to watch keystrokes and report what they were.""" + print("Watching for keystrokes; ^C to quit.") while True: key = readchar.readkey() diff --git a/logging_utils.py b/logging_utils.py index 25919a7..034f90c 100644 --- a/logging_utils.py +++ b/logging_utils.py @@ -2,6 +2,7 @@ """Utilities related to logging.""" +import collections import contextlib import datetime import enum @@ -94,6 +95,12 @@ cfg.add_argument( default=False, help='logging.info also prints to stdout.' ) +cfg.add_argument( + '--logging_max_n_times_per_message', + type=int, + default=0, + help='When set, ignore logged messages from the same site after N.' +) # See also: OutputMultiplexer cfg.add_argument( @@ -107,11 +114,37 @@ built_in_print = print class OnlyInfoFilter(logging.Filter): + """ + A filter that only logs messages produced at the INFO logging level. + """ def filter(self, record): return record.levelno == logging.INFO +class OnlyNTimesFilter(logging.Filter): + """ + A filter that only logs messages from a given site with the same + message at the same logging level N times and ignores subsequent + attempts to log. + + """ + def __init__(self, maximum: int) -> None: + self.maximum = maximum + self.counters = collections.Counter() + super().__init__() + + def filter(self, record: logging.LogRecord) -> bool: + source = f'{record.pathname}+{record.lineno}+{record.levelno}+{record.msg}' + count = self.counters[source] + self.counters[source] += 1 + return count < self.maximum + + class MillisecondAwareFormatter(logging.Formatter): + """ + A formatter for adding milliseconds to log messages. + + """ converter = datetime.datetime.fromtimestamp def formatTime(self, record, datefmt=None): @@ -199,6 +232,11 @@ def initialize_logging(logger=None) -> logging.Logger: handler.addFilter(OnlyInfoFilter()) logger.addHandler(handler) + maximum = config.config['logging_max_n_times_per_message'] + if maximum > 0: + for handler in handlers: + handler.addFilter(OnlyNTimesFilter(maximum)) + logger.setLevel(numeric_level) logger.propagate = False diff --git a/logical_search.py b/logical_search.py index 805ec22..3ebaee5 100644 --- a/logical_search.py +++ b/logical_search.py @@ -86,6 +86,16 @@ class Corpus(object): ... reference=None, ... ) ... ) + >>> c.add_doc(Document( + ... docid=3, + ... tags=set(['urgent']), + ... properties=[ + ... ('author', 'Scott'), + ... ('subject', 'car turning in front of you') + ... ], + ... reference=None, + ... ) + ... ) >>> c.query('author:Scott and important') {1} """ diff --git a/string_utils.py b/string_utils.py index 0829846..3aaf1d7 100644 --- a/string_utils.py +++ b/string_utils.py @@ -1059,18 +1059,26 @@ def to_bool(in_str: str) -> bool: >>> to_bool('True') True + >>> to_bool('1') True + >>> to_bool('yes') True + >>> to_bool('no') False + >>> to_bool('huh?') False + + >>> to_bool('on') + True + """ if not is_string(in_str): raise ValueError(in_str) - return in_str.lower() in ("true", "1", "yes", "y", "t") + return in_str.lower() in ("true", "1", "yes", "y", "t", "on") def to_date(in_str: str) -> Optional[datetime.date]: diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 6e0c30c..9623f1b 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -78,10 +78,11 @@ if [ ${DOCTEST} -eq 1 ]; then BASE="${BASE} (doctest)" make_header "${BASE}" "${CYAN}" OUT=$( python3 ${doctest} 2>&1 ) - if [ "$OUT" == "" ]; then + FAILED=$( echo "${OUT}" | grep '\*\*\*Test Failed\*\*\*' | wc -l ) + if [ $FAILED == 0 ]; then echo "OK" else - echo -e "${OUT}" + echo -e "${FAILED}" FAILURES=$((FAILURES+1)) fi done @@ -112,5 +113,11 @@ if [ ${INTEGRATION} -eq 1 ]; then fi if [ ${FAILURES} -ne 0 ]; then - echo -e "${RED}There were ${FAILURES} failure(s).${NC}" + if [ ${FAILURES} -eq 1 ]; then + echo -e "${RED}There was ${FAILURES} failure.${NC}" + else + echo -e "${RED}There were ${FAILURES} failures.${NC}" + fi +else + echo -e "${GREEN}Everything looks good.${NC}" fi diff --git a/unittest_utils.py b/unittest_utils.py index 8a0556b..bb1a9b4 100644 --- a/unittest_utils.py +++ b/unittest_utils.py @@ -56,13 +56,14 @@ _db = '/home/scott/.python_unittest_performance_db' def check_method_for_perf_regressions(func: Callable) -> Callable: - """This is meant to be used on a method in a class that subclasses + """ + This is meant to be used on a method in a class that subclasses unittest.TestCase. When thus decorated it will time the execution of the code in the method, compare it with a database of historical perfmance, and fail the test with a perf-related message if it has become too slow. - """ + """ def load_known_test_performance_characteristics(): with open(_db, 'rb') as f: return pickle.load(f) @@ -107,7 +108,7 @@ def check_method_for_perf_regressions(func: Callable) -> Callable: ) else: stdev = statistics.stdev(hist) - limit = hist[-1] + stdev * 3 + limit = hist[-1] + stdev * 5 logger.debug( f'Max acceptable performace for {func.__name__} is {limit:f}s' ) @@ -117,12 +118,14 @@ def check_method_for_perf_regressions(func: Callable) -> Callable: ): msg = f'''{func_id} performance has regressed unacceptably. {hist[-1]:f}s is the slowest record in {len(hist)} db perf samples. -It just ran in {run_time:f}s which is >3 stdevs slower than the slowest sample. +It just ran in {run_time:f}s which is >5 stdevs slower than the slowest sample. Here is the current, full db perf timing distribution: -{hist}''' - slf = args[0] +''' + for x in hist: + msg += f'{x:f}\n' logger.error(msg) + slf = args[0] slf.fail(msg) else: hist.append(run_time)