unittest_utils.py

   1 #!/usr/bin/env python3
   2
   3 """Helpers for unittests.  Note that when you import this we
   4    automatically wrap unittest.main() with a call to
   5    bootstrap.initialize so that we getLogger config, commandline args,
   6    logging control, etc... this works fine but it's a little hacky so
   7    caveat emptor.
   8 """
   9
  10 from abc import ABC, abstractmethod
  11 import contextlib
  12 import functools
  13 import inspect
  14 import logging
  15 import os
  16 import pickle
  17 import random
  18 import statistics
  19 import time
  20 import tempfile
  21 from typing import Callable, Dict, List
  22 import unittest
  23 import warnings
  24
  25 import bootstrap
  26 import config
  27 import function_utils
  28 import scott_secrets
  29
  30 import sqlalchemy as sa
  31
  32
  33 logger = logging.getLogger(__name__)
  34 cfg = config.add_commandline_args(
  35     f'Logging ({__file__})',
  36     'Args related to function decorators')
  37 cfg.add_argument(
  38     '--unittests_ignore_perf',
  39     action='store_true',
  40     default=False,
  41     help='Ignore unittest perf regression in @check_method_for_perf_regressions',
  42 )
  43 cfg.add_argument(
  44     '--unittests_num_perf_samples',
  45     type=int,
  46     default=50,
  47     help='The count of perf timing samples we need to see before blocking slow runs on perf grounds'
  48 )
  49 cfg.add_argument(
  50     '--unittests_drop_perf_traces',
  51     type=str,
  52     nargs=1,
  53     default=None,
  54     help='The identifier (i.e. file!test_fixture) for which we should drop all perf data'
  55 )
  56 cfg.add_argument(
  57     '--unittests_persistance_strategy',
  58     choices=['FILE', 'DATABASE'],
  59     default='DATABASE',
  60     help='Should we persist perf data in a file or db?'
  61 )
  62 cfg.add_argument(
  63     '--unittests_perfdb_filename',
  64     type=str,
  65     metavar='FILENAME',
  66     default=f'{os.environ["HOME"]}/.python_unittest_performance_db',
  67     help='File in which to store perf data (iff --unittests_persistance_strategy is FILE)'
  68 )
  69 cfg.add_argument(
  70     '--unittests_perfdb_spec',
  71     type=str,
  72     metavar='DBSPEC',
  73     default='mariadb+pymysql://python_unittest:<PASSWORD>@db.house:3306/python_unittest_performance',
  74     help='Db connection spec for perf data (iff --unittest_persistance_strategy is DATABASE)'
  75 )
  76
  77 # >>> This is the hacky business, FYI. <<<
  78 unittest.main = bootstrap.initialize(unittest.main)
  79
  80
  81 class PerfRegressionDataPersister(ABC):
  82     def __init__(self):
  83         pass
  84
  85     @abstractmethod
  86     def load_performance_data(self) -> Dict[str, List[float]]:
  87         pass
  88
  89     @abstractmethod
  90     def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
  91         pass
  92
  93     @abstractmethod
  94     def delete_performance_data(self, method_id: str):
  95         pass
  96
  97
  98 class FileBasedPerfRegressionDataPersister(PerfRegressionDataPersister):
  99     def __init__(self, filename: str):
 100         self.filename = filename
 101         self.traces_to_delete = []
 102
 103     def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
 104         with open(self.filename, 'rb') as f:
 105             return pickle.load(f)
 106
 107     def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
 108         for trace in self.traces_to_delete:
 109             if trace in data:
 110                 data[trace] = []
 111
 112         with open(self.filename, 'wb') as f:
 113             pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
 114
 115     def delete_performance_data(self, method_id: str):
 116         self.traces_to_delete.append(method_id)
 117
 118
 119 class DatabasePerfRegressionDataPersister(PerfRegressionDataPersister):
 120     def __init__(self, dbspec: str):
 121         self.dbspec = dbspec
 122         self.engine = sa.create_engine(self.dbspec)
 123         self.conn = self.engine.connect()
 124
 125     def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
 126         results = self.conn.execute(
 127             sa.text(
 128                 f'SELECT * FROM runtimes_by_function WHERE function = "{method_id}";'
 129             )
 130         )
 131         ret = {method_id: []}
 132         for result in results.all():
 133             ret[method_id].append(result['runtime'])
 134         results.close()
 135         return ret
 136
 137     def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
 138         self.delete_performance_data(method_id)
 139         for (method_id, perf_data) in data.items():
 140             sql = 'INSERT INTO runtimes_by_function (function, runtime) VALUES '
 141             for perf in perf_data:
 142                 self.conn.execute(sql + f'("{method_id}", {perf});')
 143
 144     def delete_performance_data(self, method_id: str):
 145         sql = f'DELETE FROM runtimes_by_function WHERE function = "{method_id}"'
 146         self.conn.execute(sql)
 147
 148
 149 def check_method_for_perf_regressions(func: Callable) -> Callable:
 150     """
 151     This is meant to be used on a method in a class that subclasses
 152     unittest.TestCase.  When thus decorated it will time the execution
 153     of the code in the method, compare it with a database of
 154     historical perfmance, and fail the test with a perf-related
 155     message if it has become too slow.
 156
 157     """
 158     @functools.wraps(func)
 159     def wrapper_perf_monitor(*args, **kwargs):
 160         if config.config['unittests_persistance_strategy'] == 'FILE':
 161             filename = config.config['unittests_perfdb_filename']
 162             helper = FileBasedPerfRegressionDataPersister(filename)
 163         elif config.config['unittests_persistance_strategy'] == 'DATABASE':
 164             dbspec = config.config['unittests_perfdb_spec']
 165             dbspec = dbspec.replace('<PASSWORD>', scott_secrets.MARIADB_UNITTEST_PERF_PASSWORD)
 166             helper = DatabasePerfRegressionDataPersister(dbspec)
 167         else:
 168             raise Exception(
 169                 'Unknown/unexpected --unittests_persistance_strategy value'
 170             )
 171
 172         func_id = function_utils.function_identifier(func)
 173         func_name = func.__name__
 174         logger.debug(f'Watching {func_name}\'s performance...')
 175         logger.debug(f'Canonical function identifier = {func_id}')
 176
 177         try:
 178             perfdb = helper.load_performance_data(func_id)
 179         except Exception as e:
 180             logger.exception(e)
 181             msg = 'Unable to load perfdb; skipping it...'
 182             logger.warning(msg)
 183             warnings.warn(msg)
 184             perfdb = {}
 185
 186         # cmdline arg to forget perf traces for function
 187         drop_id = config.config['unittests_drop_perf_traces']
 188         if drop_id is not None:
 189             helper.delete_performance_data(drop_id)
 190
 191         # Run the wrapped test paying attention to latency.
 192         start_time = time.perf_counter()
 193         value = func(*args, **kwargs)
 194         end_time = time.perf_counter()
 195         run_time = end_time - start_time
 196
 197         # See if it was unexpectedly slow.
 198         hist = perfdb.get(func_id, [])
 199         if len(hist) < config.config['unittests_num_perf_samples']:
 200             hist.append(run_time)
 201             logger.debug(
 202                 f'Still establishing a perf baseline for {func_name}'
 203             )
 204         else:
 205             stdev = statistics.stdev(hist)
 206             logger.debug(f'For {func_name}, performance stdev={stdev}')
 207             slowest = hist[-1]
 208             logger.debug(f'For {func_name}, slowest perf on record is {slowest:f}s')
 209             limit = slowest + stdev * 4
 210             logger.debug(
 211                 f'For {func_name}, max acceptable runtime is {limit:f}s'
 212             )
 213             logger.debug(
 214                 f'For {func_name}, actual observed runtime was {run_time:f}s'
 215             )
 216             if (
 217                 run_time > limit and
 218                 not config.config['unittests_ignore_perf']
 219             ):
 220                 msg = f'''{func_id} performance has regressed unacceptably.
 221 {slowest:f}s is the slowest runtime on record in {len(hist)} perf samples.
 222 It just ran in {run_time:f}s which is 4+ stdevs slower than the slowest.
 223 Here is the current, full db perf timing distribution:
 224
 225 '''
 226                 for x in hist:
 227                     msg += f'{x:f}\n'
 228                 logger.error(msg)
 229                 slf = args[0]        # Peek at the wrapped function's self ref.
 230                 slf.fail(msg)        # ...to fail the testcase.
 231             else:
 232                 hist.append(run_time)
 233
 234         # Don't spam the database with samples; just pick a random
 235         # sample from what we have and store that back.
 236         n = min(config.config['unittests_num_perf_samples'], len(hist))
 237         hist = random.sample(hist, n)
 238         hist.sort()
 239         perfdb[func_id] = hist
 240         helper.save_performance_data(func_id, perfdb)
 241         return value
 242     return wrapper_perf_monitor
 243
 244
 245 def check_all_methods_for_perf_regressions(prefix='test_'):
 246     """Decorate unittests with this to pay attention to the perf of the
 247     testcode and flag perf regressions.  e.g.
 248
 249     import unittest_utils as uu
 250
 251     @uu.check_all_methods_for_perf_regressions()
 252     class TestMyClass(unittest.TestCase):
 253
 254         def test_some_part_of_my_class(self):
 255             ...
 256
 257     """
 258     def decorate_the_testcase(cls):
 259         if issubclass(cls, unittest.TestCase):
 260             for name, m in inspect.getmembers(cls, inspect.isfunction):
 261                 if name.startswith(prefix):
 262                     setattr(cls, name, check_method_for_perf_regressions(m))
 263                     logger.debug(f'Wrapping {cls.__name__}:{name}.')
 264         return cls
 265     return decorate_the_testcase
 266
 267
 268 def breakpoint():
 269     """Hard code a breakpoint somewhere; drop into pdb."""
 270     import pdb
 271     pdb.set_trace()
 272
 273
 274 class RecordStdout(object):
 275     """
 276     Record what is emitted to stdout.
 277
 278     >>> with RecordStdout() as record:
 279     ...     print("This is a test!")
 280     >>> print({record().readline()})
 281     {'This is a test!\\n'}
 282     """
 283
 284     def __init__(self) -> None:
 285         self.destination = tempfile.SpooledTemporaryFile(mode='r+')
 286         self.recorder = None
 287
 288     def __enter__(self) -> Callable[[], tempfile.SpooledTemporaryFile]:
 289         self.recorder = contextlib.redirect_stdout(self.destination)
 290         self.recorder.__enter__()
 291         return lambda: self.destination
 292
 293     def __exit__(self, *args) -> bool:
 294         self.recorder.__exit__(*args)
 295         self.destination.seek(0)
 296         return None
 297
 298
 299 class RecordStderr(object):
 300     """
 301     Record what is emitted to stderr.
 302
 303     >>> import sys
 304     >>> with RecordStderr() as record:
 305     ...     print("This is a test!", file=sys.stderr)
 306     >>> print({record().readline()})
 307     {'This is a test!\\n'}
 308     """
 309
 310     def __init__(self) -> None:
 311         self.destination = tempfile.SpooledTemporaryFile(mode='r+')
 312         self.recorder = None
 313
 314     def __enter__(self) -> Callable[[], tempfile.SpooledTemporaryFile]:
 315         self.recorder = contextlib.redirect_stderr(self.destination)
 316         self.recorder.__enter__()
 317         return lambda: self.destination
 318
 319     def __exit__(self, *args) -> bool:
 320         self.recorder.__exit__(*args)
 321         self.destination.seek(0)
 322         return None
 323
 324
 325 class RecordMultipleStreams(object):
 326     """
 327     Record the output to more than one stream.
 328     """
 329
 330     def __init__(self, *files) -> None:
 331         self.files = [*files]
 332         self.destination = tempfile.SpooledTemporaryFile(mode='r+')
 333         self.saved_writes = []
 334
 335     def __enter__(self) -> Callable[[], tempfile.SpooledTemporaryFile]:
 336         for f in self.files:
 337             self.saved_writes.append(f.write)
 338             f.write = self.destination.write
 339         return lambda: self.destination
 340
 341     def __exit__(self, *args) -> bool:
 342         for f in self.files:
 343             f.write = self.saved_writes.pop()
 344         self.destination.seek(0)
 345
 346
 347 if __name__ == '__main__':
 348     import doctest
 349     doctest.testmod()