caveat emptor.
"""
+from abc import ABC, abstractmethod
import contextlib
import functools
import inspect
import logging
+import os
import pickle
import random
import statistics
import time
import tempfile
-from typing import Callable
+from typing import Callable, Dict, List
import unittest
+import warnings
import bootstrap
import config
+import scott_secrets
+
+import sqlalchemy as sa
logger = logging.getLogger(__name__)
cfg.add_argument(
'--unittests_num_perf_samples',
type=int,
- default=20,
+ default=50,
help='The count of perf timing samples we need to see before blocking slow runs on perf grounds'
)
cfg.add_argument(
default=None,
-help='The identifier (i.e. file!test_fixture) for which we should drop all perf data'
+    help='The identifier (e.g. module:function) for which we should drop all perf data'
)
-
+cfg.add_argument(
+ '--unittests_persistance_strategy',
+ choices=['FILE', 'DATABASE'],
+ default='DATABASE',
+ help='Should we persist perf data in a file or db?'
+)
+cfg.add_argument(
+ '--unittests_perfdb_filename',
+ type=str,
+ metavar='FILENAME',
+    default=f'{os.path.expanduser("~")}/.python_unittest_performance_db',
+ help='File in which to store perf data (iff --unittests_persistance_strategy is FILE)'
+)
+cfg.add_argument(
+ '--unittests_perfdb_spec',
+ type=str,
+ metavar='DBSPEC',
+ default='mariadb+pymysql://python_unittest:<PASSWORD>@db.house:3306/python_unittest_performance',
+    help='Db connection spec for perf data (iff --unittests_persistance_strategy is DATABASE)'
+)
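+
+# Hypothetical invocations (a sketch only; the script name and the FILE path
+# below are examples, not defaults shipped with this module):
+#
+#   ./some_test.py --unittests_persistance_strategy FILE \
+#       --unittests_perfdb_filename /tmp/perf.pickle
+#
+#   ./some_test.py --unittests_persistance_strategy DATABASE \
+#       --unittests_perfdb_spec 'mariadb+pymysql://python_unittest:<PASSWORD>@db.house:3306/python_unittest_performance'
+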
# >>> This is the hacky business, FYI. <<<
unittest.main = bootstrap.initialize(unittest.main)
-_db = '/home/scott/.python_unittest_performance_db'
+class PerfRegressionDataPersister(ABC):
+    """Interface for loading, saving and deleting per-test perf data."""
+
+    @abstractmethod
+    def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+        pass
+
+    @abstractmethod
+    def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+        pass
+
+    @abstractmethod
+    def delete_performance_data(self, method_id: str):
+        pass
+
+
+class FileBasedPerfRegressionDataPersister(PerfRegressionDataPersister):
+    """Persists perf data in a local pickle file."""
+
+    def __init__(self, filename: str):
+        self.filename = filename
+        self.traces_to_delete: List[str] = []
+
+    def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+        # The pickle file holds traces for every method; method_id is unused.
+        with open(self.filename, 'rb') as f:
+            return pickle.load(f)
+
+ def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+ for trace in self.traces_to_delete:
+ if trace in data:
+ data[trace] = []
+
+ with open(self.filename, 'wb') as f:
+ pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
+
+ def delete_performance_data(self, method_id: str):
+ self.traces_to_delete.append(method_id)
+
+
+class DatabasePerfRegressionDataPersister(PerfRegressionDataPersister):
+    """Persists perf data in a database via SQLAlchemy."""
+
+    def __init__(self, dbspec: str):
+        self.dbspec = dbspec
+        self.engine = sa.create_engine(self.dbspec)
+        self.conn = self.engine.connect()
+
+ def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+ results = self.conn.execute(
+ sa.text(
+ f'SELECT * FROM runtimes_by_function WHERE function = "{method_id}";'
+ )
+ )
+ ret = {method_id: []}
+ for result in results.all():
+ ret[method_id].append(result['runtime'])
+ results.close()
+ return ret
+
+ def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+ self.delete_performance_data(method_id)
+ for (method_id, perf_data) in data.items():
+ sql = 'INSERT INTO runtimes_by_function (function, runtime) VALUES '
+ for perf in perf_data:
+                self.conn.execute(sa.text(sql + f'("{method_id}", {perf});'))
+
+ def delete_performance_data(self, method_id: str):
+        sql = f'DELETE FROM runtimes_by_function WHERE function = "{method_id}";'
+        self.conn.execute(sa.text(sql))
+
+
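+# Note: DatabasePerfRegressionDataPersister assumes the runtimes_by_function
+# table already exists; nothing here creates it. A rough sketch of the
+# expected shape, inferred from the queries above (column types are
+# assumptions):
+#
+#   CREATE TABLE runtimes_by_function (
+#       function VARCHAR(255) NOT NULL,
+#       runtime DOUBLE NOT NULL
+#   );
+
+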
+def function_identifier(f: Callable) -> str:
+ """
+ Given a callable function, return a string that identifies it.
+ Usually that string is just __module__:__name__ but there's a
+ corner case: when __module__ is __main__ (i.e. the callable is
+ defined in the same module as __main__). In this case,
+ f.__module__ returns "__main__" instead of the file that it is
+ defined in. Work around this using pathlib.Path (see below).
+
+ >>> function_identifier(function_identifier)
+ 'unittest_utils:function_identifier'
+
+ """
+ if f.__module__ == '__main__':
+ from pathlib import Path
+ import __main__
+ module = __main__.__file__
+ module = Path(module).stem
+ return f'{module}:{f.__name__}'
+ else:
+ return f'{f.__module__}:{f.__name__}'
def check_method_for_perf_regressions(func: Callable) -> Callable:
message if it has become too slow.
"""
- def load_known_test_performance_characteristics():
- with open(_db, 'rb') as f:
- return pickle.load(f)
-
- def save_known_test_performance_characteristics(perfdb):
- with open(_db, 'wb') as f:
- pickle.dump(perfdb, f, pickle.HIGHEST_PROTOCOL)
-
@functools.wraps(func)
def wrapper_perf_monitor(*args, **kwargs):
+ if config.config['unittests_persistance_strategy'] == 'FILE':
+ filename = config.config['unittests_perfdb_filename']
+ helper = FileBasedPerfRegressionDataPersister(filename)
+ elif config.config['unittests_persistance_strategy'] == 'DATABASE':
+ dbspec = config.config['unittests_perfdb_spec']
+ dbspec = dbspec.replace('<PASSWORD>', scott_secrets.MARIADB_UNITTEST_PERF_PASSWORD)
+ helper = DatabasePerfRegressionDataPersister(dbspec)
+ else:
+ raise Exception(
+ 'Unknown/unexpected --unittests_persistance_strategy value'
+ )
+
+ logger.debug(f'Watching {func.__name__}\'s performance...')
+ func_id = function_identifier(func)
+ logger.debug(f'Canonical function identifier = {func_id}')
+
try:
- perfdb = load_known_test_performance_characteristics()
+ perfdb = helper.load_performance_data(func_id)
except Exception as e:
logger.exception(e)
- msg = f'Unable to load perfdb from {_db}'
+ msg = 'Unable to load perfdb; skipping it...'
logger.warning(msg)
+ warnings.warn(msg)
perfdb = {}
- # This is a unique identifier for a test: filepath!function
- logger.debug(f'Watching {func.__name__}\'s performance...')
- func_id = f'{func.__globals__["__file__"]}!{func.__name__}'
- logger.debug(f'Canonical function identifier = {func_id}')
-
# cmdline arg to forget perf traces for function
drop_id = config.config['unittests_drop_perf_traces']
if drop_id is not None:
- if drop_id in perfdb:
- perfdb[drop_id] = []
+ helper.delete_performance_data(drop_id)
# Run the wrapped test paying attention to latency.
start_time = time.perf_counter()
value = func(*args, **kwargs)
end_time = time.perf_counter()
run_time = end_time - start_time
- logger.debug(f'{func.__name__} executed in {run_time:f}s.')
- # Check the db; see if it was unexpectedly slow.
+ # See if it was unexpectedly slow.
hist = perfdb.get(func_id, [])
if len(hist) < config.config['unittests_num_perf_samples']:
hist.append(run_time)
)
else:
stdev = statistics.stdev(hist)
- limit = hist[-1] + stdev * 5
+ logger.debug(f'For {func.__name__}, performance stdev={stdev}')
+ slowest = hist[-1]
+ logger.debug(f'For {func.__name__}, slowest perf on record is {slowest:f}s')
+ limit = slowest + stdev * 4
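+            # For example, a slowest recorded run of 2.00s with a stdev of
+            # 0.10s yields a max acceptable runtime of 2.40s.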
+ logger.debug(
+ f'For {func.__name__}, max acceptable runtime is {limit:f}s'
+ )
logger.debug(
- f'Max acceptable performace for {func.__name__} is {limit:f}s'
+ f'For {func.__name__}, actual observed runtime was {run_time:f}s'
)
if (
run_time > limit and
hist = random.sample(hist, n)
hist.sort()
perfdb[func_id] = hist
- save_known_test_performance_characteristics(perfdb)
+ helper.save_performance_data(func_id, perfdb)
return value
return wrapper_perf_monitor
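+
+
+# A minimal usage sketch (hypothetical test module; MyTest, test_something and
+# some_expensive_operation are illustrative names, not part of this library).
+# Decorating a test method records its runtime via the configured persistence
+# strategy and emits a message once a run is much slower than its history:
+#
+#   import unittest
+#   import unittest_utils
+#
+#   class MyTest(unittest.TestCase):
+#       @unittest_utils.check_method_for_perf_regressions
+#       def test_something(self):
+#           self.assertTrue(some_expensive_operation())
+#
+#   if __name__ == '__main__':
+#       unittest.main()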