From ff0c10dff77a141bfebcce592eef34d6b065bfa9 Mon Sep 17 00:00:00 2001
From: Scott
Date: Mon, 17 Jan 2022 15:54:14 -0800
Subject: [PATCH] Make unittest_utils log its perf data to a database,
 optionally.

---
 unittest_utils.py | 173 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 147 insertions(+), 26 deletions(-)

diff --git a/unittest_utils.py b/unittest_utils.py
index 8c9a120..584eb3c 100644
--- a/unittest_utils.py
+++ b/unittest_utils.py
@@ -7,20 +7,26 @@
 caveat emptor.
 
 """
 
+from abc import ABC, abstractmethod
 import contextlib
 import functools
 import inspect
 import logging
+import os
 import pickle
 import random
 import statistics
 import time
 import tempfile
-from typing import Callable
+from typing import Callable, Dict, List
 import unittest
+import warnings
 
 import bootstrap
 import config
+import scott_secrets
+
+import sqlalchemy as sa
 
 logger = logging.getLogger(__name__)
@@ -36,7 +42,7 @@ cfg.add_argument(
 cfg.add_argument(
     '--unittests_num_perf_samples',
     type=int,
-    default=20,
+    default=50,
     help='The count of perf timing samples we need to see before blocking slow runs on perf grounds'
 )
 cfg.add_argument(
@@ -46,13 +52,120 @@ cfg.add_argument(
     '--unittests_drop_perf_traces',
     type=str,
     nargs=1,
     default=None,
     help='The identifier (i.e. file!test_fixture) for which we should drop all perf data'
 )
-
+cfg.add_argument(
+    '--unittests_persistance_strategy',
+    choices=['FILE', 'DATABASE'],
+    default='DATABASE',
+    help='Should we persist perf data in a file or db?'
+)
+cfg.add_argument(
+    '--unittests_perfdb_filename',
+    type=str,
+    metavar='FILENAME',
+    default=f'{os.environ["HOME"]}/.python_unittest_performance_db',
+    help='File in which to store perf data (iff --unittests_persistance_strategy is FILE)'
+)
+cfg.add_argument(
+    '--unittests_perfdb_spec',
+    type=str,
+    metavar='DBSPEC',
+    default='mariadb+pymysql://python_unittest:<PASSWORD>@db.house:3306/python_unittest_performance',
+    help='Db connection spec for perf data (iff --unittests_persistance_strategy is DATABASE)'
+)
 
 
 # >>> This is the hacky business, FYI. <<<
 unittest.main = bootstrap.initialize(unittest.main)
 
-_db = '/home/scott/.python_unittest_performance_db'
+class PerfRegressionDataPersister(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+        pass
+
+    @abstractmethod
+    def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+        pass
+
+    @abstractmethod
+    def delete_performance_data(self, method_id: str):
+        pass
+
+
+class FileBasedPerfRegressionDataPersister(PerfRegressionDataPersister):
+    def __init__(self, filename: str):
+        self.filename = filename
+        self.traces_to_delete = []
+
+    def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+        with open(self.filename, 'rb') as f:
+            return pickle.load(f)
+
+    def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+        for trace in self.traces_to_delete:
+            if trace in data:
+                data[trace] = []
+
+        with open(self.filename, 'wb') as f:
+            pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
+
+    def delete_performance_data(self, method_id: str):
+        self.traces_to_delete.append(method_id)
+
+
+class DatabasePerfRegressionDataPersister(PerfRegressionDataPersister):
+    def __init__(self, dbspec: str):
+        self.dbspec = dbspec
+        self.engine = sa.create_engine(self.dbspec)
+        self.conn = self.engine.connect()
+
+    def load_performance_data(self, method_id: str) -> Dict[str, List[float]]:
+        results = self.conn.execute(
+            sa.text(
+                f'SELECT * FROM runtimes_by_function WHERE function = "{method_id}";'
+            )
+        )
+        ret = {method_id: []}
+        for result in results.all():
+            ret[method_id].append(result['runtime'])
+        results.close()
+        return ret
+
+    def save_performance_data(self, method_id: str, data: Dict[str, List[float]]):
+        self.delete_performance_data(method_id)
+        for (method_id, perf_data) in data.items():
+            sql = 'INSERT INTO runtimes_by_function (function, runtime) VALUES '
+            for perf in perf_data:
+                self.conn.execute(sql + f'("{method_id}", {perf});')
+
+    def delete_performance_data(self, method_id: str):
+        sql = f'DELETE FROM runtimes_by_function WHERE function = "{method_id}"'
+        self.conn.execute(sql)
+
+
+def function_identifier(f: Callable) -> str:
+    """
+    Given a callable, return a string that identifies it.  Usually
+    that string is just __module__:__name__ but there's a corner
+    case: when the callable is defined in the module being run as
+    __main__, f.__module__ returns "__main__" instead of the name of
+    the file it lives in.  Work around this using pathlib.Path (see
+    below).
+
+    >>> function_identifier(function_identifier)
+    'unittest_utils:function_identifier'
+
+    """
+    if f.__module__ == '__main__':
+        from pathlib import Path
+        import __main__
+        module = __main__.__file__
+        module = Path(module).stem
+        return f'{module}:{f.__name__}'
+    else:
+        return f'{f.__module__}:{f.__name__}'
 
 
 def check_method_for_perf_regressions(func: Callable) -> Callable:
@@ -64,43 +177,45 @@ def check_method_for_perf_regressions(func: Callable) -> Callable:
     message if it has become too slow.
""" - def load_known_test_performance_characteristics(): - with open(_db, 'rb') as f: - return pickle.load(f) - - def save_known_test_performance_characteristics(perfdb): - with open(_db, 'wb') as f: - pickle.dump(perfdb, f, pickle.HIGHEST_PROTOCOL) - @functools.wraps(func) def wrapper_perf_monitor(*args, **kwargs): + if config.config['unittests_persistance_strategy'] == 'FILE': + filename = config.config['unittests_perfdb_filename'] + helper = FileBasedPerfRegressionDataPersister(filename) + elif config.config['unittests_persistance_strategy'] == 'DATABASE': + dbspec = config.config['unittests_perfdb_spec'] + dbspec = dbspec.replace('', scott_secrets.MARIADB_UNITTEST_PERF_PASSWORD) + helper = DatabasePerfRegressionDataPersister(dbspec) + else: + raise Exception( + 'Unknown/unexpected --unittests_persistance_strategy value' + ) + + logger.debug(f'Watching {func.__name__}\'s performance...') + func_id = function_identifier(func) + logger.debug(f'Canonical function identifier = {func_id}') + try: - perfdb = load_known_test_performance_characteristics() + perfdb = helper.load_performance_data(func_id) except Exception as e: logger.exception(e) - msg = f'Unable to load perfdb from {_db}' + msg = 'Unable to load perfdb; skipping it...' logger.warning(msg) + warnings.warn(msg) perfdb = {} - # This is a unique identifier for a test: filepath!function - logger.debug(f'Watching {func.__name__}\'s performance...') - func_id = f'{func.__globals__["__file__"]}!{func.__name__}' - logger.debug(f'Canonical function identifier = {func_id}') - # cmdline arg to forget perf traces for function drop_id = config.config['unittests_drop_perf_traces'] if drop_id is not None: - if drop_id in perfdb: - perfdb[drop_id] = [] + helper.delete_performance_data(drop_id) # Run the wrapped test paying attention to latency. start_time = time.perf_counter() value = func(*args, **kwargs) end_time = time.perf_counter() run_time = end_time - start_time - logger.debug(f'{func.__name__} executed in {run_time:f}s.') - # Check the db; see if it was unexpectedly slow. + # See if it was unexpectedly slow. hist = perfdb.get(func_id, []) if len(hist) < config.config['unittests_num_perf_samples']: hist.append(run_time) @@ -109,9 +224,15 @@ def check_method_for_perf_regressions(func: Callable) -> Callable: ) else: stdev = statistics.stdev(hist) - limit = hist[-1] + stdev * 5 + logger.debug(f'For {func.__name__}, performance stdev={stdev}') + slowest = hist[-1] + logger.debug(f'For {func.__name__}, slowest perf on record is {slowest:f}s') + limit = slowest + stdev * 4 + logger.debug( + f'For {func.__name__}, max acceptable runtime is {limit:f}s' + ) logger.debug( - f'Max acceptable performace for {func.__name__} is {limit:f}s' + f'For {func.__name__}, actual observed runtime was {run_time:f}s' ) if ( run_time > limit and @@ -135,7 +256,7 @@ Here is the current, full db perf timing distribution: hist = random.sample(hist, n) hist.sort() perfdb[func_id] = hist - save_known_test_performance_characteristics(perfdb) + helper.save_performance_data(func_id, perfdb) return value return wrapper_perf_monitor -- 2.45.2