From b10d30a46e601c9ee1f843241f2d69a1f90f7a94 Mon Sep 17 00:00:00 2001 From: Scott Gasch Date: Tue, 7 Sep 2021 22:20:40 -0700 Subject: [PATCH] Various changes. --- bootstrap.py | 4 +- config.py | 17 +- dateparse/dateparse_utils.py | 2 + executors.py | 137 ++++++------- histogram.py | 6 +- logging_utils.py | 2 +- presence.py | 8 +- smart_future.py | 10 + string_utils.py | 36 +++- tests/ansi_test.py | 19 -- tests/dateparse_utils_test.py | 362 ++++++++++++++++++---------------- tests/parallelize_test.py | 30 +++ tests/run_all_tests.sh | 6 +- text_utils.py | 13 ++ 14 files changed, 374 insertions(+), 278 deletions(-) delete mode 100755 tests/ansi_test.py diff --git a/bootstrap.py b/bootstrap.py index 3489b8a..7f63dbb 100644 --- a/bootstrap.py +++ b/bootstrap.py @@ -121,8 +121,8 @@ def initialize(entry_point): f'child system: {cstime}s\n' f'machine uptime: {elapsed_time}s\n' f'walltime: {walltime}s') - if ret != 0: - logger.info(f'Exit {ret}') + if ret is not None and ret != 0: + logger.error(f'Exit {ret}') else: logger.debug(f'Exit {ret}') sys.exit(ret) diff --git a/config.py b/config.py index cf6d5a1..574449f 100644 --- a/config.py +++ b/config.py @@ -73,10 +73,6 @@ from typing import Any, Dict, List, Optional # This module is commonly used by others in here and should avoid # taking any unnecessary dependencies back on them. -# Note: at this point in time, logging hasn't been configured and -# anything we log will come out the root logger. -logger = logging.getLogger(__name__) - # Defer logging messages until later when logging has been initialized. saved_messages: List[str] = [] @@ -84,12 +80,13 @@ saved_messages: List[str] = [] program_name = os.path.basename(sys.argv[0]) original_argv = [arg for arg in sys.argv] + # A global parser that we will collect arguments into. args = argparse.ArgumentParser( description=None, formatter_class=argparse.ArgumentDefaultsHelpFormatter, fromfile_prefix_chars="@", - epilog=f'-----------------------------------------------------------------------------\n{program_name} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.\n-----------------------------------------------------------------------------' + epilog=f'------------------------------------------------------------------------------\n{program_name} uses config.py ({__file__}) for global, cross-module configuration setup and parsing.\n------------------------------------------------------------------------------' ) # Keep track of if we've been called and prevent being called more @@ -116,7 +113,7 @@ group.add_argument( '--config_loadfile', metavar='FILENAME', default=None, - help='Config file from which to read args in lieu or in addition to commandline.', + help='Config file (populated via --config_savefile) from which to read args in lieu or in addition to commandline.', ) group.add_argument( '--config_dump', @@ -129,7 +126,7 @@ group.add_argument( type=str, metavar='FILENAME', default=None, - help='Populate config file compatible --config_loadfile to save config for later use.', + help='Populate config file compatible with --config_loadfile to save config for later use.', ) @@ -145,7 +142,6 @@ def parse(entry_module: Optional[str]) -> Dict[str, Any]: """Main program should call this early in main()""" global config_parse_called if config_parse_called: - logger.warning('config.parse has already been called; ignoring spurious invocation') return config global saved_messages @@ -224,11 +220,11 @@ def parse(entry_module: Optional[str]) -> Dict[str, Any]: if loadfile is not None: if saw_other_args: - msg = f'WARNING: Augmenting commandline arguments with those from {loadfile}.' + msg = f'Augmenting commandline arguments with those from {loadfile}.' print(msg, file=sys.stderr) saved_messages.append(msg) if not os.path.exists(loadfile): - print(f'--config_loadfile argument must be a file, {loadfile} not found.', + print(f'ERROR: --config_loadfile argument must be a file, {loadfile} not found.', file=sys.stderr) sys.exit(-1) with open(loadfile, 'r') as rf: @@ -273,6 +269,7 @@ def dump_config(): """Print the current config to stdout.""" print("Global Configuration:", file=sys.stderr) pprint.pprint(config, stream=sys.stderr) + print() def late_logging(): diff --git a/dateparse/dateparse_utils.py b/dateparse/dateparse_utils.py index cd8bc35..e5e7e76 100755 --- a/dateparse/dateparse_utils.py +++ b/dateparse/dateparse_utils.py @@ -194,6 +194,8 @@ class DateParser(dateparse_utilsListener): This is the main entrypoint to this class for caller code. """ + date_string = date_string.strip() + date_string = re.sub('\s+', ' ', date_string) self._reset() listener = RaisingErrorListener() input_stream = antlr4.InputStream(date_string) diff --git a/executors.py b/executors.py index 2f4cf83..63efd81 100644 --- a/executors.py +++ b/executors.py @@ -23,7 +23,7 @@ import argparse_utils import config from exec_utils import run_silently, cmd_in_background from decorator_utils import singleton -import histogram +import histogram as hist logger = logging.getLogger(__name__) @@ -61,33 +61,6 @@ parser.add_argument( RSYNC = 'rsync -q --no-motd -W --ignore-existing --timeout=60 --size-only -z' SSH = 'ssh -oForwardX11=no' -HIST = histogram.SimpleHistogram( - histogram.SimpleHistogram.n_evenly_spaced_buckets( - int(0), int(500), 25 - ) -) - - -def run_local_bundle(fun, *args, **kwargs): - logger.debug(f"Running local bundle at {fun.__name__}") - start = time.time() - result = fun(*args, **kwargs) - end = time.time() - duration = end - start - logger.debug(f"{fun.__name__} finished; used {duration:.1f}s") - HIST.add_item(duration) - return result - - -def run_cloud_pickle(pickle): - fun, args, kwargs = cloudpickle.loads(pickle) - logger.debug(f"Running pickled bundle at {fun.__name__}") - start = time.time() - result = fun(*args, **kwargs) - end = time.time() - duration = end - start - logger.debug(f"{fun.__name__} finished; used {duration:.1f}s") - return result def make_cloud_pickle(fun, *args, **kwargs): @@ -96,8 +69,14 @@ def make_cloud_pickle(fun, *args, **kwargs): class BaseExecutor(ABC): - def __init__(self): - pass + def __init__(self, *, title=''): + self.title = title + self.task_count = 0 + self.histogram = hist.SimpleHistogram( + hist.SimpleHistogram.n_evenly_spaced_buckets( + int(0), int(500), 50 + ) + ) @abstractmethod def submit(self, @@ -111,6 +90,10 @@ class BaseExecutor(ABC): wait: bool = True) -> None: pass + def adjust_task_count(self, delta: int) -> None: + self.task_count += delta + logger.debug(f'Executor current task count is {self.task_count}') + class ThreadExecutor(BaseExecutor): def __init__(self, @@ -126,29 +109,36 @@ class ThreadExecutor(BaseExecutor): max_workers=workers, thread_name_prefix="thread_executor_helper" ) - self.job_count = 0 + + def run_local_bundle(self, fun, *args, **kwargs): + logger.debug(f"Running local bundle at {fun.__name__}") + start = time.time() + result = fun(*args, **kwargs) + end = time.time() + self.adjust_task_count(-1) + duration = end - start + logger.debug(f"{fun.__name__} finished; used {duration:.1f}s") + self.histogram.add_item(duration) + return result def submit(self, function: Callable, *args, **kwargs) -> fut.Future: - self.job_count += 1 - logger.debug( - f'Submitted work to threadpool; there are now {self.job_count} items.' - ) + self.adjust_task_count(+1) newargs = [] newargs.append(function) for arg in args: newargs.append(arg) return self._thread_pool_executor.submit( - run_local_bundle, + self.run_local_bundle, *newargs, **kwargs) def shutdown(self, wait = True) -> None: - logger.debug("Shutting down threadpool executor.") - print(HIST) + logger.debug(f'Shutting down threadpool executor {self.title}') + print(self.histogram) self._thread_pool_executor.shutdown(wait) @@ -165,24 +155,41 @@ class ProcessExecutor(BaseExecutor): self._process_executor = fut.ProcessPoolExecutor( max_workers=workers, ) - self.job_count = 0 + + def run_cloud_pickle(self, pickle): + fun, args, kwargs = cloudpickle.loads(pickle) + logger.debug(f"Running pickled bundle at {fun.__name__}") + result = fun(*args, **kwargs) + self.adjust_task_count(-1) + return result def submit(self, function: Callable, *args, **kwargs) -> fut.Future: - # Bundle it up before submitting because pickle sucks. + start = time.time() + self.adjust_task_count(+1) pickle = make_cloud_pickle(function, *args, **kwargs) - self.job_count += 1 - logger.debug( - f'Submitting work to processpool executor; there are now {self.job_count} items.' + result = self._process_executor.submit( + self.run_cloud_pickle, + pickle + ) + result.add_done_callback( + lambda _: self.histogram.add_item( + time.time() - start + ) ) - return self._process_executor.submit(run_cloud_pickle, pickle) + return result def shutdown(self, wait=True) -> None: - logger.debug('Shutting down processpool executor') - print(HIST) + logger.debug(f'Shutting down processpool executor {self.title}') self._process_executor.shutdown(wait) + print(self.histogram) + + def __getstate__(self): + state = self.__dict__.copy() + state['_process_executor'] = None + return state @dataclass @@ -565,6 +572,7 @@ class RemoteExecutor(BaseExecutor): def launch(self, bundle: BundleDetails) -> Any: """Find a worker for bundle or block until one is available.""" + self.adjust_task_count(+1) uuid = bundle.uuid hostname = bundle.hostname avoid_machine = None @@ -648,6 +656,7 @@ class RemoteExecutor(BaseExecutor): # Whether original or backup, if we finished first we must # fetch the results if the computation happened on a # remote machine. + bundle.end_ts = time.time() if not was_cancelled: assert bundle.machine is not None if bundle.hostname not in bundle.machine: @@ -658,31 +667,24 @@ class RemoteExecutor(BaseExecutor): try: run_silently(cmd) except subprocess.CalledProcessError: - pass + logger.critical(f'Failed to copy {username}@{machine}:{result_file}!') run_silently(f'{SSH} {username}@{machine}' f' "/bin/rm -f {code_file} {result_file}"') - bundle.end_ts = time.time() + dur = bundle.end_ts - bundle.start_ts + self.histogram.add_item(dur) assert bundle.worker is not None self.status.record_release_worker_already_locked( bundle.worker, bundle.uuid, was_cancelled ) - if not was_cancelled: - dur = bundle.end_ts - bundle.start_ts - HIST.add_item(dur) - - # Original or not, the results should be back on the local - # machine. Are they? - if not os.path.exists(result_file): - msg = f'{result_file} unexpectedly missing, wtf?!' - logger.critical(msg) - bundle.failure_count += 1 - self.release_worker(bundle.worker) - raise Exception(msg) # Only the original worker should unpickle the file contents - # though since it's the only one whose result matters. + # though since it's the only one whose result matters. The + # original is also the only job that may delete result_file + # from disk. Note that the original may have been cancelled + # if one of the backups finished first; it still must read the + # result from disk. if is_original: logger.debug(f"Unpickling {result_file}.") try: @@ -709,11 +711,11 @@ class RemoteExecutor(BaseExecutor): ) backup.is_cancelled.set() - # This is a backup. + # This is a backup job. else: # Backup results don't matter, they just need to leave the - # result file in the right place for their original to - # read later. + # result file in the right place for their originals to + # read/unpickle later. result = None # Tell the original to stop if we finished first. @@ -725,6 +727,7 @@ class RemoteExecutor(BaseExecutor): assert bundle.worker is not None self.release_worker(bundle.worker) + self.adjust_task_count(-1) return result def create_original_bundle(self, pickle): @@ -811,14 +814,12 @@ class RemoteExecutor(BaseExecutor): pickle = make_cloud_pickle(function, *args, **kwargs) bundle = self.create_original_bundle(pickle) self.total_bundles_submitted += 1 - logger.debug( - f'Submitted work to remote executor; {self.total_bundles_submitted} items now submitted' - ) return self._helper_executor.submit(self.launch, bundle) def shutdown(self, wait=True) -> None: self._helper_executor.shutdown(wait) - print(HIST) + logging.debug(f'Shutting down RemoteExecutor {self.title}') + print(self.histogram) @singleton diff --git a/histogram.py b/histogram.py index 0368376..3391b0b 100644 --- a/histogram.py +++ b/histogram.py @@ -69,13 +69,15 @@ class SimpleHistogram(Generic[T]): def __repr__(self, label_formatter='%10s') -> str: from text_utils import bar_graph + max_population: Optional[int] = None for bucket in self.buckets: pop = self.buckets[bucket] if pop > 0: - last_bucket_start = bucket[0] + last_bucket_start = bucket[0] # beginning of range if max_population is None or pop > max_population: - max_population = pop + max_population = pop # bucket with max items + txt = "" if max_population is None: return txt diff --git a/logging_utils.py b/logging_utils.py index a0131b1..25919a7 100644 --- a/logging_utils.py +++ b/logging_utils.py @@ -95,7 +95,7 @@ cfg.add_argument( help='logging.info also prints to stdout.' ) -# See also: OutputMultiplexer/OutputContext +# See also: OutputMultiplexer cfg.add_argument( '--logging_captures_prints', action=argparse_utils.ActionNoYes, diff --git a/presence.py b/presence.py index c697124..947ff08 100755 --- a/presence.py +++ b/presence.py @@ -48,10 +48,10 @@ class PresenceDetection(object): # Note: list most important devices first. self.devices_by_person: Dict[Person, List[str]] = { Person.SCOTT: [ - "3C:28:6D:10:6D:41", - "D4:61:2E:88:18:09", - "6C:40:08:AE:DC:2E", - "14:7D:DA:6A:20:D7", + "3C:28:6D:10:6D:41", # pixel3 + "6C:40:08:AE:DC:2E", # laptop +# "D4:61:2E:88:18:09", # watch +# "14:7D:DA:6A:20:D7", # work laptop ], Person.LYNN: [ "08:CC:27:63:26:14", diff --git a/smart_future.py b/smart_future.py index 1c95973..7dbec50 100644 --- a/smart_future.py +++ b/smart_future.py @@ -36,6 +36,16 @@ def wait_any(futures: List[SmartFuture], *, callback: Callable = None): return +def wait_all(futures: List[SmartFuture]) -> None: + done_set = set() + while len(done_set) < len(futures): + for future in futures: + i = future.get_id() + if i not in done_set and future.wrapped_future.done(): + done_set.add(i) + time.sleep(0.1) + + class SmartFuture(DeferredOperand): """This is a SmartFuture, a class that wraps a normal Future and can then be used, mostly, like a normal (non-Future) identifier. diff --git a/string_utils.py b/string_utils.py index 45cf5aa..bca2b70 100644 --- a/string_utils.py +++ b/string_utils.py @@ -9,7 +9,7 @@ import logging import random import re import string -from typing import Any, Callable, Iterable, List, Optional +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple import unicodedata from uuid import uuid4 @@ -981,3 +981,37 @@ def bigrams(txt: str): def trigrams(txt: str): return ngrams(txt, 3) + + +def shuffle_columns( + txt: Iterable[str], + specs: Iterable[Iterable[int]], + delim='' +) -> Iterable[str]: + out = [] + for spec in specs: + chunk = '' + for n in spec: + chunk = chunk + delim + txt[n] + chunk = chunk.strip(delim) + out.append(chunk) + return out + + +def shuffle_columns_into_dict( + txt: Iterable[str], + specs: Iterable[Tuple[str, Iterable[int]]], + delim='' +) -> Dict[str, str]: + out = {} + for spec in specs: + chunk = '' + for n in spec[1]: + chunk = chunk + delim + txt[n] + chunk = chunk.strip(delim) + out[spec[0]] = chunk + return out + + +def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str: + return sprintf(txt.format(**values), end='') diff --git a/tests/ansi_test.py b/tests/ansi_test.py deleted file mode 100755 index 4c1f449..0000000 --- a/tests/ansi_test.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 - -import unittest - -import ansi -import unittest_utils as uu - - -class TestAnsi(unittest.TestCase): - - def test_colorizer(self): - with ansi.Colorizer() as c: - print("testing...") - print("Section:") - print(" This is some detail.") - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/dateparse_utils_test.py b/tests/dateparse_utils_test.py index ff16e01..3b3b802 100755 --- a/tests/dateparse_utils_test.py +++ b/tests/dateparse_utils_test.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 import datetime +import random +import re import unittest import pytz @@ -9,6 +11,176 @@ import dateparse.dateparse_utils as du import unittest_utils as uu +parsable_expressions = [ + ('today', + datetime.datetime(2021, 7, 2)), + ('tomorrow', + datetime.datetime(2021, 7, 3)), + ('yesterday', + datetime.datetime(2021, 7, 1)), + ('21:30', + datetime.datetime(2021, 7, 2, 21, 30, 0, 0)), + ('12:01am', + datetime.datetime(2021, 7, 2, 0, 1, 0, 0)), + ('12:02p', + datetime.datetime(2021, 7, 2, 12, 2, 0, 0)), + ('0:03', + datetime.datetime(2021, 7, 2, 0, 3, 0, 0)), + ('last wednesday', + datetime.datetime(2021, 6, 30)), + ('this wed', + datetime.datetime(2021, 7, 7)), + ('next wed', + datetime.datetime(2021, 7, 14)), + ('this coming tues', + datetime.datetime(2021, 7, 6)), + ('this past monday', + datetime.datetime(2021, 6, 28)), + ('4 days ago', + datetime.datetime(2021, 6, 28)), + ('4 mondays ago', + datetime.datetime(2021, 6, 7)), + ('4 months ago', + datetime.datetime(2021, 3, 2)), + ('3 days back', + datetime.datetime(2021, 6, 29)), + ('13 weeks from now', + datetime.datetime(2021, 10, 1)), + ('1 year from now', + datetime.datetime(2022, 7, 2)), + ('4 weeks from now', + datetime.datetime(2021, 7, 30)), + ('3 saturdays ago', + datetime.datetime(2021, 6, 12)), + ('4 months from today', + datetime.datetime(2021, 11, 2)), + ('4 years from yesterday', + datetime.datetime(2025, 7, 1)), + ('4 weeks from tomorrow', + datetime.datetime(2021, 7, 31)), + ('april 15, 2005', + datetime.datetime(2005, 4, 15)), + ('april 14', + datetime.datetime(2021, 4, 14)), + ('9:30am on last wednesday', + datetime.datetime(2021, 6, 30, 9, 30)), + ('2005/apr/15', + datetime.datetime(2005, 4, 15)), + ('2005 apr 15', + datetime.datetime(2005, 4, 15)), + ('the 1st wednesday in may', + datetime.datetime(2021, 5, 5)), + ('last sun of june', + datetime.datetime(2021, 6, 27)), + ('this Easter', + datetime.datetime(2021, 4, 4)), + ('last christmas', + datetime.datetime(2020, 12, 25)), + ('last Xmas', + datetime.datetime(2020, 12, 25)), + ('xmas, 1999', + datetime.datetime(1999, 12, 25)), + ('next mlk day', + datetime.datetime(2022, 1, 17)), + ('Halloween, 2020', + datetime.datetime(2020, 10, 31)), + ('5 work days after independence day', + datetime.datetime(2021, 7, 12)), + ('50 working days from last wed', + datetime.datetime(2021, 9, 10)), + ('25 working days before columbus day', + datetime.datetime(2021, 9, 3)), + ('today +1 week', + datetime.datetime(2021, 7, 9)), + ('sunday -3 weeks', + datetime.datetime(2021, 6, 13)), + ('4 weeks before xmas, 1999', + datetime.datetime(1999, 11, 27)), + ('3 days before new years eve, 2000', + datetime.datetime(2000, 12, 28)), + ('july 4th', + datetime.datetime(2021, 7, 4)), + ('the ides of march', + datetime.datetime(2021, 3, 15)), + ('the nones of april', + datetime.datetime(2021, 4, 5)), + ('the kalends of may', + datetime.datetime(2021, 5, 1)), + ('9/11/2001', + datetime.datetime(2001, 9, 11)), + ('4 sundays before veterans\' day', + datetime.datetime(2021, 10, 17)), + ('xmas eve', + datetime.datetime(2021, 12, 24)), + ('this friday at 5pm', + datetime.datetime(2021, 7, 9, 17, 0, 0)), + ('presidents day', + datetime.datetime(2021, 2, 15)), + ('memorial day, 1921', + datetime.datetime(1921, 5, 30)), + ('today -4 wednesdays', + datetime.datetime(2021, 6, 9)), + ('thanksgiving', + datetime.datetime(2021, 11, 25)), + ('2 sun in jun', + datetime.datetime(2021, 6, 13)), + ('easter -40 days', + datetime.datetime(2021, 2, 23)), + ('easter +39 days', + datetime.datetime(2021, 5, 13)), + ('1st tuesday in nov, 2024', + datetime.datetime(2024, 11, 5)), + ('2 days before last xmas at 3:14:15.92a', + datetime.datetime(2020, 12, 23, 3, 14, 15, 92)), + ('3 weeks after xmas, 1995 at midday', + datetime.datetime(1996, 1, 15, 12, 0, 0)), + ('4 months before easter, 1992 at midnight', + datetime.datetime(1991, 12, 19)), + ('5 months before halloween, 1995 at noon', + datetime.datetime(1995, 5, 31, 12)), + ('4 days before last wednesday', + datetime.datetime(2021, 6, 26)), + ('44 months after today', + datetime.datetime(2025, 3, 2)), + ('44 years before today', + datetime.datetime(1977, 7, 2)), + ('44 weeks ago', + datetime.datetime(2020, 8, 28)), + ('15 minutes to 3am', + datetime.datetime(2021, 7, 2, 2, 45)), + ('quarter past 4pm', + datetime.datetime(2021, 7, 2, 16, 15)), + ('half past 9', + datetime.datetime(2021, 7, 2, 9, 30)), + ('4 seconds to midnight', + datetime.datetime(2021, 7, 1, 23, 59, 56)), + ('4 seconds to midnight, tomorrow', + datetime.datetime(2021, 7, 2, 23, 59, 56)), + ('2021/apr/15T21:30:44.55', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), + ('2021/apr/15 at 21:30:44.55', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), + ('2021/4/15 at 21:30:44.55', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), + ('2021/04/15 at 21:30:44.55', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), + ('2021/04/15 at 21:30:44.55Z', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55, + tzinfo=pytz.timezone('UTC'))), + ('2021/04/15 at 21:30:44.55EST', + datetime.datetime(2021, 4, 15, 21, 30, 44, 55, + tzinfo=pytz.timezone('EST'))), + ('13 days after last memorial day at 12 seconds before 4pm', + datetime.datetime(2020, 6, 7, 15, 59, 48)), + (' 2 days before yesterday at 9am ', + datetime.datetime(2021, 6, 29, 9)), + ('-3 days before today', + datetime.datetime(2021, 7, 5)), + ('3 days before yesterday at midnight EST', + datetime.datetime(2021, 6, 28, tzinfo=pytz.timezone('EST'))) +] + + class TestDateparseUtils(unittest.TestCase): @uu.check_method_for_perf_regressions @@ -16,178 +188,30 @@ class TestDateparseUtils(unittest.TestCase): dp = du.DateParser( override_now_for_test_purposes = datetime.datetime(2021, 7, 2) ) - parsable_expressions = [ - ('today', - datetime.datetime(2021, 7, 2)), - ('tomorrow', - datetime.datetime(2021, 7, 3)), - ('yesterday', - datetime.datetime(2021, 7, 1)), - ('21:30', - datetime.datetime(2021, 7, 2, 21, 30, 0, 0)), - ('12:01am', - datetime.datetime(2021, 7, 2, 0, 1, 0, 0)), - ('12:02p', - datetime.datetime(2021, 7, 2, 12, 2, 0, 0)), - ('0:03', - datetime.datetime(2021, 7, 2, 0, 3, 0, 0)), - ('last wednesday', - datetime.datetime(2021, 6, 30)), - ('this wed', - datetime.datetime(2021, 7, 7)), - ('next wed', - datetime.datetime(2021, 7, 14)), - ('this coming tues', - datetime.datetime(2021, 7, 6)), - ('this past monday', - datetime.datetime(2021, 6, 28)), - ('4 days ago', - datetime.datetime(2021, 6, 28)), - ('4 mondays ago', - datetime.datetime(2021, 6, 7)), - ('4 months ago', - datetime.datetime(2021, 3, 2)), - ('3 days back', - datetime.datetime(2021, 6, 29)), - ('13 weeks from now', - datetime.datetime(2021, 10, 1)), - ('1 year from now', - datetime.datetime(2022, 7, 2)), - ('4 weeks from now', - datetime.datetime(2021, 7, 30)), - ('3 saturdays ago', - datetime.datetime(2021, 6, 12)), - ('4 months from today', - datetime.datetime(2021, 11, 2)), - ('4 years from yesterday', - datetime.datetime(2025, 7, 1)), - ('4 weeks from tomorrow', - datetime.datetime(2021, 7, 31)), - ('april 15, 2005', - datetime.datetime(2005, 4, 15)), - ('april 14', - datetime.datetime(2021, 4, 14)), - ('9:30am on last wednesday', - datetime.datetime(2021, 6, 30, 9, 30)), - ('2005/apr/15', - datetime.datetime(2005, 4, 15)), - ('2005 apr 15', - datetime.datetime(2005, 4, 15)), - ('the 1st wednesday in may', - datetime.datetime(2021, 5, 5)), - ('last sun of june', - datetime.datetime(2021, 6, 27)), - ('this Easter', - datetime.datetime(2021, 4, 4)), - ('last christmas', - datetime.datetime(2020, 12, 25)), - ('last Xmas', - datetime.datetime(2020, 12, 25)), - ('xmas, 1999', - datetime.datetime(1999, 12, 25)), - ('next mlk day', - datetime.datetime(2022, 1, 17)), - ('Halloween, 2020', - datetime.datetime(2020, 10, 31)), - ('5 work days after independence day', - datetime.datetime(2021, 7, 12)), - ('50 working days from last wed', - datetime.datetime(2021, 9, 10)), - ('25 working days before columbus day', - datetime.datetime(2021, 9, 3)), - ('today +1 week', - datetime.datetime(2021, 7, 9)), - ('sunday -3 weeks', - datetime.datetime(2021, 6, 13)), - ('4 weeks before xmas, 1999', - datetime.datetime(1999, 11, 27)), - ('3 days before new years eve, 2000', - datetime.datetime(2000, 12, 28)), - ('july 4th', - datetime.datetime(2021, 7, 4)), - ('the ides of march', - datetime.datetime(2021, 3, 15)), - ('the nones of april', - datetime.datetime(2021, 4, 5)), - ('the kalends of may', - datetime.datetime(2021, 5, 1)), - ('9/11/2001', - datetime.datetime(2001, 9, 11)), - ('4 sundays before veterans\' day', - datetime.datetime(2021, 10, 17)), - ('xmas eve', - datetime.datetime(2021, 12, 24)), - ('this friday at 5pm', - datetime.datetime(2021, 7, 9, 17, 0, 0)), - ('presidents day', - datetime.datetime(2021, 2, 15)), - ('memorial day, 1921', - datetime.datetime(1921, 5, 30)), - ('today -4 wednesdays', - datetime.datetime(2021, 6, 9)), - ('thanksgiving', - datetime.datetime(2021, 11, 25)), - ('2 sun in jun', - datetime.datetime(2021, 6, 13)), - ('easter -40 days', - datetime.datetime(2021, 2, 23)), - ('easter +39 days', - datetime.datetime(2021, 5, 13)), - ('1st tuesday in nov, 2024', - datetime.datetime(2024, 11, 5)), - ('2 days before last xmas at 3:14:15.92a', - datetime.datetime(2020, 12, 23, 3, 14, 15, 92)), - ('3 weeks after xmas, 1995 at midday', - datetime.datetime(1996, 1, 15, 12, 0, 0)), - ('4 months before easter, 1992 at midnight', - datetime.datetime(1991, 12, 19)), - ('5 months before halloween, 1995 at noon', - datetime.datetime(1995, 5, 31, 12)), - ('4 days before last wednesday', - datetime.datetime(2021, 6, 26)), - ('44 months after today', - datetime.datetime(2025, 3, 2)), - ('44 years before today', - datetime.datetime(1977, 7, 2)), - ('44 weeks ago', - datetime.datetime(2020, 8, 28)), - ('15 minutes to 3am', - datetime.datetime(2021, 7, 2, 2, 45)), - ('quarter past 4pm', - datetime.datetime(2021, 7, 2, 16, 15)), - ('half past 9', - datetime.datetime(2021, 7, 2, 9, 30)), - ('4 seconds to midnight', - datetime.datetime(2021, 7, 1, 23, 59, 56)), - ('4 seconds to midnight, tomorrow', - datetime.datetime(2021, 7, 2, 23, 59, 56)), - ('2021/apr/15T21:30:44.55', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), - ('2021/apr/15 at 21:30:44.55', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), - ('2021/4/15 at 21:30:44.55', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), - ('2021/04/15 at 21:30:44.55', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55)), - ('2021/04/15 at 21:30:44.55Z', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55, - tzinfo=pytz.timezone('UTC'))), - ('2021/04/15 at 21:30:44.55EST', - datetime.datetime(2021, 4, 15, 21, 30, 44, 55, - tzinfo=pytz.timezone('EST'))), - ('13 days after last memorial day at 12 seconds before 4pm', - datetime.datetime(2020, 6, 7, 15, 59, 48)), - (' 2 days before yesterday at 9am ', - datetime.datetime(2021, 6, 29, 9)), - ('-3 days before today', - datetime.datetime(2021, 7, 5)), - ('3 days before yesterday at midnight EST', - datetime.datetime(2021, 6, 28, tzinfo=pytz.timezone('EST'))), - ] for (txt, expected_dt) in parsable_expressions: try: - print(f'> {txt}') + actual_dt = dp.parse(txt) + self.assertIsNotNone(actual_dt) + self.assertEqual( + actual_dt, + expected_dt, + f'"{txt}", got "{actual_dt}" while expecting "{expected_dt}"' + ) + except du.ParseException: + self.fail(f'Expected "{txt}" to parse successfully.') + + def test_whitespace_handling(self): + dp = du.DateParser( + override_now_for_test_purposes = datetime.datetime(2021, 7, 2) + ) + + for (txt, expected_dt) in parsable_expressions: + try: + txt = f' {txt} ' + i = random.randint(2, 5) + replacement = ' ' * i + txt = re.sub('\s', replacement, txt) actual_dt = dp.parse(txt) self.assertIsNotNone(actual_dt) self.assertEqual( diff --git a/tests/parallelize_test.py b/tests/parallelize_test.py index 44f723c..d87b5e7 100755 --- a/tests/parallelize_test.py +++ b/tests/parallelize_test.py @@ -8,6 +8,23 @@ import parallelize as p import decorator_utils import executors import math_utils +import smart_future + + +@p.parallelize(method=p.Method.THREAD) +def compute_factorial_thread(n): + total = 1 + for x in range(2, n): + total *= x + return total + + +@p.parallelize(method=p.Method.PROCESS) +def compute_factorial_process(n): + total = 1 + for x in range(2, n): + total *= x + return total @p.parallelize(method=p.Method.REMOTE) @@ -21,6 +38,19 @@ def list_primes(n): @decorator_utils.timed def driver() -> None: + results = [] + for _ in range(20): + results.append(compute_factorial_process(_)) + for future in smart_future.wait_any(results): + print(f'Process: {future}') + + results = [] + for _ in range(20): + results.append(compute_factorial_thread(_)) + smart_future.wait_all(results) + for future in results: + print(f'Thread: {future}') + results = {} for _ in range(50): n = random.randint(0, 100000) diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh index 13aa2fb..c2f9f93 100755 --- a/tests/run_all_tests.sh +++ b/tests/run_all_tests.sh @@ -1,6 +1,8 @@ #!/bin/bash for test in $(ls *_test.py); do - echo "------------------------- ${test} -------------------------" - ${test} + if [ "${test}" != "parallelize_test.py" ]; then + echo "------------------------- ${test} -------------------------" + ${test} + fi done diff --git a/text_utils.py b/text_utils.py index 1a8fa18..3be32ff 100644 --- a/text_utils.py +++ b/text_utils.py @@ -169,6 +169,19 @@ def generate_padded_columns(text: List[str]) -> str: yield out +def wrap_string(text: str, n: int) -> str: + chunks = text.split() + out = '' + width = 0 + for chunk in chunks: + if width + len(chunk) > n: + out += '\n' + width = 0 + out += chunk + ' ' + width += len(chunk) + 1 + return out + + class Indenter: """ with Indenter(pad_count = 8) as i: -- 2.51.1