X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=dict_utils.py;h=573e683c1d84d2f0f11d18e61fa9f950f585e1c4;hb=562a15c6397610cf93646d9530005eb4a0d6e6f8;hp=92fd1e06a21d06a3205a138639f9cf013cb055ec;hpb=4c315e387f18010ba0b5661744ad3c792f21d2d1;p=python_utils.git diff --git a/dict_utils.py b/dict_utils.py index 92fd1e0..573e683 100644 --- a/dict_utils.py +++ b/dict_utils.py @@ -1,7 +1,11 @@ #!/usr/bin/env python3 +# © Copyright 2021-2022, Scott Gasch + +"""Helper functions for dealing with dictionaries.""" + from itertools import islice -from typing import Any, Callable, Dict, Iterator, Tuple +from typing import Any, Callable, Dict, Iterator, List, Tuple def init_or_inc( @@ -9,7 +13,7 @@ def init_or_inc( key: Any, *, init_value: Any = 1, - inc_function: Callable[..., Any] = lambda x: x + 1 + inc_function: Callable[..., Any] = lambda x: x + 1, ) -> bool: """ Initialize a dict value (if it doesn't exist) or increments it (using the @@ -41,31 +45,57 @@ def shard(d: Dict[Any, Any], size: int) -> Iterator[Dict[Any, Any]]: """ items = d.items() for x in range(0, len(d), size): - yield {key: value for (key, value) in islice(items, x, x + size)} + yield dict(islice(items, x, x + size)) -def coalesce_by_creating_list(key, v1, v2): +def coalesce_by_creating_list(_, new_value, old_value): + """Helper for use with :meth:`coalesce` that creates a list on + collision.""" from list_utils import flatten - return flatten([v1, v2]) + + return flatten([new_value, old_value]) + + +def coalesce_by_creating_set(key, new_value, old_value): + """Helper for use with :meth:`coalesce` that creates a set on + collision.""" + return set(coalesce_by_creating_list(key, new_value, old_value)) + + +def coalesce_last_write_wins(_, new_value, discarded_old_value): + """Helper for use with :meth:`coalsce` that klobbers the old + with the new one on collision.""" + return new_value -def coalesce_by_creating_set(key, v1, v2): - return set(coalesce_by_creating_list(key, v1, v2)) +def coalesce_first_write_wins(_, discarded_new_value, old_value): + """Helper for use with :meth:`coalsce` that preserves the old + value and discards the new one on collision.""" + return old_value -def raise_on_duplicated_keys(key, v1, v2): +def raise_on_duplicated_keys(key, new_value, old_value): + """Helper for use with :meth:`coalesce` that raises an exception + when a collision is detected. + """ raise Exception(f'Key {key} is duplicated in more than one input dict.') def coalesce( - inputs: Iterator[Dict[Any, Any]], - *, - aggregation_function: Callable[[Any, Any], Any] = coalesce_by_creating_list + inputs: Iterator[Dict[Any, Any]], + *, + aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list, ) -> Dict[Any, Any]: - """Merge N dicts into one dict containing the union of all keys/values in - the input dicts. When keys collide, apply the aggregation_function which, - by default, creates a list of values. See also coalesce_by_creating_set or - provide a user defined aggregation_function. + """Merge N dicts into one dict containing the union of all keys / + values in the input dicts. When keys collide, apply the + aggregation_function which, by default, creates a list of values. + See also several other alternative functions for coalescing values: + + * :meth:`coalesce_by_creating_set` + * :meth:`coalesce_first_write_wins` + * :meth:`coalesce_last_write_wins` + * :meth:`raise_on_duplicated_keys` + * or provive your own collision resolution code. >>> a = {'a': 1, 'b': 2} >>> b = {'b': 1, 'c': 2, 'd': 3} @@ -73,6 +103,14 @@ def coalesce( >>> coalesce([a, b, c]) {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]} + >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins) + {'a': 1, 'b': 1, 'c': 1, 'd': 2} + + >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys) + Traceback (most recent call last): + ... + Exception: Key b is duplicated in more than one input dict. + """ out: Dict[Any, Any] = {} for d in inputs: @@ -86,7 +124,7 @@ def coalesce( def item_with_max_value(d: Dict[Any, Any]) -> Tuple[Any, Any]: - """Returns the key and value with the max value in a dict. + """Returns the key and value of the item with the max value in a dict. >>> d = {'a': 1, 'b': 2, 'c': 3} >>> item_with_max_value(d) @@ -101,7 +139,7 @@ def item_with_max_value(d: Dict[Any, Any]) -> Tuple[Any, Any]: def item_with_min_value(d: Dict[Any, Any]) -> Tuple[Any, Any]: - """Returns the key and value with the min value in a dict. + """Returns the key and value of the item with the min value in a dict. >>> d = {'a': 1, 'b': 2, 'c': 3} >>> item_with_min_value(d) @@ -177,6 +215,39 @@ def min_key(d: Dict[Any, Any]) -> Any: return min(d.keys()) +def parallel_lists_to_dict(keys: List[Any], values: List[Any]) -> Dict[Any, Any]: + """Given two parallel lists (keys and values), create and return + a dict. + + >>> k = ['name', 'phone', 'address', 'zip'] + >>> v = ['scott', '555-1212', '123 main st.', '12345'] + >>> parallel_lists_to_dict(k, v) + {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'} + + """ + if len(keys) != len(values): + raise Exception("Parallel keys and values lists must have the same length") + return dict(zip(keys, values)) + + +def dict_to_key_value_lists(d: Dict[Any, Any]) -> Tuple[List[Any], List[Any]]: + """ + >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'} + >>> (k, v) = dict_to_key_value_lists(d) + >>> k + ['name', 'phone', 'address', 'zip'] + >>> v + ['scott', '555-1212', '123 main st.', '12345'] + + """ + r: Tuple[List[Any], List[Any]] = ([], []) + for (k, v) in d.items(): + r[0].append(k) + r[1].append(v) + return r + + if __name__ == '__main__': import doctest + doctest.testmod()