Source code for pyutils.dict_utils

#!/usr/bin/env python3

# © Copyright 2021-2023, Scott Gasch

"""This module contains helper functions for dealing with Python dictionaries."""

from itertools import islice
from typing import Any, Callable, Dict, Hashable, Iterable, Iterator, List, Tuple

from pyutils import dataclass_utils
from pyutils.typez.type_hints import Comparable

# Alias for a dict with any hashable key and any value.
AnyDict = Dict[Hashable, Any]
# Alias for a dict whose keys are of the project's Comparable type.
DictWithComparableKeys = Dict[Comparable, Any]


def init_or_inc(
    d: AnyDict,
    key: Hashable,
    *,
    init_value: Any = 1,
    inc_function: Callable[..., Any] = lambda x: x + 1,
) -> bool:
    """Initialize a dict value if the key is absent, otherwise increment it.

    The increment is performed by `inc_function`, which is customizable.
    See also :py:class:`defaultdict`
    (https://docs.python.org/3/library/collections.html#collections.defaultdict)
    for a more pythonic alternative.

    Args:
        d: the dict to increment or initialize a value in
        key: the key to increment or initialize
        init_value: default initial value (see also :meth:`dict.setdefault`)
        inc_function: Callable used to increment a value; it receives the
            current value and its return value is stored back under `key`

    Returns:
        True if the key already existed or False otherwise

    See also: :py:class:`collections.defaultdict` and
    :py:class:`collections.Counter`.

    >>> d = {}
    >>> init_or_inc(d, "test")
    False
    >>> init_or_inc(d, "test")
    True
    >>> init_or_inc(d, 'ing')
    False
    >>> d
    {'test': 2, 'ing': 1}
    """
    # Test membership on the dict itself; a keys() view adds nothing here.
    if key in d:
        d[key] = inc_function(d[key])
        return True
    d[key] = init_value
    return False
def shard(d: AnyDict, size: int) -> Iterator[AnyDict]:
    """Shard (i.e. split) a dict into sub-dicts of at most `size` items.

    Together the shards contain all keys/values from the original
    unsharded dict, in the dict's iteration order.

    Args:
        d: the input dict to be sharded (split)
        size: the ideal shard size (number of elements per shard)

    Returns:
        A generator that yields subsequent shards.

    Raises:
        ValueError: if `size` is zero (raised when the generator is first
            advanced).
        RuntimeError: may be raised if `d` changes size while shards are
            still being consumed.

    .. note::

        If `len(d)` is not an even multiple of `size` then the last
        shard will not have `size` items in it.  It will have
        `len(d) % size` items instead.

    >>> d = {
    ...     'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
    ...     'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
    ... }
    >>> for r in shard(d, 5):
    ...     r
    {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
    {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
    {'k': 11, 'l': 12}
    """
    # One shared iterator: each islice() call resumes where the previous
    # shard stopped instead of re-walking the dict from its first item.
    remaining = iter(d.items())
    # range() decides how many shards there are (and rejects size == 0).
    for _ in range(0, len(d), size):
        yield dict(islice(remaining, size))
def coalesce_by_creating_list(_, new_value, old_value):
    """Collision helper for :meth:`coalesce` that combines values in a list.

    Args:
        _: the colliding key (unused)
        new_value: the value from the dict currently being merged in
        old_value: the value already stored for this key

    Returns:
        The result of :func:`pyutils.list_utils.flatten` applied to
        ``[new_value, old_value]``.
    """
    # Imported at call time rather than at module import.
    from pyutils.list_utils import flatten

    combined = [new_value, old_value]
    return flatten(combined)
def coalesce_by_creating_set(key, new_value, old_value):
    """Collision helper for :meth:`coalesce` that combines values in a set.

    Args:
        key: the colliding key
        new_value: the value from the dict currently being merged in
        old_value: the value already stored for this key

    Returns:
        A set built from the output of :meth:`coalesce_by_creating_list`
        for the same arguments.
    """
    merged = coalesce_by_creating_list(key, new_value, old_value)
    return set(merged)
def coalesce_last_write_wins(_, new_value, discarded_old_value):
    """Collision helper for :meth:`coalesce` that keeps the newest value.

    Args:
        _: the colliding key (unused)
        new_value: the value from the dict currently being merged in
        discarded_old_value: the value already stored for this key (dropped)

    Returns:
        `new_value`, unchanged.
    """
    return new_value
def coalesce_first_write_wins(_, discarded_new_value, old_value):
    """Collision helper for :meth:`coalesce` that keeps the earliest value.

    Args:
        _: the colliding key (unused)
        discarded_new_value: the value from the dict currently being
            merged in (dropped)
        old_value: the value already stored for this key

    Returns:
        `old_value`, unchanged.
    """
    return old_value
def raise_on_duplicated_keys(key, new_value, old_value):
    """Collision helper for :meth:`coalesce` that rejects duplicated keys.

    Args:
        key: the colliding key; it is named in the error message
        new_value: the value from the dict currently being merged in (unused)
        old_value: the value already stored for this key (unused)

    Raises:
        KeyError: always.
    """
    message = f'Key {key} is duplicated in more than one input dict.'
    raise KeyError(message)
def coalesce(
    inputs: Iterable[AnyDict],
    *,
    aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
) -> AnyDict:
    """Coalesce (i.e. combine) N input dicts into one output dict
    containing the union of all keys / values in every input dict.
    When keys collide, apply the aggregation_function which, by
    default, creates a list of values with the same key in the output
    dict.

    Args:
        inputs: an iterable of dicts to coalesce (a list, tuple,
            generator, ...); it is traversed once, in order
        aggregation_function: a Callable to deal with key collisions; one of
            the below functions already defined or your own strategy:

            * :meth:`coalesce_by_creating_list` creates a list of values
              with the same key in the output dict.
            * :meth:`coalesce_by_creating_set` creates a set of values with
              the same key in the output dict.
            * :meth:`coalesce_first_write_wins` only preserves the first
              value with a duplicated key.  Others are dropped silently.
            * :meth:`coalesce_last_write_wins` only preserves the last
              value with a duplicated key.  Others are dropped silently.
            * :meth:`raise_on_duplicated_keys` raises an Exception on
              duplicated keys; use when keys should never collide.
            * Your own strategy; Callables will be passed the key and
              two values (the incoming value first, then the value already
              in the output) and can return whatever they want which will
              be stored in the output dict.

    Returns:
        The coalesced output dict.

    >>> a = {'a': 1, 'b': 2}
    >>> b = {'b': 1, 'c': 2, 'd': 3}
    >>> c = {'c': 1, 'd': 2}
    >>> coalesce([a, b, c])
    {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}

    >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
    {'a': 1, 'b': 1, 'c': 1, 'd': 2}

    >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
    Traceback (most recent call last):
    ...
    KeyError: 'Key b is duplicated in more than one input dict.'
    """
    out: AnyDict = {}
    for d in inputs:
        for key, value in d.items():
            if key in out:
                # Collision: the strategy decides what is stored.
                out[key] = aggregation_function(key, value, out[key])
            else:
                out[key] = value
    return out
def item_with_max_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """Find the dict entry that holds the largest value.

    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the highest value in the
        dict as a `Tuple[key, value]`.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_max_value(d)
    ('c', 3)
    >>> item_with_max_value({})  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: max() arg is an empty sequence
    """

    def value_of(item):
        # Rank (key, value) pairs by their value only.
        return item[1]

    return max(d.items(), key=value_of)
def item_with_min_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """Find the dict entry that holds the smallest value.

    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the lowest value in the
        dict as a `Tuple[key, value]`.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_min_value(d)
    ('a', 1)
    """

    def value_of(item):
        # Rank (key, value) pairs by their value only.
        return item[1]

    return min(d.items(), key=value_of)
def key_with_max_value(d: AnyDict) -> Hashable:
    """Return the key whose associated value is the largest in the dict.

    Args:
        d: a dict with comparable values

    Returns:
        The key that maps to the maximum value.  If several keys share
        the maximum value, the first one in iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note::

        Values are compared, not keys.  To find the largest key
        regardless of values, use :meth:`max_key`.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> key_with_max_value(d)
    'a'
    """
    # Rank (key, value) pairs by value, then keep only the winning key.
    best_key, _ = max(d.items(), key=lambda item: item[1])
    return best_key
def key_with_min_value(d: AnyDict) -> Hashable:
    """Return the key whose associated value is the smallest in the dict.

    Args:
        d: a dict with comparable values

    Returns:
        The key that maps to the minimum value.  If several keys share
        the minimum value, the first one in iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note::

        Values are compared, not keys.  To find the smallest key
        regardless of values, use :meth:`min_key`.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> key_with_min_value(d)
    'c'
    """
    # Rank (key, value) pairs by value, then keep only the winning key.
    best_key, _ = min(d.items(), key=lambda item: item[1])
    return best_key
def max_value(d: AnyDict) -> Any:
    """Return the largest value stored in the dict.

    Args:
        d: a dict with comparable values

    Returns:
        The maximum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> max_value(d)
    3
    """
    return max(d.values())
def min_value(d: AnyDict) -> Any:
    """Return the smallest value stored in the dict.

    Args:
        d: a dict with comparable values

    Returns:
        The minimum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> min_value(d)
    1
    """
    return min(d.values())
def max_key(d: DictWithComparableKeys) -> Comparable:
    """Return the largest key in the dict.

    Args:
        d: a dict with comparable keys

    Returns:
        The maximum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the maximum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> max_key(d)
    'c'
    """
    # Iterating a dict yields its keys.
    return max(d)
def min_key(d: DictWithComparableKeys) -> Comparable:
    """Return the smallest key in the dict.

    Args:
        d: a dict with comparable keys

    Returns:
        The minimum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the minimum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> min_key(d)
    'a'
    """
    # Iterating a dict yields its keys.
    return min(d)
def parallel_lists_to_dict(keys: List[Hashable], values: List[Any]) -> AnyDict:
    """Build a dict by pairing up two parallel lists of keys and values.

    Args:
        keys: list containing keys and no duplicated keys
        values: a parallel list (to keys) containing values

    Returns:
        A dict composed of zipping the keys list and values list together.

    Raises:
        ValueError: if the keys and values lists are not the same length.

    >>> k = ['name', 'phone', 'address', 'zip']
    >>> v = ['scott', '555-1212', '123 main st.', '12345']
    >>> parallel_lists_to_dict(k, v)
    {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    """
    key_count, value_count = len(keys), len(values)
    if key_count != value_count:
        raise ValueError("Parallel keys and values lists must have the same length")
    pairs = zip(keys, values)
    return dict(pairs)
def dict_to_key_value_lists(d: AnyDict) -> Tuple[List[Hashable], List[Any]]:
    """Split a dict into two parallel lists: its keys and its values.

    Args:
        d: a dict

    Returns:
        A tuple of two elements: the first is the keys list and the second
        is the values list.  Both follow the dict's iteration order.

    >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    >>> (k, v) = dict_to_key_value_lists(d)
    >>> k
    ['name', 'phone', 'address', 'zip']
    >>> v
    ['scott', '555-1212', '123 main st.', '12345']
    """
    key_list: List[Hashable] = list(d)
    value_list: List[Any] = list(d.values())
    return (key_list, value_list)
# Re-export two dataclass_utils helpers under dict-centric names.
dict_to_dataclass = dataclass_utils.dataclass_from_dict
dict_from_dataclass = dataclass_utils.dataclass_to_dict

if __name__ == '__main__':
    # Running this module directly executes the doctests in its docstrings.
    import doctest

    doctest.testmod()