# Source code for pyutils.dict_utils

#!/usr/bin/env python3

# © Copyright 2021-2023, Scott Gasch

"""This module contains helper functions for dealing with Python dictionaries."""

from itertools import islice
from typing import Any, Callable, Dict, Hashable, Iterable, Iterator, List, Tuple

from pyutils import dataclass_utils
from pyutils.typez.type_hints import Comparable

# Alias for the most general dict accepted by the helpers in this module:
# any hashable key mapped to any value.
AnyDict = Dict[Hashable, Any]
# Alias for a dict keyed by ``Comparable`` (imported from
# pyutils.typez.type_hints); presumably keys that can be ordered against
# each other -- confirm against that module.
DictWithComparableKeys = Dict[Comparable, Any]



[docs]
def init_or_inc(
    d: AnyDict,
    key: Hashable,
    *,
    init_value: Any = 1,
    inc_function: Callable[..., Any] = lambda x: x + 1,
) -> bool:
    """Store `init_value` under `key` when the key is missing from `d`;
    otherwise replace the stored value with `inc_function(current value)`.

    See also :py:class:`collections.defaultdict`
    (https://docs.python.org/3/library/collections.html#collections.defaultdict)
    and :py:class:`collections.Counter` for more pythonic alternatives.

    Args:
        d: the dict to update in place
        key: the key whose value is initialized or incremented
        init_value: value stored when `key` is not yet present (see also
            :meth:`dict.setdefault`)
        inc_function: Callable applied to the current value to compute its
            replacement when `key` is already present

    Returns:
        True if the key already existed or False otherwise

    >>> d = {}
    >>> init_or_inc(d, "test")
    False
    >>> init_or_inc(d, "test")
    True
    >>> init_or_inc(d, 'ing')
    False
    >>> d
    {'test': 2, 'ing': 1}

    """
    already_present = key in d
    # Exactly one write happens either way; only the stored value differs.
    d[key] = inc_function(d[key]) if already_present else init_value
    return already_present




[docs]
def shard(d: AnyDict, size: int) -> Iterator[AnyDict]:
    """
    Shards (i.e. splits) a dict into N subdicts which, together,
    contain all keys/values from the original unsharded dict.

    Args:
        d: the input dict to be sharded (split)
        size: the ideal shard size (number of elements per shard)

    Returns:
        A generator that yields subsequent shards, in the dict's
        iteration order.

    Raises:
        ValueError: if `size` is zero; because this is a generator the
            error surfaces when the first shard is requested.

    .. note::

        If `len(d)` is not an even multiple of `size` then the last
        shard will not have `size` items in it.  It will have
        `len(d) % size` items instead.  A negative `size` yields no
        shards at all.

    .. note::

        `d` is walked with a single iterator, so keys should not be
        added to or removed from it while shards are being consumed.

    >>> d = {
    ...     'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
    ...     'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
    ... }
    >>> for r in shard(d, 5):
    ...     r
    {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
    {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
    {'k': 11, 'l': 12}
    """
    # Share one iterator across all shards so each item is visited exactly
    # once; re-slicing the items view from the start for every shard would
    # re-skip all previously emitted items (quadratic work overall).
    remaining = iter(d.items())
    # range() supplies the shard count and rejects a zero step.
    for _ in range(0, len(d), size):
        yield dict(islice(remaining, size))




[docs]
def coalesce_by_creating_list(_, new_value, old_value):
    """Collision helper for :meth:`coalesce`: combines the colliding values
    into a list, with the new value placed ahead of the old one, and
    returns the result of passing that list through
    :meth:`pyutils.list_utils.flatten`.

    The key (first argument) is ignored.
    """
    # Imported at call time rather than module load time; presumably to
    # avoid an import cycle with list_utils -- confirm before hoisting.
    from pyutils.list_utils import flatten

    combined = [new_value, old_value]
    return flatten(combined)




[docs]
def coalesce_by_creating_set(key, new_value, old_value):
    """Collision helper for :meth:`coalesce`: builds a set from whatever
    :meth:`coalesce_by_creating_list` returns for the same arguments."""
    merged = coalesce_by_creating_list(key, new_value, old_value)
    return set(merged)




[docs]
def coalesce_last_write_wins(_, new_value, discarded_old_value):
    """Collision helper for :meth:`coalesce`: the new value clobbers the
    old one.  The key and the old value are ignored."""
    return new_value




[docs]
def coalesce_first_write_wins(_, discarded_new_value, old_value):
    """Collision helper for :meth:`coalesce`: the value already stored is
    kept.  The key and the new value are ignored."""
    return old_value




[docs]
def raise_on_duplicated_keys(key, new_value, old_value):
    """Collision helper for :meth:`coalesce` for inputs whose keys must
    never overlap: always raises, naming the colliding key.

    Raises:
        KeyError: unconditionally; both values are ignored.
    """
    message = f'Key {key} is duplicated in more than one input dict.'
    raise KeyError(message)




[docs]
def coalesce(
    inputs: Iterable[AnyDict],
    *,
    aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
) -> AnyDict:
    """Coalesce (i.e. combine) N input dicts into one output dict
    containing the union of all keys / values in every input dict.
    When keys collide, apply the aggregation_function which, by
    default, creates a list of values with the same key in the output
    dict.

    Args:
        inputs: any iterable of dicts to coalesce (a list, a tuple, a
            generator, ...); it is consumed once, in order
        aggregation_function: a Callable to deal with key collisions; one of
            the below functions already defined or your own strategy:

            * :meth:`coalesce_by_creating_list` creates a list of values
              with the same key in the output dict.
            * :meth:`coalesce_by_creating_set` creates a set of values with
              the same key in the output dict.
            * :meth:`coalesce_first_write_wins` only preserves the first
              value with a duplicated key.  Others are dropped silently.
            * :meth:`coalesce_last_write_wins` only preserves the last
              value with a duplicated key.  Others are dropped silently.
            * :meth:`raise_on_duplicated_keys` raises an Exception on
              duplicated keys; use when keys should never collide.
            * Your own strategy; Callables will be passed the key, the
              newly encountered value and the value currently stored in
              the output (in that order) and can return whatever they
              want which will be stored in the output dict.

    Returns:
        The coalesced output dict.  The input dicts are not modified.

    >>> a = {'a': 1, 'b': 2}
    >>> b = {'b': 1, 'c': 2, 'd': 3}
    >>> c = {'c': 1, 'd': 2}
    >>> coalesce([a, b, c])
    {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}

    >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
    {'a': 1, 'b': 1, 'c': 1, 'd': 2}

    >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
    Traceback (most recent call last):
    ...
    KeyError: 'Key b is duplicated in more than one input dict.'
    """
    out: AnyDict = {}
    for d in inputs:
        for key, new_value in d.items():
            if key in out:
                # Collision: let the strategy decide what survives.
                out[key] = aggregation_function(key, new_value, out[key])
            else:
                out[key] = new_value
    return out




[docs]
def item_with_max_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the highest value in a
        dict as a `Tuple[key, value]`.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_max_value(d)
    ('c', 3)
    >>> item_with_max_value({})  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: max() arg is an empty sequence

    """

    def value_of(item):
        # Rank (key, value) pairs by their value only.
        return item[1]

    return max(d.items(), key=value_of)




[docs]
def item_with_min_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the lowest value in a
        dict as a `Tuple[key, value]`.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_min_value(d)
    ('a', 1)

    """

    def value_of(item):
        # Rank (key, value) pairs by their value only.
        return item[1]

    return min(d.items(), key=value_of)




[docs]
def key_with_max_value(d: AnyDict) -> Hashable:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key of the item that has the highest value in the dict.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Values (not keys) are compared here; the result is the
        key belonging to the largest value.  To find the largest key
        regardless of values, use :meth:`max_key` instead.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> key_with_max_value(d)
    'a'

    """
    key, _ = item_with_max_value(d)
    return key




[docs]
def key_with_min_value(d: AnyDict) -> Hashable:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key of the item that has the lowest value in the dict.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Values (not keys) are compared here; the result is the
        key belonging to the smallest value.  To find the smallest key
        regardless of values, use :meth:`min_key` instead.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> key_with_min_value(d)
    'c'

    """
    key, _ = item_with_min_value(d)
    return key




[docs]
def max_value(d: AnyDict) -> Any:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The maximum value in the dict *without its key*.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> max_value(d)
    3
    """
    _, value = item_with_max_value(d)
    return value




[docs]
def min_value(d: AnyDict) -> Any:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The minimum value in the dict *without its key*.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> min_value(d)
    1
    """
    _, value = item_with_min_value(d)
    return value




[docs]
def max_key(d: DictWithComparableKeys) -> Comparable:
    """
    Args:
        d: a dict with comparable keys

    Returns:
        The maximum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: Only keys are compared with each other; the values
        stored under them play no part in the result.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> max_key(d)
    'c'
    """
    # Iterating a dict yields its keys.
    return max(d)




[docs]
def min_key(d: DictWithComparableKeys) -> Comparable:
    """
    Args:
        d: a dict with comparable keys

    Returns:
        The minimum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: Only keys are compared with each other; the values
        stored under them play no part in the result.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> min_key(d)
    'a'
    """
    # Iterating a dict yields its keys.
    return min(d)




[docs]
def parallel_lists_to_dict(keys: List[Hashable], values: List[Any]) -> AnyDict:
    """Build a dict by pairing up two parallel lists, position by position.

    Args:
        keys: list containing keys and no duplicated keys
        values: a parallel list (to keys) containing values

    Returns:
        A dict composed of zipping the keys list and values list together.

    Raises:
        ValueError: if keys and values lists not the same length.

    >>> k = ['name', 'phone', 'address', 'zip']
    >>> v = ['scott', '555-1212', '123 main st.', '12345']
    >>> parallel_lists_to_dict(k, v)
    {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    """
    if len(keys) == len(values):
        return dict(zip(keys, values))
    # zip() would silently truncate to the shorter list, so reject instead.
    raise ValueError("Parallel keys and values lists must have the same length")




[docs]
def dict_to_key_value_lists(d: AnyDict) -> Tuple[List[Hashable], List[Any]]:
    """Split a dict into two parallel lists: its keys and its values.

    Args:
        d: a dict

    Returns:
        A tuple of two elements: the first is the keys list and the second
        is the values list.  Both follow the dict's iteration order, so
        the entries at the same index belong together.

    >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    >>> (k, v) = dict_to_key_value_lists(d)
    >>> k
    ['name', 'phone', 'address', 'zip']
    >>> v
    ['scott', '555-1212', '123 main st.', '12345']
    """
    key_list: List[Any] = list(d.keys())
    value_list: List[Any] = list(d.values())
    return (key_list, value_list)



# Aliases exposing the conversion helpers from dataclass_utils under
# dict-centric names, so they can be reached through this module as well.
dict_to_dataclass = dataclass_utils.dataclass_from_dict

dict_from_dataclass = dataclass_utils.dataclass_to_dict


# When executed as a script, run the doctests embedded in the docstrings
# of this module.
if __name__ == '__main__':
    import doctest

    doctest.testmod()