src/pyutils/dict_utils.py

   1 #!/usr/bin/env python3
   2
   3 # © Copyright 2021-2023, Scott Gasch
   4
   5 """This module contains helper functions for dealing with Python dictionaries."""
   6
   7 from itertools import islice
   8 from typing import Any, Callable, Dict, Hashable, Iterator, List, Tuple
   9
  10 from pyutils import dataclass_utils
  11 from pyutils.typez.typing import Comparable
  12
  13 AnyDict = Dict[Hashable, Any]
  14 DictWithComparableKeys = Dict[Comparable, Any]
  15
  16
  17 def init_or_inc(
  18     d: AnyDict,
  19     key: Hashable,
  20     *,
  21     init_value: Any = 1,
  22     inc_function: Callable[..., Any] = lambda x: x + 1,
  23 ) -> bool:
  24     """Initialize a dict value (if it doesn't exist) or increments it (using the
  25     inc_function, which is customizable) if it already does exist.
  26
  27     See also :py:class:`defaultdict`
  28     (https://docs.python.org/3/library/collections.html#collections.defaultdict)
  29     for a more pythonic alternative.
  30
  31     Args:
  32         d: the dict to increment or initialize a value in
  33         key: the key to increment or initialize
  34         init_value: default initial value (see also :meth:`dict.setdefault`)
  35         inc_function: Callable use to increment a value
  36
  37     Returns:
  38         True if the key already existed or False otherwise
  39
  40     See also: :py:class:`collections.defaultdict` and
  41     :py:class:`collections.Counter`.
  42
  43     >>> d = {}
  44     >>> init_or_inc(d, "test")
  45     False
  46     >>> init_or_inc(d, "test")
  47     True
  48     >>> init_or_inc(d, 'ing')
  49     False
  50     >>> d
  51     {'test': 2, 'ing': 1}
  52
  53     """
  54     if key in d.keys():
  55         d[key] = inc_function(d[key])
  56         return True
  57     d[key] = init_value
  58     return False
  59
  60
  61 def shard(d: AnyDict, size: int) -> Iterator[AnyDict]:
  62     """
  63     Shards (i.e. splits) a dict into N subdicts which, together,
  64     contain all keys/values from the original unsharded dict.
  65
  66     Args:
  67         d: the input dict to be sharded (split)
  68         size: the ideal shard size (number of elements per shard)
  69
  70     Returns:
  71         A generator that yields subsequent shards.
  72
  73     .. note::
  74
  75         If `len(d)` is not an even multiple of `size` then the last
  76         shard will not have `size` items in it.  It will have
  77         `len(d) % size` items instead.
  78
  79     >>> d = {
  80     ...     'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
  81     ...     'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
  82     ... }
  83     >>> for r in shard(d, 5):
  84     ...     r
  85     {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
  86     {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
  87     {'k': 11, 'l': 12}
  88     """
  89     items = d.items()
  90     for x in range(0, len(d), size):
  91         yield dict(islice(items, x, x + size))
  92
  93
  94 def coalesce_by_creating_list(_, new_value, old_value):
  95     """Helper for use with :meth:`coalesce` that creates a list on
  96     collision."""
  97     from pyutils.list_utils import flatten
  98
  99     return flatten([new_value, old_value])
 100
 101
 102 def coalesce_by_creating_set(key, new_value, old_value):
 103     """Helper for use with :meth:`coalesce` that creates a set on
 104     collision."""
 105     return set(coalesce_by_creating_list(key, new_value, old_value))
 106
 107
 108 def coalesce_last_write_wins(_, new_value, discarded_old_value):
 109     """Helper for use with :meth:`coalsce` that klobbers the old
 110     with the new one on collision."""
 111     return new_value
 112
 113
 114 def coalesce_first_write_wins(_, discarded_new_value, old_value):
 115     """Helper for use with :meth:`coalsce` that preserves the old
 116     value and discards the new one on collision."""
 117     return old_value
 118
 119
 120 def raise_on_duplicated_keys(key, new_value, old_value):
 121     """Helper for use with :meth:`coalesce` that raises an exception
 122     when a collision is detected.
 123     """
 124     raise Exception(f'Key {key} is duplicated in more than one input dict.')
 125
 126
 127 def coalesce(
 128     inputs: Iterator[AnyDict],
 129     *,
 130     aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
 131 ) -> AnyDict:
 132     """Coalesce (i.e. combine) N input dicts into one output dict
 133     ontaining the union of all keys / values in every input dict.
 134     When keys collide, apply the aggregation_function which, by
 135     default, creates a list of values with the same key in the output
 136     dict.
 137
 138     Args:
 139         inputs: an iterable set of dicts to coalesce
 140         aggregation_function: a Callable to deal with key collisions; one of
 141             the below functions already defined or your own strategy:
 142
 143             * :meth:`coalesce_by_creating_list` creates a list of values
 144               with the same key in the output dict.
 145             * :meth:`coalesce_by_creating_set` creates a set of values with
 146               the same key in the output dict.
 147             * :meth:`coalesce_first_write_wins` only preserves the first
 148               value with a duplicated key.  Others are dropped silently.
 149             * :meth:`coalesce_last_write_wins` only preserves the last
 150               value with a duplicated key.  Others are dropped silently.
 151             * :meth:`raise_on_duplicated_keys` raises an Exception on
 152               duplicated keys; use when keys should never collide.
 153             * Your own strategy; Callables will be passed the key and
 154               two values and can return whatever they want which will
 155               be stored in the output dict.
 156
 157     Returns:
 158         The coalesced output dict.
 159
 160     >>> a = {'a': 1, 'b': 2}
 161     >>> b = {'b': 1, 'c': 2, 'd': 3}
 162     >>> c = {'c': 1, 'd': 2}
 163     >>> coalesce([a, b, c])
 164     {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}
 165
 166     >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
 167     {'a': 1, 'b': 1, 'c': 1, 'd': 2}
 168
 169     >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
 170     Traceback (most recent call last):
 171     ...
 172     Exception: Key b is duplicated in more than one input dict.
 173     """
 174     out: AnyDict = {}
 175     for d in inputs:
 176         for key in d:
 177             if key in out:
 178                 value = aggregation_function(key, d[key], out[key])
 179             else:
 180                 value = d[key]
 181             out[key] = value
 182     return out
 183
 184
 185 def item_with_max_value(d: AnyDict) -> Tuple[Hashable, Any]:
 186     """
 187     Args:
 188         d: a dict with comparable values
 189
 190     Returns:
 191         The key and value of the item with the highest value in a
 192         dict as a `Tuple[key, value]`.
 193
 194     >>> d = {'a': 1, 'b': 2, 'c': 3}
 195     >>> item_with_max_value(d)
 196     ('c', 3)
 197     >>> item_with_max_value({})
 198     Traceback (most recent call last):
 199     ...
 200     ValueError: max() arg is an empty sequence
 201
 202     """
 203     return max(d.items(), key=lambda _: _[1])
 204
 205
 206 def item_with_min_value(d: AnyDict) -> Tuple[Hashable, Any]:
 207     """
 208     Args:
 209         d: a dict with comparable values
 210
 211     Returns:
 212         The key and value of the item with the lowest value in a
 213         dict as a `Tuple[key, value]`.
 214
 215     >>> d = {'a': 1, 'b': 2, 'c': 3}
 216     >>> item_with_min_value(d)
 217     ('a', 1)
 218
 219     """
 220     return min(d.items(), key=lambda _: _[1])
 221
 222
 223 def key_with_max_value(d: AnyDict) -> Hashable:
 224     """
 225     Args:
 226         d: a dict with comparable keys
 227
 228     Returns:
 229         The maximum key in the dict when comparing the keys with
 230         each other.
 231
 232     .. note:: This code totally ignores values; it is comparing key
 233         against key to find the maximum key in the keyspace.
 234
 235     >>> d = {'a': 1, 'b': 2, 'c': 3}
 236     >>> key_with_max_value(d)
 237     'c'
 238
 239     """
 240     return item_with_max_value(d)[0]
 241
 242
 243 def key_with_min_value(d: AnyDict) -> Hashable:
 244     """
 245     Args:
 246         d: a dict with comparable keys
 247
 248     Returns:
 249         The minimum key in the dict when comparing the keys with
 250         each other.
 251
 252     .. note:: This code totally ignores values; it is comparing key
 253         against key to find the minimum key in the keyspace.
 254
 255     >>> d = {'a': 1, 'b': 2, 'c': 3}
 256     >>> key_with_min_value(d)
 257     'a'
 258
 259     """
 260     return item_with_min_value(d)[0]
 261
 262
 263 def max_value(d: AnyDict) -> Any:
 264     """
 265     Args:
 266         d: a dict with compatable values
 267
 268     Returns:
 269         The maximum value in the dict *without its key*.
 270
 271     >>> d = {'a': 1, 'b': 2, 'c': 3}
 272     >>> max_value(d)
 273     3
 274     """
 275     return item_with_max_value(d)[1]
 276
 277
 278 def min_value(d: AnyDict) -> Any:
 279     """
 280     Args:
 281         d: a dict with comparable values
 282
 283     Returns:
 284         The minimum value in the dict *without its key*.
 285
 286     >>> d = {'a': 1, 'b': 2, 'c': 3}
 287     >>> min_value(d)
 288     1
 289     """
 290     return item_with_min_value(d)[1]
 291
 292
 293 def max_key(d: DictWithComparableKeys) -> Comparable:
 294     """
 295     Args:
 296         d: a dict with comparable keys
 297
 298     Returns:
 299         The maximum key in dict (ignoring values totally)
 300
 301     .. note:: This code totally ignores values; it is comparing key
 302         against key to find the maximum key in the keyspace.
 303
 304     >>> d = {'a': 3, 'b': 2, 'c': 1}
 305     >>> max_key(d)
 306     'c'
 307     """
 308     return max(d.keys())
 309
 310
 311 def min_key(d: DictWithComparableKeys) -> Comparable:
 312     """
 313     Args:
 314         d: a dict with comparable keys
 315
 316     Returns:
 317         The minimum key in dict (ignoring values totally)
 318
 319     .. note:: This code totally ignores values; it is comparing key
 320         against key to find the minimum key in the keyspace.
 321
 322     >>> d = {'a': 3, 'b': 2, 'c': 1}
 323     >>> min_key(d)
 324     'a'
 325     """
 326     return min(d.keys())
 327
 328
 329 def parallel_lists_to_dict(keys: List[Hashable], values: List[Any]) -> AnyDict:
 330     """Given two parallel lists (keys and values), create and return
 331     a dict.
 332
 333     Args:
 334         keys: list containing keys and no duplicated keys
 335         values: a parallel list (to keys) containing values
 336
 337     Returns:
 338         A dict composed of zipping the keys list and values list together.
 339
 340     Raises:
 341         ValueError: if keys and values lists not the same length.
 342
 343     >>> k = ['name', 'phone', 'address', 'zip']
 344     >>> v = ['scott', '555-1212', '123 main st.', '12345']
 345     >>> parallel_lists_to_dict(k, v)
 346     {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
 347     """
 348     if len(keys) != len(values):
 349         raise ValueError("Parallel keys and values lists must have the same length")
 350     return dict(zip(keys, values))
 351
 352
 353 def dict_to_key_value_lists(d: AnyDict) -> Tuple[List[Hashable], List[Any]]:
 354     """Given a dict, decompose it into a list of keys and values.
 355
 356     Args:
 357         d: a dict
 358
 359     Returns:
 360         A tuple of two elements: the first is the keys list and the second
 361         is the values list.
 362
 363     >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
 364     >>> (k, v) = dict_to_key_value_lists(d)
 365     >>> k
 366     ['name', 'phone', 'address', 'zip']
 367     >>> v
 368     ['scott', '555-1212', '123 main st.', '12345']
 369     """
 370     r: Tuple[List[Any], List[Any]] = ([], [])
 371     for (k, v) in d.items():
 372         r[0].append(k)
 373         r[1].append(v)
 374     return r
 375
 376
 377 dict_to_dataclass = dataclass_utils.dataclass_from_dict
 378
 379 dict_from_dataclass = dataclass_utils.dataclass_to_dict
 380
 381
 382 if __name__ == '__main__':
 383     import doctest
 384
 385     doctest.testmod()