3 # © Copyright 2021-2023, Scott Gasch
5 """This module contains helper functions for dealing with Python dictionaries."""
7 from itertools import islice
8 from typing import Any, Callable, Dict, Hashable, Iterator, List, Tuple
10 from pyutils import dataclass_utils
11 from pyutils.typez.typing import Comparable
# Generic dict alias used throughout this module: any hashable key, any value.
AnyDict = Dict[Hashable, Any]
# Dict alias for the helpers that compare keys with each other
# (e.g. max_key / min_key).
DictWithComparableKeys = Dict[Comparable, Any]
22 inc_function: Callable[..., Any] = lambda x: x + 1,
24 """Initialize a dict value (if it doesn't exist) or increments it (using the
25 inc_function, which is customizable) if it already does exist.
27 See also :py:class:`defaultdict`
28 (https://docs.python.org/3/library/collections.html#collections.defaultdict)
29 for a more pythonic alternative.
32 d: the dict to increment or initialize a value in
33 key: the key to increment or initialize
34 init_value: default initial value (see also :meth:`dict.setdefault`)
35 inc_function: Callable used to increment a value
38 True if the key already existed or False otherwise
40 See also: :py:class:`collections.defaultdict` and
41 :py:class:`collections.Counter`.
44 >>> init_or_inc(d, "test")
46 >>> init_or_inc(d, "test")
48 >>> init_or_inc(d, 'ing')
55 d[key] = inc_function(d[key])
def shard(d: AnyDict, size: int) -> Iterator[AnyDict]:
    """
    Shards (i.e. splits) a dict into N subdicts which, together,
    contain all keys/values from the original unsharded dict.

    Args:
        d: the input dict to be sharded (split)
        size: the ideal shard size (number of elements per shard);
            must be at least 1

    Returns:
        A generator that yields subsequent shards.

    Raises:
        ValueError: if `size` is less than 1.  Because this is a
            generator, the error surfaces when it is first advanced.

    .. note::

        If `len(d)` is not an even multiple of `size` then the last
        shard will not have `size` items in it.  It will have
        `len(d) % size` items instead.

    >>> d = {
    ...     'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
    ...     'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
    ... }
    >>> for r in shard(d, 5):
    ...     print(r)
    {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
    {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
    {'k': 11, 'l': 12}
    """
    if size < 1:
        raise ValueError(f"shard size must be >= 1, got {size}")
    # One shared iterator: every islice call resumes where the previous
    # shard stopped instead of re-scanning the dict from its first item.
    items = iter(d.items())
    for _ in range(0, len(d), size):
        yield dict(islice(items, size))
def coalesce_by_creating_list(_, new_value, old_value):
    """Helper for use with :meth:`coalesce` that creates a list on
    collision.

    Args:
        _: the colliding key (unused)
        new_value: the value from the dict currently being merged in
        old_value: the value already stored for this key

    Returns:
        The result of flattening ``[new_value, old_value]`` with
        :func:`pyutils.list_utils.flatten`.
    """
    # Function-scope import, kept exactly as the original block had it.
    from pyutils.list_utils import flatten

    return flatten([new_value, old_value])
def coalesce_by_creating_set(key, new_value, old_value):
    """Helper for use with :meth:`coalesce` that creates a set on
    collision.

    Args:
        key: the colliding key
        new_value: the value from the dict currently being merged in
        old_value: the value already stored for this key

    Returns:
        A set built from the list that
        :meth:`coalesce_by_creating_list` produces for the same
        arguments.
    """
    return set(coalesce_by_creating_list(key, new_value, old_value))
def coalesce_last_write_wins(_, new_value, discarded_old_value):
    """Helper for use with :meth:`coalesce` that clobbers the old
    value with the new one on collision.

    Args:
        _: the colliding key (unused)
        new_value: the value from the dict currently being merged in
        discarded_old_value: the value already stored (ignored)

    Returns:
        `new_value`, unchanged.
    """
    return new_value
def coalesce_first_write_wins(_, discarded_new_value, old_value):
    """Helper for use with :meth:`coalesce` that preserves the old
    value and discards the new one on collision.

    Args:
        _: the colliding key (unused)
        discarded_new_value: the value from the dict currently being
            merged in (ignored)
        old_value: the value already stored for this key

    Returns:
        `old_value`, unchanged.
    """
    return old_value
def raise_on_duplicated_keys(key, new_value, old_value):
    """Helper for use with :meth:`coalesce` that raises an exception
    when a collision is detected.

    Args:
        key: the colliding key; it is named in the error message
        new_value: the value from the dict currently being merged in
            (unused)
        old_value: the value already stored for this key (unused)

    Raises:
        Exception: always; this helper never returns.
    """
    raise Exception(f'Key {key} is duplicated in more than one input dict.')
128 inputs: Iterator[AnyDict],
130 aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
132 """Coalesce (i.e. combine) N input dicts into one output dict
133 containing the union of all keys / values in every input dict.
134 When keys collide, apply the aggregation_function which, by
135 default, creates a list of values with the same key in the output
139 inputs: an iterable set of dicts to coalesce
140 aggregation_function: a Callable to deal with key collisions; one of
141 the below functions already defined or your own strategy:
143 * :meth:`coalesce_by_creating_list` creates a list of values
144 with the same key in the output dict.
145 * :meth:`coalesce_by_creating_set` creates a set of values with
146 the same key in the output dict.
147 * :meth:`coalesce_first_write_wins` only preserves the first
148 value with a duplicated key. Others are dropped silently.
149 * :meth:`coalesce_last_write_wins` only preserves the last
150 value with a duplicated key. Others are dropped silently.
151 * :meth:`raise_on_duplicated_keys` raises an Exception on
152 duplicated keys; use when keys should never collide.
153 * Your own strategy; Callables will be passed the key and
154 two values and can return whatever they want which will
155 be stored in the output dict.
158 The coalesced output dict.
160 >>> a = {'a': 1, 'b': 2}
161 >>> b = {'b': 1, 'c': 2, 'd': 3}
162 >>> c = {'c': 1, 'd': 2}
163 >>> coalesce([a, b, c])
164 {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}
166 >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
167 {'a': 1, 'b': 1, 'c': 1, 'd': 2}
169 >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
170 Traceback (most recent call last):
172 Exception: Key b is duplicated in more than one input dict.
178 value = aggregation_function(key, d[key], out[key])
def item_with_max_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the highest value in a
        dict as a `Tuple[key, value]`.  If several items share the
        highest value, the first one in the dict's iteration order
        is returned.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_max_value(d)
    ('c', 3)
    >>> item_with_max_value({})  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: max() arg is an empty sequence
    """
    # Only the value (index 1 of each (key, value) pair) is compared.
    return max(d.items(), key=lambda item: item[1])
def item_with_min_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the lowest value in a
        dict as a `Tuple[key, value]`.  If several items share the
        lowest value, the first one in the dict's iteration order
        is returned.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_min_value(d)
    ('a', 1)
    """
    # Only the value (index 1 of each (key, value) pair) is compared.
    return min(d.items(), key=lambda item: item[1])
def key_with_max_value(d: AnyDict) -> Hashable:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key whose associated value is the largest in the dict.
        If several keys share the largest value, the first of them
        in the dict's iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Keys are never compared with each other here; only the
        values are.  Use :meth:`max_key` to find the largest key.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> key_with_max_value(d)
    'c'
    >>> key_with_max_value({'z': 1, 'a': 5})
    'a'
    """
    # Same result as item_with_max_value(d)[0]: rank keys by their value.
    return max(d, key=d.__getitem__)
def key_with_min_value(d: AnyDict) -> Hashable:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The key whose associated value is the smallest in the dict.
        If several keys share the smallest value, the first of them
        in the dict's iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Keys are never compared with each other here; only the
        values are.  Use :meth:`min_key` to find the smallest key.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> key_with_min_value(d)
    'a'
    >>> key_with_min_value({'z': 1, 'a': 5})
    'z'
    """
    # Same result as item_with_min_value(d)[0]: rank keys by their value.
    return min(d, key=d.__getitem__)
def max_value(d: AnyDict) -> Any:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The maximum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> max_value(d)
    3
    """
    # Same result as item_with_max_value(d)[1].
    return max(d.values())
def min_value(d: AnyDict) -> Any:
    """
    Args:
        d: a dict with comparable values

    Returns:
        The minimum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> min_value(d)
    1
    """
    # Same result as item_with_min_value(d)[1].
    return min(d.values())
def max_key(d: DictWithComparableKeys) -> Comparable:
    """
    Args:
        d: a dict with comparable keys

    Returns:
        The maximum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the maximum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> max_key(d)
    'c'
    """
    # Iterating a dict yields its keys, so this compares keys only.
    return max(d)
def min_key(d: DictWithComparableKeys) -> Comparable:
    """
    Args:
        d: a dict with comparable keys

    Returns:
        The minimum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the minimum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> min_key(d)
    'a'
    """
    # Iterating a dict yields its keys, so this compares keys only.
    return min(d)
def parallel_lists_to_dict(keys: List[Hashable], values: List[Any]) -> AnyDict:
    """Given two parallel lists (keys and values), create and return
    a dict.

    Args:
        keys: list containing keys and no duplicated keys
        values: a parallel list (to keys) containing values

    Returns:
        A dict composed of zipping the keys list and values list together.
        If `keys` does contain a repeated key, the value paired with its
        last occurrence is the one kept.

    Raises:
        ValueError: if keys and values lists not the same length.

    >>> k = ['name', 'phone', 'address', 'zip']
    >>> v = ['scott', '555-1212', '123 main st.', '12345']
    >>> parallel_lists_to_dict(k, v)
    {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    """
    # zip() would silently truncate to the shorter list, so reject a
    # length mismatch up front.
    if len(keys) != len(values):
        raise ValueError("Parallel keys and values lists must have the same length")
    return dict(zip(keys, values))
def dict_to_key_value_lists(d: AnyDict) -> Tuple[List[Hashable], List[Any]]:
    """Given a dict, decompose it into a list of keys and values.

    Args:
        d: the dict to decompose

    Returns:
        A tuple of two elements: the first is the keys list and the second
        is the values list.  The two lists are parallel: the value at
        index i belongs to the key at index i.

    >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    >>> (k, v) = dict_to_key_value_lists(d)
    >>> k
    ['name', 'phone', 'address', 'zip']
    >>> v
    ['scott', '555-1212', '123 main st.', '12345']
    """
    # Keys and values of an unmodified dict iterate in the same order,
    # so the two lists line up index by index.
    return list(d), list(d.values())
# Dict-centric aliases for the converters that live in
# pyutils.dataclass_utils, so callers can reach them from this module.
dict_to_dataclass = dataclass_utils.dataclass_from_dict

dict_from_dataclass = dataclass_utils.dataclass_to_dict
382 if __name__ == '__main__':