3 # © Copyright 2021-2023, Scott Gasch
5 """This module contains helper functions for dealing with Python dictionaries."""
7 from itertools import islice
8 from typing import Any, Callable, Dict, Hashable, Iterator, List, Tuple
10 from pyutils import dataclass_utils
11 from pyutils.typez.typing import Comparable
# Generic dict alias: any hashable key mapped to a value of any type.
AnyDict = Dict[Hashable, Any]
# Dict alias whose keys are typed as Comparable (imported from
# pyutils.typez.typing; presumably an ordering protocol -- TODO confirm).
DictWithComparableKeys = Dict[Comparable, Any]
22 inc_function: Callable[..., Any] = lambda x: x + 1,
    Initializes a dict value (if the key doesn't exist) or increments it
    (using the inc_function, which is customizable) if it already exists.
29 d: the dict to increment or initialize a value in
30 key: the key to increment or initialize
31 init_value: default initial value
        inc_function: Callable used to increment a value
35 True if the key already existed or False otherwise
37 See also: :py:class:`collections.defaultdict` and
38 :py:class:`collections.Counter`.
41 >>> init_or_inc(d, "test")
43 >>> init_or_inc(d, "test")
45 >>> init_or_inc(d, 'ing')
51 d[key] = inc_function(d[key])
def shard(d: AnyDict, size: int) -> Iterator[AnyDict]:
    """
    Shards (i.e. splits) a dict into N subdicts which, together,
    contain all keys/values from the original unsharded dict.

    Args:
        d: the input dict to be sharded (split)
        size: the ideal shard size (number of elements per shard).
            A size of zero raises ValueError when the generator is
            first advanced; a negative size yields no shards at all.

    Returns:
        A generator that yields subsequent shards, in the dict's own
        iteration order.

    .. note::

        If `len(d)` is not an even multiple of `size` then the last
        shard will not have `size` items in it.  It will have
        `len(d) % size` items instead.

    .. note::

        All shards are drawn from one live iterator over `d`, so do
        not add or remove keys in `d` while consuming the generator.

    >>> d = {
    ...     'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
    ...     'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
    ... }
    >>> for r in shard(d, 5):
    ...     print(r)
    {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
    {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
    {'k': 11, 'l': 12}
    """
    # One shared iterator: each shard resumes where the previous one
    # stopped instead of re-walking the dict from its first item.
    remaining = iter(d.items())
    # range() still decides how many shards exist, which keeps the
    # zero / negative size behavior described above.
    for _ in range(0, len(d), size):
        yield dict(islice(remaining, size))
def coalesce_by_creating_list(_, new_value, old_value):
    """Helper for use with :meth:`coalesce` that creates a list on
    key collision.

    Args:
        _: the colliding key (unused)
        new_value: the value found in the dict currently being merged
        old_value: the value already held for the same key

    Returns:
        Whatever :func:`pyutils.list_utils.flatten` produces for
        ``[new_value, old_value]``.
    """
    # Imported at call time rather than at module load.
    # NOTE(review): presumably this avoids an import cycle -- confirm.
    from pyutils.list_utils import flatten

    colliding_values = [new_value, old_value]
    return flatten(colliding_values)
def coalesce_by_creating_set(key, new_value, old_value):
    """Helper for use with :meth:`coalesce` that creates a set on
    key collision.

    The colliding values are first combined by
    :meth:`coalesce_by_creating_list`; that list is then converted
    into a set.

    Args:
        key: the colliding key
        new_value: the value found in the dict currently being merged
        old_value: the value already held for the same key

    Returns:
        A set built from the combined values.
    """
    combined = coalesce_by_creating_list(key, new_value, old_value)
    return set(combined)
def coalesce_last_write_wins(_, new_value, discarded_old_value):
    """Helper for use with :meth:`coalesce` that clobbers the old
    value with the new one on collision.

    Args:
        _: the colliding key (unused)
        new_value: the value found in the dict currently being merged
        discarded_old_value: the value already held for the same key;
            it is ignored

    Returns:
        `new_value`, unchanged.
    """
    return new_value
def coalesce_first_write_wins(_, discarded_new_value, old_value):
    """Helper for use with :meth:`coalesce` that preserves the old
    value and discards the new one on collision.

    Args:
        _: the colliding key (unused)
        discarded_new_value: the value found in the dict currently
            being merged; it is ignored
        old_value: the value already held for the same key

    Returns:
        `old_value`, unchanged.
    """
    return old_value
def raise_on_duplicated_keys(key, new_value, old_value):
    """Helper for use with :meth:`coalesce` that refuses to merge
    colliding keys: it raises an exception when a collision is
    detected.

    Args:
        key: the colliding key; it is named in the error message
        new_value: unused
        old_value: unused

    Raises:
        Exception: always.
    """
    message = f'Key {key} is duplicated in more than one input dict.'
    raise Exception(message)
124 inputs: Iterator[AnyDict],
126 aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
128 """Coalesce (i.e. combine) N input dicts into one output dict
    containing the union of all keys / values in every input dict.
130 When keys collide, apply the aggregation_function which, by
131 default, creates a list of values with the same key in the output
135 inputs: an iterable set of dicts to coalesce
136 aggregation_function: a Callable to deal with key collisions; one of
137 the below functions already defined or your own strategy:
139 * :meth:`coalesce_by_creating_list` creates a list of values
140 with the same key in the output dict.
141 * :meth:`coalesce_by_creating_set` creates a set of values with
142 the same key in the output dict.
143 * :meth:`coalesce_first_write_wins` only preserves the first
144 value with a duplicated key. Others are dropped silently.
145 * :meth:`coalesce_last_write_wins` only preserves the last
146 value with a duplicated key. Others are dropped silently.
147 * :meth:`raise_on_duplicated_keys` raises an Exception on
148 duplicated keys; use when keys should never collide.
149 * Your own strategy; Callables will be passed the key and
150 two values and can return whatever they want which will
151 be stored in the output dict.
154 The coalesced output dict.
156 >>> a = {'a': 1, 'b': 2}
157 >>> b = {'b': 1, 'c': 2, 'd': 3}
158 >>> c = {'c': 1, 'd': 2}
159 >>> coalesce([a, b, c])
160 {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}
162 >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
163 {'a': 1, 'b': 1, 'c': 1, 'd': 2}
165 >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
166 Traceback (most recent call last):
168 Exception: Key b is duplicated in more than one input dict.
174 value = aggregation_function(key, d[key], out[key])
def item_with_max_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Finds the dict item whose value is the largest.

    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the highest value in a
        dict as a `Tuple[key, value]`.  When several items share the
        highest value, the first one in iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_max_value(d)
    ('c', 3)
    >>> item_with_max_value({})  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: max() arg is an empty sequence
    """
    pairs = d.items()
    # Rank each (key, value) pair by its value only.
    return max(pairs, key=lambda pair: pair[1])
def item_with_min_value(d: AnyDict) -> Tuple[Hashable, Any]:
    """
    Finds the dict item whose value is the smallest.

    Args:
        d: a dict with comparable values

    Returns:
        The key and value of the item with the lowest value in a
        dict as a `Tuple[key, value]`.  When several items share the
        lowest value, the first one in iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> item_with_min_value(d)
    ('a', 1)
    """
    pairs = d.items()
    # Rank each (key, value) pair by its value only.
    return min(pairs, key=lambda pair: pair[1])
def key_with_max_value(d: AnyDict) -> Hashable:
    """
    Finds the key whose associated *value* is the largest.

    Args:
        d: a dict with comparable values

    Returns:
        The key of the item with the highest value in the dict.  When
        several items share the highest value, the first such key in
        iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Keys are never compared with each other here; only the
        values are ranked.  To find the largest key itself, use
        :meth:`max_key`.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> key_with_max_value(d)
    'c'
    >>> key_with_max_value({'a': 3, 'b': 2, 'c': 1})
    'a'
    """
    # Iterate the keys, ranking each one by the value stored under it.
    return max(d, key=d.get)
def key_with_min_value(d: AnyDict) -> Hashable:
    """
    Finds the key whose associated *value* is the smallest.

    Args:
        d: a dict with comparable values

    Returns:
        The key of the item with the lowest value in the dict.  When
        several items share the lowest value, the first such key in
        iteration order is returned.

    Raises:
        ValueError: if the dict is empty.

    .. note:: Keys are never compared with each other here; only the
        values are ranked.  To find the smallest key itself, use
        :meth:`min_key`.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> key_with_min_value(d)
    'a'
    >>> key_with_min_value({'a': 3, 'b': 2, 'c': 1})
    'c'
    """
    # Iterate the keys, ranking each one by the value stored under it.
    return min(d, key=d.get)
def max_value(d: AnyDict) -> Any:
    """
    Finds the largest value stored in a dict.

    Args:
        d: a dict with comparable values

    Returns:
        The maximum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> max_value(d)
    3
    """
    # Only the values matter here, so rank them directly.
    return max(d.values())
def min_value(d: AnyDict) -> Any:
    """
    Finds the smallest value stored in a dict.

    Args:
        d: a dict with comparable values

    Returns:
        The minimum value in the dict *without its key*.

    Raises:
        ValueError: if the dict is empty.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> min_value(d)
    1
    """
    # Only the values matter here, so rank them directly.
    return min(d.values())
def max_key(d: DictWithComparableKeys) -> Comparable:
    """
    Finds the largest key in a dict.

    Args:
        d: a dict with comparable keys

    Returns:
        The maximum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the maximum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> max_key(d)
    'c'
    """
    # Iterating a dict yields its keys, so this ranks keys only.
    return max(d)
def min_key(d: DictWithComparableKeys) -> Comparable:
    """
    Finds the smallest key in a dict.

    Args:
        d: a dict with comparable keys

    Returns:
        The minimum key in dict (ignoring values totally)

    Raises:
        ValueError: if the dict is empty.

    .. note:: This code totally ignores values; it is comparing key
        against key to find the minimum key in the keyspace.

    >>> d = {'a': 3, 'b': 2, 'c': 1}
    >>> min_key(d)
    'a'
    """
    # Iterating a dict yields its keys, so this ranks keys only.
    return min(d)
def parallel_lists_to_dict(keys: List[Hashable], values: List[Any]) -> AnyDict:
    """Given two parallel lists (keys and values), create and return
    a dict that pairs each key with the value at the same position.

    Args:
        keys: list containing keys and no duplicated keys.  Duplicates
            are not checked for; a repeated key keeps the value paired
            with its last occurrence.
        values: a parallel list (to keys) containing values

    Returns:
        A dict composed of zipping the keys list and values list together.

    Raises:
        Exception: if the two lists differ in length.

    >>> k = ['name', 'phone', 'address', 'zip']
    >>> v = ['scott', '555-1212', '123 main st.', '12345']
    >>> parallel_lists_to_dict(k, v)
    {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    """
    if len(keys) == len(values):
        return dict(zip(keys, values))
    raise Exception("Parallel keys and values lists must have the same length")
def dict_to_key_value_lists(d: AnyDict) -> Tuple[List[Hashable], List[Any]]:
    """Given a dict, decompose it into a list of keys and a list of
    values.

    Args:
        d: the dict to decompose

    Returns:
        A tuple of two elements: the first is the keys list and the second
        is the values list.  Both lists follow the dict's iteration
        order, so the value at index `i` belongs to the key at index `i`.

    >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
    >>> (k, v) = dict_to_key_value_lists(d)
    >>> k
    ['name', 'phone', 'address', 'zip']
    >>> v
    ['scott', '555-1212', '123 main st.', '12345']
    """
    # The keys and values views of an unmodified dict iterate in the
    # same order, so the two lists stay parallel.
    return list(d.keys()), list(d.values())
# Alias that exposes dataclass_utils.dataclass_from_dict under a
# dict-centric name (presumably builds a dataclass from a dict -- see
# pyutils.dataclass_utils to confirm).
dict_to_dataclass = dataclass_utils.dataclass_from_dict
# Alias that exposes dataclass_utils.dataclass_to_dict under a
# dict-centric name (presumably the reverse conversion -- confirm).
dict_from_dataclass = dataclass_utils.dataclass_to_dict
375 if __name__ == '__main__':