3 # © Copyright 2021-2022, Scott Gasch
5 """This module contains helper functions for dealing with Python dictionaries."""
7 from itertools import islice
8 from typing import Any, Callable, Dict, Iterator, List, Tuple
16 inc_function: Callable[..., Any] = lambda x: x + 1,
19 Initialize a dict value (if it doesn't exist) or increments it (using the
20 inc_function, which is customizable) if it already does exist.
23 d: the dict to increment or initialize a value in
24 key: the key to increment or initialize
25 init_value: default initial value
26 inc_function: Callable use to increment a value
29 True if the key already existed or False otherwise
31 See also: :py:class:`collections.defaultdict` and
32 :py:class:`collections.Counter`.
35 >>> init_or_inc(d, "test")
37 >>> init_or_inc(d, "test")
39 >>> init_or_inc(d, 'ing')
45 d[key] = inc_function(d[key])
51 def shard(d: Dict[Any, Any], size: int) -> Iterator[Dict[Any, Any]]:
53 Shards (i.e. splits) a dict into N subdicts which, together,
54 contain all keys/values from the original unsharded dict.
57 d: the input dict to be sharded (split)
58 size: the ideal shard size (number of elements per shard)
61 A generator that yields subsequent shards.
65 If `len(d)` is not an even multiple of `size` then the last
66 shard will not have `size` items in it. It will have
67 `len(d) % size` items instead.
70 ... 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6,
71 ... 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12,
73 >>> for r in shard(d, 5):
75 {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
76 {'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10}
80 for x in range(0, len(d), size):
81 yield dict(islice(items, x, x + size))
84 def coalesce_by_creating_list(_, new_value, old_value):
85 """Helper for use with :meth:`coalesce` that creates a list on
87 from pyutils.list_utils import flatten
89 return flatten([new_value, old_value])
92 def coalesce_by_creating_set(key, new_value, old_value):
93 """Helper for use with :meth:`coalesce` that creates a set on
95 return set(coalesce_by_creating_list(key, new_value, old_value))
98 def coalesce_last_write_wins(_, new_value, discarded_old_value):
99 """Helper for use with :meth:`coalsce` that klobbers the old
100 with the new one on collision."""
104 def coalesce_first_write_wins(_, discarded_new_value, old_value):
105 """Helper for use with :meth:`coalsce` that preserves the old
106 value and discards the new one on collision."""
110 def raise_on_duplicated_keys(key, new_value, old_value):
111 """Helper for use with :meth:`coalesce` that raises an exception
112 when a collision is detected.
114 raise Exception(f'Key {key} is duplicated in more than one input dict.')
118 inputs: Iterator[Dict[Any, Any]],
120 aggregation_function: Callable[[Any, Any, Any], Any] = coalesce_by_creating_list,
122 """Coalesce (i.e. combine) N input dicts into one output dict
123 ontaining the union of all keys / values in every input dict.
124 When keys collide, apply the aggregation_function which, by
125 default, creates a list of values with the same key in the output
129 inputs: an iterable set of dicts to coalesce
130 aggregation_function: a Callable to deal with key collisions; one of
131 the below functions already defined or your own strategy:
133 * :meth:`coalesce_by_creating_list` creates a list of values
134 with the same key in the output dict.
135 * :meth:`coalesce_by_creating_set` creates a set of values with
136 the same key in the output dict.
137 * :meth:`coalesce_first_write_wins` only preserves the first
138 value with a duplicated key. Others are dropped silently.
139 * :meth:`coalesce_last_write_wins` only preserves the last
140 value with a duplicated key. Others are dropped silently.
141 * :meth:`raise_on_duplicated_keys` raises an Exception on
142 duplicated keys; use when keys should never collide.
143 * Your own strategy; Callables will be passed the key and
144 two values and can return whatever they want which will
145 be stored in the output dict.
148 The coalesced output dict.
150 >>> a = {'a': 1, 'b': 2}
151 >>> b = {'b': 1, 'c': 2, 'd': 3}
152 >>> c = {'c': 1, 'd': 2}
153 >>> coalesce([a, b, c])
154 {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}
156 >>> coalesce([a, b, c], aggregation_function=coalesce_last_write_wins)
157 {'a': 1, 'b': 1, 'c': 1, 'd': 2}
159 >>> coalesce([a, b, c], aggregation_function=raise_on_duplicated_keys)
160 Traceback (most recent call last):
162 Exception: Key b is duplicated in more than one input dict.
164 out: Dict[Any, Any] = {}
168 value = aggregation_function(key, d[key], out[key])
175 def item_with_max_value(d: Dict[Any, Any]) -> Tuple[Any, Any]:
178 d: a dict with comparable values
181 The key and value of the item with the highest value in a
182 dict as a `Tuple[key, value]`.
184 >>> d = {'a': 1, 'b': 2, 'c': 3}
185 >>> item_with_max_value(d)
187 >>> item_with_max_value({})
188 Traceback (most recent call last):
190 ValueError: max() arg is an empty sequence
193 return max(d.items(), key=lambda _: _[1])
196 def item_with_min_value(d: Dict[Any, Any]) -> Tuple[Any, Any]:
199 d: a dict with comparable values
202 The key and value of the item with the lowest value in a
203 dict as a `Tuple[key, value]`.
205 >>> d = {'a': 1, 'b': 2, 'c': 3}
206 >>> item_with_min_value(d)
210 return min(d.items(), key=lambda _: _[1])
213 def key_with_max_value(d: Dict[Any, Any]) -> Any:
216 d: a dict with comparable keys
219 The maximum key in the dict when comparing the keys with
222 .. note:: This code totally ignores values; it is comparing key
223 against key to find the maximum key in the keyspace.
225 >>> d = {'a': 1, 'b': 2, 'c': 3}
226 >>> key_with_max_value(d)
230 return item_with_max_value(d)[0]
233 def key_with_min_value(d: Dict[Any, Any]) -> Any:
236 d: a dict with comparable keys
239 The minimum key in the dict when comparing the keys with
242 .. note:: This code totally ignores values; it is comparing key
243 against key to find the minimum key in the keyspace.
245 >>> d = {'a': 1, 'b': 2, 'c': 3}
246 >>> key_with_min_value(d)
250 return item_with_min_value(d)[0]
253 def max_value(d: Dict[Any, Any]) -> Any:
256 d: a dict with compatable values
259 The maximum value in the dict *without its key*.
261 >>> d = {'a': 1, 'b': 2, 'c': 3}
265 return item_with_max_value(d)[1]
268 def min_value(d: Dict[Any, Any]) -> Any:
271 d: a dict with comparable values
274 The minimum value in the dict *without its key*.
276 >>> d = {'a': 1, 'b': 2, 'c': 3}
280 return item_with_min_value(d)[1]
283 def max_key(d: Dict[Any, Any]) -> Any:
286 d: a dict with comparable keys
289 The maximum key in dict (ignoring values totally)
291 .. note:: This code totally ignores values; it is comparing key
292 against key to find the maximum key in the keyspace.
294 >>> d = {'a': 3, 'b': 2, 'c': 1}
301 def min_key(d: Dict[Any, Any]) -> Any:
304 d: a dict with comparable keys
307 The minimum key in dict (ignoring values totally)
309 .. note:: This code totally ignores values; it is comparing key
310 against key to find the minimum key in the keyspace.
312 >>> d = {'a': 3, 'b': 2, 'c': 1}
319 def parallel_lists_to_dict(keys: List[Any], values: List[Any]) -> Dict[Any, Any]:
320 """Given two parallel lists (keys and values), create and return
324 keys: list containing keys and no duplicated keys
325 values: a parallel list (to keys) containing values
328 A dict composed of zipping the keys list and values list together.
330 >>> k = ['name', 'phone', 'address', 'zip']
331 >>> v = ['scott', '555-1212', '123 main st.', '12345']
332 >>> parallel_lists_to_dict(k, v)
333 {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
335 if len(keys) != len(values):
336 raise Exception("Parallel keys and values lists must have the same length")
337 return dict(zip(keys, values))
340 def dict_to_key_value_lists(d: Dict[Any, Any]) -> Tuple[List[Any], List[Any]]:
341 """Given a dict, decompose it into a list of keys and values.
347 A tuple of two elements: the first is the keys list and the second
350 >>> d = {'name': 'scott', 'phone': '555-1212', 'address': '123 main st.', 'zip': '12345'}
351 >>> (k, v) = dict_to_key_value_lists(d)
353 ['name', 'phone', 'address', 'zip']
355 ['scott', '555-1212', '123 main st.', '12345']
357 r: Tuple[List[Any], List[Any]] = ([], [])
358 for (k, v) in d.items():
364 if __name__ == '__main__':