3 # © Copyright 2021-2022, Scott Gasch
5 """Some useful(?) utilities for dealing with Lists."""
8 from collections import Counter
9 from itertools import chain, combinations, islice
10 from typing import Any, Iterator, List, MutableSequence, Sequence, Tuple
13 def shard(lst: List[Any], size: int) -> Iterator[Any]:
15 Yield successive size-sized shards from lst.
17 >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3):
18 ... [_ for _ in sublist]
25 for x in range(0, len(lst), size):
26 yield islice(lst, x, x + size)
29 def flatten(lst: List[Any]) -> List[Any]:
33 >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]])
34 [1, 2, 3, 4, 5, 6, 7, 8, 9]
39 if isinstance(lst[0], list):
40 return flatten(lst[0]) + flatten(lst[1:])
41 return lst[:1] + flatten(lst[1:])
44 def prepend(item: Any, lst: List[Any]) -> List[Any]:
46 Prepend an item to a list.
48 >>> prepend('foo', ['bar', 'baz'])
56 def remove_list_if_one_element(lst: List[Any]) -> Any:
58 Remove the list and return the 0th element iff its length is one.
60 >>> remove_list_if_one_element([1234])
63 >>> remove_list_if_one_element([1, 2, 3, 4])
73 def population_counts(lst: Sequence[Any]) -> Counter:
75 Return a population count mapping for the list (i.e. the keys are
76 list items and the values are the number of occurrances of that
77 list item in the original list.
79 >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4])
80 Counter({1: 3, 3: 3, 2: 2, 4: 1})
86 def most_common(lst: List[Any], *, count=1) -> Any:
89 Return the most common item in the list. In the case of ties,
90 which most common item is returned will be random.
92 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
95 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2)
99 p = population_counts(lst)
100 return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]])
103 def least_common(lst: List[Any], *, count=1) -> Any:
105 Return the least common item in the list. In the case of
106 ties, which least common item is returned will be random.
108 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4])
111 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2)
115 p = population_counts(lst)
116 mc = p.most_common()[-count:]
118 return remove_list_if_one_element([_[0] for _ in mc])
121 def dedup_list(lst: List[Any]) -> List[Any]:
123 Remove duplicates from the list performantly.
125 >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1])
129 return list(set(lst))
132 def uniq(lst: List[Any]) -> List[Any]:
134 Alias for dedup_list.
136 return dedup_list(lst)
139 def contains_duplicates(lst: List[Any]) -> bool:
141 Does the list contian duplicate elements or not?
143 >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4]
144 >>> contains_duplicates(lst)
147 >>> contains_duplicates(dedup_list(lst))
159 def all_unique(lst: List[Any]) -> bool:
161 Inverted alias for contains_duplicates.
163 return not contains_duplicates(lst)
166 def transpose(lst: List[Any]) -> List[Any]:
168 Transpose a list of lists.
170 >>> lst = [[1, 2], [3, 4], [5, 6]]
172 [[1, 3, 5], [2, 4, 6]]
175 transposed = zip(*lst)
176 return [list(_) for _ in transposed]
179 def ngrams(lst: Sequence[Any], n):
181 Return the ngrams in the sequence.
183 >>> seq = 'encyclopedia'
184 >>> for _ in ngrams(seq, 3):
197 >>> seq = ['this', 'is', 'an', 'awesome', 'test']
198 >>> for _ in ngrams(seq, 3):
201 ['is', 'an', 'awesome']
202 ['an', 'awesome', 'test']
204 for i in range(len(lst) - n + 1):
208 def permute(seq: str):
210 Returns all permutations of a sequence; takes O(N!) time.
212 >>> for x in permute('cat'):
222 yield from _permute(seq, "")
225 def _permute(seq: str, path: str):
230 for i in range(seq_len):
235 yield from _permute(cdr, path + car)
238 def shuffle(seq: MutableSequence[Any]) -> MutableSequence[Any]:
239 """Shuffles a sequence into a random order.
242 >>> shuffle([1, 2, 3, 4, 5])
245 >>> shuffle('example')
249 if isinstance(seq, str):
250 from pyutils import string_utils
252 return string_utils.shuffle(seq)
258 def scramble(seq: MutableSequence[Any]) -> MutableSequence[Any]:
262 def binary_search(lst: Sequence[Any], target: Any) -> Tuple[bool, int]:
263 """Performs a binary search on lst (which must already be sorted).
264 Returns a Tuple composed of a bool which indicates whether the
265 target was found and an int which indicates the index closest to
266 target whether it was found or not.
268 >>> a = [1, 4, 5, 6, 7, 9, 10, 11]
269 >>> binary_search(a, 4)
272 >>> binary_search(a, 12)
275 >>> binary_search(a, 3)
278 >>> binary_search(a, 2)
282 >>> binary_search(a, 4)
283 Traceback (most recent call last):
292 assert x >= last # This asserts iff the list isn't sorted
293 last = x # in ascending order.
294 return _binary_search(lst, target, 0, len(lst) - 1)
298 lst: Sequence[Any], target: Any, low: int, high: int
299 ) -> Tuple[bool, int]:
301 mid = (high + low) // 2
302 if lst[mid] == target:
304 elif lst[mid] > target:
305 return _binary_search(lst, target, low, mid - 1)
307 return _binary_search(lst, target, mid + 1, high)
312 def powerset(lst: Sequence[Any]) -> Iterator[Sequence[Any]]:
313 """Returns the powerset of the items in the input sequence.
315 >>> for x in powerset([1, 2, 3]):
326 return chain.from_iterable(combinations(lst, r) for r in range(len(lst) + 1))
329 if __name__ == '__main__':