3 """Some useful(?) utilities for dealing with Lists."""
5 from collections import Counter
6 from itertools import islice
7 from typing import Any, Iterator, List, Sequence, Tuple
10 def shard(lst: List[Any], size: int) -> Iterator[Any]:
12 Yield successive size-sized shards from lst.
14 >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3):
15 ... [_ for _ in sublist]
22 for x in range(0, len(lst), size):
23 yield islice(lst, x, x + size)
26 def flatten(lst: List[Any]) -> List[Any]:
30 >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]])
31 [1, 2, 3, 4, 5, 6, 7, 8, 9]
36 if isinstance(lst[0], list):
37 return flatten(lst[0]) + flatten(lst[1:])
38 return lst[:1] + flatten(lst[1:])
41 def prepend(item: Any, lst: List[Any]) -> List[Any]:
43 Prepend an item to a list.
45 >>> prepend('foo', ['bar', 'baz'])
53 def remove_list_if_one_element(lst: List[Any]) -> Any:
55 Remove the list and return the 0th element iff its length is one.
57 >>> remove_list_if_one_element([1234])
60 >>> remove_list_if_one_element([1, 2, 3, 4])
70 def population_counts(lst: Sequence[Any]) -> Counter:
72 Return a population count mapping for the list (i.e. the keys are
73 list items and the values are the number of occurrances of that
74 list item in the original list.
76 >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4])
77 Counter({1: 3, 3: 3, 2: 2, 4: 1})
83 def most_common(lst: List[Any], *, count=1) -> Any:
86 Return the most common item in the list. In the case of ties,
87 which most common item is returned will be random.
89 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4])
92 >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2)
96 p = population_counts(lst)
97 return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]])
100 def least_common(lst: List[Any], *, count=1) -> Any:
102 Return the least common item in the list. In the case of
103 ties, which least common item is returned will be random.
105 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4])
108 >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2)
112 p = population_counts(lst)
113 mc = p.most_common()[-count:]
115 return remove_list_if_one_element([_[0] for _ in mc])
118 def dedup_list(lst: List[Any]) -> List[Any]:
120 Remove duplicates from the list performantly.
122 >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1])
126 return list(set(lst))
129 def uniq(lst: List[Any]) -> List[Any]:
131 Alias for dedup_list.
133 return dedup_list(lst)
136 def contains_duplicates(lst: List[Any]) -> bool:
138 Does the list contian duplicate elements or not?
140 >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4]
141 >>> contains_duplicates(lst)
144 >>> contains_duplicates(dedup_list(lst))
156 def all_unique(lst: List[Any]) -> bool:
158 Inverted alias for contains_duplicates.
160 return not contains_duplicates(lst)
163 def transpose(lst: List[Any]) -> List[Any]:
165 Transpose a list of lists.
167 >>> lst = [[1, 2], [3, 4], [5, 6]]
169 [[1, 3, 5], [2, 4, 6]]
172 transposed = zip(*lst)
173 return [list(_) for _ in transposed]
176 def ngrams(lst: Sequence[Any], n):
178 Return the ngrams in the sequence.
180 >>> seq = 'encyclopedia'
181 >>> for _ in ngrams(seq, 3):
194 >>> seq = ['this', 'is', 'an', 'awesome', 'test']
195 >>> for _ in ngrams(seq, 3):
198 ['is', 'an', 'awesome']
199 ['an', 'awesome', 'test']
201 for i in range(len(lst) - n + 1):
205 def permute(seq: str):
207 Returns all permutations of a sequence; takes O(N!) time.
209 >>> for x in permute('cat'):
219 yield from _permute(seq, "")
222 def _permute(seq: str, path: str):
227 for i in range(seq_len):
232 yield from _permute(cdr, path + car)
235 def binary_search(lst: Sequence[Any], target: Any, *, sanity_check=False) -> Tuple[bool, int]:
236 """Performs a binary search on lst (which must already be sorted).
237 Returns a Tuple composed of a bool which indicates whether the
238 target was found and an int which indicates the index closest to
239 target whether it was found or not.
241 >>> a = [1, 4, 5, 6, 7, 9, 10, 11]
242 >>> binary_search(a, 4)
245 >>> binary_search(a, 12)
248 >>> binary_search(a, 3)
251 >>> binary_search(a, 2)
255 >>> binary_search(a, 4, sanity_check=True)
256 Traceback (most recent call last):
265 assert x >= last # This asserts iff the list isn't sorted
266 last = x # in ascending order.
267 return _binary_search(lst, target, 0, len(lst) - 1)
270 def _binary_search(lst: Sequence[Any], target: Any, low: int, high: int) -> Tuple[bool, int]:
272 mid = (high + low) // 2
273 if lst[mid] == target:
275 elif lst[mid] > target:
276 return _binary_search(lst, target, low, mid - 1)
278 return _binary_search(lst, target, mid + 1, high)
283 if __name__ == '__main__':