X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=list_utils.py;h=91af8f9eb924fb7e7e04932d58a1bcb6eded0690;hb=e8fbbb7306430478dec55d2c963eed116d8330cc;hp=74f1cf3078457d371194deb33ddf5ad6410ed599;hpb=3bc4daf1edc121cd633429187392227f2fa61885;p=python_utils.git diff --git a/list_utils.py b/list_utils.py index 74f1cf3..91af8f9 100644 --- a/list_utils.py +++ b/list_utils.py @@ -1,23 +1,286 @@ #!/usr/bin/env python3 +"""Some useful(?) utilities for dealing with Lists.""" + +from collections import Counter from itertools import islice -from typing import Any, Iterator, List +from typing import Any, Iterator, List, Sequence, Tuple def shard(lst: List[Any], size: int) -> Iterator[Any]: - """Yield successive size-sized shards from lst.""" + """ + Yield successive size-sized shards from lst. + + >>> for sublist in shard([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 3): + ... [_ for _ in sublist] + [1, 2, 3] + [4, 5, 6] + [7, 8, 9] + [10, 11, 12] + + """ for x in range(0, len(lst), size): yield islice(lst, x, x + size) def flatten(lst: List[Any]) -> List[Any]: - """Flatten out a list: + """ + Flatten out a list: + + >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]]) + [1, 2, 3, 4, 5, 6, 7, 8, 9] - >>> flatten([ 1, [2, 3, 4, [5], 6], 7, [8, [9]]]) - [1, 2, 3, 4, 5, 6, 7, 8, 9] """ if len(lst) == 0: return lst if isinstance(lst[0], list): return flatten(lst[0]) + flatten(lst[1:]) return lst[:1] + flatten(lst[1:]) + + +def prepend(item: Any, lst: List[Any]) -> List[Any]: + """ + Prepend an item to a list. + + >>> prepend('foo', ['bar', 'baz']) + ['foo', 'bar', 'baz'] + + """ + lst.insert(0, item) + return lst + + +def remove_list_if_one_element(lst: List[Any]) -> Any: + """ + Remove the list and return the 0th element iff its length is one. + + >>> remove_list_if_one_element([1234]) + 1234 + + >>> remove_list_if_one_element([1, 2, 3, 4]) + [1, 2, 3, 4] + + """ + if len(lst) == 1: + return lst[0] + else: + return lst + + +def population_counts(lst: Sequence[Any]) -> Counter: + """ + Return a population count mapping for the list (i.e. the keys are + list items and the values are the number of occurrances of that + list item in the original list. + + >>> population_counts([1, 1, 1, 2, 2, 3, 3, 3, 4]) + Counter({1: 3, 3: 3, 2: 2, 4: 1}) + + """ + return Counter(lst) + + +def most_common(lst: List[Any], *, count=1) -> Any: + + """ + Return the most common item in the list. In the case of ties, + which most common item is returned will be random. + + >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) + 3 + + >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2) + [3, 1] + + """ + p = population_counts(lst) + return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]]) + + +def least_common(lst: List[Any], *, count=1) -> Any: + """ + Return the least common item in the list. In the case of + ties, which least common item is returned will be random. + + >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4]) + 4 + + >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2) + [4, 2] + + """ + p = population_counts(lst) + mc = p.most_common()[-count:] + mc.reverse() + return remove_list_if_one_element([_[0] for _ in mc]) + + +def dedup_list(lst: List[Any]) -> List[Any]: + """ + Remove duplicates from the list performantly. + + >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1]) + [1, 2, 3, 4, 5] + + """ + return list(set(lst)) + + +def uniq(lst: List[Any]) -> List[Any]: + """ + Alias for dedup_list. + """ + return dedup_list(lst) + + +def contains_duplicates(lst: List[Any]) -> bool: + """ + Does the list contian duplicate elements or not? + + >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4] + >>> contains_duplicates(lst) + True + + >>> contains_duplicates(dedup_list(lst)) + False + + """ + seen = set() + for _ in lst: + if _ in seen: + return True + seen.add(_) + return False + + +def all_unique(lst: List[Any]) -> bool: + """ + Inverted alias for contains_duplicates. + """ + return not contains_duplicates(lst) + + +def transpose(lst: List[Any]) -> List[Any]: + """ + Transpose a list of lists. + + >>> lst = [[1, 2], [3, 4], [5, 6]] + >>> transpose(lst) + [[1, 3, 5], [2, 4, 6]] + + """ + transposed = zip(*lst) + return [list(_) for _ in transposed] + + +def ngrams(lst: Sequence[Any], n): + """ + Return the ngrams in the sequence. + + >>> seq = 'encyclopedia' + >>> for _ in ngrams(seq, 3): + ... _ + 'enc' + 'ncy' + 'cyc' + 'ycl' + 'clo' + 'lop' + 'ope' + 'ped' + 'edi' + 'dia' + + >>> seq = ['this', 'is', 'an', 'awesome', 'test'] + >>> for _ in ngrams(seq, 3): + ... _ + ['this', 'is', 'an'] + ['is', 'an', 'awesome'] + ['an', 'awesome', 'test'] + """ + for i in range(len(lst) - n + 1): + yield lst[i : i + n] + + +def permute(seq: str): + """ + Returns all permutations of a sequence; takes O(N!) time. + + >>> for x in permute('cat'): + ... print(x) + cat + cta + act + atc + tca + tac + + """ + yield from _permute(seq, "") + + +def _permute(seq: str, path: str): + seq_len = len(seq) + if seq_len == 0: + yield path + + for i in range(seq_len): + car = seq[i] + left = seq[0:i] + right = seq[i + 1 :] + cdr = left + right + yield from _permute(cdr, path + car) + + +def binary_search(lst: Sequence[Any], target: Any, *, sanity_check=False) -> Tuple[bool, int]: + """Performs a binary search on lst (which must already be sorted). + Returns a Tuple composed of a bool which indicates whether the + target was found and an int which indicates the index closest to + target whether it was found or not. + + >>> a = [1, 4, 5, 6, 7, 9, 10, 11] + >>> binary_search(a, 4) + (True, 1) + + >>> binary_search(a, 12) + (False, 8) + + >>> binary_search(a, 3) + (False, 1) + + >>> binary_search(a, 2) + (False, 1) + + >>> a.append(9) + >>> binary_search(a, 4, sanity_check=True) + Traceback (most recent call last): + ... + AssertionError + + """ + if sanity_check: + last = None + for x in lst: + if last is not None: + assert x >= last # This asserts iff the list isn't sorted + last = x # in ascending order. + return _binary_search(lst, target, 0, len(lst) - 1) + + +def _binary_search(lst: Sequence[Any], target: Any, low: int, high: int) -> Tuple[bool, int]: + if high >= low: + mid = (high + low) // 2 + if lst[mid] == target: + return (True, mid) + elif lst[mid] > target: + return _binary_search(lst, target, low, mid - 1) + else: + return _binary_search(lst, target, mid + 1, high) + else: + return (False, low) + + +if __name__ == '__main__': + import doctest + + doctest.testmod()