X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=list_utils.py;h=05512b564c2303372ff81660840bfd29dcba942b;hb=e5da1fa6bab9ea6bf4394facbb29f13cdc3daf9a;hp=533317eb6da71f44a4f8b3c338505c053c460c0b;hpb=c41e0e59446412511c5737cf5b6ba8f289e75e7e;p=python_utils.git diff --git a/list_utils.py b/list_utils.py index 533317e..05512b5 100644 --- a/list_utils.py +++ b/list_utils.py @@ -2,7 +2,7 @@ from collections import Counter from itertools import islice -from typing import Any, Iterator, List, Mapping +from typing import Any, Iterator, List, Mapping, Sequence def shard(lst: List[Any], size: int) -> Iterator[Any]: @@ -48,6 +48,23 @@ def prepend(item: Any, lst: List[Any]) -> List[Any]: return lst +def remove_list_if_one_element(lst: List[Any]) -> Any: + """ + Remove the list and return the 0th element iff its length is one. + + >>> remove_list_if_one_element([1234]) + 1234 + + >>> remove_list_if_one_element([1, 2, 3, 4]) + [1, 2, 3, 4] + + """ + if len(lst) == 1: + return lst[0] + else: + return lst + + def population_counts(lst: List[Any]) -> Mapping[Any, int]: """ Return a population count mapping for the list (i.e. the keys are @@ -61,29 +78,126 @@ def population_counts(lst: List[Any]) -> Mapping[Any, int]: return Counter(lst) -def most_common_item(lst: List[Any]) -> Any: +def most_common(lst: List[Any], *, count=1) -> Any: """ Return the most common item in the list. In the case of ties, which most common item is returned will be random. - >>> most_common_item([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) + >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) 3 + >>> most_common([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4], count=2) + [3, 1] + """ - return population_counts(lst).most_common(1)[0][0] + p = population_counts(lst) + return remove_list_if_one_element([_[0] for _ in p.most_common()[0:count]]) -def least_common_item(lst: List[Any]) -> Any: +def least_common(lst: List[Any], *, count=1) -> Any: """ Return the least common item in the list. In the case of ties, which least common item is returned will be random. - >>> least_common_item([1, 1, 1, 2, 2, 3, 3, 3, 4]) + >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4]) 4 + >>> least_common([1, 1, 1, 2, 2, 3, 3, 3, 4], count=2) + [4, 2] + + """ + p = population_counts(lst) + mc = p.most_common()[-count:] + mc.reverse() + return remove_list_if_one_element([_[0] for _ in mc]) + + +def dedup_list(lst: List[Any]) -> List[Any]: + """ + Remove duplicates from the list performantly. + + >>> dedup_list([1, 2, 1, 3, 3, 4, 2, 3, 4, 5, 1]) + [1, 2, 3, 4, 5] + + """ + return list(set(lst)) + + +def uniq(lst: List[Any]) -> List[Any]: + """ + Alias for dedup_list. + """ + return dedup_list(lst) + + +def contains_duplicates(lst: List[Any]) -> bool: + """ + Does the list contian duplicate elements or not? + + >>> lst = [1, 2, 1, 3, 3, 4, 4, 5, 6, 1, 3, 4] + >>> contains_duplicates(lst) + True + + >>> contains_duplicates(dedup_list(lst)) + False + + """ + seen = set() + for _ in lst: + if _ in seen: + return True + seen.add(_) + return False + + +def all_unique(lst: List[Any]) -> bool: + """ + Inverted alias for contains_duplicates. + """ + return not contains_duplicates(lst) + + +def transpose(lst: List[Any]) -> List[Any]: + """ + Transpose a list of lists. + + >>> lst = [[1, 2], [3, 4], [5, 6]] + >>> transpose(lst) + [[1, 3, 5], [2, 4, 6]] + + """ + transposed = zip(*lst) + return [list(_) for _ in transposed] + + +def ngrams(lst: Sequence[Any], n): + """ + Return the ngrams in the sequence. + + >>> seq = 'encyclopedia' + >>> for _ in ngrams(seq, 3): + ... _ + 'enc' + 'ncy' + 'cyc' + 'ycl' + 'clo' + 'lop' + 'ope' + 'ped' + 'edi' + 'dia' + + >>> seq = ['this', 'is', 'an', 'awesome', 'test'] + >>> for _ in ngrams(seq, 3): + ... _ + ['this', 'is', 'an'] + ['is', 'an', 'awesome'] + ['an', 'awesome', 'test'] """ - return population_counts(lst).most_common()[-1][0] + for i in range(len(lst) - n + 1): + yield lst[i:i + n] if __name__ == '__main__':