#!/usr/bin/env python3 # -*- coding: utf-8 -*- """The MIT License (MIT) Copyright (c) 2016-2020 Davide Zanotti Modifications Copyright (c) 2021-2022 Scott Gasch Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. This class is based on: https://github.com/daveoncode/python-string-utils. """ import base64 import contextlib # type: ignore import datetime import io import json import logging import numbers import random import re import string import unicodedata import warnings from itertools import zip_longest from typing import ( Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, ) from uuid import uuid4 import list_utils logger = logging.getLogger(__name__) NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$") HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$") OCT_NUMBER_RE = re.compile(r"^([+|-]?)0[O|o]([0-7]+)$") BIN_NUMBER_RE = re.compile(r"^([+|-]?)0[B|b]([0|1]+)$") URLS_RAW_STRING = ( r"([a-z-]+://)" # scheme r"([a-z_\d-]+:[a-z_\d-]+@)?" # user:password r"(www\.)?" # www. r"((?]*/?>)(.*?())?||)", re.IGNORECASE | re.MULTILINE | re.DOTALL, ) HTML_TAG_ONLY_RE = re.compile( r"(<([a-z]+:)?[a-z]+[^>]*/?>|||)", re.IGNORECASE | re.MULTILINE | re.DOTALL, ) SPACES_RE = re.compile(r"\s") NO_LETTERS_OR_NUMBERS_RE = re.compile(r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE) MARGIN_RE = re.compile(r"^[^\S\r\n]+") ESCAPE_SEQUENCE_RE = re.compile(r"\[[^A-Za-z]*[A-Za-z]") NUM_SUFFIXES = { "Pb": (1024**5), "P": (1024**5), "Tb": (1024**4), "T": (1024**4), "Gb": (1024**3), "G": (1024**3), "Mb": (1024**2), "M": (1024**2), "Kb": (1024**1), "K": (1024**1), } def is_none_or_empty(in_str: Optional[str]) -> bool: """ Args: in_str: the string to test Returns: True if the input string is either None or an empty string, False otherwise. >>> is_none_or_empty("") True >>> is_none_or_empty(None) True >>> is_none_or_empty(" \t ") True >>> is_none_or_empty('Test') False """ return in_str is None or len(in_str.strip()) == 0 def is_string(obj: Any) -> bool: """ Args: in_str: the object to test Returns: True if the object is a string and False otherwise. >>> is_string('test') True >>> is_string(123) False >>> is_string(100.3) False >>> is_string([1, 2, 3]) False """ return isinstance(obj, str) def is_empty_string(in_str: Any) -> bool: """ Args: in_str: the string to test Returns: True if the string is empty and False otherwise. """ return is_empty(in_str) def is_empty(in_str: Any) -> bool: """ Args: in_str: the string to test Returns: True if the string is empty and false otherwise. 
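    Note that, unlike :meth:`is_none_or_empty`, a non-string argument
    (including None) is not considered empty here because it is not a
    string at all:

    >>> is_empty(None)
    False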
>>> is_empty('') True >>> is_empty(' \t\t ') True >>> is_empty('test') False >>> is_empty(100.88) False >>> is_empty([1, 2, 3]) False """ return is_string(in_str) and in_str.strip() == "" def is_full_string(in_str: Any) -> bool: """ Args: in_str: the object to test Returns: True if the object is a string and is not empty ('') and is not only composed of whitespace. >>> is_full_string('test!') True >>> is_full_string('') False >>> is_full_string(' ') False >>> is_full_string(100.999) False >>> is_full_string({"a": 1, "b": 2}) False """ return is_string(in_str) and in_str.strip() != "" def is_number(in_str: str) -> bool: """ Args: in_str: the string to test Returns: True if the string contains a valid numberic value and False otherwise. >>> is_number(100.5) Traceback (most recent call last): ... ValueError: 100.5 >>> is_number("100.5") True >>> is_number("test") False >>> is_number("99") True >>> is_number([1, 2, 3]) Traceback (most recent call last): ... ValueError: [1, 2, 3] """ if not is_string(in_str): raise ValueError(in_str) return NUMBER_RE.match(in_str) is not None def is_integer_number(in_str: str) -> bool: """ Args: in_str: the string to test Returns: True if the string contains a valid (signed or unsigned, decimal, hex, or octal, regular or scientific) integral expression and False otherwise. >>> is_integer_number('42') True >>> is_integer_number('42.0') False """ return ( (is_number(in_str) and "." not in in_str) or is_hexidecimal_integer_number(in_str) or is_octal_integer_number(in_str) or is_binary_integer_number(in_str) ) def is_hexidecimal_integer_number(in_str: str) -> bool: """ Args: in_str: the string to test Returns: True if the string is a hex integer number and False otherwise. >>> is_hexidecimal_integer_number('0x12345') True >>> is_hexidecimal_integer_number('0x1A3E') True >>> is_hexidecimal_integer_number('1234') # Needs 0x False >>> is_hexidecimal_integer_number('-0xff') True >>> is_hexidecimal_integer_number('test') False >>> is_hexidecimal_integer_number(12345) # Not a string Traceback (most recent call last): ... ValueError: 12345 >>> is_hexidecimal_integer_number(101.4) Traceback (most recent call last): ... ValueError: 101.4 >>> is_hexidecimal_integer_number(0x1A3E) Traceback (most recent call last): ... ValueError: 6718 """ if not is_string(in_str): raise ValueError(in_str) return HEX_NUMBER_RE.match(in_str) is not None def is_octal_integer_number(in_str: str) -> bool: """ Args: in_str: the string to test Returns: True if the string is a valid octal integral number and False otherwise. >>> is_octal_integer_number('0o777') True >>> is_octal_integer_number('-0O115') True >>> is_octal_integer_number('0xFF') # Not octal, needs 0o False >>> is_octal_integer_number('7777') # Needs 0o False >>> is_octal_integer_number('test') False """ if not is_string(in_str): raise ValueError(in_str) return OCT_NUMBER_RE.match(in_str) is not None def is_binary_integer_number(in_str: str) -> bool: """ Args: in_str: the string to test Returns: True if the string contains a binary integral number and False otherwise. 
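    A single leading sign character is accepted, mirroring the other
    integer predicates in this module:

    >>> is_binary_integer_number('+0b101')
    True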
>>> is_binary_integer_number('0b10111') True >>> is_binary_integer_number('-0b111') True >>> is_binary_integer_number('0B10101') True >>> is_binary_integer_number('0b10102') False >>> is_binary_integer_number('0xFFF') False >>> is_binary_integer_number('test') False """ if not is_string(in_str): raise ValueError(in_str) return BIN_NUMBER_RE.match(in_str) is not None def to_int(in_str: str) -> int: """ Args: in_str: the string to convert Returns: The integral value of the string or raises on error. >>> to_int('1234') 1234 >>> to_int('test') Traceback (most recent call last): ... ValueError: invalid literal for int() with base 10: 'test' """ if not is_string(in_str): raise ValueError(in_str) if is_binary_integer_number(in_str): return int(in_str, 2) if is_octal_integer_number(in_str): return int(in_str, 8) if is_hexidecimal_integer_number(in_str): return int(in_str, 16) return int(in_str) def is_decimal_number(in_str: str) -> bool: """ Args: in_str: the string to check Returns: True if the given string represents a decimal or False otherwise. A decimal may be signed or unsigned or use a "scientific notation". .. note:: We do not consider integers without a decimal point to be decimals; they return False (see example). >>> is_decimal_number('42.0') True >>> is_decimal_number('42') False """ return is_number(in_str) and "." in in_str def strip_escape_sequences(in_str: str) -> str: """ Args: in_str: the string to strip of escape sequences. Returns: in_str with escape sequences removed. .. note:: What is considered to be an "escape sequence" is defined by a regular expression. While this gets common ones, there may exist valid sequences that it doesn't match. >>> strip_escape_sequences('this is a test!') 'this is a test!' """ in_str = ESCAPE_SEQUENCE_RE.sub("", in_str) return in_str def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str: """ Args: in_str: string or number to which to add thousands separator(s) separator_char: the separator character to add (defaults to comma) places: add a separator every N places (defaults to three) Returns: A numeric string with thousands separators added appropriately. >>> add_thousands_separator('12345678') '12,345,678' >>> add_thousands_separator(12345678) '12,345,678' >>> add_thousands_separator(12345678.99) '12,345,678.99' >>> add_thousands_separator('test') Traceback (most recent call last): ... ValueError: test """ if isinstance(in_str, numbers.Number): in_str = f'{in_str}' if is_number(in_str): return _add_thousands_separator(in_str, separator_char=separator_char, places=places) raise ValueError(in_str) def _add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str: decimal_part = "" if '.' in in_str: (in_str, decimal_part) = in_str.split('.') tmp = [iter(in_str[::-1])] * places ret = separator_char.join("".join(x) for x in zip_longest(*tmp, fillvalue=""))[::-1] if len(decimal_part) > 0: ret += '.' ret += decimal_part return ret def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool: """ Args: in_str: the string to test allowed_schemes: an optional list of allowed schemes (e.g. ['http', 'https', 'ftp']. If passed, only URLs that begin with the one of the schemes passed will be considered to be valid. Otherwise, any scheme:// will be considered valid. Returns: True if in_str contains a valid URL and False otherwise. 
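    When allowed_schemes is given, the URL must additionally start with
    one of the listed schemes:

    >>> is_url('https://mysite.com', allowed_schemes=['https'])
    True
    >>> is_url('ftp://mysite.com', allowed_schemes=['http', 'https'])
    False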
>>> is_url('http://www.mysite.com') True >>> is_url('https://mysite.com') True >>> is_url('.mysite.com') False >>> is_url('scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value¶m2=value2#hash') True """ if not is_full_string(in_str): return False valid = URL_RE.match(in_str) is not None if allowed_schemes: return valid and any([in_str.startswith(s) for s in allowed_schemes]) return valid def is_email(in_str: Any) -> bool: """ Args: in_str: the email address to check Returns: True if the in_str contains a valid email (as defined by https://tools.ietf.org/html/rfc3696#section-3) or False otherwise. >>> is_email('my.email@the-provider.com') True >>> is_email('@gmail.com') False """ if not is_full_string(in_str) or len(in_str) > 320 or in_str.startswith("."): return False try: # we expect 2 tokens, one before "@" and one after, otherwise # we have an exception and the email is not valid. head, tail = in_str.split("@") # head's size must be <= 64, tail <= 255, head must not start # with a dot or contain multiple consecutive dots. if len(head) > 64 or len(tail) > 255 or head.endswith(".") or (".." in head): return False # removes escaped spaces, so that later on the test regex will # accept the string. head = head.replace("\\ ", "") if head.startswith('"') and head.endswith('"'): head = head.replace(" ", "")[1:-1] return EMAIL_RE.match(head + "@" + tail) is not None except ValueError: # borderline case in which we have multiple "@" signs but the # head part is correctly escaped. if ESCAPED_AT_SIGN.search(in_str) is not None: # replace "@" with "a" in the head return is_email(ESCAPED_AT_SIGN.sub("a", in_str)) return False def suffix_string_to_number(in_str: str) -> Optional[int]: """Takes a string like "33Gb" and converts it into a number (of bytes) like 34603008. Args: in_str: the string with a suffix to be interpreted and removed. Returns: An integer number of bytes or None to indicate an error. >>> suffix_string_to_number('1Mb') 1048576 >>> suffix_string_to_number('13.1Gb') 14066017894 """ def suffix_capitalize(s: str) -> str: if len(s) == 1: return s.upper() elif len(s) == 2: return f"{s[0].upper()}{s[1].lower()}" return suffix_capitalize(s[0:1]) if is_string(in_str): if is_integer_number(in_str): return to_int(in_str) suffixes = [in_str[-2:], in_str[-1:]] rest = [in_str[:-2], in_str[:-1]] for x in range(len(suffixes)): s = suffixes[x] s = suffix_capitalize(s) multiplier = NUM_SUFFIXES.get(s, None) if multiplier is not None: r = rest[x] if is_integer_number(r): return to_int(r) * multiplier if is_decimal_number(r): return int(float(r) * multiplier) return None def number_to_suffix_string(num: int) -> Optional[str]: """Take a number (of bytes) and returns a string like "43.8Gb". Args: num: an integer number of bytes Returns: A string with a suffix representing num bytes concisely or None to indicate an error. >>> number_to_suffix_string(14066017894) '13.1Gb' >>> number_to_suffix_string(1024 * 1024) '1.0Mb' """ d = 0.0 suffix = None for (sfx, size) in NUM_SUFFIXES.items(): if num >= size: d = num / size suffix = sfx break if suffix is not None: return f"{d:.1f}{suffix}" else: return f'{num:d}' def is_credit_card(in_str: Any, card_type: str = None) -> bool: """ Args: in_str: a string to check card_type: if provided, contains the card type to validate with. Otherwise, all known credit card number types will be accepted. 
Supported card types are the following: * VISA * MASTERCARD * AMERICAN_EXPRESS * DINERS_CLUB * DISCOVER * JCB Returns: True if in_str is a valid credit card number. """ if not is_full_string(in_str): return False if card_type is not None: if card_type not in CREDIT_CARDS: raise KeyError( f'Invalid card type "{card_type}". Valid types are: {CREDIT_CARDS.keys()}' ) return CREDIT_CARDS[card_type].match(in_str) is not None for c in CREDIT_CARDS: if CREDIT_CARDS[c].match(in_str) is not None: return True return False def is_camel_case(in_str: Any) -> bool: """ Args: in_str: the string to test Returns: True if the string is formatted as camel case and False otherwise. A string is considered camel case when: * it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9]) * it contains both lowercase and uppercase letters * it does not start with a number """ return is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None def is_snake_case(in_str: Any, *, separator: str = "_") -> bool: """ Args: in_str: the string to test Returns: True if the string is snake case and False otherwise. A string is considered snake case when: * it's composed only by lowercase/uppercase letters and digits * it contains at least one underscore (or provided separator) * it does not start with a number >>> is_snake_case('this_is_a_test') True >>> is_snake_case('___This_Is_A_Test_1_2_3___') True >>> is_snake_case('this-is-a-test') False >>> is_snake_case('this-is-a-test', separator='-') True """ if is_full_string(in_str): re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE} re_template = r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)" r = re_map.get( separator, re.compile(re_template.format(sign=re.escape(separator)), re.IGNORECASE), ) return r.match(in_str) is not None return False def is_json(in_str: Any) -> bool: """ Args: in_str: the string to test Returns: True if the in_str contains valid JSON and False otherwise. >>> is_json('{"name": "Peter"}') True >>> is_json('[1, 2, 3]') True >>> is_json('{nope}') False """ if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None: try: return isinstance(json.loads(in_str), (dict, list)) except (TypeError, ValueError, OverflowError): pass return False def is_uuid(in_str: Any, allow_hex: bool = False) -> bool: """ Args: in_str: the string to test Returns: True if the in_str contains a valid UUID and False otherwise. >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') True >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') False >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) True """ # string casting is used to allow UUID itself as input data type s = str(in_str) if allow_hex: return UUID_HEX_OK_RE.match(s) is not None return UUID_RE.match(s) is not None def is_ip_v4(in_str: Any) -> bool: """ Args: in_str: the string to test Returns: True if in_str contains a valid IPv4 address and False otherwise. >>> is_ip_v4('255.200.100.75') True >>> is_ip_v4('nope') False >>> is_ip_v4('255.200.100.999') # 999 out of range False """ if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None: return False # checks that each entry in the ip is in the valid range (0 to 255) for token in in_str.split("."): if not 0 <= int(token) <= 255: return False return True def extract_ip_v4(in_str: Any) -> Optional[str]: """ Args: in_str: the string to extract an IPv4 address from. Returns: The first extracted IPv4 address from in_str or None if none were found or an error occurred. 
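    Only the first address found is returned when several are present:

    >>> extract_ip_v4('127.0.0.1 or 10.0.0.1?')
    '127.0.0.1'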
    >>> extract_ip_v4(' The secret IP address: 127.0.0.1 (use it wisely) ')
    '127.0.0.1'

    >>> extract_ip_v4('Your mom dresses you funny.')
    """
    if not is_full_string(in_str):
        return None
    m = ANYWHERE_IP_V4_RE.search(in_str)
    if m is not None:
        return m.group(0)
    return None


def is_ip_v6(in_str: Any) -> bool:
    """
    Args:
        in_str: the string to test.

    Returns:
        True if in_str contains a valid IPv6 address and False otherwise.

    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
    True
    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?')    # invalid "?"
    False
    """
    return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None


def extract_ip_v6(in_str: Any) -> Optional[str]:
    """
    Args:
        in_str: the string from which to extract an IPv6 address.

    Returns:
        The first IPv6 address found in in_str or None if no address
        was found or an error occurred.

    >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
    '2001:db8:85a3:0000:0000:8a2e:370:7334'

    >>> extract_ip_v6("(and she's ugly too, btw)")
    """
    if not is_full_string(in_str):
        return None
    m = ANYWHERE_IP_V6_RE.search(in_str)
    if m is not None:
        return m.group(0)
    return None


def is_ip(in_str: Any) -> bool:
    """
    Args:
        in_str: the string to test.

    Returns:
        True if in_str contains a valid IP address (either IPv4 or
        IPv6).

    >>> is_ip('255.200.100.75')
    True
    >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
    True
    >>> is_ip('1.2.3')
    False
    >>> is_ip('1.2.3.999')
    False
    """
    return is_ip_v6(in_str) or is_ip_v4(in_str)


def extract_ip(in_str: Any) -> Optional[str]:
    """
    Args:
        in_str: the string from which to extract an IP address.

    Returns:
        The first IP address (IPv4 or IPv6) found in in_str or None
        to indicate none found or an error condition.

    >>> extract_ip('Attacker: 255.200.100.75')
    '255.200.100.75'
    >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
    '2001:db8:85a3:0000:0000:8a2e:370:7334'
    >>> extract_ip('1.2.3')
    """
    ip = extract_ip_v4(in_str)
    if ip is None:
        ip = extract_ip_v6(in_str)
    return ip


def is_mac_address(in_str: Any) -> bool:
    """
    Args:
        in_str: the string to test

    Returns:
        True if in_str is a valid MAC address and False otherwise.

    >>> is_mac_address("34:29:8F:12:0D:2F")
    True
    >>> is_mac_address('34:29:8f:12:0d:2f')
    True
    >>> is_mac_address('34-29-8F-12-0D-2F')
    True
    >>> is_mac_address("test")
    False
    """
    return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None


def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
    """
    Args:
        in_str: the string from which to extract a MAC address.
        separator: the separator character to use in the returned
            address (defaults to ':').

    Returns:
        The first MAC address found in in_str or None to indicate no
        match or an error.

    >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
    '34:29:8F:12:0D:2F'

    >>> extract_mac_address('? (10.0.0.30) at d8:5d:e2:34:54:86 on em0 expires in 1176 seconds [ethernet]')
    'd8:5d:e2:34:54:86'
    """
    if not is_full_string(in_str):
        return None
    in_str = in_str.strip()
    m = ANYWHERE_MAC_ADDRESS_RE.search(in_str)
    if m is not None:
        mac = m.group(0)
        # Strings are immutable; assign the replace results so that the
        # requested separator is actually honored.
        mac = mac.replace(":", separator)
        mac = mac.replace("-", separator)
        return mac
    return None


def is_slug(in_str: Any, separator: str = "-") -> bool:
    """
    Args:
        in_str: string to test

    Returns:
        True if in_str is a slug string and False otherwise.

    >>> is_slug('my-blog-post-title')
    True
    >>> is_slug('My blog post title')
    False
    """
    if not is_full_string(in_str):
        return False
    rex = r"^([a-z\d]+" + re.escape(separator) + r"*?)*[a-z\d]$"
    return re.match(rex, in_str) is not None


def contains_html(in_str: str) -> bool:
    """
    Args:
        in_str: the string to check for tags in

    Returns:
        True if the given string contains HTML/XML tags and False
        otherwise.

    ..
warning:: By design, this function matches ANY type of tag, so don't expect to use it as an HTML validator. It's a quick sanity check at best. See something like BeautifulSoup for a more full-featuered HTML parser. >>> contains_html('my string is bold') True >>> contains_html('my string is not bold') False """ if not is_string(in_str): raise ValueError(in_str) return HTML_RE.search(in_str) is not None def words_count(in_str: str) -> int: """ Args: in_str: the string to count words in Returns: The number of words contained in the given string. .. note:: This method is "smart" in that it does consider only sequences of one or more letter and/or numbers to be "words". Thus a string like this: "! @ # % ... []" will return zero. Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop" will be 4 not 1 (even if there are no spaces in the string). >>> words_count('hello world') 2 >>> words_count('one,two,three.stop') 4 """ if not is_string(in_str): raise ValueError(in_str) return len(WORDS_COUNT_RE.findall(in_str)) def word_count(in_str: str) -> int: """ Args: in_str: the string to count words in Returns: The number of words contained in the given string. .. note:: This method is "smart" in that it does consider only sequences of one or more letter and/or numbers to be "words". Thus a string like this: "! @ # % ... []" will return zero. Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop" will be 4 not 1 (even if there are no spaces in the string). >>> word_count('hello world') 2 >>> word_count('one,two,three.stop') 4 """ return words_count(in_str) def generate_uuid(omit_dashes: bool = False) -> str: """ Args: omit_dashes: should we omit the dashes in the generated UUID? Returns: A generated UUID string (using `uuid.uuid4()`) with or without dashes per the omit_dashes arg. generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b' generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b' """ uid = uuid4() if omit_dashes: return uid.hex return str(uid) def generate_random_alphanumeric_string(size: int) -> str: """ Args: size: number of characters to generate Returns: A string of the specified size containing random characters (uppercase/lowercase ascii letters and digits). >>> random.seed(22) >>> generate_random_alphanumeric_string(9) '96ipbNClS' """ if size < 1: raise ValueError("size must be >= 1") chars = string.ascii_letters + string.digits buffer = [random.choice(chars) for _ in range(size)] return from_char_list(buffer) def reverse(in_str: str) -> str: """ Args: in_str: the string to reverse Returns: The reversed (chracter by character) string. >>> reverse('test') 'tset' """ if not is_string(in_str): raise ValueError(in_str) return in_str[::-1] def camel_case_to_snake_case(in_str, *, separator="_"): """ Args: in_str: the camel case string to convert Returns: A snake case string equivalent to the camel case input or the original string if it is not a valid camel case string or some other error occurs. 
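    A custom separator may be supplied via the keyword-only argument:

    >>> camel_case_to_snake_case('MacAddressExtractorFactory', separator='-')
    'mac-address-extractor-factory'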
>>> camel_case_to_snake_case('MacAddressExtractorFactory') 'mac_address_extractor_factory' >>> camel_case_to_snake_case('Luke Skywalker') 'Luke Skywalker' """ if not is_string(in_str): raise ValueError(in_str) if not is_camel_case(in_str): return in_str return CAMEL_CASE_REPLACE_RE.sub(lambda m: m.group(1) + separator, in_str).lower() def snake_case_to_camel_case( in_str: str, *, upper_case_first: bool = True, separator: str = "_" ) -> str: """ Args: in_str: the snake case string to convert Returns: A camel case string that is equivalent to the snake case string provided or the original string back again if it is not valid snake case or another error occurs. >>> snake_case_to_camel_case('this_is_a_test') 'ThisIsATest' >>> snake_case_to_camel_case('Han Solo') 'Han Solo' """ if not is_string(in_str): raise ValueError(in_str) if not is_snake_case(in_str, separator=separator): return in_str tokens = [s.title() for s in in_str.split(separator) if is_full_string(s)] if not upper_case_first: tokens[0] = tokens[0].lower() return from_char_list(tokens) def to_char_list(in_str: str) -> List[str]: """ Args: in_str: the string to split into a char list Returns: A list of strings of length one each. >>> to_char_list('test') ['t', 'e', 's', 't'] """ if not is_string(in_str): return [] return list(in_str) def from_char_list(in_list: List[str]) -> str: """ Args: in_list: A list of characters to convert into a string. Returns: The string resulting from gluing the characters in in_list together. >>> from_char_list(['t', 'e', 's', 't']) 'test' """ return "".join(in_list) def shuffle(in_str: str) -> Optional[str]: """ Args: in_str: a string to shuffle randomly by character Returns: A new string containing same chars of the given one but in a randomized order. Note that in rare cases this could result in the same original string as no check is done. Returns None to indicate error conditions. >>> random.seed(22) >>> shuffle('awesome') 'meosaew' """ if not is_string(in_str): return None chars = to_char_list(in_str) random.shuffle(chars) return from_char_list(chars) def scramble(in_str: str) -> Optional[str]: """ Args: in_str: a string to shuffle randomly by character Returns: A new string containing same chars of the given one but in a randomized order. Note that in rare cases this could result in the same original string as no check is done. Returns None to indicate error conditions. >>> random.seed(22) >>> scramble('awesome') 'meosaew' """ return shuffle(in_str) def strip_html(in_str: str, keep_tag_content: bool = False) -> str: """ Args: in_str: the string to strip tags from keep_tag_content: should we keep the inner contents of tags? Returns: A string with all HTML tags removed (optionally with tag contents preserved). .. note:: This method uses simple regular expressions to strip tags and is not a full fledged HTML parser by any means. Consider using something like BeautifulSoup if your needs are more than this simple code can fulfill. >>> strip_html('test: click here') 'test: ' >>> strip_html('test: click here', keep_tag_content=True) 'test: click here' """ if not is_string(in_str): raise ValueError(in_str) r = HTML_TAG_ONLY_RE if keep_tag_content else HTML_RE return r.sub("", in_str) def asciify(in_str: str) -> str: """ Args: in_str: the string to asciify. Returns: An output string roughly equivalent to the original string where all content to are ascii-only. This is accomplished by translating all non-ascii chars into their closest possible ASCII representation (eg: ó -> o, Ë -> E, ç -> c...). .. 
warning:: Some chars may be lost if impossible to translate. >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') 'eeuuooaaeynAAACIINOE' """ if not is_string(in_str): raise ValueError(in_str) # "NFKD" is the algorithm which is able to successfully translate # the most of non-ascii chars. normalized = unicodedata.normalize("NFKD", in_str) # encode string forcing ascii and ignore any errors # (unrepresentable chars will be stripped out) ascii_bytes = normalized.encode("ascii", "ignore") # turns encoded bytes into an utf-8 string return ascii_bytes.decode("utf-8") def slugify(in_str: str, *, separator: str = "-") -> str: """ Args: in_str: the string to slugify separator: the character to use during sligification (default is a dash) Returns: The converted string. The returned string has the following properties: * it has no spaces * all letters are in lower case * all punctuation signs and non alphanumeric chars are removed * words are divided using provided separator * all chars are encoded as ascii (by using :meth:`asciify`) * is safe for URL >>> slugify('Top 10 Reasons To Love Dogs!!!') 'top-10-reasons-to-love-dogs' >>> slugify('Mönstér Mägnët') 'monster-magnet' """ if not is_string(in_str): raise ValueError(in_str) # replace any character that is NOT letter or number with spaces out = NO_LETTERS_OR_NUMBERS_RE.sub(" ", in_str.lower()).strip() # replace spaces with join sign out = SPACES_RE.sub(separator, out) # normalize joins (remove duplicates) out = re.sub(re.escape(separator) + r"+", separator, out) return asciify(out) def to_bool(in_str: str) -> bool: """ Args: in_str: the string to convert to boolean Returns: A boolean equivalent of the original string based on its contents. All conversion is case insensitive. A positive boolean (True) is returned if the string value is any of the following: * "true" * "t" * "1" * "yes" * "y" * "on" Otherwise False is returned. >>> to_bool('True') True >>> to_bool('1') True >>> to_bool('yes') True >>> to_bool('no') False >>> to_bool('huh?') False >>> to_bool('on') True """ if not is_string(in_str): raise ValueError(in_str) return in_str.lower() in ("true", "1", "yes", "y", "t", "on") def to_date(in_str: str) -> Optional[datetime.date]: """ Args: in_str: the string to convert into a date Returns: The datetime.date the string contained or None to indicate an error. This parser is relatively clever; see :class:`python_modules.dateparse.dateparse_utils` docs for details. >>> to_date('9/11/2001') datetime.date(2001, 9, 11) >>> to_date('xyzzy') """ import dateparse.dateparse_utils as du try: d = du.DateParser() # type: ignore d.parse(in_str) return d.get_date() except du.ParseException: # type: ignore msg = f'Unable to parse date {in_str}.' logger.warning(msg) return None def extract_date(in_str: Any) -> Optional[datetime.datetime]: """Finds and extracts a date from the string, if possible. 
Args: in_str: the string to extract a date from Returns: a datetime if date was found, otherwise None >>> extract_date("filename.txt dec 13, 2022") datetime.datetime(2022, 12, 13, 0, 0) >>> extract_date("Dear Santa, please get me a pony.") """ import itertools import dateparse.dateparse_utils as du d = du.DateParser() # type: ignore chunks = in_str.split() for ngram in itertools.chain( list_utils.ngrams(chunks, 5), list_utils.ngrams(chunks, 4), list_utils.ngrams(chunks, 3), list_utils.ngrams(chunks, 2), ): try: expr = " ".join(ngram) logger.debug(f"Trying {expr}") if d.parse(expr): return d.get_datetime() except du.ParseException: # type: ignore pass return None def is_valid_date(in_str: str) -> bool: """ Args: in_str: the string to check Returns: True if the string represents a valid date that we can recognize and False otherwise. This parser is relatively clever; see :class:`python_modules.dateparse.dateparse_utils` docs for details. >>> is_valid_date('1/2/2022') True >>> is_valid_date('christmas') True >>> is_valid_date('next wednesday') True >>> is_valid_date('xyzzy') False """ import dateparse.dateparse_utils as dp try: d = dp.DateParser() # type: ignore _ = d.parse(in_str) return True except dp.ParseException: # type: ignore msg = f'Unable to parse date {in_str}.' logger.warning(msg) return False def to_datetime(in_str: str) -> Optional[datetime.datetime]: """ Args: in_str: string to parse into a datetime Returns: A python datetime parsed from in_str or None to indicate an error. This parser is relatively clever; see :class:`python_modules.dateparse.dateparse_utils` docs for details. >>> to_datetime('7/20/1969 02:56 GMT') datetime.datetime(1969, 7, 20, 2, 56, tzinfo=) """ import dateparse.dateparse_utils as dp try: d = dp.DateParser() # type: ignore dt = d.parse(in_str) if isinstance(dt, datetime.datetime): return dt except Exception: msg = f'Unable to parse datetime {in_str}.' logger.warning(msg) return None def valid_datetime(in_str: str) -> bool: """ Args: in_str: the string to check Returns: True if in_str contains a valid datetime and False otherwise. This parser is relatively clever; see :class:`python_modules.dateparse.dateparse_utils` docs for details. >>> valid_datetime('next wednesday at noon') True >>> valid_datetime('3 weeks ago at midnight') True >>> valid_datetime('next easter at 5:00 am') True >>> valid_datetime('sometime soon') False """ _ = to_datetime(in_str) if _ is not None: return True msg = f'Unable to parse datetime {in_str}.' logger.warning(msg) return False def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str: """ Args: in_str: the string to squeeze character_to_squeeze: the character to remove runs of more than one in a row (default = space) Returns: A "squeezed string" where runs of more than one character_to_squeeze into one. >>> squeeze(' this is a test ') ' this is a test ' >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|') 'one|!|two|!|three' """ return re.sub( r'(' + re.escape(character_to_squeeze) + r')+', character_to_squeeze, in_str, ) def dedent(in_str: str) -> Optional[str]: """ Args: in_str: the string to dedent Returns: A string with tab indentation removed or None on error. .. note:: Inspired by analogous Scala function. 
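    Leading spaces are removed as well as tabs:

    >>> dedent('    hello')
    'hello'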
>>> dedent('\t\ttest\\n\t\ting') 'test\\ning' """ if not is_string(in_str): return None line_separator = '\n' lines = [MARGIN_RE.sub('', line) for line in in_str.split(line_separator)] return line_separator.join(lines) def indent(in_str: str, amount: int) -> str: """ Args: in_str: the string to indent amount: count of spaces to indent each line by Returns: An indented string created by prepending amount spaces. >>> indent('This is a test', 4) ' This is a test' """ if not is_string(in_str): raise ValueError(in_str) line_separator = '\n' lines = [" " * amount + line for line in in_str.split(line_separator)] return line_separator.join(lines) def sprintf(*args, **kwargs) -> str: """ Args: This function uses the same syntax as the builtin print function. Returns: An interpolated string capturing print output, like man(3) :code:sprintf. """ ret = "" sep = kwargs.pop("sep", None) if sep is not None: if not isinstance(sep, str): raise TypeError("sep must be None or a string") end = kwargs.pop("end", None) if end is not None: if not isinstance(end, str): raise TypeError("end must be None or a string") if kwargs: raise TypeError("invalid keyword arguments to sprint()") if sep is None: sep = " " if end is None: end = "\n" for i, arg in enumerate(args): if i: ret += sep if isinstance(arg, str): ret += arg else: ret += str(arg) ret += end return ret def strip_ansi_sequences(in_str: str) -> str: """ Args: in_str: the string to strip Returns: in_str with recognized ANSI escape sequences removed. .. warning:: This method works by using a regular expression. It works for all ANSI escape sequences I've tested with but may miss some; caveat emptor. >>> import ansi as a >>> s = a.fg('blue') + 'blue!' + a.reset() >>> len(s) # '\x1b[38;5;21mblue!\x1b[m' 18 >>> len(strip_ansi_sequences(s)) 5 >>> strip_ansi_sequences(s) 'blue!' """ return re.sub(r'\x1b\[[\d+;]*[a-z]', '', in_str) class SprintfStdout(contextlib.AbstractContextManager): """ A context manager that captures outputs to stdout to a buffer without printing them. >>> with SprintfStdout() as buf: ... print("test") ... print("1, 2, 3") ... >>> print(buf(), end='') test 1, 2, 3 """ def __init__(self) -> None: self.destination = io.StringIO() self.recorder: contextlib.redirect_stdout def __enter__(self) -> Callable[[], str]: self.recorder = contextlib.redirect_stdout(self.destination) self.recorder.__enter__() return lambda: self.destination.getvalue() def __exit__(self, *args) -> Literal[False]: self.recorder.__exit__(*args) self.destination.seek(0) return False def capitalize_first_letter(in_str: str) -> str: """ Args: in_str: the string to capitalize Returns: in_str with the first character capitalized. >>> capitalize_first_letter('test') 'Test' >>> capitalize_first_letter("ALREADY!") 'ALREADY!' """ return in_str[0].upper() + in_str[1:] def it_they(n: int) -> str: """ Args: n: how many of them are there? Returns: 'it' if n is one or 'they' otherwize. Suggested usage:: n = num_files_saved_to_tmp() print(f'Saved file{pluralize(n)} successfully.') print(f'{it_they(n)} {is_are(n)} located in /tmp.') >>> it_they(1) 'it' >>> it_they(100) 'they' """ if n == 1: return "it" return "they" def is_are(n: int) -> str: """ Args: n: how many of them are there? Returns: 'is' if n is one or 'are' otherwize. 
Suggested usage:: n = num_files_saved_to_tmp() print(f'Saved file{pluralize(n)} successfully.') print(f'{it_they(n)} {is_are(n)} located in /tmp.') >>> is_are(1) 'is' >>> is_are(2) 'are' """ if n == 1: return "is" return "are" def pluralize(n: int) -> str: """ Args: n: how many of them are there? Returns: 's' if n is greater than one otherwize ''. Suggested usage:: n = num_files_saved_to_tmp() print(f'Saved file{pluralize(n)} successfully.') print(f'{it_they(n)} {is_are(n)} located in /tmp.') >>> pluralize(15) 's' >>> count = 1 >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.') There is 1 file. >>> count = 4 >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.') There are 4 files. """ if n == 1: return "" return "s" def make_contractions(txt: str) -> str: """This code glues words in txt together to form (English) contractions. Args: txt: the input text to be contractionized. Returns: Output text identical to original input except for any recognized contractions are formed. .. note:: The order in which we create contractions is defined by the implementation and what I thought made more sense when writing this code. >>> make_contractions('It is nice today.') "It's nice today." >>> make_contractions('I can not even...') "I can't even..." >>> make_contractions('She could not see!') "She couldn't see!" >>> make_contractions('But she will not go.') "But she won't go." >>> make_contractions('Verily, I shall not.') "Verily, I shan't." >>> make_contractions('No you cannot.') "No you can't." >>> make_contractions('I said you can not go.') "I said you can't go." """ first_second = [ ( [ 'are', 'could', 'did', 'has', 'have', 'is', 'must', 'should', 'was', 'were', 'would', ], ['(n)o(t)'], ), ( [ "I", "you", "he", "she", "it", "we", "they", "how", "why", "when", "where", "who", "there", ], ['woul(d)', 'i(s)', 'a(re)', 'ha(s)', 'ha(ve)', 'ha(d)', 'wi(ll)'], ), ] # Special cases: can't, shan't and won't. txt = re.sub(r'\b(can)\s*no(t)\b', r"\1'\2", txt, count=0, flags=re.IGNORECASE) txt = re.sub(r'\b(sha)ll\s*(n)o(t)\b', r"\1\2'\3", txt, count=0, flags=re.IGNORECASE) txt = re.sub( r'\b(w)ill\s*(n)(o)(t)\b', r"\1\3\2'\4", txt, count=0, flags=re.IGNORECASE, ) for first_list, second_list in first_second: for first in first_list: for second in second_list: # Disallow there're/where're. They're valid English # but sound weird. if (first in ('there', 'where')) and second == 'a(re)': continue pattern = fr'\b({first})\s+{second}\b' if second == '(n)o(t)': replacement = r"\1\2'\3" else: replacement = r"\1'\2" txt = re.sub(pattern, replacement, txt, count=0, flags=re.IGNORECASE) return txt def thify(n: int) -> str: """ Args: n: how many of them are there? Returns: The proper cardinal suffix for a number. Suggested usage:: attempt_count = 0 while True: attempt_count += 1 if try_the_thing(): break print(f'The {attempt_count}{thify(attempt_count)} failed, trying again.') >>> thify(1) 'st' >>> thify(33) 'rd' >>> thify(16) 'th' """ digit = str(n) assert is_integer_number(digit) digit = digit[-1:] if digit == "1": return "st" elif digit == "2": return "nd" elif digit == "3": return "rd" else: return "th" def ngrams(txt: str, n: int): """ Args: txt: the string to create ngrams using n: how many words per ngram created? Returns: Generates the ngrams from the input string. 
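    Larger values of n slide a wider window across the input words:

    >>> [x for x in ngrams('This is a test', 3)]
    ['This is a', 'is a test']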
>>> [x for x in ngrams('This is a test', 2)] ['This is', 'is a', 'a test'] """ words = txt.split() for ngram in ngrams_presplit(words, n): ret = '' for word in ngram: ret += f'{word} ' yield ret.strip() def ngrams_presplit(words: Sequence[str], n: int): """ Same as :meth:`ngrams` but with the string pre-split. """ return list_utils.ngrams(words, n) def bigrams(txt: str): """Generates the bigrams (n=2) of the given string.""" return ngrams(txt, 2) def trigrams(txt: str): """Generates the trigrams (n=3) of the given string.""" return ngrams(txt, 3) def shuffle_columns_into_list( input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim='' ) -> Iterable[str]: """Helper to shuffle / parse columnar data and return the results as a list. Args: input_lines: A sequence of strings that represents text that has been broken into columns by the caller column_specs: an iterable collection of numeric sequences that indicate one or more column numbers to copy to form the Nth position in the output list. See example below. delim: for column_specs that indicate we should copy more than one column from the input into this position, use delim to separate source data. Defaults to ''. Returns: A list of string created by following the instructions set forth in column_specs. >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split() >>> shuffle_columns_into_list( ... cols, ... [ [8], [2, 3], [5, 6, 7] ], ... delim='!', ... ) ['acl_test.py', 'scott!wheel', 'Jul!9!11:34'] """ out = [] # Column specs map input lines' columns into outputs. # [col1, col2...] for spec in column_specs: hunk = '' for n in spec: hunk = hunk + delim + input_lines[n] hunk = hunk.strip(delim) out.append(hunk) return out def shuffle_columns_into_dict( input_lines: Sequence[str], column_specs: Iterable[Tuple[str, Iterable[int]]], delim='', ) -> Dict[str, str]: """Helper to shuffle / parse columnar data and return the results as a dict. Args: input_lines: a sequence of strings that represents text that has been broken into columns by the caller column_specs: instructions for what dictionary keys to apply to individual or compound input column data. See example below. delim: when forming compound output data by gluing more than one input column together, use this character to separate the source data. Defaults to ''. Returns: A dict formed by applying the column_specs instructions. >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split() >>> shuffle_columns_into_dict( ... cols, ... [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ], ... delim='!', ... ) {'filename': 'acl_test.py', 'owner': 'scott!wheel', 'mtime': 'Jul!9!11:34'} """ out = {} # Column specs map input lines' columns into outputs. # "key", [col1, col2...] for spec in column_specs: hunk = '' for n in spec[1]: hunk = hunk + delim + input_lines[n] hunk = hunk.strip(delim) out[spec[0]] = hunk return out def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str: """ Interpolate a string with data from a dict. Args: txt: the mad libs template values: what you and your kids chose for each category. >>> interpolate_using_dict('This is a {adjective} {noun}.', ... {'adjective': 'good', 'noun': 'example'}) 'This is a good example.' """ return sprintf(txt.format(**values), end='') def to_ascii(txt: str): """ Args: txt: the input data to encode Returns: txt encoded as an ASCII byte string. 
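    Non-ASCII input is not silently converted; the underlying
    str.encode('ascii') call raises UnicodeEncodeError. Run the text
    through :meth:`asciify` first if you want a lossy conversion, e.g.::

        to_ascii(asciify('café'))  # returns b'cafe'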
>>> to_ascii('test') b'test' >>> to_ascii(b'1, 2, 3') b'1, 2, 3' """ if isinstance(txt, str): return txt.encode('ascii') if isinstance(txt, bytes): return txt raise Exception('to_ascii works with strings and bytes') def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes: """ Args: txt: the input data to encode Returns: txt encoded with a 64-chracter alphabet. Similar to and compatible with uuencode/uudecode. >>> to_base64('hello?') b'aGVsbG8/\\n' """ return base64.encodebytes(txt.encode(encoding, errors)) def is_base64(txt: str) -> bool: """ Args: txt: the string to check Returns: True if txt is a valid base64 encoded string. This assumes txt was encoded with Python's standard base64 alphabet which is the same as what uuencode/uudecode uses). >>> is_base64('test') # all letters in the b64 alphabet True >>> is_base64('another test, how do you like this one?') False >>> is_base64(b'aGVsbG8/\\n') # Ending newline is ok. True """ a = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/' alphabet = set(a.encode('ascii')) for char in to_ascii(txt.strip()): if char not in alphabet: return False return True def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str: """ Args: b64: bytestring of 64-bit encoded data to decode / convert. Returns: The decoded form of b64 as a normal python string. Similar to and compatible with uuencode / uudecode. >>> from_base64(b'aGVsbG8/\\n') 'hello?' """ return base64.decodebytes(b64).decode(encoding, errors) def chunk(txt: str, chunk_size: int): """ Args: txt: a string to be chunked into evenly spaced pieces. chunk_size: the size of each chunk to make Returns: The original string chunked into evenly spaced pieces. >>> ' '.join(chunk('010011011100010110101010101010101001111110101000', 8)) '01001101 11000101 10101010 10101010 10011111 10101000' """ if len(txt) % chunk_size != 0: msg = f'String to chunk\'s length ({len(txt)} is not an even multiple of chunk_size ({chunk_size})' logger.warning(msg) warnings.warn(msg, stacklevel=2) for x in range(0, len(txt), chunk_size): yield txt[x : x + chunk_size] def to_bitstring(txt: str, *, delimiter='') -> str: """ Args: txt: the string to convert into a bitstring delimiter: character to insert between adjacent bytes. Note that only bitstrings with delimiter='' are interpretable by :meth:`from_bitstring`. Returns: txt converted to ascii/binary and then chopped into bytes. >>> to_bitstring('hello?') '011010000110010101101100011011000110111100111111' >>> to_bitstring('test', delimiter=' ') '01110100 01100101 01110011 01110100' >>> to_bitstring(b'test') '01110100011001010111001101110100' """ etxt = to_ascii(txt) bits = bin(int.from_bytes(etxt, 'big')) bits = bits[2:] return delimiter.join(chunk(bits.zfill(8 * ((len(bits) + 7) // 8)), 8)) def is_bitstring(txt: str) -> bool: """ Args: txt: the string to check Returns: True if txt is a recognized bitstring and False otherwise. Note that if delimiter is non empty this code will not recognize the bitstring. >>> is_bitstring('011010000110010101101100011011000110111100111111') True >>> is_bitstring('1234') False """ return is_binary_integer_number(f'0b{txt}') def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str: """ Args: bits: the bitstring to convert back into a python string encoding: the encoding to use Returns: The regular python string represented by bits. Note that this code does not work with to_bitstring when delimiter is non-empty. 
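    The round trip with :meth:`to_bitstring` (using the default empty
    delimiter) recovers the original string:

    >>> from_bitstring(to_bitstring('test'))
    'test'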
    >>> from_bitstring('011010000110010101101100011011000110111100111111')
    'hello?'
    """
    n = int(bits, 2)
    return n.to_bytes((n.bit_length() + 7) // 8, 'big').decode(encoding, errors) or '\0'


def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
    """
    Args:
        txt: an IP address to chunk up for sorting purposes

    Returns:
        A tuple of IP components arranged such that the sorting of
        IP addresses using a normal comparator will do something sane
        and desirable.

    >>> ip_v4_sort_key('10.0.0.18')
    (10, 0, 0, 18)

    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']
    """
    if not is_ip_v4(txt):
        logger.debug(f"not IP: {txt}")
        return None
    return tuple(int(x) for x in txt.split('.'))


def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
    """
    Args:
        volume: the string to chunk up for sorting purposes

    Returns:
        A tuple of volume's components such that the sorting of volumes
        using a normal comparator will do something sane and desirable.

    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
    ('usr', 'local', 'bin')

    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
    ['/usr', '/usr/local', '/usr/local/bin']
    """
    return tuple(x for x in volume.split('/') if len(x) > 0)


def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
    """
    Execute several replace operations in a row.

    Args:
        in_str: the string in which to replace characters
        replace_set: the set of target characters to replace
        replacement: the character to replace any member of replace_set
            with

    Returns:
        The string with replacements executed.

    >>> s = 'this_is a-test!'
    >>> replace_all(s, ' _-!', '')
    'thisisatest'
    """
    for char in replace_set:
        in_str = in_str.replace(char, replacement)
    return in_str


def replace_nth(in_str: str, source: str, target: str, nth: int) -> str:
    """
    Replaces the nth occurrence of a substring within a string.

    Args:
        in_str: the string in which to run the replacement
        source: the substring to replace
        target: the replacement text
        nth: which occurrence of source to replace?

    >>> replace_nth('this is a test', ' ', '-', 3)
    'this is a-test'
    """
    # Note: finditer treats 'source' as a regular expression pattern.
    where = [m.start() for m in re.finditer(source, in_str)][nth - 1]
    before = in_str[:where]
    after = in_str[where:]
    after = after.replace(source, target, 1)
    return before + after


if __name__ == '__main__':
    import doctest

    doctest.testmod()