More cleanup, yey!

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index b586ae1a7e82d62e92ba567b20e5a440254fe8b3..d75c6ba1aca2c559ed4254d535747c54f4719bf5 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,13 +1,52 @@
  #!/usr/bin/env python3
  #!/usr/bin/env python3
-
+# -*- coding: utf-8 -*-
+
+"""The MIT License (MIT)
+
+Copyright (c) 2016-2020 Davide Zanotti
+Modifications Copyright (c) 2021-2022 Scott Gasch
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+This module is based on: https://github.com/daveoncode/python-string-utils.
+"""
+
+import base64
+import contextlib  # type: ignore
+import datetime
+import io
  import json
  import json
+import logging
+import numbers
  import random
  import re
  import string
  import random
  import re
  import string
-from typing import Any, List, Optional
  import unicodedata
  import unicodedata
+import warnings
+from itertools import zip_longest
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
  from uuid import uuid4
  
  from uuid import uuid4
  
+import list_utils
+
+logger = logging.getLogger(__name__)
+
  NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  
  HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")
  NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  
  HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")
@@ -40,19 +79,13 @@ EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))
  
  EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
  
  
  EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
  
-CAMEL_CASE_TEST_RE = re.compile(
-    r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$"
-)
+CAMEL_CASE_TEST_RE = re.compile(r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$")
  
  CAMEL_CASE_REPLACE_RE = re.compile(r"([a-z]|[A-Z]+)(?=[A-Z])")
  
  
  CAMEL_CASE_REPLACE_RE = re.compile(r"([a-z]|[A-Z]+)(?=[A-Z])")
  
-SNAKE_CASE_TEST_RE = re.compile(
-    r"^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$", re.IGNORECASE
-)
+SNAKE_CASE_TEST_RE = re.compile(r"^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$", re.IGNORECASE)
  
  
-SNAKE_CASE_TEST_DASH_RE = re.compile(
-    r"([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$", re.IGNORECASE
-)
+SNAKE_CASE_TEST_DASH_RE = re.compile(r"([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$", re.IGNORECASE)
  
  SNAKE_CASE_REPLACE_RE = re.compile(r"(_)([a-z\d])")
  
  
  SNAKE_CASE_REPLACE_RE = re.compile(r"(_)([a-z\d])")
  
@@ -67,13 +100,9 @@ CREDIT_CARDS = {
      "JCB": re.compile(r"^(?:2131|1800|35\d{3})\d{11}$"),
  }
  
      "JCB": re.compile(r"^(?:2131|1800|35\d{3})\d{11}$"),
  }
  
-JSON_WRAPPER_RE = re.compile(
-    r"^\s*[\[{]\s*(.*)\s*[\}\]]\s*$", re.MULTILINE | re.DOTALL
-)
+JSON_WRAPPER_RE = re.compile(r"^\s*[\[{]\s*(.*)\s*[\}\]]\s*$", re.MULTILINE | re.DOTALL)
  
  
-UUID_RE = re.compile(
-    r"^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$", re.IGNORECASE
-)
+UUID_RE = re.compile(r"^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$", re.IGNORECASE)
  
  UUID_HEX_OK_RE = re.compile(
      r"^[a-f\d]{8}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{12}$",
  
  UUID_HEX_OK_RE = re.compile(
      r"^[a-f\d]{8}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{12}$",
@@ -82,15 +111,17 @@ UUID_HEX_OK_RE = re.compile(
  
  SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  
  
  SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  
+ANYWHERE_IP_V4_RE = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
+
  IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  
  IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  
-MAC_ADDRESS_RE = re.compile(
-    r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
-)
+ANYWHERE_IP_V6_RE = re.compile(r"([a-z\d]{0,4}:){7}[a-z\d]{0,4}", re.IGNORECASE)
  
  
-WORDS_COUNT_RE = re.compile(
-    r"\W*[^\W_]+\W*", re.IGNORECASE | re.MULTILINE | re.UNICODE
-)
+MAC_ADDRESS_RE = re.compile(r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})$", re.IGNORECASE)
+
+ANYWHERE_MAC_ADDRESS_RE = re.compile(r"([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE)
+
+WORDS_COUNT_RE = re.compile(r"\W*[^\W_]+\W*", re.IGNORECASE | re.MULTILINE | re.UNICODE)
  
  HTML_RE = re.compile(
      r"((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)",
  
  HTML_RE = re.compile(
      r"((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)",
@@ -104,50 +135,116 @@ HTML_TAG_ONLY_RE = re.compile(
  
  SPACES_RE = re.compile(r"\s")
  
  
  SPACES_RE = re.compile(r"\s")
  
-NO_LETTERS_OR_NUMBERS_RE = re.compile(
-    r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE
-)
+NO_LETTERS_OR_NUMBERS_RE = re.compile(r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE)
  
  MARGIN_RE = re.compile(r"^[^\S\r\n]+")
  
  ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
  
  NUM_SUFFIXES = {
  
  MARGIN_RE = re.compile(r"^[^\S\r\n]+")
  
  ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
  
  NUM_SUFFIXES = {
-    "Pb": (1024 ** 5),
-    "P": (1024 ** 5),
-    "Tb": (1024 ** 4),
-    "T": (1024 ** 4),
-    "Gb": (1024 ** 3),
-    "G": (1024 ** 3),
-    "Mb": (1024 ** 2),
-    "M": (1024 ** 2),
-    "Kb": (1024 ** 1),
-    "K": (1024 ** 1),
+    "Pb": (1024**5),
+    "P": (1024**5),
+    "Tb": (1024**4),
+    "T": (1024**4),
+    "Gb": (1024**3),
+    "G": (1024**3),
+    "Mb": (1024**2),
+    "M": (1024**2),
+    "Kb": (1024**1),
+    "K": (1024**1),
  }
  
  
  def is_none_or_empty(in_str: Optional[str]) -> bool:
  }
  
  
  def is_none_or_empty(in_str: Optional[str]) -> bool:
+    """
+    Returns True if the input string is None, empty, or only whitespace.
+
+    >>> is_none_or_empty("")
+    True
+    >>> is_none_or_empty(None)
+    True
+    >>> is_none_or_empty("   \t   ")
+    True
+    >>> is_none_or_empty('Test')
+    False
+    """
      return in_str is None or len(in_str.strip()) == 0
  
  
  def is_string(obj: Any) -> bool:
      """
      Checks if an object is a string.
      return in_str is None or len(in_str.strip()) == 0
  
  
  def is_string(obj: Any) -> bool:
      """
      Checks if an object is a string.
+
+    >>> is_string('test')
+    True
+    >>> is_string(123)
+    False
+    >>> is_string(100.3)
+    False
+    >>> is_string([1, 2, 3])
+    False
      """
      return isinstance(obj, str)
  
  
  def is_empty_string(in_str: Any) -> bool:
      """
      return isinstance(obj, str)
  
  
  def is_empty_string(in_str: Any) -> bool:
+    return is_empty(in_str)
+
+
+def is_empty(in_str: Any) -> bool:
+    """
+    Checks if input is a string and empty or only whitespace.
+
+    >>> is_empty('')
+    True
+    >>> is_empty('    \t\t    ')
+    True
+    >>> is_empty('test')
+    False
+    >>> is_empty(100.88)
+    False
+    >>> is_empty([1, 2, 3])
+    False
+    """
      return is_string(in_str) and in_str.strip() == ""
  
  
  def is_full_string(in_str: Any) -> bool:
      return is_string(in_str) and in_str.strip() == ""
  
  
  def is_full_string(in_str: Any) -> bool:
+    """
+    Checks that input is a string and is not empty ('') or only whitespace.
+
+    >>> is_full_string('test!')
+    True
+    >>> is_full_string('')
+    False
+    >>> is_full_string('      ')
+    False
+    >>> is_full_string(100.999)
+    False
+    >>> is_full_string({"a": 1, "b": 2})
+    False
+    """
      return is_string(in_str) and in_str.strip() != ""
  
  
  def is_number(in_str: str) -> bool:
      """
      Checks if a string is a valid number.
      return is_string(in_str) and in_str.strip() != ""
  
  
  def is_number(in_str: str) -> bool:
      """
      Checks if a string is a valid number.
+
+    >>> is_number(100.5)
+    Traceback (most recent call last):
+    ...
+    ValueError: 100.5
+    >>> is_number("100.5")
+    True
+    >>> is_number("test")
+    False
+    >>> is_number("99")
+    True
+    >>> is_number([1, 2, 3])
+    Traceback (most recent call last):
+    ...
+    ValueError: [1, 2, 3]
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -160,38 +257,103 @@ def is_integer_number(in_str: str) -> bool:
  
      An integer may be signed or unsigned or use a "scientific notation".
  
  
      An integer may be signed or unsigned or use a "scientific notation".
  
-    *Examples:*
-
-    >>> is_integer('42') # returns true
-    >>> is_integer('42.0') # returns false
+    >>> is_integer_number('42')
+    True
+    >>> is_integer_number('42.0')
+    False
      """
      return (
      """
      return (
-        (is_number(in_str) and "." not in in_str) or
-        is_hexidecimal_integer_number(in_str) or
-        is_octal_integer_number(in_str) or
-        is_binary_integer_number(in_str)
+        (is_number(in_str) and "." not in in_str)
+        or is_hexidecimal_integer_number(in_str)
+        or is_octal_integer_number(in_str)
+        or is_binary_integer_number(in_str)
      )
  
  
  def is_hexidecimal_integer_number(in_str: str) -> bool:
      )
  
  
  def is_hexidecimal_integer_number(in_str: str) -> bool:
+    """
+    Checks whether a string is a hex integer number.
+
+    >>> is_hexidecimal_integer_number('0x12345')
+    True
+    >>> is_hexidecimal_integer_number('0x1A3E')
+    True
+    >>> is_hexidecimal_integer_number('1234')  # Needs 0x
+    False
+    >>> is_hexidecimal_integer_number('-0xff')
+    True
+    >>> is_hexidecimal_integer_number('test')
+    False
+    >>> is_hexidecimal_integer_number(12345)  # Not a string
+    Traceback (most recent call last):
+    ...
+    ValueError: 12345
+    >>> is_hexidecimal_integer_number(101.4)
+    Traceback (most recent call last):
+    ...
+    ValueError: 101.4
+    >>> is_hexidecimal_integer_number(0x1A3E)
+    Traceback (most recent call last):
+    ...
+    ValueError: 6718
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return HEX_NUMBER_RE.match(in_str) is not None
  
  
  def is_octal_integer_number(in_str: str) -> bool:
      if not is_string(in_str):
          raise ValueError(in_str)
      return HEX_NUMBER_RE.match(in_str) is not None
  
  
  def is_octal_integer_number(in_str: str) -> bool:
+    """
+    Checks whether a string is an octal number.
+
+    >>> is_octal_integer_number('0o777')
+    True
+    >>> is_octal_integer_number('-0O115')
+    True
+    >>> is_octal_integer_number('0xFF')  # Not octal, needs 0o
+    False
+    >>> is_octal_integer_number('7777')  # Needs 0o
+    False
+    >>> is_octal_integer_number('test')
+    False
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return OCT_NUMBER_RE.match(in_str) is not None
  
  
  def is_binary_integer_number(in_str: str) -> bool:
      if not is_string(in_str):
          raise ValueError(in_str)
      return OCT_NUMBER_RE.match(in_str) is not None
  
  
  def is_binary_integer_number(in_str: str) -> bool:
+    """
+    Returns whether a string contains a binary number.
+
+    >>> is_binary_integer_number('0b10111')
+    True
+    >>> is_binary_integer_number('-0b111')
+    True
+    >>> is_binary_integer_number('0B10101')
+    True
+    >>> is_binary_integer_number('0b10102')
+    False
+    >>> is_binary_integer_number('0xFFF')
+    False
+    >>> is_binary_integer_number('test')
+    False
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return BIN_NUMBER_RE.match(in_str) is not None
  
  
  def to_int(in_str: str) -> int:
      if not is_string(in_str):
          raise ValueError(in_str)
      return BIN_NUMBER_RE.match(in_str) is not None
  
  
  def to_int(in_str: str) -> int:
+    """Returns the integral value of the string or raises on error.
+
+    >>> to_int('1234')
+    1234
+    >>> to_int('test')
+    Traceback (most recent call last):
+    ...
+    ValueError: invalid literal for int() with base 10: 'test'
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      if is_binary_integer_number(in_str):
      if not is_string(in_str):
          raise ValueError(in_str)
      if is_binary_integer_number(in_str):
@@ -209,28 +371,72 @@ def is_decimal_number(in_str: str) -> bool:
  
      A decimal may be signed or unsigned or use a "scientific notation".
  
  
      A decimal may be signed or unsigned or use a "scientific notation".
  
-    >>> is_decimal('42.0') # returns true
-    >>> is_decimal('42') # returns false
+    >>> is_decimal_number('42.0')
+    True
+    >>> is_decimal_number('42')
+    False
      """
      return is_number(in_str) and "." in in_str
  
  
  def strip_escape_sequences(in_str: str) -> str:
      """
      return is_number(in_str) and "." in in_str
  
  
  def strip_escape_sequences(in_str: str) -> str:
+    """
+    Remove escape sequences in the input string.
+
+    >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+    'this is a test!'
+    """
      in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
      return in_str
  
  
      in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
      return in_str
  
  
+def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+    """
+    Add thousands separator to a numeric string.  Also handles numbers.
+
+    >>> add_thousands_separator('12345678')
+    '12,345,678'
+    >>> add_thousands_separator(12345678)
+    '12,345,678'
+    >>> add_thousands_separator(12345678.99)
+    '12,345,678.99'
+    >>> add_thousands_separator('test')
+    Traceback (most recent call last):
+    ...
+    ValueError: test
+
+    """
+    if isinstance(in_str, numbers.Number):
+        in_str = f'{in_str}'
+    if is_number(in_str):
+        return _add_thousands_separator(in_str, separator_char=separator_char, places=places)
+    raise ValueError(in_str)
+
+
+def _add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str:
+    decimal_part = ""
+    if '.' in in_str:
+        (in_str, decimal_part) = in_str.split('.')
+    tmp = [iter(in_str[::-1])] * places
+    ret = separator_char.join("".join(x) for x in zip_longest(*tmp, fillvalue=""))[::-1]
+    if len(decimal_part) > 0:
+        ret += '.'
+        ret += decimal_part
+    return ret
+
+
  # Full url example:
  # scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
  def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
      """
      Check if a string is a valid url.
  
  # Full url example:
  # scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
  def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
      """
      Check if a string is a valid url.
  
-    *Examples:*
-
-    >>> is_url('http://www.mysite.com') # returns true
-    >>> is_url('https://mysite.com') # returns true
-    >>> is_url('.mysite.com') # returns false
+    >>> is_url('http://www.mysite.com')
+    True
+    >>> is_url('https://mysite.com')
+    True
+    >>> is_url('.mysite.com')
+    False
      """
      if not is_full_string(in_str):
          return False
      """
      if not is_full_string(in_str):
          return False
@@ -248,16 +454,12 @@ def is_email(in_str: Any) -> bool:
  
      Reference: https://tools.ietf.org/html/rfc3696#section-3
  
  
      Reference: https://tools.ietf.org/html/rfc3696#section-3
  
-    *Examples:*
-
-    >>> is_email('my.email@the-provider.com') # returns true
-    >>> is_email('@gmail.com') # returns false
+    >>> is_email('my.email@the-provider.com')
+    True
+    >>> is_email('@gmail.com')
+    False
      """
      """
-    if (
-        not is_full_string(in_str)
-        or len(in_str) > 320
-        or in_str.startswith(".")
-    ):
+    if not is_full_string(in_str) or len(in_str) > 320 or in_str.startswith("."):
          return False
  
      try:
          return False
  
      try:
@@ -267,12 +469,7 @@ def is_email(in_str: Any) -> bool:
  
          # head's size must be <= 64, tail <= 255, head must not start
          # with a dot or contain multiple consecutive dots.
  
          # head's size must be <= 64, tail <= 255, head must not start
          # with a dot or contain multiple consecutive dots.
-        if (
-            len(head) > 64
-            or len(tail) > 255
-            or head.endswith(".")
-            or (".." in head)
-        ):
+        if len(head) > 64 or len(tail) > 255 or head.endswith(".") or (".." in head):
              return False
  
          # removes escaped spaces, so that later on the test regex will
              return False
  
          # removes escaped spaces, so that later on the test regex will
@@ -294,6 +491,11 @@ def is_email(in_str: Any) -> bool:
  def suffix_string_to_number(in_str: str) -> Optional[int]:
      """Take a string like "33Mb" and convert it into a number (of bytes)
      like 34603008.  Return None if the input string is not valid.
  def suffix_string_to_number(in_str: str) -> Optional[int]:
      """Take a string like "33Mb" and convert it into a number (of bytes)
      like 34603008.  Return None if the input string is not valid.
+
+    >>> suffix_string_to_number('1Mb')
+    1048576
+    >>> suffix_string_to_number('13.1Gb')
+    14066017894
      """
  
      def suffix_capitalize(s: str) -> str:
      """
  
      def suffix_capitalize(s: str) -> str:
@@ -315,24 +517,33 @@ def suffix_string_to_number(in_str: str) -> Optional[int]:
              if multiplier is not None:
                  r = rest[x]
                  if is_integer_number(r):
              if multiplier is not None:
                  r = rest[x]
                  if is_integer_number(r):
-                    return int(r) * multiplier
+                    return to_int(r) * multiplier
+                if is_decimal_number(r):
+                    return int(float(r) * multiplier)
      return None
  
  
  def number_to_suffix_string(num: int) -> Optional[str]:
      """Take a number (of bytes) and return a string like "43.8Gb".
      Numbers smaller than 1024 are returned as plain digits with no suffix.
      return None
  
  
  def number_to_suffix_string(num: int) -> Optional[str]:
      """Take a number (of bytes) and return a string like "43.8Gb".
      Numbers smaller than 1024 are returned as plain digits with no suffix.
+
+    >>> number_to_suffix_string(14066017894)
+    '13.1Gb'
+    >>> number_to_suffix_string(1024 * 1024)
+    '1.0Mb'
+
      """
      d = 0.0
      suffix = None
      for (sfx, size) in NUM_SUFFIXES.items():
      """
      d = 0.0
      suffix = None
      for (sfx, size) in NUM_SUFFIXES.items():
-        if num > size:
+        if num >= size:
              d = num / size
              suffix = sfx
              break
      if suffix is not None:
          return f"{d:.1f}{suffix}"
              d = num / size
              suffix = sfx
              break
      if suffix is not None:
          return f"{d:.1f}{suffix}"
-    return None
+    else:
+        return f'{num:d}'
  
  
  def is_credit_card(in_str: Any, card_type: str = None) -> bool:
  
  
  def is_credit_card(in_str: Any, card_type: str = None) -> bool:
@@ -375,9 +586,7 @@ def is_camel_case(in_str: Any) -> bool:
      - it contains both lowercase and uppercase letters
      - it does not start with a number
      """
      - it contains both lowercase and uppercase letters
      - it does not start with a number
      """
-    return (
-        is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
-    )
+    return is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
  
  
  def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
  
  
  def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
@@ -389,17 +598,23 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
      - it's composed only by lowercase/uppercase letters and digits
      - it contains at least one underscore (or provided separator)
      - it does not start with a number
      - it's composed only by lowercase/uppercase letters and digits
      - it contains at least one underscore (or provided separator)
      - it does not start with a number
+
+    >>> is_snake_case('this_is_a_test')
+    True
+    >>> is_snake_case('___This_Is_A_Test_1_2_3___')
+    True
+    >>> is_snake_case('this-is-a-test')
+    False
+    >>> is_snake_case('this-is-a-test', separator='-')
+    True
+
      """
      if is_full_string(in_str):
          re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
      """
      if is_full_string(in_str):
          re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
-        re_template = (
-            r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)"
-        )
+        re_template = r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)"
          r = re_map.get(
              separator,
          r = re_map.get(
              separator,
-            re.compile(
-                re_template.format(sign=re.escape(separator)), re.IGNORECASE
-            ),
+            re.compile(re_template.format(sign=re.escape(separator)), re.IGNORECASE),
          )
          return r.match(in_str) is not None
      return False
          )
          return r.match(in_str) is not None
      return False
@@ -409,11 +624,12 @@ def is_json(in_str: Any) -> bool:
      """
      Check if a string is a valid json.
  
      """
      Check if a string is a valid json.
  
-    *Examples:*
-
-    >>> is_json('{"name": "Peter"}') # returns true
-    >>> is_json('[1, 2, 3]') # returns true
-    >>> is_json('{nope}') # returns false
+    >>> is_json('{"name": "Peter"}')
+    True
+    >>> is_json('[1, 2, 3]')
+    True
+    >>> is_json('{nope}')
+    False
      """
      if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
          try:
      """
      if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
          try:
@@ -427,11 +643,12 @@ def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
      """
      Check if a string is a valid UUID.
  
      """
      Check if a string is a valid UUID.
  
-    *Example:*
-
-    >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
-    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
-    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
+    >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf')
+    True
+    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf')
+    False
+    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True)
+    True
      """
      # string casting is used to allow UUID itself as input data type
      s = str(in_str)
      """
      # string casting is used to allow UUID itself as input data type
      s = str(in_str)
@@ -444,11 +661,12 @@ def is_ip_v4(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip v4.
  
      """
      Checks if a string is a valid ip v4.
  
-    *Examples:*
-
-    >>> is_ip_v4('255.200.100.75') # returns true
-    >>> is_ip_v4('nope') # returns false (not an ip)
-    >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
+    >>> is_ip_v4('255.200.100.75')
+    True
+    >>> is_ip_v4('nope')
+    False
+    >>> is_ip_v4('255.200.100.999')  # 999 out of range
+    False
      """
      if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
          return False
      """
      if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
          return False
@@ -463,11 +681,14 @@ def is_ip_v4(in_str: Any) -> bool:
  def extract_ip_v4(in_str: Any) -> Optional[str]:
      """
      Extracts the IPv4 chunk of a string or None.
  def extract_ip_v4(in_str: Any) -> Optional[str]:
      """
      Extracts the IPv4 chunk of a string or None.
+
+    >>> extract_ip_v4('   The secret IP address: 127.0.0.1 (use it wisely)   ')
+    '127.0.0.1'
+    >>> extract_ip_v4('Your mom dresses you funny.')
      """
      if not is_full_string(in_str):
          return None
      """
      if not is_full_string(in_str):
          return None
-    in_str.strip()
-    m = SHALLOW_IP_V4_RE.match(in_str)
+    m = ANYWHERE_IP_V4_RE.search(in_str)
      if m is not None:
          return m.group(0)
      return None
      if m is not None:
          return m.group(0)
      return None
@@ -477,10 +698,10 @@ def is_ip_v6(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip v6.
  
      """
      Checks if a string is a valid ip v6.
  
-    *Examples:*
-
-    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
-    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
+    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
+    True
+    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?')    # invalid "?"
+    False
      """
      return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
  
      """
      return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
  
@@ -488,11 +709,14 @@ def is_ip_v6(in_str: Any) -> bool:
  def extract_ip_v6(in_str: Any) -> Optional[str]:
      """
      Extract IPv6 chunk or None.
  def extract_ip_v6(in_str: Any) -> Optional[str]:
      """
      Extract IPv6 chunk or None.
+
+    >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+    '2001:db8:85a3:0000:0000:8a2e:370:7334'
+    >>> extract_ip_v6("(and she's ugly too, btw)")
      """
      if not is_full_string(in_str):
          return None
      """
      if not is_full_string(in_str):
          return None
-    in_str.strip()
-    m = IP_V6_RE.match(in_str)
+    m = ANYWHERE_IP_V6_RE.search(in_str)
      if m is not None:
          return m.group(0)
      return None
      if m is not None:
          return m.group(0)
      return None
@@ -502,17 +726,29 @@ def is_ip(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip (either v4 or v6).
  
      """
      Checks if a string is a valid ip (either v4 or v6).
  
-    *Examples:*
-
-    >>> is_ip('255.200.100.75') # returns true
-    >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
-    >>> is_ip('1.2.3') # returns false
+    >>> is_ip('255.200.100.75')
+    True
+    >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
+    True
+    >>> is_ip('1.2.3')
+    False
+    >>> is_ip('1.2.3.999')
+    False
      """
      return is_ip_v6(in_str) or is_ip_v4(in_str)
  
  
  def extract_ip(in_str: Any) -> Optional[str]:
      """
      return is_ip_v6(in_str) or is_ip_v4(in_str)
  
  
  def extract_ip(in_str: Any) -> Optional[str]:
-    """Extract the IP address or None."""
+    """
+    Extract the IP address or None.
+
+    >>> extract_ip('Attacker: 255.200.100.75')
+    '255.200.100.75'
+    >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+    '2001:db8:85a3:0000:0000:8a2e:370:7334'
+    >>> extract_ip('1.2.3')
+
+    """
      ip = extract_ip_v4(in_str)
      if ip is None:
          ip = extract_ip_v6(in_str)
      ip = extract_ip_v4(in_str)
      if ip is None:
          ip = extract_ip_v6(in_str)
@@ -520,16 +756,35 @@ def extract_ip(in_str: Any) -> Optional[str]:
  
  
  def is_mac_address(in_str: Any) -> bool:
  
  
  def is_mac_address(in_str: Any) -> bool:
-    """Return True if in_str is a valid MAC address false otherwise."""
+    """Return True if in_str is a valid MAC address, False otherwise.
+
+    >>> is_mac_address("34:29:8F:12:0D:2F")
+    True
+    >>> is_mac_address('34:29:8f:12:0d:2f')
+    True
+    >>> is_mac_address('34-29-8F-12-0D-2F')
+    True
+    >>> is_mac_address("test")
+    False
+    """
      return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
  
  
  def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
      return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
  
  
  def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
-    """Extract the MAC address from in_str"""
+    """
+    Extract the MAC address from in_str.
+
+    >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
+    '34:29:8F:12:0D:2F'
+
+    >>> extract_mac_address('? (10.0.0.30) at d8:5d:e2:34:54:86 on em0 expires in 1176 seconds [ethernet]')
+    'd8:5d:e2:34:54:86'
+
+    """
      if not is_full_string(in_str):
          return None
      in_str.strip()
      if not is_full_string(in_str):
          return None
      in_str.strip()
-    m = MAC_ADDRESS_RE.match(in_str)
+    m = ANYWHERE_MAC_ADDRESS_RE.search(in_str)
      if m is not None:
          mac = m.group(0)
          mac.replace(":", separator)
      if m is not None:
          mac = m.group(0)
          mac.replace(":", separator)
@@ -542,16 +797,11 @@ def is_slug(in_str: Any, separator: str = "-") -> bool:
      """
      Checks if a given string is a slug (as created by `slugify()`).
  
      """
      Checks if a given string is a slug (as created by `slugify()`).
  
-    *Examples:*
-
-    >>> is_slug('my-blog-post-title') # returns true
-    >>> is_slug('My blog post title') # returns false
+    >>> is_slug('my-blog-post-title')
+    True
+    >>> is_slug('My blog post title')
+    False
  
  
-    :param in_str: String to check.
-    :type in_str: str
-    :param separator: Join sign used by the slug.
-    :type separator: str
-    :return: True if slug, false otherwise.
      """
      if not is_full_string(in_str):
          return False
      """
      if not is_full_string(in_str):
          return False
@@ -566,10 +816,11 @@ def contains_html(in_str: str) -> bool:
      By design, this function matches ANY type of tag, so don't expect to use it
      as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
  
      By design, this function matches ANY type of tag, so don't expect to use it
      as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
  
-    *Examples:*
+    >>> contains_html('my string is <strong>bold</strong>')
+    True
+    >>> contains_html('my string is not bold')
+    False
  
  
-    >>> contains_html('my string is <strong>bold</strong>') # returns true
-    >>> contains_html('my string is not bold') # returns false
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -585,27 +836,27 @@ def words_count(in_str: str) -> int:
      Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
      will be 4 not 1 (even if there are no spaces in the string).
  
      Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
      will be 4 not 1 (even if there are no spaces in the string).
  
-    *Examples:*
+    >>> words_count('hello world')
+    2
+    >>> words_count('one,two,three.stop')
+    4
  
  
-    >>> words_count('hello world') # returns 2
-    >>> words_count('one,two,three.stop') # returns 4
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      return len(WORDS_COUNT_RE.findall(in_str))
  
  
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      return len(WORDS_COUNT_RE.findall(in_str))
  
  
-def generate_uuid(as_hex: bool = False) -> str:
+def generate_uuid(omit_dashes: bool = False) -> str:
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
-    *Examples:*
+    generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
+    generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
  
  
-    >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
-    >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
      """
      uid = uuid4()
      """
      uid = uuid4()
-    if as_hex:
+    if omit_dashes:
          return uid.hex
      return str(uid)
  
          return uid.hex
      return str(uid)
  
@@ -615,9 +866,8 @@ def generate_random_alphanumeric_string(size: int) -> str:
      Returns a string of the specified size containing random
      characters (uppercase/lowercase ascii letters and digits).
  
      Returns a string of the specified size containing random
      characters (uppercase/lowercase ascii letters and digits).
  
-    *Example:*
+    generate_random_alphanumeric_string(9) # possible output: "cx3QQbzYg"
  
  
-    >>> random_string(9) # possible output: "cx3QQbzYg"
      """
      if size < 1:
          raise ValueError("size must be >= 1")
      """
      if size < 1:
          raise ValueError("size must be >= 1")
@@ -629,6 +879,10 @@ def generate_random_alphanumeric_string(size: int) -> str:
  def reverse(in_str: str) -> str:
      """
      Returns the string with its chars reversed.
  def reverse(in_str: str) -> str:
      """
      Returns the string with its chars reversed.
+
+    >>> reverse('test')
+    'tset'
+
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -639,14 +893,17 @@ def camel_case_to_snake_case(in_str, *, separator="_"):
      """
      Convert a camel case string into a snake case one.
      (The original string is returned if is not a valid camel case string)
      """
      Convert a camel case string into a snake case one.
      (The original string is returned if is not a valid camel case string)
+
+    >>> camel_case_to_snake_case('MacAddressExtractorFactory')
+    'mac_address_extractor_factory'
+    >>> camel_case_to_snake_case('Luke Skywalker')
+    'Luke Skywalker'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      if not is_camel_case(in_str):
          return in_str
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      if not is_camel_case(in_str):
          return in_str
-    return CAMEL_CASE_REPLACE_RE.sub(
-        lambda m: m.group(1) + separator, in_str
-    ).lower()
+    return CAMEL_CASE_REPLACE_RE.sub(lambda m: m.group(1) + separator, in_str).lower()
  
  
  def snake_case_to_camel_case(
  
  
  def snake_case_to_camel_case(
@@ -655,6 +912,11 @@ def snake_case_to_camel_case(
      """
      Convert a snake case string into a camel case one.
      (The original string is returned if is not a valid snake case string)
      """
      Convert a snake case string into a camel case one.
      (The original string is returned if is not a valid snake case string)
+
+    >>> snake_case_to_camel_case('this_is_a_test')
+    'ThisIsATest'
+    >>> snake_case_to_camel_case('Han Solo')
+    'Han Solo'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -667,12 +929,22 @@ def snake_case_to_camel_case(
  
  
  def to_char_list(in_str: str) -> List[str]:
  
  
  def to_char_list(in_str: str) -> List[str]:
+    """Convert a string into a list of chars.
+
+    >>> to_char_list('test')
+    ['t', 'e', 's', 't']
+    """
      if not is_string(in_str):
          return []
      return list(in_str)
  
  
  def from_char_list(in_list: List[str]) -> str:
      if not is_string(in_str):
          return []
      return list(in_str)
  
  
  def from_char_list(in_list: List[str]) -> str:
+    """Convert a char list into a string.
+
+    >>> from_char_list(['t', 'e', 's', 't'])
+    'test'
+    """
      return "".join(in_list)
  
  
      return "".join(in_list)
  
  
@@ -693,10 +965,10 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
      """
      Remove html code contained into the given string.
  
      """
      Remove html code contained into the given string.
  
-    *Examples:*
-
-    >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
-    >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
+    >>> strip_html('test: <a href="foo/bar">click here</a>')
+    'test: '
+    >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True)
+    'test: click here'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -706,14 +978,14 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
  
  def asciify(in_str: str) -> str:
      """
  
  def asciify(in_str: str) -> str:
      """
-    Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
-    (eg: ó -> o, Ë -> E, ç -> c...).
+    Force string content to be ascii-only by translating all non-ascii
+    chars into the closest possible representation (eg: ó -> o, Ë ->
+    E, ç -> c...).
  
  
-    **Bear in mind**: Some chars may be lost if impossible to translate.
+    N.B. Some chars may be lost if impossible to translate.
  
  
-    *Example:*
-
-    >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
+    >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË')
+    'eeuuooaaeynAAACIINOE'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -742,10 +1014,10 @@ def slugify(in_str: str, *, separator: str = "-") -> str:
      - all chars are encoded as ascii (by using `asciify()`)
      - is safe for URL
  
      - all chars are encoded as ascii (by using `asciify()`)
      - is safe for URL
  
-    *Examples:*
-
-    >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
-    >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
+    >>> slugify('Top 10 Reasons To Love Dogs!!!')
+    'top-10-reasons-to-love-dogs'
+    >>> slugify('Mönstér Mägnët')
+    'monster-magnet'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -765,7 +1037,8 @@ def to_bool(in_str: str) -> bool:
      """
      Turns a string into a boolean based on its content (CASE INSENSITIVE).
  
      """
      Turns a string into a boolean based on its content (CASE INSENSITIVE).
  
-    A positive boolean (True) is returned if the string value is one of the following:
+    A positive boolean (True) is returned if the string value is one
+    of the following:
  
      - "true"
      - "1"
  
      - "true"
      - "1"
@@ -773,29 +1046,113 @@ def to_bool(in_str: str) -> bool:
      - "y"
  
      Otherwise False is returned.
      - "y"
  
      Otherwise False is returned.
+
+    >>> to_bool('True')
+    True
+
+    >>> to_bool('1')
+    True
+
+    >>> to_bool('yes')
+    True
+
+    >>> to_bool('no')
+    False
+
+    >>> to_bool('huh?')
+    False
+
+    >>> to_bool('on')
+    True
+
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
-    return in_str.lower() in ("true", "1", "yes", "y", "t")
+    return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
  
  
  
  
-def dedent(in_str: str) -> str:
+def to_date(in_str: str) -> Optional[datetime.date]:
      """
      """
-    Removes tab indentation from multi line strings (inspired by analogous Scala function).
+    Parses a date string.  See DateParser docs for details.
+    """
+    import dateparse.dateparse_utils as du
+
+    try:
+        d = du.DateParser()  # type: ignore
+        d.parse(in_str)
+        return d.get_date()
+    except du.ParseException:  # type: ignore
+        msg = f'Unable to parse date {in_str}.'
+        logger.warning(msg)
+    return None
  
  
-    *Example:*
  
  
-    >>> strip_margin('''
-    >>>                 line 1
-    >>>                 line 2
-    >>>                 line 3
-    >>> ''')
-    >>> # returns:
-    >>> '''
-    >>> line 1
-    >>> line 2
-    >>> line 3
-    >>> '''
+def valid_date(in_str: str) -> bool:
+    """
+    Report whether in_str can be parsed as a date.
+
+    The string is handed to a freshly created DateParser.  If the
+    parser raises ParseException a warning is logged and False is
+    returned; otherwise the result is True.
+    """
+    import dateparse.dateparse_utils as dp
+
+    try:
+        parser = dp.DateParser()  # type: ignore
+        parser.parse(in_str)
+    except dp.ParseException:  # type: ignore
+        logger.warning(f'Unable to parse date {in_str}.')
+        return False
+    else:
+        return True
+
+
+def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+    """
+    Parses a datetime string.  See DateParser docs for more info.
+
+    Returns the value produced by DateParser.parse() when that value is
+    a datetime.datetime.  Returns None when the parser produces
+    anything else, or when a ValueError is raised (in which case a
+    warning is logged).
+    """
+    import dateparse.dateparse_utils as dp
+
+    try:
+        d = dp.DateParser()  # type: ignore
+        dt = d.parse(in_str)
+        if isinstance(dt, datetime.datetime):
+            return dt
+    # NOTE(review): to_date() and valid_date() catch dp.ParseException
+    # instead; presumably ParseException derives from ValueError so this
+    # handler covers parse failures too -- confirm.
+    except ValueError:
+        msg = f'Unable to parse datetime {in_str}.'
+        logger.warning(msg)
+    return None
+
+
+def valid_datetime(in_str: str) -> bool:
+    """
+    Report whether in_str can be parsed as a datetime.
+
+    Delegates to to_datetime(); when that yields None a warning is
+    logged here and False is returned.
+    """
+    if to_datetime(in_str) is None:
+        logger.warning(f'Unable to parse datetime {in_str}.')
+        return False
+    return True
+
+
+def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str:
+    """
+    Squeeze runs of more than one character_to_squeeze into one.
+
+    character_to_squeeze may be longer than a single character, in
+    which case back-to-back repetitions of the whole sequence are
+    collapsed into one occurrence.  It is always taken literally, both
+    when searching and when substituting, so regex metacharacters and
+    backslashes are safe to pass.
+
+    >>> squeeze(' this        is       a    test    ')
+    ' this is a test '
+
+    >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|')
+    'one|!|two|!|three'
+
+    >>> squeeze('a...b', character_to_squeeze='.')
+    'a.b'
+
+    """
+    return re.sub(
+        r'(' + re.escape(character_to_squeeze) + r')+',
+        # A callable replacement is inserted verbatim.  Passing the
+        # string itself would make re.sub treat it as a template and
+        # choke on (or misinterpret) any backslash it contains.
+        lambda _match: character_to_squeeze,
+        in_str,
+    )
+
+
+def dedent(in_str: str) -> str:
+    """
+    Removes tab indentation from multi line strings (inspired by analogous Scala function).
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -805,6 +1162,13 @@ def dedent(in_str: str) -> str:
  
  
  def indent(in_str: str, amount: int) -> str:
  
  
  def indent(in_str: str, amount: int) -> str:
+    """
+    Indents string by prepending amount spaces.
+
+    >>> indent('This is a test', 4)
+    '    This is a test'
+
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      line_separator = '\n'
      if not is_string(in_str):
          raise ValueError(in_str)
      line_separator = '\n'
@@ -813,6 +1177,7 @@ def indent(in_str: str, amount: int) -> str:
  
  
  def sprintf(*args, **kwargs) -> str:
  
  
  def sprintf(*args, **kwargs) -> str:
+    """String printf, like in C"""
      ret = ""
  
      sep = kwargs.pop("sep", None)
      ret = ""
  
      sep = kwargs.pop("sep", None)
@@ -841,3 +1206,471 @@ def sprintf(*args, **kwargs) -> str:
              ret += str(arg)
      ret += end
      return ret
              ret += str(arg)
      ret += end
      return ret
+
+
+class SprintfStdout(object):
+    """
+    Context manager that records everything written to stdout while
+    its body runs.  Entering it yields a zero-argument callable that
+    returns the text captured so far.
+
+    with SprintfStdout() as buf:
+        print("test")
+
+    Afterwards buf() returns the text "test" followed by a newline.
+    """
+
+    def __init__(self) -> None:
+        # In-memory sink that stdout is redirected into.
+        self.destination = io.StringIO()
+        # The active redirect_stdout; only assigned once the context is entered.
+        self.recorder: contextlib.redirect_stdout
+
+    def __enter__(self) -> Callable[[], str]:
+        self.recorder = contextlib.redirect_stdout(self.destination)
+        self.recorder.__enter__()
+
+        def captured() -> str:
+            return self.destination.getvalue()
+
+        return captured
+
+    def __exit__(self, *exc_info) -> None:
+        self.recorder.__exit__(*exc_info)
+        self.destination.seek(0)
+        # Falling off the end returns None, so exceptions raised inside
+        # the with-body are never suppressed.
+
+
+def capitalize_first_letter(txt: str) -> str:
+    """Capitalize the first letter of a string.
+
+    Only the first character is upper-cased; the rest of the string is
+    left exactly as it was.  An empty string is returned unchanged.
+
+    >>> capitalize_first_letter('test')
+    'Test'
+    >>> capitalize_first_letter("ALREADY!")
+    'ALREADY!'
+    >>> capitalize_first_letter('')
+    ''
+
+    """
+    # Slice instead of indexing so that '' does not raise IndexError.
+    return txt[:1].upper() + txt[1:]
+
+
+def it_they(n: int) -> str:
+    """Pick the pronoun that agrees with a count of n.
+
+    >>> it_they(1)
+    'it'
+    >>> it_they(100)
+    'they'
+
+    """
+    return "it" if n == 1 else "they"
+
+
+def is_are(n: int) -> str:
+    """Pick the verb form that agrees with a count of n.
+
+    >>> is_are(1)
+    'is'
+    >>> is_are(2)
+    'are'
+
+    """
+    return "is" if n == 1 else "are"
+
+
+def pluralize(n: int) -> str:
+    """Return the plural suffix for a count of n: '' for exactly 1, else 's'.
+
+    >>> pluralize(15)
+    's'
+    >>> count = 1
+    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+    There is 1 file.
+    >>> count = 4
+    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+    There are 4 files.
+
+    """
+    return "" if n == 1 else "s"
+
+
+def make_contractions(txt: str) -> str:
+    """Glue words together to form contractions.
+
+    The text is rewritten by a fixed sequence of case-insensitive
+    regular expression substitutions; the captured letters keep the
+    case they had in the input.
+
+    >>> make_contractions('It is nice today.')
+    "It's nice today."
+
+    >>> make_contractions('I can    not even...')
+    "I can't even..."
+
+    >>> make_contractions('She could not see!')
+    "She couldn't see!"
+
+    >>> make_contractions('But she will not go.')
+    "But she won't go."
+
+    >>> make_contractions('Verily, I shall not.')
+    "Verily, I shan't."
+
+    >>> make_contractions('No you cannot.')
+    "No you can't."
+
+    >>> make_contractions('I said you can not go.')
+    "I said you can't go."
+
+    """
+
+    # Each entry pairs a list of first words with a list of regex
+    # fragments for the second word.  The parenthesised parts of a
+    # fragment are the letters that survive in the contraction.
+    first_second = [
+        (
+            # Verbs that combine with a following "not" (e.g. isn't).
+            [
+                'are',
+                'could',
+                'did',
+                'has',
+                'have',
+                'is',
+                'must',
+                'should',
+                'was',
+                'were',
+                'would',
+            ],
+            ['(n)o(t)'],
+        ),
+        (
+            # Words that combine with a following auxiliary (e.g. it's, they'll).
+            [
+                "I",
+                "you",
+                "he",
+                "she",
+                "it",
+                "we",
+                "they",
+                "how",
+                "why",
+                "when",
+                "where",
+                "who",
+                "there",
+            ],
+            ['woul(d)', 'i(s)', 'a(re)', 'ha(s)', 'ha(ve)', 'ha(d)', 'wi(ll)'],
+        ),
+    ]
+
+    # Special cases: can't, shan't and won't.
+    # These run before the table-driven pass below; \s* (rather than \s+)
+    # lets the first pattern also match "cannot".
+    txt = re.sub(r'\b(can)\s*no(t)\b', r"\1'\2", txt, count=0, flags=re.IGNORECASE)
+    txt = re.sub(r'\b(sha)ll\s*(n)o(t)\b', r"\1\2'\3", txt, count=0, flags=re.IGNORECASE)
+    # The replacement reorders the captured letters: w + o + n + ' + t.
+    txt = re.sub(
+        r'\b(w)ill\s*(n)(o)(t)\b',
+        r"\1\3\2'\4",
+        txt,
+        count=0,
+        flags=re.IGNORECASE,
+    )
+
+    for first_list, second_list in first_second:
+        for first in first_list:
+            for second in second_list:
+                # Disallow there're/where're.  They're valid English
+                # but sound weird.
+                if (first in ('there', 'where')) and second == 'a(re)':
+                    continue
+
+                # Group 1 is the first word; the remaining groups come
+                # from the parentheses inside the second-word fragment.
+                pattern = fr'\b({first})\s+{second}\b'
+                if second == '(n)o(t)':
+                    # first + "n" + apostrophe + "t"
+                    replacement = r"\1\2'\3"
+                else:
+                    # first + apostrophe + kept tail of the second word
+                    replacement = r"\1'\2"
+                txt = re.sub(pattern, replacement, txt, count=0, flags=re.IGNORECASE)
+
+    return txt
+
+
+def thify(n: int) -> str:
+    """Return the proper ordinal suffix for a number.
+
+    Numbers ending in 11, 12 or 13 take 'th'; otherwise the suffix is
+    decided by the last digit ('st', 'nd', 'rd', else 'th').
+
+    >>> thify(1)
+    'st'
+    >>> thify(33)
+    'rd'
+    >>> thify(16)
+    'th'
+    >>> thify(11)
+    'th'
+    >>> thify(112)
+    'th'
+
+    """
+    digits = str(n)
+    assert is_integer_number(digits)
+    # The "teens" are irregular: 11th, 12th, 13th (and 111th, 212th, ...),
+    # so they must be checked before looking at the last digit alone.
+    if digits[-2:] in ('11', '12', '13'):
+        return "th"
+    last = digits[-1:]
+    if last == "1":
+        return "st"
+    if last == "2":
+        return "nd"
+    if last == "3":
+        return "rd"
+    return "th"
+
+
+def ngrams(txt: str, n: int):
+    """Lazily yield the ngrams of a string, each as one space-joined str.
+
+    The text is split on whitespace and the resulting words are grouped
+    by ngrams_presplit().
+
+    >>> [x for x in ngrams('This is a test', 2)]
+    ['This is', 'is a', 'a test']
+
+    """
+    for group in ngrams_presplit(txt.split(), n):
+        yield ' '.join(f'{word}' for word in group).strip()
+
+
+def ngrams_presplit(words: Sequence[str], n: int):
+    """Compute ngrams over a sequence of words that is already split.
+
+    Thin wrapper: passes words and n straight to list_utils.ngrams and
+    returns its result unchanged.  ngrams() iterates that result and
+    treats every item as a group of words.
+    """
+    return list_utils.ngrams(words, n)
+
+
+def bigrams(txt: str):
+    """Shorthand for ngrams(txt, 2)."""
+    return ngrams(txt, 2)
+
+
+def trigrams(txt: str):
+    """Shorthand for ngrams(txt, 3)."""
+    return ngrams(txt, 3)
+
+
+def shuffle_columns_into_list(
+    input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim=''
+) -> Iterable[str]:
+    """Helper to shuffle / parse columnar data and return the results as a
+    list.  The column_specs argument is an iterable collection of
+    numeric sequences that indicate one or more column numbers to
+    copy.
+
+    Each spec produces one output string: the selected columns joined
+    with delim.  Note that the joined string is passed through
+    str.strip(delim), so any leading or trailing characters that occur
+    in delim are removed from it.
+
+    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
+    >>> shuffle_columns_into_list(
+    ...     cols,
+    ...     [ [8], [2, 3], [5, 6, 7] ],
+    ...     delim=' ',
+    ... )
+    ['acl_test.py', 'scott wheel', 'Jul 9 11:34']
+
+    """
+    # One output entry per spec: [col1, col2...] -> "col1<delim>col2..."
+    return [
+        ''.join(delim + input_lines[column] for column in spec).strip(delim)
+        for spec in column_specs
+    ]
+
+
+def shuffle_columns_into_dict(
+    input_lines: Sequence[str],
+    column_specs: Iterable[Tuple[str, Iterable[int]]],
+    delim='',
+) -> Dict[str, str]:
+    """Helper to shuffle / parse columnar data and return the results
+    as a dict.
+
+    Every spec is a (key, column numbers) pair.  The selected columns
+    are joined with delim and stored under key.  Note that the joined
+    string is passed through str.strip(delim), so any leading or
+    trailing characters that occur in delim are removed from it.
+
+    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
+    >>> shuffle_columns_into_dict(
+    ...     cols,
+    ...     [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ],
+    ...     delim=' ',
+    ... )
+    {'filename': 'acl_test.py', 'owner': 'scott wheel', 'mtime': 'Jul 9 11:34'}
+
+    """
+    result = {}
+
+    # spec is ("key", [col1, col2...])
+    for spec in column_specs:
+        joined = ''.join(delim + input_lines[column] for column in spec[1])
+        result[spec[0]] = joined.strip(delim)
+    return result
+
+
+def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
+    """Fill the {name} placeholders of txt with entries from values.
+
+    The text is expanded with str.format(**values) and the result is
+    passed through sprintf() with an empty end string.
+
+    >>> interpolate_using_dict('This is a {adjective} {noun}.',
+    ...                        {'adjective': 'good', 'noun': 'example'})
+    'This is a good example.'
+
+    """
+    expanded = txt.format(**values)
+    return sprintf(expanded, end='')
+
+
+def to_ascii(x: str):
+    """Return x as an ascii bytes string.
+
+    A str is encoded with the 'ascii' codec; bytes are handed back
+    untouched.  Any other type raises an Exception.
+
+    >>> to_ascii('test')
+    b'test'
+
+    >>> to_ascii(b'1, 2, 3')
+    b'1, 2, 3'
+
+    """
+    if isinstance(x, bytes):
+        return x
+    if not isinstance(x, str):
+        raise Exception('to_ascii works with strings and bytes')
+    return x.encode('ascii')
+
+
+def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
+    """Encode txt to bytes (using encoding / errors) and then base64
+    encode those bytes with base64.encodebytes, which ends its output
+    with a newline.
+
+    >>> to_base64('hello?')
+    b'aGVsbG8/\\n'
+
+    """
+    raw = txt.encode(encoding, errors)
+    return base64.encodebytes(raw)
+
+
+def is_base64(txt: str) -> bool:
+    """Determine whether a string is base64 encoded (with Python's standard
+    base64 alphabet).
+
+    This is a character-set test only: after stripping surrounding
+    whitespace, every character must be a letter, a digit, '+', '/',
+    the '=' padding character or a line break of the kind
+    base64.encodebytes inserts into long output.  Length and padding
+    placement are not validated.  Both str and bytes are accepted; text
+    that cannot be encoded as ascii is reported as not base64.
+
+    >>> is_base64('test')    # all letters in the b64 alphabet
+    True
+
+    >>> is_base64('another test, how do you like this one?')
+    False
+
+    >>> is_base64(b'aGVsbG8/\\n')    # Ending newline is ok.
+    True
+
+    >>> is_base64('aGVsbG8=')    # Padding is part of the encoding.
+    True
+
+    """
+    a = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/'
+    # '=' pads the final quantum and '\n' separates output lines; both
+    # appear in what to_base64() produces, so they must be accepted.
+    alphabet = set((a + '=\n').encode('ascii'))
+    try:
+        data = to_ascii(txt.strip())
+    except UnicodeEncodeError:
+        # Non-ascii text cannot be base64.
+        return False
+    return all(char in alphabet for char in data)
+
+
+def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str:
+    """Decode base64 bytes with base64.decodebytes and then decode the
+    resulting bytes into a str using encoding / errors.
+
+    >>> from_base64(b'aGVsbG8/\\n')
+    'hello?'
+
+    """
+    raw = base64.decodebytes(b64)
+    return raw.decode(encoding, errors)
+
+
+def chunk(txt: str, chunk_size):
+    """Chunk up a string.
+
+    Lazily yields consecutive slices of txt that are chunk_size long.
+    When len(txt) is not a multiple of chunk_size a warning is logged
+    and issued via warnings.warn, and the final slice is shorter.
+
+    Because this is a generator, the ValueError for a chunk_size below
+    1 is raised when iteration starts, not when chunk() is called.
+
+    >>> ' '.join(chunk('010011011100010110101010101010101001111110101000', 8))
+    '01001101 11000101 10101010 10101010 10011111 10101000'
+
+    """
+    # Zero would divide by zero below and a negative size would silently
+    # yield nothing; reject both explicitly.
+    if chunk_size < 1:
+        raise ValueError(f'chunk_size must be >= 1 (got {chunk_size})')
+    if len(txt) % chunk_size != 0:
+        msg = (
+            f"String to chunk's length ({len(txt)}) is not an even multiple "
+            f"of chunk_size ({chunk_size})"
+        )
+        logger.warning(msg)
+        warnings.warn(msg, stacklevel=2)
+    for x in range(0, len(txt), chunk_size):
+        yield txt[x : x + chunk_size]
+
+
+def to_bitstring(txt: str, *, delimiter='', encoding='utf-8', errors='surrogatepass') -> str:
+    """Encode txt and then chop it into bytes.  Note: only bitstrings
+    with delimiter='' are interpretable by from_bitstring.
+
+    A str is encoded with encoding / errors; bytes are used as they
+    are.  Every byte becomes exactly eight binary digits, so leading
+    zero bytes are preserved.  Empty input yields '00000000'.
+
+    >>> to_bitstring('hello?')
+    '011010000110010101101100011011000110111100111111'
+
+    >>> to_bitstring('test', delimiter=' ')
+    '01110100 01100101 01110011 01110100'
+
+    >>> to_bitstring(b'test')
+    '01110100011001010111001101110100'
+
+    """
+    if isinstance(txt, str):
+        # Honor the caller's encoding instead of forcing ascii.
+        etxt = txt.encode(encoding, errors)
+    else:
+        # bytes pass through unchanged; anything else is rejected.
+        etxt = to_ascii(txt)
+    if not etxt:
+        # Historical result for empty input, kept for compatibility.
+        return '00000000'
+    # Formatting byte by byte (instead of via one big int) keeps
+    # leading NUL bytes from vanishing.
+    return delimiter.join(f'{byte:08b}' for byte in etxt)
+
+
+def is_bitstring(txt: str) -> bool:
+    """Report whether txt is a bitstring.
+
+    The check prefixes txt with '0b' and asks
+    is_binary_integer_number() about the result.
+
+    >>> is_bitstring('011010000110010101101100011011000110111100111111')
+    True
+
+    >>> is_bitstring('1234')
+    False
+
+    """
+    candidate = f'0b{txt}'
+    return is_binary_integer_number(candidate)
+
+
+def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
+    """Convert from bitstring back to bytes then decode into a str.
+
+    Every full group of eight digits in bits becomes one byte, so
+    leading all-zero bytes are kept.  An input whose value is zero and
+    that is shorter than eight digits decodes to '\\0'.
+
+    >>> from_bitstring('011010000110010101101100011011000110111100111111')
+    'hello?'
+
+    >>> from_bitstring('0000000001100001')
+    '\\x00a'
+
+    """
+    n = int(bits, 2)
+    # bit_length() ignores leading zeros, so on its own it would drop
+    # leading NUL bytes; the input length restores them.
+    width = max((n.bit_length() + 7) // 8, len(bits) // 8)
+    return n.to_bytes(width, 'big').decode(encoding, errors) or '\0'
+
+
+def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
+    """Turn an IPv4 address into a tuple for sorting purposes.
+
+    Returns a tuple holding the dot-separated parts as ints, or None
+    (after logging a warning) when is_ip_v4() rejects txt.
+
+    >>> ip_v4_sort_key('10.0.0.18')
+    (10, 0, 0, 18)
+
+    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
+    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
+    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']
+
+    """
+    if not is_ip_v4(txt):
+        # Report through the module logger, like the rest of this file,
+        # rather than writing to stdout from library code.
+        logger.warning(f"not IP: {txt}")
+        return None
+    return tuple(int(x) for x in txt.split('.'))
+
+
+def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
+    """Split a file path on '/' into a tuple of its non-empty
+    components, so that parent/ancestor paths sort before
+    children/descendant paths.
+
+    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
+    ('usr', 'local', 'bin')
+
+    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
+    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
+    ['/usr', '/usr/local', '/usr/local/bin']
+
+    """
+    return tuple(part for part in volume.split('/') if part)
+
+
+def replace_all(in_str: str, replace_set: str, replacement: str) -> str:
+    """Replace every character of replace_set with replacement.
+
+    The characters are processed one after another, each pass working
+    on the output of the previous one.
+
+    >>> s = 'this_is a-test!'
+    >>> replace_all(s, ' _-!', '')
+    'thisisatest'
+
+    """
+    result = in_str
+    for target in replace_set:
+        result = result.replace(target, replacement)
+    return result
+
+
+if __name__ == '__main__':
+    # When the file is executed directly, run the doctests embedded in
+    # this module's docstrings.
+    import doctest
+
+    doctest.testmod()