Make rate_limited use cvs.

[python_utils.git] / string_utils.py
diff --git a/string_utils.py b/string_utils.py

index 6fc257de52c48f34e207e79e8b2227e914ad2b8c..097dc1b092dc51bb031f104e82e09047bef1b8ad 100644 (file)
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,18 +1,22 @@
  #!/usr/bin/env python3
  
  #!/usr/bin/env python3
  
+import base64
  import contextlib
  import datetime
  import io
  from itertools import zip_longest
  import json
  import logging
  import contextlib
  import datetime
  import io
  from itertools import zip_longest
  import json
  import logging
+import numbers
  import random
  import re
  import string
  import random
  import re
  import string
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
  import unicodedata
  from uuid import uuid4
  
  import unicodedata
  from uuid import uuid4
  
+import list_utils
+
  logger = logging.getLogger(__name__)
  
  NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  logger = logging.getLogger(__name__)
  
  NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
@@ -27,7 +31,7 @@ URLS_RAW_STRING = (
      r"([a-z-]+://)"  # scheme
      r"([a-z_\d-]+:[a-z_\d-]+@)?"  # user:password
      r"(www\.)?"  # www.
      r"([a-z-]+://)"  # scheme
      r"([a-z_\d-]+:[a-z_\d-]+@)?"  # user:password
      r"(www\.)?"  # www.
-    r"((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)"  # domain
+    r"((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)" # domain
      r"(:\d{2,})?"  # port number
      r"(/[a-z\d_%+-]*)*"  # folders
      r"(\.[a-z\d_%+-]+)*"  # file extension
      r"(:\d{2,})?"  # port number
      r"(/[a-z\d_%+-]*)*"  # folders
      r"(\.[a-z\d_%+-]+)*"  # file extension
@@ -89,10 +93,18 @@ UUID_HEX_OK_RE = re.compile(
  
  SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  
  
  SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  
+ANYWHERE_IP_V4_RE = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
+
  IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  
  IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  
+ANYWHERE_IP_V6_RE = re.compile(r"([a-z\d]{0,4}:){7}[a-z\d]{0,4}", re.IGNORECASE)
+
  MAC_ADDRESS_RE = re.compile(
  MAC_ADDRESS_RE = re.compile(
-    r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
+    r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})$", re.IGNORECASE
+)
+
+ANYWHERE_MAC_ADDRESS_RE = re.compile(
+    r"([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
  )
  
  WORDS_COUNT_RE = re.compile(
  )
  
  WORDS_COUNT_RE = re.compile(
@@ -134,27 +146,95 @@ NUM_SUFFIXES = {
  
  
  def is_none_or_empty(in_str: Optional[str]) -> bool:
  
  
  def is_none_or_empty(in_str: Optional[str]) -> bool:
+    """
+    Returns True if the input string is None, empty, or only whitespace.
+
+    >>> is_none_or_empty("")
+    True
+    >>> is_none_or_empty(None)
+    True
+    >>> is_none_or_empty("   \t   ")
+    True
+    >>> is_none_or_empty('Test')
+    False
+    """
      return in_str is None or len(in_str.strip()) == 0
  
  
  def is_string(obj: Any) -> bool:
      """
      Checks if an object is a string.
      return in_str is None or len(in_str.strip()) == 0
  
  
  def is_string(obj: Any) -> bool:
      """
      Checks if an object is a string.
+
+    >>> is_string('test')
+    True
+    >>> is_string(123)
+    False
+    >>> is_string(100.3)
+    False
+    >>> is_string([1, 2, 3])
+    False
      """
      return isinstance(obj, str)
  
  
  def is_empty_string(in_str: Any) -> bool:
      """
      return isinstance(obj, str)
  
  
  def is_empty_string(in_str: Any) -> bool:
+    return is_empty(in_str)
+
+
+def is_empty(in_str: Any) -> bool:
+    """
+    Checks if input is a string and empty or only whitespace.
+
+    >>> is_empty('')
+    True
+    >>> is_empty('    \t\t    ')
+    True
+    >>> is_empty('test')
+    False
+    >>> is_empty(100.88)
+    False
+    >>> is_empty([1, 2, 3])
+    False
+    """
      return is_string(in_str) and in_str.strip() == ""
  
  
  def is_full_string(in_str: Any) -> bool:
      return is_string(in_str) and in_str.strip() == ""
  
  
  def is_full_string(in_str: Any) -> bool:
+    """
+    Checks that input is a string and is not empty ('') or only whitespace.
+
+    >>> is_full_string('test!')
+    True
+    >>> is_full_string('')
+    False
+    >>> is_full_string('      ')
+    False
+    >>> is_full_string(100.999)
+    False
+    >>> is_full_string({"a": 1, "b": 2})
+    False
+    """
      return is_string(in_str) and in_str.strip() != ""
  
  
  def is_number(in_str: str) -> bool:
      """
      Checks if a string is a valid number.
      return is_string(in_str) and in_str.strip() != ""
  
  
  def is_number(in_str: str) -> bool:
      """
      Checks if a string is a valid number.
+
+    >>> is_number(100.5)
+    Traceback (most recent call last):
+    ...
+    ValueError: 100.5
+    >>> is_number("100.5")
+    True
+    >>> is_number("test")
+    False
+    >>> is_number("99")
+    True
+    >>> is_number([1, 2, 3])
+    Traceback (most recent call last):
+    ...
+    ValueError: [1, 2, 3]
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -167,10 +247,10 @@ def is_integer_number(in_str: str) -> bool:
  
      An integer may be signed or unsigned or use a "scientific notation".
  
  
      An integer may be signed or unsigned or use a "scientific notation".
  
-    *Examples:*
-
-    >>> is_integer('42') # returns true
-    >>> is_integer('42.0') # returns false
+    >>> is_integer_number('42')
+    True
+    >>> is_integer_number('42.0')
+    False
      """
      return (
          (is_number(in_str) and "." not in in_str) or
      """
      return (
          (is_number(in_str) and "." not in in_str) or
@@ -181,24 +261,89 @@ def is_integer_number(in_str: str) -> bool:
  
  
  def is_hexidecimal_integer_number(in_str: str) -> bool:
  
  
  def is_hexidecimal_integer_number(in_str: str) -> bool:
+    """
+    Checks whether a string is a hex integer number.
+
+    >>> is_hexidecimal_integer_number('0x12345')
+    True
+    >>> is_hexidecimal_integer_number('0x1A3E')
+    True
+    >>> is_hexidecimal_integer_number('1234')  # Needs 0x
+    False
+    >>> is_hexidecimal_integer_number('-0xff')
+    True
+    >>> is_hexidecimal_integer_number('test')
+    False
+    >>> is_hexidecimal_integer_number(12345)  # Not a string
+    Traceback (most recent call last):
+    ...
+    ValueError: 12345
+    >>> is_hexidecimal_integer_number(101.4)
+    Traceback (most recent call last):
+    ...
+    ValueError: 101.4
+    >>> is_hexidecimal_integer_number(0x1A3E)
+    Traceback (most recent call last):
+    ...
+    ValueError: 6718
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return HEX_NUMBER_RE.match(in_str) is not None
  
  
  def is_octal_integer_number(in_str: str) -> bool:
      if not is_string(in_str):
          raise ValueError(in_str)
      return HEX_NUMBER_RE.match(in_str) is not None
  
  
  def is_octal_integer_number(in_str: str) -> bool:
+    """
+    Checks whether a string is an octal number.
+
+    >>> is_octal_integer_number('0o777')
+    True
+    >>> is_octal_integer_number('-0O115')
+    True
+    >>> is_octal_integer_number('0xFF')  # Not octal, needs 0o
+    False
+    >>> is_octal_integer_number('7777')  # Needs 0o
+    False
+    >>> is_octal_integer_number('test')
+    False
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return OCT_NUMBER_RE.match(in_str) is not None
  
  
  def is_binary_integer_number(in_str: str) -> bool:
      if not is_string(in_str):
          raise ValueError(in_str)
      return OCT_NUMBER_RE.match(in_str) is not None
  
  
  def is_binary_integer_number(in_str: str) -> bool:
+    """
+    Checks whether a string is a binary integer number.
+
+    >>> is_binary_integer_number('0b10111')
+    True
+    >>> is_binary_integer_number('-0b111')
+    True
+    >>> is_binary_integer_number('0B10101')
+    True
+    >>> is_binary_integer_number('0b10102')
+    False
+    >>> is_binary_integer_number('0xFFF')
+    False
+    >>> is_binary_integer_number('test')
+    False
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      return BIN_NUMBER_RE.match(in_str) is not None
  
  
  def to_int(in_str: str) -> int:
      if not is_string(in_str):
          raise ValueError(in_str)
      return BIN_NUMBER_RE.match(in_str) is not None
  
  
  def to_int(in_str: str) -> int:
+    """Returns the integral value of the string or raises on error.
+
+    >>> to_int('1234')
+    1234
+    >>> to_int('test')
+    Traceback (most recent call last):
+    ...
+    ValueError: invalid literal for int() with base 10: 'test'
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      if is_binary_integer_number(in_str):
      if not is_string(in_str):
          raise ValueError(in_str)
      if is_binary_integer_number(in_str):
@@ -216,13 +361,21 @@ def is_decimal_number(in_str: str) -> bool:
  
      A decimal may be signed or unsigned or use a "scientific notation".
  
  
      A decimal may be signed or unsigned or use a "scientific notation".
  
-    >>> is_decimal('42.0') # returns true
-    >>> is_decimal('42') # returns false
+    >>> is_decimal_number('42.0')
+    True
+    >>> is_decimal_number('42')
+    False
      """
      return is_number(in_str) and "." in in_str
  
  
  def strip_escape_sequences(in_str: str) -> str:
      """
      return is_number(in_str) and "." in in_str
  
  
  def strip_escape_sequences(in_str: str) -> str:
+    """
+    Remove escape sequences in the input string.
+
+    >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+    'this is a test!'
+    """
      in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
      return in_str
  
      in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
      return in_str
  
@@ -233,7 +386,22 @@ def add_thousands_separator(
          separator_char = ',',
          places = 3
  ) -> str:
          separator_char = ',',
          places = 3
  ) -> str:
-    if isinstance(in_str, int):
+    """
+    Add thousands separator to a numeric string.  Also handles numbers.
+
+    >>> add_thousands_separator('12345678')
+    '12,345,678'
+    >>> add_thousands_separator(12345678)
+    '12,345,678'
+    >>> add_thousands_separator(12345678.99)
+    '12,345,678.99'
+    >>> add_thousands_separator('test')
+    Traceback (most recent call last):
+    ...
+    ValueError: test
+
+    """
+    if isinstance(in_str, numbers.Number):
          in_str = f'{in_str}'
      if is_number(in_str):
          return _add_thousands_separator(
          in_str = f'{in_str}'
      if is_number(in_str):
          return _add_thousands_separator(
@@ -263,11 +431,12 @@ def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
      """
      Check if a string is a valid url.
  
      """
      Check if a string is a valid url.
  
-    *Examples:*
-
-    >>> is_url('http://www.mysite.com') # returns true
-    >>> is_url('https://mysite.com') # returns true
-    >>> is_url('.mysite.com') # returns false
+    >>> is_url('http://www.mysite.com')
+    True
+    >>> is_url('https://mysite.com')
+    True
+    >>> is_url('.mysite.com')
+    False
      """
      if not is_full_string(in_str):
          return False
      """
      if not is_full_string(in_str):
          return False
@@ -285,10 +454,10 @@ def is_email(in_str: Any) -> bool:
  
      Reference: https://tools.ietf.org/html/rfc3696#section-3
  
  
      Reference: https://tools.ietf.org/html/rfc3696#section-3
  
-    *Examples:*
-
-    >>> is_email('[email protected]') # returns true
-    >>> is_email('@gmail.com') # returns false
+    >>> is_email('[email protected]')
+    True
+    >>> is_email('@gmail.com')
+    False
      """
      if (
          not is_full_string(in_str)
      """
      if (
          not is_full_string(in_str)
@@ -331,8 +500,12 @@ def is_email(in_str: Any) -> bool:
  def suffix_string_to_number(in_str: str) -> Optional[int]:
      """Take a string like "33Gb" and convert it into a number (of bytes)
      like 34603008.  Return None if the input string is not valid.
  def suffix_string_to_number(in_str: str) -> Optional[int]:
      """Take a string like "33Gb" and convert it into a number (of bytes)
      like 34603008.  Return None if the input string is not valid.
-    """
  
  
+    >>> suffix_string_to_number('1Mb')
+    1048576
+    >>> suffix_string_to_number('13.1Gb')
+    14066017894
+    """
      def suffix_capitalize(s: str) -> str:
          if len(s) == 1:
              return s.upper()
      def suffix_capitalize(s: str) -> str:
          if len(s) == 1:
              return s.upper()
@@ -352,13 +525,21 @@ def suffix_string_to_number(in_str: str) -> Optional[int]:
              if multiplier is not None:
                  r = rest[x]
                  if is_integer_number(r):
              if multiplier is not None:
                  r = rest[x]
                  if is_integer_number(r):
-                    return int(r) * multiplier
+                    return to_int(r) * multiplier
+                if is_decimal_number(r):
+                    return int(float(r) * multiplier)
      return None
  
  
  def number_to_suffix_string(num: int) -> Optional[str]:
      """Take a number (of bytes) and returns a string like "43.8Gb".
      Returns none if the input is invalid.
      return None
  
  
  def number_to_suffix_string(num: int) -> Optional[str]:
      """Take a number (of bytes) and returns a string like "43.8Gb".
      Returns none if the input is invalid.
+
+    >>> number_to_suffix_string(14066017894)
+    '13.1Gb'
+    >>> number_to_suffix_string(1024 * 1024)
+    '1.0Mb'
+
      """
      d = 0.0
      suffix = None
      """
      d = 0.0
      suffix = None
@@ -427,6 +608,16 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
      - it's composed only by lowercase/uppercase letters and digits
      - it contains at least one underscore (or provided separator)
      - it does not start with a number
      - it's composed only by lowercase/uppercase letters and digits
      - it contains at least one underscore (or provided separator)
      - it does not start with a number
+
+    >>> is_snake_case('this_is_a_test')
+    True
+    >>> is_snake_case('___This_Is_A_Test_1_2_3___')
+    True
+    >>> is_snake_case('this-is-a-test')
+    False
+    >>> is_snake_case('this-is-a-test', separator='-')
+    True
+
      """
      if is_full_string(in_str):
          re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
      """
      if is_full_string(in_str):
          re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
@@ -447,11 +638,12 @@ def is_json(in_str: Any) -> bool:
      """
      Check if a string is a valid json.
  
      """
      Check if a string is a valid json.
  
-    *Examples:*
-
-    >>> is_json('{"name": "Peter"}') # returns true
-    >>> is_json('[1, 2, 3]') # returns true
-    >>> is_json('{nope}') # returns false
+    >>> is_json('{"name": "Peter"}')
+    True
+    >>> is_json('[1, 2, 3]')
+    True
+    >>> is_json('{nope}')
+    False
      """
      if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
          try:
      """
      if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
          try:
@@ -465,11 +657,12 @@ def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
      """
      Check if a string is a valid UUID.
  
      """
      Check if a string is a valid UUID.
  
-    *Example:*
-
-    >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
-    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
-    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
+    >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf')
+    True
+    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf')
+    False
+    >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True)
+    True
      """
      # string casting is used to allow UUID itself as input data type
      s = str(in_str)
      """
      # string casting is used to allow UUID itself as input data type
      s = str(in_str)
@@ -482,11 +675,12 @@ def is_ip_v4(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip v4.
  
      """
      Checks if a string is a valid ip v4.
  
-    *Examples:*
-
-    >>> is_ip_v4('255.200.100.75') # returns true
-    >>> is_ip_v4('nope') # returns false (not an ip)
-    >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
+    >>> is_ip_v4('255.200.100.75')
+    True
+    >>> is_ip_v4('nope')
+    False
+    >>> is_ip_v4('255.200.100.999')  # 999 out of range
+    False
      """
      if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
          return False
      """
      if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
          return False
@@ -501,11 +695,14 @@ def is_ip_v4(in_str: Any) -> bool:
  def extract_ip_v4(in_str: Any) -> Optional[str]:
      """
      Extracts the IPv4 chunk of a string or None.
  def extract_ip_v4(in_str: Any) -> Optional[str]:
      """
      Extracts the IPv4 chunk of a string or None.
+
+    >>> extract_ip_v4('   The secret IP address: 127.0.0.1 (use it wisely)   ')
+    '127.0.0.1'
+    >>> extract_ip_v4('Your mom dresses you funny.')
      """
      if not is_full_string(in_str):
          return None
      """
      if not is_full_string(in_str):
          return None
-    in_str.strip()
-    m = SHALLOW_IP_V4_RE.match(in_str)
+    m = ANYWHERE_IP_V4_RE.search(in_str)
      if m is not None:
          return m.group(0)
      return None
      if m is not None:
          return m.group(0)
      return None
@@ -515,10 +712,10 @@ def is_ip_v6(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip v6.
  
      """
      Checks if a string is a valid ip v6.
  
-    *Examples:*
-
-    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
-    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
+    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
+    True
+    >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?')    # invalid "?"
+    False
      """
      return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
  
      """
      return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
  
@@ -526,11 +723,14 @@ def is_ip_v6(in_str: Any) -> bool:
  def extract_ip_v6(in_str: Any) -> Optional[str]:
      """
      Extract IPv6 chunk or None.
  def extract_ip_v6(in_str: Any) -> Optional[str]:
      """
      Extract IPv6 chunk or None.
+
+    >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+    '2001:db8:85a3:0000:0000:8a2e:370:7334'
+    >>> extract_ip_v6("(and she's ugly too, btw)")
      """
      if not is_full_string(in_str):
          return None
      """
      if not is_full_string(in_str):
          return None
-    in_str.strip()
-    m = IP_V6_RE.match(in_str)
+    m = ANYWHERE_IP_V6_RE.search(in_str)
      if m is not None:
          return m.group(0)
      return None
      if m is not None:
          return m.group(0)
      return None
@@ -540,17 +740,29 @@ def is_ip(in_str: Any) -> bool:
      """
      Checks if a string is a valid ip (either v4 or v6).
  
      """
      Checks if a string is a valid ip (either v4 or v6).
  
-    *Examples:*
-
-    >>> is_ip('255.200.100.75') # returns true
-    >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
-    >>> is_ip('1.2.3') # returns false
+    >>> is_ip('255.200.100.75')
+    True
+    >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
+    True
+    >>> is_ip('1.2.3')
+    False
+    >>> is_ip('1.2.3.999')
+    False
      """
      return is_ip_v6(in_str) or is_ip_v4(in_str)
  
  
  def extract_ip(in_str: Any) -> Optional[str]:
      """
      return is_ip_v6(in_str) or is_ip_v4(in_str)
  
  
  def extract_ip(in_str: Any) -> Optional[str]:
-    """Extract the IP address or None."""
+    """
+    Extract the IP address or None.
+
+    >>> extract_ip('Attacker: 255.200.100.75')
+    '255.200.100.75'
+    >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+    '2001:db8:85a3:0000:0000:8a2e:370:7334'
+    >>> extract_ip('1.2.3')
+
+    """
      ip = extract_ip_v4(in_str)
      if ip is None:
          ip = extract_ip_v6(in_str)
      ip = extract_ip_v4(in_str)
      if ip is None:
          ip = extract_ip_v6(in_str)
@@ -558,16 +770,35 @@ def extract_ip(in_str: Any) -> Optional[str]:
  
  
  def is_mac_address(in_str: Any) -> bool:
  
  
  def is_mac_address(in_str: Any) -> bool:
-    """Return True if in_str is a valid MAC address false otherwise."""
+    """Return True if in_str is a valid MAC address, False otherwise.
+
+    >>> is_mac_address("34:29:8F:12:0D:2F")
+    True
+    >>> is_mac_address('34:29:8f:12:0d:2f')
+    True
+    >>> is_mac_address('34-29-8F-12-0D-2F')
+    True
+    >>> is_mac_address("test")
+    False
+    """
      return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
  
  
  def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
      return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
  
  
  def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
-    """Extract the MAC address from in_str"""
+    """
+    Extract the MAC address from in_str.
+
+    >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
+    '34:29:8F:12:0D:2F'
+
+    >>> extract_mac_address('? (10.0.0.30) at d8:5d:e2:34:54:86 on em0 expires in 1176 seconds [ethernet]')
+    'd8:5d:e2:34:54:86'
+
+    """
      if not is_full_string(in_str):
          return None
      in_str.strip()
      if not is_full_string(in_str):
          return None
      in_str.strip()
-    m = MAC_ADDRESS_RE.match(in_str)
+    m = ANYWHERE_MAC_ADDRESS_RE.search(in_str)
      if m is not None:
          mac = m.group(0)
          mac.replace(":", separator)
      if m is not None:
          mac = m.group(0)
          mac.replace(":", separator)
@@ -580,16 +811,11 @@ def is_slug(in_str: Any, separator: str = "-") -> bool:
      """
      Checks if a given string is a slug (as created by `slugify()`).
  
      """
      Checks if a given string is a slug (as created by `slugify()`).
  
-    *Examples:*
-
-    >>> is_slug('my-blog-post-title') # returns true
-    >>> is_slug('My blog post title') # returns false
+    >>> is_slug('my-blog-post-title')
+    True
+    >>> is_slug('My blog post title')
+    False
  
  
-    :param in_str: String to check.
-    :type in_str: str
-    :param separator: Join sign used by the slug.
-    :type separator: str
-    :return: True if slug, false otherwise.
      """
      if not is_full_string(in_str):
          return False
      """
      if not is_full_string(in_str):
          return False
@@ -604,10 +830,11 @@ def contains_html(in_str: str) -> bool:
      By design, this function matches ANY type of tag, so don't expect to use it
      as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
  
      By design, this function matches ANY type of tag, so don't expect to use it
      as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
  
-    *Examples:*
+    >>> contains_html('my string is <strong>bold</strong>')
+    True
+    >>> contains_html('my string is not bold')
+    False
  
  
-    >>> contains_html('my string is <strong>bold</strong>') # returns true
-    >>> contains_html('my string is not bold') # returns false
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -623,10 +850,11 @@ def words_count(in_str: str) -> int:
      Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
      will be 4 not 1 (even if there are no spaces in the string).
  
      Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
      will be 4 not 1 (even if there are no spaces in the string).
  
-    *Examples:*
+    >>> words_count('hello world')
+    2
+    >>> words_count('one,two,three.stop')
+    4
  
  
-    >>> words_count('hello world') # returns 2
-    >>> words_count('one,two,three.stop') # returns 4
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -637,10 +865,9 @@ def generate_uuid(as_hex: bool = False) -> str:
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
      """
      Generated an UUID string (using `uuid.uuid4()`).
  
-    *Examples:*
+    generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
+    generate_uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
  
  
-    >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
-    >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
      """
      uid = uuid4()
      if as_hex:
      """
      uid = uuid4()
      if as_hex:
@@ -653,9 +880,8 @@ def generate_random_alphanumeric_string(size: int) -> str:
      Returns a string of the specified size containing random
      characters (uppercase/lowercase ascii letters and digits).
  
      Returns a string of the specified size containing random
      characters (uppercase/lowercase ascii letters and digits).
  
-    *Example:*
+    generate_random_alphanumeric_string(9) # possible output: "cx3QQbzYg"
  
  
-    >>> random_string(9) # possible output: "cx3QQbzYg"
      """
      if size < 1:
          raise ValueError("size must be >= 1")
      """
      if size < 1:
          raise ValueError("size must be >= 1")
@@ -667,6 +893,10 @@ def generate_random_alphanumeric_string(size: int) -> str:
  def reverse(in_str: str) -> str:
      """
      Returns the string with its chars reversed.
  def reverse(in_str: str) -> str:
      """
      Returns the string with its chars reversed.
+
+    >>> reverse('test')
+    'tset'
+
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -677,6 +907,11 @@ def camel_case_to_snake_case(in_str, *, separator="_"):
      """
      Convert a camel case string into a snake case one.
      (The original string is returned if is not a valid camel case string)
      """
      Convert a camel case string into a snake case one.
      (The original string is returned if is not a valid camel case string)
+
+    >>> camel_case_to_snake_case('MacAddressExtractorFactory')
+    'mac_address_extractor_factory'
+    >>> camel_case_to_snake_case('Luke Skywalker')
+    'Luke Skywalker'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -693,6 +928,11 @@ def snake_case_to_camel_case(
      """
      Convert a snake case string into a camel case one.
      (The original string is returned if is not a valid snake case string)
      """
      Convert a snake case string into a camel case one.
      (The original string is returned if is not a valid snake case string)
+
+    >>> snake_case_to_camel_case('this_is_a_test')
+    'ThisIsATest'
+    >>> snake_case_to_camel_case('Han Solo')
+    'Han Solo'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -705,12 +945,22 @@ def snake_case_to_camel_case(
  
  
  def to_char_list(in_str: str) -> List[str]:
  
  
  def to_char_list(in_str: str) -> List[str]:
+    """Convert a string into a list of chars.
+
+    >>> to_char_list('test')
+    ['t', 'e', 's', 't']
+    """
      if not is_string(in_str):
          return []
      return list(in_str)
  
  
  def from_char_list(in_list: List[str]) -> str:
      if not is_string(in_str):
          return []
      return list(in_str)
  
  
  def from_char_list(in_list: List[str]) -> str:
+    """Convert a char list into a string.
+
+    >>> from_char_list(['t', 'e', 's', 't'])
+    'test'
+    """
      return "".join(in_list)
  
  
      return "".join(in_list)
  
  
@@ -731,10 +981,10 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
      """
      Remove html code contained into the given string.
  
      """
      Remove html code contained into the given string.
  
-    *Examples:*
-
-    >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
-    >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
+    >>> strip_html('test: <a href="foo/bar">click here</a>')
+    'test: '
+    >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True)
+    'test: click here'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -744,14 +994,14 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
  
  def asciify(in_str: str) -> str:
      """
  
  def asciify(in_str: str) -> str:
      """
-    Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
-    (eg: ó -> o, Ë -> E, ç -> c...).
+    Force string content to be ascii-only by translating all non-ascii
+    chars into the closest possible representation (eg: ó -> o, Ë ->
+    E, ç -> c...).
  
  
-    **Bear in mind**: Some chars may be lost if impossible to translate.
+    N.B. Some chars may be lost if impossible to translate.
  
  
-    *Example:*
-
-    >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
+    >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË')
+    'eeuuooaaeynAAACIINOE'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -780,10 +1030,10 @@ def slugify(in_str: str, *, separator: str = "-") -> str:
      - all chars are encoded as ascii (by using `asciify()`)
      - is safe for URL
  
      - all chars are encoded as ascii (by using `asciify()`)
      - is safe for URL
  
-    *Examples:*
-
-    >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
-    >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
+    >>> slugify('Top 10 Reasons To Love Dogs!!!')
+    'top-10-reasons-to-love-dogs'
+    >>> slugify('Mönstér Mägnët')
+    'monster-magnet'
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -803,7 +1053,8 @@ def to_bool(in_str: str) -> bool:
      """
      Turns a string into a boolean based on its content (CASE INSENSITIVE).
  
      """
      Turns a string into a boolean based on its content (CASE INSENSITIVE).
  
-    A positive boolean (True) is returned if the string value is one of the following:
+    A positive boolean (True) is returned if the string value is one
+    of the following:
  
      - "true"
      - "1"
  
      - "true"
      - "1"
@@ -811,13 +1062,35 @@ def to_bool(in_str: str) -> bool:
      - "y"
  
      Otherwise False is returned.
      - "y"
  
      Otherwise False is returned.
+
+    >>> to_bool('True')
+    True
+
+    >>> to_bool('1')
+    True
+
+    >>> to_bool('yes')
+    True
+
+    >>> to_bool('no')
+    False
+
+    >>> to_bool('huh?')
+    False
+
+    >>> to_bool('on')
+    True
+
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
-    return in_str.lower() in ("true", "1", "yes", "y", "t")
+    return in_str.lower() in ("true", "1", "yes", "y", "t", "on")
  
  
  def to_date(in_str: str) -> Optional[datetime.date]:
  
  
  def to_date(in_str: str) -> Optional[datetime.date]:
+    """
+    Parses a date string.  See DateParser docs for details.
+    """
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
@@ -829,6 +1102,9 @@ def to_date(in_str: str) -> Optional[datetime.date]:
  
  
  def valid_date(in_str: str) -> bool:
  
  
  def valid_date(in_str: str) -> bool:
+    """
+    True if the string represents a valid date.
+    """
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
@@ -840,6 +1116,9 @@ def valid_date(in_str: str) -> bool:
  
  
  def to_datetime(in_str: str) -> Optional[datetime.datetime]:
  
  
  def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+    """
+    Parses a datetime string.  See DateParser docs for more info.
+    """
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
      import dateparse.dateparse_utils as dp
      try:
          d = dp.DateParser()
@@ -852,6 +1131,9 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]:
  
  
  def valid_datetime(in_str: str) -> bool:
  
  
  def valid_datetime(in_str: str) -> bool:
+    """
+    True if the string represents a valid datetime.
+    """
      _ = to_datetime(in_str)
      if _ is not None:
          return True
      _ = to_datetime(in_str)
      if _ is not None:
          return True
@@ -859,23 +1141,27 @@ def valid_datetime(in_str: str) -> bool:
      return False
  
  
      return False
  
  
-def dedent(in_str: str) -> str:
+def squeeze(in_str: str, character_to_squeeze: str = ' ') -> str:
      """
      """
-    Removes tab indentation from multi line strings (inspired by analogous Scala function).
+    Squeeze runs of more than one character_to_squeeze into one.
  
  
-    *Example:*
+    >>> squeeze(' this        is       a    test    ')
+    ' this is a test '
+
+    >>> squeeze('one|!||!|two|!||!|three', character_to_squeeze='|!|')
+    'one|!|two|!|three'
  
  
-    >>> strip_margin('''
-    >>>                 line 1
-    >>>                 line 2
-    >>>                 line 3
-    >>> ''')
-    >>> # returns:
-    >>> '''
-    >>> line 1
-    >>> line 2
-    >>> line 3
-    >>> '''
+    """
+    return re.sub(
+        r'(' + re.escape(character_to_squeeze) + r')+',
+        character_to_squeeze,
+        in_str
+    )
+
+
+def dedent(in_str: str) -> str:
+    """
+    Removes tab indentation from multi line strings (inspired by analogous Scala function).
      """
      if not is_string(in_str):
          raise ValueError(in_str)
      """
      if not is_string(in_str):
          raise ValueError(in_str)
@@ -885,6 +1171,13 @@ def dedent(in_str: str) -> str:
  
  
  def indent(in_str: str, amount: int) -> str:
  
  
  def indent(in_str: str, amount: int) -> str:
+    """
+    Indents string by prepending amount spaces.
+
+    >>> indent('This is a test', 4)
+    '    This is a test'
+
+    """
      if not is_string(in_str):
          raise ValueError(in_str)
      line_separator = '\n'
      if not is_string(in_str):
          raise ValueError(in_str)
      line_separator = '\n'
@@ -893,6 +1186,7 @@ def indent(in_str: str, amount: int) -> str:
  
  
  def sprintf(*args, **kwargs) -> str:
  
  
  def sprintf(*args, **kwargs) -> str:
+    """String printf, like in C"""
      ret = ""
  
      sep = kwargs.pop("sep", None)
      ret = ""
  
      sep = kwargs.pop("sep", None)
@@ -924,6 +1218,15 @@ def sprintf(*args, **kwargs) -> str:
  
  
  class SprintfStdout(object):
  
  
  class SprintfStdout(object):
+    """
+    A context manager that captures outputs to stdout.
+
+    with SprintfStdout() as buf:
+        print("test")
+    print(buf())
+
+    'test\n'
+    """
      def __init__(self) -> None:
          self.destination = io.StringIO()
          self.recorder = None
      def __init__(self) -> None:
          self.destination = io.StringIO()
          self.recorder = None
@@ -940,18 +1243,48 @@ class SprintfStdout(object):
  
  
  def is_are(n: int) -> str:
  
  
  def is_are(n: int) -> str:
+    """Is or are?
+
+    >>> is_are(1)
+    'is'
+    >>> is_are(2)
+    'are'
+
+    """
      if n == 1:
          return "is"
      return "are"
  
  
  def pluralize(n: int) -> str:
      if n == 1:
          return "is"
      return "are"
  
  
  def pluralize(n: int) -> str:
+    """Add an s?
+
+    >>> pluralize(15)
+    's'
+    >>> count = 1
+    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+    There is 1 file.
+    >>> count = 4
+    >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+    There are 4 files.
+
+    """
      if n == 1:
          return ""
      return "s"
  
  
  def thify(n: int) -> str:
      if n == 1:
          return ""
      return "s"
  
  
  def thify(n: int) -> str:
+    """Return the proper ordinal suffix for a number.
+
+    >>> thify(1)
+    'st'
+    >>> thify(33)
+    'rd'
+    >>> thify(16)
+    'th'
+
+    """
      digit = str(n)
      assert is_integer_number(digit)
      digit = digit[-1:]
      digit = str(n)
      assert is_integer_number(digit)
      digit = digit[-1:]
@@ -963,3 +1296,263 @@ def thify(n: int) -> str:
          return "rd"
      else:
          return "th"
          return "rd"
      else:
          return "th"
+
+
+def ngrams(txt: str, n: int):
+    """Generate the word n-grams of txt, each rendered as one string.
+
+    txt is split on whitespace, the resulting words are grouped by
+    ngrams_presplit, and the words of every group are re-joined with
+    single spaces (so the original spacing between words is not kept).
+
+    >>> [x for x in ngrams('This is a test', 2)]
+    ['This is', 'is a', 'a test']
+
+    """
+    tokens = txt.split()
+    for gram in ngrams_presplit(tokens, n):
+        yield ' '.join(f'{token}' for token in gram).strip()
+
+
+def ngrams_presplit(words: Sequence[str], n: int):
+    """Return the n-grams of an already-tokenized sequence of words.
+
+    This is a thin wrapper that delegates directly to
+    list_utils.ngrams; the shape of the result is whatever that
+    function produces.
+    """
+    return list_utils.ngrams(words, n)
+
+
+def bigrams(txt: str):
+    """Return the 2-word ngrams of txt; shorthand for ngrams(txt, 2)."""
+    return ngrams(txt, 2)
+
+
+def trigrams(txt: str):
+    """Return the 3-word ngrams of txt; shorthand for ngrams(txt, 3)."""
+    return ngrams(txt, 3)
+
+
+def shuffle_columns_into_list(
+        input_lines: Sequence[str],
+        column_specs: Iterable[Iterable[int]],
+        delim=''
+) -> List[str]:
+    """Helper to shuffle / parse columnar data and return the results as a
+    list.  The column_specs argument is an iterable collection of
+    numeric sequences that indicate one or more column numbers to
+    copy.
+
+    input_lines must support indexing (a list or tuple, not a
+    generator) because columns are looked up by position.  Each output
+    item is the selected columns joined with delim.
+
+    N.B. the joined chunk is then passed through str.strip(delim),
+    which treats delim as a *set of characters*: any leading or
+    trailing characters of the result that appear in delim are removed,
+    even when they came from the column data itself.
+
+    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
+    >>> shuffle_columns_into_list(
+    ...     cols,
+    ...     [ [8], [2, 3], [5, 6, 7] ],
+    ...     delim=' ',
+    ... )
+    ['acl_test.py', 'scott wheel', 'Jul 9 11:34']
+
+    """
+    # Column specs map input lines' columns into outputs.
+    # [col1, col2...]
+    return [
+        delim.join(input_lines[n] for n in spec).strip(delim)
+        for spec in column_specs
+    ]
+
+
+def shuffle_columns_into_dict(
+        input_lines: Sequence[str],
+        column_specs: Iterable[Tuple[str, Iterable[int]]],
+        delim=''
+) -> Dict[str, str]:
+    """Helper to shuffle / parse columnar data and return the results
+    as a dict.
+
+    Each item of column_specs is a (key, column numbers) pair; the
+    named columns of input_lines are joined with delim and stored under
+    key.  input_lines must support indexing (a list or tuple, not a
+    generator).  If the same key appears more than once, the last spec
+    wins.
+
+    N.B. the joined chunk is then passed through str.strip(delim),
+    which treats delim as a *set of characters*: any leading or
+    trailing characters of the result that appear in delim are removed,
+    even when they came from the column data itself.
+
+    >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul  9 11:34 acl_test.py'.split()
+    >>> shuffle_columns_into_dict(
+    ...     cols,
+    ...     [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ],
+    ...     delim=' ',
+    ... )
+    {'filename': 'acl_test.py', 'owner': 'scott wheel', 'mtime': 'Jul 9 11:34'}
+
+    """
+    # Column specs map input lines' columns into outputs.
+    # "key", [col1, col2...]
+    return {
+        spec[0]: delim.join(input_lines[n] for n in spec[1]).strip(delim)
+        for spec in column_specs
+    }
+
+
+def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
+    """Interpolate a string with data from a dict.
+
+    txt is a str.format-style template; every {name} placeholder is
+    filled from values[name], so a placeholder with no matching key
+    raises KeyError.  The formatted text is then passed through
+    sprintf with end='' (presumably so no trailing newline is
+    appended -- confirm against sprintf).
+
+    >>> interpolate_using_dict('This is a {adjective} {noun}.',
+    ...                        {'adjective': 'good', 'noun': 'example'})
+    'This is a good example.'
+
+    """
+    return sprintf(txt.format(**values), end='')
+
+
+def to_ascii(x: str) -> bytes:
+    """Encode as ascii bytes string.
+
+    A str (or str subclass) is encoded with the ascii codec, which
+    raises UnicodeEncodeError if it contains non-ascii characters.
+    bytes (or a bytes subclass) are returned unchanged.  Anything else
+    raises TypeError.
+
+    >>> to_ascii('test')
+    b'test'
+
+    >>> to_ascii(b'1, 2, 3')
+    b'1, 2, 3'
+
+    """
+    # isinstance rather than an exact type() match so that subclasses
+    # of str / bytes are accepted too.
+    if isinstance(x, str):
+        return x.encode('ascii')
+    if isinstance(x, bytes):
+        return x
+    # TypeError is a subclass of Exception, so callers that caught the
+    # previous bare Exception keep working.
+    raise TypeError('to_ascii works with strings and bytes')
+
+
+def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes:
+    """Encode txt and then encode the bytes with a 64-character
+    alphabet.  This is compatible with uudecode.
+
+    txt is first turned into bytes with the given encoding and errors
+    handler, then passed to base64.encodebytes.  Note that the result
+    is a bytes object, not a str, and (as the example shows) it ends
+    with a newline.
+
+    >>> to_base64('hello?')
+    b'aGVsbG8/\\n'
+
+    """
+    return base64.encodebytes(txt.encode(encoding, errors))
+
+
+def is_base64(txt: str) -> bool:
+    """Determine whether a string looks base64 encoded, i.e. consists
+    only of characters from Python's standard base64 alphabet
+    (A-Z, a-z, 0-9, '+' and '/'), optionally followed by up to two '='
+    padding characters.  Surrounding whitespace is ignored.
+
+    Accepts either str or bytes.  This is a shallow alphabet check; it
+    does not verify that the length is valid or that the data decodes.
+
+    >>> is_base64('test')    # all letters in the b64 alphabet
+    True
+
+    >>> is_base64('another test, how do you like this one?')
+    False
+
+    >>> is_base64(b'aGVsbG8/\\n')    # Ending newline is ok.
+    True
+
+    >>> is_base64('aGVsbG8=')    # Trailing padding is ok.
+    True
+
+    """
+    a = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/'
+    alphabet = set(a.encode('ascii'))
+    data = to_ascii(txt.strip())
+
+    # Base64 output is padded with zero, one or two '=' characters at
+    # the very end; they are not part of the alphabet proper.
+    payload = data.rstrip(b'=')
+    if len(data) - len(payload) > 2:
+        return False
+    return all(char in alphabet for char in payload)
+
+
+def from_base64(b64: str, encoding='utf-8', errors='surrogatepass') -> str:
+    """Convert base64 encoded string back to normal strings.
+
+    b64 may be given either as bytes (what to_base64 returns) or as an
+    ascii str.  The data is base64-decoded and the resulting bytes are
+    decoded to str using encoding and errors.
+
+    >>> from_base64(b'aGVsbG8/\\n')
+    'hello?'
+
+    >>> from_base64('aGVsbG8/')
+    'hello?'
+
+    """
+    # base64.decodebytes only accepts bytes-like objects; a str argument
+    # (which the signature advertises) would otherwise raise TypeError.
+    if isinstance(b64, str):
+        b64 = b64.encode('ascii')
+    return base64.decodebytes(b64).decode(encoding, errors)
+
+
+def chunk(txt: str, chunk_size: int):
+    """Chunk up a string.
+
+    Generates consecutive slices of txt, each chunk_size characters
+    long.  If len(txt) is not a multiple of chunk_size a warning is
+    logged and the final chunk is shorter than the rest.  Because this
+    is a generator, nothing (including the warning) happens until the
+    result is iterated.
+
+    >>> ' '.join(chunk('010011011100010110101010101010101001111110101000', 8))
+    '01001101 11000101 10101010 10101010 10011111 10101000'
+
+    """
+    if len(txt) % chunk_size != 0:
+        logger.warning(
+            "String to chunk's length (%s) is not an even multiple of chunk_size (%s)",
+            len(txt),
+            chunk_size,
+        )
+    for x in range(0, len(txt), chunk_size):
+        yield txt[x:x+chunk_size]
+
+
+def to_bitstring(txt: str, *, delimiter='', encoding='utf-8', errors='surrogatepass') -> str:
+    """Encode txt and then chop it into bytes.  Note: only bitstrings
+    with delimiter='' are interpretable by from_bitstring.
+
+    A str is converted to bytes using encoding and errors; bytes are
+    used as-is.  Every byte becomes exactly eight '0'/'1' characters
+    (so leading zero bytes are preserved) and the per-byte groups are
+    joined with delimiter.  Empty input produces an empty string.
+    Any other input type raises TypeError.
+
+    >>> to_bitstring('hello?')
+    '011010000110010101101100011011000110111100111111'
+
+    >>> to_bitstring('test', delimiter=' ')
+    '01110100 01100101 01110011 01110100'
+
+    >>> to_bitstring(b'test')
+    '01110100011001010111001101110100'
+
+    """
+    if isinstance(txt, str):
+        raw = txt.encode(encoding, errors)
+    elif isinstance(txt, (bytes, bytearray)):
+        raw = bytes(txt)
+    else:
+        raise TypeError('to_bitstring works with strings and bytes')
+
+    # Format byte-by-byte rather than via one big integer: converting the
+    # whole buffer to an int discards leading zero bytes.
+    return delimiter.join(f'{byte:08b}' for byte in raw)
+
+
+def is_bitstring(txt: str) -> bool:
+    """Is this a bitstring?
+
+    The check is delegated to is_binary_integer_number after prefixing
+    txt with '0b', so the exact rules (e.g. for an empty string) are
+    whatever that helper accepts.
+
+    >>> is_bitstring('011010000110010101101100011011000110111100111111')
+    True
+
+    >>> is_bitstring('1234')
+    False
+
+    """
+    return is_binary_integer_number(f'0b{txt}')
+
+
+def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str:
+    """Convert from bitstring back to bytes then decode into a str.
+
+    bits is a string of '0' and '1' characters with no delimiters (an
+    optional '0b' prefix is tolerated).  The number of bytes produced
+    is taken from the number of digits supplied, so leading zero bytes
+    survive the round trip.  An empty bits string yields an empty
+    result; anything that is not a valid base-2 literal raises
+    ValueError.
+
+    >>> from_bitstring('011010000110010101101100011011000110111100111111')
+    'hello?'
+
+    """
+    if not bits:
+        return ''
+    n = int(bits, 2)
+
+    # Size the output from the digits actually given, not from
+    # n.bit_length(): the latter ignores leading zeros and would drop
+    # leading zero bytes.  int() also accepts surrounding whitespace, a
+    # '+' sign, '_' separators and a '0b' prefix, none of which are digits.
+    digits = bits.strip().lstrip('+').replace('_', '')
+    if digits[:2] in ('0b', '0B'):
+        digits = digits[2:]
+    num_bytes = (len(digits) + 7) // 8
+    return n.to_bytes(num_bytes, 'big').decode(encoding, errors)
+
+
+def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]:
+    """Turn an IPv4 address into a tuple for sorting purposes.
+
+    The dotted address is split on '.' and each part converted to int
+    so that addresses sort numerically rather than lexically.  If txt
+    is not accepted by is_ip_v4, a warning is logged and None is
+    returned; note that None does not compare with tuples, so sorting
+    a list containing such values raises TypeError.
+
+    >>> ip_v4_sort_key('10.0.0.18')
+    (10, 0, 0, 18)
+
+    >>> ips = ['10.0.0.10', '100.0.0.1', '1.2.3.4', '10.0.0.9']
+    >>> sorted(ips, key=lambda x: ip_v4_sort_key(x))
+    ['1.2.3.4', '10.0.0.9', '10.0.0.10', '100.0.0.1']
+
+    """
+    if not is_ip_v4(txt):
+        # Report through the module logger instead of printing to stdout
+        # so callers' own output is not polluted.
+        logger.warning('not IP: %s', txt)
+        return None
+    return tuple(int(x) for x in txt.split('.'))
+
+
+def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]:
+    """Chunk up a file path so that parent/ancestor paths sort before
+    children/descendant paths.
+
+    The path is split on '/' and empty components (from leading,
+    trailing or doubled slashes) are dropped.  Because a tuple that is
+    a prefix of another sorts first, an ancestor always precedes its
+    descendants.
+
+    >>> path_ancestors_before_descendants_sort_key('/usr/local/bin')
+    ('usr', 'local', 'bin')
+
+    >>> paths = ['/usr/local', '/usr/local/bin', '/usr']
+    >>> sorted(paths, key=lambda x: path_ancestors_before_descendants_sort_key(x))
+    ['/usr', '/usr/local', '/usr/local/bin']
+
+    """
+    return tuple(part for part in volume.split('/') if part)
+
+
+if __name__ == '__main__':
+    # When run as a script, execute the doctests embedded in this
+    # module's docstrings.
+    import doctest
+    doctest.testmod()