init_value: Any = 1,
inc_function: Callable[..., Any] = lambda x: x + 1
) -> bool:
+ """
+ Initializes a dict value (if it doesn't exist) or increments it (using the
+ inc_function, which is customizable) if it already does exist. Returns
+ True if the key already existed or False otherwise.
+
+ >>> d = {}
+ >>> init_or_inc(d, "test")
+ False
+ >>> init_or_inc(d, "test")
+ True
+ >>> init_or_inc(d, 'ing')
+ False
+ >>> d
+ {'test': 2, 'ing': 1}
+ """
if key in d.keys():
d[key] = inc_function(d[key])
return True
def shard(d: Dict[Any, Any], size: int) -> Iterator[Dict[Any, Any]]:
+ """
+ Shards a dict into subdicts of at most `size` items each which,
+ together, contain all keys/values from the original unsharded dict.
+ """
items = d.items()
for x in range(0, len(d), size):
yield {key: value for (key, value) in islice(items, x, x + size)}
*,
aggregation_function: Callable[[Any, Any], Any] = coalesce_by_creating_list
) -> Dict[Any, Any]:
+ """Merge N dicts into one dict containing the union of all keys/values in
+ the input dicts. When keys collide, apply the aggregation_function which,
+ by default, creates a list of values. See also coalesce_by_creating_set or
+ provide a user defined aggregation_function.
+
+ >>> a = {'a': 1, 'b': 2}
+ >>> b = {'b': 1, 'c': 2, 'd': 3}
+ >>> c = {'c': 1, 'd': 2}
+ >>> coalesce([a, b, c])
+ {'a': 1, 'b': [1, 2], 'c': [1, 2], 'd': [2, 3]}
+ """
out: Dict[Any, Any] = {}
for d in inputs:
for key in d:
if key in out:
- value = aggregation_function(d[key], out[key])
+ value = aggregation_function(key, d[key], out[key])
else:
value = d[key]
out[key] = value
def item_with_max_value(d: Dict[Any, Any]) -> Tuple[Any, Any]:
+ """Returns the key and value with the max value in a dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> item_with_max_value(d)
+ ('c', 3)
+ >>> item_with_max_value({})
+ Traceback (most recent call last):
+ ...
+ ValueError: max() arg is an empty sequence
+ """
return max(d.items(), key=lambda _: _[1])
def item_with_min_value(d: Dict[Any, Any]) -> Tuple[Any, Any]:
+ """Returns the key and value with the min value in a dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> item_with_min_value(d)
+ ('a', 1)
+ """
return min(d.items(), key=lambda _: _[1])
def key_with_max_value(d: Dict[Any, Any]) -> Any:
+ """Returns the key with the max value in the dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> key_with_max_value(d)
+ 'c'
+ """
return item_with_max_value(d)[0]
def key_with_min_value(d: Dict[Any, Any]) -> Any:
+ """Returns the key with the min value in the dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> key_with_min_value(d)
+ 'a'
+ """
return item_with_min_value(d)[0]
def max_value(d: Dict[Any, Any]) -> Any:
+ """Returns the maximum value in the dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> max_value(d)
+ 3
+ """
return item_with_max_value(d)[1]
def min_value(d: Dict[Any, Any]) -> Any:
+ """Returns the minimum value in the dict.
+
+ >>> d = {'a': 1, 'b': 2, 'c': 3}
+ >>> min_value(d)
+ 1
+ """
return item_with_min_value(d)[1]
def max_key(d: Dict[Any, Any]) -> Any:
+ """Returns the maximum key in dict (ignoring values totally)
+
+ >>> d = {'a': 3, 'b': 2, 'c': 1}
+ >>> max_key(d)
+ 'c'
+ """
return max(d.keys())
def min_key(d: Dict[Any, Any]) -> Any:
+ """Returns the minimum key in dict (ignoring values totally)
+
+ >>> d = {'a': 3, 'b': 2, 'c': 1}
+ >>> min_key(d)
+ 'a'
+ """
return min(d.keys())
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
from itertools import zip_longest
import json
import logging
+import numbers
import random
import re
import string
SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
+ANYWHERE_IP_V4_RE = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
+
IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
+ANYWHERE_IP_V6_RE = re.compile(r"([a-z\d]{0,4}:){7}[a-z\d]{0,4}", re.IGNORECASE)
+
MAC_ADDRESS_RE = re.compile(
- r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
+ r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})$", re.IGNORECASE
+)
+
+ANYWHERE_MAC_ADDRESS_RE = re.compile(
+ r"([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
)
WORDS_COUNT_RE = re.compile(
def is_none_or_empty(in_str: Optional[str]) -> bool:
+ """
+ Returns true if the input string is None, empty, or only whitespace.
+
+ >>> is_none_or_empty("")
+ True
+ >>> is_none_or_empty(None)
+ True
+ >>> is_none_or_empty(" ")
+ True
+ >>> is_none_or_empty('Test')
+ False
+ """
return in_str is None or len(in_str.strip()) == 0
def is_string(obj: Any) -> bool:
"""
Checks if an object is a string.
+
+ >>> is_string('test')
+ True
+ >>> is_string(123)
+ False
+ >>> is_string(100.3)
+ False
+ >>> is_string([1, 2, 3])
+ False
"""
return isinstance(obj, str)
def is_empty_string(in_str: Any) -> bool:
+ """
+ Checks if input is a string and empty or only whitespace.
+
+ >>> is_empty_string('')
+ True
+ >>> is_empty_string(' \t\t ')
+ True
+ >>> is_empty_string('test')
+ False
+ >>> is_empty_string(100.88)
+ False
+ >>> is_empty_string([1, 2, 3])
+ False
+ """
return is_string(in_str) and in_str.strip() == ""
def is_full_string(in_str: Any) -> bool:
+ """
+ Checks that input is a string and is not empty ('') or only whitespace.
+
+ >>> is_full_string('test!')
+ True
+ >>> is_full_string('')
+ False
+ >>> is_full_string(' ')
+ False
+ >>> is_full_string(100.999)
+ False
+ >>> is_full_string({"a": 1, "b": 2})
+ False
+ """
return is_string(in_str) and in_str.strip() != ""
def is_number(in_str: str) -> bool:
"""
Checks if a string is a valid number.
+
+ >>> is_number(100.5)
+ Traceback (most recent call last):
+ ...
+ ValueError: 100.5
+ >>> is_number("100.5")
+ True
+ >>> is_number("test")
+ False
+ >>> is_number("99")
+ True
+ >>> is_number([1, 2, 3])
+ Traceback (most recent call last):
+ ...
+ ValueError: [1, 2, 3]
"""
if not is_string(in_str):
raise ValueError(in_str)
An integer may be signed or unsigned or use a "scientific notation".
- *Examples:*
-
- >>> is_integer('42') # returns true
- >>> is_integer('42.0') # returns false
+ >>> is_integer_number('42')
+ True
+ >>> is_integer_number('42.0')
+ False
"""
return (
(is_number(in_str) and "." not in in_str) or
def is_hexidecimal_integer_number(in_str: str) -> bool:
+ """
+ Checks whether a string is a hex integer number.
+
+ >>> is_hexidecimal_integer_number('0x12345')
+ True
+ >>> is_hexidecimal_integer_number('0x1A3E')
+ True
+ >>> is_hexidecimal_integer_number('1234') # Needs 0x
+ False
+ >>> is_hexidecimal_integer_number('-0xff')
+ True
+ >>> is_hexidecimal_integer_number('test')
+ False
+ >>> is_hexidecimal_integer_number(12345) # Not a string
+ Traceback (most recent call last):
+ ...
+ ValueError: 12345
+ >>> is_hexidecimal_integer_number(101.4)
+ Traceback (most recent call last):
+ ...
+ ValueError: 101.4
+ >>> is_hexidecimal_integer_number(0x1A3E)
+ Traceback (most recent call last):
+ ...
+ ValueError: 6718
+ """
if not is_string(in_str):
raise ValueError(in_str)
return HEX_NUMBER_RE.match(in_str) is not None
def is_octal_integer_number(in_str: str) -> bool:
+ """
+ Checks whether a string is an octal number.
+
+ >>> is_octal_integer_number('0o777')
+ True
+ >>> is_octal_integer_number('-0O115')
+ True
+ >>> is_octal_integer_number('0xFF') # Not octal, needs 0o
+ False
+ >>> is_octal_integer_number('7777') # Needs 0o
+ False
+ >>> is_octal_integer_number('test')
+ False
+ """
if not is_string(in_str):
raise ValueError(in_str)
return OCT_NUMBER_RE.match(in_str) is not None
def is_binary_integer_number(in_str: str) -> bool:
+ """
+ Checks whether a string is a binary integer number.
+
+ >>> is_binary_integer_number('0b10111')
+ True
+ >>> is_binary_integer_number('-0b111')
+ True
+ >>> is_binary_integer_number('0B10101')
+ True
+ >>> is_binary_integer_number('0b10102')
+ False
+ >>> is_binary_integer_number('0xFFF')
+ False
+ >>> is_binary_integer_number('test')
+ False
+ """
if not is_string(in_str):
raise ValueError(in_str)
return BIN_NUMBER_RE.match(in_str) is not None
def to_int(in_str: str) -> int:
+ """Returns the integral value of the string or raises on error.
+
+ >>> to_int('1234')
+ 1234
+ >>> to_int('test')
+ Traceback (most recent call last):
+ ...
+ ValueError: invalid literal for int() with base 10: 'test'
+ """
if not is_string(in_str):
raise ValueError(in_str)
if is_binary_integer_number(in_str):
A decimal may be signed or unsigned or use a "scientific notation".
- >>> is_decimal('42.0') # returns true
- >>> is_decimal('42') # returns false
+ >>> is_decimal_number('42.0')
+ True
+ >>> is_decimal_number('42')
+ False
"""
return is_number(in_str) and "." in in_str
def strip_escape_sequences(in_str: str) -> str:
+ """
+ Remove escape sequences in the input string.
+
+ >>> strip_escape_sequences('\e[12;11;22mthis is a test!')
+ 'this is a test!'
+ """
in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
return in_str
separator_char = ',',
places = 3
) -> str:
- if isinstance(in_str, int):
+ """
+ Add thousands separator to a numeric string. Also handles numbers.
+
+ >>> add_thousands_separator('12345678')
+ '12,345,678'
+ >>> add_thousands_separator(12345678)
+ '12,345,678'
+ >>> add_thousands_separator(12345678.99)
+ '12,345,678.99'
+ >>> add_thousands_separator('test')
+ Traceback (most recent call last):
+ ...
+ ValueError: test
+
+ """
+ if isinstance(in_str, numbers.Number):
in_str = f'{in_str}'
if is_number(in_str):
return _add_thousands_separator(
"""
Check if a string is a valid url.
- *Examples:*
-
- >>> is_url('http://www.mysite.com') # returns true
- >>> is_url('https://mysite.com') # returns true
- >>> is_url('.mysite.com') # returns false
+ >>> is_url('http://www.mysite.com')
+ True
+ >>> is_url('https://mysite.com')
+ True
+ >>> is_url('.mysite.com')
+ False
"""
if not is_full_string(in_str):
return False
Reference: https://tools.ietf.org/html/rfc3696#section-3
- *Examples:*
-
- >>> is_email('@gmail.com') # returns false
+ True
+ >>> is_email('@gmail.com')
+ False
"""
if (
not is_full_string(in_str)
def suffix_string_to_number(in_str: str) -> Optional[int]:
"""Take a string like "33Gb" and convert it into a number (of bytes)
like 34603008. Return None if the input string is not valid.
- """
+ >>> suffix_string_to_number('1Mb')
+ 1048576
+ >>> suffix_string_to_number('13.1Gb')
+ 14066017894
+ """
def suffix_capitalize(s: str) -> str:
if len(s) == 1:
return s.upper()
if multiplier is not None:
r = rest[x]
if is_integer_number(r):
- return int(r) * multiplier
+ return to_int(r) * multiplier
+ if is_decimal_number(r):
+ return int(float(r) * multiplier)
return None
def number_to_suffix_string(num: int) -> Optional[str]:
"""Take a number (of bytes) and returns a string like "43.8Gb".
Returns none if the input is invalid.
+
+ >>> number_to_suffix_string(14066017894)
+ '13.1Gb'
+ >>> number_to_suffix_string(1024 * 1024)
+ '1.0Mb'
+
"""
d = 0.0
suffix = None
- it's composed only by lowercase/uppercase letters and digits
- it contains at least one underscore (or provided separator)
- it does not start with a number
+
+ >>> is_snake_case('this_is_a_test')
+ True
+ >>> is_snake_case('___This_Is_A_Test_1_2_3___')
+ True
+ >>> is_snake_case('this-is-a-test')
+ False
+ >>> is_snake_case('this-is-a-test', separator='-')
+ True
+
"""
if is_full_string(in_str):
re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
"""
Check if a string is a valid json.
- *Examples:*
-
- >>> is_json('{"name": "Peter"}') # returns true
- >>> is_json('[1, 2, 3]') # returns true
- >>> is_json('{nope}') # returns false
+ >>> is_json('{"name": "Peter"}')
+ True
+ >>> is_json('[1, 2, 3]')
+ True
+ >>> is_json('{nope}')
+ False
"""
if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
try:
"""
Check if a string is a valid UUID.
- *Example:*
-
- >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
- >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
- >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
+ >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf')
+ True
+ >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf')
+ False
+ >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True)
+ True
"""
# string casting is used to allow UUID itself as input data type
s = str(in_str)
"""
Checks if a string is a valid ip v4.
- *Examples:*
-
- >>> is_ip_v4('255.200.100.75') # returns true
- >>> is_ip_v4('nope') # returns false (not an ip)
- >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
+ >>> is_ip_v4('255.200.100.75')
+ True
+ >>> is_ip_v4('nope')
+ False
+ >>> is_ip_v4('255.200.100.999') # 999 out of range
+ False
"""
if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
return False
def extract_ip_v4(in_str: Any) -> Optional[str]:
"""
Extracts the IPv4 chunk of a string or None.
+
+ >>> extract_ip_v4(' The secret IP address: 127.0.0.1 (use it wisely) ')
+ '127.0.0.1'
+ >>> extract_ip_v4('Your mom dresses you funny.')
"""
if not is_full_string(in_str):
return None
- in_str.strip()
- m = SHALLOW_IP_V4_RE.match(in_str)
+ m = ANYWHERE_IP_V4_RE.search(in_str)
if m is not None:
return m.group(0)
return None
"""
Checks if a string is a valid ip v6.
- *Examples:*
-
- >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
- >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
+ >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334')
+ True
+ >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # invalid "?"
+ False
"""
return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
def extract_ip_v6(in_str: Any) -> Optional[str]:
"""
Extract IPv6 chunk or None.
+
+ >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+ '2001:db8:85a3:0000:0000:8a2e:370:7334'
+ >>> extract_ip_v6("(and she's ugly too, btw)")
"""
if not is_full_string(in_str):
return None
- in_str.strip()
- m = IP_V6_RE.match(in_str)
+ m = ANYWHERE_IP_V6_RE.search(in_str)
if m is not None:
return m.group(0)
return None
*Examples:*
- >>> is_ip('255.200.100.75') # returns true
- >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
- >>> is_ip('1.2.3') # returns false
+ >>> is_ip('255.200.100.75')
+ True
+ >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334')
+ True
+ >>> is_ip('1.2.3')
+ False
+ >>> is_ip('1.2.3.999')
+ False
"""
return is_ip_v6(in_str) or is_ip_v4(in_str)
def extract_ip(in_str: Any) -> Optional[str]:
- """Extract the IP address or None."""
+ """
+ Extract the IP address or None.
+
+ >>> extract_ip('Attacker: 255.200.100.75')
+ '255.200.100.75'
+ >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334')
+ '2001:db8:85a3:0000:0000:8a2e:370:7334'
+ >>> extract_ip('1.2.3')
+
+ """
ip = extract_ip_v4(in_str)
if ip is None:
ip = extract_ip_v6(in_str)
def is_mac_address(in_str: Any) -> bool:
- """Return True if in_str is a valid MAC address false otherwise."""
+ """Return True if in_str is a valid MAC address, False otherwise.
+
+ >>> is_mac_address("34:29:8F:12:0D:2F")
+ True
+ >>> is_mac_address('34:29:8f:12:0d:2f')
+ True
+ >>> is_mac_address('34-29-8F-12-0D-2F')
+ True
+ >>> is_mac_address("test")
+ False
+ """
return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
- """Extract the MAC address from in_str"""
+ """
+ Extract the MAC address from in_str.
+
+ >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F')
+ '34:29:8F:12:0D:2F'
+
+ """
if not is_full_string(in_str):
return None
in_str.strip()
- m = MAC_ADDRESS_RE.match(in_str)
+ m = ANYWHERE_MAC_ADDRESS_RE.search(in_str)
if m is not None:
mac = m.group(0)
mac.replace(":", separator)
"""
Checks if a given string is a slug (as created by `slugify()`).
- *Examples:*
-
- >>> is_slug('my-blog-post-title') # returns true
- >>> is_slug('My blog post title') # returns false
+ >>> is_slug('my-blog-post-title')
+ True
+ >>> is_slug('My blog post title')
+ False
- :param in_str: String to check.
- :type in_str: str
- :param separator: Join sign used by the slug.
- :type separator: str
- :return: True if slug, false otherwise.
"""
if not is_full_string(in_str):
return False
By design, this function matches ANY type of tag, so don't expect to use it
as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
- *Examples:*
+ >>> contains_html('my string is <strong>bold</strong>')
+ True
+ >>> contains_html('my string is not bold')
+ False
- >>> contains_html('my string is <strong>bold</strong>') # returns true
- >>> contains_html('my string is not bold') # returns false
"""
if not is_string(in_str):
raise ValueError(in_str)
Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
will be 4 not 1 (even if there are no spaces in the string).
- *Examples:*
+ >>> words_count('hello world')
+ 2
+ >>> words_count('one,two,three.stop')
+ 4
- >>> words_count('hello world') # returns 2
- >>> words_count('one,two,three.stop') # returns 4
"""
if not is_string(in_str):
raise ValueError(in_str)
"""
Generated an UUID string (using `uuid.uuid4()`).
- *Examples:*
+ generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
+ generate_uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
- >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
- >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
"""
uid = uuid4()
if as_hex:
Returns a string of the specified size containing random
characters (uppercase/lowercase ascii letters and digits).
- *Example:*
+ random_string(9) # possible output: "cx3QQbzYg"
- >>> random_string(9) # possible output: "cx3QQbzYg"
"""
if size < 1:
raise ValueError("size must be >= 1")
def reverse(in_str: str) -> str:
"""
Returns the string with its chars reversed.
+
+ >>> reverse('test')
+ 'tset'
+
"""
if not is_string(in_str):
raise ValueError(in_str)
"""
Convert a camel case string into a snake case one.
(The original string is returned if is not a valid camel case string)
+
+ >>> camel_case_to_snake_case('MacAddressExtractorFactory')
+ 'mac_address_extractor_factory'
+ >>> camel_case_to_snake_case('Luke Skywalker')
+ 'Luke Skywalker'
"""
if not is_string(in_str):
raise ValueError(in_str)
"""
Convert a snake case string into a camel case one.
(The original string is returned if is not a valid snake case string)
+
+ >>> snake_case_to_camel_case('this_is_a_test')
+ 'ThisIsATest'
+ >>> snake_case_to_camel_case('Han Solo')
+ 'Han Solo'
"""
if not is_string(in_str):
raise ValueError(in_str)
def to_char_list(in_str: str) -> List[str]:
+ """Convert a string into a list of chars.
+
+ >>> to_char_list('test')
+ ['t', 'e', 's', 't']
+ """
if not is_string(in_str):
return []
return list(in_str)
def from_char_list(in_list: List[str]) -> str:
+ """Convert a char list into a string.
+
+ >>> from_char_list(['t', 'e', 's', 't'])
+ 'test'
+ """
return "".join(in_list)
"""
Remove html code contained into the given string.
- *Examples:*
-
- >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
- >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
+ >>> strip_html('test: <a href="foo/bar">click here</a>')
+ 'test: '
+ >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True)
+ 'test: click here'
"""
if not is_string(in_str):
raise ValueError(in_str)
def asciify(in_str: str) -> str:
"""
- Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
- (eg: ó -> o, Ë -> E, ç -> c...).
-
- **Bear in mind**: Some chars may be lost if impossible to translate.
+ Force string content to be ascii-only by translating all non-ascii
+ chars into the closest possible representation (eg: ó -> o, Ë ->
+ E, ç -> c...).
- *Example:*
+ N.B. Some chars may be lost if impossible to translate.
- >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
+ >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË')
+ 'eeuuooaaeynAAACIINOE'
"""
if not is_string(in_str):
raise ValueError(in_str)
- all chars are encoded as ascii (by using `asciify()`)
- is safe for URL
- *Examples:*
-
- >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
- >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
+ >>> slugify('Top 10 Reasons To Love Dogs!!!')
+ 'top-10-reasons-to-love-dogs'
+ >>> slugify('Mönstér Mägnët')
+ 'monster-magnet'
"""
if not is_string(in_str):
raise ValueError(in_str)
"""
Turns a string into a boolean based on its content (CASE INSENSITIVE).
- A positive boolean (True) is returned if the string value is one of the following:
+ A positive boolean (True) is returned if the string value is one
+ of the following:
- "true"
- "1"
- "y"
Otherwise False is returned.
+
+ >>> to_bool('True')
+ True
+ >>> to_bool('1')
+ True
+ >>> to_bool('yes')
+ True
+ >>> to_bool('no')
+ False
+ >>> to_bool('huh?')
+ False
"""
if not is_string(in_str):
raise ValueError(in_str)
def to_date(in_str: str) -> Optional[datetime.date]:
+ """
+ Parses a date string. See DateParser docs for details.
+ """
import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
def valid_date(in_str: str) -> bool:
+ """
+ True if the string represents a valid date.
+ """
import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
def to_datetime(in_str: str) -> Optional[datetime.datetime]:
+ """
+ Parses a datetime string. See DateParser docs for more info.
+ """
import dateparse.dateparse_utils as dp
try:
d = dp.DateParser()
def valid_datetime(in_str: str) -> bool:
+ """
+ True if the string represents a valid datetime.
+ """
_ = to_datetime(in_str)
if _ is not None:
return True
def dedent(in_str: str) -> str:
"""
Removes tab indentation from multi line strings (inspired by analogous Scala function).
-
- *Example:*
-
- >>> strip_margin('''
- >>> line 1
- >>> line 2
- >>> line 3
- >>> ''')
- >>> # returns:
- >>> '''
- >>> line 1
- >>> line 2
- >>> line 3
- >>> '''
"""
if not is_string(in_str):
raise ValueError(in_str)
def indent(in_str: str, amount: int) -> str:
+ """
+ Indents string by prepending amount spaces.
+
+ >>> indent('This is a test', 4)
+ ' This is a test'
+
+ """
if not is_string(in_str):
raise ValueError(in_str)
line_separator = '\n'
def sprintf(*args, **kwargs) -> str:
+ """String printf, like in C"""
ret = ""
sep = kwargs.pop("sep", None)
class SprintfStdout(object):
+ """
+ A context manager that captures outputs to stdout.
+
+ with SprintfStdout() as buf:
+ print("test")
+ print(buf())
+
+ 'test\n'
+ """
def __init__(self) -> None:
self.destination = io.StringIO()
self.recorder = None
def is_are(n: int) -> str:
+ """Is or are?
+
+ >>> is_are(1)
+ 'is'
+ >>> is_are(2)
+ 'are'
+
+ """
if n == 1:
return "is"
return "are"
def pluralize(n: int) -> str:
+ """Add an s?
+
+ >>> pluralize(15)
+ 's'
+ >>> count = 1
+ >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+ There is 1 file.
+ >>> count = 4
+ >>> print(f'There {is_are(count)} {count} file{pluralize(count)}.')
+ There are 4 files.
+
+ """
if n == 1:
return ""
return "s"
def thify(n: int) -> str:
+ """Return the proper ordinal suffix for a number.
+
+ >>> thify(1)
+ 'st'
+ >>> thify(33)
+ 'rd'
+ >>> thify(16)
+ 'th'
+
+ """
digit = str(n)
assert is_integer_number(digit)
digit = digit[-1:]
def ngrams(txt: str, n: int):
+ """Return the ngrams from a string.
+
+ >>> [x for x in ngrams('This is a test', 2)]
+ ['This is', 'is a', 'a test']
+
+ """
words = txt.split()
return ngrams_presplit(words, n)
return ngrams(txt, 3)
-def shuffle_columns(
- txt: Iterable[str],
- specs: Iterable[Iterable[int]],
+def shuffle_columns_into_list(
+ input_lines: Iterable[str],
+ column_specs: Iterable[Iterable[int]],
delim=''
) -> Iterable[str]:
+ """Helper to shuffle / parse columnar data and return the results as a
+ list. The column_specs argument is an iterable collection of
+ numeric sequences that indicate one or more column numbers to
+ copy.
+
+ >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
+ >>> shuffle_columns_into_list(
+ ... cols,
+ ... [ [8], [2, 3], [5, 6, 7] ],
+ ... delim=' ',
+ ... )
+ ['acl_test.py', 'scott wheel', 'Jul 9 11:34']
+
+ """
out = []
- for spec in specs:
+
+ # Column specs map input lines' columns into outputs.
+ # [col1, col2...]
+ for spec in column_specs:
chunk = ''
for n in spec:
- chunk = chunk + delim + txt[n]
+ chunk = chunk + delim + input_lines[n]
chunk = chunk.strip(delim)
out.append(chunk)
return out
def shuffle_columns_into_dict(
- txt: Iterable[str],
- specs: Iterable[Tuple[str, Iterable[int]]],
+ input_lines: Iterable[str],
+ column_specs: Iterable[Tuple[str, Iterable[int]]],
delim=''
) -> Dict[str, str]:
+ """Helper to shuffle / parse columnar data and return the results
+ as a dict.
+
+ >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split()
+ >>> shuffle_columns_into_dict(
+ ... cols,
+ ... [ ('filename', [8]), ('owner', [2, 3]), ('mtime', [5, 6, 7]) ],
+ ... delim=' ',
+ ... )
+ {'filename': 'acl_test.py', 'owner': 'scott wheel', 'mtime': 'Jul 9 11:34'}
+
+ """
out = {}
- for spec in specs:
+
+ # Column specs map input lines' columns into outputs.
+ # "key", [col1, col2...]
+ for spec in column_specs:
chunk = ''
for n in spec[1]:
- chunk = chunk + delim + txt[n]
+ chunk = chunk + delim + input_lines[n]
chunk = chunk.strip(delim)
out[spec[0]] = chunk
return out
def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str:
+ """Interpolate a string with data from a dict.
+
+ >>> interpolate_using_dict('This is a {adjective} {noun}.',
+ ... {'adjective': 'good', 'noun': 'example'})
+ 'This is a good example.'
+
+ """
return sprintf(txt.format(**values), end='')
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()