string_utils.py

   1 #!/usr/bin/env python3
   2
   3 import datetime
   4 from itertools import zip_longest
   5 import json
   6 import logging
   7 import random
   8 import re
   9 import string
  10 from typing import Any, List, Optional
  11 import unicodedata
  12 from uuid import uuid4
  13
  14 logger = logging.getLogger(__name__)
  15
  16 NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  17
  18 HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")
  19
  20 OCT_NUMBER_RE = re.compile(r"^([+|-]?)0[O|o]([0-7]+)$")
  21
  22 BIN_NUMBER_RE = re.compile(r"^([+|-]?)0[B|b]([0|1]+)$")
  23
  24 URLS_RAW_STRING = (
  25     r"([a-z-]+://)"  # scheme
  26     r"([a-z_\d-]+:[a-z_\d-]+@)?"  # user:password
  27     r"(www\.)?"  # www.
  28     r"((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)"  # domain
  29     r"(:\d{2,})?"  # port number
  30     r"(/[a-z\d_%+-]*)*"  # folders
  31     r"(\.[a-z\d_%+-]+)*"  # file extension
  32     r"(\?[a-z\d_+%-=]*)?"  # query string
  33     r"(#\S*)?"  # hash
  34 )
  35
  36 URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)
  37
  38 URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
  39
  40 ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')
  41
  42 EMAILS_RAW_STRING = r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"
  43
  44 EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))
  45
  46 EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
  47
  48 CAMEL_CASE_TEST_RE = re.compile(
  49     r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$"
  50 )
  51
  52 CAMEL_CASE_REPLACE_RE = re.compile(r"([a-z]|[A-Z]+)(?=[A-Z])")
  53
  54 SNAKE_CASE_TEST_RE = re.compile(
  55     r"^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$", re.IGNORECASE
  56 )
  57
  58 SNAKE_CASE_TEST_DASH_RE = re.compile(
  59     r"([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$", re.IGNORECASE
  60 )
  61
  62 SNAKE_CASE_REPLACE_RE = re.compile(r"(_)([a-z\d])")
  63
  64 SNAKE_CASE_REPLACE_DASH_RE = re.compile(r"(-)([a-z\d])")
  65
  66 CREDIT_CARDS = {
  67     "VISA": re.compile(r"^4\d{12}(?:\d{3})?$"),
  68     "MASTERCARD": re.compile(r"^5[1-5]\d{14}$"),
  69     "AMERICAN_EXPRESS": re.compile(r"^3[47]\d{13}$"),
  70     "DINERS_CLUB": re.compile(r"^3(?:0[0-5]|[68]\d)\d{11}$"),
  71     "DISCOVER": re.compile(r"^6(?:011|5\d{2})\d{12}$"),
  72     "JCB": re.compile(r"^(?:2131|1800|35\d{3})\d{11}$"),
  73 }
  74
  75 JSON_WRAPPER_RE = re.compile(
  76     r"^\s*[\[{]\s*(.*)\s*[\}\]]\s*$", re.MULTILINE | re.DOTALL
  77 )
  78
  79 UUID_RE = re.compile(
  80     r"^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$", re.IGNORECASE
  81 )
  82
  83 UUID_HEX_OK_RE = re.compile(
  84     r"^[a-f\d]{8}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{12}$",
  85     re.IGNORECASE,
  86 )
  87
  88 SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  89
  90 IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  91
  92 MAC_ADDRESS_RE = re.compile(
  93     r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
  94 )
  95
  96 WORDS_COUNT_RE = re.compile(
  97     r"\W*[^\W_]+\W*", re.IGNORECASE | re.MULTILINE | re.UNICODE
  98 )
  99
 100 HTML_RE = re.compile(
 101     r"((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)",
 102     re.IGNORECASE | re.MULTILINE | re.DOTALL,
 103 )
 104
 105 HTML_TAG_ONLY_RE = re.compile(
 106     r"(<([a-z]+:)?[a-z]+[^>]*/?>|</([a-z]+:)?[a-z]+>|<!--.*-->|<!doctype.*>)",
 107     re.IGNORECASE | re.MULTILINE | re.DOTALL,
 108 )
 109
 110 SPACES_RE = re.compile(r"\s")
 111
 112 NO_LETTERS_OR_NUMBERS_RE = re.compile(
 113     r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE
 114 )
 115
 116 MARGIN_RE = re.compile(r"^[^\S\r\n]+")
 117
 118 ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
 119
 120 NUM_SUFFIXES = {
 121     "Pb": (1024 ** 5),
 122     "P": (1024 ** 5),
 123     "Tb": (1024 ** 4),
 124     "T": (1024 ** 4),
 125     "Gb": (1024 ** 3),
 126     "G": (1024 ** 3),
 127     "Mb": (1024 ** 2),
 128     "M": (1024 ** 2),
 129     "Kb": (1024 ** 1),
 130     "K": (1024 ** 1),
 131 }
 132
 133
 134 def is_none_or_empty(in_str: Optional[str]) -> bool:
 135     return in_str is None or len(in_str.strip()) == 0
 136
 137
 138 def is_string(obj: Any) -> bool:
 139     """
 140     Checks if an object is a string.
 141     """
 142     return isinstance(obj, str)
 143
 144
 145 def is_empty_string(in_str: Any) -> bool:
 146     return is_string(in_str) and in_str.strip() == ""
 147
 148
 149 def is_full_string(in_str: Any) -> bool:
 150     return is_string(in_str) and in_str.strip() != ""
 151
 152
 153 def is_number(in_str: str) -> bool:
 154     """
 155     Checks if a string is a valid number.
 156     """
 157     if not is_string(in_str):
 158         raise ValueError(in_str)
 159     return NUMBER_RE.match(in_str) is not None
 160
 161
 162 def is_integer_number(in_str: str) -> bool:
 163     """
 164     Checks whether the given string represents an integer or not.
 165
 166     An integer may be signed or unsigned or use a "scientific notation".
 167
 168     *Examples:*
 169
 170     >>> is_integer('42') # returns true
 171     >>> is_integer('42.0') # returns false
 172     """
 173     return (
 174         (is_number(in_str) and "." not in in_str) or
 175         is_hexidecimal_integer_number(in_str) or
 176         is_octal_integer_number(in_str) or
 177         is_binary_integer_number(in_str)
 178     )
 179
 180
 181 def is_hexidecimal_integer_number(in_str: str) -> bool:
 182     if not is_string(in_str):
 183         raise ValueError(in_str)
 184     return HEX_NUMBER_RE.match(in_str) is not None
 185
 186
 187 def is_octal_integer_number(in_str: str) -> bool:
 188     if not is_string(in_str):
 189         raise ValueError(in_str)
 190     return OCT_NUMBER_RE.match(in_str) is not None
 191
 192
 193 def is_binary_integer_number(in_str: str) -> bool:
 194     if not is_string(in_str):
 195         raise ValueError(in_str)
 196     return BIN_NUMBER_RE.match(in_str) is not None
 197
 198
 199 def to_int(in_str: str) -> int:
 200     if not is_string(in_str):
 201         raise ValueError(in_str)
 202     if is_binary_integer_number(in_str):
 203         return int(in_str, 2)
 204     if is_octal_integer_number(in_str):
 205         return int(in_str, 8)
 206     if is_hexidecimal_integer_number(in_str):
 207         return int(in_str, 16)
 208     return int(in_str)
 209
 210
 211 def is_decimal_number(in_str: str) -> bool:
 212     """
 213     Checks whether the given string represents a decimal or not.
 214
 215     A decimal may be signed or unsigned or use a "scientific notation".
 216
 217     >>> is_decimal('42.0') # returns true
 218     >>> is_decimal('42') # returns false
 219     """
 220     return is_number(in_str) and "." in in_str
 221
 222
 223 def strip_escape_sequences(in_str: str) -> str:
 224     in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
 225     return in_str
 226
 227
 228 def add_thousands_separator(
 229         in_str: str,
 230         *,
 231         separator_char = ',',
 232         places = 3
 233 ) -> str:
 234     if isinstance(in_str, int):
 235         in_str = f'{in_str}'
 236     if is_number(in_str):
 237         return _add_thousands_separator(
 238             in_str,
 239             separator_char = separator_char,
 240             places = places
 241         )
 242     raise ValueError(in_str)
 243
 244
 245 def _add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
 246     decimal_part = ""
 247     if '.' in in_str:
 248         (in_str, decimal_part) = in_str.split('.')
 249     tmp = [iter(in_str[::-1])] * places
 250     ret = separator_char.join(
 251         "".join(x) for x in zip_longest(*tmp, fillvalue=""))[::-1]
 252     if len(decimal_part) > 0:
 253         ret += '.'
 254         ret += decimal_part
 255     return ret
 256
 257
# Full url example:
# scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
 260 def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
 261     """
 262     Check if a string is a valid url.
 263
 264     *Examples:*
 265
 266     >>> is_url('http://www.mysite.com') # returns true
 267     >>> is_url('https://mysite.com') # returns true
 268     >>> is_url('.mysite.com') # returns false
 269     """
 270     if not is_full_string(in_str):
 271         return False
 272
 273     valid = URL_RE.match(in_str) is not None
 274
 275     if allowed_schemes:
 276         return valid and any([in_str.startswith(s) for s in allowed_schemes])
 277     return valid
 278
 279
 280 def is_email(in_str: Any) -> bool:
 281     """
 282     Check if a string is a valid email.
 283
 284     Reference: https://tools.ietf.org/html/rfc3696#section-3
 285
 286     *Examples:*
 287
 288     >>> is_email('[email protected]') # returns true
 289     >>> is_email('@gmail.com') # returns false
 290     """
 291     if (
 292         not is_full_string(in_str)
 293         or len(in_str) > 320
 294         or in_str.startswith(".")
 295     ):
 296         return False
 297
 298     try:
 299         # we expect 2 tokens, one before "@" and one after, otherwise
 300         # we have an exception and the email is not valid.
 301         head, tail = in_str.split("@")
 302
 303         # head's size must be <= 64, tail <= 255, head must not start
 304         # with a dot or contain multiple consecutive dots.
 305         if (
 306             len(head) > 64
 307             or len(tail) > 255
 308             or head.endswith(".")
 309             or (".." in head)
 310         ):
 311             return False
 312
 313         # removes escaped spaces, so that later on the test regex will
 314         # accept the string.
 315         head = head.replace("\\ ", "")
 316         if head.startswith('"') and head.endswith('"'):
 317             head = head.replace(" ", "")[1:-1]
 318         return EMAIL_RE.match(head + "@" + tail) is not None
 319
 320     except ValueError:
 321         # borderline case in which we have multiple "@" signs but the
 322         # head part is correctly escaped.
 323         if ESCAPED_AT_SIGN.search(in_str) is not None:
 324             # replace "@" with "a" in the head
 325             return is_email(ESCAPED_AT_SIGN.sub("a", in_str))
 326         return False
 327
 328
 329 def suffix_string_to_number(in_str: str) -> Optional[int]:
 330     """Take a string like "33Gb" and convert it into a number (of bytes)
 331     like 34603008.  Return None if the input string is not valid.
 332     """
 333
 334     def suffix_capitalize(s: str) -> str:
 335         if len(s) == 1:
 336             return s.upper()
 337         elif len(s) == 2:
 338             return f"{s[0].upper()}{s[1].lower()}"
 339         return suffix_capitalize(s[0:1])
 340
 341     if is_string(in_str):
 342         if is_integer_number(in_str):
 343             return to_int(in_str)
 344         suffixes = [in_str[-2:], in_str[-1:]]
 345         rest = [in_str[:-2], in_str[:-1]]
 346         for x in range(len(suffixes)):
 347             s = suffixes[x]
 348             s = suffix_capitalize(s)
 349             multiplier = NUM_SUFFIXES.get(s, None)
 350             if multiplier is not None:
 351                 r = rest[x]
 352                 if is_integer_number(r):
 353                     return int(r) * multiplier
 354     return None
 355
 356
 357 def number_to_suffix_string(num: int) -> Optional[str]:
 358     """Take a number (of bytes) and returns a string like "43.8Gb".
 359     Returns none if the input is invalid.
 360     """
 361     d = 0.0
 362     suffix = None
 363     for (sfx, size) in NUM_SUFFIXES.items():
 364         if num >= size:
 365             d = num / size
 366             suffix = sfx
 367             break
 368     if suffix is not None:
 369         return f"{d:.1f}{suffix}"
 370     else:
 371         return f'{num:d}'
 372
 373
 374 def is_credit_card(in_str: Any, card_type: str = None) -> bool:
 375     """
 376     Checks if a string is a valid credit card number.
 377     If card type is provided then it checks against that specific type only,
 378     otherwise any known credit card number will be accepted.
 379
 380     Supported card types are the following:
 381
 382     - VISA
 383     - MASTERCARD
 384     - AMERICAN_EXPRESS
 385     - DINERS_CLUB
 386     - DISCOVER
 387     - JCB
 388     """
 389     if not is_full_string(in_str):
 390         return False
 391
 392     if card_type is not None:
 393         if card_type not in CREDIT_CARDS:
 394             raise KeyError(
 395                 f'Invalid card type "{card_type}". Valid types are: {CREDIT_CARDS.keys()}'
 396             )
 397         return CREDIT_CARDS[card_type].match(in_str) is not None
 398     for c in CREDIT_CARDS:
 399         if CREDIT_CARDS[c].match(in_str) is not None:
 400             return True
 401     return False
 402
 403
 404 def is_camel_case(in_str: Any) -> bool:
 405     """
 406     Checks if a string is formatted as camel case.
 407
 408     A string is considered camel case when:
 409
 410     - it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9])
 411     - it contains both lowercase and uppercase letters
 412     - it does not start with a number
 413     """
 414     return (
 415         is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
 416     )
 417
 418
 419 def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
 420     """
 421     Checks if a string is formatted as "snake case".
 422
 423     A string is considered snake case when:
 424
 425     - it's composed only by lowercase/uppercase letters and digits
 426     - it contains at least one underscore (or provided separator)
 427     - it does not start with a number
 428     """
 429     if is_full_string(in_str):
 430         re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
 431         re_template = (
 432             r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)"
 433         )
 434         r = re_map.get(
 435             separator,
 436             re.compile(
 437                 re_template.format(sign=re.escape(separator)), re.IGNORECASE
 438             ),
 439         )
 440         return r.match(in_str) is not None
 441     return False
 442
 443
 444 def is_json(in_str: Any) -> bool:
 445     """
 446     Check if a string is a valid json.
 447
 448     *Examples:*
 449
 450     >>> is_json('{"name": "Peter"}') # returns true
 451     >>> is_json('[1, 2, 3]') # returns true
 452     >>> is_json('{nope}') # returns false
 453     """
 454     if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
 455         try:
 456             return isinstance(json.loads(in_str), (dict, list))
 457         except (TypeError, ValueError, OverflowError):
 458             pass
 459     return False
 460
 461
 462 def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
 463     """
 464     Check if a string is a valid UUID.
 465
 466     *Example:*
 467
 468     >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
 469     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
 470     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
 471     """
 472     # string casting is used to allow UUID itself as input data type
 473     s = str(in_str)
 474     if allow_hex:
 475         return UUID_HEX_OK_RE.match(s) is not None
 476     return UUID_RE.match(s) is not None
 477
 478
 479 def is_ip_v4(in_str: Any) -> bool:
 480     """
 481     Checks if a string is a valid ip v4.
 482
 483     *Examples:*
 484
 485     >>> is_ip_v4('255.200.100.75') # returns true
 486     >>> is_ip_v4('nope') # returns false (not an ip)
 487     >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
 488     """
 489     if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
 490         return False
 491
 492     # checks that each entry in the ip is in the valid range (0 to 255)
 493     for token in in_str.split("."):
 494         if not 0 <= int(token) <= 255:
 495             return False
 496     return True
 497
 498
 499 def extract_ip_v4(in_str: Any) -> Optional[str]:
 500     """
 501     Extracts the IPv4 chunk of a string or None.
 502     """
 503     if not is_full_string(in_str):
 504         return None
 505     in_str.strip()
 506     m = SHALLOW_IP_V4_RE.match(in_str)
 507     if m is not None:
 508         return m.group(0)
 509     return None
 510
 511
 512 def is_ip_v6(in_str: Any) -> bool:
 513     """
 514     Checks if a string is a valid ip v6.
 515
 516     *Examples:*
 517
 518     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 519     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
 520     """
 521     return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
 522
 523
 524 def extract_ip_v6(in_str: Any) -> Optional[str]:
 525     """
 526     Extract IPv6 chunk or None.
 527     """
 528     if not is_full_string(in_str):
 529         return None
 530     in_str.strip()
 531     m = IP_V6_RE.match(in_str)
 532     if m is not None:
 533         return m.group(0)
 534     return None
 535
 536
 537 def is_ip(in_str: Any) -> bool:
 538     """
 539     Checks if a string is a valid ip (either v4 or v6).
 540
 541     *Examples:*
 542
 543     >>> is_ip('255.200.100.75') # returns true
 544     >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 545     >>> is_ip('1.2.3') # returns false
 546     """
 547     return is_ip_v6(in_str) or is_ip_v4(in_str)
 548
 549
 550 def extract_ip(in_str: Any) -> Optional[str]:
 551     """Extract the IP address or None."""
 552     ip = extract_ip_v4(in_str)
 553     if ip is None:
 554         ip = extract_ip_v6(in_str)
 555     return ip
 556
 557
 558 def is_mac_address(in_str: Any) -> bool:
 559     """Return True if in_str is a valid MAC address false otherwise."""
 560     return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
 561
 562
 563 def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
 564     """Extract the MAC address from in_str"""
 565     if not is_full_string(in_str):
 566         return None
 567     in_str.strip()
 568     m = MAC_ADDRESS_RE.match(in_str)
 569     if m is not None:
 570         mac = m.group(0)
 571         mac.replace(":", separator)
 572         mac.replace("-", separator)
 573         return mac
 574     return None
 575
 576
 577 def is_slug(in_str: Any, separator: str = "-") -> bool:
 578     """
 579     Checks if a given string is a slug (as created by `slugify()`).
 580
 581     *Examples:*
 582
 583     >>> is_slug('my-blog-post-title') # returns true
 584     >>> is_slug('My blog post title') # returns false
 585
 586     :param in_str: String to check.
 587     :type in_str: str
 588     :param separator: Join sign used by the slug.
 589     :type separator: str
 590     :return: True if slug, false otherwise.
 591     """
 592     if not is_full_string(in_str):
 593         return False
 594     rex = r"^([a-z\d]+" + re.escape(separator) + r"*?)*[a-z\d]$"
 595     return re.match(rex, in_str) is not None
 596
 597
 598 def contains_html(in_str: str) -> bool:
 599     """
 600     Checks if the given string contains HTML/XML tags.
 601
 602     By design, this function matches ANY type of tag, so don't expect to use it
 603     as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
 604
 605     *Examples:*
 606
 607     >>> contains_html('my string is <strong>bold</strong>') # returns true
 608     >>> contains_html('my string is not bold') # returns false
 609     """
 610     if not is_string(in_str):
 611         raise ValueError(in_str)
 612     return HTML_RE.search(in_str) is not None
 613
 614
 615 def words_count(in_str: str) -> int:
 616     """
 617     Returns the number of words contained into the given string.
 618
 619     This method is smart, it does consider only sequence of one or more letter and/or numbers
 620     as "words", so a string like this: "! @ # % ... []" will return zero!
 621     Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
 622     will be 4 not 1 (even if there are no spaces in the string).
 623
 624     *Examples:*
 625
 626     >>> words_count('hello world') # returns 2
 627     >>> words_count('one,two,three.stop') # returns 4
 628     """
 629     if not is_string(in_str):
 630         raise ValueError(in_str)
 631     return len(WORDS_COUNT_RE.findall(in_str))
 632
 633
 634 def generate_uuid(as_hex: bool = False) -> str:
 635     """
 636     Generated an UUID string (using `uuid.uuid4()`).
 637
 638     *Examples:*
 639
 640     >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
 641     >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
 642     """
 643     uid = uuid4()
 644     if as_hex:
 645         return uid.hex
 646     return str(uid)
 647
 648
 649 def generate_random_alphanumeric_string(size: int) -> str:
 650     """
 651     Returns a string of the specified size containing random
 652     characters (uppercase/lowercase ascii letters and digits).
 653
 654     *Example:*
 655
 656     >>> random_string(9) # possible output: "cx3QQbzYg"
 657     """
 658     if size < 1:
 659         raise ValueError("size must be >= 1")
 660     chars = string.ascii_letters + string.digits
 661     buffer = [random.choice(chars) for _ in range(size)]
 662     return from_char_list(buffer)
 663
 664
 665 def reverse(in_str: str) -> str:
 666     """
 667     Returns the string with its chars reversed.
 668     """
 669     if not is_string(in_str):
 670         raise ValueError(in_str)
 671     return in_str[::-1]
 672
 673
 674 def camel_case_to_snake_case(in_str, *, separator="_"):
 675     """
 676     Convert a camel case string into a snake case one.
 677     (The original string is returned if is not a valid camel case string)
 678     """
 679     if not is_string(in_str):
 680         raise ValueError(in_str)
 681     if not is_camel_case(in_str):
 682         return in_str
 683     return CAMEL_CASE_REPLACE_RE.sub(
 684         lambda m: m.group(1) + separator, in_str
 685     ).lower()
 686
 687
 688 def snake_case_to_camel_case(
 689     in_str: str, *, upper_case_first: bool = True, separator: str = "_"
 690 ) -> str:
 691     """
 692     Convert a snake case string into a camel case one.
 693     (The original string is returned if is not a valid snake case string)
 694     """
 695     if not is_string(in_str):
 696         raise ValueError(in_str)
 697     if not is_snake_case(in_str, separator=separator):
 698         return in_str
 699     tokens = [s.title() for s in in_str.split(separator) if is_full_string(s)]
 700     if not upper_case_first:
 701         tokens[0] = tokens[0].lower()
 702     return from_char_list(tokens)
 703
 704
 705 def to_char_list(in_str: str) -> List[str]:
 706     if not is_string(in_str):
 707         return []
 708     return list(in_str)
 709
 710
 711 def from_char_list(in_list: List[str]) -> str:
 712     return "".join(in_list)
 713
 714
 715 def shuffle(in_str: str) -> str:
 716     """Return a new string containing same chars of the given one but in
 717     a randomized order.
 718     """
 719     if not is_string(in_str):
 720         raise ValueError(in_str)
 721
 722     # turn the string into a list of chars
 723     chars = to_char_list(in_str)
 724     random.shuffle(chars)
 725     return from_char_list(chars)
 726
 727
 728 def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
 729     """
 730     Remove html code contained into the given string.
 731
 732     *Examples:*
 733
 734     >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
 735     >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
 736     """
 737     if not is_string(in_str):
 738         raise ValueError(in_str)
 739     r = HTML_TAG_ONLY_RE if keep_tag_content else HTML_RE
 740     return r.sub("", in_str)
 741
 742
 743 def asciify(in_str: str) -> str:
 744     """
 745     Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
 746     (eg: ó -> o, Ë -> E, ç -> c...).
 747
 748     **Bear in mind**: Some chars may be lost if impossible to translate.
 749
 750     *Example:*
 751
 752     >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
 753     """
 754     if not is_string(in_str):
 755         raise ValueError(in_str)
 756
 757     # "NFKD" is the algorithm which is able to successfully translate
 758     # the most of non-ascii chars.
 759     normalized = unicodedata.normalize("NFKD", in_str)
 760
 761     # encode string forcing ascii and ignore any errors
 762     # (unrepresentable chars will be stripped out)
 763     ascii_bytes = normalized.encode("ascii", "ignore")
 764
 765     # turns encoded bytes into an utf-8 string
 766     return ascii_bytes.decode("utf-8")
 767
 768
 769 def slugify(in_str: str, *, separator: str = "-") -> str:
 770     """
 771     Converts a string into a "slug" using provided separator.
 772     The returned string has the following properties:
 773
 774     - it has no spaces
 775     - all letters are in lower case
 776     - all punctuation signs and non alphanumeric chars are removed
 777     - words are divided using provided separator
 778     - all chars are encoded as ascii (by using `asciify()`)
 779     - is safe for URL
 780
 781     *Examples:*
 782
 783     >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
 784     >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
 785     """
 786     if not is_string(in_str):
 787         raise ValueError(in_str)
 788
 789     # replace any character that is NOT letter or number with spaces
 790     out = NO_LETTERS_OR_NUMBERS_RE.sub(" ", in_str.lower()).strip()
 791
 792     # replace spaces with join sign
 793     out = SPACES_RE.sub(separator, out)
 794
 795     # normalize joins (remove duplicates)
 796     out = re.sub(re.escape(separator) + r"+", separator, out)
 797     return asciify(out)
 798
 799
 800 def to_bool(in_str: str) -> bool:
 801     """
 802     Turns a string into a boolean based on its content (CASE INSENSITIVE).
 803
 804     A positive boolean (True) is returned if the string value is one of the following:
 805
 806     - "true"
 807     - "1"
 808     - "yes"
 809     - "y"
 810
 811     Otherwise False is returned.
 812     """
 813     if not is_string(in_str):
 814         raise ValueError(in_str)
 815     return in_str.lower() in ("true", "1", "yes", "y", "t")
 816
 817
 818 def to_date(in_str: str) -> Optional[datetime.date]:
 819     import dateparse.dateparse_utils as dp
 820     try:
 821         d = dp.DateParser()
 822         d.parse(in_str)
 823         return d.get_date()
 824     except dp.ParseException:
 825         logger.warning(f'Unable to parse date {in_str}.')
 826     return None
 827
 828
 829 def valid_date(in_str: str) -> bool:
 830     import dateparse.dateparse_utils as dp
 831     try:
 832         d = dp.DateParser()
 833         _ = d.parse(in_str)
 834         return True
 835     except dp.ParseException:
 836         logger.warning(f'Unable to parse date {in_str}.')
 837     return False
 838
 839
 840 def to_datetime(in_str: str) -> Optional[datetime.datetime]:
 841     import dateparse.dateparse_utils as dp
 842     try:
 843         d = dp.DateParser()
 844         dt = d.parse(in_str)
 845         if type(dt) == datetime.datetime:
 846             return dt
 847     except ValueError:
 848         logger.warning(f'Unable to parse datetime {in_str}.')
 849     return None
 850
 851
 852 def valid_datetime(in_str: str) -> bool:
 853     _ = to_datetime(in_str)
 854     if _ is not None:
 855         return True
 856     logger.warning(f'Unable to parse datetime {in_str}.')
 857     return False
 858
 859
 860 def dedent(in_str: str) -> str:
 861     """
 862     Removes tab indentation from multi line strings (inspired by analogous Scala function).
 863
 864     *Example:*
 865
 866     >>> strip_margin('''
 867     >>>                 line 1
 868     >>>                 line 2
 869     >>>                 line 3
 870     >>> ''')
 871     >>> # returns:
 872     >>> '''
 873     >>> line 1
 874     >>> line 2
 875     >>> line 3
 876     >>> '''
 877     """
 878     if not is_string(in_str):
 879         raise ValueError(in_str)
 880     line_separator = '\n'
 881     lines = [MARGIN_RE.sub('', line) for line in in_str.split(line_separator)]
 882     return line_separator.join(lines)
 883
 884
 885 def indent(in_str: str, amount: int) -> str:
 886     if not is_string(in_str):
 887         raise ValueError(in_str)
 888     line_separator = '\n'
 889     lines = [" " * amount + line for line in in_str.split(line_separator)]
 890     return line_separator.join(lines)
 891
 892
 893 def sprintf(*args, **kwargs) -> str:
 894     ret = ""
 895
 896     sep = kwargs.pop("sep", None)
 897     if sep is not None:
 898         if not isinstance(sep, str):
 899             raise TypeError("sep must be None or a string")
 900
 901     end = kwargs.pop("end", None)
 902     if end is not None:
 903         if not isinstance(end, str):
 904             raise TypeError("end must be None or a string")
 905
 906     if kwargs:
 907         raise TypeError("invalid keyword arguments to sprint()")
 908
 909     if sep is None:
 910         sep = " "
 911     if end is None:
 912         end = "\n"
 913     for i, arg in enumerate(args):
 914         if i:
 915             ret += sep
 916         if isinstance(arg, str):
 917             ret += arg
 918         else:
 919             ret += str(arg)
 920     ret += end
 921     return ret
 922
 923
 924 def is_are(n: int) -> str:
 925     if n == 1:
 926         return "is"
 927     return "are"
 928
 929
 930 def pluralize(n: int) -> str:
 931     if n == 1:
 932         return ""
 933     return "s"
 934
 935
 936 def thify(n: int) -> str:
 937     digit = str(n)
 938     assert is_integer_number(digit)
 939     digit = digit[-1:]
 940     if digit == "1":
 941         return "st"
 942     elif digit == "2":
 943         return "nd"
 944     elif digit == "3":
 945         return "rd"
 946     else:
 947         return "th"