string_utils.py

   1 #!/usr/bin/env python3
   2
   3 from itertools import zip_longest
   4 import json
   5 import random
   6 import re
   7 import string
   8 from typing import Any, List, Optional
   9 import unicodedata
  10 from uuid import uuid4
  11
  12 NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  13
  14 HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")
  15
  16 OCT_NUMBER_RE = re.compile(r"^([+|-]?)0[O|o]([0-7]+)$")
  17
  18 BIN_NUMBER_RE = re.compile(r"^([+|-]?)0[B|b]([0|1]+)$")
  19
# Regex source for a URL, assembled from adjacent raw-string pieces (one per
# URL component).  It is compiled twice below, always with re.IGNORECASE.
URLS_RAW_STRING = (
    r"([a-z-]+://)"  # scheme
    r"([a-z_\d-]+:[a-z_\d-]+@)?"  # user:password
    r"(www\.)?"  # www.
    r"((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)"  # domain
    r"(:\d{2,})?"  # port number
    r"(/[a-z\d_%+-]*)*"  # folders
    r"(\.[a-z\d_%+-]+)*"  # file extension
    # NOTE: "%-=" inside this class is a character RANGE ('%' through '='),
    # which is what lets '&' (and other punctuation in that range) through.
    r"(\?[a-z\d_+%-=]*)?"  # query string
    r"(#\S*)?"  # hash
)

# Anchored variant: the whole string must be exactly one URL.
URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)

# Unanchored variant wrapped in one extra capturing group.
URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
  35
# Matches a run of "@" that is followed (later in the string) by a double
# quote, or a backslash-escaped "@".
ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')

# Regex source for an e-mail address: local part, "@", domain, and a final
# label of 2 to 4 letters.  The domain classes are lowercase-only and the
# patterns below are compiled WITHOUT re.IGNORECASE, so upper-case domains
# do not match.
EMAILS_RAW_STRING = r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"

# Anchored variant: the whole string must be exactly one address.
EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))

# Unanchored variant wrapped in a capturing group.
EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
  43
# Whole-string test: ASCII letters (digits allowed only after the first
# lower/upper transition) with at least one lower->upper or upper->lower
# letter transition.
CAMEL_CASE_TEST_RE = re.compile(
    r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$"
)

# Captures one lowercase letter, or a run of uppercase letters, that is
# immediately followed by an uppercase letter (i.e. a word boundary).
CAMEL_CASE_REPLACE_RE = re.compile(r"([a-z]|[A-Z]+)(?=[A-Z])")

# Whole-string test for underscore-separated words (case-insensitive):
# either letters, optional digits, then "_", or leading underscores
# followed by at least one letter/digit.
SNAKE_CASE_TEST_RE = re.compile(
    r"^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$", re.IGNORECASE
)

# Same shape with "-" as the separator.
# NOTE: unlike SNAKE_CASE_TEST_RE this pattern has no leading "^"; it is only
# start-anchored when applied with .match().
SNAKE_CASE_TEST_DASH_RE = re.compile(
    r"([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$", re.IGNORECASE
)

# Captures an underscore and the lowercase letter/digit right after it.
SNAKE_CASE_REPLACE_RE = re.compile(r"(_)([a-z\d])")

# Captures a dash and the lowercase letter/digit right after it.
SNAKE_CASE_REPLACE_DASH_RE = re.compile(r"(-)([a-z\d])")
  61
# Maps a card-type name to an anchored pattern for that type's number
# format (leading digits and total length).  The patterns check the shape of
# the digit string only; no checksum is computed here.
CREDIT_CARDS = {
    "VISA": re.compile(r"^4\d{12}(?:\d{3})?$"),
    "MASTERCARD": re.compile(r"^5[1-5]\d{14}$"),
    "AMERICAN_EXPRESS": re.compile(r"^3[47]\d{13}$"),
    "DINERS_CLUB": re.compile(r"^3(?:0[0-5]|[68]\d)\d{11}$"),
    "DISCOVER": re.compile(r"^6(?:011|5\d{2})\d{12}$"),
    "JCB": re.compile(r"^(?:2131|1800|35\d{3})\d{11}$"),
}
  70
# Cheap pre-check for JSON text: optional whitespace, an opening "[" or "{",
# anything, then a closing "}" or "]".  It does not check that the opening
# and closing brackets are of the same kind.
JSON_WRAPPER_RE = re.compile(
    r"^\s*[\[{]\s*(.*)\s*[\}\]]\s*$", re.MULTILINE | re.DOTALL
)

# Canonical dashed UUID layout (8-4-4-4-12 hex digits), case-insensitive.
UUID_RE = re.compile(
    r"^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$", re.IGNORECASE
)

# Same layout with every dash optional, so a plain 32-digit hex string also
# matches.
UUID_HEX_OK_RE = re.compile(
    r"^[a-f\d]{8}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{12}$",
    re.IGNORECASE,
)

# "Shallow" IPv4 check: four dot-separated groups of 1 to 3 digits.  The
# numeric range of each group is NOT validated by this pattern.
SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  85
  86 IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  87
# Six pairs of hex digits separated by ":" or "-" (case-insensitive).
# NOTE: there is no end anchor, so with .match() any text after the sixth
# pair is ignored rather than rejected.
MAC_ADDRESS_RE = re.compile(
    r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
)

# One "word": a run of letters/digits (underscore excluded) together with
# any surrounding non-word characters.
WORDS_COUNT_RE = re.compile(
    r"\W*[^\W_]+\W*", re.IGNORECASE | re.MULTILINE | re.UNICODE
)

# An opening tag optionally followed by content up to a closing tag, or a
# comment, or a doctype declaration.
# NOTE: ".*" in the comment/doctype branches is greedy and DOTALL is set, so
# those branches run to the LAST "-->" / ">" in the input.
HTML_RE = re.compile(
    r"((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)",
    re.IGNORECASE | re.MULTILINE | re.DOTALL,
)

# Tags only (opening tag, closing tag, comment or doctype) without the text
# between an opening and a closing tag.
HTML_TAG_ONLY_RE = re.compile(
    r"(<([a-z]+:)?[a-z]+[^>]*/?>|</([a-z]+:)?[a-z]+>|<!--.*-->|<!doctype.*>)",
    re.IGNORECASE | re.MULTILINE | re.DOTALL,
)

# A single whitespace character.
SPACES_RE = re.compile(r"\s")

# A run of characters that are neither letters nor digits, or a run of
# underscores.
NO_LETTERS_OR_NUMBERS_RE = re.compile(
    r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE
)

# Leading whitespace of a string, excluding carriage returns and newlines.
MARGIN_RE = re.compile(r"^[^\S\r\n]+")
 113
 114 ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
 115
 116 NUM_SUFFIXES = {
 117     "Pb": (1024 ** 5),
 118     "P": (1024 ** 5),
 119     "Tb": (1024 ** 4),
 120     "T": (1024 ** 4),
 121     "Gb": (1024 ** 3),
 122     "G": (1024 ** 3),
 123     "Mb": (1024 ** 2),
 124     "M": (1024 ** 2),
 125     "Kb": (1024 ** 1),
 126     "K": (1024 ** 1),
 127 }
 128
 129
 130 def is_none_or_empty(in_str: Optional[str]) -> bool:
 131     return in_str is None or len(in_str.strip()) == 0
 132
 133
 134 def is_string(obj: Any) -> bool:
 135     """
 136     Checks if an object is a string.
 137     """
 138     return isinstance(obj, str)
 139
 140
 141 def is_empty_string(in_str: Any) -> bool:
 142     return is_string(in_str) and in_str.strip() == ""
 143
 144
 145 def is_full_string(in_str: Any) -> bool:
 146     return is_string(in_str) and in_str.strip() != ""
 147
 148
 149 def is_number(in_str: str) -> bool:
 150     """
 151     Checks if a string is a valid number.
 152     """
 153     if not is_string(in_str):
 154         raise ValueError(in_str)
 155     return NUMBER_RE.match(in_str) is not None
 156
 157
 158 def is_integer_number(in_str: str) -> bool:
 159     """
 160     Checks whether the given string represents an integer or not.
 161
 162     An integer may be signed or unsigned or use a "scientific notation".
 163
 164     *Examples:*
 165
 166     >>> is_integer('42') # returns true
 167     >>> is_integer('42.0') # returns false
 168     """
 169     return (
 170         (is_number(in_str) and "." not in in_str) or
 171         is_hexidecimal_integer_number(in_str) or
 172         is_octal_integer_number(in_str) or
 173         is_binary_integer_number(in_str)
 174     )
 175
 176
 177 def is_hexidecimal_integer_number(in_str: str) -> bool:
 178     if not is_string(in_str):
 179         raise ValueError(in_str)
 180     return HEX_NUMBER_RE.match(in_str) is not None
 181
 182
 183 def is_octal_integer_number(in_str: str) -> bool:
 184     if not is_string(in_str):
 185         raise ValueError(in_str)
 186     return OCT_NUMBER_RE.match(in_str) is not None
 187
 188
 189 def is_binary_integer_number(in_str: str) -> bool:
 190     if not is_string(in_str):
 191         raise ValueError(in_str)
 192     return BIN_NUMBER_RE.match(in_str) is not None
 193
 194
 195 def to_int(in_str: str) -> int:
 196     if not is_string(in_str):
 197         raise ValueError(in_str)
 198     if is_binary_integer_number(in_str):
 199         return int(in_str, 2)
 200     if is_octal_integer_number(in_str):
 201         return int(in_str, 8)
 202     if is_hexidecimal_integer_number(in_str):
 203         return int(in_str, 16)
 204     return int(in_str)
 205
 206
 207 def is_decimal_number(in_str: str) -> bool:
 208     """
 209     Checks whether the given string represents a decimal or not.
 210
 211     A decimal may be signed or unsigned or use a "scientific notation".
 212
 213     >>> is_decimal('42.0') # returns true
 214     >>> is_decimal('42') # returns false
 215     """
 216     return is_number(in_str) and "." in in_str
 217
 218
 219 def strip_escape_sequences(in_str: str) -> str:
 220     in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
 221     return in_str
 222
 223
 224 def add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
 225     if isinstance(in_str, int):
 226         in_str = f'{in_str}'
 227
 228     if is_number(in_str):
 229         return _add_thousands_separator(
 230             in_str,
 231             separator_char = separator_char,
 232             places = places
 233         )
 234     raise ValueError(in_str)
 235
 236
 237 def _add_thousands_separator(in_str: str, *, separator_char = ',', places = 3) -> str:
 238     decimal_part = ""
 239     if '.' in in_str:
 240         (in_str, decimal_part) = in_str.split('.')
 241     tmp = [iter(in_str[::-1])] * places
 242     ret = separator_char.join(
 243         "".join(x) for x in zip_longest(*tmp, fillvalue=""))[::-1]
 244     if len(decimal_part) > 0:
 245         ret += '.'
 246         ret += decimal_part
 247     return ret
 248
 249
 250
 251 # Full url example:
 252 # scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
 253 def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
 254     """
 255     Check if a string is a valid url.
 256
 257     *Examples:*
 258
 259     >>> is_url('http://www.mysite.com') # returns true
 260     >>> is_url('https://mysite.com') # returns true
 261     >>> is_url('.mysite.com') # returns false
 262     """
 263     if not is_full_string(in_str):
 264         return False
 265
 266     valid = URL_RE.match(in_str) is not None
 267
 268     if allowed_schemes:
 269         return valid and any([in_str.startswith(s) for s in allowed_schemes])
 270     return valid
 271
 272
 273 def is_email(in_str: Any) -> bool:
 274     """
 275     Check if a string is a valid email.
 276
 277     Reference: https://tools.ietf.org/html/rfc3696#section-3
 278
 279     *Examples:*
 280
 281     >>> is_email('my.email@the-provider.com') # returns true
 282     >>> is_email('@gmail.com') # returns false
 283     """
 284     if (
 285         not is_full_string(in_str)
 286         or len(in_str) > 320
 287         or in_str.startswith(".")
 288     ):
 289         return False
 290
 291     try:
 292         # we expect 2 tokens, one before "@" and one after, otherwise
 293         # we have an exception and the email is not valid.
 294         head, tail = in_str.split("@")
 295
 296         # head's size must be <= 64, tail <= 255, head must not start
 297         # with a dot or contain multiple consecutive dots.
 298         if (
 299             len(head) > 64
 300             or len(tail) > 255
 301             or head.endswith(".")
 302             or (".." in head)
 303         ):
 304             return False
 305
 306         # removes escaped spaces, so that later on the test regex will
 307         # accept the string.
 308         head = head.replace("\\ ", "")
 309         if head.startswith('"') and head.endswith('"'):
 310             head = head.replace(" ", "")[1:-1]
 311         return EMAIL_RE.match(head + "@" + tail) is not None
 312
 313     except ValueError:
 314         # borderline case in which we have multiple "@" signs but the
 315         # head part is correctly escaped.
 316         if ESCAPED_AT_SIGN.search(in_str) is not None:
 317             # replace "@" with "a" in the head
 318             return is_email(ESCAPED_AT_SIGN.sub("a", in_str))
 319         return False
 320
 321
 322 def suffix_string_to_number(in_str: str) -> Optional[int]:
 323     """Take a string like "33Gb" and convert it into a number (of bytes)
 324     like 34603008.  Return None if the input string is not valid.
 325     """
 326
 327     def suffix_capitalize(s: str) -> str:
 328         if len(s) == 1:
 329             return s.upper()
 330         elif len(s) == 2:
 331             return f"{s[0].upper()}{s[1].lower()}"
 332         return suffix_capitalize(s[0:1])
 333
 334     if is_string(in_str):
 335         if is_integer_number(in_str):
 336             return to_int(in_str)
 337         suffixes = [in_str[-2:], in_str[-1:]]
 338         rest = [in_str[:-2], in_str[:-1]]
 339         for x in range(len(suffixes)):
 340             s = suffixes[x]
 341             s = suffix_capitalize(s)
 342             multiplier = NUM_SUFFIXES.get(s, None)
 343             if multiplier is not None:
 344                 r = rest[x]
 345                 if is_integer_number(r):
 346                     return int(r) * multiplier
 347     return None
 348
 349
 350 def number_to_suffix_string(num: int) -> Optional[str]:
 351     """Take a number (of bytes) and returns a string like "43.8Gb".
 352     Returns none if the input is invalid.
 353     """
 354     d = 0.0
 355     suffix = None
 356     for (sfx, size) in NUM_SUFFIXES.items():
 357         if num > size:
 358             d = num / size
 359             suffix = sfx
 360             break
 361     if suffix is not None:
 362         return f"{d:.1f}{suffix}"
 363     return None
 364
 365
 366 def is_credit_card(in_str: Any, card_type: str = None) -> bool:
 367     """
 368     Checks if a string is a valid credit card number.
 369     If card type is provided then it checks against that specific type only,
 370     otherwise any known credit card number will be accepted.
 371
 372     Supported card types are the following:
 373
 374     - VISA
 375     - MASTERCARD
 376     - AMERICAN_EXPRESS
 377     - DINERS_CLUB
 378     - DISCOVER
 379     - JCB
 380     """
 381     if not is_full_string(in_str):
 382         return False
 383
 384     if card_type is not None:
 385         if card_type not in CREDIT_CARDS:
 386             raise KeyError(
 387                 f'Invalid card type "{card_type}". Valid types are: {CREDIT_CARDS.keys()}'
 388             )
 389         return CREDIT_CARDS[card_type].match(in_str) is not None
 390     for c in CREDIT_CARDS:
 391         if CREDIT_CARDS[c].match(in_str) is not None:
 392             return True
 393     return False
 394
 395
 396 def is_camel_case(in_str: Any) -> bool:
 397     """
 398     Checks if a string is formatted as camel case.
 399
 400     A string is considered camel case when:
 401
 402     - it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9])
 403     - it contains both lowercase and uppercase letters
 404     - it does not start with a number
 405     """
 406     return (
 407         is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
 408     )
 409
 410
 411 def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
 412     """
 413     Checks if a string is formatted as "snake case".
 414
 415     A string is considered snake case when:
 416
 417     - it's composed only by lowercase/uppercase letters and digits
 418     - it contains at least one underscore (or provided separator)
 419     - it does not start with a number
 420     """
 421     if is_full_string(in_str):
 422         re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
 423         re_template = (
 424             r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)"
 425         )
 426         r = re_map.get(
 427             separator,
 428             re.compile(
 429                 re_template.format(sign=re.escape(separator)), re.IGNORECASE
 430             ),
 431         )
 432         return r.match(in_str) is not None
 433     return False
 434
 435
 436 def is_json(in_str: Any) -> bool:
 437     """
 438     Check if a string is a valid json.
 439
 440     *Examples:*
 441
 442     >>> is_json('{"name": "Peter"}') # returns true
 443     >>> is_json('[1, 2, 3]') # returns true
 444     >>> is_json('{nope}') # returns false
 445     """
 446     if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
 447         try:
 448             return isinstance(json.loads(in_str), (dict, list))
 449         except (TypeError, ValueError, OverflowError):
 450             pass
 451     return False
 452
 453
 454 def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
 455     """
 456     Check if a string is a valid UUID.
 457
 458     *Example:*
 459
 460     >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
 461     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
 462     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
 463     """
 464     # string casting is used to allow UUID itself as input data type
 465     s = str(in_str)
 466     if allow_hex:
 467         return UUID_HEX_OK_RE.match(s) is not None
 468     return UUID_RE.match(s) is not None
 469
 470
 471 def is_ip_v4(in_str: Any) -> bool:
 472     """
 473     Checks if a string is a valid ip v4.
 474
 475     *Examples:*
 476
 477     >>> is_ip_v4('255.200.100.75') # returns true
 478     >>> is_ip_v4('nope') # returns false (not an ip)
 479     >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
 480     """
 481     if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
 482         return False
 483
 484     # checks that each entry in the ip is in the valid range (0 to 255)
 485     for token in in_str.split("."):
 486         if not 0 <= int(token) <= 255:
 487             return False
 488     return True
 489
 490
 491 def extract_ip_v4(in_str: Any) -> Optional[str]:
 492     """
 493     Extracts the IPv4 chunk of a string or None.
 494     """
 495     if not is_full_string(in_str):
 496         return None
 497     in_str.strip()
 498     m = SHALLOW_IP_V4_RE.match(in_str)
 499     if m is not None:
 500         return m.group(0)
 501     return None
 502
 503
 504 def is_ip_v6(in_str: Any) -> bool:
 505     """
 506     Checks if a string is a valid ip v6.
 507
 508     *Examples:*
 509
 510     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 511     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
 512     """
 513     return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
 514
 515
 516 def extract_ip_v6(in_str: Any) -> Optional[str]:
 517     """
 518     Extract IPv6 chunk or None.
 519     """
 520     if not is_full_string(in_str):
 521         return None
 522     in_str.strip()
 523     m = IP_V6_RE.match(in_str)
 524     if m is not None:
 525         return m.group(0)
 526     return None
 527
 528
 529 def is_ip(in_str: Any) -> bool:
 530     """
 531     Checks if a string is a valid ip (either v4 or v6).
 532
 533     *Examples:*
 534
 535     >>> is_ip('255.200.100.75') # returns true
 536     >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 537     >>> is_ip('1.2.3') # returns false
 538     """
 539     return is_ip_v6(in_str) or is_ip_v4(in_str)
 540
 541
 542 def extract_ip(in_str: Any) -> Optional[str]:
 543     """Extract the IP address or None."""
 544     ip = extract_ip_v4(in_str)
 545     if ip is None:
 546         ip = extract_ip_v6(in_str)
 547     return ip
 548
 549
 550 def is_mac_address(in_str: Any) -> bool:
 551     """Return True if in_str is a valid MAC address false otherwise."""
 552     return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
 553
 554
 555 def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
 556     """Extract the MAC address from in_str"""
 557     if not is_full_string(in_str):
 558         return None
 559     in_str.strip()
 560     m = MAC_ADDRESS_RE.match(in_str)
 561     if m is not None:
 562         mac = m.group(0)
 563         mac.replace(":", separator)
 564         mac.replace("-", separator)
 565         return mac
 566     return None
 567
 568
 569 def is_slug(in_str: Any, separator: str = "-") -> bool:
 570     """
 571     Checks if a given string is a slug (as created by `slugify()`).
 572
 573     *Examples:*
 574
 575     >>> is_slug('my-blog-post-title') # returns true
 576     >>> is_slug('My blog post title') # returns false
 577
 578     :param in_str: String to check.
 579     :type in_str: str
 580     :param separator: Join sign used by the slug.
 581     :type separator: str
 582     :return: True if slug, false otherwise.
 583     """
 584     if not is_full_string(in_str):
 585         return False
 586     rex = r"^([a-z\d]+" + re.escape(separator) + r"*?)*[a-z\d]$"
 587     return re.match(rex, in_str) is not None
 588
 589
 590 def contains_html(in_str: str) -> bool:
 591     """
 592     Checks if the given string contains HTML/XML tags.
 593
 594     By design, this function matches ANY type of tag, so don't expect to use it
 595     as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
 596
 597     *Examples:*
 598
 599     >>> contains_html('my string is <strong>bold</strong>') # returns true
 600     >>> contains_html('my string is not bold') # returns false
 601     """
 602     if not is_string(in_str):
 603         raise ValueError(in_str)
 604     return HTML_RE.search(in_str) is not None
 605
 606
 607 def words_count(in_str: str) -> int:
 608     """
 609     Returns the number of words contained into the given string.
 610
 611     This method is smart, it does consider only sequence of one or more letter and/or numbers
 612     as "words", so a string like this: "! @ # % ... []" will return zero!
 613     Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
 614     will be 4 not 1 (even if there are no spaces in the string).
 615
 616     *Examples:*
 617
 618     >>> words_count('hello world') # returns 2
 619     >>> words_count('one,two,three.stop') # returns 4
 620     """
 621     if not is_string(in_str):
 622         raise ValueError(in_str)
 623     return len(WORDS_COUNT_RE.findall(in_str))
 624
 625
 626 def generate_uuid(as_hex: bool = False) -> str:
 627     """
 628     Generated an UUID string (using `uuid.uuid4()`).
 629
 630     *Examples:*
 631
 632     >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
 633     >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
 634     """
 635     uid = uuid4()
 636     if as_hex:
 637         return uid.hex
 638     return str(uid)
 639
 640
 641 def generate_random_alphanumeric_string(size: int) -> str:
 642     """
 643     Returns a string of the specified size containing random
 644     characters (uppercase/lowercase ascii letters and digits).
 645
 646     *Example:*
 647
 648     >>> random_string(9) # possible output: "cx3QQbzYg"
 649     """
 650     if size < 1:
 651         raise ValueError("size must be >= 1")
 652     chars = string.ascii_letters + string.digits
 653     buffer = [random.choice(chars) for _ in range(size)]
 654     return from_char_list(buffer)
 655
 656
 657 def reverse(in_str: str) -> str:
 658     """
 659     Returns the string with its chars reversed.
 660     """
 661     if not is_string(in_str):
 662         raise ValueError(in_str)
 663     return in_str[::-1]
 664
 665
 666 def camel_case_to_snake_case(in_str, *, separator="_"):
 667     """
 668     Convert a camel case string into a snake case one.
 669     (The original string is returned if is not a valid camel case string)
 670     """
 671     if not is_string(in_str):
 672         raise ValueError(in_str)
 673     if not is_camel_case(in_str):
 674         return in_str
 675     return CAMEL_CASE_REPLACE_RE.sub(
 676         lambda m: m.group(1) + separator, in_str
 677     ).lower()
 678
 679
 680 def snake_case_to_camel_case(
 681     in_str: str, *, upper_case_first: bool = True, separator: str = "_"
 682 ) -> str:
 683     """
 684     Convert a snake case string into a camel case one.
 685     (The original string is returned if is not a valid snake case string)
 686     """
 687     if not is_string(in_str):
 688         raise ValueError(in_str)
 689     if not is_snake_case(in_str, separator=separator):
 690         return in_str
 691     tokens = [s.title() for s in in_str.split(separator) if is_full_string(s)]
 692     if not upper_case_first:
 693         tokens[0] = tokens[0].lower()
 694     return from_char_list(tokens)
 695
 696
 697 def to_char_list(in_str: str) -> List[str]:
 698     if not is_string(in_str):
 699         return []
 700     return list(in_str)
 701
 702
 703 def from_char_list(in_list: List[str]) -> str:
 704     return "".join(in_list)
 705
 706
 707 def shuffle(in_str: str) -> str:
 708     """Return a new string containing same chars of the given one but in
 709     a randomized order.
 710     """
 711     if not is_string(in_str):
 712         raise ValueError(in_str)
 713
 714     # turn the string into a list of chars
 715     chars = to_char_list(in_str)
 716     random.shuffle(chars)
 717     return from_char_list(chars)
 718
 719
 720 def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
 721     """
 722     Remove html code contained into the given string.
 723
 724     *Examples:*
 725
 726     >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
 727     >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
 728     """
 729     if not is_string(in_str):
 730         raise ValueError(in_str)
 731     r = HTML_TAG_ONLY_RE if keep_tag_content else HTML_RE
 732     return r.sub("", in_str)
 733
 734
 735 def asciify(in_str: str) -> str:
 736     """
 737     Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
 738     (eg: ó -> o, Ë -> E, ç -> c...).
 739
 740     **Bear in mind**: Some chars may be lost if impossible to translate.
 741
 742     *Example:*
 743
 744     >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
 745     """
 746     if not is_string(in_str):
 747         raise ValueError(in_str)
 748
 749     # "NFKD" is the algorithm which is able to successfully translate
 750     # the most of non-ascii chars.
 751     normalized = unicodedata.normalize("NFKD", in_str)
 752
 753     # encode string forcing ascii and ignore any errors
 754     # (unrepresentable chars will be stripped out)
 755     ascii_bytes = normalized.encode("ascii", "ignore")
 756
 757     # turns encoded bytes into an utf-8 string
 758     return ascii_bytes.decode("utf-8")
 759
 760
 761 def slugify(in_str: str, *, separator: str = "-") -> str:
 762     """
 763     Converts a string into a "slug" using provided separator.
 764     The returned string has the following properties:
 765
 766     - it has no spaces
 767     - all letters are in lower case
 768     - all punctuation signs and non alphanumeric chars are removed
 769     - words are divided using provided separator
 770     - all chars are encoded as ascii (by using `asciify()`)
 771     - is safe for URL
 772
 773     *Examples:*
 774
 775     >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
 776     >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
 777     """
 778     if not is_string(in_str):
 779         raise ValueError(in_str)
 780
 781     # replace any character that is NOT letter or number with spaces
 782     out = NO_LETTERS_OR_NUMBERS_RE.sub(" ", in_str.lower()).strip()
 783
 784     # replace spaces with join sign
 785     out = SPACES_RE.sub(separator, out)
 786
 787     # normalize joins (remove duplicates)
 788     out = re.sub(re.escape(separator) + r"+", separator, out)
 789     return asciify(out)
 790
 791
 792 def to_bool(in_str: str) -> bool:
 793     """
 794     Turns a string into a boolean based on its content (CASE INSENSITIVE).
 795
 796     A positive boolean (True) is returned if the string value is one of the following:
 797
 798     - "true"
 799     - "1"
 800     - "yes"
 801     - "y"
 802
 803     Otherwise False is returned.
 804     """
 805     if not is_string(in_str):
 806         raise ValueError(in_str)
 807     return in_str.lower() in ("true", "1", "yes", "y", "t")
 808
 809
 810 def dedent(in_str: str) -> str:
 811     """
 812     Removes tab indentation from multi line strings (inspired by analogous Scala function).
 813
 814     *Example:*
 815
 816     >>> strip_margin('''
 817     >>>                 line 1
 818     >>>                 line 2
 819     >>>                 line 3
 820     >>> ''')
 821     >>> # returns:
 822     >>> '''
 823     >>> line 1
 824     >>> line 2
 825     >>> line 3
 826     >>> '''
 827     """
 828     if not is_string(in_str):
 829         raise ValueError(in_str)
 830     line_separator = '\n'
 831     lines = [MARGIN_RE.sub('', line) for line in in_str.split(line_separator)]
 832     return line_separator.join(lines)
 833
 834
 835 def indent(in_str: str, amount: int) -> str:
 836     if not is_string(in_str):
 837         raise ValueError(in_str)
 838     line_separator = '\n'
 839     lines = [" " * amount + line for line in in_str.split(line_separator)]
 840     return line_separator.join(lines)
 841
 842
 843 def sprintf(*args, **kwargs) -> str:
 844     ret = ""
 845
 846     sep = kwargs.pop("sep", None)
 847     if sep is not None:
 848         if not isinstance(sep, str):
 849             raise TypeError("sep must be None or a string")
 850
 851     end = kwargs.pop("end", None)
 852     if end is not None:
 853         if not isinstance(end, str):
 854             raise TypeError("end must be None or a string")
 855
 856     if kwargs:
 857         raise TypeError("invalid keyword arguments to sprint()")
 858
 859     if sep is None:
 860         sep = " "
 861     if end is None:
 862         end = "\n"
 863     for i, arg in enumerate(args):
 864         if i:
 865             ret += sep
 866         if isinstance(arg, str):
 867             ret += arg
 868         else:
 869             ret += str(arg)
 870     ret += end
 871     return ret