string_utils.py

   1 #!/usr/bin/env python3
   2
   3 import json
   4 import random
   5 import re
   6 import string
   7 from typing import Any, List, Optional
   8 import unicodedata
   9 from uuid import uuid4
  10
  11 NUMBER_RE = re.compile(r"^([+\-]?)((\d+)(\.\d+)?([e|E]\d+)?|\.\d+)$")
  12
  13 HEX_NUMBER_RE = re.compile(r"^([+|-]?)0[x|X]([0-9A-Fa-f]+)$")
  14
  15 OCT_NUMBER_RE = re.compile(r"^([+|-]?)0[O|o]([0-7]+)$")
  16
  17 BIN_NUMBER_RE = re.compile(r"^([+|-]?)0[B|b]([0|1]+)$")
  18
# Building blocks of the URL pattern, concatenated in the order in which the
# parts appear in a URL. The string is shared by URL_RE and URLS_RE below,
# both of which are compiled case-insensitively.
# NOTE(review): in the query-string class, "%-=" is parsed as a character
# range from "%" to "=" (which is what lets "&", "." and digits through), not
# as the three literal characters -- confirm this is intended before touching.
URLS_RAW_STRING = (
    r"([a-z-]+://)"  # scheme
    r"([a-z_\d-]+:[a-z_\d-]+@)?"  # user:password
    r"(www\.)?"  # www.
    r"((?<!\.)[a-z\d]+[a-z\d.-]+\.[a-z]{2,6}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|localhost)"  # domain
    r"(:\d{2,})?"  # port number
    r"(/[a-z\d_%+-]*)*"  # folders
    r"(\.[a-z\d_%+-]+)*"  # file extension
    r"(\?[a-z\d_+%-=]*)?"  # query string
    r"(#\S*)?"  # hash
)

# Anchored form: the whole string must be a URL (used by is_url()).
URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE)

# Unanchored, capturing form for locating URLs inside a larger text.
URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE)
  34
# Matches either a run of "@" that is followed later in the string by a
# double quote, or a backslash-escaped "@". is_email() uses it to neutralise
# extra "@" signs before re-validating the address.
ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@')

# Local part (letters, digits and a set of punctuation), "@", then a domain
# ending in a 2-4 letter TLD. The patterns compiled from this string do not
# use re.IGNORECASE, so the domain part only matches lowercase letters.
EMAILS_RAW_STRING = r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}"

# Anchored form: the whole string must be an email (used by is_email()).
EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING))

# Unanchored, capturing form for locating emails inside a larger text.
EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING))
  42
# Whole-string camel case test: starts with letters, contains at least one
# lowercase/uppercase boundary, and may continue with letters or digits.
CAMEL_CASE_TEST_RE = re.compile(
    r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$"
)

# Captures the chunk that precedes an uppercase letter; used by
# camel_case_to_snake_case() to insert the separator after that chunk.
CAMEL_CASE_REPLACE_RE = re.compile(r"([a-z]|[A-Z]+)(?=[A-Z])")

# Snake case test for the "_" separator (case-insensitive): letters, optional
# digits, then at least one underscore -- or leading underscores followed by
# alphanumerics.
SNAKE_CASE_TEST_RE = re.compile(
    r"^([a-z]+\d*_[a-z\d_]*|_+[a-z\d]+[a-z\d_]*)$", re.IGNORECASE
)

# Same test for the "-" separator. There is no leading "^" here; is_snake_case()
# applies it with match(), which only matches at the start of the string.
SNAKE_CASE_TEST_DASH_RE = re.compile(
    r"([a-z]+\d*-[a-z\d-]*|-+[a-z\d]+[a-z\d-]*)$", re.IGNORECASE
)

# Separator followed by one alphanumeric character (group 1 = separator,
# group 2 = the character). Not referenced by the functions visible in this
# part of the module.
SNAKE_CASE_REPLACE_RE = re.compile(r"(_)([a-z\d])")

SNAKE_CASE_REPLACE_DASH_RE = re.compile(r"(-)([a-z\d])")
  60
# Per-brand card number patterns, keyed by the card type names accepted by
# is_credit_card(). Each pattern is anchored and checks only the leading
# digits and the total number of digits; no checksum is verified here.
CREDIT_CARDS = {
    "VISA": re.compile(r"^4\d{12}(?:\d{3})?$"),
    "MASTERCARD": re.compile(r"^5[1-5]\d{14}$"),
    "AMERICAN_EXPRESS": re.compile(r"^3[47]\d{13}$"),
    "DINERS_CLUB": re.compile(r"^3(?:0[0-5]|[68]\d)\d{11}$"),
    "DISCOVER": re.compile(r"^6(?:011|5\d{2})\d{12}$"),
    "JCB": re.compile(r"^(?:2131|1800|35\d{3})\d{11}$"),
}
  69
# Cheap pre-check used by is_json(): the text, ignoring surrounding
# whitespace, must start with "[" or "{" and end with "]" or "}".
# The real validation is done by json.loads().
JSON_WRAPPER_RE = re.compile(
    r"^\s*[\[{]\s*(.*)\s*[\}\]]\s*$", re.MULTILINE | re.DOTALL
)

# Canonical dashed UUID form (8-4-4-4-12 hex digits), case-insensitive.
UUID_RE = re.compile(
    r"^[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}$", re.IGNORECASE
)

# Same as UUID_RE but every dash is optional, so the 32-digit hex form is
# accepted as well (used by is_uuid(..., allow_hex=True)).
UUID_HEX_OK_RE = re.compile(
    r"^[a-f\d]{8}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{4}-?[a-f\d]{12}$",
    re.IGNORECASE,
)

# "Shallow" IPv4 check: four dot-separated groups of 1-3 digits. The 0-255
# range of each group is NOT checked here; is_ip_v4() does that afterwards.
SHALLOW_IP_V4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
  84
  85 IP_V6_RE = re.compile(r"^([a-z\d]{0,4}:){7}[a-z\d]{0,4}$", re.IGNORECASE)
  86
# Six pairs of hex digits separated by ":" or "-" (case-insensitive).
# The pattern is anchored at the start only (there is no trailing "$"), so
# match() also succeeds when the address is followed by other text.
MAC_ADDRESS_RE = re.compile(
    r"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})", re.IGNORECASE
)

# One "word": a run of letters/digits (underscore excluded) together with
# any surrounding non-word characters. words_count() counts the matches.
WORDS_COUNT_RE = re.compile(
    r"\W*[^\W_]+\W*", re.IGNORECASE | re.MULTILINE | re.UNICODE
)

# An opening tag with, optionally, everything up to a closing tag; or a
# comment; or a doctype. Used by contains_html() and by strip_html() when tag
# content must be removed too.
HTML_RE = re.compile(
    r"((<([a-z]+:)?[a-z]+[^>]*/?>)(.*?(</([a-z]+:)?[a-z]+>))?|<!--.*-->|<!doctype.*>)",
    re.IGNORECASE | re.MULTILINE | re.DOTALL,
)

# Tags only (opening, closing, comment, doctype) without the enclosed content;
# used by strip_html(keep_tag_content=True).
HTML_TAG_ONLY_RE = re.compile(
    r"(<([a-z]+:)?[a-z]+[^>]*/?>|</([a-z]+:)?[a-z]+>|<!--.*-->|<!doctype.*>)",
    re.IGNORECASE | re.MULTILINE | re.DOTALL,
)

# A single whitespace character.
SPACES_RE = re.compile(r"\s")

# A run of characters that are neither letters nor digits, or a run of
# underscores; slugify() replaces each match with a space.
NO_LETTERS_OR_NUMBERS_RE = re.compile(
    r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE
)

# Leading whitespace of a line, excluding line-break characters; dedent()
# applies it to each line separately.
MARGIN_RE = re.compile(r"^[^\S\r\n]+")
 112
 113 ESCAPE_SEQUENCE_RE = re.compile(r"\e\[[^A-Za-z]*[A-Za-z]")
 114
 115 NUM_SUFFIXES = {
 116     "Pb": (1024 ** 5),
 117     "P": (1024 ** 5),
 118     "Tb": (1024 ** 4),
 119     "T": (1024 ** 4),
 120     "Gb": (1024 ** 3),
 121     "G": (1024 ** 3),
 122     "Mb": (1024 ** 2),
 123     "M": (1024 ** 2),
 124     "Kb": (1024 ** 1),
 125     "K": (1024 ** 1),
 126 }
 127
 128
 129 def is_none_or_empty(in_str: Optional[str]) -> bool:
 130     return in_str is None or len(in_str.strip()) == 0
 131
 132
 133 def is_string(obj: Any) -> bool:
 134     """
 135     Checks if an object is a string.
 136     """
 137     return isinstance(obj, str)
 138
 139
 140 def is_empty_string(in_str: Any) -> bool:
 141     return is_string(in_str) and in_str.strip() == ""
 142
 143
 144 def is_full_string(in_str: Any) -> bool:
 145     return is_string(in_str) and in_str.strip() != ""
 146
 147
 148 def is_number(in_str: str) -> bool:
 149     """
 150     Checks if a string is a valid number.
 151     """
 152     if not is_string(in_str):
 153         raise ValueError(in_str)
 154     return NUMBER_RE.match(in_str) is not None
 155
 156
 157 def is_integer_number(in_str: str) -> bool:
 158     """
 159     Checks whether the given string represents an integer or not.
 160
 161     An integer may be signed or unsigned or use a "scientific notation".
 162
 163     *Examples:*
 164
 165     >>> is_integer('42') # returns true
 166     >>> is_integer('42.0') # returns false
 167     """
 168     return (
 169         (is_number(in_str) and "." not in in_str) or
 170         is_hexidecimal_integer_number(in_str) or
 171         is_octal_integer_number(in_str) or
 172         is_binary_integer_number(in_str)
 173     )
 174
 175
 176 def is_hexidecimal_integer_number(in_str: str) -> bool:
 177     if not is_string(in_str):
 178         raise ValueError(in_str)
 179     return HEX_NUMBER_RE.match(in_str) is not None
 180
 181
 182 def is_octal_integer_number(in_str: str) -> bool:
 183     if not is_string(in_str):
 184         raise ValueError(in_str)
 185     return OCT_NUMBER_RE.match(in_str) is not None
 186
 187
 188 def is_binary_integer_number(in_str: str) -> bool:
 189     if not is_string(in_str):
 190         raise ValueError(in_str)
 191     return BIN_NUMBER_RE.match(in_str) is not None
 192
 193
 194 def to_int(in_str: str) -> int:
 195     if not is_string(in_str):
 196         raise ValueError(in_str)
 197     if is_binary_integer_number(in_str):
 198         return int(in_str, 2)
 199     if is_octal_integer_number(in_str):
 200         return int(in_str, 8)
 201     if is_hexidecimal_integer_number(in_str):
 202         return int(in_str, 16)
 203     return int(in_str)
 204
 205
 206 def is_decimal_number(in_str: str) -> bool:
 207     """
 208     Checks whether the given string represents a decimal or not.
 209
 210     A decimal may be signed or unsigned or use a "scientific notation".
 211
 212     >>> is_decimal('42.0') # returns true
 213     >>> is_decimal('42') # returns false
 214     """
 215     return is_number(in_str) and "." in in_str
 216
 217
 218 def strip_escape_sequences(in_str: str) -> str:
 219     in_str = ESCAPE_SEQUENCE_RE.sub("", in_str)
 220     return in_str
 221
 222
# Full url example:
# scheme://username:password@www.domain.com:8042/folder/subfolder/file.extension?param=value&param2=value2#hash
 225 def is_url(in_str: Any, allowed_schemes: Optional[List[str]] = None) -> bool:
 226     """
 227     Check if a string is a valid url.
 228
 229     *Examples:*
 230
 231     >>> is_url('http://www.mysite.com') # returns true
 232     >>> is_url('https://mysite.com') # returns true
 233     >>> is_url('.mysite.com') # returns false
 234     """
 235     if not is_full_string(in_str):
 236         return False
 237
 238     valid = URL_RE.match(in_str) is not None
 239
 240     if allowed_schemes:
 241         return valid and any([in_str.startswith(s) for s in allowed_schemes])
 242     return valid
 243
 244
def is_email(in_str: Any) -> bool:
    """
    Check if a string is a valid email.

    Reference: https://tools.ietf.org/html/rfc3696#section-3

    *Examples:*

    >>> is_email('my.email@the-provider.com') # returns true
    >>> is_email('@gmail.com') # returns false

    :param in_str: String to check.
    :return: True if email, false otherwise.
    """
    # cheap rejections first: not a string / blank, longer than 320 chars
    # overall, or starting with a dot.
    if (
        not is_full_string(in_str)
        or len(in_str) > 320
        or in_str.startswith(".")
    ):
        return False

    try:
        # we expect 2 tokens, one before "@" and one after, otherwise
        # the unpacking raises ValueError (handled below).
        head, tail = in_str.split("@")

        # head's size must be <= 64, tail <= 255, head must not end
        # with a dot or contain multiple consecutive dots.
        if (
            len(head) > 64
            or len(tail) > 255
            or head.endswith(".")
            or (".." in head)
        ):
            return False

        # removes escaped spaces, so that later on the test regex will
        # accept the string.
        head = head.replace("\\ ", "")
        # a quoted head may contain plain spaces: drop them together with
        # the surrounding quotes before matching.
        if head.startswith('"') and head.endswith('"'):
            head = head.replace(" ", "")[1:-1]
        return EMAIL_RE.match(head + "@" + tail) is not None

    except ValueError:
        # borderline case in which we have multiple "@" signs but the
        # head part is correctly escaped.
        if ESCAPED_AT_SIGN.search(in_str) is not None:
            # replace every escaped "@" with "a" and validate the
            # resulting string again.
            return is_email(ESCAPED_AT_SIGN.sub("a", in_str))
        return False
 292
 293
 294 def suffix_string_to_number(in_str: str) -> Optional[int]:
 295     """Take a string like "33Gb" and convert it into a number (of bytes)
 296     like 34603008.  Return None if the input string is not valid.
 297     """
 298
 299     def suffix_capitalize(s: str) -> str:
 300         if len(s) == 1:
 301             return s.upper()
 302         elif len(s) == 2:
 303             return f"{s[0].upper()}{s[1].lower()}"
 304         return suffix_capitalize(s[0:1])
 305
 306     if is_string(in_str):
 307         if is_integer_number(in_str):
 308             return to_int(in_str)
 309         suffixes = [in_str[-2:], in_str[-1:]]
 310         rest = [in_str[:-2], in_str[:-1]]
 311         for x in range(len(suffixes)):
 312             s = suffixes[x]
 313             s = suffix_capitalize(s)
 314             multiplier = NUM_SUFFIXES.get(s, None)
 315             if multiplier is not None:
 316                 r = rest[x]
 317                 if is_integer_number(r):
 318                     return int(r) * multiplier
 319     return None
 320
 321
 322 def number_to_suffix_string(num: int) -> Optional[str]:
 323     """Take a number (of bytes) and returns a string like "43.8Gb".
 324     Returns none if the input is invalid.
 325     """
 326     d = 0.0
 327     suffix = None
 328     for (sfx, size) in NUM_SUFFIXES.items():
 329         if num > size:
 330             d = num / size
 331             suffix = sfx
 332             break
 333     if suffix is not None:
 334         return f"{d:.1f}{suffix}"
 335     return None
 336
 337
 338 def is_credit_card(in_str: Any, card_type: str = None) -> bool:
 339     """
 340     Checks if a string is a valid credit card number.
 341     If card type is provided then it checks against that specific type only,
 342     otherwise any known credit card number will be accepted.
 343
 344     Supported card types are the following:
 345
 346     - VISA
 347     - MASTERCARD
 348     - AMERICAN_EXPRESS
 349     - DINERS_CLUB
 350     - DISCOVER
 351     - JCB
 352     """
 353     if not is_full_string(in_str):
 354         return False
 355
 356     if card_type is not None:
 357         if card_type not in CREDIT_CARDS:
 358             raise KeyError(
 359                 f'Invalid card type "{card_type}". Valid types are: {CREDIT_CARDS.keys()}'
 360             )
 361         return CREDIT_CARDS[card_type].match(in_str) is not None
 362     for c in CREDIT_CARDS:
 363         if CREDIT_CARDS[c].match(in_str) is not None:
 364             return True
 365     return False
 366
 367
 368 def is_camel_case(in_str: Any) -> bool:
 369     """
 370     Checks if a string is formatted as camel case.
 371
 372     A string is considered camel case when:
 373
 374     - it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9])
 375     - it contains both lowercase and uppercase letters
 376     - it does not start with a number
 377     """
 378     return (
 379         is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None
 380     )
 381
 382
 383 def is_snake_case(in_str: Any, *, separator: str = "_") -> bool:
 384     """
 385     Checks if a string is formatted as "snake case".
 386
 387     A string is considered snake case when:
 388
 389     - it's composed only by lowercase/uppercase letters and digits
 390     - it contains at least one underscore (or provided separator)
 391     - it does not start with a number
 392     """
 393     if is_full_string(in_str):
 394         re_map = {"_": SNAKE_CASE_TEST_RE, "-": SNAKE_CASE_TEST_DASH_RE}
 395         re_template = (
 396             r"([a-z]+\d*{sign}[a-z\d{sign}]*|{sign}+[a-z\d]+[a-z\d{sign}]*)"
 397         )
 398         r = re_map.get(
 399             separator,
 400             re.compile(
 401                 re_template.format(sign=re.escape(separator)), re.IGNORECASE
 402             ),
 403         )
 404         return r.match(in_str) is not None
 405     return False
 406
 407
 408 def is_json(in_str: Any) -> bool:
 409     """
 410     Check if a string is a valid json.
 411
 412     *Examples:*
 413
 414     >>> is_json('{"name": "Peter"}') # returns true
 415     >>> is_json('[1, 2, 3]') # returns true
 416     >>> is_json('{nope}') # returns false
 417     """
 418     if is_full_string(in_str) and JSON_WRAPPER_RE.match(in_str) is not None:
 419         try:
 420             return isinstance(json.loads(in_str), (dict, list))
 421         except (TypeError, ValueError, OverflowError):
 422             pass
 423     return False
 424
 425
 426 def is_uuid(in_str: Any, allow_hex: bool = False) -> bool:
 427     """
 428     Check if a string is a valid UUID.
 429
 430     *Example:*
 431
 432     >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') # returns true
 433     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') # returns false
 434     >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf', allow_hex=True) # returns true
 435     """
 436     # string casting is used to allow UUID itself as input data type
 437     s = str(in_str)
 438     if allow_hex:
 439         return UUID_HEX_OK_RE.match(s) is not None
 440     return UUID_RE.match(s) is not None
 441
 442
 443 def is_ip_v4(in_str: Any) -> bool:
 444     """
 445     Checks if a string is a valid ip v4.
 446
 447     *Examples:*
 448
 449     >>> is_ip_v4('255.200.100.75') # returns true
 450     >>> is_ip_v4('nope') # returns false (not an ip)
 451     >>> is_ip_v4('255.200.100.999') # returns false (999 is out of range)
 452     """
 453     if not is_full_string(in_str) or SHALLOW_IP_V4_RE.match(in_str) is None:
 454         return False
 455
 456     # checks that each entry in the ip is in the valid range (0 to 255)
 457     for token in in_str.split("."):
 458         if not 0 <= int(token) <= 255:
 459             return False
 460     return True
 461
 462
 463 def extract_ip_v4(in_str: Any) -> Optional[str]:
 464     """
 465     Extracts the IPv4 chunk of a string or None.
 466     """
 467     if not is_full_string(in_str):
 468         return None
 469     in_str.strip()
 470     m = SHALLOW_IP_V4_RE.match(in_str)
 471     if m is not None:
 472         return m.group(0)
 473     return None
 474
 475
 476 def is_ip_v6(in_str: Any) -> bool:
 477     """
 478     Checks if a string is a valid ip v6.
 479
 480     *Examples:*
 481
 482     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 483     >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # returns false (invalid "?")
 484     """
 485     return is_full_string(in_str) and IP_V6_RE.match(in_str) is not None
 486
 487
 488 def extract_ip_v6(in_str: Any) -> Optional[str]:
 489     """
 490     Extract IPv6 chunk or None.
 491     """
 492     if not is_full_string(in_str):
 493         return None
 494     in_str.strip()
 495     m = IP_V6_RE.match(in_str)
 496     if m is not None:
 497         return m.group(0)
 498     return None
 499
 500
 501 def is_ip(in_str: Any) -> bool:
 502     """
 503     Checks if a string is a valid ip (either v4 or v6).
 504
 505     *Examples:*
 506
 507     >>> is_ip('255.200.100.75') # returns true
 508     >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') # returns true
 509     >>> is_ip('1.2.3') # returns false
 510     """
 511     return is_ip_v6(in_str) or is_ip_v4(in_str)
 512
 513
 514 def extract_ip(in_str: Any) -> Optional[str]:
 515     """Extract the IP address or None."""
 516     ip = extract_ip_v4(in_str)
 517     if ip is None:
 518         ip = extract_ip_v6(in_str)
 519     return ip
 520
 521
 522 def is_mac_address(in_str: Any) -> bool:
 523     """Return True if in_str is a valid MAC address false otherwise."""
 524     return is_full_string(in_str) and MAC_ADDRESS_RE.match(in_str) is not None
 525
 526
 527 def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]:
 528     """Extract the MAC address from in_str"""
 529     if not is_full_string(in_str):
 530         return None
 531     in_str.strip()
 532     m = MAC_ADDRESS_RE.match(in_str)
 533     if m is not None:
 534         mac = m.group(0)
 535         mac.replace(":", separator)
 536         mac.replace("-", separator)
 537         return mac
 538     return None
 539
 540
 541 def is_slug(in_str: Any, separator: str = "-") -> bool:
 542     """
 543     Checks if a given string is a slug (as created by `slugify()`).
 544
 545     *Examples:*
 546
 547     >>> is_slug('my-blog-post-title') # returns true
 548     >>> is_slug('My blog post title') # returns false
 549
 550     :param in_str: String to check.
 551     :type in_str: str
 552     :param separator: Join sign used by the slug.
 553     :type separator: str
 554     :return: True if slug, false otherwise.
 555     """
 556     if not is_full_string(in_str):
 557         return False
 558     rex = r"^([a-z\d]+" + re.escape(separator) + r"*?)*[a-z\d]$"
 559     return re.match(rex, in_str) is not None
 560
 561
 562 def contains_html(in_str: str) -> bool:
 563     """
 564     Checks if the given string contains HTML/XML tags.
 565
 566     By design, this function matches ANY type of tag, so don't expect to use it
 567     as an HTML validator, its goal is to detect "malicious" or undesired tags in the text.
 568
 569     *Examples:*
 570
 571     >>> contains_html('my string is <strong>bold</strong>') # returns true
 572     >>> contains_html('my string is not bold') # returns false
 573     """
 574     if not is_string(in_str):
 575         raise ValueError(in_str)
 576     return HTML_RE.search(in_str) is not None
 577
 578
 579 def words_count(in_str: str) -> int:
 580     """
 581     Returns the number of words contained into the given string.
 582
 583     This method is smart, it does consider only sequence of one or more letter and/or numbers
 584     as "words", so a string like this: "! @ # % ... []" will return zero!
 585     Moreover it is aware of punctuation, so the count for a string like "one,two,three.stop"
 586     will be 4 not 1 (even if there are no spaces in the string).
 587
 588     *Examples:*
 589
 590     >>> words_count('hello world') # returns 2
 591     >>> words_count('one,two,three.stop') # returns 4
 592     """
 593     if not is_string(in_str):
 594         raise ValueError(in_str)
 595     return len(WORDS_COUNT_RE.findall(in_str))
 596
 597
 598 def generate_uuid(as_hex: bool = False) -> str:
 599     """
 600     Generated an UUID string (using `uuid.uuid4()`).
 601
 602     *Examples:*
 603
 604     >>> uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b'
 605     >>> uuid(as_hex=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b'
 606     """
 607     uid = uuid4()
 608     if as_hex:
 609         return uid.hex
 610     return str(uid)
 611
 612
 613 def generate_random_alphanumeric_string(size: int) -> str:
 614     """
 615     Returns a string of the specified size containing random
 616     characters (uppercase/lowercase ascii letters and digits).
 617
 618     *Example:*
 619
 620     >>> random_string(9) # possible output: "cx3QQbzYg"
 621     """
 622     if size < 1:
 623         raise ValueError("size must be >= 1")
 624     chars = string.ascii_letters + string.digits
 625     buffer = [random.choice(chars) for _ in range(size)]
 626     return from_char_list(buffer)
 627
 628
 629 def reverse(in_str: str) -> str:
 630     """
 631     Returns the string with its chars reversed.
 632     """
 633     if not is_string(in_str):
 634         raise ValueError(in_str)
 635     return in_str[::-1]
 636
 637
 638 def camel_case_to_snake_case(in_str, *, separator="_"):
 639     """
 640     Convert a camel case string into a snake case one.
 641     (The original string is returned if is not a valid camel case string)
 642     """
 643     if not is_string(in_str):
 644         raise ValueError(in_str)
 645     if not is_camel_case(in_str):
 646         return in_str
 647     return CAMEL_CASE_REPLACE_RE.sub(
 648         lambda m: m.group(1) + separator, in_str
 649     ).lower()
 650
 651
 652 def snake_case_to_camel_case(
 653     in_str: str, *, upper_case_first: bool = True, separator: str = "_"
 654 ) -> str:
 655     """
 656     Convert a snake case string into a camel case one.
 657     (The original string is returned if is not a valid snake case string)
 658     """
 659     if not is_string(in_str):
 660         raise ValueError(in_str)
 661     if not is_snake_case(in_str, separator=separator):
 662         return in_str
 663     tokens = [s.title() for s in in_str.split(separator) if is_full_string(s)]
 664     if not upper_case_first:
 665         tokens[0] = tokens[0].lower()
 666     return from_char_list(tokens)
 667
 668
 669 def to_char_list(in_str: str) -> List[str]:
 670     if not is_string(in_str):
 671         return []
 672     return list(in_str)
 673
 674
 675 def from_char_list(in_list: List[str]) -> str:
 676     return "".join(in_list)
 677
 678
 679 def shuffle(in_str: str) -> str:
 680     """Return a new string containing same chars of the given one but in
 681     a randomized order.
 682     """
 683     if not is_string(in_str):
 684         raise ValueError(in_str)
 685
 686     # turn the string into a list of chars
 687     chars = to_char_list(in_str)
 688     random.shuffle(chars)
 689     return from_char_list(chars)
 690
 691
 692 def strip_html(in_str: str, keep_tag_content: bool = False) -> str:
 693     """
 694     Remove html code contained into the given string.
 695
 696     *Examples:*
 697
 698     >>> strip_html('test: <a href="foo/bar">click here</a>') # returns 'test: '
 699     >>> strip_html('test: <a href="foo/bar">click here</a>', keep_tag_content=True) # returns 'test: click here'
 700     """
 701     if not is_string(in_str):
 702         raise ValueError(in_str)
 703     r = HTML_TAG_ONLY_RE if keep_tag_content else HTML_RE
 704     return r.sub("", in_str)
 705
 706
 707 def asciify(in_str: str) -> str:
 708     """
 709     Force string content to be ascii-only by translating all non-ascii chars into the closest possible representation
 710     (eg: ó -> o, Ë -> E, ç -> c...).
 711
 712     **Bear in mind**: Some chars may be lost if impossible to translate.
 713
 714     *Example:*
 715
 716     >>> asciify('èéùúòóäåëýñÅÀÁÇÌÍÑÓË') # returns 'eeuuooaaeynAAACIINOE'
 717     """
 718     if not is_string(in_str):
 719         raise ValueError(in_str)
 720
 721     # "NFKD" is the algorithm which is able to successfully translate
 722     # the most of non-ascii chars.
 723     normalized = unicodedata.normalize("NFKD", in_str)
 724
 725     # encode string forcing ascii and ignore any errors
 726     # (unrepresentable chars will be stripped out)
 727     ascii_bytes = normalized.encode("ascii", "ignore")
 728
 729     # turns encoded bytes into an utf-8 string
 730     return ascii_bytes.decode("utf-8")
 731
 732
 733 def slugify(in_str: str, *, separator: str = "-") -> str:
 734     """
 735     Converts a string into a "slug" using provided separator.
 736     The returned string has the following properties:
 737
 738     - it has no spaces
 739     - all letters are in lower case
 740     - all punctuation signs and non alphanumeric chars are removed
 741     - words are divided using provided separator
 742     - all chars are encoded as ascii (by using `asciify()`)
 743     - is safe for URL
 744
 745     *Examples:*
 746
 747     >>> slugify('Top 10 Reasons To Love Dogs!!!') # returns: 'top-10-reasons-to-love-dogs'
 748     >>> slugify('Mönstér Mägnët') # returns 'monster-magnet'
 749     """
 750     if not is_string(in_str):
 751         raise ValueError(in_str)
 752
 753     # replace any character that is NOT letter or number with spaces
 754     out = NO_LETTERS_OR_NUMBERS_RE.sub(" ", in_str.lower()).strip()
 755
 756     # replace spaces with join sign
 757     out = SPACES_RE.sub(separator, out)
 758
 759     # normalize joins (remove duplicates)
 760     out = re.sub(re.escape(separator) + r"+", separator, out)
 761     return asciify(out)
 762
 763
 764 def to_bool(in_str: str) -> bool:
 765     """
 766     Turns a string into a boolean based on its content (CASE INSENSITIVE).
 767
 768     A positive boolean (True) is returned if the string value is one of the following:
 769
 770     - "true"
 771     - "1"
 772     - "yes"
 773     - "y"
 774
 775     Otherwise False is returned.
 776     """
 777     if not is_string(in_str):
 778         raise ValueError(in_str)
 779     return in_str.lower() in ("true", "1", "yes", "y", "t")
 780
 781
 782 def dedent(in_str: str) -> str:
 783     """
 784     Removes tab indentation from multi line strings (inspired by analogous Scala function).
 785
 786     *Example:*
 787
 788     >>> strip_margin('''
 789     >>>                 line 1
 790     >>>                 line 2
 791     >>>                 line 3
 792     >>> ''')
 793     >>> # returns:
 794     >>> '''
 795     >>> line 1
 796     >>> line 2
 797     >>> line 3
 798     >>> '''
 799     """
 800     if not is_string(in_str):
 801         raise ValueError(in_str)
 802     line_separator = '\n'
 803     lines = [MARGIN_RE.sub('', line) for line in in_str.split(line_separator)]
 804     return line_separator.join(lines)
 805
 806
 807 def indent(in_str: str, amount: int) -> str:
 808     if not is_string(in_str):
 809         raise ValueError(in_str)
 810     line_separator = '\n'
 811     lines = [" " * amount + line for line in in_str.split(line_separator)]
 812     return line_separator.join(lines)
 813
 814
 815 def sprintf(*args, **kwargs) -> str:
 816     ret = ""
 817
 818     sep = kwargs.pop("sep", None)
 819     if sep is not None:
 820         if not isinstance(sep, str):
 821             raise TypeError("sep must be None or a string")
 822
 823     end = kwargs.pop("end", None)
 824     if end is not None:
 825         if not isinstance(end, str):
 826             raise TypeError("end must be None or a string")
 827
 828     if kwargs:
 829         raise TypeError("invalid keyword arguments to sprint()")
 830
 831     if sep is None:
 832         sep = " "
 833     if end is None:
 834         end = "\n"
 835     for i, arg in enumerate(args):
 836         if i:
 837             ret += sep
 838         if isinstance(arg, str):
 839             ret += arg
 840         else:
 841             ret += str(arg)
 842     ret += end
 843     return ret