X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=src%2Fpyutils%2Fstring_utils.py;h=ae75ed07ed2a9d726818f42b1ab87939a1ccac81;hb=1d839a6f02bf3340daea6b1c617eba0cd9e8cefb;hp=f82ec4b5e7887ff9a22131de5ab708f7ce8fdbb0;hpb=8862fd84dbe1146aa0b4cd2fd3a90f374a6d6246;p=pyutils.git diff --git a/src/pyutils/string_utils.py b/src/pyutils/string_utils.py index f82ec4b..ae75ed0 100644 --- a/src/pyutils/string_utils.py +++ b/src/pyutils/string_utils.py @@ -25,9 +25,12 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -This class is based on: https://github.com/daveoncode/python-string-utils. -See NOTICE in the root of this module for a detailed enumeration of what -work is Davide's and what work was added by Scott. +This class is based on: +https://github.com/daveoncode/python-string-utils. See `NOTICE +`__ +in the root of this module for a detailed enumeration of what work is +Davide's and what work was added by Scott. + """ import base64 @@ -80,9 +83,9 @@ URLS_RAW_STRING = ( r"(#\S*)?" # hash ) -URL_RE = re.compile(r"^{}$".format(URLS_RAW_STRING), re.IGNORECASE) +URL_RE = re.compile(rf"^{URLS_RAW_STRING}$", re.IGNORECASE) -URLS_RE = re.compile(r"({})".format(URLS_RAW_STRING), re.IGNORECASE) +URLS_RE = re.compile(rf"({URLS_RAW_STRING})", re.IGNORECASE) ESCAPED_AT_SIGN = re.compile(r'(?!"[^"]*)@+(?=[^"]*")|\\@') @@ -90,9 +93,9 @@ EMAILS_RAW_STRING = ( r"[a-zA-Z\d._\+\-'`!%#$&*/=\?\^\{\}\|~\\]+@[a-z\d-]+\.?[a-z\d-]+\.[a-z]{2,4}" ) -EMAIL_RE = re.compile(r"^{}$".format(EMAILS_RAW_STRING)) +EMAIL_RE = re.compile(rf"^{EMAILS_RAW_STRING}$") -EMAILS_RE = re.compile(r"({})".format(EMAILS_RAW_STRING)) +EMAILS_RE = re.compile(rf"({EMAILS_RAW_STRING})") CAMEL_CASE_TEST_RE = re.compile(r"^[a-zA-Z]*([a-z]+[A-Z]+|[A-Z]+[a-z]+)[a-zA-Z\d]*$") @@ -162,7 +165,7 @@ NO_LETTERS_OR_NUMBERS_RE = re.compile(r"[^\w\d]+|_+", re.IGNORECASE | re.UNICODE MARGIN_RE = re.compile(r"^[^\S\r\n]+") -ESCAPE_SEQUENCE_RE = re.compile(r"\[[^A-Za-z]*[A-Za-z]") +ESCAPE_SEQUENCE_RE = re.compile(r"\x1B\[[^A-Za-z]*[A-Za-z]") NUM_SUFFIXES = { "Pb": (1024**5), @@ -213,7 +216,14 @@ TENS_WORDS = [ "ninety", ] -scales = ["hundred", "thousand", "million", "billion", "trillion", "quadrillion"] +MAGNITUDE_SCALES = [ + "hundred", + "thousand", + "million", + "billion", + "trillion", + "quadrillion", +] NUM_WORDS = {} NUM_WORDS["and"] = (1, 0) @@ -221,7 +231,7 @@ for i, word in enumerate(UNIT_WORDS): NUM_WORDS[word] = (1, i) for i, word in enumerate(TENS_WORDS): NUM_WORDS[word] = (1, i * 10) -for i, word in enumerate(scales): +for i, word in enumerate(MAGNITUDE_SCALES): if i == 0: NUM_WORDS[word] = (100, 0) else: @@ -238,6 +248,8 @@ def is_none_or_empty(in_str: Optional[str]) -> bool: True if the input string is either None or an empty string, False otherwise. + See also :meth:`is_string` and :meth:`is_empty_string`. + >>> is_none_or_empty("") True >>> is_none_or_empty(None) @@ -250,7 +262,7 @@ def is_none_or_empty(in_str: Optional[str]) -> bool: return in_str is None or len(in_str.strip()) == 0 -def is_string(obj: Any) -> bool: +def is_string(in_str: Any) -> bool: """ Args: in_str: the object to test @@ -258,6 +270,8 @@ def is_string(obj: Any) -> bool: Returns: True if the object is a string and False otherwise. + See also :meth:`is_empty_string`, :meth:`is_none_or_empty`. + >>> is_string('test') True >>> is_string(123) @@ -267,7 +281,7 @@ def is_string(obj: Any) -> bool: >>> is_string([1, 2, 3]) False """ - return isinstance(obj, str) + return isinstance(in_str, str) def is_empty_string(in_str: Any) -> bool: @@ -277,6 +291,8 @@ def is_empty_string(in_str: Any) -> bool: Returns: True if the string is empty and False otherwise. + + See also :meth:`is_none_or_empty`, :meth:`is_full_string`. """ return is_empty(in_str) @@ -289,6 +305,8 @@ def is_empty(in_str: Any) -> bool: Returns: True if the string is empty and false otherwise. + See also :meth:`is_none_or_empty`, :meth:`is_full_string`. + >>> is_empty('') True >>> is_empty(' \t\t ') @@ -312,6 +330,8 @@ def is_full_string(in_str: Any) -> bool: True if the object is a string and is not empty ('') and is not only composed of whitespace. + See also :meth:`is_string`, :meth:`is_empty_string`, :meth:`is_none_or_empty`. + >>> is_full_string('test!') True >>> is_full_string('') @@ -335,6 +355,10 @@ def is_number(in_str: str) -> bool: True if the string contains a valid numberic value and False otherwise. + See also :meth:`is_integer_number`, :meth:`is_decimal_number`, + :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`, + etc... + >>> is_number(100.5) Traceback (most recent call last): ... @@ -365,6 +389,10 @@ def is_integer_number(in_str: str) -> bool: decimal, hex, or octal, regular or scientific) integral expression and False otherwise. + See also :meth:`is_number`, :meth:`is_decimal_number`, + :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`, + etc... + >>> is_integer_number('42') True >>> is_integer_number('42.0') @@ -386,6 +414,9 @@ def is_hexidecimal_integer_number(in_str: str) -> bool: Returns: True if the string is a hex integer number and False otherwise. + See also :meth:`is_integer_number`, :meth:`is_decimal_number`, + :meth:`is_octal_integer_number`, :meth:`is_binary_integer_number`, etc... + >>> is_hexidecimal_integer_number('0x12345') True >>> is_hexidecimal_integer_number('0x1A3E') @@ -422,6 +453,10 @@ def is_octal_integer_number(in_str: str) -> bool: Returns: True if the string is a valid octal integral number and False otherwise. + See also :meth:`is_integer_number`, :meth:`is_decimal_number`, + :meth:`is_hexidecimal_integer_number`, :meth:`is_binary_integer_number`, + etc... + >>> is_octal_integer_number('0o777') True >>> is_octal_integer_number('-0O115') @@ -446,6 +481,10 @@ def is_binary_integer_number(in_str: str) -> bool: Returns: True if the string contains a binary integral number and False otherwise. + See also :meth:`is_integer_number`, :meth:`is_decimal_number`, + :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`, + etc... + >>> is_binary_integer_number('0b10111') True >>> is_binary_integer_number('-0b111') @@ -472,8 +511,18 @@ def to_int(in_str: str) -> int: Returns: The integral value of the string or raises on error. + See also :meth:`is_integer_number`, :meth:`is_decimal_number`, + :meth:`is_hexidecimal_integer_number`, :meth:`is_octal_integer_number`, + :meth:`is_binary_integer_number`, etc... + >>> to_int('1234') 1234 + >>> to_int('0x1234') + 4660 + >>> to_int('0b01101') + 13 + >>> to_int('0o777') + 511 >>> to_int('test') Traceback (most recent call last): ... @@ -493,6 +542,18 @@ def to_int(in_str: str) -> int: def number_string_to_integer(in_str: str) -> int: """Convert a string containing a written-out number into an int. + Args: + in_str: the string containing the long-hand written out integer number + in English. See examples below. + + Returns: + The integer whose value was parsed from in_str. + + See also :meth:`integer_to_number_string`. + + .. warning:: + This code only handles integers; it will not work with decimals / floats. + >>> number_string_to_integer("one hundred fifty two") 152 @@ -507,19 +568,19 @@ def number_string_to_integer(in_str: str) -> int: ... ValueError: Unknown word: xyzzy """ - if type(in_str) == int: - return in_str + if isinstance(in_str, int): + return int(in_str) current = result = 0 in_str = in_str.replace('-', ' ') - for word in in_str.split(): - if word not in NUM_WORDS: - if is_integer_number(word): - current += int(word) + for w in in_str.split(): + if w not in NUM_WORDS: + if is_integer_number(w): + current += int(w) continue else: - raise ValueError("Unknown word: " + word) - scale, increment = NUM_WORDS[word] + raise ValueError("Unknown word: " + w) + scale, increment = NUM_WORDS[w] current = current * scale + increment if scale > 100: result += current @@ -529,8 +590,19 @@ def number_string_to_integer(in_str: str) -> int: def integer_to_number_string(num: int) -> str: """ - Opposite of number_string_to_integer; convert a number to a written out - longhand format. + Opposite of :meth:`number_string_to_integer`; converts a number to a written out + longhand format in English. + + Args: + num: the integer number to convert + + Returns: + The long-hand written out English form of the number. See examples below. + + See also :meth:`number_string_to_integer`. + + .. warning:: + This method does not handle decimals or floats, only ints. >>> integer_to_number_string(9) 'nine' @@ -540,7 +612,6 @@ def integer_to_number_string(num: int) -> str: >>> integer_to_number_string(123219982) 'one hundred twenty three million two hundred nineteen thousand nine hundred eighty two' - """ if num < 20: @@ -583,6 +654,8 @@ def is_decimal_number(in_str: str) -> bool: otherwise. A decimal may be signed or unsigned or use a "scientific notation". + See also :meth:`is_integer_number`. + .. note:: We do not consider integers without a decimal point to be decimals; they return False (see example). @@ -603,19 +676,23 @@ def strip_escape_sequences(in_str: str) -> str: Returns: in_str with escape sequences removed. + See also: :mod:`pyutils.ansi`. + .. note:: What is considered to be an "escape sequence" is defined by a regular expression. While this gets common ones, there may exist valid sequences that it doesn't match. - >>> strip_escape_sequences('this is a test!') + >>> strip_escape_sequences('\x1B[12;11;22mthis is a test!') 'this is a test!' """ in_str = ESCAPE_SEQUENCE_RE.sub("", in_str) return in_str -def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str: +def add_thousands_separator( + in_str: str, *, separator_char: str = ',', places: int = 3 +) -> str: """ Args: in_str: string or number to which to add thousands separator(s) @@ -647,6 +724,7 @@ def add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str def _add_thousands_separator(in_str: str, *, separator_char=',', places=3) -> str: + """Internal helper""" decimal_part = "" if '.' in in_str: (in_str, decimal_part) = in_str.split('.') @@ -743,6 +821,8 @@ def suffix_string_to_number(in_str: str) -> Optional[int]: Returns: An integer number of bytes or None to indicate an error. + See also :meth:`number_to_suffix_string`. + >>> suffix_string_to_number('1Mb') 1048576 >>> suffix_string_to_number('13.1Gb') @@ -784,6 +864,8 @@ def number_to_suffix_string(num: int) -> Optional[str]: A string with a suffix representing num bytes concisely or None to indicate an error. + See also: :meth:`suffix_string_to_number`. + >>> number_to_suffix_string(14066017894) '13.1Gb' >>> number_to_suffix_string(1024 * 1024) @@ -821,6 +903,13 @@ def is_credit_card(in_str: Any, card_type: str = None) -> bool: Returns: True if in_str is a valid credit card number. + + .. warning:: + This code is not verifying the authenticity of the credit card (i.e. + not checking whether it's a real card that can be charged); rather + it's only checking that the number follows the "rules" for numbering + established by credit card issuers. + """ if not is_full_string(in_str): return False @@ -849,6 +938,8 @@ def is_camel_case(in_str: Any) -> bool: * it's composed only by letters ([a-zA-Z]) and optionally numbers ([0-9]) * it contains both lowercase and uppercase letters * it does not start with a number + + See also :meth:`is_snake_case`, :meth:`is_slug`, and :meth:`camel_case_to_snake_case`. """ return is_full_string(in_str) and CAMEL_CASE_TEST_RE.match(in_str) is not None @@ -857,6 +948,7 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool: """ Args: in_str: the string to test + separator: the snake case separator character to use Returns: True if the string is snake case and False otherwise. A string is considered snake case when: @@ -865,6 +957,8 @@ def is_snake_case(in_str: Any, *, separator: str = "_") -> bool: * it contains at least one underscore (or provided separator) * it does not start with a number + See also :meth:`is_camel_case`, :meth:`is_slug`, and :meth:`snake_case_to_camel_case`. + >>> is_snake_case('this_is_a_test') True >>> is_snake_case('___This_Is_A_Test_1_2_3___') @@ -912,10 +1006,13 @@ def is_uuid(in_str: Any, allow_hex: bool = False) -> bool: """ Args: in_str: the string to test + allow_hex: should we allow hexidecimal digits in valid uuids? Returns: True if the in_str contains a valid UUID and False otherwise. + See also :meth:`generate_uuid`. + >>> is_uuid('6f8aa2f9-686c-4ac3-8766-5712354a04cf') True >>> is_uuid('6f8aa2f9686c4ac387665712354a04cf') @@ -938,6 +1035,9 @@ def is_ip_v4(in_str: Any) -> bool: Returns: True if in_str contains a valid IPv4 address and False otherwise. + See also :meth:`extract_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`, + and :meth:`is_ip`. + >>> is_ip_v4('255.200.100.75') True >>> is_ip_v4('nope') @@ -964,6 +1064,9 @@ def extract_ip_v4(in_str: Any) -> Optional[str]: The first extracted IPv4 address from in_str or None if none were found or an error occurred. + See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`, + and :meth:`is_ip`. + >>> extract_ip_v4(' The secret IP address: 127.0.0.1 (use it wisely) ') '127.0.0.1' >>> extract_ip_v4('Your mom dresses you funny.') @@ -984,6 +1087,9 @@ def is_ip_v6(in_str: Any) -> bool: Returns: True if in_str contains a valid IPv6 address and False otherwise. + See also :meth:`is_ip_v4`, :meth:`extract_ip_v4`, :meth:`extract_ip_v6`, + and :meth:`is_ip`. + >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:7334') True >>> is_ip_v6('2001:db8:85a3:0000:0000:8a2e:370:?') # invalid "?" @@ -1001,6 +1107,9 @@ def extract_ip_v6(in_str: Any) -> Optional[str]: The first IPv6 address found in in_str or None if no address was found or an error occurred. + See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v4`, + and :meth:`is_ip`. + >>> extract_ip_v6('IP: 2001:db8:85a3:0000:0000:8a2e:370:7334') '2001:db8:85a3:0000:0000:8a2e:370:7334' >>> extract_ip_v6("(and she's ugly too, btw)") @@ -1022,6 +1131,9 @@ def is_ip(in_str: Any) -> bool: True if in_str contains a valid IP address (either IPv4 or IPv6). + See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`, + and :meth:`extract_ip_v4`. + >>> is_ip('255.200.100.75') True >>> is_ip('2001:db8:85a3:0000:0000:8a2e:370:7334') @@ -1043,6 +1155,9 @@ def extract_ip(in_str: Any) -> Optional[str]: The first IP address (IPv4 or IPv6) found in in_str or None to indicate none found or an error condition. + See also :meth:`is_ip_v4`, :meth:`is_ip_v6`, :meth:`extract_ip_v6`, + and :meth:`extract_ip_v4`. + >>> extract_ip('Attacker: 255.200.100.75') '255.200.100.75' >>> extract_ip('Remote host: 2001:db8:85a3:0000:0000:8a2e:370:7334') @@ -1063,6 +1178,8 @@ def is_mac_address(in_str: Any) -> bool: Returns: True if in_str is a valid MAC address False otherwise. + See also :meth:`extract_mac_address`, :meth:`is_ip`, etc... + >>> is_mac_address("34:29:8F:12:0D:2F") True >>> is_mac_address('34:29:8f:12:0d:2f') @@ -1084,6 +1201,8 @@ def extract_mac_address(in_str: Any, *, separator: str = ":") -> Optional[str]: The first MAC address found in in_str or None to indicate no match or an error. + See also :meth:`is_mac_address`, :meth:`is_ip`, and :meth:`extract_ip`. + >>> extract_mac_address(' MAC Address: 34:29:8F:12:0D:2F') '34:29:8F:12:0D:2F' @@ -1106,10 +1225,13 @@ def is_slug(in_str: Any, separator: str = "-") -> bool: """ Args: in_str: string to test + separator: the slug character to use Returns: True if in_str is a slug string and False otherwise. + See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`slugify`. + >>> is_slug('my-blog-post-title') True >>> is_slug('My blog post title') @@ -1130,6 +1252,8 @@ def contains_html(in_str: str) -> bool: True if the given string contains HTML/XML tags and False otherwise. + See also :meth:`strip_html`. + .. warning:: By design, this function matches ANY type of tag, so don't expect to use it as an HTML validator. It's a quick sanity check at @@ -1156,7 +1280,6 @@ def words_count(in_str: str) -> int: The number of words contained in the given string. .. note:: - This method is "smart" in that it does consider only sequences of one or more letter and/or numbers to be "words". Thus a string like this: "! @ # % ... []" will return zero. Moreover @@ -1183,7 +1306,6 @@ def word_count(in_str: str) -> int: The number of words contained in the given string. .. note:: - This method is "smart" in that it does consider only sequences of one or more letter and/or numbers to be "words". Thus a string like this: "! @ # % ... []" will return zero. Moreover @@ -1208,6 +1330,8 @@ def generate_uuid(omit_dashes: bool = False) -> str: A generated UUID string (using `uuid.uuid4()`) with or without dashes per the omit_dashes arg. + See also :meth:`is_uuid`, :meth:`generate_random_alphanumeric_string`. + generate_uuid() # possible output: '97e3a716-6b33-4ab9-9bb1-8128cb24d76b' generate_uuid(omit_dashes=True) # possible output: '97e3a7166b334ab99bb18128cb24d76b' """ @@ -1226,6 +1350,8 @@ def generate_random_alphanumeric_string(size: int) -> str: A string of the specified size containing random characters (uppercase/lowercase ascii letters and digits). + See also :meth:`asciify`, :meth:`generate_uuid`. + >>> random.seed(22) >>> generate_random_alphanumeric_string(9) '96ipbNClS' @@ -1253,16 +1379,19 @@ def reverse(in_str: str) -> str: return in_str[::-1] -def camel_case_to_snake_case(in_str, *, separator="_"): +def camel_case_to_snake_case(in_str: str, *, separator: str = "_"): """ Args: in_str: the camel case string to convert + separator: the snake case separator character to use Returns: A snake case string equivalent to the camel case input or the original string if it is not a valid camel case string or some other error occurs. + See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`. + >>> camel_case_to_snake_case('MacAddressExtractorFactory') 'mac_address_extractor_factory' >>> camel_case_to_snake_case('Luke Skywalker') @@ -1281,12 +1410,16 @@ def snake_case_to_camel_case( """ Args: in_str: the snake case string to convert + upper_case_first: should we capitalize the first letter? + separator: the separator character to use Returns: A camel case string that is equivalent to the snake case string provided or the original string back again if it is not valid snake case or another error occurs. + See also :meth:`is_camel_case`, :meth:`is_snake_case`, and :meth:`is_slug`. + >>> snake_case_to_camel_case('this_is_a_test') 'ThisIsATest' >>> snake_case_to_camel_case('Han Solo') @@ -1310,6 +1443,8 @@ def to_char_list(in_str: str) -> List[str]: Returns: A list of strings of length one each. + See also :meth:`from_char_list`. + >>> to_char_list('test') ['t', 'e', 's', 't'] """ @@ -1327,6 +1462,8 @@ def from_char_list(in_list: List[str]) -> str: The string resulting from gluing the characters in in_list together. + See also :meth:`to_char_list`. + >>> from_char_list(['t', 'e', 's', 't']) 'test' """ @@ -1366,6 +1503,8 @@ def scramble(in_str: str) -> Optional[str]: in the same original string as no check is done. Returns None to indicate error conditions. + See also :mod:`pyutils.unscrambler`. + >>> random.seed(22) >>> scramble('awesome') 'meosaew' @@ -1383,6 +1522,8 @@ def strip_html(in_str: str, keep_tag_content: bool = False) -> str: A string with all HTML tags removed (optionally with tag contents preserved). + See also :meth:`contains_html`. + .. note:: This method uses simple regular expressions to strip tags and is not a full fledged HTML parser by any means. Consider using @@ -1411,6 +1552,8 @@ def asciify(in_str: str) -> str: by translating all non-ascii chars into their closest possible ASCII representation (eg: ó -> o, Ë -> E, ç -> c...). + See also :meth:`to_ascii`, :meth:`generate_random_alphanumeric_string`. + .. warning:: Some chars may be lost if impossible to translate. @@ -1449,6 +1592,8 @@ def slugify(in_str: str, *, separator: str = "-") -> str: * all chars are encoded as ascii (by using :meth:`asciify`) * is safe for URL + See also :meth:`is_slug` and :meth:`asciify`. + >>> slugify('Top 10 Reasons To Love Dogs!!!') 'top-10-reasons-to-love-dogs' >>> slugify('Mönstér Mägnët') @@ -1487,6 +1632,8 @@ def to_bool(in_str: str) -> bool: Otherwise False is returned. + See also :mod:`pyutils.argparse_utils`. + >>> to_bool('True') True @@ -1507,7 +1654,7 @@ def to_bool(in_str: str) -> bool: """ if not is_string(in_str): raise ValueError(in_str) - return in_str.lower() in ("true", "1", "yes", "y", "t", "on") + return in_str.lower() in set(["true", "1", "yes", "y", "t", "on"]) def to_date(in_str: str) -> Optional[datetime.date]: @@ -1518,13 +1665,16 @@ def to_date(in_str: str) -> Optional[datetime.date]: Returns: The datetime.date the string contained or None to indicate an error. This parser is relatively clever; see - :class:`datetimez.dateparse_utils` docs for details. + :class:`datetimes.dateparse_utils` docs for details. + + See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`extract_date`, + :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`. >>> to_date('9/11/2001') datetime.date(2001, 9, 11) >>> to_date('xyzzy') """ - import pyutils.datetimez.dateparse_utils as du + import pyutils.datetimes.dateparse_utils as du try: d = du.DateParser() # type: ignore @@ -1545,6 +1695,9 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]: Returns: a datetime if date was found, otherwise None + See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`, + :meth:`is_valid_date`, :meth:`to_datetime`, :meth:`valid_datetime`. + >>> extract_date("filename.txt dec 13, 2022") datetime.datetime(2022, 12, 13, 0, 0) @@ -1553,7 +1706,7 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]: """ import itertools - import pyutils.datetimez.dateparse_utils as du + import pyutils.datetimes.dateparse_utils as du d = du.DateParser() # type: ignore chunks = in_str.split() @@ -1565,7 +1718,7 @@ def extract_date(in_str: Any) -> Optional[datetime.datetime]: ): try: expr = " ".join(ngram) - logger.debug(f"Trying {expr}") + logger.debug("Trying %s", expr) if d.parse(expr): return d.get_datetime() except du.ParseException: # type: ignore @@ -1581,7 +1734,10 @@ def is_valid_date(in_str: str) -> bool: Returns: True if the string represents a valid date that we can recognize and False otherwise. This parser is relatively clever; see - :class:`datetimez.dateparse_utils` docs for details. + :class:`datetimes.dateparse_utils` docs for details. + + See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`, + :meth:`extract_date`, :meth:`to_datetime`, :meth:`valid_datetime`. >>> is_valid_date('1/2/2022') True @@ -1592,7 +1748,7 @@ def is_valid_date(in_str: str) -> bool: >>> is_valid_date('xyzzy') False """ - import pyutils.datetimez.dateparse_utils as dp + import pyutils.datetimes.dateparse_utils as dp try: d = dp.DateParser() # type: ignore @@ -1612,12 +1768,15 @@ def to_datetime(in_str: str) -> Optional[datetime.datetime]: Returns: A python datetime parsed from in_str or None to indicate an error. This parser is relatively clever; see - :class:`datetimez.dateparse_utils` docs for details. + :class:`datetimes.dateparse_utils` docs for details. + + See also: :mod:`pyutils.datetimes.dateparse_utils`, :meth:`to_date`, + :meth:`extract_date`, :meth:`valid_datetime`. >>> to_datetime('7/20/1969 02:56 GMT') datetime.datetime(1969, 7, 20, 2, 56, tzinfo=) """ - import pyutils.datetimez.dateparse_utils as dp + import pyutils.datetimes.dateparse_utils as dp try: d = dp.DateParser() # type: ignore @@ -1638,7 +1797,7 @@ def valid_datetime(in_str: str) -> bool: Returns: True if in_str contains a valid datetime and False otherwise. This parser is relatively clever; see - :class:`datetimez.dateparse_utils` docs for details. + :class:`datetimes.dateparse_utils` docs for details. >>> valid_datetime('next wednesday at noon') True @@ -1689,9 +1848,7 @@ def dedent(in_str: str) -> Optional[str]: Returns: A string with tab indentation removed or None on error. - .. note:: - - Inspired by analogous Scala function. + See also :meth:`indent`. >>> dedent('\t\ttest\\n\t\ting') 'test\\ning' @@ -1712,6 +1869,8 @@ def indent(in_str: str, amount: int) -> str: Returns: An indented string created by prepending amount spaces. + See also :meth:`dedent`. + >>> indent('This is a test', 4) ' This is a test' """ @@ -1722,16 +1881,8 @@ def indent(in_str: str, amount: int) -> str: return line_separator.join(lines) -def sprintf(*args, **kwargs) -> str: - """ - Args: - This function uses the same syntax as the builtin print - function. - - Returns: - An interpolated string capturing print output, like man(3) - `sprintf`. - """ +def _sprintf(*args, **kwargs) -> str: + """Internal helper.""" ret = "" sep = kwargs.pop("sep", None) @@ -1751,8 +1902,8 @@ def sprintf(*args, **kwargs) -> str: sep = " " if end is None: end = "\n" - for i, arg in enumerate(args): - if i: + for n, arg in enumerate(args): + if n: ret += sep if isinstance(arg, str): ret += arg @@ -1770,6 +1921,8 @@ def strip_ansi_sequences(in_str: str) -> str: Returns: in_str with recognized ANSI escape sequences removed. + See also :mod:`pyutils.ansi`. + .. warning:: This method works by using a regular expression. It works for all ANSI escape sequences I've tested with but @@ -1800,7 +1953,6 @@ class SprintfStdout(contextlib.AbstractContextManager): >>> print(buf(), end='') test 1, 2, 3 - """ def __init__(self) -> None: @@ -1830,7 +1982,6 @@ def capitalize_first_letter(in_str: str) -> str: 'Test' >>> capitalize_first_letter("ALREADY!") 'ALREADY!' - """ return in_str[0].upper() + in_str[1:] @@ -1843,6 +1994,9 @@ def it_they(n: int) -> str: Returns: 'it' if n is one or 'they' otherwize. + See also :meth:`is_are`, :meth:`pluralize`, :meth:`make_contractions`, + :meth:`thify`. + Suggested usage:: n = num_files_saved_to_tmp() @@ -1867,6 +2021,9 @@ def is_are(n: int) -> str: Returns: 'is' if n is one or 'are' otherwize. + See also :meth:`it_they`, :meth:`pluralize`, :meth:`make_contractions`, + :meth:`thify`. + Suggested usage:: n = num_files_saved_to_tmp() @@ -1892,6 +2049,9 @@ def pluralize(n: int) -> str: Returns: 's' if n is greater than one otherwize ''. + See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`, + :meth:`thify`. + Suggested usage:: n = num_files_saved_to_tmp() @@ -1923,6 +2083,8 @@ def make_contractions(txt: str) -> str: Output text identical to original input except for any recognized contractions are formed. + See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`. + .. note:: The order in which we create contractions is defined by the implementation and what I thought made more sense when writing @@ -2005,7 +2167,7 @@ def make_contractions(txt: str) -> str: for second in second_list: # Disallow there're/where're. They're valid English # but sound weird. - if (first in ('there', 'where')) and second == 'a(re)': + if (first in set(['there', 'where'])) and second == 'a(re)': continue pattern = fr'\b({first})\s+{second}\b' @@ -2026,6 +2188,8 @@ def thify(n: int) -> str: Returns: The proper cardinal suffix for a number. + See also :meth:`it_they`, :meth:`is_are`, :meth:`make_contractions`. + Suggested usage:: attempt_count = 0 @@ -2064,20 +2228,24 @@ def ngrams(txt: str, n: int): Returns: Generates the ngrams from the input string. + See also :meth:`ngrams_presplit`, :meth:`bigrams`, :meth:`trigrams`. + >>> [x for x in ngrams('This is a test', 2)] ['This is', 'is a', 'a test'] """ words = txt.split() for ngram in ngrams_presplit(words, n): ret = '' - for word in ngram: - ret += f'{word} ' + for w in ngram: + ret += f'{w} ' yield ret.strip() def ngrams_presplit(words: Sequence[str], n: int): """ Same as :meth:`ngrams` but with the string pre-split. + + See also :meth:`ngrams`, :meth:`bigrams`, :meth:`trigrams`. """ return list_utils.ngrams(words, n) @@ -2085,6 +2253,8 @@ def ngrams_presplit(words: Sequence[str], n: int): def bigrams(txt: str): """Generates the bigrams (n=2) of the given string. + See also :meth:`ngrams`, :meth:`trigrams`. + >>> [x for x in bigrams('this is a test')] ['this is', 'is a', 'a test'] """ @@ -2092,12 +2262,15 @@ def bigrams(txt: str): def trigrams(txt: str): - """Generates the trigrams (n=3) of the given string.""" + """Generates the trigrams (n=3) of the given string. + + See also :meth:`ngrams`, :meth:`bigrams`. + """ return ngrams(txt, 3) def shuffle_columns_into_list( - input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim='' + input_lines: Sequence[str], column_specs: Iterable[Iterable[int]], delim: str = '' ) -> Iterable[str]: """Helper to shuffle / parse columnar data and return the results as a list. @@ -2116,6 +2289,8 @@ def shuffle_columns_into_list( A list of string created by following the instructions set forth in column_specs. + See also :meth:`shuffle_columns_into_dict`. + >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split() >>> shuffle_columns_into_list( ... cols, @@ -2140,7 +2315,7 @@ def shuffle_columns_into_list( def shuffle_columns_into_dict( input_lines: Sequence[str], column_specs: Iterable[Tuple[str, Iterable[int]]], - delim='', + delim: str = '', ) -> Dict[str, str]: """Helper to shuffle / parse columnar data and return the results as a dict. @@ -2158,6 +2333,8 @@ def shuffle_columns_into_dict( Returns: A dict formed by applying the column_specs instructions. + See also :meth:`shuffle_columns_into_list`, :meth:`interpolate_using_dict`. + >>> cols = '-rwxr-xr-x 1 scott wheel 3.1K Jul 9 11:34 acl_test.py'.split() >>> shuffle_columns_into_dict( ... cols, @@ -2187,11 +2364,13 @@ def interpolate_using_dict(txt: str, values: Dict[str, str]) -> str: txt: the mad libs template values: what you and your kids chose for each category. + See also :meth:`shuffle_columns_into_list`, :meth:`shuffle_columns_into_dict`. + >>> interpolate_using_dict('This is a {adjective} {noun}.', ... {'adjective': 'good', 'noun': 'example'}) 'This is a good example.' """ - return sprintf(txt.format(**values), end='') + return _sprintf(txt.format(**values), end='') def to_ascii(txt: str): @@ -2202,6 +2381,9 @@ def to_ascii(txt: str): Returns: txt encoded as an ASCII byte string. + See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`to_bytes`, + :meth:`generate_random_alphanumeric_string`, :meth:`asciify`. + >>> to_ascii('test') b'test' @@ -2215,15 +2397,22 @@ def to_ascii(txt: str): raise Exception('to_ascii works with strings and bytes') -def to_base64(txt: str, *, encoding='utf-8', errors='surrogatepass') -> bytes: +def to_base64( + txt: str, *, encoding: str = 'utf-8', errors: str = 'surrogatepass' +) -> bytes: """ Args: txt: the input data to encode + encoding: the encoding to use during conversion + errors: how to handle encoding errors Returns: txt encoded with a 64-chracter alphabet. Similar to and compatible with uuencode/uudecode. + See also :meth:`is_base64`, :meth:`to_ascii`, :meth:`to_bitstring`, + :meth:`from_base64`. + >>> to_base64('hello?') b'aGVsbG8/\\n' """ @@ -2240,6 +2429,8 @@ def is_base64(txt: str) -> bool: txt was encoded with Python's standard base64 alphabet which is the same as what uuencode/uudecode uses). + See also :meth:`to_base64`, :meth:`from_base64`. + >>> is_base64('test') # all letters in the b64 alphabet True @@ -2258,15 +2449,21 @@ def is_base64(txt: str) -> bool: return True -def from_base64(b64: bytes, encoding='utf-8', errors='surrogatepass') -> str: +def from_base64( + b64: bytes, encoding: str = 'utf-8', errors: str = 'surrogatepass' +) -> str: """ Args: b64: bytestring of 64-bit encoded data to decode / convert. + encoding: the encoding to use during conversion + errors: how to handle encoding errors Returns: The decoded form of b64 as a normal python string. Similar to and compatible with uuencode / uudecode. + See also :meth:`to_base64`, :meth:`is_base64`. + >>> from_base64(b'aGVsbG8/\\n') 'hello?' """ @@ -2293,7 +2490,7 @@ def chunk(txt: str, chunk_size: int): yield txt[x : x + chunk_size] -def to_bitstring(txt: str, *, delimiter='') -> str: +def to_bitstring(txt: str, *, delimiter: str = '') -> str: """ Args: txt: the string to convert into a bitstring @@ -2304,6 +2501,9 @@ def to_bitstring(txt: str, *, delimiter='') -> str: Returns: txt converted to ascii/binary and then chopped into bytes. + See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`is_bitstring`, + :meth:`chunk`. + >>> to_bitstring('hello?') '011010000110010101101100011011000110111100111111' @@ -2329,6 +2529,9 @@ def is_bitstring(txt: str) -> bool: Note that if delimiter is non empty this code will not recognize the bitstring. + See also :meth:`to_base64`, :meth:`from_bitstring`, :meth:`to_bitstring`, + :meth:`chunk`. + >>> is_bitstring('011010000110010101101100011011000110111100111111') True @@ -2338,16 +2541,22 @@ def is_bitstring(txt: str) -> bool: return is_binary_integer_number(f'0b{txt}') -def from_bitstring(bits: str, encoding='utf-8', errors='surrogatepass') -> str: +def from_bitstring( + bits: str, encoding: str = 'utf-8', errors: str = 'surrogatepass' +) -> str: """ Args: bits: the bitstring to convert back into a python string - encoding: the encoding to use + encoding: the encoding to use during conversion + errors: how to handle encoding errors Returns: The regular python string represented by bits. Note that this code does not work with to_bitstring when delimiter is non-empty. + See also :meth:`to_base64`, :meth:`to_bitstring`, :meth:`is_bitstring`, + :meth:`chunk`. + >>> from_bitstring('011010000110010101101100011011000110111100111111') 'hello?' """ @@ -2365,6 +2574,8 @@ def ip_v4_sort_key(txt: str) -> Optional[Tuple[int, ...]]: IP addresses using a normal comparator will do something sane and desireable. + See also :meth:`is_ip_v4`. + >>> ip_v4_sort_key('10.0.0.18') (10, 0, 0, 18) @@ -2388,6 +2599,8 @@ def path_ancestors_before_descendants_sort_key(volume: str) -> Tuple[str, ...]: volumes using a normal comparator will do something sane and desireable. + See also :mod:`pyutils.files.file_utils`. + >>> path_ancestors_before_descendants_sort_key('/usr/local/bin') ('usr', 'local', 'bin') @@ -2408,6 +2621,8 @@ def replace_all(in_str: str, replace_set: str, replacement: str) -> str: replacement: the character to replace any member of replace_set with + See also :meth:`replace_nth`. + Returns: The string with replacements executed. @@ -2430,6 +2645,8 @@ def replace_nth(in_str: str, source: str, target: str, nth: int): target: the replacement text nth: which occurrance of source to replace? + See also :meth:`replace_all`. + >>> replace_nth('this is a test', ' ', '-', 3) 'this is a-test' """