import re


def capitalize_first_letter(txt: str) -> str:
    """Capitalize the first letter of a string.

    Only the first character is upper-cased; the remainder of the string
    is returned unchanged.  An empty string is returned as-is.

    Args:
        txt: the string to capitalize.

    Returns:
        A copy of ``txt`` whose first character has been upper-cased.

    >>> capitalize_first_letter('test')
    'Test'
    >>> capitalize_first_letter("ALREADY!")
    'ALREADY!'
    >>> capitalize_first_letter('')
    ''

    """
    # Slice instead of indexing: txt[0] raises IndexError on '' whereas
    # txt[:1] is simply '' there.
    return txt[:1].upper() + txt[1:]


def it_they(n: int) -> str:
    """It or they?

    Args:
        n: how many things are being talked about.

    Returns:
        ``'it'`` when ``n`` is exactly 1, ``'they'`` for any other count.

    >>> it_they(1)
    'it'
    >>> it_they(100)
    'they'

    """
    return "it" if n == 1 else "they"


def make_contractions(txt: str) -> str:
    """Glue words together to form contractions.

    The rewrite is purely pattern based and case-insensitive; the letters
    that are kept retain the case they had in the input.  Three passes are
    applied, in this order:

    1. irregular negations: "can not" / "cannot", "shall not", "will not";
    2. regular negations: an auxiliary verb followed by "not";
    3. a pronoun or question word followed by would / is / are / has /
       have / had / will.

    Because no grammatical analysis is done, a main verb is contracted the
    same way an auxiliary would be (e.g. "She has a dog" becomes
    "She's a dog").

    Args:
        txt: the text in which to form contractions.

    Returns:
        ``txt`` with every matching word pair replaced by its contraction.

    >>> make_contractions('It is nice today.')
    "It's nice today."

    >>> make_contractions('I can not even...')
    "I can't even..."

    >>> make_contractions('She could not see!')
    "She couldn't see!"

    >>> make_contractions('But she will not go.')
    "But she won't go."

    >>> make_contractions('Verily, I shall not.')
    "Verily, I shan't."

    >>> make_contractions('No you cannot.')
    "No you can't."

    >>> make_contractions('I said you can not go.')
    "I said you can't go."

    """
    # Pass 1: negations whose contracted spelling is irregular.  These use
    # \s* (not \s+) so that the fused spelling "cannot" is handled too.
    irregular_negations = (
        (r'\b(can)\s*no(t)\b', r"\1'\2"),  # can not / cannot -> can't
        (r'\b(sha)ll\s*(n)o(t)\b', r"\1\2'\3"),  # shall not -> shan't
        (r'\b(w)ill\s*(n)(o)(t)\b', r"\1\3\2'\4"),  # will not -> won't
    )
    for pattern, replacement in irregular_negations:
        txt = re.sub(pattern, replacement, txt, flags=re.IGNORECASE)

    # Pass 2: <verb> not -> <verb>n't.  This runs before the pronoun pass
    # so "it is not" becomes "it isn't" rather than "it's not".
    negatable_verbs = (
        'are',
        'could',
        'did',
        'has',
        'have',
        'is',
        'must',
        'should',
        'was',
        'were',
        'would',
    )
    for verb in negatable_verbs:
        txt = re.sub(
            fr'\b({verb})\s+(n)o(t)\b', r"\1\2'\3", txt, flags=re.IGNORECASE
        )

    # Pass 3: <subject> <verb> -> <subject>'<verb ending>.  The capture
    # group inside each verb pattern marks the letters that survive.
    subjects = (
        "I",
        "you",
        "he",
        "she",
        "it",
        "we",
        "they",
        "how",
        "why",
        "when",
        "where",
        "who",
        "there",
    )
    verb_endings = (
        'woul(d)',
        'i(s)',
        'a(re)',
        'ha(s)',
        'ha(ve)',
        'ha(d)',
        'wi(ll)',
    )
    for subject in subjects:
        for ending in verb_endings:
            # Disallow there're.  It's valid English but sounds weird.
            if subject == 'there' and ending == 'a(re)':
                continue
            txt = re.sub(
                fr'\b({subject})\s+{ending}\b',
                r"\1'\2",
                txt,
                flags=re.IGNORECASE,
            )

    return txt