X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;h=fe5422179ba9a50c678188e088689184f139a14d;hb=52290f9c9e0eeaba3d5a067043f5ba98c9b386e5;hp=31577e0fbf1a4a76486dab066fff764aeb5bbc47;hpb=fa4298fa508e00759565c246aef423ba28fedf31;p=python_utils.git diff --git a/profanity_filter.py b/profanity_filter.py index 31577e0..fe54221 100755 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -347,6 +347,7 @@ class ProfanityFilter(object): 'poop chute', 'poopchute', 'porn', + 'pron', 'pornhub', 'porno', 'pornographi', @@ -471,6 +472,11 @@ class ProfanityFilter(object): def _normalize(self, text: str) -> str: result = text.lower() result = result.replace("_", " ") + result = result.replace('0', 'o') + result = result.replace('1', 'l') + result = result.replace('4', 'a') + result = result.replace('5', 's') + result = result.replace('3', 'e') for x in string.punctuation: result = result.replace(x, "") chunks = [ @@ -489,14 +495,14 @@ class ProfanityFilter(object): for bigram in string_utils.ngrams_presplit(words, 2): bigram = ' '.join(bigram) if self.is_bad_word(bigram): - logger.debug('"{bigram}" is profanity') + logger.debug(f'"{bigram}" is profanity') return True if len(words) > 2: for trigram in string_utils.ngrams_presplit(words, 3): trigram = ' '.join(trigram) if self.is_bad_word(trigram): - logger.debug('"{trigram}" is profanity') + logger.debug(f'"{trigram}" is profanity') return True return False