X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=profanity_filter.py;fp=profanity_filter.py;h=6329a5511febccf1e7cda7102141e1bc4b2812fc;hb=c06bfef53f70551e7920bc4facce27f47b89e2ba;hp=0925e67f7397f4aafe6be52345e4c2a3d9f8d993;hpb=6cc940e0df9b8ea937fb955f959fa878c80f0d7c;p=kiosk.git diff --git a/profanity_filter.py b/profanity_filter.py index 0925e67..6329a55 100644 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import string import re @@ -380,7 +382,7 @@ class profanity_filter: "zoophilia", ] - def normalize(self, text): + def normalize(self, text: str) -> str: result = text.lower() result = result.replace("_", " ") for x in string.punctuation: @@ -388,58 +390,57 @@ class profanity_filter: result = re.sub(r"e?s$", "", result) return result - def filter_bad_words(self, text): + def filter_bad_words(self, text: str) -> str: badWordMask = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!" brokenStr1 = text.split() for word in brokenStr1: if self.normalize(word) in self.arrBad or word in self.arrBad: - print(('***** PROFANITY WORD="%s"' % word)) + print(f'***** PROFANITY WORD="{word}"') text = text.replace(word, badWordMask[: len(word)]) if len(brokenStr1) > 1: bigrams = list(zip(brokenStr1, brokenStr1[1:])) for bigram in bigrams: - phrase = "%s %s" % (bigram[0], bigram[1]) + phrase = f"{bigram[0]} {bigram[1]}" if self.normalize(phrase) in self.arrBad or phrase in self.arrBad: - print(('***** PROFANITY PHRASE="%s"' % phrase)) + print(f'***** PROFANITY PHRASE="{phrase}"') text = text.replace(bigram[0], badWordMask[: len(bigram[0])]) text = text.replace(bigram[1], badWordMask[: len(bigram[1])]) if len(brokenStr1) > 2: trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:])) for trigram in trigrams: - phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2]) + phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}" if self.normalize(phrase) in self.arrBad or phrase in self.arrBad: - print(('***** PROFANITY PHRASE="%s"' % phrase)) + print(f'***** PROFANITY PHRASE="{phrase}"') text = text.replace(trigram[0], badWordMask[: len(trigram[0])]) text = text.replace(trigram[1], badWordMask[: len(trigram[1])]) text = text.replace(trigram[2], badWordMask[: len(trigram[2])]) return text - def contains_bad_words(self, text): + def contains_bad_words(self, text: str) -> bool: brokenStr1 = text.split() for word in brokenStr1: if self.normalize(word) in self.arrBad or word in self.arrBad: - print(('***** PROFANITY WORD="%s"' % word)) + print(f'***** PROFANITY WORD="{word}"') return True if len(brokenStr1) > 1: bigrams = list(zip(brokenStr1, brokenStr1[1:])) for bigram in bigrams: - phrase = "%s %s" % (bigram[0], bigram[1]) + phrase = f"{bigram[0]} {bigram[1]}" if self.normalize(phrase) in self.arrBad or phrase in self.arrBad: - print(('***** PROFANITY PHRASE="%s"' % phrase)) + print(f'***** PROFANITY PHRASE="{phrase}"') return True if len(brokenStr1) > 2: trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:])) for trigram in trigrams: - phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2]) + phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}" if self.normalize(phrase) in self.arrBad or phrase in self.arrBad: - print(('***** PROFANITY PHRASE="%s"' % phrase)) + print(f'***** PROFANITY PHRASE="{phrase}"') return True - return False