+#!/usr/bin/env python3
+
import string
import re
"zoophilia",
]
def normalize(self, text: str) -> str:
    """Return a canonical form of *text* for bad-word list lookups.

    The text is lowercased, underscores are turned into spaces, every
    character in ``string.punctuation`` is removed, and a single trailing
    "s" or "es" at the very end of the string is stripped.

    Args:
        text: A word or a space-joined phrase.

    Returns:
        The normalized string.
    """
    result = text.lower().replace("_", " ")
    # Remove punctuation in one pass. Underscores were already turned into
    # spaces above, so they act as word separators rather than vanishing.
    result = result.translate(str.maketrans("", "", string.punctuation))
    # Strip the plural suffix once, and only at the end of the whole string.
    return re.sub(r"e?s$", "", result)
def filter_bad_words(self, text: str) -> str:
    """Return *text* with flagged words and phrases masked out.

    The text is split into whitespace-delimited tokens. Each single token,
    then each run of two and of three adjacent tokens (joined by one space),
    is looked up in ``self.arrBad`` both after ``self.normalize`` and
    verbatim. Every hit is reported on stdout via ``print``.

    Only the tokens that take part in a hit are masked, at their own
    position in the text; other occurrences of the same characters (for
    example inside a longer, unflagged word) are left alone. All original
    whitespace is preserved, and each mask has the same length as the token
    it replaces.

    Args:
        text: The text to filter.

    Returns:
        The text with every flagged token replaced by mask characters.
    """
    mask_pattern = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    # Keep the match objects so flagged tokens can be replaced by position
    # instead of by a global substring replace.
    matches = list(re.finditer(r"\S+", text))
    tokens = [match.group() for match in matches]
    flagged = set()

    for index, word in enumerate(tokens):
        if self.normalize(word) in self.arrBad or word in self.arrBad:
            print(f'***** PROFANITY WORD="{word}"')
            flagged.add(index)

    # All bigrams are scanned before any trigram.
    for width in (2, 3):
        for start in range(len(tokens) - width + 1):
            phrase = " ".join(tokens[start : start + width])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print(f'***** PROFANITY PHRASE="{phrase}"')
                flagged.update(range(start, start + width))

    if not flagged:
        return text

    pieces = []
    cursor = 0
    for index, match in enumerate(matches):
        if index not in flagged:
            continue
        length = match.end() - match.start()
        # Repeat the pattern so tokens longer than it still get a
        # full-length mask.
        repeats = length // len(mask_pattern) + 1
        pieces.append(text[cursor : match.start()])
        pieces.append((mask_pattern * repeats)[:length])
        cursor = match.end()
    pieces.append(text[cursor:])
    return "".join(pieces)
def contains_bad_words(self, text: str) -> bool:
    """Report whether *text* contains a flagged word or phrase.

    Whitespace-delimited tokens are checked one at a time, then every run
    of two adjacent tokens, then every run of three (each joined by a single
    space). A candidate is a hit when either its ``self.normalize`` form or
    its verbatim form is in ``self.arrBad``. The first hit is printed to
    stdout and ends the scan.

    Args:
        text: The text to inspect.

    Returns:
        True as soon as a hit is found, otherwise False.
    """
    tokens = text.split()

    def is_listed(candidate):
        # Normalized lookup first, verbatim lookup as the fallback.
        return self.normalize(candidate) in self.arrBad or candidate in self.arrBad

    for token in tokens:
        if is_listed(token):
            print(f'***** PROFANITY WORD="{token}"')
            return True

    # Bigrams are exhausted before any trigram is examined; a text with
    # fewer tokens than the width yields an empty range.
    for width in (2, 3):
        for start in range(len(tokens) - width + 1):
            phrase = " ".join(tokens[start : start + width])
            if is_listed(phrase):
                print(f'***** PROFANITY PHRASE="{phrase}"')
                return True

    return False