7 class profanity_filter:
125 "double penetration",
226 "missionary position",
def normalize(self, text: str) -> str:
    """Return a canonical form of *text* for word-list lookups.

    The text is lower-cased, underscores become spaces, every remaining
    ASCII punctuation character is removed, and a trailing "s" or "es"
    is stripped.  The suffix strip is purely textual: it does not check
    whether the ending is really a plural (e.g. "ass" becomes "as").

    Args:
        text: A single token or a space-joined phrase.

    Returns:
        The normalized string (possibly empty).
    """
    # Underscores must be turned into spaces *before* punctuation is
    # stripped, because "_" is itself part of string.punctuation.
    result = text.lower().replace("_", " ")
    # One translate pass removes all punctuation instead of one
    # str.replace call per punctuation character.
    result = result.translate(str.maketrans("", "", string.punctuation))
    # Crude plural stripping so "dickes"/"dicks" both match "dick".
    result = re.sub(r"e?s$", "", result)
    return result
def filter_bad_words(self, text: str) -> str:
    """Return *text* with every flagged token replaced by a symbol mask.

    Whitespace-separated tokens are checked on their own, as adjacent
    pairs and as adjacent triples.  A word or phrase is flagged when
    either its raw form or ``self.normalize(...)`` of it is found in
    ``self.arrBad``.  Each match prints a diagnostic line to stdout.

    Only the tokens that belong to a flagged word/phrase are masked, at
    their exact position: the same letters inside an unrelated word
    (e.g. "ass" in "class") and other occurrences of a phrase's
    component words are left alone.  Original whitespace is preserved
    and each mask has the same length as the token it replaces.

    Args:
        text: The text to censor.

    Returns:
        The censored text; identical to *text* when nothing is flagged.
    """
    mask_pattern = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    # Record where each token sits so it can be masked in place rather
    # than through a text-wide substring replace.
    spans = [(found.start(), found.end()) for found in re.finditer(r"\S+", text)]
    words = [text[start:end] for start, end in spans]

    flagged = set()
    # Same order as before: single words first, then pairs, then triples.
    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        for first in range(len(words) - size + 1):
            phrase = " ".join(words[first:first + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print(f'***** PROFANITY {label}="{phrase}"')
                flagged.update(range(first, first + size))

    if not flagged:
        return text

    pieces = []
    cursor = 0
    for index in sorted(flagged):
        start, end = spans[index]
        length = end - start
        # Repeat the pattern so tokens longer than it are fully covered.
        repeats = -(-length // len(mask_pattern))
        pieces.append(text[cursor:start])
        pieces.append((mask_pattern * repeats)[:length])
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
def contains_bad_words(self, text: str) -> bool:
    """Report whether *text* contains a flagged word or phrase.

    Whitespace-separated tokens are checked on their own, then as
    adjacent pairs, then as adjacent triples.  A candidate is flagged
    when either its raw form or ``self.normalize(...)`` of it is found
    in ``self.arrBad``.  The first match prints a diagnostic line to
    stdout and ends the scan.

    Args:
        text: The text to inspect.

    Returns:
        True as soon as one flagged word or phrase is found, otherwise
        False (including for empty or whitespace-only text).
    """
    words = text.split()
    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        # range() is empty when there are fewer than `size` tokens, so no
        # separate length guard is needed.
        for first in range(len(words) - size + 1):
            phrase = " ".join(words[first:first + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print(f'***** PROFANITY {label}="{phrase}"')
                return True
    return False
447 # x = profanity_filter()
448 # print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
449 # print(x.contains_bad_words("cream pie their daughter."))
450 # print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
451 # print(x.normalize("dickes"));