7 class profanity_filter:
125 "double penetration",
226 "missionary position",
def normalize(self, text: str) -> str:
    """Return a canonical form of *text* for lookup in the bad-word list.

    The text is lower-cased, underscores are turned into spaces, every
    other ASCII punctuation character (``string.punctuation``) is removed,
    and a single trailing ``"s"`` or ``"es"`` is stripped as a crude
    plural reduction.

    Args:
        text: A word or a space-joined phrase.

    Returns:
        The normalized string (possibly empty).

    Note:
        The suffix stripping is purely textual: ``"dickes"`` becomes
        ``"dick"``, but ``"ass"`` also becomes ``"as"``. Callers that need
        to match words naturally ending in "s" must also test the raw text.
    """
    # "_" acts as a word separator; all remaining punctuation is dropped.
    # A single translate() pass replaces one str.replace() call per
    # punctuation character.
    droppable = string.punctuation.replace("_", "")
    table = str.maketrans("_", " ", droppable)
    cleaned = text.lower().translate(table)

    # Strip one trailing "s"/"es" so simple plurals hit the singular entry.
    return re.sub(r"e?s$", "", cleaned)
def filter_bad_words(self, text: str) -> str:
    """Return *text* with every profane word or phrase masked out.

    The text is split into whitespace-delimited tokens. Each single token,
    each pair of adjacent tokens and each triple of adjacent tokens
    (joined with single spaces) is looked up in ``self.arrBad``, both in
    its ``self.normalize`` form and verbatim. Every token that takes part
    in a match is replaced by a same-length run of mask characters; all
    other text, including the original whitespace, is left untouched.

    A diagnostic line is printed for every match.

    Args:
        text: The text to sanitize.

    Returns:
        The sanitized text, the same length as the input.
    """
    mask_template = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    # Keep each token's position so that only the offending tokens are
    # rewritten. A plain str.replace() on the whole text would also
    # corrupt innocent words that merely contain a flagged token
    # (e.g. "auto" inside "automobile").
    matches = list(re.finditer(r"\S+", text))
    tokens = [match.group() for match in matches]

    # Presumably arrBad is a list of strings (only fragments are visible
    # from here); a set makes the repeated membership tests O(1).
    bad = frozenset(self.arrBad)

    flagged = set()
    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        for start in range(len(tokens) - size + 1):
            phrase = " ".join(tokens[start:start + size])
            if self.normalize(phrase) in bad or phrase in bad:
                print(f'***** PROFANITY {label}="{phrase}"')
                flagged.update(range(start, start + size))

    if not flagged:
        return text

    pieces = []
    cursor = 0
    for index, match in enumerate(matches):
        # Copy the whitespace between tokens through unchanged.
        pieces.append(text[cursor:match.start()])
        token = match.group()
        if index in flagged:
            # Repeat the template so tokens longer than it keep their length.
            repeats = -(-len(token) // len(mask_template))
            pieces.append((mask_template * repeats)[: len(token)])
        else:
            pieces.append(token)
        cursor = match.end()
    pieces.append(text[cursor:])
    return "".join(pieces)
def contains_bad_words(self, text: str) -> bool:
    """Report whether *text* contains a profane word or phrase.

    The text is split on whitespace. Single tokens are checked first,
    then adjacent pairs, then adjacent triples (joined with single
    spaces). Each candidate is looked up in ``self.arrBad`` both in its
    ``self.normalize`` form and verbatim. The search stops at the first
    match, for which a diagnostic line is printed.

    Args:
        text: The text to inspect.

    Returns:
        ``True`` if any word, two-word or three-word phrase matches,
        otherwise ``False`` (including for empty input).
    """
    tokens = text.split()

    # Presumably arrBad is a list of strings (only fragments are visible
    # from here); a set makes the repeated membership tests O(1).
    bad = frozenset(self.arrBad)

    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        # The range is empty when the text has fewer than `size` tokens.
        for start in range(len(tokens) - size + 1):
            phrase = " ".join(tokens[start:start + size])
            if self.normalize(phrase) in bad or phrase in bad:
                print(f'***** PROFANITY {label}="{phrase}"')
                return True
    return False
448 # x = profanity_filter()
449 # print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
450 # print(x.contains_bad_words("cream pie their daughter."))
451 # print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
452 # print(x.normalize("dickes"));