5 class profanity_filter:
123 "double penetration",
224 "missionary position",
383 def normalize(self, text):
384 result = text.lower()
385 result = result.replace("_", " ")
386 for x in string.punctuation:
387 result = result.replace(x, "")
388 result = re.sub(r"e?s$", "", result)
def filter_bad_words(self, text):
    """Return *text* with every profane word or phrase masked out.

    The text is split on whitespace. Each single token, each pair of
    adjacent tokens and each triple of adjacent tokens is looked up in
    ``self.arrBad``, both after ``self.normalize`` and verbatim. Every token
    that takes part in a hit is replaced by a run of mask characters of the
    same length; all other text, including the original whitespace, is
    left untouched. Each hit is also reported on stdout.

    Args:
        text: The string to censor.

    Returns:
        The censored string.
    """
    badWordMask = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    words = text.split()

    # Record where each token sits in the text so that exactly those
    # characters are masked. A plain str.replace() would also clobber the
    # same letters inside unrelated words (e.g. a hit on "ass" inside "class").
    spans = []
    cursor = 0
    for word in words:
        begin = text.index(word, cursor)
        cursor = begin + len(word)
        spans.append((begin, cursor))

    flagged = set()
    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        for start in range(len(words) - size + 1):
            phrase = " ".join(words[start:start + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print('***** PROFANITY %s="%s"' % (label, phrase))
                flagged.update(range(start, start + size))

    if not flagged:
        return text

    pieces = []
    cursor = 0
    for index in sorted(flagged):
        begin, end = spans[index]
        length = end - begin
        # Repeat the mask so tokens longer than it are still fully covered.
        repeats = length // len(badWordMask) + 1
        pieces.append(text[cursor:begin])
        pieces.append((badWordMask * repeats)[:length])
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
def contains_bad_words(self, text):
    """Tell whether *text* contains a profane word or phrase.

    The text is split on whitespace. Single tokens are checked first, then
    pairs of adjacent tokens, then triples. A candidate is a hit when either
    its ``self.normalize`` form or its verbatim form is in ``self.arrBad``.
    The first hit is reported on stdout and ends the search.

    Args:
        text: The string to inspect.

    Returns:
        True if a hit was found, False otherwise.
    """
    words = text.split()
    for size in (1, 2, 3):
        label = "WORD" if size == 1 else "PHRASE"
        # range() is empty when the text has fewer than `size` tokens.
        for start in range(len(words) - size + 1):
            phrase = " ".join(words[start:start + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print('***** PROFANITY %s="%s"' % (label, phrase))
                return True
    return False
446 # x = profanity_filter()
447 # print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
448 # print(x.contains_bad_words("cream pie their daughter."))
449 # print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
450 # print(x.normalize("dickes"));