4 class profanity_filter:
122 'double penetration',
221 'missionary position',
# normalize: canonicalise `text` before it is looked up in the word list.
# Visible steps: lower-case the text, turn underscores into spaces, then
# delete every character listed in string.punctuation.
# Relies on the `string` module being imported elsewhere in the file (the
# import is not visible in this excerpt).
# NOTE(review): the embedded numbering jumps from 384 to 389, so the end of
# this method is not shown here -- presumably it returns `result`; confirm.
380 def normalize(self, text):
381 result = text.lower()
# Done before the punctuation pass on purpose: '_' is itself a member of
# string.punctuation, so without this step underscores would be deleted
# (joining the neighbouring words) instead of becoming a separator.
382 result = result.replace('_', ' ')
# One str.replace per punctuation character; each pass removes all
# occurrences of that character.
383 for x in string.punctuation:
384 result = result.replace(x, '')
# filter_bad_words: mask tokens of `text` that match self.arrBad, either on
# their own or as part of a two- or three-word phrase.
# Each candidate is checked twice: in normalized form (self.normalize) and
# verbatim. Every hit is also reported on stdout via print.
# NOTE(review): the embedded numbering jumps from 418 to 421, so the end of
# this method is not shown here -- presumably it returns the masked `text`;
# confirm against the full file.
389 def filter_bad_words(self, text):
# Replacement characters; a hit is overwritten with the first len(token)
# characters of this string. A slice cannot be longer than the mask itself,
# so a token longer than the mask gets a shorter replacement.
390 badWordMask = '!@#$%!@#$%^~!@%^~@#$%!@#$%^~!'
# Tokens are taken from the ORIGINAL text once; all three passes below iterate
# over these unmasked tokens while `text` is progressively rewritten.
392 brokenStr1 = text.split()
# Pass 1: single words.
393 for word in brokenStr1:
394 if (self.normalize(word) in self.arrBad or
395 word in self.arrBad):
396 print(('***** PROFANITY WORD="%s"' % word))
# NOTE(review): str.replace substitutes every occurrence of `word` as a
# substring of `text`, so the same characters inside a longer word are
# masked as well.
397 text = text.replace(word, badWordMask[:len(word)])
# Pass 2: adjacent word pairs (only possible with at least two tokens).
399 if len(brokenStr1) > 1:
400 bigrams = list(zip(brokenStr1, brokenStr1[1:]))
401 for bigram in bigrams:
402 phrase = "%s %s" % (bigram[0], bigram[1])
403 if (self.normalize(phrase) in self.arrBad or
404 phrase in self.arrBad):
405 print(('***** PROFANITY PHRASE="%s"' % phrase))
# The phrase is masked word by word (not as one string), so each component
# word is replaced wherever it occurs in `text`.
406 text = text.replace(bigram[0], badWordMask[:len(bigram[0])])
407 text = text.replace(bigram[1], badWordMask[:len(bigram[1])])
# Pass 3: adjacent word triples (only possible with at least three tokens).
409 if len(brokenStr1) > 2:
410 trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
411 for trigram in trigrams:
412 phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
413 if (self.normalize(phrase) in self.arrBad or
414 phrase in self.arrBad):
415 print(('***** PROFANITY PHRASE="%s"' % phrase))
416 text = text.replace(trigram[0], badWordMask[:len(trigram[0])])
417 text = text.replace(trigram[1], badWordMask[:len(trigram[1])])
418 text = text.replace(trigram[2], badWordMask[:len(trigram[2])])
# contains_bad_words: detect whether `text` contains an entry of self.arrBad,
# checking single tokens, adjacent pairs and adjacent triples. Each candidate
# is tested in normalized form (self.normalize) and verbatim, and every hit
# is reported on stdout via print. Unlike filter_bad_words, `text` is not
# modified here.
# NOTE(review): the statements that follow each print (embedded numbers
# 427-428, 436-437, 445-448) are not shown in this excerpt -- presumably a
# boolean is returned on a hit and at the end; confirm against the full file.
421 def contains_bad_words(self, text):
422 brokenStr1 = text.split()
# Pass 1: single whitespace-separated tokens.
423 for word in brokenStr1:
424 if (self.normalize(word) in self.arrBad or
425 word in self.arrBad):
426 print(('***** PROFANITY WORD="%s"' % word))
# Pass 2: adjacent word pairs joined by a single space.
429 if len(brokenStr1) > 1:
430 bigrams = list(zip(brokenStr1, brokenStr1[1:]))
431 for bigram in bigrams:
432 phrase = "%s %s" % (bigram[0], bigram[1])
433 if (self.normalize(phrase) in self.arrBad or
434 phrase in self.arrBad):
435 print(('***** PROFANITY PHRASE="%s"' % phrase))
# Pass 3: adjacent word triples joined by single spaces.
438 if len(brokenStr1) > 2:
439 trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
440 for trigram in trigrams:
441 phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
442 if (self.normalize(phrase) in self.arrBad or
443 phrase in self.arrBad):
444 print(('***** PROFANITY PHRASE="%s"' % phrase))
449 #x = profanity_filter()
450 #print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
451 #print(x.contains_bad_words("cream pie their daughter."))
452 #print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
453 #print(x.normalize("dickes"));