4 class profanity_filter:
122 'double penetration',
221 'missionary position',
def normalize(self, text):
    """Return a canonical form of *text* for bad-word lookups.

    The text is lower-cased, every underscore becomes a space, and all
    remaining characters listed in ``string.punctuation`` are removed.
    Whitespace is left untouched.

    Args:
        text: The word or phrase to normalize.

    Returns:
        The normalized string.
    """
    # Underscores must be turned into spaces *before* punctuation is
    # stripped, because '_' is itself part of string.punctuation.
    lowered = text.lower().replace('_', ' ')
    # Single pass over the text instead of one replace() per punctuation
    # character.
    return ''.join(ch for ch in lowered if ch not in string.punctuation)
def filter_bad_words(self, text):
    """Return *text* with every flagged word or phrase masked out.

    The text is split on whitespace and every single token, every pair of
    adjacent tokens and every triple of adjacent tokens is checked against
    ``self.arrBad``, both verbatim and after ``self.normalize``.  Each token
    that takes part in a match is overwritten, in place, with mask
    characters of the same length.  Tokens that merely *contain* a flagged
    word (for example "class" when "ass" is flagged) are left alone, and
    the whitespace of the input is preserved.

    Every match is also reported on standard output.

    Args:
        text: The text to censor.

    Returns:
        The censored copy of *text*.
    """
    badWordMask = '!@#$%!@#$%^~!@%^~@#$%!@#$%^~!'
    tokens = text.split()

    # Record where each token sits inside ``text`` so that masking can be
    # done by position.  Tokens come back from split() in order and contain
    # no whitespace, so searching from the end of the previous token always
    # lands on the right occurrence.
    spans = []
    cursor = 0
    for token in tokens:
        start = text.index(token, cursor)
        cursor = start + len(token)
        spans.append((start, cursor))

    # Indices of the tokens that belong to at least one match.
    flagged = set()
    for size in (1, 2, 3):
        label = 'WORD' if size == 1 else 'PHRASE'
        for first in range(len(tokens) - size + 1):
            phrase = ' '.join(tokens[first:first + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print('***** PROFANITY %s="%s"' % (label, phrase))
                flagged.update(range(first, first + size))

    # Rebuild the text, swapping only the flagged spans for mask characters.
    pieces = []
    cursor = 0
    for index in sorted(flagged):
        start, end = spans[index]
        length = end - start
        # Repeat the mask so tokens longer than the mask keep their length.
        repeats = length // len(badWordMask) + 1
        pieces.append(text[cursor:start])
        pieces.append((badWordMask * repeats)[:length])
        cursor = end
    pieces.append(text[cursor:])
    return ''.join(pieces)
def contains_bad_words(self, text):
    """Tell whether *text* contains a flagged word or phrase.

    The text is split on whitespace; single tokens are checked first, then
    pairs of adjacent tokens, then triples.  A candidate matches when it,
    or its ``self.normalize`` form, is an entry of ``self.arrBad``.  The
    scan stops at the first match, which is reported on standard output.

    Args:
        text: The text to inspect.

    Returns:
        True if a match was found, False otherwise.
    """
    tokens = text.split()
    for size in (1, 2, 3):
        label = 'WORD' if size == 1 else 'PHRASE'
        # range() is empty when the text has fewer than ``size`` tokens, so
        # no explicit length guard is needed.
        for first in range(len(tokens) - size + 1):
            phrase = ' '.join(tokens[first:first + size])
            if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
                print('***** PROFANITY %s="%s"' % (label, phrase))
                return True
    return False
449 #x = profanity_filter()
450 #print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
451 #print(x.contains_bad_words("cream pie their daughter."))
452 #print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
453 #print(x.normalize("dickes"));