4 class profanity_filter:
122 'double penetration',
223 'missionary position',
def normalize(self, text):
    """Return a canonical form of ``text`` for word-list lookups.

    The text is lower-cased, underscores are turned into spaces, and every
    character listed in ``string.punctuation`` is removed.

    Args:
        text: The word or phrase to canonicalize.

    Returns:
        The canonicalized string.
    """
    # Underscores become spaces *before* punctuation is stripped;
    # otherwise '_' (itself in string.punctuation) would just be deleted.
    lowered = text.lower().replace('_', ' ')
    # One translate pass instead of one str.replace call per character.
    return lowered.translate(str.maketrans('', '', string.punctuation))
def filter_bad_words(self, text):
    """Return ``text`` with every profane word or phrase masked out.

    The text is split on whitespace. Every single token, every pair of
    adjacent tokens and every triple of adjacent tokens is looked up in
    ``self.arrBad``, both verbatim and after ``self.normalize``. Each
    token that takes part in a match is overwritten by a run of mask
    characters of the same length. All other characters, including the
    original whitespace, are kept unchanged.

    Every match is also reported on standard output.

    Args:
        text: The string to censor.

    Returns:
        The censored copy of ``text`` (``text`` itself when nothing
        matched).
    """
    badWordMask = '!@#$%!@#$%^~!@%^~@#$%!@#$%^~!'

    tokens = text.split()

    # Record where each token sits in the original string so that only
    # that exact occurrence is masked. A blanket str.replace would also
    # hit the same characters inside unrelated words.
    spans = []
    cursor = 0
    for token in tokens:
        start = text.index(token, cursor)
        cursor = start + len(token)
        spans.append((start, cursor))

    # Indices of tokens that belong to at least one matching 1/2/3-gram.
    flagged = set()
    for size in (1, 2, 3):
        label = 'WORD' if size == 1 else 'PHRASE'
        for first in range(len(tokens) - size + 1):
            phrase = ' '.join(tokens[first:first + size])
            if (self.normalize(phrase) in self.arrBad or
                    phrase in self.arrBad):
                print(('***** PROFANITY %s="%s"' % (label, phrase)))
                flagged.update(range(first, first + size))

    if not flagged:
        return text

    pieces = []
    cursor = 0
    for index in sorted(flagged):
        start, end = spans[index]
        length = end - start
        # Repeat the mask when the token is longer than the mask itself,
        # so the replacement always has the token's length.
        repeats = -(-length // len(badWordMask))
        pieces.append(text[cursor:start])
        pieces.append((badWordMask * repeats)[:length])
        cursor = end
    pieces.append(text[cursor:])
    return ''.join(pieces)
def contains_bad_words(self, text):
    """Tell whether ``text`` contains a profane word or phrase.

    The text is split on whitespace. Single tokens are checked first,
    then pairs of adjacent tokens, then triples. A candidate matches when
    it is in ``self.arrBad`` either verbatim or after ``self.normalize``.
    The first match is reported on standard output and ends the search.

    Args:
        text: The string to inspect.

    Returns:
        ``True`` as soon as a match is found, ``False`` when none is.
    """
    tokens = text.split()

    for size in (1, 2, 3):
        label = 'WORD' if size == 1 else 'PHRASE'
        # The range is empty when the text has fewer than ``size`` tokens.
        for first in range(len(tokens) - size + 1):
            phrase = ' '.join(tokens[first:first + size])
            if (self.normalize(phrase) in self.arrBad or
                    phrase in self.arrBad):
                print(('***** PROFANITY %s="%s"' % (label, phrase)))
                return True

    return False
451 #x = profanity_filter()
452 #print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
453 #print(x.contains_bad_words("cream pie their daughter."))
454 #print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable. If you say it's half a foot no one will believe you."))
455 #print(x.normalize("dickes"));