Add type annotations, add a python3 shebang, and convert %-style string formatting to f-strings.
[kiosk.git] / profanity_filter.py
index 0925e67f7397f4aafe6be52345e4c2a3d9f8d993..6329a5511febccf1e7cda7102141e1bc4b2812fc 100644 (file)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import string
 import re
 
@@ -380,7 +382,7 @@ class profanity_filter:
             "zoophilia",
         ]
 
-    def normalize(self, text):
+    def normalize(self, text: str) -> str:
         result = text.lower()
         result = result.replace("_", " ")
         for x in string.punctuation:
@@ -388,58 +390,57 @@ class profanity_filter:
         result = re.sub(r"e?s$", "", result)
         return result
 
-    def filter_bad_words(self, text):
+    def filter_bad_words(self, text: str) -> str:
         badWordMask = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"
 
         brokenStr1 = text.split()
         for word in brokenStr1:
             if self.normalize(word) in self.arrBad or word in self.arrBad:
-                print(('***** PROFANITY WORD="%s"' % word))
+                print(f'***** PROFANITY WORD="{word}"')
                 text = text.replace(word, badWordMask[: len(word)])
 
         if len(brokenStr1) > 1:
             bigrams = list(zip(brokenStr1, brokenStr1[1:]))
             for bigram in bigrams:
-                phrase = "%s %s" % (bigram[0], bigram[1])
+                phrase = f"{bigram[0]} {bigram[1]}"
                 if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     text = text.replace(bigram[0], badWordMask[: len(bigram[0])])
                     text = text.replace(bigram[1], badWordMask[: len(bigram[1])])
 
         if len(brokenStr1) > 2:
             trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
             for trigram in trigrams:
-                phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
+                phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}"
                 if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     text = text.replace(trigram[0], badWordMask[: len(trigram[0])])
                     text = text.replace(trigram[1], badWordMask[: len(trigram[1])])
                     text = text.replace(trigram[2], badWordMask[: len(trigram[2])])
         return text
 
-    def contains_bad_words(self, text):
+    def contains_bad_words(self, text: str) -> bool:
         brokenStr1 = text.split()
         for word in brokenStr1:
             if self.normalize(word) in self.arrBad or word in self.arrBad:
-                print(('***** PROFANITY WORD="%s"' % word))
+                print(f'***** PROFANITY WORD="{word}"')
                 return True
 
         if len(brokenStr1) > 1:
             bigrams = list(zip(brokenStr1, brokenStr1[1:]))
             for bigram in bigrams:
-                phrase = "%s %s" % (bigram[0], bigram[1])
+                phrase = f"{bigram[0]} {bigram[1]}"
                 if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     return True
 
         if len(brokenStr1) > 2:
             trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
             for trigram in trigrams:
-                phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
+                phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}"
                 if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     return True
-
         return False