Merge branch 'master' of ssh://git.house:/usr/local/git/base/kiosk
[kiosk.git] / profanity_filter.py
index 1c862eb5f54f3769008ec4a73944f15c61bf60e0..6329a5511febccf1e7cda7102141e1bc4b2812fc 100644 (file)
+#!/usr/bin/env python3
+
 import string
 import re
 
+
 class profanity_filter:
     def __init__(self):
         self.arrBad = [
-            'acrotomophilia',
-            'anal',
-            'anally',
-            'anilingus',
-            'anus',
-            'arsehole',
-            'ass',
-            'asses',
-            'asshole',
-            'assmunch',
-            'auto erotic',
-            'autoerotic',
-            'babeland',
-            'baby batter',
-            'ball gag',
-            'ball gravy',
-            'ball kicking',
-            'ball licking',
-            'ball sack',
-            'ball zack',
-            'ball sucking',
-            'bangbros',
-            'bareback',
-            'barely legal',
-            'barenaked',
-            'bastardo',
-            'bastinado',
-            'bbw',
-            'bdsm',
-            'beaver cleaver',
-            'beaver lips',
-            'bestiality',
-            'bi curious',
-            'big black',
-            'big breasts',
-            'big knockers',
-            'big tits',
-            'bimbos',
-            'birdlock',
-            'bitch',
-            'bitches',
-            'black cock',
-            'blonde action',
-            'blonde on blonde',
-            'blow j',
-            'blow your l',
-            'blow ourselves',
-            'blow m',
-            'blue waffle',
-            'blumpkin',
-            'bollocks',
-            'bondage',
-            'boner',
-            'boob',
-            'boobs',
-            'booty call',
-            'breasts',
-            'brown showers',
-            'brunette action',
-            'bukkake',
-            'bulldyke',
-            'bullshit',
-            'bullet vibe',
-            'bung hole',
-            'bunghole',
-            'busty',
-            'butt',
-            'buttcheeks',
-            'butthole',
-            'camel toe',
-            'camgirl',
-            'camslut',
-            'camwhore',
-            'carpet muncher',
-            'carpetmuncher',
-            'chocolate rosebuds',
-            'circlejerk',
-            'cleveland steamer',
-            'clit',
-            'clitoris',
-            'clover clamps',
-            'clusterfuck',
-            'cock',
-            'cocks',
-            'coprolagnia',
-            'coprophilia',
-            'cornhole',
-            'creampie',
-            'cream pie',
-            'cum',
-            'cumming',
-            'cunnilingus',
-            'cunt',
-            'damn',
-            'darkie',
-            'date rape',
-            'daterape',
-            'deep throat',
-            'deepthroat',
-            'dick',
-            'dildo',
-            'dirty pillows',
-            'dirty sanchez',
-            'dog style',
-            'doggie style',
-            'doggiestyle',
-            'doggy style',
-            'doggystyle',
-            'dolcett',
-            'domination',
-            'dominatrix',
-            'dommes',
-            'donkey punch',
-            'double dick',
-            'double dong',
-            'double penetration',
-            'dp action',
-            'dtf',
-            'eat my ass',
-            'ecchi',
-            'ejaculation',
-            'erotic',
-            'erotism',
-            'escort',
-            'ethical slut',
-            'eunuch',
-            'faggot',
-            'posts each week',
-            'fecal',
-            'felch',
-            'fellatio',
-            'feltch',
-            'female squirting',
-            'femdom',
-            'figging',
-            'fingering',
-            'fisting',
-            'foot fetish',
-            'footjob',
-            'frotting',
-            'fuck',
-            'fucking',
-            'fuckin',
-            'fuckin\'',
-            'fucked',
-            'fuckers',
-            'fuck buttons',
-            'fuckhead',
-            'fudge packer',
-            'fudgepacker',
-            'futanari',
-            'g-spot',
-            'gspot',
-            'gang bang',
-            'gay sex',
-            'genitals',
-            'giant cock',
-            'girl on',
-            'girl on top',
-            'girls gone wild',
-            'goatcx',
-            'goatse',
-            'goddamn',
-            'gokkun',
-            'golden shower',
-            'goo girl',
-            'goodpoop',
-            'goregasm',
-            'grope',
-            'group sex',
-            'guro',
-            'hand job',
-            'handjob',
-            'hard core',
-            'hardcore',
-            'hentai',
-            'homoerotic',
-            'honkey',
-            'hooker',
-            'horny',
-            'hot chick',
-            'how to kill',
-            'how to murder',
-            'huge fat',
-            'humping',
-            'incest',
-            'intercourse',
-            'jack off',
-            'jail bait',
-            'jailbait',
-            'jerk off',
-            'jigaboo',
-            'jiggaboo',
-            'jiggerboo',
-            'jizz',
-            'juggs',
-            'kike',
-            'kinbaku',
-            'kinkster',
-            'kinky',
-            'knobbing',
-            'leather restraint',
-            'lemon party',
-            'lolita',
-            'lovemaking',
-            'lpt request',
-            'make me come',
-            'male squirting',
-            'masturbate',
-            'masturbated',
-            'masturbating',
-            'menage a trois',
-            'milf',
-            'milfs',
-            'missionary position',
-            'motherfucker',
-            'mound of venus',
-            'mr hands',
-            'muff diver',
-            'muffdiving',
-            'nambla',
-            'nawashi',
-            'negro',
-            'neonazi',
-            'nig nog',
-            'nigga',
-            'nigger',
-            'nimphomania',
-            'nipple',
-            'not safe for',
-            'nsfw',
-            'nsfw images',
-            'nude',
-            'nudity',
-            'nutsack',
-            'nut sack',
-            'nympho',
-            'nymphomania',
-            'octopussy',
-            'omorashi',
-            'one night stand',
-            'orgasm',
-            'orgy',
-            'paedophile',
-            'panties',
-            'panty',
-            'pedobear',
-            'pedophile',
-            'pegging',
-            'pee',
-            'penis',
-            'phone sex',
-            'piss pig',
-            'pissing',
-            'pisspig',
-            'playboy',
-            'pleasure chest',
-            'pole smoker',
-            'ponyplay',
-            'poof',
-            'poop chute',
-            'poopchute',
-            'porn',
-            'pornhub',
-            'porno',
-            'pornography',
-            'prince albert',
-            'pthc',
-            'pube',
-            'pubes',
-            'pussy',
-            'pussies',
-            'queaf',
-            'queer',
-            'raghead',
-            'raging boner',
-            'rape',
-            'raping',
-            'rapist',
-            'rectum',
-            'reverse cowgirl',
-            'rimjob',
-            'rimming',
-            'rosy palm',
-            'rusty trombone',
-            's&m',
-            'sadism',
-            'scat',
-            'schlong',
-            'scissoring',
-            'semen',
-            'sex',
-            'sexo',
-            'sexy',
-            'shaved beaver',
-            'shaved pussy',
-            'shemale',
-            'shibari',
-            'shit',
-            'shota',
-            'shrimping',
-            'slanteye',
-            'slut',
-            'smut',
-            'snatch',
-            'snowballing',
-            'sodomize',
-            'sodomy',
-            'spic',
-            'spooge',
-            'spread legs',
-            'strap on',
-            'strapon',
-            'strappado',
-            'strip club',
-            'style doggy',
-            'suck',
-            'sucks',
-            'suicide girls',
-            'sultry women',
-            'swastika',
-            'swinger',
-            'tainted love',
-            'taste my',
-            'tea bagging',
-            'threesome',
-            'throating',
-            'tied up',
-            'tight white',
-            'tit',
-            'tits',
-            'titties',
-            'titty',
-            'tongue in a',
-            'topless',
-            'tosser',
-            'towelhead',
-            'tranny',
-            'tribadism',
-            'tub girl',
-            'tubgirl',
-            'tushy',
-            'twat',
-            'twink',
-            'twinkie',
-            'undressing',
-            'upskirt',
-            'urethra play',
-            'urophilia',
-            'vagina',
-            'venus mound',
-            'vibrator',
-            'violet blue',
-            'violet wand',
-            'vorarephilia',
-            'voyeur',
-            'vulva',
-            'wank',
-            'wet dream',
-            'wetback',
-            'white power',
-            'whore',
-            'women rapping',
-            'wrapping men',
-            'wrinkled starfish',
-            'xx',
-            'xxx',
-            'yaoi',
-            'yellow showers',
-            'yiffy',
-            'zoophilia',
+            "acrotomophilia",
+            "anal",
+            "anally",
+            "anilingus",
+            "anus",
+            "arsehole",
+            "ass",
+            "asses",
+            "asshole",
+            "assmunch",
+            "auto erotic",
+            "autoerotic",
+            "babeland",
+            "baby batter",
+            "ball gag",
+            "ball gravy",
+            "ball kicking",
+            "ball licking",
+            "ball sack",
+            "ball zack",
+            "ball sucking",
+            "bangbros",
+            "bareback",
+            "barely legal",
+            "barenaked",
+            "bastardo",
+            "bastinado",
+            "bbw",
+            "bdsm",
+            "beaver cleaver",
+            "beaver lips",
+            "bestiality",
+            "bi curious",
+            "big black",
+            "big breasts",
+            "big knockers",
+            "big tits",
+            "bimbos",
+            "birdlock",
+            "bitch",
+            "bitches",
+            "black cock",
+            "blonde action",
+            "blonde on blonde",
+            "blow j",
+            "blow your l",
+            "blow ourselves",
+            "blow m",
+            "blue waffle",
+            "blumpkin",
+            "bollocks",
+            "bondage",
+            "boner",
+            "boob",
+            "boobs",
+            "booty call",
+            "breasts",
+            "brown showers",
+            "brunette action",
+            "bukkake",
+            "bulldyke",
+            "bullshit",
+            "bullet vibe",
+            "bung hole",
+            "bunghole",
+            "busty",
+            "butt",
+            "buttcheeks",
+            "butthole",
+            "camel toe",
+            "camgirl",
+            "camslut",
+            "camwhore",
+            "carpet muncher",
+            "carpetmuncher",
+            "chocolate rosebuds",
+            "circlejerk",
+            "cleveland steamer",
+            "clit",
+            "clitoris",
+            "clover clamps",
+            "clusterfuck",
+            "cock",
+            "cocks",
+            "coprolagnia",
+            "coprophilia",
+            "cornhole",
+            "creampie",
+            "cream pie",
+            "cum",
+            "cumming",
+            "cunnilingus",
+            "cunt",
+            "damn",
+            "darkie",
+            "date rape",
+            "daterape",
+            "deep throat",
+            "deepthroat",
+            "dick",
+            "dildo",
+            "dirty pillows",
+            "dirty sanchez",
+            "dog style",
+            "doggie style",
+            "doggiestyle",
+            "doggy style",
+            "doggystyle",
+            "dolcett",
+            "domination",
+            "dominatrix",
+            "dommes",
+            "donkey punch",
+            "double dick",
+            "double dong",
+            "double penetration",
+            "dp action",
+            "dtf",
+            "eat my ass",
+            "ecchi",
+            "ejaculation",
+            "erection",
+            "erotic",
+            "erotism",
+            "escort",
+            "ethical slut",
+            "eunuch",
+            "faggot",
+            "posts each week",
+            "fecal",
+            "felch",
+            "fellatio",
+            "feltch",
+            "female squirting",
+            "femdom",
+            "figging",
+            "fingering",
+            "fisting",
+            "foot fetish",
+            "footjob",
+            "frotting",
+            "fuck",
+            "fucking",
+            "fuckin",
+            "fuckin'",
+            "fucked",
+            "fuckers",
+            "fuck buttons",
+            "fuckhead",
+            "fudge packer",
+            "fudgepacker",
+            "futanari",
+            "g-spot",
+            "gspot",
+            "gang bang",
+            "gay sex",
+            "genitals",
+            "giant cock",
+            "girl on",
+            "girl on top",
+            "girls gone wild",
+            "goatcx",
+            "goatse",
+            "goddamn",
+            "gokkun",
+            "golden shower",
+            "goo girl",
+            "goodpoop",
+            "goregasm",
+            "grope",
+            "group sex",
+            "guro",
+            "hand job",
+            "handjob",
+            "hard core",
+            "hardcore",
+            "hentai",
+            "homoerotic",
+            "honkey",
+            "hooker",
+            "horny",
+            "hot chick",
+            "how to kill",
+            "how to murder",
+            "huge fat",
+            "humping",
+            "incest",
+            "intercourse",
+            "jack off",
+            "jail bait",
+            "jailbait",
+            "jerk off",
+            "jerking off",
+            "jigaboo",
+            "jiggaboo",
+            "jiggerboo",
+            "jizz",
+            "juggs",
+            "kike",
+            "kinbaku",
+            "kinkster",
+            "kinky",
+            "knobbing",
+            "leather restraint",
+            "lemon party",
+            "lolita",
+            "lovemaking",
+            "lpt request",
+            "make me come",
+            "male squirting",
+            "masturbate",
+            "masturbated",
+            "masturbating",
+            "menage a trois",
+            "milf",
+            "milfs",
+            "missionary position",
+            "motherfucker",
+            "mound of venus",
+            "mr hands",
+            "muff diver",
+            "muffdiving",
+            "nambla",
+            "nawashi",
+            "negro",
+            "neonazi",
+            "nig nog",
+            "nigga",
+            "nigger",
+            "nimphomania",
+            "nipple",
+            "not safe for",
+            "nsfw",
+            "nsfw images",
+            "nude",
+            "nudity",
+            "nutsack",
+            "nut sack",
+            "nympho",
+            "nymphomania",
+            "octopussy",
+            "omorashi",
+            "one night stand",
+            "orgasm",
+            "orgy",
+            "paedophile",
+            "panties",
+            "panty",
+            "pedobear",
+            "pedophile",
+            "pegging",
+            "pee",
+            "penis",
+            "phone sex",
+            "piss pig",
+            "pissing",
+            "pisspig",
+            "playboy",
+            "pleasure chest",
+            "pole smoker",
+            "ponyplay",
+            "poof",
+            "poop chute",
+            "poopchute",
+            "porn",
+            "pornhub",
+            "porno",
+            "pornography",
+            "prince albert",
+            "pthc",
+            "pube",
+            "pubes",
+            "pussy",
+            "pussies",
+            "queaf",
+            "queer",
+            "raghead",
+            "raging boner",
+            "rape",
+            "raping",
+            "rapist",
+            "rectum",
+            "reverse cowgirl",
+            "rimjob",
+            "rimming",
+            "rosy palm",
+            "rusty trombone",
+            "s&m",
+            "sadism",
+            "scat",
+            "schlong",
+            "scissoring",
+            "semen",
+            "sex",
+            "sexo",
+            "sexy",
+            "shaved beaver",
+            "shaved pussy",
+            "shemale",
+            "shibari",
+            "shit",
+            "shota",
+            "shrimping",
+            "slanteye",
+            "slut",
+            "smut",
+            "snatch",
+            "snowballing",
+            "sodomize",
+            "sodomy",
+            "spic",
+            "spooge",
+            "spread legs",
+            "strap on",
+            "strapon",
+            "strappado",
+            "strip club",
+            "style doggy",
+            "suck",
+            "sucks",
+            "suicide girls",
+            "sultry women",
+            "swastika",
+            "swinger",
+            "tainted love",
+            "taste my",
+            "tea bagging",
+            "threesome",
+            "throating",
+            "tied up",
+            "tight white",
+            "tit",
+            "tits",
+            "titties",
+            "titty",
+            "tongue in a",
+            "topless",
+            "tosser",
+            "towelhead",
+            "tranny",
+            "tribadism",
+            "tub girl",
+            "tubgirl",
+            "tushy",
+            "twat",
+            "twink",
+            "twinkie",
+            "undressing",
+            "upskirt",
+            "urethra play",
+            "urophilia",
+            "vagina",
+            "venus mound",
+            "vibrator",
+            "violet blue",
+            "violet wand",
+            "vorarephilia",
+            "voyeur",
+            "vulva",
+            "wank",
+            "wet dream",
+            "wetback",
+            "white power",
+            "whore",
+            "women rapping",
+            "wrapping men",
+            "wrinkled starfish",
+            "xx",
+            "xxx",
+            "yaoi",
+            "yellow showers",
+            "yiffy",
+            "zoophilia",
         ]
 
-    def normalize(self, text):
+    def normalize(self, text: str) -> str:
         result = text.lower()
-        result = result.replace('_', ' ')
+        result = result.replace("_", " ")
         for x in string.punctuation:
-            result = result.replace(x, '')
-        result = re.sub(
-            r"e?s$", "", result)
+            result = result.replace(x, "")
+        result = re.sub(r"e?s$", "", result)
         return result
 
-    def filter_bad_words(self, text):
-        badWordMask = '!@#$%!@#$%^~!@%^~@#$%!@#$%^~!'
+    def filter_bad_words(self, text: str) -> str:
+        badWordMask = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"
 
         brokenStr1 = text.split()
         for word in brokenStr1:
-            if (self.normalize(word) in self.arrBad or
-                word in self.arrBad):
-                print(('***** PROFANITY WORD="%s"' % word))
-                text = text.replace(word, badWordMask[:len(word)])
+            if self.normalize(word) in self.arrBad or word in self.arrBad:
+                print(f'***** PROFANITY WORD="{word}"')
+                text = text.replace(word, badWordMask[: len(word)])
 
         if len(brokenStr1) > 1:
             bigrams = list(zip(brokenStr1, brokenStr1[1:]))
             for bigram in bigrams:
-                phrase = "%s %s" % (bigram[0], bigram[1])
-                if (self.normalize(phrase) in self.arrBad or
-                    phrase in self.arrBad):
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
-                    text = text.replace(bigram[0], badWordMask[:len(bigram[0])])
-                    text = text.replace(bigram[1], badWordMask[:len(bigram[1])])
+                phrase = f"{bigram[0]} {bigram[1]}"
+                if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
+                    text = text.replace(bigram[0], badWordMask[: len(bigram[0])])
+                    text = text.replace(bigram[1], badWordMask[: len(bigram[1])])
 
         if len(brokenStr1) > 2:
             trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
             for trigram in trigrams:
-                phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
-                if (self.normalize(phrase) in self.arrBad or
-                    phrase in self.arrBad):
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
-                    text = text.replace(trigram[0], badWordMask[:len(trigram[0])])
-                    text = text.replace(trigram[1], badWordMask[:len(trigram[1])])
-                    text = text.replace(trigram[2], badWordMask[:len(trigram[2])])
+                phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}"
+                if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
+                    text = text.replace(trigram[0], badWordMask[: len(trigram[0])])
+                    text = text.replace(trigram[1], badWordMask[: len(trigram[1])])
+                    text = text.replace(trigram[2], badWordMask[: len(trigram[2])])
         return text
 
-    def contains_bad_words(self, text):
+    def contains_bad_words(self, text: str) -> bool:
         brokenStr1 = text.split()
         for word in brokenStr1:
-            if (self.normalize(word) in self.arrBad or
-                word in self.arrBad):
-                print(('***** PROFANITY WORD="%s"' % word))
+            if self.normalize(word) in self.arrBad or word in self.arrBad:
+                print(f'***** PROFANITY WORD="{word}"')
                 return True
 
         if len(brokenStr1) > 1:
             bigrams = list(zip(brokenStr1, brokenStr1[1:]))
             for bigram in bigrams:
-                phrase = "%s %s" % (bigram[0], bigram[1])
-                if (self.normalize(phrase) in self.arrBad or
-                    phrase in self.arrBad):
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                phrase = f"{bigram[0]} {bigram[1]}"
+                if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     return True
 
         if len(brokenStr1) > 2:
             trigrams = list(zip(brokenStr1, brokenStr1[1:], brokenStr1[2:]))
             for trigram in trigrams:
-                phrase = "%s %s %s" % (trigram[0], trigram[1], trigram[2])
-                if (self.normalize(phrase) in self.arrBad or
-                    phrase in self.arrBad):
-                    print(('***** PROFANITY PHRASE="%s"' % phrase))
+                phrase = f"{trigram[0]} {trigram[1]} {trigram[2]}"
+                if self.normalize(phrase) in self.arrBad or phrase in self.arrBad:
+                    print(f'***** PROFANITY PHRASE="{phrase}"')
                     return True
-
         return False
 
-#x = profanity_filter()
-#print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
-#print(x.contains_bad_words("cream pie their daughter."))
-#print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable.  If you say it's half a foot no one will believe you."))
-#print(x.normalize("dickes"));
+
+# x = profanity_filter()
+# print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
+# print(x.contains_bad_words("cream pie their daughter."))
+# print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable.  If you say it's half a foot no one will believe you."))
+# print(x.normalize("dickes"));