Cleanup
[kiosk.git] / profanity_filter.py
1 import string
2 import re
3
4
class profanity_filter:
    """Detect and mask blocklisted words and phrases in free text.

    The blocklist lives in ``self.arrBad`` (a plain list of lowercase words
    and phrases of up to three words).  Text is split on whitespace and
    every single token, adjacent pair and adjacent triple of tokens is
    compared against the blocklist.

    ``arrBad`` is re-read on every call, so callers may append to or remove
    from it after construction.
    """

    # Character pattern used to overwrite flagged tokens.
    _MASK = "!@#$%!@#$%^~!@%^~@#$%!@#$%^~!"

    def __init__(self):
        """Populate ``self.arrBad`` with the built-in blocklist."""
        self.arrBad = [
            "acrotomophilia",
            "anal",
            "anally",
            "anilingus",
            "anus",
            "arsehole",
            "ass",
            "asses",
            "asshole",
            "assmunch",
            "auto erotic",
            "autoerotic",
            "babeland",
            "baby batter",
            "ball gag",
            "ball gravy",
            "ball kicking",
            "ball licking",
            "ball sack",
            "ball zack",
            "ball sucking",
            "bangbros",
            "bareback",
            "barely legal",
            "barenaked",
            "bastardo",
            "bastinado",
            "bbw",
            "bdsm",
            "beaver cleaver",
            "beaver lips",
            "bestiality",
            "bi curious",
            "big black",
            "big breasts",
            "big knockers",
            "big tits",
            "bimbos",
            "birdlock",
            "bitch",
            "bitches",
            "black cock",
            "blonde action",
            "blonde on blonde",
            "blow j",
            "blow your l",
            "blow ourselves",
            "blow m",
            "blue waffle",
            "blumpkin",
            "bollocks",
            "bondage",
            "boner",
            "boob",
            "boobs",
            "booty call",
            "breasts",
            "brown showers",
            "brunette action",
            "bukkake",
            "bulldyke",
            "bullshit",
            "bullet vibe",
            "bung hole",
            "bunghole",
            "busty",
            "butt",
            "buttcheeks",
            "butthole",
            "camel toe",
            "camgirl",
            "camslut",
            "camwhore",
            "carpet muncher",
            "carpetmuncher",
            "chocolate rosebuds",
            "circlejerk",
            "cleveland steamer",
            "clit",
            "clitoris",
            "clover clamps",
            "clusterfuck",
            "cock",
            "cocks",
            "coprolagnia",
            "coprophilia",
            "cornhole",
            "creampie",
            "cream pie",
            "cum",
            "cumming",
            "cunnilingus",
            "cunt",
            "damn",
            "darkie",
            "date rape",
            "daterape",
            "deep throat",
            "deepthroat",
            "dick",
            "dildo",
            "dirty pillows",
            "dirty sanchez",
            "dog style",
            "doggie style",
            "doggiestyle",
            "doggy style",
            "doggystyle",
            "dolcett",
            "domination",
            "dominatrix",
            "dommes",
            "donkey punch",
            "double dick",
            "double dong",
            "double penetration",
            "dp action",
            "dtf",
            "eat my ass",
            "ecchi",
            "ejaculation",
            "erection",
            "erotic",
            "erotism",
            "escort",
            "ethical slut",
            "eunuch",
            "faggot",
            "posts each week",
            "fecal",
            "felch",
            "fellatio",
            "feltch",
            "female squirting",
            "femdom",
            "figging",
            "fingering",
            "fisting",
            "foot fetish",
            "footjob",
            "frotting",
            "fuck",
            "fucking",
            "fuckin",
            "fuckin'",
            "fucked",
            "fuckers",
            "fuck buttons",
            "fuckhead",
            "fudge packer",
            "fudgepacker",
            "futanari",
            "g-spot",
            "gspot",
            "gang bang",
            "gay sex",
            "genitals",
            "giant cock",
            "girl on",
            "girl on top",
            "girls gone wild",
            "goatcx",
            "goatse",
            "goddamn",
            "gokkun",
            "golden shower",
            "goo girl",
            "goodpoop",
            "goregasm",
            "grope",
            "group sex",
            "guro",
            "hand job",
            "handjob",
            "hard core",
            "hardcore",
            "hentai",
            "homoerotic",
            "honkey",
            "hooker",
            "horny",
            "hot chick",
            "how to kill",
            "how to murder",
            "huge fat",
            "humping",
            "incest",
            "intercourse",
            "jack off",
            "jail bait",
            "jailbait",
            "jerk off",
            "jerking off",
            "jigaboo",
            "jiggaboo",
            "jiggerboo",
            "jizz",
            "juggs",
            "kike",
            "kinbaku",
            "kinkster",
            "kinky",
            "knobbing",
            "leather restraint",
            "lemon party",
            "lolita",
            "lovemaking",
            "lpt request",
            "make me come",
            "male squirting",
            "masturbate",
            "masturbated",
            "masturbating",
            "menage a trois",
            "milf",
            "milfs",
            "missionary position",
            "motherfucker",
            "mound of venus",
            "mr hands",
            "muff diver",
            "muffdiving",
            "nambla",
            "nawashi",
            "negro",
            "neonazi",
            "nig nog",
            "nigga",
            "nigger",
            "nimphomania",
            "nipple",
            "not safe for",
            "nsfw",
            "nsfw images",
            "nude",
            "nudity",
            "nutsack",
            "nut sack",
            "nympho",
            "nymphomania",
            "octopussy",
            "omorashi",
            "one night stand",
            "orgasm",
            "orgy",
            "paedophile",
            "panties",
            "panty",
            "pedobear",
            "pedophile",
            "pegging",
            "pee",
            "penis",
            "phone sex",
            "piss pig",
            "pissing",
            "pisspig",
            "playboy",
            "pleasure chest",
            "pole smoker",
            "ponyplay",
            "poof",
            "poop chute",
            "poopchute",
            "porn",
            "pornhub",
            "porno",
            "pornography",
            "prince albert",
            "pthc",
            "pube",
            "pubes",
            "pussy",
            "pussies",
            "queaf",
            "queer",
            "raghead",
            "raging boner",
            "rape",
            "raping",
            "rapist",
            "rectum",
            "reverse cowgirl",
            "rimjob",
            "rimming",
            "rosy palm",
            "rusty trombone",
            "s&m",
            "sadism",
            "scat",
            "schlong",
            "scissoring",
            "semen",
            "sex",
            "sexo",
            "sexy",
            "shaved beaver",
            "shaved pussy",
            "shemale",
            "shibari",
            "shit",
            "shota",
            "shrimping",
            "slanteye",
            "slut",
            "smut",
            "snatch",
            "snowballing",
            "sodomize",
            "sodomy",
            "spic",
            "spooge",
            "spread legs",
            "strap on",
            "strapon",
            "strappado",
            "strip club",
            "style doggy",
            "suck",
            "sucks",
            "suicide girls",
            "sultry women",
            "swastika",
            "swinger",
            "tainted love",
            "taste my",
            "tea bagging",
            "threesome",
            "throating",
            "tied up",
            "tight white",
            "tit",
            "tits",
            "titties",
            "titty",
            "tongue in a",
            "topless",
            "tosser",
            "towelhead",
            "tranny",
            "tribadism",
            "tub girl",
            "tubgirl",
            "tushy",
            "twat",
            "twink",
            "twinkie",
            "undressing",
            "upskirt",
            "urethra play",
            "urophilia",
            "vagina",
            "venus mound",
            "vibrator",
            "violet blue",
            "violet wand",
            "vorarephilia",
            "voyeur",
            "vulva",
            "wank",
            "wet dream",
            "wetback",
            "white power",
            "whore",
            "women rapping",
            "wrapping men",
            "wrinkled starfish",
            "xx",
            "xxx",
            "yaoi",
            "yellow showers",
            "yiffy",
            "zoophilia",
        ]

    def _clean(self, text):
        """Lowercase ``text``, turn underscores into spaces, drop punctuation."""
        lowered = text.lower().replace("_", " ")
        return "".join(ch for ch in lowered if ch not in string.punctuation)

    def normalize(self, text):
        """Return ``text`` reduced to a crude singular, punctuation-free form.

        The text is lowercased, underscores become spaces, every character
        in ``string.punctuation`` is removed, and finally a trailing "s" or
        "es" is stripped (so "dickes" becomes "dick").
        """
        return re.sub(r"e?s$", "", self._clean(text))

    def _is_bad(self, phrase, bad):
        """Return True if any spelling variant of ``phrase`` is in ``bad``.

        Besides the raw phrase and its fully normalized form, the merely
        lowercased and the punctuation-stripped forms are tried as well.
        Without them a listed word that itself ends in "s" (for example
        "penis") is missed whenever it is capitalised or followed by
        punctuation, because ``normalize`` chops off the final "s".
        """
        return (
            phrase in bad
            or phrase.lower() in bad
            or self._clean(phrase) in bad
            or self.normalize(phrase) in bad
        )

    def _find_matches(self, tokens):
        """Yield ``(start, size, phrase)`` for each blocklisted n-gram.

        All single tokens are examined first, then all adjacent pairs, then
        all adjacent triples; ``start`` is the index of the first token of
        the match and ``size`` the number of tokens it spans.
        """
        # Built once per scan for O(1) lookups; rebuilt on every call so
        # that edits callers make to the arrBad list are always honoured.
        bad = set(self.arrBad)
        for size in (1, 2, 3):
            for start in range(len(tokens) - size + 1):
                phrase = " ".join(tokens[start:start + size])
                if self._is_bad(phrase, bad):
                    yield start, size, phrase

    def _report(self, size, phrase):
        """Print the diagnostic line for a matched word or phrase."""
        label = "WORD" if size == 1 else "PHRASE"
        print('***** PROFANITY %s="%s"' % (label, phrase))

    def _mask(self, length):
        """Return ``length`` mask characters, repeating the pattern if needed."""
        repeats = length // len(self._MASK) + 1
        return (self._MASK * repeats)[:length]

    def filter_bad_words(self, text):
        """Return ``text`` with every blocklisted token overwritten by a mask.

        Only the whitespace-delimited tokens that take part in a match are
        masked, each with a mask of the token's own length; all other
        characters, including the original whitespace, are left untouched.
        A diagnostic line is printed for every match.
        """
        spans = [found.span() for found in re.finditer(r"\S+", text)]
        tokens = [text[begin:end] for begin, end in spans]

        flagged = set()
        for start, size, phrase in self._find_matches(tokens):
            self._report(size, phrase)
            flagged.update(range(start, start + size))
        if not flagged:
            return text

        # Rebuild the string span by span rather than with str.replace, so
        # a flagged word is never also masked where it merely occurs inside
        # some other, innocent token.
        pieces = []
        cursor = 0
        for index, (begin, end) in enumerate(spans):
            if index in flagged:
                pieces.append(text[cursor:begin])
                pieces.append(self._mask(end - begin))
                cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces)

    def contains_bad_words(self, text):
        """Return True if ``text`` contains a blocklisted word or phrase.

        Scanning stops at the first match, for which a diagnostic line is
        printed.
        """
        for _start, size, phrase in self._find_matches(text.split()):
            self._report(size, phrase)
            return True
        return False
444
445
446 # x = profanity_filter()
447 # print(x.filter_bad_words("Fuck this auto erotic shit, it's not safe for work."))
448 # print(x.contains_bad_words("cream pie their daughter."))
449 # print(x.contains_bad_words("If you tell someone your penis is 6 inches it's pretty believable.  If you say it's half a foot no one will believe you."))
450 # print(x.normalize("dickes"));