Profanity filter, please.
[kiosk.git] / reuters_rss_renderer.py
1 import constants
2 import file_writer
3 import grab_bag
4 import renderer
5 import datetime
6 import http.client
7 import page_builder
8 import profanity_filter
9 import random
10 import re
11 import sets
12 import xml.etree.ElementTree as ET
13
14
class reuters_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Fetch RSS feeds over HTTP and render them into kiosk HTML pages.

    Two operations are dispatched through periodic_render():

    * "Fetch News" downloads every URI in ``feed_uris`` from ``feed_site``
      and refills two grab bags: ``news`` (headline blurbs) and ``details``
      (headline plus description blurbs).
    * "Shuffle News" draws items from those bags and writes a four-item
      headline page and a one-item detail page.
    """

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page):
        """Store the feed configuration and create empty blurb collections.

        Args:
            name_to_timeout_dict: passed through to the base renderer.
            feed_site: host handed to http.client.HTTPConnection.
            feed_uris: iterable of request paths fetched from feed_site.
            page: page name; used in titles, debug output and file names.
        """
        super().__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page = page
        self.news = grab_bag.grab_bag()
        self.details = grab_bag.grab_bag()
        self.filter = profanity_filter.profanity_filter()

    def debug_prefix(self):
        """Return the tag identifying this renderer in debug output."""
        return "reuters(%s)" % (self.page)

    def periodic_render(self, key):
        """Run the operation named by key and return its success flag.

        Raises:
            ValueError: if key is neither "Fetch News" nor "Shuffle News".
        """
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        else:
            # The original raised via an undefined name, which surfaced as a
            # NameError; raise a real, descriptive exception instead.
            raise ValueError("Unexpected operation: %s" % key)

    def shuffle_news(self):
        """Write the headline page and the detail page from stored blurbs.

        Returns:
            True when both pages were written; False when either grab bag
            could not supply the requested number of items.  The headline
            page may already have been written when the detail bag is short.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        f = file_writer.file_writer("reuters-%s_4_none.html" % self.page)
        try:
            headlines.render_html(f)
        finally:
            # Release the writer even if rendering fails.
            f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_title("%s" % self.page)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            # Requires msg to be str, which is why fetch_news() stores
            # unencoded text in the grab bags.
            details.add_item(msg + "</TD>\n")
        g = file_writer.file_writer("reuters-details-%s_6_none.html" % self.page)
        try:
            details.render_html(g)
        finally:
            g.close()
        return True

    def fetch_news(self):
        """Download every feed and refill the news and details grab bags.

        Both bags are cleared first.  Items older than 14 days, items whose
        title is filtered, and items without a title are skipped.

        Returns:
            True if at least one item was stored; False if nothing was
            stored or any feed answered with a non-200 status (in which
            case fetching stops at that feed).
        """
        count = 0
        self.news.clear()
        self.details.clear()
        oldest = datetime.datetime.now() - datetime.timedelta(14)

        for uri in self.feed_uris:
            self.conn = http.client.HTTPConnection(self.feed_site)
            try:
                self.conn.request("GET", uri, None, {"Accept-Charset": "utf-8"})
                response = self.conn.getresponse()
                status = response.status
                # Read the body before closing; the response is tied to the
                # connection's socket.
                body = response.read()
            finally:
                # The original never closed the connection (one leak per URI).
                self.conn.close()

            if status != 200:
                print(
                    "%s: RSS fetch_news error, response: %d" % (self.page, status)
                )
                self.debug_print(body)
                return False

            rss = ET.fromstring(body)
            channel = rss[0]
            # Element.getchildren() was removed in Python 3.9; iterating the
            # element yields the same direct children.
            for item in channel:
                blurbs = self._blurbs_for_item(item, oldest)
                if blurbs is None:
                    continue
                blurb, longblurb = blurbs
                # Only items with a description have a detail blurb; the
                # original reused a stale (or unbound) value otherwise.
                if longblurb is not None:
                    self.details.add(longblurb)
                self.news.add(blurb)
                count += 1
        return count > 0

    def _blurbs_for_item(self, item, oldest):
        """Build the HTML blurbs for one child element of the channel.

        Args:
            item: an ElementTree element taken from the RSS channel.
            oldest: datetime cutoff; items published before it are dropped.

        Returns:
            None if the item should be skipped, otherwise a tuple
            (blurb, longblurb) of str where longblurb is None when the item
            has no description.

        Raises:
            ValueError: if pubDate does not match the expected date format
                (propagated from datetime.strptime, as in the original).
        """
        title = item.findtext("title")
        if (
            title is None
            or "euters" in title
            or title == "Editor's Choice"
            or self.filter.contains_bad_words(title)
        ):
            return None
        pubdate = item.findtext("pubDate")
        image = item.findtext("image")
        descr = item.findtext("description")
        if descr is not None:
            # Strip any markup embedded in the description.
            descr = re.sub("<[^>]+>", "", descr)

        blurb = """<DIV style="padding:8px;
                                       font-size:34pt;
                                       -webkit-column-break-inside:avoid;">"""
        if image is not None:
            # Fixed a stray double quote after HEIGHT=115 that produced a
            # malformed IMG tag.
            blurb += (
                '<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 style="padding:8px;">\n'
                % image
            )
        blurb += "<P><B>%s</B>" % title

        if pubdate is not None:
            # Thu, 04 Jun 2015 08:16:35 GMT|-0400
            # Drop the trailing zone token so the remainder parses as naive.
            pubdate = pubdate.rsplit(" ", 1)[0]
            dt = datetime.datetime.strptime(pubdate, "%a, %d %b %Y %H:%M:%S")
            if dt < oldest:
                return None
            blurb += dt.strftime(
                " <FONT COLOR=#bbbbbb>(%a&nbsp;%b&nbsp;%d)</FONT>"
            )

        longblurb = None
        if descr is not None:
            longblurb = blurb + "<BR>" + descr + "</DIV>"
            # The detail page shows a single item, so it uses a larger font.
            longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")

        blurb += "</DIV>"
        return blurb, longblurb