e84b3cc3834ec9bd798f24e4be476dd6c1132a25
[kiosk.git] / reuters_rss_renderer.py
1 import constants
2 import file_writer
3 import grab_bag
4 import renderer
5 import datetime
6 import http.client
7 import page_builder
8 import profanity_filter
9 import random
10 import re
11 import sets
12 import xml.etree.ElementTree as ET
13
class reuters_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Turn Reuters RSS feeds into kiosk HTML pages.

    Two periodic operations are supported (see periodic_render):

    * "Fetch News" downloads every configured feed and fills two grab
      bags: short headline blurbs (self.news) and longer blurbs that also
      carry the item description (self.details).
    * "Shuffle News" draws a subset from each bag and writes a
      four-headline page and a single-story details page.
    """

    # Items whose pubDate is older than this many days are skipped.
    _MAX_AGE_DAYS = 14

    # Matches any markup tag; used to reduce descriptions to plain text.
    _TAG_RE = re.compile(r'<[^>]+>')

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page):
        """Set up the renderer.

        Args:
            name_to_timeout_dict: passed straight through to the base
                class. NOTE(review): presumably maps operation names such
                as "Fetch News" to their periods -- confirm in renderer.
            feed_site: host name handed to http.client.HTTPConnection.
            feed_uris: iterable of request paths on feed_site, one per feed.
            page: label used as the page title and inside the output file
                names.
        """
        # NOTE(review): the meaning of the second (False) argument is
        # defined by the base class and not visible from this file.
        super().__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page = page
        self.news = grab_bag.grab_bag()
        self.details = grab_bag.grab_bag()
        self.filter = profanity_filter.profanity_filter()

    def debug_prefix(self):
        """Return the tag identifying this renderer in debug output."""
        return "reuters(%s)" % (self.page)

    def periodic_render(self, key):
        """Run the operation named by key and return its success flag.

        Raises:
            ValueError: key is neither "Fetch News" nor "Shuffle News".
        """
        if key == "Fetch News":
            return self.fetch_news()
        if key == "Shuffle News":
            return self.shuffle_news()
        # The original raised through an undefined name (`error`), which
        # surfaced as a NameError; raise a real exception instead.
        raise ValueError('Unexpected operation: %r' % (key,))

    def shuffle_news(self):
        """Write the headline page and the details page.

        Returns:
            True when both pages were written.  False when either grab bag
            could not supply enough entries; if only the details bag falls
            short, the headline page has already been written.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        f = file_writer.file_writer('reuters-%s_4_none.html' % self.page)
        try:
            headlines.render_html(f)
        finally:
            # Release the writer even when rendering fails.
            f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_title("%s" % self.page)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            details.add_item(msg + "</TD>\n")
        g = file_writer.file_writer(
            'reuters-details-%s_6_none.html' % self.page)
        try:
            details.render_html(g)
        finally:
            g.close()
        return True

    def fetch_news(self):
        """Reload both grab bags from every configured feed.

        Both bags are cleared first.  Feeds are processed in order; the
        first feed that does not answer with HTTP 200 aborts the fetch.

        Returns:
            True if at least one item was stored, False otherwise
            (including when a feed request failed).
        """
        self.news.clear()
        self.details.clear()
        oldest = (datetime.datetime.now()
                  - datetime.timedelta(days=self._MAX_AGE_DAYS))

        count = 0
        for uri in self.feed_uris:
            body = self._fetch_feed(uri)
            if body is None:
                return False

            rss = ET.fromstring(body)
            channel = rss[0]
            # Element.getchildren() was removed in Python 3.9; iterating
            # the element yields the same direct children.
            for item in channel:
                if self._add_item(item, oldest):
                    count += 1
        return count > 0

    def _fetch_feed(self, uri):
        """GET uri from self.feed_site; return the body bytes or None.

        None is returned (after reporting the status) when the response
        status is not 200.  The connection is always closed.
        """
        conn = http.client.HTTPConnection(self.feed_site)
        try:
            conn.request("GET", uri, None, {"Accept-Charset": "utf-8"})
            response = conn.getresponse()
            body = response.read()
            if response.status != 200:
                print("%s: RSS fetch_news error, response: %d"
                      % (self.page, response.status))
                self.debug_print(body)
                return None
            return body
        finally:
            conn.close()

    def _add_item(self, item, oldest):
        """Build the blurbs for one feed element and store them.

        Args:
            item: a child element of the feed's channel.
            oldest: naive datetime cutoff; items dated before it are
                skipped.

        Returns:
            True if a headline blurb was added to self.news, else False.
        """
        title = item.findtext('title')
        # Children without a title (channel metadata), titles containing
        # "euters" (matches either capitalisation of the first letter),
        # the "Editor's Choice" entry and profane titles are all skipped.
        if (title is None
                or "euters" in title
                or title == "Editor's Choice"
                or self.filter.contains_bad_words(title)):
            return False

        pubdate = item.findtext('pubDate')
        image = item.findtext('image')
        descr = item.findtext('description')
        if descr is not None:
            descr = self._TAG_RE.sub('', descr)

        # The line breaks and padding inside this literal are part of the
        # emitted markup and are kept exactly as they were.
        blurb = """<DIV style="padding:8px;
                                       font-size:34pt;
                                       -webkit-column-break-inside:avoid;">"""
        if image is not None:
            # A stray quote after the HEIGHT value used to break this tag.
            blurb += ('<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 '
                      'style="padding:8px;">\n' % image)
        blurb += '<P><B>%s</B>' % title

        if pubdate is not None:
            # Example: "Thu, 04 Jun 2015 08:16:35 GMT" (or "... -0400").
            # The trailing zone token is dropped and the rest is parsed as
            # a naive timestamp.
            stamp = pubdate.rsplit(' ', 1)[0]
            try:
                dt = datetime.datetime.strptime(stamp,
                                                '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                # One malformed date must not abort the whole fetch; keep
                # the item but leave it undated.
                self.debug_print("Unparseable pubDate: %s" % pubdate)
            else:
                if dt < oldest:
                    return False
                blurb += dt.strftime(
                    " <FONT COLOR=#bbbbbb>(%a&nbsp;%b&nbsp;%d)</FONT>")

        # Only items that carry a description get a details entry.
        # Previously an item without one raised UnboundLocalError or
        # re-added the preceding item's detail blurb.
        if descr is not None:
            longblurb = blurb + "<BR>" + descr + "</DIV>"
            longblurb = longblurb.replace("font-size:34pt",
                                          "font-size:44pt")
            # Stored as str: shuffle_news appends a str to each entry,
            # which fails on utf-8 encoded bytes under Python 3.
            self.details.add(longblurb)

        self.news.add(blurb + "</DIV>")
        return True