Add a Google News RSS-based renderer. Minor improvements to all
[kiosk.git] / generic_news_rss_renderer.py
1 import datetime
2 from dateutil.parser import parse
3 import file_writer
4 import grab_bag
5 import renderer
6 import http.client
7 import page_builder
8 import profanity_filter
9 import random
10 import re
11 import xml.etree.ElementTree as ET
12
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Base renderer that turns RSS news feeds into kiosk HTML pages.

    "Fetch News" downloads every URI in ``feed_uris`` from ``feed_site``,
    converts each RSS item into an HTML blurb and stores the blurbs in two
    grab bags: ``self.news`` (headlines) and ``self.details`` (headline plus
    description).  "Shuffle News" asks those bags for a subset and writes a
    four-item headlines page and a one-item details page.

    Subclasses are expected to override ``debug_prefix``,
    ``get_headlines_page_prefix``, ``get_details_page_prefix`` and
    ``should_use_https`` (the versions here return None), and may override
    the ``find_*`` / ``munge_*`` / ``item_is_interesting_*`` hooks.
    """

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Store the feed configuration and create empty grab bags.

        Args:
            name_to_timeout_dict: forwarded unchanged to the base class.
            feed_site: host name passed to the HTTP(S) connection.
            feed_uris: iterable of request paths fetched from ``feed_site``.
            page_title: title used on both rendered pages and in error output.
        """
        # The meaning of the second positional argument (False) is defined by
        # the base class, which is not visible in this file.
        super().__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page_title = page_title
        self.news = grab_bag.grab_bag()
        self.details = grab_bag.grab_bag()
        self.filter = profanity_filter.profanity_filter()

    def debug_prefix(self):
        """Hook for subclasses; this base version returns None.

        NOTE(review): presumably consumed by the base class's debug_print --
        confirm against renderer.debuggable_abstaining_renderer.
        """
        pass

    def get_headlines_page_prefix(self):
        """Hook: prefix of the headlines file name ('<prefix>_4_none.html').

        This base version returns None.
        """
        pass

    def get_details_page_prefix(self):
        """Hook: prefix of the details file name ('<prefix>_6_none.html').

        This base version returns None.
        """
        pass

    def should_use_https(self):
        """Hook: return a truthy value to fetch feeds over HTTPS.

        This base version returns None, so plain HTTP is used unless a
        subclass overrides it.
        """
        pass

    def should_profanity_filter(self):
        """Return True to drop items whose title/description has bad words."""
        return False

    def find_title(self, item):
        """Return the text of the item's 'title' child, or None."""
        return item.findtext('title')

    def munge_title(self, title):
        """Hook to post-process a title; the default returns it unchanged."""
        return title

    def find_description(self, item):
        """Return the text of the item's 'description' child, or None."""
        return item.findtext('description')

    def munge_description(self, description):
        """Return the description with anything matching '<...>' removed."""
        description = re.sub('<[^>]+>', '', description)
        return description

    def find_link(self, item):
        """Return the text of the item's 'link' child, or None."""
        return item.findtext('link')

    def munge_link(self, link):
        """Hook to post-process a link; the default returns it unchanged."""
        return link

    def find_image(self, item):
        """Return the text of the item's 'image' child, or None."""
        return item.findtext('image')

    def munge_image(self, image):
        """Hook to post-process an image; the default returns it unchanged."""
        return image

    def item_is_interesting_for_headlines(self, title, description, item):
        """Hook: return False to keep an item out of both pages."""
        return True

    def is_item_older_than_n_days(self, item, n):
        """Return True if the item's 'pubDate' is more than n days in the past.

        Items with no 'pubDate', or with one that cannot be parsed, are
        reported as not older (False) rather than aborting the caller.
        """
        pubdate = item.findtext('pubDate')
        if pubdate is None:
            return False
        try:
            published = parse(pubdate)
        except (ValueError, OverflowError):
            self.debug_print('Unparsable pubDate "%s"' % pubdate)
            return False
        # Use the parsed value's tzinfo for "now" so that naive and aware
        # datetimes are never mixed in the subtraction below.
        now = datetime.datetime.now(published.tzinfo)
        age_in_days = (now - published).total_seconds() / (60 * 60 * 24)
        return age_in_days > n

    def item_is_interesting_for_article(self, title, description, item):
        """Hook: return False to keep an item off the details page only."""
        return True

    def periodic_render(self, key):
        """Dispatch a periodic operation by name.

        Args:
            key: "Fetch News" or "Shuffle News".

        Returns:
            The result of fetch_news() or shuffle_news().

        Raises:
            ValueError: if key is neither of the two known operations.
        """
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        else:
            # The original raised the undefined name `error`, which surfaced
            # as a NameError instead of the intended message.
            raise ValueError('Unexpected operation')

    def shuffle_news(self):
        """Write the headlines page and the details page.

        Returns:
            True if both pages were written; False if either grab bag
            returned None for the requested subset.  The headlines page is
            written before the details bag is consulted, so it may already
            exist on disk when False is returned for missing details.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page_title)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        f = file_writer.file_writer('%s_4_none.html' % (
            self.get_headlines_page_prefix()))
        try:
            headlines.render_html(f)
        finally:
            # Close the writer even if rendering raises.
            f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_title("%s" % self.page_title)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            details.add_item(msg + '</TD>')
        g = file_writer.file_writer('%s_6_none.html' % (
            self.get_details_page_prefix()))
        try:
            details.render_html(g)
        finally:
            g.close()
        return True

    def fetch_news(self):
        """Refill both grab bags from every configured feed URI.

        Both bags are cleared first.  A non-200 response from any URI stops
        the fetch immediately and returns False, leaving whatever earlier
        URIs already added in the bags.

        Returns:
            True if at least one headline was added, otherwise False.
        """
        count = 0
        self.news.clear()
        self.details.clear()

        for uri in self.feed_uris:
            body = self._fetch_feed(uri)
            if body is None:
                return False

            rss = ET.fromstring(body)
            channel = rss[0]
            # Iterate the element directly: Element.getchildren() was removed
            # in Python 3.9.  Every child of the channel is visited; children
            # without a 'title' sub-element are rejected in _add_item.
            for item in channel:
                if self._add_item(item):
                    count += 1
        return count > 0

    def _fetch_feed(self, uri):
        """GET one URI from feed_site; return the body, or None if not 200."""
        if self.should_use_https():
            self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
            self.conn = http.client.HTTPSConnection(self.feed_site)
        else:
            self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
            self.conn = http.client.HTTPConnection(self.feed_site)
        try:
            self.conn.request(
                "GET",
                uri,
                None,
                {"Accept-Charset": "utf-8"})
            response = self.conn.getresponse()
            status = response.status
            body = response.read()
        finally:
            # One connection is opened per URI; release its socket once the
            # body has been read (or the request failed).
            self.conn.close()

        if status != 200:
            print("%s: RSS fetch_news error, response: %d" % (
                self.page_title, status))
            self.debug_print(body)
            return None
        return body

    def _add_item(self, item):
        """Turn one feed element into blurbs; return True if a headline was added."""
        title = self.find_title(item)
        if title is not None:
            title = self.munge_title(title)
        # Go through the find_* hooks (as title and image already did) so
        # that subclass overrides of find_description/find_link take effect.
        description = self.find_description(item)
        if description is not None:
            description = self.munge_description(description)
        image = self.find_image(item)
        if image is not None:
            image = self.munge_image(image)
        link = self.find_link(item)
        if link is not None:
            link = self.munge_link(link)

        if (title is None or
                not self.item_is_interesting_for_headlines(title,
                                                           description,
                                                           item)):
            self.debug_print('Item "%s" is not interesting' % title)
            return False

        # description may legitimately be None; only hand real text to the
        # profanity filter.
        if (self.should_profanity_filter() and
                (self.filter.contains_bad_words(title) or
                 (description is not None and
                  self.filter.contains_bad_words(description)))):
            self.debug_print('Found bad words in item "%s"' % title)
            return False

        # The line breaks and leading spaces inside this literal are part of
        # the emitted HTML and are kept as in the original.
        blurb = """<DIV style="padding:8px;
                                 font-size:34pt;
                                 -webkit-column-break-inside:avoid;">"""
        if image is not None:
            blurb += '<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
            blurb += 'style="padding:8px;">'

        if link is None:
            blurb += '<P><B>%s</B>' % title
        else:
            blurb += '<P><B><A HREF="%s">%s</A></B>' % (link, title)

        if (description is not None and
                self.item_is_interesting_for_article(title,
                                                     description,
                                                     item)):
            # The details variant is the headline blurb plus the description,
            # with the font size swapped from 34pt to 44pt.
            longblurb = blurb
            longblurb += "<BR>"
            longblurb += description
            longblurb += "</DIV>"
            longblurb = longblurb.replace("font-size:34pt",
                                          "font-size:44pt")
            self.details.add(longblurb)

        blurb += "</DIV>"
        self.news.add(blurb)
        return True