3bc5f1be147026b7cac5f95eddfc569951f6e506
[kiosk.git] / generic_news_rss_renderer.py
1 import datetime
2 from dateutil.parser import parse
3 import file_writer
4 import grab_bag
5 import renderer
6 import http.client
7 import page_builder
8 import profanity_filter
9 import random
10 import re
11 import xml.etree.ElementTree as ET
12
13
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Shared scaffolding for kiosk pages driven by an RSS news feed.

    Subclasses customize behavior by overriding the find_*/munge_* hooks
    (title, description, link, image, pubDate), the page prefix/priority
    accessors, and the item_is_interesting_* predicates.  Two periodic
    operations are exposed through periodic_render():

      * "Fetch News"   -- fetch and parse the configured feed URIs,
                          accumulating headline and detail blurbs.
      * "Shuffle News" -- render a random subset of the accumulated
                          blurbs to the headlines and details HTML pages.
    """

    # CSS injected into both generated pages: hyperlinks render as plain
    # bold black text (no underline, no visited/active coloring).
    link_style_css = """
<STYLE>
a:link {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:visited {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:active {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
</STYLE>"""

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        """Create a renderer for one feed site.

        Args:
            name_to_timeout_dict: operation-name -> timeout mapping,
                forwarded to the debuggable_abstaining_renderer base.
            feed_site: hostname of the RSS server (no scheme).
            feed_uris: list of URI paths to fetch from feed_site.
            page_title: human-readable title for the generated pages.
        """
        super().__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page_title = page_title
        self.news = grab_bag.grab_bag()      # headline blurbs
        self.details = grab_bag.grab_bag()   # long-form article blurbs
        self.filter = profanity_filter.profanity_filter()

    def debug_prefix(self):
        """Subclass hook: prefix for debug messages."""
        pass

    def get_headlines_page_prefix(self):
        """Subclass hook: filename prefix for the headlines page."""
        pass

    def get_details_page_prefix(self):
        """Subclass hook: filename prefix for the details page."""
        pass

    def get_headlines_page_priority(self):
        """Priority string embedded in the headlines page filename."""
        return "4"

    def get_details_page_priority(self):
        """Priority string embedded in the details page filename."""
        return "6"

    def should_use_https(self):
        """Subclass hook: truthy to fetch feeds over HTTPS, else HTTP."""
        pass

    def should_profanity_filter(self):
        """Subclass hook: truthy to drop items containing profanity."""
        return False

    def find_title(self, item):
        """Extract the title text from a feed <item> element."""
        return item.findtext("title")

    def munge_title(self, title):
        """Subclass hook: post-process an extracted title."""
        return title

    def find_description(self, item):
        """Extract the description text from a feed <item> element."""
        return item.findtext("description")

    def munge_description(self, description):
        """Strip embedded HTML tags from a description."""
        description = re.sub("<[^>]+>", "", description)
        return description

    def find_link(self, item):
        """Extract the article link from a feed <item> element."""
        return item.findtext("link")

    def munge_link(self, link):
        """Subclass hook: post-process an extracted link."""
        return link

    def find_image(self, item):
        """Extract the image URL from a feed <item> element."""
        return item.findtext("image")

    def munge_image(self, image):
        """Subclass hook: post-process an extracted image URL."""
        return image

    def find_pubdate(self, item):
        """Extract the pubDate text from a feed <item> element."""
        return item.findtext("pubDate")

    def munge_pubdate(self, pubdate):
        """Subclass hook: post-process an extracted pubDate string."""
        return pubdate

    def item_is_interesting_for_headlines(self, title, description, item):
        """Subclass hook: filter items out of the headlines page."""
        return True

    def is_item_older_than_n_days(self, item, n):
        """Return True if item's pubDate is more than n days in the past.

        Items without a parseable pubDate are considered NOT old.
        """
        pubdate = self.find_pubdate(item)
        if pubdate is not None:
            pubdate = parse(pubdate)
            # Compare in the feed's own timezone so aware/naive
            # datetimes are never mixed.
            tzinfo = pubdate.tzinfo
            now = datetime.datetime.now(tzinfo)
            delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
            if delta > n:
                return True
        return False

    def item_is_interesting_for_article(self, title, description, item):
        """Subclass hook: filter items out of the details page."""
        return True

    def periodic_render(self, key):
        """Dispatch a named periodic operation.

        Raises:
            ValueError: if key is not a known operation.
        """
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        else:
            # Was `raise error(...)`: `error` is undefined and produced a
            # NameError; raise a real exception type instead.
            raise ValueError("Unexpected operation")

    def shuffle_news(self):
        """Render random subsets of collected blurbs to the HTML pages.

        Writes a four-item headlines page and a one-item details page.
        Returns False (without writing) when too few blurbs are cached.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page_title)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        headlines.set_custom_html(self.link_style_css)
        f = file_writer.file_writer(
            "%s_%s_25900.html"
            % (self.get_headlines_page_prefix(), self.get_headlines_page_priority())
        )
        headlines.render_html(f)
        f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_custom_html(self.link_style_css)
        details.set_title("%s" % self.page_title)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            blurb = msg
            blurb += u"</TD>"
            details.add_item(blurb)
        g = file_writer.file_writer(
            "%s_%s_86400.html"
            % (self.get_details_page_prefix(), self.get_details_page_priority())
        )
        details.render_html(g)
        g.close()
        return True

    def fetch_news(self):
        """Fetch every configured feed URI and rebuild the blurb caches.

        Returns True if at least one interesting item was collected,
        False on any HTTP failure.
        """
        count = 0
        self.news.clear()
        self.details.clear()

        for uri in self.feed_uris:
            if self.should_use_https():
                self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
                self.conn = http.client.HTTPSConnection(self.feed_site, timeout=20)
            else:
                self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
                self.conn = http.client.HTTPConnection(self.feed_site, timeout=20)
            self.conn.request(
                "GET",
                uri,
                None,
                {
                    "Accept": "*/*",
                    "Cache-control": "max-age=59",
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
                },
            )
            try:
                response = self.conn.getresponse()
            except Exception:  # was a bare except: don't swallow KeyboardInterrupt et al.
                print("Exception in generic RSS renderer HTTP connection")
                return False

            if response.status != 200:
                print(
                    (
                        "%s: RSS fetch_news error, response: %d"
                        % (self.page_title, response.status)
                    )
                )
                self.debug_print(response.read())
                self.conn.close()
                return False

            raw = response.read()
            self.conn.close()  # don't leak a socket per feed URI
            rss = ET.fromstring(raw)
            channel = rss[0]
            # Element.getchildren() was removed in Python 3.9; iterating
            # the element directly yields the same children.
            for item in channel:
                title = self.find_title(item)
                if title is not None:
                    title = self.munge_title(title)
                # Go through the overridable hooks consistently; the
                # original called item.findtext() directly here, which
                # silently ignored subclass find_description/find_link
                # overrides.
                description = self.find_description(item)
                if description is not None:
                    description = self.munge_description(description)
                image = self.find_image(item)
                if image is not None:
                    image = self.munge_image(image)
                link = self.find_link(item)
                if link is not None:
                    link = self.munge_link(link)

                if title is None or not self.item_is_interesting_for_headlines(
                    title, description, item
                ):
                    self.debug_print('Item "%s" is not interesting' % title)
                    continue

                if self.should_profanity_filter() and (
                    self.filter.contains_bad_words(title)
                    or self.filter.contains_bad_words(description)
                ):
                    self.debug_print('Found bad words in item "%s"' % title)
                    continue

                # Build the headline blurb: optional thumbnail, linked
                # title, and (when available) a dim publication date.
                blurb = u"""<DIV style="padding:8px;
                                 font-size:34pt;
                                 -webkit-column-break-inside:avoid;">"""
                if image is not None:
                    blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
                    blurb += u'style="padding:8px;">'

                if link is None:
                    blurb += u"<P><B>%s</B>" % title
                else:
                    blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)

                pubdate = self.find_pubdate(item)
                if pubdate is not None:
                    pubdate = self.munge_pubdate(pubdate)
                    ts = parse(pubdate)
                    blurb += u"  <FONT COLOR=#cccccc>%s</FONT>" % (
                        ts.strftime("%b&nbsp;%d")
                    )

                # The details page reuses the headline blurb with the
                # description appended and a larger font.
                if description is not None and self.item_is_interesting_for_article(
                    title, description, item
                ):
                    longblurb = blurb

                    longblurb += u"<BR>"
                    longblurb += description
                    longblurb += u"</DIV>"
                    longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
                    self.details.add(longblurb)

                blurb += u"</DIV>"
                self.news.add(blurb)
                count += 1
        return count > 0