More fuckery with indented sublists in gkeep and with the countdown line.
[kiosk.git] / generic_news_rss_renderer.py
import datetime
from dateutil.parser import parse
import file_writer
import grab_bag
import renderer
import http.client
import page_builder
import profanity_filter
import random
import re
import xml.etree.ElementTree as ET

class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Fetches RSS feeds and renders headline and detail pages; feed-specific
    subclasses override the hook methods below to customize behavior."""

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
        super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict,
                                                        False)
        self.debug = True
        self.feed_site = feed_site
        self.feed_uris = feed_uris
        self.page_title = page_title
        self.news = grab_bag.grab_bag()
        self.details = grab_bag.grab_bag()
        self.filter = profanity_filter.profanity_filter()

    # Hook methods; subclasses override these to customize per-feed behavior.
    def debug_prefix(self):
        pass

    def get_headlines_page_prefix(self):
        pass

    def get_details_page_prefix(self):
        pass

    def get_headlines_page_priority(self):
        return "4"

    def get_details_page_priority(self):
        return "6"

    def should_use_https(self):
        pass

    def should_profanity_filter(self):
        return False

    def find_title(self, item):
        return item.findtext('title')

    def munge_title(self, title):
        return title

    def find_description(self, item):
        return item.findtext('description')

    def munge_description(self, description):
        # Strip any HTML tags out of the description.
        description = re.sub('<[^>]+>', '', description)
        return description

    def find_link(self, item):
        return item.findtext('link')

    def munge_link(self, link):
        return link

    def find_image(self, item):
        return item.findtext('image')

    def munge_image(self, image):
        return image

    def find_pubdate(self, item):
        return item.findtext('pubDate')

    def munge_pubdate(self, pubdate):
        return pubdate

    def item_is_interesting_for_headlines(self, title, description, item):
        return True

    def is_item_older_than_n_days(self, item, n):
        # Compare the item's pubDate against "now" in the same timezone so
        # that timezone-aware feed dates don't break the subtraction.
        pubdate = self.find_pubdate(item)
        if pubdate is not None:
            pubdate = parse(pubdate)
            tzinfo = pubdate.tzinfo
            now = datetime.datetime.now(tzinfo)
            delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
            if delta > n:
                return True
        return False

    def item_is_interesting_for_article(self, title, description, item):
        return True

    def periodic_render(self, key):
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        else:
            raise Exception("Unexpected operation: %s" % key)

    def shuffle_news(self):
        # Build the headlines page from a subset of the collected stories.
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page_title)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        headlines.set_custom_html("""
<STYLE>
a:link {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:visited {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:active {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
</STYLE>""")
        f = file_writer.file_writer('%s_%s_none.html' % (
            self.get_headlines_page_prefix(),
            self.get_headlines_page_priority()))
        headlines.render_html(f)
        f.close()

        # Build the details page from a single story.
        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_custom_html("""
<STYLE>
a:link {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:visited {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
a:active {
  color: black;
  text-decoration: none;
  font-weight: bold;
}
</STYLE>""")
        details.set_title("%s" % self.page_title)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            blurb = msg
            blurb += u'</TD>'
            details.add_item(blurb)
        g = file_writer.file_writer('%s_%s_none.html' % (
            self.get_details_page_prefix(),
            self.get_details_page_priority()))
        details.render_html(g)
        g.close()
        return True

    def fetch_news(self):
        count = 0
        self.news.clear()
        self.details.clear()

        for uri in self.feed_uris:
            if self.should_use_https():
                self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
                self.conn = http.client.HTTPSConnection(self.feed_site)
            else:
                self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
                self.conn = http.client.HTTPConnection(self.feed_site)
            self.conn.request(
                "GET",
                uri,
                None,
                {"Accept-Charset": "utf-8"})
            response = self.conn.getresponse()
            if response.status != 200:
                print("%s: RSS fetch_news error, response: %d" % (
                    self.page_title, response.status))
                self.debug_print(response.read())
                return False

            rss = ET.fromstring(response.read())
            channel = rss[0]
            for item in channel:
                title = self.find_title(item)
                if title is not None:
                    title = self.munge_title(title)
                description = self.find_description(item)
                if description is not None:
                    description = self.munge_description(description)
                image = self.find_image(item)
                if image is not None:
                    image = self.munge_image(image)
                link = self.find_link(item)
                if link is not None:
                    link = self.munge_link(link)

                if (title is None or
                    not self.item_is_interesting_for_headlines(title,
                                                               description,
                                                               item)):
                    self.debug_print('Item "%s" is not interesting' % title)
                    continue

                if (self.should_profanity_filter() and
                    (self.filter.contains_bad_words(title) or
                     self.filter.contains_bad_words(description))):
                    self.debug_print('Found bad words in item "%s"' % title)
                    continue

                # Short blurb (image, linked headline, date) for the headlines page.
                blurb = u"""<DIV style="padding:8px;
                                 font-size:34pt;
                                 -webkit-column-break-inside:avoid;">"""
                if image is not None:
                    blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
                    blurb += u'style="padding:8px;">'

                if link is None:
                    blurb += u'<P><B>%s</B>' % title
                else:
                    blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)

                pubdate = self.find_pubdate(item)
                if pubdate is not None:
                    pubdate = self.munge_pubdate(pubdate)
                    ts = parse(pubdate)
                    blurb += u"  <FONT COLOR=#cccccc>%s</FONT>" % (
                        ts.strftime("%b&nbsp;%d"))

                # Longer blurb (headline plus description) for the details page.
                if (description is not None and
                    self.item_is_interesting_for_article(title,
                                                         description,
                                                         item)):
                    longblurb = blurb
                    longblurb += u"<BR>"
                    longblurb += description
                    longblurb += u"</DIV>"
                    longblurb = longblurb.replace("font-size:34pt",
                                                  "font-size:44pt")
                    self.details.add(longblurb)

                blurb += u"</DIV>"
                self.news.add(blurb)
                count += 1
        return count > 0
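

# ---------------------------------------------------------------------------
# Illustrative only: a minimal sketch of what a feed-specific subclass might
# look like, showing which hooks are expected to be overridden.  The feed
# host, URI, page prefixes, and timeout values below are hypothetical
# placeholders, not anything configured elsewhere in kiosk; the timeout dict
# keys are the operation names dispatched by periodic_render above.
# ---------------------------------------------------------------------------
class example_news_rss_renderer(generic_news_rss_renderer):
    def __init__(self):
        super(example_news_rss_renderer, self).__init__(
            {"Fetch News": 3600, "Shuffle News": 600},  # assumed to be seconds
            "feeds.example.com",
            ["/rss/topstories.xml"],
            "Example News")

    def debug_prefix(self):
        return "example_news"

    def get_headlines_page_prefix(self):
        return "example-news"

    def get_details_page_prefix(self):
        return "example-news-details"

    def should_use_https(self):
        return True

    def should_profanity_filter(self):
        return True

    def item_is_interesting_for_headlines(self, title, description, item):
        # Skip stories more than a couple of days old.
        return not self.is_item_older_than_n_days(item, 2)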