2 from dateutil.parser import parse
8 import profanity_filter
11 import xml.etree.ElementTree as ET
14 class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
    """Store the feed configuration and create empty per-renderer state.

    Args:
        name_to_timeout_dict: forwarded unchanged to the base-class
            constructor.
        feed_site: host name that feed connections are opened against.
        feed_uris: iterable of URI paths on feed_site, fetched in turn.
        page_title: title put on the generated pages and used in log
            messages.
    """
    # The literal False is the base constructor's second positional
    # argument; its meaning is defined by the base class, not here.
    super().__init__(name_to_timeout_dict, False)

    # Feed configuration supplied by the caller.
    self.feed_site = feed_site
    self.feed_uris = feed_uris
    self.page_title = page_title

    # Working state: two independent grab bags (headline blurbs and
    # long-form details) plus the profanity filter applied to items.
    self.news = grab_bag.grab_bag()
    self.details = grab_bag.grab_bag()
    self.filter = profanity_filter.profanity_filter()
25 def debug_prefix(self):
28 def get_headlines_page_prefix(self):
31 def get_details_page_prefix(self):
34 def get_headlines_page_priority(self):
37 def get_details_page_priority(self):
40 def should_use_https(self):
43 def should_profanity_filter(self):
def find_title(self, item):
    """Return the text of the item's "title" child.

    Args:
        item: element exposing an ElementTree-style findtext().

    Returns:
        Whatever item.findtext("title") yields; for ElementTree elements
        that is the child's text, or None when there is no such child.
    """
    title_text = item.findtext("title")
    return title_text
49 def munge_title(self, title):
def find_description(self, item):
    """Return the text of the item's "description" child.

    Args:
        item: element exposing an ElementTree-style findtext().

    Returns:
        Whatever item.findtext("description") yields; for ElementTree
        elements that is the child's text, or None when there is no such
        child.
    """
    description_text = item.findtext("description")
    return description_text
55 def munge_description(self, description):
56 description = re.sub("<[^>]+>", "", description)
def find_link(self, item):
    """Return the text of the item's "link" child.

    Args:
        item: element exposing an ElementTree-style findtext().

    Returns:
        Whatever item.findtext("link") yields; for ElementTree elements
        that is the child's text, or None when there is no such child.
    """
    link_text = item.findtext("link")
    return link_text
62 def munge_link(self, link):
def find_image(self, item):
    """Return the text of the item's "image" child.

    Args:
        item: element exposing an ElementTree-style findtext().

    Returns:
        Whatever item.findtext("image") yields; for ElementTree elements
        that is the child's text, or None when there is no such child.
    """
    image_text = item.findtext("image")
    return image_text
68 def munge_image(self, image):
def find_pubdate(self, item):
    """Return the raw text of the item's "pubDate" child.

    The value is returned exactly as found; no date parsing happens here.

    Args:
        item: element exposing an ElementTree-style findtext().

    Returns:
        Whatever item.findtext("pubDate") yields; for ElementTree elements
        that is the child's text, or None when there is no such child.
    """
    pubdate_text = item.findtext("pubDate")
    return pubdate_text
74 def munge_pubdate(self, pubdate):
77 def item_is_interesting_for_headlines(self, title, description, item):
80 def is_item_older_than_n_days(self, item, n):
81 pubdate = self.find_pubdate(item)
82 if pubdate is not None:
83 pubdate = parse(pubdate)
84 tzinfo = pubdate.tzinfo
85 now = datetime.datetime.now(tzinfo)
86 delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
91 def item_is_interesting_for_article(self, title, description, item):
94 def periodic_render(self, key):
95 if key == "Fetch News":
96 return self.fetch_news()
97 elif key == "Shuffle News":
98 return self.shuffle_news()
100 raise error("Unexpected operation")
102 def shuffle_news(self):
103 headlines = page_builder.page_builder()
104 headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
105 headlines.set_title("%s" % self.page_title)
106 subset = self.news.subset(4)
108 self.debug_print("Not enough messages to choose from.")
111 headlines.add_item(msg)
112 headlines.set_custom_html(
117 text-decoration: none;
122 text-decoration: none;
127 text-decoration: none;
132 f = file_writer.file_writer(
134 % (self.get_headlines_page_prefix(), self.get_headlines_page_priority())
136 headlines.render_html(f)
139 details = page_builder.page_builder()
140 details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
141 details.set_custom_html(
146 text-decoration: none;
151 text-decoration: none;
156 text-decoration: none;
161 details.set_title("%s" % self.page_title)
162 subset = self.details.subset(1)
164 self.debug_print("Not enough details to choose from.")
169 details.add_item(blurb)
170 g = file_writer.file_writer(
172 % (self.get_details_page_prefix(), self.get_details_page_priority())
174 details.render_html(g)
178 def fetch_news(self):
183 for uri in self.feed_uris:
184 if self.should_use_https():
185 self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
186 self.conn = http.client.HTTPSConnection(self.feed_site, timeout=20)
188 self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
189 self.conn = http.client.HTTPConnection(self.feed_site, timeout=20)
196 "Cache-control": "max-age=59",
197 "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
201 response = self.conn.getresponse()
203 print("Exception in generic RSS renderer HTTP connection")
206 if response.status != 200:
209 "%s: RSS fetch_news error, response: %d"
210 % (self.page_title, response.status)
213 self.debug_print(response.read())
216 rss = ET.fromstring(response.read())
218 for item in channel.getchildren():
219 title = self.find_title(item)
220 if title is not None:
221 title = self.munge_title(title)
222 description = item.findtext("description")
223 if description is not None:
224 description = self.munge_description(description)
225 image = self.find_image(item)
226 if image is not None:
227 image = self.munge_image(image)
228 link = item.findtext("link")
230 link = self.munge_link(link)
232 if title is None or not self.item_is_interesting_for_headlines(
233 title, description, item
235 self.debug_print('Item "%s" is not interesting' % title)
238 if self.should_profanity_filter() and (
239 self.filter.contains_bad_words(title)
240 or self.filter.contains_bad_words(description)
242 self.debug_print('Found bad words in item "%s"' % title)
245 blurb = u"""<DIV style="padding:8px;
247 -webkit-column-break-inside:avoid;">"""
248 if image is not None:
249 blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
250 blurb += u'style="padding:8px;">'
253 blurb += u"<P><B>%s</B>" % title
255 blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)
257 pubdate = self.find_pubdate(item)
258 if pubdate is not None:
259 pubdate = self.munge_pubdate(pubdate)
261 blurb += u" <FONT COLOR=#cccccc>%s</FONT>" % (
262 ts.strftime("%b %d")
265 if description is not None and self.item_is_interesting_for_article(
266 title, description, item
271 longblurb += description
272 longblurb += u"</DIV>"
273 longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
274 self.details.add(longblurb)