# Project-local imports (defined elsewhere in this repository).
import constants
import file_writer
import grab_bag
import page_builder
import profanity_filter
import renderer

# Standard library.
import datetime
import http.client
import random
import re
import xml.etree.ElementTree as ET

# NOTE(review): the original file also had "import sets". That module was
# removed in Python 3 (the builtin `set` type replaced it) and nothing in
# this file used it, so the import is dropped to keep the module importable.


class reuters_rss_renderer(renderer.debuggable_abstaining_renderer):
    """Fetch Reuters RSS feeds and render headline/detail HTML pages.

    Periodically GETs each feed URI from ``feed_site``, filters out
    Reuters self-promotion, stale items, and profane titles, and keeps
    the surviving stories in two grab bags (short headlines and longer
    detail blurbs) from which random subsets are rendered to HTML.
    """

    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page):
        super(reuters_rss_renderer, self).__init__(name_to_timeout_dict, False)
        self.debug = 1
        self.feed_site = feed_site    # hostname to connect to for the feeds
        self.feed_uris = feed_uris    # list of URI paths to GET from feed_site
        self.page = page              # short page name, used in output filenames
        self.news = grab_bag.grab_bag()      # short headline blurbs
        self.details = grab_bag.grab_bag()   # headline + description blurbs
        self.filter = profanity_filter.profanity_filter()

    def debug_prefix(self):
        """Prefix used by debug_print() messages from this renderer."""
        return "reuters(%s)" % (self.page)

    def periodic_render(self, key):
        """Dispatch one periodic operation by name.

        Raises:
            ValueError: if ``key`` is not a known operation.
        """
        if key == "Fetch News":
            return self.fetch_news()
        elif key == "Shuffle News":
            return self.shuffle_news()
        else:
            # Was `raise error(...)`: `error` is undefined, so the original
            # raised NameError rather than a meaningful exception.
            raise ValueError("Unexpected operation: %s" % key)

    def shuffle_news(self):
        """Render one headlines page and one details page from random subsets.

        Returns:
            True on success; False when the grab bags hold too few stories.
        """
        headlines = page_builder.page_builder()
        headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
        headlines.set_title("%s" % self.page)
        subset = self.news.subset(4)
        if subset is None:
            self.debug_print("Not enough messages to choose from.")
            return False
        for msg in subset:
            headlines.add_item(msg)
        f = file_writer.file_writer("reuters-%s_4_none.html" % self.page)
        headlines.render_html(f)
        f.close()

        details = page_builder.page_builder()
        details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
        details.set_title("%s" % self.page)
        subset = self.details.subset(1)
        if subset is None:
            self.debug_print("Not enough details to choose from.")
            return False
        for msg in subset:
            details.add_item(msg + "\n")
        g = file_writer.file_writer("reuters-details-%s_6_none.html" % self.page)
        details.render_html(g)
        g.close()
        return True

    def fetch_news(self):
        """Fetch every feed URI, repopulating self.news and self.details.

        Skips items older than 14 days, Reuters self-references, the
        "Editor's Choice" placeholder, and profane titles.

        Returns:
            True iff at least one story was collected.
        """
        count = 0
        self.news.clear()
        self.details.clear()
        oldest = datetime.datetime.now() - datetime.timedelta(14)
        for uri in self.feed_uris:
            self.conn = http.client.HTTPConnection(self.feed_site)
            try:
                self.conn.request("GET", uri, None, {"Accept-Charset": "utf-8"})
                response = self.conn.getresponse()
                if response.status != 200:
                    print(
                        "%s: RSS fetch_news error, response: %d"
                        % (self.page, response.status)
                    )
                    self.debug_print(response.read())
                    return False
                rss = ET.fromstring(response.read())
            finally:
                # The original never closed the connection; one socket
                # leaked per feed per fetch.
                self.conn.close()
            channel = rss[0]
            # Element.getchildren() was removed in Python 3.9; iterating
            # the element directly is the supported equivalent.
            for item in channel:
                title = item.findtext("title")
                if (
                    title is None
                    or "euters" in title
                    or title == "Editor's Choice"
                    or self.filter.contains_bad_words(title)
                ):
                    continue
                pubdate = item.findtext("pubDate")
                image = item.findtext("image")
                descr = item.findtext("description")
                if descr is not None:
                    # Strip any embedded HTML tags from the description.
                    descr = re.sub("<[^>]+>", "", descr)
                # NOTE(review): the HTML markup in the literals below was
                # garbled in the copy reviewed (the image line read
                # `'\n' % image`, which raises TypeError). Reconstructed
                # from context -- the replace() call further down proves
                # "font-size:34pt" appeared here. Confirm against the
                # rendered page.
                blurb = """<div style="font-size:34pt">"""
                if image is not None:
                    blurb += '<img src="%s">\n' % image
                blurb += "<p>%s" % title
                if pubdate is not None:
                    # e.g. "Thu, 04 Jun 2015 08:16:35 GMT|-0400" -- drop
                    # the trailing timezone token before parsing.
                    pubdate = pubdate.rsplit(" ", 1)[0]
                    dt = datetime.datetime.strptime(
                        pubdate, "%a, %d %b %Y %H:%M:%S"
                    )
                    if dt < oldest:
                        continue
                    blurb += dt.strftime(" (%a %b %d)")
                if descr is not None:
                    longblurb = blurb
                    longblurb += "<br>"
                    longblurb += descr
                    longblurb += "</div>"
                    # Details pages use a larger font than headlines.
                    longblurb = longblurb.replace(
                        "font-size:34pt", "font-size:44pt"
                    )
                    self.details.add(longblurb.encode("utf8"))
                blurb += "</div>"
                self.news.add(blurb.encode("utf8"))
                count += 1
        return count > 0