import constants import file_writer import grab_bag import renderer import datetime import http.client import page_builder import profanity_filter import random import re import sets import xml.etree.ElementTree as ET class reuters_rss_renderer(renderer.debuggable_abstaining_renderer): def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page): super(reuters_rss_renderer, self).__init__(name_to_timeout_dict, False) self.debug = 1 self.feed_site = feed_site self.feed_uris = feed_uris self.page = page self.news = grab_bag.grab_bag() self.details = grab_bag.grab_bag() self.filter = profanity_filter.profanity_filter() def debug_prefix(self): return "reuters(%s)" % (self.page) def periodic_render(self, key): if key == "Fetch News": return self.fetch_news() elif key == "Shuffle News": return self.shuffle_news() else: raise error("Unexpected operation") def shuffle_news(self): headlines = page_builder.page_builder() headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS) headlines.set_title("%s" % self.page) subset = self.news.subset(4) if subset is None: self.debug_print("Not enough messages to choose from.") return False for msg in subset: headlines.add_item(msg) f = file_writer.file_writer("reuters-%s_4_none.html" % self.page) headlines.render_html(f) f.close() details = page_builder.page_builder() details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) details.set_title("%s" % self.page) subset = self.details.subset(1) if subset is None: self.debug_print("Not enough details to choose from.") return False for msg in subset: blurb = msg blurb += "\n" details.add_item(blurb) g = file_writer.file_writer("reuters-details-%s_6_none.html" % self.page) details.render_html(g) g.close() return True def fetch_news(self): count = 0 self.news.clear() self.details.clear() oldest = datetime.datetime.now() - datetime.timedelta(14) for uri in self.feed_uris: self.conn = http.client.HTTPConnection(self.feed_site) self.conn.request("GET", uri, None, {"Accept-Charset": "utf-8"}) response = self.conn.getresponse() if response.status != 200: print( ( "%s: RSS fetch_news error, response: %d" % (self.page, response.status) ) ) self.debug_print(response.read()) return False rss = ET.fromstring(response.read()) channel = rss[0] for item in channel.getchildren(): title = item.findtext("title") if ( title is None or "euters" in title or title == "Editor's Choice" or self.filter.contains_bad_words(title) ): continue pubdate = item.findtext("pubDate") image = item.findtext("image") descr = item.findtext("description") if descr is not None: descr = re.sub("<[^>]+>", "", descr) blurb = """
%s" % title
if pubdate != None:
# Thu, 04 Jun 2015 08:16:35 GMT|-0400
pubdate = pubdate.rsplit(" ", 1)[0]
dt = datetime.datetime.strptime(pubdate, "%a, %d %b %Y %H:%M:%S")
if dt < oldest:
continue
blurb += dt.strftime(
" (%a %b %d)"
)
if descr is not None:
longblurb = blurb
longblurb += "
"
longblurb += descr
longblurb += "