import constants import file_writer import grab_bag import renderer import datetime import http.client import page_builder import profanity_filter import random import re import sets import xml.etree.ElementTree as ET class reuters_rss_renderer(renderer.debuggable_abstaining_renderer): def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page): super(reuters_rss_renderer, self).__init__(name_to_timeout_dict, False) self.debug = 1 self.feed_site = feed_site self.feed_uris = feed_uris self.page = page self.news = grab_bag.grab_bag() self.details = grab_bag.grab_bag() self.filter = profanity_filter.profanity_filter() def debug_prefix(self): return "reuters(%s)" % (self.page) def periodic_render(self, key): if key == "Fetch News": return self.fetch_news() elif key == "Shuffle News": return self.shuffle_news() else: raise error('Unexpected operation') def shuffle_news(self): headlines = page_builder.page_builder() headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS) headlines.set_title("%s" % self.page) subset = self.news.subset(4) if subset is None: self.debug_print("Not enough messages to choose from.") return False for msg in subset: headlines.add_item(msg) f = file_writer.file_writer('reuters-%s_4_none.html' % self.page) headlines.render_html(f) f.close() details = page_builder.page_builder() details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) details.set_title("%s" % self.page) subset = self.details.subset(1) if subset is None: self.debug_print("Not enough details to choose from."); return False for msg in subset: blurb = msg blurb += "\n" details.add_item(blurb) g = file_writer.file_writer('reuters-details-%s_6_none.html' % self.page) details.render_html(g) g.close() return True def fetch_news(self): count = 0 self.news.clear() self.details.clear() oldest = datetime.datetime.now() - datetime.timedelta(14) for uri in self.feed_uris: self.conn = http.client.HTTPConnection(self.feed_site) self.conn.request( "GET", uri, None, {"Accept-Charset": "utf-8"}) response = self.conn.getresponse() if response.status != 200: print(("%s: RSS fetch_news error, response: %d" % (self.page, response.status))) self.debug_print(response.read()) return False rss = ET.fromstring(response.read()) channel = rss[0] for item in channel.getchildren(): title = item.findtext('title') if (title is None or "euters" in title or title == "Editor's Choice" or self.filter.contains_bad_words(title)): continue pubdate = item.findtext('pubDate') image = item.findtext('image') descr = item.findtext('description') if descr is not None: descr = re.sub('<[^>]+>', '', descr) blurb = """
%s' % title
if pubdate != None:
# Thu, 04 Jun 2015 08:16:35 GMT|-0400
pubdate = pubdate.rsplit(' ', 1)[0]
dt = datetime.datetime.strptime(pubdate,
'%a, %d %b %Y %H:%M:%S')
if dt < oldest:
continue
blurb += dt.strftime(" (%a %b %d)")
if descr is not None:
longblurb = blurb
longblurb += "
"
longblurb += descr
longblurb += "