8 import profanity_filter
12 import xml.etree.ElementTree as ET
15 class reuters_rss_renderer(renderer.debuggable_abstaining_renderer):
def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page):
    """Set up a renderer for one Reuters RSS page.

    Args:
        name_to_timeout_dict: forwarded unchanged to the base-class
            constructor.
        feed_site: host handed to http.client.HTTPConnection when the
            feeds are fetched.
        feed_uris: iterable of request paths; each one is fetched with a
            GET against ``feed_site``.
        page: page label used in the debug prefix, the page titles and
            the names of the generated HTML files.
    """
    # NOTE(review): the meaning of the literal False is defined by the base
    # class, which is not visible here.
    super(reuters_rss_renderer, self).__init__(name_to_timeout_dict, False)
    self.feed_site = feed_site
    self.feed_uris = feed_uris
    # Other methods of this class read self.page, so the constructor
    # argument has to be kept on the instance.
    self.page = page
    self.news = grab_bag.grab_bag()
    self.details = grab_bag.grab_bag()
    self.filter = profanity_filter.profanity_filter()
def debug_prefix(self):
    """Return the tag placed in front of this renderer's debug output.

    The tag has the form ``reuters(<page>)`` where ``<page>`` is
    ``self.page``.
    """
    page_label = self.page
    return "reuters(%s)" % page_label
def periodic_render(self, key):
    """Run the periodic operation named by ``key``.

    "Fetch News" calls ``self.fetch_news()`` and "Shuffle News" calls
    ``self.shuffle_news()``; the handler's return value is passed back
    unchanged. Any other key falls through to the ``raise`` at the end.
    """
    dispatch = (
        ("Fetch News", "fetch_news"),
        ("Shuffle News", "shuffle_news"),
    )
    for operation, method_name in dispatch:
        if key == operation:
            return getattr(self, method_name)()
    # NOTE(review): `error` is not defined in the part of the file visible
    # here -- confirm it is actually in scope.
    raise error("Unexpected operation")
37 def shuffle_news(self):
# Builds two HTML pages for self.page: a four-item headline page drawn from
# self.news and a one-item details page drawn from self.details.
# NOTE(review): the embedded line numbers skip (42, 44-45, 49-50, 55, 57-60),
# so the guards, loops and returns around the statements below are not
# visible in this extract -- confirm against the full file.
38 headlines = page_builder.page_builder()
39 headlines.set_layout(page_builder.page_builder.LAYOUT_FOUR_ITEMS)
40 headlines.set_title("%s" % self.page)
# Ask the headline bag for four entries.
41 subset = self.news.subset(4)
# Presumably only reached when subset() could not supply four entries; the
# test itself is on a line not shown -- TODO confirm.
43 self.debug_print("Not enough messages to choose from.")
# `msg` is presumably bound by a loop over `subset` on a line not shown.
46 headlines.add_item(msg)
# The headline page is written to a file named after the page.
47 f = file_writer.file_writer("reuters-%s_4_none.html" % self.page)
48 headlines.render_html(f)
# NOTE(review): no close of `f` is visible here -- confirm it is closed.
# Second page: a single detail item, same title.
51 details = page_builder.page_builder()
52 details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM)
53 details.set_title("%s" % self.page)
# `subset` is reused, now holding one entry from the details bag.
54 subset = self.details.subset(1)
# Presumably the failure path when no detail entry is available -- TODO confirm.
56 self.debug_print("Not enough details to choose from.")
# `blurb` is bound on a line not shown.
61 details.add_item(blurb)
62 g = file_writer.file_writer("reuters-details-%s_6_none.html" % self.page)
63 details.render_html(g)
# NOTE(review): no close of `g` is visible here -- confirm it is closed.
# Body of the feed-fetching routine.
# NOTE(review): its `def` line is not visible in this extract. periodic_render
# calls self.fetch_news() and the error message below names fetch_news, so
# this is presumably that method. The embedded line numbers also skip, so
# some guards, loops and returns are missing from view.
#
# Timestamp 14 days before now; presumably the cutoff for stale items (the
# comparison is not visible) -- TODO confirm.
71 oldest = datetime.datetime.now() - datetime.timedelta(14)
# One HTTP GET per feed URI against self.feed_site.
73 for uri in self.feed_uris:
74 self.conn = http.client.HTTPConnection(self.feed_site)
75 self.conn.request("GET", uri, None, {"Accept-Charset": "utf-8"})
76 response = self.conn.getresponse()
# A status other than 200 is reported with the page name and status code.
77 if response.status != 200:
80 "%s: RSS fetch_news error, response: %d"
81 % (self.page, response.status)
84 self.debug_print(response.read())
# Parse the response body as XML.
87 rss = ET.fromstring(response.read())
# `channel` is bound on a line not shown.
# NOTE(review): Element.getchildren() was removed in Python 3.9; iterating
# the element directly (or list(channel)) is the documented replacement.
89 for item in channel.getchildren():
90 title = item.findtext("title")
# Tail of a condition on the title (exact match on a fixed heading, then the
# profanity filter); the start of the condition and its body are not visible.
94 or title == "Editor's Choice"
95 or self.filter.contains_bad_words(title)
98 pubdate = item.findtext("pubDate")
99 image = item.findtext("image")
100 descr = item.findtext("description")
# Remove anything shaped like a markup tag from the description.
101 if descr is not None:
102 descr = re.sub("<[^>]+>", "", descr)
# Start the HTML fragment for this item.
104 blurb = """<DIV style="padding:8px;
106 -webkit-column-break-inside:avoid;">"""
107 if image is not None:
109 '<IMG SRC="%s" ALIGN=LEFT HEIGHT=115" style="padding:8px;">\n'
112 blurb += "<P><B>%s</B>" % title
115 # Thu, 04 Jun 2015 08:16:35 GMT|-0400
# Drop the last space-separated token (the timezone in the sample above) so
# the remainder matches the strptime format on the next line.
116 pubdate = pubdate.rsplit(" ", 1)[0]
117 dt = datetime.datetime.strptime(pubdate, "%a, %d %b %Y %H:%M:%S")
# Append the publication date to the blurb.
120 blurb += dt.strftime(
121 " <FONT COLOR=#bbbbbb>(%a %b %d)</FONT>"
124 if descr is not None:
# `longblurb` is first assigned on a line not shown.
128 longblurb += "</DIV>"
# Change the 34pt font size to 44pt in the details text.
129 longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
# Both bags receive UTF-8 encoded bytes rather than str.
131 self.details.add(longblurb.encode("utf8"))
133 self.news.add(blurb.encode("utf8"))