X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;ds=inline;f=generic_news_rss_renderer.py;h=798c06c90962d001a87ed93f1a9a2114a3ccbc88;hb=72938579d41b01fe93f18b97605bcb7dfd147d4b;hp=b87ab05caa49ed646ecff87266cfca482576aab5;hpb=4b1f3d8a8b278ca6d62f461ea80c8ea21080c301;p=kiosk.git diff --git a/generic_news_rss_renderer.py b/generic_news_rss_renderer.py index b87ab05..798c06c 100644 --- a/generic_news_rss_renderer.py +++ b/generic_news_rss_renderer.py @@ -1,7 +1,9 @@ +import datetime +from dateutil.parser import parse import file_writer import grab_bag import renderer -import httplib +import http.client import page_builder import profanity_filter import random @@ -10,7 +12,8 @@ import xml.etree.ElementTree as ET class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title): - super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False) + super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, + False) self.debug = 1 self.feed_site = feed_site self.feed_uris = feed_uris @@ -28,6 +31,12 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def get_details_page_prefix(self): pass + def get_headlines_page_priority(self): + return "4" + + def get_details_page_priority(self): + return "6" + def should_use_https(self): pass @@ -50,14 +59,37 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def find_link(self, item): return item.findtext('link') + def munge_link(self, link): + return link + def find_image(self, item): return item.findtext('image') + def munge_image(self, image): + return image + + def find_pubdate(self, item): + return item.findtext('pubDate') + + def munge_pubdate(self, pubdate): + return pubdate + def item_is_interesting_for_headlines(self, title, description, item): - pass + return True + + def is_item_older_than_n_days(self, item, n): + pubdate = self.find_pubdate(item) + if pubdate is not None: + pubdate = parse(pubdate) + tzinfo = pubdate.tzinfo + now = datetime.datetime.now(tzinfo) + delta = (now - pubdate).total_seconds() / (60 * 60 * 24) + if (delta > n): + return True + return False def item_is_interesting_for_article(self, title, description, item): - pass + return True def periodic_render(self, key): if key == "Fetch News": @@ -77,13 +109,50 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): return False for msg in subset: headlines.add_item(msg) - f = file_writer.file_writer('%s_4_none.html' % ( - self.get_headlines_page_prefix())) + headlines.set_custom_html(""" +""") + f = file_writer.file_writer('%s_%s_none.html' % ( + self.get_headlines_page_prefix(), + self.get_headlines_page_priority())) headlines.render_html(f) f.close() details = page_builder.page_builder() details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) + details.set_custom_html(""" +""") details.set_title("%s" % self.page_title) subset = self.details.subset(1) if subset is None: @@ -91,10 +160,11 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): return False for msg in subset: blurb = msg - blurb += "\n" + blurb += u'' details.add_item(blurb) - g = file_writer.file_writer('%s_6_none.html' % ( - self.get_details_page_prefix())) + g = file_writer.file_writer('%s_%s_none.html' % ( + self.get_details_page_prefix(), + self.get_details_page_priority())) details.render_html(g) g.close() return True @@ -107,10 +177,10 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): for uri in self.feed_uris: if self.should_use_https(): self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri)) - self.conn = httplib.HTTPSConnection(self.feed_site) + self.conn = http.client.HTTPSConnection(self.feed_site) else: self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri)) - self.conn = httplib.HTTPConnection(self.feed_site) + self.conn = http.client.HTTPConnection(self.feed_site) self.conn.request( "GET", uri, @@ -118,8 +188,8 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): {"Accept-Charset": "utf-8"}) response = self.conn.getresponse() if response.status != 200: - print("%s: RSS fetch_news error, response: %d" % (self.page_title, - response.status)) + print(("%s: RSS fetch_news error, response: %d" % (self.page_title, + response.status))) self.debug_print(response.read()) return False @@ -132,8 +202,12 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): description = item.findtext('description') if description is not None: description = self.munge_description(description) + image = self.find_image(item) + if image is not None: + image = self.munge_image(image) link = item.findtext('link') - image = item.findtext('image') + if link is not None: + link = self.munge_link(link) if (title is None or not self.item_is_interesting_for_headlines(title, @@ -148,41 +222,39 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): self.debug_print('Found bad words in item "%s"' % title) continue - #print u"Title: %s\nDescription: %s\nLink: %s\nImage: %s\n" % ( - # title, description, link, image) - blurb = u"""
%s' % title + blurb += u'' + + if link is None: + blurb += u'
%s' % title + else: + blurb += u'
%s' % (link, title)
+
+ pubdate = self.find_pubdate(item)
+ if pubdate is not None:
+ pubdate = self.munge_pubdate(pubdate)
+ ts = parse(pubdate)
+ blurb += u" %s" % (
+ ts.strftime("%b %d"))
if (description is not None and
- self.item_is_interesting_for_article(title, description, item)):
+ self.item_is_interesting_for_article(title,
+ description,
+ item)):
longblurb = blurb
- longblurb += "
"
+
+ longblurb += u"
"
longblurb += description
- longblurb += "