X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=generic_news_rss_renderer.py;h=798c06c90962d001a87ed93f1a9a2114a3ccbc88;hb=72938579d41b01fe93f18b97605bcb7dfd147d4b;hp=b87ab05caa49ed646ecff87266cfca482576aab5;hpb=4b1f3d8a8b278ca6d62f461ea80c8ea21080c301;p=kiosk.git diff --git a/generic_news_rss_renderer.py b/generic_news_rss_renderer.py index b87ab05..798c06c 100644 --- a/generic_news_rss_renderer.py +++ b/generic_news_rss_renderer.py @@ -1,7 +1,9 @@ +import datetime +from dateutil.parser import parse import file_writer import grab_bag import renderer -import httplib +import http.client import page_builder import profanity_filter import random @@ -10,7 +12,8 @@ import xml.etree.ElementTree as ET class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title): - super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False) + super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, + False) self.debug = 1 self.feed_site = feed_site self.feed_uris = feed_uris @@ -28,6 +31,12 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def get_details_page_prefix(self): pass + def get_headlines_page_priority(self): + return "4" + + def get_details_page_priority(self): + return "6" + def should_use_https(self): pass @@ -50,14 +59,37 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): def find_link(self, item): return item.findtext('link') + def munge_link(self, link): + return link + def find_image(self, item): return item.findtext('image') + def munge_image(self, image): + return image + + def find_pubdate(self, item): + return item.findtext('pubDate') + + def munge_pubdate(self, pubdate): + return pubdate + def item_is_interesting_for_headlines(self, title, description, item): - pass + return True + + def is_item_older_than_n_days(self, item, n): + pubdate = self.find_pubdate(item) + if pubdate is not None: + pubdate = parse(pubdate) + tzinfo = pubdate.tzinfo + now = datetime.datetime.now(tzinfo) + delta = (now - pubdate).total_seconds() / (60 * 60 * 24) + if (delta > n): + return True + return False def item_is_interesting_for_article(self, title, description, item): - pass + return True def periodic_render(self, key): if key == "Fetch News": @@ -77,13 +109,50 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): return False for msg in subset: headlines.add_item(msg) - f = file_writer.file_writer('%s_4_none.html' % ( - self.get_headlines_page_prefix())) + headlines.set_custom_html(""" +""") + f = file_writer.file_writer('%s_%s_none.html' % ( + self.get_headlines_page_prefix(), + self.get_headlines_page_priority())) headlines.render_html(f) f.close() details = page_builder.page_builder() details.set_layout(page_builder.page_builder.LAYOUT_ONE_ITEM) + details.set_custom_html(""" +""") details.set_title("%s" % self.page_title) subset = self.details.subset(1) if subset is None: @@ -91,10 +160,11 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): return False for msg in subset: blurb = msg - blurb += "\n" + blurb += u'' details.add_item(blurb) - g = file_writer.file_writer('%s_6_none.html' % ( - self.get_details_page_prefix())) + g = file_writer.file_writer('%s_%s_none.html' % ( + self.get_details_page_prefix(), + self.get_details_page_priority())) details.render_html(g) g.close() return True @@ -107,10 +177,10 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): for uri in self.feed_uris: if self.should_use_https(): self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri)) - self.conn = httplib.HTTPSConnection(self.feed_site) + self.conn = http.client.HTTPSConnection(self.feed_site) else: self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri)) - self.conn = httplib.HTTPConnection(self.feed_site) + self.conn = http.client.HTTPConnection(self.feed_site) self.conn.request( "GET", uri, @@ -118,8 +188,8 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): {"Accept-Charset": "utf-8"}) response = self.conn.getresponse() if response.status != 200: - print("%s: RSS fetch_news error, response: %d" % (self.page_title, - response.status)) + print(("%s: RSS fetch_news error, response: %d" % (self.page_title, + response.status))) self.debug_print(response.read()) return False @@ -132,8 +202,12 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): description = item.findtext('description') if description is not None: description = self.munge_description(description) + image = self.find_image(item) + if image is not None: + image = self.munge_image(image) link = item.findtext('link') - image = item.findtext('image') + if link is not None: + link = self.munge_link(link) if (title is None or not self.item_is_interesting_for_headlines(title, @@ -148,41 +222,39 @@ class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer): self.debug_print('Found bad words in item "%s"' % title) continue - #print u"Title: %s\nDescription: %s\nLink: %s\nImage: %s\n" % ( - # title, description, link, image) - blurb = u"""
""" if image is not None: - blurb += '\n' % image - blurb += '

%s' % title + blurb += u'' + + if link is None: + blurb += u'

%s' % title + else: + blurb += u'

%s' % (link, title) + + pubdate = self.find_pubdate(item) + if pubdate is not None: + pubdate = self.munge_pubdate(pubdate) + ts = parse(pubdate) + blurb += u" %s" % ( + ts.strftime("%b %d")) if (description is not None and - self.item_is_interesting_for_article(title, description, item)): + self.item_is_interesting_for_article(title, + description, + item)): longblurb = blurb - longblurb += "
" + + longblurb += u"
" longblurb += description - longblurb += "

" + longblurb += u"" longblurb = longblurb.replace("font-size:34pt", "font-size:44pt") - self.details.add(longblurb.encode('utf-8', errors='ignore')) + self.details.add(longblurb) - blurb += "" - self.news.add(blurb.encode('utf-8', errors='ignore')) + blurb += u"" + self.news.add(blurb) count += 1 return count > 0 - -# Test -#x = generic_news_rss_renderer( -# {"Fetch News" : 1, -# "Shuffle News" : 1}, -# "rss.cnn.com", -# [ "/rss/generic_news_topstories.rss", -# "/rss/money_latest.rss", -# "/rss/generic_news_tech.rss", -# ], -# "Test" ) -#if x.fetch_news() == 0: -# print "Error fetching news, no items fetched." -#x.shuffle_news()