projects
/
kiosk.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
f2f05b2
)
Cleanup and improve the RSS stuff.
author
Scott Gasch
<
[email protected]
>
Tue, 7 Jul 2020 22:46:10 +0000
(15:46 -0700)
committer
Scott Gasch
<
[email protected]
>
Tue, 7 Jul 2020 22:46:10 +0000
(15:46 -0700)
bellevue_reporter_rss_renderer.py
patch
|
blob
|
history
cnn_rss_renderer.py
patch
|
blob
|
history
generic_news_rss_renderer.py
patch
|
blob
|
history
mynorthwest_rss_renderer.py
patch
|
blob
|
history
seattletimes_rss_renderer.py
patch
|
blob
|
history
wsj_rss_renderer.py
patch
|
blob
|
history
diff --git
a/bellevue_reporter_rss_renderer.py
b/bellevue_reporter_rss_renderer.py
index f630aeee76002ad0bf2a8359a79e9582cee8f9d3..c94bbc009daa5ff40c8edc31eddbb621275e5f0d 100644
(file)
--- a/
bellevue_reporter_rss_renderer.py
+++ b/
bellevue_reporter_rss_renderer.py
@@
-30,10
+30,10
@@
class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
return description
def item_is_interesting_for_headlines(self, title, description, item):
return description
def item_is_interesting_for_headlines(self, title, description, item):
- return
True
+ return
not self.is_item_older_than_n_days(item, 10)
def item_is_interesting_for_article(self, title, description, item):
def item_is_interesting_for_article(self, title, description, item):
- return
True
+ return
not self.is_item_older_than_n_days(item, 10)
# Test
#x = bellevue_reporter_rss_renderer(
# Test
#x = bellevue_reporter_rss_renderer(
diff --git
a/cnn_rss_renderer.py
b/cnn_rss_renderer.py
index a93b4917d97ac169165fe7fc21f1c16bc2ec677b..0d8a0bd9b2d5af00e5d346e96c89b4b4814231b5 100644
(file)
--- a/
cnn_rss_renderer.py
+++ b/
cnn_rss_renderer.py
@@
-20,17
+20,30
@@
class cnn_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
return "cnn-details-%s" % (self.page_title)
def munge_description(self, description):
return "cnn-details-%s" % (self.page_title)
def munge_description(self, description):
- description = re.sub('[Rr]ead full story for latest details.', '', description)
+ description = re.sub('[Rr]ead full story for latest details.',
+ '',
+ description)
description = re.sub('<[^>]+>', '', description)
return description
description = re.sub('<[^>]+>', '', description)
return description
+ def find_image(self, item):
+ image = item.findtext('media:thumbnail')
+ if image is not None:
+ image_url = image.get('url')
+ return image_url
+ return None
+
def should_use_https(self):
return False
def item_is_interesting_for_headlines(self, title, description, item):
def should_use_https(self):
return False
def item_is_interesting_for_headlines(self, title, description, item):
+ if self.is_item_older_than_n_days(item, 7):
+ return False
return re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None
def item_is_interesting_for_article(self, title, description, item):
return re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None
def item_is_interesting_for_article(self, title, description, item):
+ if self.is_item_older_than_n_days(item, 7):
+ return False
return (re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None and
len(description) >= 65)
return (re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None and
len(description) >= 65)
diff --git
a/generic_news_rss_renderer.py
b/generic_news_rss_renderer.py
index ec7a7a5d0e840c65a27284fbb98f9e6ea5e91dd2..21f9afea3e630003f60f5e6f105d71b7548a19e6 100644
(file)
--- a/
generic_news_rss_renderer.py
+++ b/
generic_news_rss_renderer.py
@@
-1,3
+1,5
@@
+import datetime
+from dateutil.parser import parse
import file_writer
import grab_bag
import renderer
import file_writer
import grab_bag
import renderer
@@
-10,7
+12,8
@@
import xml.etree.ElementTree as ET
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
- super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False)
+ super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict,
+ False)
self.debug = 1
self.feed_site = feed_site
self.feed_uris = feed_uris
self.debug = 1
self.feed_site = feed_site
self.feed_uris = feed_uris
@@
-50,14
+53,31
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
def find_link(self, item):
return item.findtext('link')
def find_link(self, item):
return item.findtext('link')
+ def munge_link(self, link):
+ return link
+
def find_image(self, item):
return item.findtext('image')
def find_image(self, item):
return item.findtext('image')
+ def munge_image(self, image):
+ return image
+
def item_is_interesting_for_headlines(self, title, description, item):
def item_is_interesting_for_headlines(self, title, description, item):
- pass
+ return True
+
+ def is_item_older_than_n_days(self, item, n):
+ pubdate = item.findtext('pubDate')
+ if pubdate is not None:
+ pubdate = parse(pubdate)
+ tzinfo = pubdate.tzinfo
+ now = datetime.datetime.now(tzinfo)
+ delta = (now - pubdate).total_seconds() / (60 * 60 * 24)
+ if (delta > n):
+ return True
+ return False
def item_is_interesting_for_article(self, title, description, item):
def item_is_interesting_for_article(self, title, description, item):
- pass
+ return True
def periodic_render(self, key):
if key == "Fetch News":
def periodic_render(self, key):
if key == "Fetch News":
@@
-132,8
+152,12
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
description = item.findtext('description')
if description is not None:
description = self.munge_description(description)
description = item.findtext('description')
if description is not None:
description = self.munge_description(description)
+ image = self.find_image(item)
+ if image is not None:
+ image = self.munge_image(image)
link = item.findtext('link')
link = item.findtext('link')
- image = item.findtext('image')
+ if link is not None:
+ link = self.munge_link(link)
if (title is None or
not self.item_is_interesting_for_headlines(title,
if (title is None or
not self.item_is_interesting_for_headlines(title,
@@
-155,11
+179,18
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
font-size:34pt;
-webkit-column-break-inside:avoid;">"""
if image is not None:
font-size:34pt;
-webkit-column-break-inside:avoid;">"""
if image is not None:
- blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 style="padding:8px;">' % image
- blurb += u'<P><B>%s</B>' % title
+ blurb += u'<IMG SRC="%s" ALIGN=LEFT HEIGHT=115 ' % image
+ blurb += u'style="padding:8px;">'
+
+ if link is None:
+ blurb += u'<P><B>%s</B>' % title
+ else:
+ blurb += u'<P><B><A HREF="%s">%s</A></B>' % (link, title)
if (description is not None and
if (description is not None and
- self.item_is_interesting_for_article(title, description, item)):
+ self.item_is_interesting_for_article(title,
+ description,
+ item)):
longblurb = blurb
longblurb += u"<BR>"
longblurb += description
longblurb = blurb
longblurb += u"<BR>"
longblurb += description
diff --git
a/mynorthwest_rss_renderer.py
b/mynorthwest_rss_renderer.py
index 38bcd28dd7698675503c379355ad90849fc801e1..fd7a6a795fd348bb2a4f9e43e30605cc1eaaa924 100644
(file)
--- a/
mynorthwest_rss_renderer.py
+++ b/
mynorthwest_rss_renderer.py
@@
-18,14
+18,21
@@
class mynorthwest_rss_renderer(generic_news_rss_renderer.generic_news_rss_render
def get_details_page_prefix(self):
return "mynorthwest-details-%s" % (self.page_title)
def get_details_page_prefix(self):
return "mynorthwest-details-%s" % (self.page_title)
+ def find_image(self, item):
+ image = item.findtext('media:content')
+ if image is not None:
+ image_url = image.get('url')
+ return image_url
+ return None
+
def should_use_https(self):
return True
def item_is_interesting_for_headlines(self, title, description, item):
def should_use_https(self):
return True
def item_is_interesting_for_headlines(self, title, description, item):
- return
True
+ return
not self.is_item_older_than_n_days(item, 10)
def item_is_interesting_for_article(self, title, description, item):
def item_is_interesting_for_article(self, title, description, item):
- return
True
+ return
not self.is_item_older_than_n_days(item, 10)
# Test
#x = mynorthwest_rss_renderer(
# Test
#x = mynorthwest_rss_renderer(
diff --git
a/seattletimes_rss_renderer.py
b/seattletimes_rss_renderer.py
index c8d12ce17d6bcadef5a79c645f0b2cdae1121df9..4d02008b7a2aac075b70d4ea28c2a28d0e5c9ae6 100644
(file)
--- a/
seattletimes_rss_renderer.py
+++ b/
seattletimes_rss_renderer.py
@@
-26,8
+26,6
@@
class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer):
feed_site,
feed_uris,
page_title)
feed_site,
feed_uris,
page_title)
- self.oldest = datetime.datetime.now() - datetime.timedelta(14)
- self.debug_print("oldest story we'll keep: %s" % self.oldest)
def debug_prefix(self):
return "seattletimes"
def debug_prefix(self):
return "seattletimes"
@@
-45,6
+43,8
@@
class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer):
if item.tag != "item":
self.debug_print("Item.tag isn't item?!")
return False
if item.tag != "item":
self.debug_print("Item.tag isn't item?!")
return False
+ if self.is_item_older_than_n_days(item, 14):
+ return False
details = {}
for detail in item.getchildren():
details = {}
for detail in item.getchildren():
@@
-79,6
+79,8
@@
class seattletimes_rss_renderer(gnrss.generic_news_rss_renderer):
return True
def item_is_interesting_for_article(self, title, description, item):
return True
def item_is_interesting_for_article(self, title, description, item):
+ if self.is_item_older_than_n_days(item, 14):
+ return False
return len(description) >= 65
#x = seattletimes_rss_renderer({"Test", 123},
return len(description) >= 65
#x = seattletimes_rss_renderer({"Test", 123},
diff --git
a/wsj_rss_renderer.py
b/wsj_rss_renderer.py
index 8e2b0ccbe2dede11b3c82d73ed45b9e4dde1d005..aa56fddad51abc4e68a2961e23db93a5d7e1f3b9 100644
(file)
--- a/
wsj_rss_renderer.py
+++ b/
wsj_rss_renderer.py
@@
-18,14
+18,25
@@
class wsj_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
def get_details_page_prefix(self):
return "wsj-details-%s" % (self.page_title)
def get_details_page_prefix(self):
return "wsj-details-%s" % (self.page_title)
+ def find_image(self, item):
+ image = item.findtext('image')
+ if image is not None:
+ url = image.get('url')
+ return url
+ return None
+
def should_use_https(self):
return True
def item_is_interesting_for_headlines(self, title, description, item):
def should_use_https(self):
return True
def item_is_interesting_for_headlines(self, title, description, item):
+ if self.is_item_older_than_n_days(item, 7):
+ return False
return ("WSJ.com" not in title and
"WSJ.com" not in description)
def item_is_interesting_for_article(self, title, description, item):
return ("WSJ.com" not in title and
"WSJ.com" not in description)
def item_is_interesting_for_article(self, title, description, item):
+ if self.is_item_older_than_n_days(item, 7):
+ return False
return ("WSJ.com" not in title and
"WSJ.com" not in description)
return ("WSJ.com" not in title and
"WSJ.com" not in description)