RSS renderers: add a Google News renderer, log when items are skipped as too old, and drop the CNNMoney feed.
return description
def item_is_interesting_for_headlines(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
+ if self.is_item_older_than_n_days(item, 10):
+ self.debug_print("%s: is too old!" % title)
+ return False
+ return True
def item_is_interesting_for_article(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
+ if self.is_item_older_than_n_days(item, 10):
+ self.debug_print("%s: is too old!" % title)
+ return False
+ return True
# Test
#x = bellevue_reporter_rss_renderer(
return False
def item_is_interesting_for_headlines(self, title, description, item):
- if self.is_item_older_than_n_days(item, 7):
+ if self.is_item_older_than_n_days(item, 14):
+ self.debug_print("%s: is too old!" % title)
return False
return re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None
def item_is_interesting_for_article(self, title, description, item):
if self.is_item_older_than_n_days(item, 7):
+ self.debug_print("%s: is too old!" % title)
return False
return (re.search(r'[Cc][Nn][Nn][A-Za-z]*\.com', title) is None and
len(description) >= 65)
--- /dev/null
+from bs4 import BeautifulSoup
+import generic_news_rss_renderer
+import re
+
+class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
+    """generic_news_rss_renderer specialization for Google News RSS feeds.
+
+    Overrides the hooks of the generic renderer: page/debug prefixes,
+    description extraction and cleanup, image lookup, transport choice and
+    the item age filters.
+    """
+
+    def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
+        """Forward all arguments unchanged to the generic renderer."""
+        super(google_news_rss_renderer, self).__init__(
+            name_to_timeout_dict,
+            feed_site,
+            feed_uris,
+            page_title)
+        # NOTE(review): debug output is switched on unconditionally here;
+        # confirm this is intended outside of development.
+        self.debug = 1
+
+    def debug_prefix(self):
+        """Return the tag identifying this renderer in debug output."""
+        return "google-news"
+
+    def get_headlines_page_prefix(self):
+        """Return the prefix for the headlines page of this feed."""
+        return "google-news"
+
+    def get_details_page_prefix(self):
+        """Return the prefix for the article-details page of this feed."""
+        return "google-news-details"
+
+    def find_description(self, item):
+        """Return the item's description with its source appended.
+
+        The text of the <source> child, when present, is appended in
+        parentheses.  Returns None when the item has no <description>.
+        """
+        descr = item.findtext('description')
+        source = item.findtext('source')
+        # Only decorate a description that exists: concatenating onto None
+        # would raise TypeError for an item with a source but no description.
+        if descr is not None and source is not None:
+            descr = descr + " (%s)" % source
+        return descr
+
+    def munge_description(self, description):
+        """Return the description markup with every <a> stripped of its href."""
+        # Name the parser explicitly: left to guess, bs4 warns and picks
+        # whichever parser is installed, so the markup returned here could
+        # differ from one machine to the next.
+        soup = BeautifulSoup(description, "html.parser")
+        for a in soup.find_all('a'):
+            del a['href']
+        return str(soup)
+
+    def find_image(self, item):
+        """Return None: no image is extracted for Google News items."""
+        return None
+
+    def should_use_https(self):
+        """Return True: this feed is fetched over HTTPS."""
+        return True
+
+    def item_is_interesting_for_headlines(self, title, description, item):
+        """Return True unless the item is more than 2 days old."""
+        return not self.is_item_older_than_n_days(item, 2)
+
+    def item_is_interesting_for_article(self, title, description, item):
+        """Return True unless the item is more than 2 days old."""
+        return not self.is_item_older_than_n_days(item, 2)
+
+# Test
+#x = google_news_rss_renderer(
+# {"Fetch News" : 1,
+# "Shuffle News" : 1},
+# "news.google.com",
+# [ "/rss?hl=en-US&gl=US&ceid=US:en" ],
+# "Test" )
+#if x.fetch_news() == 0:
+# print("Error fetching news, no items fetched.")
+#x.shuffle_news()
return True
def item_is_interesting_for_headlines(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
+ if self.is_item_older_than_n_days(item, 10):
+ self.debug_print("%s: is too old!" % title)
+ return False
+ return True
def item_is_interesting_for_article(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
+ if self.is_item_older_than_n_days(item, 10):
+ self.debug_print("%s: is too old!" % title)
+ return False
+ return True
# Test
#x = mynorthwest_rss_renderer(
tries_per_key[key] = 0
if tries_per_key[key] >= 3:
- print('renderer: Too many failures/retries for "%s.%s", ' +
- ', giving up for now' % (self.get_name(), key))
+ print('renderer: Too many failures for "%s.%s", giving up' % (
+ self.get_name(), key))
keys_to_skip.add(key)
else:
msg = 'renderer: executing "%s.%s"' % (self.get_name(), key)
import cnn_rss_renderer
import gdata_oauth
import gcal_renderer
+import google_news_rss_renderer
import gkeep_renderer
import health_renderer
import local_photos_mirror_renderer
"mynorthwest.com",
[ "/feed/" ],
"MyNorthwest News" ),
- cnn_rss_renderer.cnn_rss_renderer(
- {"Fetch News" : (hours * 1),
- "Shuffle News" : (always)},
- "rss.cnn.com",
- [ "/rss/money_latest.rss",
- "/rss/money_mostpopular.rss",
- "/rss/money_news_economy.rss",
- "/rss/money_news_companies.rss" ],
- "CNNMoney" ),
cnn_rss_renderer.cnn_rss_renderer(
{"Fetch News" : (hours * 1),
"Shuffle News" : (always)},
[ "/rss/RSSMarketsMain.xml",
"/rss/WSJcomUSBusiness.xml"],
"WSJBusiness" ),
+ google_news_rss_renderer.google_news_rss_renderer(
+ {"Fetch News" : (minutes * 30),
+ "Shuffle News" : (always)},
+ "news.google.com",
+ [ "/rss?hl=en-US&gl=US&ceid=US:en" ],
+ "Google News" ),
health_renderer.periodic_health_renderer(
{"Update Perioidic Job Health" : (seconds * 45)}),
stock_renderer.stock_quote_renderer(
self.debug_print("Item.tag isn't item?!")
return False
if self.is_item_older_than_n_days(item, 14):
+ self.debug_print("%s: is too old!" % title)
return False
details = {}
interesting = True
if not interesting:
return False
-
- if 'enclosure' in details:
- if 'pubDate' in details:
- x = details['pubDate']
- x = x.rsplit(' ', 1)[0]
- # Fri, 13 Nov 2015 10:07:00
- dt = datetime.datetime.strptime(x, '%a, %d %b %Y %H:%M:%S')
- if dt < self.oldest:
- self.debug_print("%s is too old." % (
- details["pubDate"]))
- return False
return True
def item_is_interesting_for_article(self, title, description, item):
if self.is_item_older_than_n_days(item, 14):
+ self.debug_print("%s: is too old!" % title)
return False
return len(description) >= 65
def item_is_interesting_for_headlines(self, title, description, item):
if self.is_item_older_than_n_days(item, 7):
+ self.debug_print("%s: is too old!" % title)
return False
return ("WSJ.com" not in title and
"WSJ.com" not in description)
def item_is_interesting_for_article(self, title, description, item):
if self.is_item_older_than_n_days(item, 7):
+ self.debug_print("%s: is too old!" % title)
return False
return ("WSJ.com" not in title and
"WSJ.com" not in description)