-import generic_news_rss_renderer as gnrss
+#!/usr/bin/env python3
+
+import logging
import re
+from typing import List, Dict
+import xml
+import xml.etree.ElementTree as ET
+
+import generic_news_rss_renderer as gnrss
+
+
+logger = logging.getLogger(__file__)
+
class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
- def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title):
- super(bellevue_reporter_rss_renderer, self).__init__(
- name_to_timeout_dict,
- feed_site,
- feed_uris,
- page_title)
- self.debug = 1
-
- def debug_prefix(self):
- return "bellevue_reporter(%s)" % (self.page_title)
-
- def get_headlines_page_prefix(self):
+ """Read the Bellevue Reporter's RSS feed."""
+
+ def __init__(
+ self,
+ name_to_timeout_dict: Dict[str, int],
+ feed_site: str,
+ feed_uris: List[str],
+ page_title: str,
+ ):
+ """Create the renderer by forwarding all four arguments, unchanged, to the parent constructor."""
+ super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)
+
+ def get_headlines_page_prefix(self) -> str:
+ """Return the constant 'bellevue-reporter' (how the base class uses this prefix is not visible here)."""
return "bellevue-reporter"
- def get_details_page_prefix(self):
+ def get_details_page_prefix(self) -> str:
+ """Return the constant 'bellevue-reporter-details' (how the base class uses this prefix is not visible here)."""
return "bellevue-reporter-details"
- def should_use_https(self):
+ def should_use_https(self) -> bool:
+ """Always return True; presumably tells the base renderer to fetch this feed over HTTPS (confirm in the base class)."""
return True
- def munge_description(self, description):
- description = re.sub('<[^>]+>', '', description)
- description = re.sub('Bellevue\s+Reporter\s+Bellevue\s+Reporter', '',
- description)
- description = re.sub('\s*\-\s*Your local homepage\.\s*', '', description)
+ def munge_description(self, description: str, item: ET.Element) -> str:
+ """Strip markup and Bellevue Reporter boilerplate from an item description.
+
+ Removes, in this order: anything shaped like an HTML tag, the doubled
+ "Bellevue Reporter" byline, the "- Your local homepage." tagline, and
+ any "Wire Service" / "wire service" credit. `item` is accepted but not
+ used by this method.
+ """
+ # Raw strings: "\s", "\-" and "\." are invalid escape sequences in an
+ # ordinary literal and trigger a warning at compile time. The pattern
+ # text handed to `re` is unchanged.
+ description = re.sub(r"<[^>]+>", "", description)
+ description = re.sub(
+ r"Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description
+ )
+ description = re.sub(r"\s*\-\s*Your local homepage\.\s*", "", description)
+ description = re.sub(r"[Ww]ire [Ss]ervice", "", description)
return description
- def item_is_interesting_for_headlines(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
+ @staticmethod
+ def looks_like_football(title: str, description: str) -> bool:
+     """Report whether an item appears to be NFL / live-stream coverage.
+
+     True when the title contains "NFL", when the title matches
+     "live stream" or "livestream" (first letter of each word in either
+     case), or when the description matches "live stream".
+     """
+     if title.find("NFL") >= 0:
+         return True
+     # (pattern, text) pairs tried in this order; any() stops at the first hit.
+     probes = (
+         ("[Ll]ive [Ss]tream", title),
+         ("[Ll]ive[Ss]tream", title),
+         ("[Ll]ive [Ss]tream", description),
+     )
+     return any(re.search(pattern, text) is not None for pattern, text in probes)
+
+ @staticmethod
+ def looks_like_review(title: str, description: str) -> bool:
+     """Report whether the title contains "review" or "Review".
+
+     This is a plain substring test, so "preview" also matches.
+     `description` is accepted but ignored.
+     """
+     return any(word in title for word in ("review", "Review"))
+
+ @staticmethod
+ def looks_like_spam(title: str, description: str) -> bool:
+     """Report whether an item looks like sponsored / spam content.
+
+     Returns False when either argument is None. Otherwise returns True
+     when the description mentions "marketplace" or cannabis, or the title
+     mentions weed, cannabis, "THC" or "top rated", or either one
+     mentions "lose weight".
+     """
+     if description is None or title is None:
+         return False
+     # The second test is already implied by the first; kept as written.
+     if "marketplace" in description or "national-marketplace" in description:
+         return True
+     if re.search("[Ww]eed", title) is not None:
+         return True
+     if re.search("[Cc]annabis", title) is not None:
+         return True
+     if re.search("[Cc]annabis", description) is not None:
+         return True
+     if "THC" in title:
+         return True
+     if re.search("[Tt]op.[Rr]ated", title) is not None:
+         return True
+     return any(
+         re.search("[Ll]ose [Ww]eight", text) is not None
+         for text in (title, description)
+     )
+
+ @staticmethod
+ def looks_very_boring(title: str, description: str) -> bool:
+ """Return True when `description` occurs verbatim inside `title`.
+
+ An empty description is a substring of every title, so it also counts
+ as boring. Both arguments must be strings; None raises TypeError.
+ """
+ return description in title
+
+ def item_is_interesting_for_headlines(
+     self, title: str, description: str, item: xml.etree.ElementTree.Element
+ ) -> bool:
+     """Decide whether a feed item should be kept for the headlines page.
+
+     The item is rejected, and the reason logged, by the first check that
+     fires: `is_item_older_than_n_days(item, 10)`, spam, football, review,
+     then a description that is contained in the title. The spam check is
+     given the text of the item's "description" child element; the other
+     checks are given the `description` argument.
+     """
+     renderer = bellevue_reporter_rss_renderer
+     raw_description = item.findtext("description")
+     if self.is_item_older_than_n_days(item, 10):
+         # NOTE(review): logged at INFO here, while every other rejection
+         # in this method is logged at DEBUG - confirm that is intended.
+         logger.info(f"{title}: is too old!")
+     elif renderer.looks_like_spam(title, raw_description):
+         logger.debug(f"{title}: looks like spam")
+     elif renderer.looks_like_football(title, description):
+         logger.debug(f"{title}: looks like it's about football.")
+     elif renderer.looks_like_review(title, description):
+         logger.debug(f"{title}: looks like a review.")
+     elif renderer.looks_very_boring(title, description):
+         logger.debug(f"{title}: looks very boring.")
+     else:
+         return True
+     return False
+
+ def item_is_interesting_for_article(
+     self, title: str, description: str, item: xml.etree.ElementTree.Element
+ ) -> bool:
+     """Decide whether a feed item should be kept for the article/details page.
+
+     The item is rejected, and the reason logged at DEBUG, by the first
+     check that fires: `is_item_older_than_n_days(item, 10)`, spam,
+     football, then review. The spam check is given the text of the item's
+     "description" child element; the other checks are given the
+     `description` argument.
+     """
+     renderer = bellevue_reporter_rss_renderer
+     raw_description = item.findtext("description")
+     if self.is_item_older_than_n_days(item, 10):
+         logger.debug(f"{title}: is too old!")
+     elif renderer.looks_like_spam(title, raw_description):
+         logger.debug(f"{title}: looks like spam")
+     elif renderer.looks_like_football(title, description):
+         logger.debug(f"{title}: looks like it's about football.")
+     elif renderer.looks_like_review(title, description):
+         logger.debug(f"{title}: looks like a review.")
+     else:
+         return True
+     return False
- def item_is_interesting_for_article(self, title, description, item):
- return not self.is_item_older_than_n_days(item, 10)
# Test
-#x = bellevue_reporter_rss_renderer(
+# x = bellevue_reporter_rss_renderer(
# {"Fetch News" : 1,
# "Shuffle News" : 1},
# "www.bellevuereporter.com",
# [ "/feed/" ],
# "Test" )
-#d = """
-#<DIV style="padding:8px;
+# d = """
+# <DIV style="padding:8px;
# font-size:44pt;
# -webkit-column-break-inside:avoid;"><P>
-#<B>Task force will tackle issues of racial justice, police reform</B>
-#<BR>Bellevue Reporter
-#Bellevue Reporter - Your local homepage.
-#Inslee names civil rights activists, pastors, and cops to panel that may forge ideas f#or new laws Task force will tackle issues of racial justice, police reform
-#Wire Service
-#</DIV>"""
-#d = x.munge_description(d)
-#print d
-#if x.fetch_news() == 0:
-# print "Error fetching news, no items fetched."
-#x.shuffle_news()
+# <B>Task force will tackle issues of racial justice, police reform</B>
+# <BR>Bellevue Reporter
+# Bellevue Reporter - Your local homepage.
+# Inslee names civil rights activists, pastors, and cops to panel that may forge ideas for new laws Task force will tackle issues of racial justice, police reform
+# Wire Service
+# </DIV>"""
+# d = x.munge_description(d)
+# print(d)
+# if x.fetch_news() == 0:
+# print("Error fetching news, no items fetched.")
+# x.shuffle_news()