5 from typing import List, Dict
7 import xml.etree.ElementTree as ET
9 import generic_news_rss_renderer as gnrss
12 logger = logging.getLogger(__name__)
15 class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
16 """Read the Bellevue Reporter's RSS feed."""
20 name_to_timeout_dict: Dict[str, int],
25 super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)
def get_headlines_page_prefix(self) -> str:
    """Return the headlines page prefix for this renderer.

    Returns:
        The constant string "bellevue-reporter".
    """
    headlines_prefix = "bellevue-reporter"
    return headlines_prefix
def get_details_page_prefix(self) -> str:
    """Return the details page prefix for this renderer.

    Returns:
        The constant string "bellevue-reporter-details".
    """
    details_prefix = "bellevue-reporter-details"
    return details_prefix
33 def should_use_https(self) -> bool:
def munge_description(self, description: str, item: ET.Element) -> str:
    """Strip markup and site boilerplate from an item's description.

    The following are removed, in this order:
      1. anything that looks like an HTML/XML tag (``<...>``),
      2. the doubled "Bellevue Reporter Bellevue Reporter" byline,
      3. the " - Your local homepage." tagline and surrounding whitespace,
      4. the phrase "wire service" (either word may be capitalized).

    Args:
        description: the description text to clean.
        item: the feed item the description came from; not consulted here.

    Returns:
        The cleaned description text.
    """
    # All patterns are raw strings: sequences such as "\s" and "\." are
    # regex escapes, not string escapes, and writing them in a plain
    # string literal triggers an invalid-escape warning on newer Pythons.
    description = re.sub(r"<[^>]+>", "", description)
    description = re.sub(
        r"Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description
    )
    description = re.sub(r"\s*\-\s*Your local homepage\.\s*", "", description)
    description = re.sub(r"[Ww]ire [Ss]ervice", "", description)
    return description
def looks_like_football(title: str, description: str) -> bool:
    """Report whether an item matches the football / live-stream filter.

    An item matches when its title contains "NFL", when its title contains
    "live stream" or "livestream" (the initial letter of each word may be
    upper or lower case), or when its description contains "live stream".

    Args:
        title: the item's title.
        description: the item's description; only examined when the title
            checks all fail.

    Returns:
        True if any of the checks above matches, False otherwise.
    """
    if "NFL" in title:
        return True
    for title_pattern in ("[Ll]ive [Ss]tream", "[Ll]ive[Ss]tream"):
        if re.search(title_pattern, title) is not None:
            return True
    return re.search("[Ll]ive [Ss]tream", description) is not None
def looks_like_review(title: str, description: str) -> bool:
    """Report whether the title contains "review" or "Review".

    The match is a plain substring test, so words that merely contain
    "review" (e.g. "preview") also match; other capitalizations such as
    "REVIEW" do not.

    Args:
        title: the item's title.
        description: the item's description; not consulted.

    Returns:
        True if either spelling occurs in the title, False otherwise.
    """
    return any(spelling in title for spelling in ("review", "Review"))
59 def looks_like_spam(title: str, description: str) -> bool:
61 description is not None
64 "marketplace" in description
65 or "national-marketplace" in description
66 or re.search("[Ww]eed", title) is not None
67 or re.search("[Cc]annabis", title) is not None
68 or re.search("[Cc]annabis", description) is not None
70 or re.search("[Tt]op.[Rr]ated", title) is not None
71 or re.search("[Ll]ose [Ww]eight", title) is not None
72 or re.search("[Ll]ose [Ww]eight", description) is not None
def looks_very_boring(title: str, description: str) -> bool:
    """Report whether the description merely repeats part of the title.

    The comparison is case-insensitive: both strings are lowercased and the
    description is searched for as a substring of the title. An empty
    description is a substring of every title and therefore counts as boring.

    Args:
        title: the item's title.
        description: the item's description.

    Returns:
        True if the lowercased description occurs within the lowercased
        title, False otherwise.
    """
    lowered_description = description.lower()
    lowered_title = title.lower()
    return lowered_title.find(lowered_description) != -1
80 def item_is_interesting_for_headlines(
81 self, title: str, description: str, item: xml.etree.ElementTree.Element
83 unfiltered_description = item.findtext("description")
84 if self.is_item_older_than_n_days(item, 10):
85 logger.info(f"{title}: is too old!")
87 if bellevue_reporter_rss_renderer.looks_like_spam(
88 title, unfiltered_description
90 logger.debug(f"{title}: looks like spam")
92 if bellevue_reporter_rss_renderer.looks_like_football(title, description):
93 logger.debug(f"{title}: looks like it's about football.")
95 if bellevue_reporter_rss_renderer.looks_like_review(title, description):
96 logger.debug(f"{title}: looks like a review.")
98 if bellevue_reporter_rss_renderer.looks_very_boring(title, description):
99 logger.debug(f"{title}: looks very boring.")
103 def item_is_interesting_for_article(
104 self, title: str, description: str, item: xml.etree.ElementTree.Element
106 unfiltered_description = item.findtext("description")
107 if self.is_item_older_than_n_days(item, 10):
108 logger.debug(f"{title}: is too old!")
110 if bellevue_reporter_rss_renderer.looks_like_spam(
111 title, unfiltered_description
113 logger.debug(f"{title}: looks like spam")
115 if bellevue_reporter_rss_renderer.looks_like_football(title, description):
116 logger.debug(f"{title}: looks like it's about football.")
118 if bellevue_reporter_rss_renderer.looks_like_review(title, description):
119 logger.debug(f"{title}: looks like a review.")
125 # x = bellevue_reporter_rss_renderer(
127 # "Shuffle News" : 1},
128 # "www.bellevuereporter.com",
132 # <DIV style="padding:8px;
134 # -webkit-column-break-inside:avoid;"><P>
135 # <B>Task force will tackle issues of racial justice, police reform</B>
136 # <BR>Bellevue Reporter
137 # Bellevue Reporter - Your local homepage.
138 # Inslee names civil rights activists, pastors, and cops to panel that may forge ideas for new laws Task force will tackle issues of racial justice, police reform
141 # d = x.munge_description(d)
143 # if x.fetch_news() == 0:
144 # print("Error fetching news, no items fetched.")