5 from typing import List, Dict
7 import xml.etree.ElementTree as ET
9 import generic_news_rss_renderer as gnrss
12 logger = logging.getLogger(__file__)
15 class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
16 """Read the Bellevue Reporter's RSS feed."""
20 name_to_timeout_dict: Dict[str, int],
26 name_to_timeout_dict, feed_site, feed_uris, page_title
def get_headlines_page_prefix(self) -> str:
    """Return the headlines page prefix, ``"bellevue-reporter"``."""
    prefix = "bellevue-reporter"
    return prefix
def get_details_page_prefix(self) -> str:
    """Return the details page prefix, ``"bellevue-reporter-details"``."""
    prefix = "bellevue-reporter-details"
    return prefix
35 def should_use_https(self) -> bool:
def munge_description(self, description: str, item: ET.Element) -> str:
    """Strip markup and publisher boilerplate from an item description.

    Removes, in this order:
      1. anything shaped like an HTML/XML tag (``<...>``),
      2. the doubled "Bellevue Reporter Bellevue Reporter" byline,
      3. the "- Your local homepage." tagline and the whitespace around it,
      4. the phrase "wire service" (either case for each initial letter).

    Args:
        description: description text of the feed item.
        item: the feed item element; not used by this implementation.

    Returns:
        The description with the fragments above deleted.
    """
    # Raw strings keep ``\s`` / ``\.`` as regex escapes.  In a plain string
    # literal they are invalid string escapes and produce a SyntaxWarning on
    # current Python versions.
    description = re.sub(r"<[^>]+>", "", description)
    description = re.sub(
        r"Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description
    )
    description = re.sub(r"\s*\-\s*Your local homepage\.\s*", "", description)
    description = re.sub(r"[Ww]ire [Ss]ervice", "", description)
    # NOTE(review): the return statement was not visible in the reviewed
    # extract; returning the cleaned text is assumed from the ``-> str``
    # annotation -- confirm.
    return description
def looks_like_football(title: str, description: str) -> bool:
    """Tell whether an item matches the football / live-stream filters.

    An item matches when its title contains "NFL", when its title contains
    "live stream" or "livestream" (either case for the initial letters of
    each word), or when its description contains "live stream" (same case
    rule, spaced form only).

    Args:
        title: item title.
        description: item description.

    Returns:
        True if any of the checks above matches, otherwise False.
    """
    if "NFL" in title:
        return True

    # Checked in this order; the first hit short-circuits the rest.
    checks = (
        ("[Ll]ive [Ss]tream", title),
        ("[Ll]ive[Ss]tream", title),
        ("[Ll]ive [Ss]tream", description),
    )
    for pattern, text in checks:
        if re.search(pattern, text) is not None:
            return True
    return False
def looks_like_review(title: str, description: str) -> bool:
    """Tell whether the title contains "review" or "Review".

    Only the title is inspected; ``description`` is accepted but not used.
    The match is a plain substring test, so other capitalizations such as
    "REVIEW" do not match.
    """
    return any(word in title for word in ("review", "Review"))
61 def looks_like_spam(title: str, description: str) -> bool:
63 description is not None
66 'marketplace' in description
67 or 'national-marketplace' in description
68 or re.search('[Ww]eed', title) is not None
69 or re.search('[Cc]annabis', title) is not None
70 or re.search('[Cc]annabis', description) is not None
72 or re.search('[Tt]op.[Rr]ated', title) is not None
73 or re.search('[Ll]ose [Ww]eight', title) is not None
74 or re.search('[Ll]ose [Ww]eight', description) is not None
78 def item_is_interesting_for_headlines(
79 self, title: str, description: str, item: xml.etree.ElementTree.Element
81 unfiltered_description = item.findtext("description")
82 if self.is_item_older_than_n_days(item, 10):
83 logger.info(f'{title}: is too old!')
85 if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description):
86 logger.debug(f'{title}: looks like spam')
88 if bellevue_reporter_rss_renderer.looks_like_football(title, description):
89 logger.debug(f'{title}: looks like it\'s about football.')
91 if bellevue_reporter_rss_renderer.looks_like_review(title, description):
92 logger.debug(f'{title}: looks like a review.')
96 def item_is_interesting_for_article(
97 self, title: str, description: str, item: xml.etree.ElementTree.Element
99 unfiltered_description = item.findtext("description")
100 if self.is_item_older_than_n_days(item, 10):
101 logger.debug(f'{title}: is too old!')
103 if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description):
104 logger.debug(f'{title}: looks like spam')
106 if bellevue_reporter_rss_renderer.looks_like_football(title, description):
107 logger.debug(f'{title}: looks like it\'s about football.')
109 if bellevue_reporter_rss_renderer.looks_like_review(title, description):
110 logger.debug(f'{title}: looks like a review.')
116 # x = bellevue_reporter_rss_renderer(
118 # "Shuffle News" : 1},
119 # "www.bellevuereporter.com",
123 # <DIV style="padding:8px;
125 # -webkit-column-break-inside:avoid;"><P>
126 # <B>Task force will tackle issues of racial justice, police reform</B>
127 # <BR>Bellevue Reporter
128 # Bellevue Reporter - Your local homepage.
# Inslee names civil rights activists, pastors, and cops to panel that may forge ideas for new laws Task force will tackle issues of racial justice, police reform
132 # d = x.munge_description(d)
134 # if x.fetch_news() == 0:
135 # print("Error fetching news, no items fetched.")