#!/usr/bin/env python3

"""Renderer for the Bellevue Reporter's RSS feed."""

import logging
import re
from typing import Dict, List, Optional
import xml
import xml.etree.ElementTree as ET

import generic_news_rss_renderer as gnrss

logger = logging.getLogger(__name__)

# Items older than this many days are rejected for both headlines and articles.
_MAX_ITEM_AGE_DAYS = 10

# Patterns are raw strings so that regex escapes such as ``\s`` are not parsed
# as (invalid) Python string escapes, and compiled once at import time.
_HTML_TAG_RE = re.compile(r"<[^>]+>")
_DOUBLED_BYLINE_RE = re.compile(r"Bellevue\s+Reporter\s+Bellevue\s+Reporter")
_TAGLINE_RE = re.compile(r"\s*-\s*Your local homepage\.\s*")
_WIRE_SERVICE_RE = re.compile(r"[Ww]ire [Ss]ervice")

# Titles are matched with or without the space; descriptions only with it.
_LIVE_STREAM_TITLE_RE = re.compile(r"[Ll]ive ?[Ss]tream")
_LIVE_STREAM_DESCRIPTION_RE = re.compile(r"[Ll]ive [Ss]tream")

_SPAM_TITLE_RE = re.compile(
    r"[Ww]eed|[Cc]annabis|THC|[Tt]op.[Rr]ated|[Ll]ose [Ww]eight"
)
# "marketplace" also covers descriptions containing "national-marketplace".
_SPAM_DESCRIPTION_RE = re.compile(r"marketplace|[Cc]annabis|[Ll]ose [Ww]eight")


class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
    """Read the Bellevue Reporter's RSS feed."""

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments unchanged to the generic news renderer."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the prefix used for the headlines page."""
        return "bellevue-reporter"

    def get_details_page_prefix(self) -> str:
        """Return the prefix used for the details page."""
        return "bellevue-reporter-details"

    def should_use_https(self) -> bool:
        """Return True; this feed is fetched over HTTPS."""
        return True

    def munge_description(self, description: str, item: ET.Element) -> str:
        """Strip markup and site boilerplate from an item description.

        Removes, in order: anything that looks like an HTML tag, a doubled
        "Bellevue Reporter" byline, the "- Your local homepage." tagline
        (with surrounding whitespace) and the phrase "wire service".

        Args:
            description: Raw description text of the feed item.
            item: The feed item element; not used by this implementation.

        Returns:
            The cleaned-up description.
        """
        description = _HTML_TAG_RE.sub("", description)
        description = _DOUBLED_BYLINE_RE.sub("", description)
        description = _TAGLINE_RE.sub("", description)
        description = _WIRE_SERVICE_RE.sub("", description)
        return description

    @staticmethod
    def looks_like_football(title: str, description: str) -> bool:
        """Return True if the item mentions the NFL or a live stream.

        The title is checked for "NFL" and for "live stream" / "livestream";
        the description is checked for "live stream" only.
        """
        return (
            "NFL" in title
            or _LIVE_STREAM_TITLE_RE.search(title) is not None
            or _LIVE_STREAM_DESCRIPTION_RE.search(description) is not None
        )

    @staticmethod
    def looks_like_review(title: str, description: str) -> bool:
        """Return True if the title contains "review" or "Review".

        The description is accepted for signature parity with the other
        filters but is not inspected.
        """
        return "review" in title or "Review" in title

    @staticmethod
    def looks_like_spam(title: Optional[str], description: Optional[str]) -> bool:
        """Return True if the item matches one of the known spam keywords.

        Args:
            title: Item title, or None.
            description: Item description, or None. Callers in this class
                pass the unfiltered <description> text of the item.

        Returns:
            False when either argument is None; otherwise True if the title
            or the description contains one of the spam keywords.
        """
        if title is None or description is None:
            return False
        return (
            _SPAM_TITLE_RE.search(title) is not None
            or _SPAM_DESCRIPTION_RE.search(description) is not None
        )

    @staticmethod
    def looks_very_boring(title: str, description: str) -> bool:
        """Return True if the description is contained in the title.

        The comparison is case-insensitive. Note that an empty description
        is contained in every title and is therefore always "very boring".
        """
        return description.lower() in title.lower()

    def _fails_shared_filters(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Return True (and log why) if the item is spam, football or a review.

        These checks are common to the headline and article filters. The spam
        check deliberately looks at the item's unfiltered <description> text
        rather than the (already munged) description argument.
        """
        renderer = bellevue_reporter_rss_renderer
        unfiltered_description = item.findtext("description")
        if renderer.looks_like_spam(title, unfiltered_description):
            logger.debug("%s: looks like spam", title)
            return True
        if renderer.looks_like_football(title, description):
            logger.debug("%s: looks like it's about football.", title)
            return True
        if renderer.looks_like_review(title, description):
            logger.debug("%s: looks like a review.", title)
            return True
        return False

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item should appear on the headlines page.

        An item is rejected if it is older than the maximum age, looks like
        spam, football or a review, or if its description adds nothing to
        its title.

        Args:
            title: Item title.
            description: Item description.
            item: The feed item element.

        Returns:
            True if the item passes every filter, False otherwise.
        """
        # is_item_older_than_n_days is not defined in this file; presumably
        # it is inherited from the generic renderer.
        if self.is_item_older_than_n_days(item, _MAX_ITEM_AGE_DAYS):
            logger.info("%s: is too old!", title)
            return False
        if self._fails_shared_filters(title, description, item):
            return False
        if bellevue_reporter_rss_renderer.looks_very_boring(title, description):
            logger.debug("%s: looks very boring.", title)
            return False
        return True

    def item_is_interesting_for_article(
        self, title: str, description: str, item: xml.etree.ElementTree.Element
    ) -> bool:
        """Decide whether an item should appear on the article (details) page.

        Applies the same filters as the headline check except for the
        "very boring" test.

        Args:
            title: Item title.
            description: Item description.
            item: The feed item element.

        Returns:
            True if the item passes every filter, False otherwise.
        """
        if self.is_item_older_than_n_days(item, _MAX_ITEM_AGE_DAYS):
            logger.debug("%s: is too old!", title)
            return False
        return not self._fails_shared_filters(title, description, item)


# Test
# x = bellevue_reporter_rss_renderer(
#    {"Fetch News" : 1,
#     "Shuffle News" : 1},
#    "www.bellevuereporter.com",
#    [ "/feed/" ],
#    "Test" )
# d = """
#
# Task force will tackle issues of racial justice, police reform
# Bellevue Reporter
# Bellevue Reporter - Your local homepage.
# Inslee names civil rights activists, pastors, and cops to panel that may forge ideas for new laws Task force will tackle issues of racial justice, police reform
# Wire Service
#
# """
# d = x.munge_description(d)
# print(d)
# if x.fetch_news() == 0:
#    print("Error fetching news, no items fetched.")
# x.shuffle_news()