X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=bellevue_reporter_rss_renderer.py;h=16596896cdee8d76df35b81db62e1d603f675bde;hb=2b7583ad79a2adb41b08a1086a9a780da83b776c;hp=f630aeee76002ad0bf2a8359a79e9582cee8f9d3;hpb=4b1f3d8a8b278ca6d62f461ea80c8ea21080c301;p=kiosk.git diff --git a/bellevue_reporter_rss_renderer.py b/bellevue_reporter_rss_renderer.py index f630aee..1659689 100644 --- a/bellevue_reporter_rss_renderer.py +++ b/bellevue_reporter_rss_renderer.py @@ -1,59 +1,145 @@ -import generic_news_rss_renderer as gnrss +#!/usr/bin/env python3 + +import logging import re +from typing import List, Dict +import xml +import xml.etree.ElementTree as ET + +import generic_news_rss_renderer as gnrss + + +logger = logging.getLogger(__file__) + class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer): - def __init__(self, name_to_timeout_dict, feed_site, feed_uris, page_title): - super(bellevue_reporter_rss_renderer, self).__init__( - name_to_timeout_dict, - feed_site, - feed_uris, - page_title) - self.debug = 1 - - def debug_prefix(self): - return "bellevue_reporter(%s)" % (self.page_title) - - def get_headlines_page_prefix(self): + """Read the Bellevue Reporter's RSS feed.""" + + def __init__( + self, + name_to_timeout_dict: Dict[str, int], + feed_site: str, + feed_uris: List[str], + page_title: str, + ): + super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title) + + def get_headlines_page_prefix(self) -> str: return "bellevue-reporter" - def get_details_page_prefix(self): + def get_details_page_prefix(self) -> str: return "bellevue-reporter-details" - def should_use_https(self): + def should_use_https(self) -> bool: return True - def munge_description(self, description): - description = re.sub('<[^>]+>', '', description) - description = re.sub('Bellevue\s+Reporter\s+Bellevue\s+Reporter', '', - description) - description = re.sub('\s*\-\s*Your local homepage\.\s*', '', description) + def munge_description(self, description: str, item: ET.Element) -> str: + description = re.sub("<[^>]+>", "", description) + description = re.sub( + "Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description + ) + description = re.sub("\s*\-\s*Your local homepage\.\s*", "", description) + description = re.sub("[Ww]ire [Ss]ervice", "", description) return description - def item_is_interesting_for_headlines(self, title, description, item): + @staticmethod + def looks_like_football(title: str, description: str) -> bool: + return ( + title.find("NFL") != -1 + or re.search("[Ll]ive [Ss]tream", title) is not None + or re.search("[Ll]ive[Ss]tream", title) is not None + or re.search("[Ll]ive [Ss]tream", description) is not None + ) + + @staticmethod + def looks_like_review(title: str, description: str) -> bool: + return "review" in title or "Review" in title + + @staticmethod + def looks_like_spam(title: str, description: str) -> bool: + return ( + description is not None + and title is not None + and ( + "marketplace" in description + or "national-marketplace" in description + or re.search("[Ww]eed", title) is not None + or re.search("[Cc]annabis", title) is not None + or re.search("[Cc]annabis", description) is not None + or "THC" in title + or re.search("[Tt]op.[Rr]ated", title) is not None + or re.search("[Ll]ose [Ww]eight", title) is not None + or re.search("[Ll]ose [Ww]eight", description) is not None + ) + ) + + @staticmethod + def looks_very_boring(title: str, description: str) -> bool: + return description.lower() in title.lower() + + def item_is_interesting_for_headlines( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: + unfiltered_description = item.findtext("description") + if self.is_item_older_than_n_days(item, 10): + logger.info(f"{title}: is too old!") + return False + if bellevue_reporter_rss_renderer.looks_like_spam( + title, unfiltered_description + ): + logger.debug(f"{title}: looks like spam") + return False + if bellevue_reporter_rss_renderer.looks_like_football(title, description): + logger.debug(f"{title}: looks like it's about football.") + return False + if bellevue_reporter_rss_renderer.looks_like_review(title, description): + logger.debug(f"{title}: looks like a review.") + return False + if bellevue_reporter_rss_renderer.looks_very_boring(title, description): + logger.debug(f"{title}: looks very boring.") + return False return True - def item_is_interesting_for_article(self, title, description, item): + def item_is_interesting_for_article( + self, title: str, description: str, item: xml.etree.ElementTree.Element + ) -> bool: + unfiltered_description = item.findtext("description") + if self.is_item_older_than_n_days(item, 10): + logger.debug(f"{title}: is too old!") + return False + if bellevue_reporter_rss_renderer.looks_like_spam( + title, unfiltered_description + ): + logger.debug(f"{title}: looks like spam") + return False + if bellevue_reporter_rss_renderer.looks_like_football(title, description): + logger.debug(f"{title}: looks like it's about football.") + return False + if bellevue_reporter_rss_renderer.looks_like_review(title, description): + logger.debug(f"{title}: looks like a review.") + return False return True + # Test -#x = bellevue_reporter_rss_renderer( +# x = bellevue_reporter_rss_renderer( # {"Fetch News" : 1, # "Shuffle News" : 1}, # "www.bellevuereporter.com", # [ "/feed/" ], # "Test" ) -#d = """ -#

-#Task force will tackle issues of racial justice, police reform -#
Bellevue Reporter -#Bellevue Reporter - Your local homepage. -#Inslee names civil rights activists, pastors, and cops to panel that may forge ideas f#or new laws Task force will tackle issues of racial justice, police reform -#Wire Service -#

""" -#d = x.munge_description(d) -#print d -#if x.fetch_news() == 0: -# print "Error fetching news, no items fetched." -#x.shuffle_news() +# Task force will tackle issues of racial justice, police reform +#
Bellevue Reporter +# Bellevue Reporter - Your local homepage. +# Inslee names civil rights activists, pastors, and cops to panel that may forge ideas f#or new laws Task force will tackle issues of racial justice, police reform +# Wire Service +# """ +# d = x.munge_description(d) +# print(d) +# if x.fetch_news() == 0: +# print("Error fetching news, no items fetched.") +# x.shuffle_news()