X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=bellevue_reporter_rss_renderer.py;h=16596896cdee8d76df35b81db62e1d603f675bde;hb=2b7583ad79a2adb41b08a1086a9a780da83b776c;hp=4e1ff6261f31650c609ec153f3fcf3f878573809;hpb=6b8d4eeb7153617221f822a243a117f0bcab07bf;p=kiosk.git diff --git a/bellevue_reporter_rss_renderer.py b/bellevue_reporter_rss_renderer.py index 4e1ff62..1659689 100644 --- a/bellevue_reporter_rss_renderer.py +++ b/bellevue_reporter_rss_renderer.py @@ -22,9 +22,7 @@ class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer): feed_uris: List[str], page_title: str, ): - super().__init__( - name_to_timeout_dict, feed_site, feed_uris, page_title - ) + super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title) def get_headlines_page_prefix(self) -> str: return "bellevue-reporter" @@ -60,31 +58,45 @@ class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer): @staticmethod def looks_like_spam(title: str, description: str) -> bool: return ( - 'marketplace' in description - or 'national-marketplace' in description - or re.search('[Ww]eed', title) is not None - or re.search('[Cc]annabis', title) is not None - or re.search('[Cc]annabis', description) is not None - or 'THC' in title - or re.search('[Ll]ose [Ww]eight', title) is not None - or re.search('[Ll]ose [Ww]eight', description) is not None + description is not None + and title is not None + and ( + "marketplace" in description + or "national-marketplace" in description + or re.search("[Ww]eed", title) is not None + or re.search("[Cc]annabis", title) is not None + or re.search("[Cc]annabis", description) is not None + or "THC" in title + or re.search("[Tt]op.[Rr]ated", title) is not None + or re.search("[Ll]ose [Ww]eight", title) is not None + or re.search("[Ll]ose [Ww]eight", description) is not None + ) ) + @staticmethod + def looks_very_boring(title: str, description: str) -> bool: + return description.lower() in title.lower() + def item_is_interesting_for_headlines( self, title: str, description: str, item: xml.etree.ElementTree.Element ) -> bool: unfiltered_description = item.findtext("description") if self.is_item_older_than_n_days(item, 10): - logger.info(f'{title}: is too old!') + logger.info(f"{title}: is too old!") return False - if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description): - logger.debug(f'{title}: looks like spam') + if bellevue_reporter_rss_renderer.looks_like_spam( + title, unfiltered_description + ): + logger.debug(f"{title}: looks like spam") return False if bellevue_reporter_rss_renderer.looks_like_football(title, description): - logger.debug(f'{title}: looks like it\'s about football.') + logger.debug(f"{title}: looks like it's about football.") return False if bellevue_reporter_rss_renderer.looks_like_review(title, description): - logger.debug(f'{title}: looks like a review.') + logger.debug(f"{title}: looks like a review.") + return False + if bellevue_reporter_rss_renderer.looks_very_boring(title, description): + logger.debug(f"{title}: looks very boring.") return False return True @@ -93,16 +105,18 @@ class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer): ) -> bool: unfiltered_description = item.findtext("description") if self.is_item_older_than_n_days(item, 10): - logger.debug(f'{title}: is too old!') + logger.debug(f"{title}: is too old!") return False - if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description): - logger.debug(f'{title}: looks like spam') + if bellevue_reporter_rss_renderer.looks_like_spam( + title, unfiltered_description + ): + logger.debug(f"{title}: looks like spam") return False if bellevue_reporter_rss_renderer.looks_like_football(title, description): - logger.debug(f'{title}: looks like it\'s about football.') + logger.debug(f"{title}: looks like it's about football.") return False if bellevue_reporter_rss_renderer.looks_like_review(title, description): - logger.debug(f'{title}: looks like a review.') + logger.debug(f"{title}: looks like a review.") return False return True