#!/usr/bin/env python3
+import logging
import re
from typing import List, Dict
import xml
import generic_news_rss_renderer as gnrss
+logger = logging.getLogger(__name__)
+
+
class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
"""Read the Bellevue Reporter's RSS feed."""
feed_uris: List[str],
page_title: str,
):
- super(bellevue_reporter_rss_renderer, self).__init__(
- name_to_timeout_dict, feed_site, feed_uris, page_title
- )
- self.debug = True
-
- def debug_prefix(self) -> str:
- return f"bellevue_reporter({self.page_title})"
+ super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)
def get_headlines_page_prefix(self) -> str:
    """Return this renderer's headlines page prefix, "bellevue-reporter"."""
    prefix = "bellevue-reporter"
    return prefix
@staticmethod
def looks_like_spam(title: str, description: str) -> bool:
    """Return True when an item's text matches known advertising patterns.

    Args:
        title: the item's title; ``None`` is tolerated.
        description: the item's description; ``None`` is tolerated.

    Returns:
        True if the title or description contains one of the spam markers
        below.  False if nothing matches, or if either argument is ``None``
        (an item missing either field is never classified as spam here).
    """
    # Without both fields there is nothing reliable to match against.
    if title is None or description is None:
        return False

    # Regexes checked against the title.  The "." in "[Tt]op.[Rr]ated"
    # matches any single separator, e.g. "Top Rated" and "top-rated".
    title_patterns = (
        r"[Ww]eed",
        r"[Cc]annabis",
        r"[Tt]op.[Rr]ated",
        r"[Ll]ose [Ww]eight",
    )
    # Regexes checked against the description.
    description_patterns = (
        r"[Cc]annabis",
        r"[Ll]ose [Ww]eight",
    )

    return (
        # This substring test also covers "national-marketplace".
        "marketplace" in description
        or "THC" in title
        or any(re.search(p, title) is not None for p in title_patterns)
        or any(
            re.search(p, description) is not None
            for p in description_patterns
        )
    )
@staticmethod
def looks_very_boring(title: str, description: str) -> bool:
    """Return True when the description adds nothing beyond the title.

    The check is a case-insensitive substring test: the item is boring
    when the lowercased description occurs inside the lowercased title.

    Args:
        title: the item's title; ``None`` is treated as an empty string.
        description: the item's description; ``None`` is treated as an
            empty string.

    Returns:
        True if the description is contained in the title.  An empty or
        missing description is trivially contained, so it is reported as
        boring.
    """
    # Feed fields may be absent; normalise None to "" instead of raising
    # AttributeError on .lower().
    lowered_title = (title or "").lower()
    lowered_description = (description or "").lower()
    return lowered_description in lowered_title
+
def item_is_interesting_for_headlines(
    self, title: str, description: str, item: xml.etree.ElementTree.Element
) -> bool:
    """Decide whether a feed item should be kept as a headline.

    The item is rejected when it is older than 10 days, or when any of
    the spam, football, review or "very boring" filters match.  The reason
    for each rejection is logged.

    Args:
        title: the item's title.
        description: the item's description as passed in by the caller
            (presumably already filtered upstream -- confirm).
        item: the XML element for the item.  The text of its
            ``description`` child is what the spam filter inspects, rather
            than the ``description`` argument.

    Returns:
        True if the item passes every filter, False otherwise.
    """
    renderer = bellevue_reporter_rss_renderer
    # findtext() yields None when the element has no <description> child.
    unfiltered_description = item.findtext("description")

    if self.is_item_older_than_n_days(item, 10):
        # NOTE(review): this is the only rejection logged at INFO; the
        # other filters below log at DEBUG.  Confirm that is intended.
        logger.info("%s: is too old!", title)
        return False
    if renderer.looks_like_spam(title, unfiltered_description):
        logger.debug("%s: looks like spam", title)
        return False
    if renderer.looks_like_football(title, description):
        logger.debug("%s: looks like it's about football.", title)
        return False
    if renderer.looks_like_review(title, description):
        logger.debug("%s: looks like a review.", title)
        return False
    if renderer.looks_very_boring(title, description):
        logger.debug("%s: looks very boring.", title)
        return False
    return True
) -> bool:
unfiltered_description = item.findtext("description")
if self.is_item_older_than_n_days(item, 10):
- self.debug_print(f'{title}: is too old!')
+ logger.debug(f"{title}: is too old!")
return False
- if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description):
- self.debug_print(f'{title}: looks like spam')
+ if bellevue_reporter_rss_renderer.looks_like_spam(
+ title, unfiltered_description
+ ):
+ logger.debug(f"{title}: looks like spam")
return False
if bellevue_reporter_rss_renderer.looks_like_football(title, description):
- self.debug_print(f'{title}: looks like it\'s about football.')
+ logger.debug(f"{title}: looks like it's about football.")
return False
if bellevue_reporter_rss_renderer.looks_like_review(title, description):
- self.debug_print(f'{title}: looks like a review.')
+ logger.debug(f"{title}: looks like a review.")
return False
return True