import re
from typing import List, Dict
import xml
+import xml.etree.ElementTree as ET
import generic_news_rss_renderer as gnrss
self.debug = True
def debug_prefix(self) -> str:
    """Return the tag used to label this renderer's debug output.

    The tag embeds ``self.page_title`` so output from different pages can
    be told apart.
    """
    page = self.page_title
    return f"bellevue_reporter({page})"
def get_headlines_page_prefix(self) -> str:
    """Return the fixed prefix string for this feed's headlines page.

    NOTE(review): how the prefix is consumed (file name, URL, ...) is
    decided by the base renderer, which is not visible here.
    """
    prefix = "bellevue-reporter"
    return prefix
def should_use_https(self) -> bool:
    """Return True unconditionally: this renderer always asks for HTTPS."""
    use_https = True
    return use_https
- def munge_description(self, description: str) -> str:
+ def munge_description(self, description: str, item: ET.Element) -> str:
description = re.sub("<[^>]+>", "", description)
description = re.sub(
"Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description
def looks_like_review(title: str, description: str) -> bool:
    """Return True when the title contains "review" or "Review".

    Only these two spellings are matched (e.g. "REVIEW" is not), and
    ``description`` is accepted but not inspected.
    """
    return any(marker in title for marker in ("review", "Review"))
@staticmethod
def looks_like_spam(title: str, description: str) -> bool:
    """Return True when an item looks like sponsored / spam content.

    An item is flagged when any of the following holds:

    * the description contains "marketplace" (this also covers
      "national-marketplace", so no separate test is needed);
    * the title contains "THC", "weed"/"Weed" or "cannabis"/"Cannabis";
    * the description contains "cannabis"/"Cannabis";
    * the title or the description contains "lose weight" with either
      word optionally capitalised.

    Args:
        title: Item title.
        description: Item description text. ``None`` is tolerated and
            treated as empty, because callers obtain this value from
            ``Element.findtext``, which returns ``None`` when the element
            is missing.

    Returns:
        True if the item matches any spam heuristic, otherwise False.
    """
    # Normalise missing text so the substring / regex tests below cannot
    # raise TypeError on None.
    title = title or ""
    description = description or ""

    return (
        "marketplace" in description
        or "THC" in title
        or re.search(r"[Ww]eed|[Cc]annabis|[Ll]ose [Ww]eight", title) is not None
        or re.search(r"[Cc]annabis|[Ll]ose [Ww]eight", description) is not None
    )
+
def item_is_interesting_for_headlines(
    self, title: str, description: str, item: xml.etree.ElementTree.Element
) -> bool:
    """Decide whether a feed item should be included in the headlines.

    An item is rejected (with a debug message naming the reason) when it
    is older than 10 days, looks like spam, looks like it is about
    football, or looks like a review.

    Args:
        title: Item title.
        description: Item description as supplied by the caller; used for
            the football and review checks.
        item: The item's XML element. Its own ``<description>`` text is
            re-read from here and used for the spam check.

    Returns:
        True if the item passes every filter, otherwise False.
    """
    if self.is_item_older_than_n_days(item, 10):
        self.debug_print(f"{title}: is too old!")
        return False

    # findtext() returns None when the item has no <description> child;
    # default to "" so the spam heuristics always receive a string.
    unfiltered_description = item.findtext("description", default="")
    if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description):
        self.debug_print(f"{title}: looks like spam")
        return False

    if bellevue_reporter_rss_renderer.looks_like_football(title, description):
        self.debug_print(f"{title}: looks like it's about football.")
        return False

    if bellevue_reporter_rss_renderer.looks_like_review(title, description):
        self.debug_print(f"{title}: looks like a review.")
        return False

    return True
def item_is_interesting_for_article(
    self, title: str, description: str, item: xml.etree.ElementTree.Element
) -> bool:
    """Decide whether a feed item should be rendered as an article.

    An item is rejected (with a debug message naming the reason) when it
    is older than 10 days, looks like spam, looks like it is about
    football, or looks like a review.

    Args:
        title: Item title.
        description: Item description as supplied by the caller; used for
            the football and review checks.
        item: The item's XML element. Its own ``<description>`` text is
            re-read from here and used for the spam check.

    Returns:
        True if the item passes every filter, otherwise False.
    """
    if self.is_item_older_than_n_days(item, 10):
        self.debug_print(f"{title}: is too old!")
        return False

    # findtext() returns None when the item has no <description> child;
    # default to "" so the spam heuristics always receive a string.
    unfiltered_description = item.findtext("description", default="")
    if bellevue_reporter_rss_renderer.looks_like_spam(title, unfiltered_description):
        self.debug_print(f"{title}: looks like spam")
        return False

    if bellevue_reporter_rss_renderer.looks_like_football(title, description):
        self.debug_print(f"{title}: looks like it's about football.")
        return False

    if bellevue_reporter_rss_renderer.looks_like_review(title, description):
        self.debug_print(f"{title}: looks like a review.")
        return False

    return True