Testosterone and sensitivity.
[kiosk.git] / bellevue_reporter_rss_renderer.py
1 #!/usr/bin/env python3
2
3 import logging
4 import re
5 from typing import List, Dict
6 import xml
7 import xml.etree.ElementTree as ET
8
9 import generic_news_rss_renderer as gnrss
10
11
12 logger = logging.getLogger(__name__)
13
14
class bellevue_reporter_rss_renderer(gnrss.generic_news_rss_renderer):
    """Read the Bellevue Reporter's RSS feed.

    Specializes ``generic_news_rss_renderer`` with the page prefixes for
    this feed, a description cleaner that removes the feed's boilerplate,
    and a set of heuristics that reject items which look like spam,
    football coverage, reviews, or (for headlines only) items whose
    description adds nothing beyond the title.
    """

    # Age cutoff handed to is_item_older_than_n_days() by both
    # item_is_interesting_* filters.
    MAX_ITEM_AGE_DAYS = 10

    def __init__(
        self,
        name_to_timeout_dict: Dict[str, int],
        feed_site: str,
        feed_uris: List[str],
        page_title: str,
    ):
        """Forward all arguments unchanged to the generic renderer."""
        super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)

    def get_headlines_page_prefix(self) -> str:
        """Return the prefix string used for this feed's headlines page."""
        return "bellevue-reporter"

    def get_details_page_prefix(self) -> str:
        """Return the prefix string used for this feed's details page."""
        return "bellevue-reporter-details"

    def should_use_https(self) -> bool:
        """Return True (presumably: fetch the feed over HTTPS -- the
        base class decides how this flag is used)."""
        return True

    def munge_description(self, description: str, item: ET.Element) -> str:
        """Strip markup and site boilerplate from an item description.

        Args:
            description: the raw description text of a feed item.
            item: the feed item element; unused here, accepted to match
                the base class signature.

        Returns:
            The description with tag-like ``<...>`` spans, the doubled
            "Bellevue Reporter" byline, the "- Your local homepage."
            tagline and any "wire service" credit removed.
        """
        # NOTE: patterns are raw strings; the previous non-raw "\s" / "\-"
        # escapes trigger "invalid escape sequence" warnings on modern
        # Python versions.
        description = re.sub(r"<[^>]+>", "", description)
        description = re.sub(
            r"Bellevue\s+Reporter\s+Bellevue\s+Reporter", "", description
        )
        description = re.sub(r"\s*-\s*Your local homepage\.\s*", "", description)
        description = re.sub(r"[Ww]ire [Ss]ervice", "", description)
        return description

    @staticmethod
    def looks_like_football(title: str, description: str) -> bool:
        """Return True if the item mentions the NFL or a live stream.

        The title is checked for "NFL" and for "live stream" written with
        or without the space; the description is only checked for the
        spaced form.  A missing (None) title or description is treated as
        empty text.
        """
        title = title or ""
        description = description or ""
        return (
            "NFL" in title
            or re.search(r"[Ll]ive ?[Ss]tream", title) is not None
            or re.search(r"[Ll]ive [Ss]tream", description) is not None
        )

    @staticmethod
    def looks_like_review(title: str, description: str) -> bool:
        """Return True if the title contains "review" or "Review".

        The description is accepted for signature symmetry with the other
        heuristics but is not consulted.  A missing (None) title never
        looks like a review.
        """
        # NOTE(review): this is a plain substring test, so titles
        # containing e.g. "preview" are also rejected -- confirm intended.
        return title is not None and ("review" in title or "Review" in title)

    @staticmethod
    def looks_like_spam(title: str, description: str) -> bool:
        """Return True if the item matches one of the known spam markers.

        Both arguments must be present; if either is None the item is
        not classified as spam.
        """
        if title is None or description is None:
            return False
        return (
            # "marketplace" also covers "national-marketplace".
            "marketplace" in description
            or re.search(r"[Ww]eed", title) is not None
            or re.search(r"[Tt]estosterone", title) is not None
            or re.search(r"[Cc]annabis", title) is not None
            or re.search(r"[Cc]annabis", description) is not None
            or "THC" in title
            # "." matches any separator: "Top Rated", "top-rated", ...
            or re.search(r"[Tt]op.[Rr]ated", title) is not None
            or re.search(r"[Ll]ose [Ww]eight", title) is not None
            or re.search(r"[Ll]ose [Ww]eight", description) is not None
        )

    @staticmethod
    def looks_very_boring(title: str, description: str) -> bool:
        """Return True if the description merely repeats (part of) the title.

        The comparison is case-insensitive.  An empty description is
        contained in every title and is therefore boring; a missing
        (None) title or description is treated as empty text.
        """
        return (description or "").lower() in (title or "").lower()

    def _passes_common_filters(
        self,
        title: str,
        description: str,
        item: ET.Element,
        too_old_level: int,
    ) -> bool:
        """Apply the age, spam, football and review filters shared by the
        headline and article selectors.

        Args:
            title: the item title.
            description: the item description as handed to the selector.
            item: the feed item element; its raw "description" child text
                is what the spam check inspects.
            too_old_level: logging level used for the "too old" message
                (the two selectors historically log it differently).

        Returns:
            True if none of the shared filters rejected the item.
        """
        # The spam check deliberately looks at the description text taken
        # straight from the item rather than the `description` argument.
        unfiltered_description = item.findtext("description")
        if self.is_item_older_than_n_days(item, self.MAX_ITEM_AGE_DAYS):
            logger.log(too_old_level, f"{title}: is too old!")
            return False
        if self.looks_like_spam(title, unfiltered_description):
            logger.debug(f"{title}: looks like spam")
            return False
        if self.looks_like_football(title, description):
            logger.debug(f"{title}: looks like it's about football.")
            return False
        if self.looks_like_review(title, description):
            logger.debug(f"{title}: looks like a review.")
            return False
        return True

    def item_is_interesting_for_headlines(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Decide whether an item should appear among the headlines.

        Rejects items that are too old, look like spam, football coverage
        or a review, or whose description only repeats the title.
        """
        if not self._passes_common_filters(
            title, description, item, logging.INFO
        ):
            return False
        if self.looks_very_boring(title, description):
            logger.debug(f"{title}: looks very boring.")
            return False
        return True

    def item_is_interesting_for_article(
        self, title: str, description: str, item: ET.Element
    ) -> bool:
        """Decide whether an item should be rendered as a full article.

        Same filters as for headlines, except that a description which
        only repeats the title does not disqualify the item.
        """
        return self._passes_common_filters(
            title, description, item, logging.DEBUG
        )
123
124
125 # Test
126 # x = bellevue_reporter_rss_renderer(
127 #    {"Fetch News" : 1,
128 #     "Shuffle News" : 1},
129 #    "www.bellevuereporter.com",
130 #    [ "/feed/" ],
131 #    "Test" )
132 # d = """
133 # <DIV style="padding:8px;
134 #     font-size:44pt;
135 #     -webkit-column-break-inside:avoid;"><P>
136 # <B>Task force will tackle issues of racial justice, police reform</B>
137 # <BR>Bellevue Reporter
138 # Bellevue Reporter - Your local homepage.
139 # Inslee names civil rights activists, pastors, and cops to panel that may forge ideas for new laws Task force will tackle issues of racial justice, police reform
140 # Wire Service
141 # </DIV>"""
142 # d = x.munge_description(d)
143 # print(d)
144 # if x.fetch_news() == 0:
145 #    print("Error fetching news, no items fetched.")
146 # x.shuffle_news()