projects
/
kiosk.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Ok, simplify and fix up this crap.
[kiosk.git]
/
generic_news_rss_renderer.py
diff --git
a/generic_news_rss_renderer.py
b/generic_news_rss_renderer.py
index 149f8acb3aa9f163d195d42deec8e82b442da34f..1ffe024a7e9d3d798e6432804badcb2c430a18f4 100644
(file)
--- a/
generic_news_rss_renderer.py
+++ b/
generic_news_rss_renderer.py
@@
-4,21
+4,23
@@
from abc import abstractmethod
import datetime
from dateutil.parser import parse
import http.client
import datetime
from dateutil.parser import parse
import http.client
-import random
+import logging
import re
import re
-import sys
-import traceback
from typing import Dict, List, Optional, Union
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional, Union
import xml.etree.ElementTree as ET
+from scottutilz import profanity_filter
+
import file_writer
import grab_bag
import renderer
import page_builder
import file_writer
import grab_bag
import renderer
import page_builder
-import profanity_filter
-class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
+logger = logging.getLogger(__file__)
+
+
+class generic_news_rss_renderer(renderer.abstaining_renderer):
def __init__(
self,
name_to_timeout_dict: Dict[str, int],
def __init__(
self,
name_to_timeout_dict: Dict[str, int],
@@
-26,8
+28,7
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
feed_uris: List[str],
page_title: str,
):
feed_uris: List[str],
page_title: str,
):
- super(generic_news_rss_renderer, self).__init__(name_to_timeout_dict, False)
- self.debug = True
+ super().__init__(name_to_timeout_dict)
self.feed_site = feed_site
self.feed_uris = feed_uris
self.page_title = page_title
self.feed_site = feed_site
self.feed_uris = feed_uris
self.page_title = page_title
@@
-35,10
+36,6
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
self.details = grab_bag.grab_bag()
self.filter = profanity_filter.ProfanityFilter()
self.details = grab_bag.grab_bag()
self.filter = profanity_filter.ProfanityFilter()
- @abstractmethod
- def debug_prefix(self) -> str:
- pass
-
@abstractmethod
def get_headlines_page_prefix(self) -> str:
pass
@abstractmethod
def get_headlines_page_prefix(self) -> str:
pass
@@
-136,7
+133,7
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
headlines.set_title("%s" % self.page_title)
subset = self.news.subset(4)
if subset is None:
headlines.set_title("%s" % self.page_title)
subset = self.news.subset(4)
if subset is None:
- self.debug_print("Not enough messages to choose from.")
+ logger.warning('Not enough messages to select from in shuffle_news?!')
return False
for msg in subset:
headlines.add_item(msg)
return False
for msg in subset:
headlines.add_item(msg)
@@
-187,10
+184,11
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
}
</STYLE>"""
)
}
</STYLE>"""
)
- details.set_title(f"{self.page_title}")
+ details.set_title(self.page_title)
subset = self.details.subset(1)
if subset is None:
subset = self.details.subset(1)
if subset is None:
- self.debug_print("Not enough details to choose from.")
+ logger.warning('Not enough details to choose from in do_details')
+ logger.debug("Not enough details to choose from.")
return False
for msg in subset:
blurb = msg
return False
for msg in subset:
blurb = msg
@@
-209,47
+207,55
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
http.client.HTTPSConnection]] = None
for uri in self.feed_uris:
http.client.HTTPSConnection]] = None
for uri in self.feed_uris:
+ url = None
if self.should_use_https():
if self.should_use_https():
- self.debug_print("Fetching: https://%s%s" % (self.feed_site, uri))
+ url = f'https://{self.feed_site}{uri}'
+ logger.info(f'Fetching: {url}')
self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
else:
self.conn = http.client.HTTPSConnection(self.feed_site, timeout=10)
else:
- self.debug_print("Fetching: http://%s%s" % (self.feed_site, uri))
+ url = f'http://{self.feed_site}{uri}'
+ logger.info(f'Fetching: {url}')
self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
self.conn = http.client.HTTPConnection(self.feed_site, timeout=10)
- assert(self.conn is not None)
+ assert self.conn is not None
+ assert url is not None
self.conn.request(
"GET",
uri,
None,
{
"Accept": "*/*",
self.conn.request(
"GET",
uri,
None,
{
"Accept": "*/*",
-# "Cache-control": "max-age=50",
-# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
+ "Cache-control": "max-age=50",
},
)
try:
response = self.conn.getresponse()
except Exception as e:
},
)
try:
response = self.conn.getresponse()
except Exception as e:
- traceback.print_exc(file=sys.stdout)
- print(
- f"Exception in generic RSS renderer HTTP connection fetching {self.feed_site}{uri}"
+ logger.exception(e)
+ logger.error(
+ f"Exception in generic RSS renderer HTTP connection fetching {url}; giving up."
)
return False
if response.status != 200:
)
return False
if response.status != 200:
- print(
- f"{self.page_title}: RSS fetch_news error, response: {response.status}"
+ logger.error(
+ f'Unexpected status {response.status} while fetching {url}; giving up.'
)
)
- self.debug_print(str(response.read()))
return False
return False
- rss = ET.fromstring(response.read())
+ raw = response.read()
+ logger.info(f'Status 200: got {len(raw)} bytes back from {url}')
+ rss = ET.fromstring(raw)
channel = rss[0]
title_filter = set()
channel = rss[0]
title_filter = set()
- for item in channel.getchildren():
+ for item in list(channel):
title = self.find_title(item)
description = item.findtext("description")
if title is not None:
title = self.munge_title(title, item)
title = self.find_title(item)
description = item.findtext("description")
if title is not None:
title = self.munge_title(title, item)
+ else:
+ logger.info('Skipping RSS feed item with no title.')
+ continue
+ logger.debug(f'Considering RSS item {title}...')
if description is not None:
description = self.munge_description(description, item)
else:
if description is not None:
description = self.munge_description(description, item)
else:
@@
-260,22
+266,22
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
link = item.findtext("link")
if link is not None:
link = self.munge_link(link)
link = item.findtext("link")
if link is not None:
link = self.munge_link(link)
-
- if title is None or not self.item_is_interesting_for_headlines(
- title, description, item
+ if not self.item_is_interesting_for_headlines(
+ title, description, item
):
):
- self.debug_print(f'Item "{title}" is not interesting')
+ logger.info(f'Skipping {title} because it\'s not interesting.')
continue
if self.should_profanity_filter() and (
self.filter.contains_bad_word(title)
or self.filter.contains_bad_word(description)
):
continue
if self.should_profanity_filter() and (
self.filter.contains_bad_word(title)
or self.filter.contains_bad_word(description)
):
- self.debug_print(f'Found bad words in item "{title}"')
+ logger.info(f'Skipping {title} because it contains profanity.')
continue
if title in title_filter:
continue
if title in title_filter:
- self.debug_print(f'Already saw title {title}, skipping.')
+ logger.info(f'Skipping {title} because we already saw an item with the same title.')
+ continue
title_filter.add(title)
blurb = """<DIV style="padding:8px;
title_filter.add(title)
blurb = """<DIV style="padding:8px;
@@
-292,18
+298,24
@@
class generic_news_rss_renderer(renderer.debuggable_abstaining_renderer):
pubdate = self.find_pubdate(item)
if pubdate is not None:
pubdate = self.find_pubdate(item)
if pubdate is not None:
+ logger.debug(f'Raw pubdate={pubdate}')
pubdate = self.munge_pubdate(pubdate)
ts = parse(pubdate)
pubdate = self.munge_pubdate(pubdate)
ts = parse(pubdate)
+ logger.debug(f'Translated pubdate into: {ts}')
blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
if self.item_is_interesting_for_article(title, description, item):
blurb += f' <FONT COLOR=#cccccc>{ts.strftime("%b %d")}</FONT>'
if self.item_is_interesting_for_article(title, description, item):
+ logger.info(f'Item {title} is also interesting as an article details page; creating...')
longblurb = blurb
longblurb += "<BR>"
longblurb += description
longblurb += "</DIV>"
longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
self.details.add(longblurb)
longblurb = blurb
longblurb += "<BR>"
longblurb += description
longblurb += "</DIV>"
longblurb = longblurb.replace("font-size:34pt", "font-size:44pt")
self.details.add(longblurb)
+ else:
+ logger.info(f'Item {title} isn\'t interesting for article details page; skipped.')
blurb += "</DIV>"
self.news.add(blurb)
count += 1
blurb += "</DIV>"
self.news.add(blurb)
count += 1
+ logger.debug(f'Added {count} items so far...')
return count > 0
return count > 0