#!/usr/bin/env python3
-from bs4 import BeautifulSoup # type: ignore
+import logging
import re
from typing import Dict, List, Optional
import xml
+import xml.etree.ElementTree as ET
+
+from bs4 import BeautifulSoup # type: ignore
import generic_news_rss_renderer
+logger = logging.getLogger(__name__)
+
# RSS renderer subclass of generic_news_rss_renderer.generic_news_rss_renderer.
#
# NOTE(review): this span is a diff fragment ("-" = removed line, "+" = added
# line) whose indentation has been stripped.  The __init__ parameter list looks
# truncated: `name_to_timeout_dict` and `feed_site` are forwarded to the base
# constructor below but are not declared in the visible signature, and `self`
# is missing too -- presumably they sit on lines elided from this view.
# Confirm against the full file before editing.
#
# Visible change: the two-argument super(...) call is replaced by the
# zero-argument super() form, and the `self.debug = True` assignment and the
# `debug_prefix` method are removed.
class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
def __init__(
feed_uris: List[str],
page_title: str,
) -> None:
- super(google_news_rss_renderer, self).__init__(
- name_to_timeout_dict, feed_site, feed_uris, page_title
- )
- self.debug = True
-
- def debug_prefix(self) -> str:
- return "google-news"
+ super().__init__(name_to_timeout_dict, feed_site, feed_uris, page_title)
def get_headlines_page_prefix(self) -> str:
    """Return the headlines-page prefix for this renderer.

    Returns:
        The constant string ``"google-news"``.
    """
    return "google-news"
def find_description(self, item: xml.etree.ElementTree.Element) -> str:
    """Return the item's description text, tagged with its source if present.

    Args:
        item: The XML element to search for ``description`` and ``source``
            children.

    Returns:
        The text of the ``description`` child.  When a ``source`` child is
        also present, `` (<source text>)`` is appended.  If there is no
        ``description`` child, the empty string is returned.
    """
    descr = item.findtext("description")
    if descr is None:
        return ""

    # The source is only looked up when there is a description to attach it to.
    source = item.findtext("source")
    if source is not None:
        # Interpolate the actual source name; the earlier form emitted the
        # literal text " (source)" because the braces were missing.
        descr = f"{descr} ({source})"
    return descr
# Shorten an over-long description string and append a run of closing tags.
# The "+" signature adds an `item` parameter; nothing in the visible body reads it.
- def munge_description_internal(self, descr: str) -> str:
+ def munge_description_internal(self, descr: str, item: ET.Element) -> str:
# Descriptions longer than 450 characters are cut down to their first 450.
if len(descr) > 450:
descr = descr[:450]
# Remove a trailing "<..." fragment that has no closing ">" (e.g. a tag that
# the slice above cut in half).
descr = re.sub(r"\<[^\>]*$", "", descr)
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# closing tags below are appended only after truncation or unconditionally;
# a line may also be elided just above -- confirm against the full file.
descr += "</A></LI></UL></OL></P>"
return descr
def munge_description(self, description: str, item: ET.Element) -> str:
    """Strip link targets from an HTML description, then post-process it.

    Args:
        description: HTML markup of the description.
        item: The XML element the description came from; forwarded unchanged
            to ``munge_description_internal``.

    Returns:
        Whatever ``munge_description_internal`` returns for the re-serialized
        markup.
    """
    soup = BeautifulSoup(description, features="lxml")
    # ``find_all`` is the current spelling of the deprecated ``findAll`` alias.
    for anchor in soup.find_all("a"):
        # Drop the link target but keep the anchor element and its text.
        del anchor["href"]
    return self.munge_description_internal(str(soup), item)
def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
    """Return ``None``; this renderer never extracts an image from an item.

    Args:
        item: The XML element for the feed entry (not inspected).

    Returns:
        Always ``None``.
    """
    return None