google_news_rss_renderer.py

   1 #!/usr/bin/env python3
   2
   3 from bs4 import BeautifulSoup  # type: ignore
   4 import re
   5 from typing import Dict, List, Optional
   6 import xml
   7 import xml.etree.ElementTree as ET
   8
   9 import generic_news_rss_renderer
  10
  11
  12 class google_news_rss_renderer(generic_news_rss_renderer.generic_news_rss_renderer):
  13     def __init__(
  14         self,
  15         name_to_timeout_dict: Dict[str, int],
  16         feed_site: str,
  17         feed_uris: List[str],
  18         page_title: str,
  19     ) -> None:
  20         super(google_news_rss_renderer, self).__init__(
  21             name_to_timeout_dict, feed_site, feed_uris, page_title
  22         )
  23         self.debug = True
  24
  25     def debug_prefix(self) -> str:
  26         return "google-news"
  27
  28     def get_headlines_page_prefix(self) -> str:
  29         return "google-news"
  30
  31     def get_details_page_prefix(self) -> str:
  32         return "google-news-details"
  33
  34     def find_description(self, item: xml.etree.ElementTree.Element) -> str:
  35         descr = item.findtext("description")
  36         source = item.findtext("source")
  37         if descr is not None:
  38             if source is not None:
  39                 descr = descr + f" (source)"
  40         else:
  41             descr = ""
  42         return descr
  43
  44     def munge_description_internal(self, descr: str, item: ET.Element) -> str:
  45         if len(descr) > 450:
  46             descr = descr[:450]
  47             descr = re.sub(r"\<[^\>]*$", "", descr)
  48             descr = descr + " [...]"
  49         descr += "</A></LI></UL></OL></P>"
  50         return descr
  51
  52     def munge_description(self, description: str, item: ET.Element) -> str:
  53         soup = BeautifulSoup(description, features="lxml")
  54         for a in soup.findAll("a"):
  55             del a["href"]
  56         descr = str(soup)
  57         return self.munge_description_internal(descr, item)
  58
  59     def find_image(self, item: xml.etree.ElementTree.Element) -> Optional[str]:
  60         return None
  61
  62     def should_use_https(self) -> bool:
  63         return True
  64
  65     def item_is_interesting_for_headlines(
  66         self, title: str, description: str, item: xml.etree.ElementTree.Element
  67     ) -> bool:
  68         return not self.is_item_older_than_n_days(item, 2)
  69
  70     def item_is_interesting_for_article(
  71         self, title: str, description: str, item: xml.etree.ElementTree.Element
  72     ) -> bool:
  73         return not self.is_item_older_than_n_days(item, 2)
  74
  75
  76 # Test
  77 # x = google_news_rss_renderer(
  78 #    {"Fetch News" : 1,
  79 #     "Shuffle News" : 1},
  80 #    "news.google.com",
  81 #    [ "/rss?hl=en-US&gl=US&ceid=US:en" ],
  82 #    "Test" )
  83 # if x.fetch_news() == 0:
  84 #    print("Error fetching news, no items fetched.")
  85 # x.shuffle_news()
  86 #
  87 # descr = "this is a lot of really long text about nothign in particular.  It's pretty interesting, don't you think?  I hope that the munge description method works by both truncating it and remembering to close any open <LI>items as well as making sure not to truncate in the middle of a <A HREF=\"whatever\" these are a bunch of useless arguments to the A tag that make it really long so that the truncate will happen in the middle of it.  I'm getting kind of tired of typing shit so I'm going to revert to copy pasta now.  Sorry if you were getting into this story.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.  The quick brown fox jumps over the lazy dog.</A></LI> Out!"
  88 # d = x.munge_description_internal(descr)
  89 # print(d)