...
 
Commits (2)
......@@ -17,183 +17,22 @@
# You should have received a copy of the GNU General Public License
# along with getpods. If not, see <http://www.gnu.org/licenses/>.
import collections
import configparser
import os
import re
import shutil
import sys
import tempfile
import urllib.request
import feedparser
#------------------------------------------------------------------------------
from podfeed import Item, Feed
# Default location of the user's getpods configuration file.
config_filename = "~/.getpods"
# Maximum number of summary lines shown per episode before truncation.
max_summary_lines = 20
#------------------------------------------------------------------------------
def clear_screen():
    """Clear the terminal screen.

    Uses the platform-appropriate shell command: ``cls`` on Windows,
    ``clear`` everywhere else (the original only handled POSIX).
    """
    os.system('cls' if os.name == 'nt' else 'clear')
#------------------------------------------------------------------------------
class Item(object):
    """Class encapsulating the information for a single podcast item,
    i.e. a single episode. Also contains a static cache keeping track
    of already downloaded episodes.
    """

    # Maps episode guid -> title for every episode already seen.
    cache = collections.OrderedDict()
    # True once the on-disk cache has been loaded into memory.
    cache_read = False
    # Path of the on-disk cache file; overridden by setup_cache().
    cache_file = "cache"

    def __init__(self, item_data, feed):
        """Wrap one feedparser item dict belonging to the given feed.

        :param item_data: dict-like item as produced by feedparser
        :param feed: the owning Feed object
        """
        if not Item.cache_read:
            Item.read_cache()
        self.data = item_data
        self.feed = feed
        # Build a plain-text summary: strip HTML tags, decode the few
        # entities we care about, drop the rest, and collapse whitespace.
        # A missing "summary" key yields an empty summary instead of a
        # KeyError (some feeds omit it).
        summ = self.data.get("summary", "")
        summ = re.sub(r'<[^<]+?>', ' ', summ)
        summ = summ.replace('&#38;', '&').replace('&#8230;', '...')
        summ = re.sub(r'&#?[0-9a-z]+;', '', summ)
        summ = re.sub(r'[ \t]+', ' ', summ)
        summ = re.sub(r'\n+', '\n', summ)
        self.summary = summ

    def __str__(self):
        return "[{0}] {1}".format(self.feed.title(),
                                  self.title())

    def is_new(self):
        """Return True if this episode has not been seen before."""
        return self.guid() not in Item.cache

    def mark_as_seen(self):
        """Record this episode in the in-memory cache."""
        Item.cache[self.guid()] = self.title()

    def guid(self):
        """Method that determines and return a unique identifier of
        the item."""
        return self.data["guid"]

    def author(self):
        """Return the item's author, or "" if the feed omits it."""
        if "author" in self.data:
            return self.data["author"]
        return ""

    def title(self):
        return self.data["title"]

    def auto_download(self):
        """Return True if the owning feed is set to auto-download."""
        return self.feed.do_auto

    def download_url(self):
        """Return the media URL of the episode, or "" if none found.

        Checks the usual places in order of preference:
        media_content, enclosure, then links with rel="enclosure".
        """
        url = ""
        if "media_content" in self.data:
            for media in self.data["media_content"]:
                if "url" in media:
                    url = media["url"]
        elif "enclosure" in self.data:
            url = self.data["enclosure"]["url"]
        elif "links" in self.data:
            for link in self.data["links"]:
                if link["rel"] == "enclosure":
                    url = link["href"]
        return url

    def download_localname(self):
        """Return a sanitized local file name derived from the episode
        title, keeping the extension of the download URL."""
        dl_url = self.download_url()
        ext = dl_url.rpartition('.')[2]
        return re.sub(r'[^a-zA-Z0-9]', '_', self.title()) + '.' + ext

    def print_summary(self, max_lines=None):
        """Print the episode header and its summary.

        :param max_lines: truncate the summary to this many lines;
            defaults to the module-wide max_summary_lines setting.
        """
        if max_lines is None:
            max_lines = max_summary_lines
        print("\n\n*", self)
        sumlines = self.summary.splitlines()
        summary = "\n".join(sumlines[0:max_lines])
        if len(sumlines) > max_lines:
            summary += "\n..."
        author = self.author()
        if author != "":
            summary = "Author: " + author + "\n" + summary
        print(summary)

    @staticmethod
    def setup_cache(dir):
        """Point the cache file into the given directory."""
        Item.cache_file = dir + "/cache"

    @staticmethod
    def save_cache():
        """Saves cache from memory to file."""
        with open(Item.cache_file, "w") as fp:
            for guid, title in Item.cache.items():
                fp.write(guid + '\t' + title + '\n')

    @staticmethod
    def read_cache():
        """Reads old cache from file to memory.

        A missing cache file (first run) is silently tolerated;
        malformed lines without a tab separator are skipped.
        """
        try:
            with open(Item.cache_file) as fp:
                for line in fp:
                    # Split on the first tab only, so titles that
                    # themselves contain tabs survive a round trip.
                    guid, sep, title = line.rstrip('\n').partition('\t')
                    if sep:
                        Item.cache[guid] = title
        except OSError:
            # No cache yet -- start empty. (Was a bare except that
            # hid every possible error.)
            pass
        Item.cache_read = True
#------------------------------------------------------------------------------
class Feed(object):
    """
    Class encapsulating the information for a podcast feed.
    """

    def __init__(self, url, dirname, do_auto):
        """Create a feed description.

        :param url: the feed's RSS/Atom URL
        :param dirname: local directory name for downloads
        :param do_auto: True if episodes should download automatically
        """
        self.url = url
        self.dirname = dirname
        self.do_auto = do_auto
        # Parsed feed data; empty until update() has been called.
        self.data = {}

    def update(self, newest=0):
        """Fetch the feed and return the list of new (unseen) Items.

        :param newest: if non-zero, return at most this many of the
            newest unseen items (older unseen ones are still counted
            but not returned).
        :returns: list of new Item objects, [] on fetch/parse error
        """
        self.data = feedparser.parse(self.url)
        title = self.title()
        if not title:
            print("Error updating", self.url)
            return []
        print("Updating", title, "...")
        new_items = []
        count = 0
        for item_data in self.data["items"]:
            item = Item(item_data, self)
            if item.is_new():
                if newest == 0 or count < newest:
                    new_items.append(item)
                count += 1
            else:
                item.mark_as_seen()
        return new_items

    def title(self):
        """Return the feed title, or "" when unknown.

        Tolerates an un-fetched feed (self.data == {}) and parse
        results lacking a "channel" entry; the original raised
        KeyError in both cases, defeating update()'s error check.
        """
        channel = self.data.get("channel", {})
        if "title" in channel:
            return channel["title"]
        else:
            return ""
#------------------------------------------------------------------------------
def read_urls(urls_fname):
"""
......@@ -208,7 +47,7 @@ def read_urls(urls_fname):
"""
feed_list = []
with open(urls_fname) as fp:
ln = 0
for line in fp:
......@@ -232,7 +71,7 @@ def read_urls(urls_fname):
if len(error) != 0:
print("ERROR in config file {0} on line {1},\n".
format(urls_fname, ln, error) +
error + ":\n\n--> " + line +
error + ":\n\n--> " + line +
"\nCorrect format is: url dir [?]. "
"I'm skipping this line until it has been fixed.\n")
else:
......@@ -240,9 +79,10 @@ def read_urls(urls_fname):
return feed_list
#------------------------------------------------------------------------------
old_output = ''
def download_progress(current, size):
global old_output
......@@ -258,12 +98,11 @@ def download_progress(current, size):
sys.stdout.write("\b" * len(output))
old_output = output
#------------------------------------------------------------------------------
def download_url(url, localname):
read_bytes = 0
with urllib.request.urlopen(url) as r, \
tempfile.NamedTemporaryFile(delete=False) as fp:
with urllib.request.urlopen(url) as \
r, tempfile.NamedTemporaryFile(delete=False) as fp:
total_bytes = r.length
while not r.closed and r.length > 0:
......@@ -275,10 +114,8 @@ def download_url(url, localname):
fp.close()
tmpname = fp.name
shutil.move(tmpname, localname)
return localname
#------------------------------------------------------------------------------
def getpods(action, podcasts_dir, urls_filename):
Item.setup_cache(podcasts_dir)
......@@ -299,11 +136,11 @@ def getpods(action, podcasts_dir, urls_filename):
# report new items, if any
n = len(new_items)
if n==0:
if n == 0:
print("No new episodes found.")
return
if n==1:
if n == 1:
print("One new episode found!")
else:
print(n, "new episodes found!")
......@@ -336,7 +173,7 @@ def getpods(action, podcasts_dir, urls_filename):
# unless we are in auto mode, query about each non-auto episode
if action != 'auto':
for item in query_items:
item.print_summary()
item.print_summary(max_summary_lines)
answer = raw_input('Download this episode? [Y/n] ')
if answer.lower() != 'n':
download_items.append(item)
......@@ -351,7 +188,7 @@ def getpods(action, podcasts_dir, urls_filename):
print("\nDownloading episodes...")
for item in download_items:
print("*", item)
dl_url = item.download_url()
if not dl_url:
......@@ -377,7 +214,6 @@ def getpods(action, podcasts_dir, urls_filename):
return num_downloaded
#------------------------------------------------------------------------------
def main():
global config_filename, max_summary_lines
......@@ -389,7 +225,7 @@ def main():
"script again.".format(config_filename,
"getpods.sample.conf"))
sys.exit(1)
config = configparser.ConfigParser()
config.read(config_filename)
podcasts_dir = os.path.expanduser(config.get("general", "podcasts_dir"))
......@@ -415,7 +251,7 @@ def main():
sys.exit(1)
action = "all"
if len(sys.argv) > 1:
action = sys.argv[1]
......@@ -434,7 +270,7 @@ def main():
print("catchup - marks all new episodes as seen, without\n"
" downloading anything.")
sys.exit(1)
nd = getpods(action, podcasts_dir, urls_filename)
if nd and config.has_option("general", "post_download_hook"):
......@@ -442,7 +278,6 @@ def main():
"post_download_hook"))
os.system(post_download_hook)
#------------------------------------------------------------------------------
# Run the command-line entry point only when executed as a script,
# not when imported as a module.
if __name__ == '__main__':
    main()