expunge.py 2.55 KB
Newer Older
1 2 3 4 5 6 7
""" Expunge old entries from a cache of entries """
import glob, os, planet, config, feedparser
from xml.dom import minidom
from spider import filename

def expungeCache():
    """ Expunge old entries from a cache of entries """
Sam Ruby's avatar
Sam Ruby committed
8
    log = planet.logger
9 10 11 12 13 14 15 16

    log.info("Determining feed subscriptions")
    entry_count = {}
    sources = config.cache_sources_directory()
    for sub in config.subscriptions():
        data=feedparser.parse(filename(sources,sub))
        if not data.feed.has_key('id'): continue
        if config.feed_options(sub).has_key('cache_keep_entries'):
17
            entry_count[data.feed.id] = int(config.feed_options(sub)['cache_keep_entries'])
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
        else:
            entry_count[data.feed.id] = config.cache_keep_entries()

    log.info("Listing cached entries")
    cache = config.cache_directory()
    dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")
        if not os.path.isdir(file)]
    dir.sort()
    dir.reverse()

    for mtime,file in dir:

        try:
            entry=minidom.parse(file)
            # determine source of entry
            entry.normalize()
            sources = entry.getElementsByTagName('source')
            if not sources:
                # no source determined, do not delete
                log.debug("No source found for %s", file)
                continue
            ids = sources[0].getElementsByTagName('id')
            if not ids:
                # feed id not found, do not delete
                log.debug("No source feed id found for %s", file)
                continue
            if ids[0].childNodes[0].nodeValue in entry_count:
                # subscribed to feed, update entry count
                entry_count[ids[0].childNodes[0].nodeValue] = entry_count[
                    ids[0].childNodes[0].nodeValue] - 1
                if entry_count[ids[0].childNodes[0].nodeValue] >= 0:
                    # maximum not reached, do not delete
                    log.debug("Maximum not reached for %s from %s",
                        file, ids[0].childNodes[0].nodeValue)
                    continue
                else:
                    # maximum reached
                    log.debug("Removing %s, maximum reached for %s",
                        file, ids[0].childNodes[0].nodeValue)
            else:
                # not subscribed
                log.debug("Removing %s, not subscribed to %s",
                    file, ids[0].childNodes[0].nodeValue)
            # remove old entry
62
            os.unlink(file)
63 64 65 66 67

        except:
            log.error("Error parsing %s", file)

# end of expungeCache()