Commit b31973d5 authored by Sam Ruby's avatar Sam Ruby

Initial load

parents
/*
 * Planet page stylesheet.
 *
 * Layout: body reserves a 200px right margin; the .sidebar block is
 * absolutely positioned into that strip (see .sidebar below).
 */
body {
    border-right: 1px solid black;
    margin-right: 200px;
    padding-left: 20px;
    padding-right: 20px;
}

/* Site banner: lowercase grey heading, right-aligned over the content. */
h1 {
    margin-top: 0px;
    padding-top: 20px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    letter-spacing: -2px;
    text-transform: lowercase;
    text-align: right;
    color: grey;
}

.admin {
    text-align: right;
}

/* Per-day date headings between groups of entries. */
h2 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    color: #200080;
    margin-left: -20px;
}

/* Entry title bar (source name + entry title). */
h3 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: normal;
    background-color: #a0c0ff;
    border: 1px solid #5080b0;
    padding: 4px;
}
h3 a {
    text-decoration: none;
    color: inherit;
}

h4 {
    font-family: "Bitstream Vera Sans", sans-serif;
    font-weight: bold;
}
h4 a {
    text-decoration: none;
    color: inherit;
}

/* Avatar images float right, pulled up against the entry heading. */
img.face {
    float: right;
    margin-top: -3em;
}

.entry {
    margin-bottom: 2em;
}
.entry .date {
    font-family: "Bitstream Vera Sans", sans-serif;
    color: grey;
}
.entry .date a {
    text-decoration: none;
    color: inherit;
}

/* Sidebar fills the 200px strip reserved by body's margin-right. */
.sidebar {
    position: absolute;
    top: 0px;
    right: 0px;
    width: 200px;
    margin-left: 0px;
    margin-right: 0px;
    padding-right: 0px;
    padding-top: 20px;
    padding-left: 0px;
    font-family: "Bitstream Vera Sans", sans-serif;
    font-size: 85%;
}
.sidebar h2 {
    font-size: 110%;
    font-weight: bold;
    color: black;
    padding-left: 5px;
    margin-left: 0px;
}
.sidebar ul {
    padding-left: 1em;
    margin-left: 0px;
    list-style-type: none;
}
.sidebar ul li:hover {
    color: grey;
}
.sidebar ul li a {
    text-decoration: none;
}
.sidebar ul li a:hover {
    text-decoration: underline;
}
.sidebar ul li a img {
    border: 0;
}
.sidebar p {
    border-top: 1px solid grey;
    margin-top: 30px;
    padding-top: 10px;
    padding-left: 5px;
}

/* "message" markers: dashed red underline with a help cursor. */
.sidebar .message {
    cursor: help;
    border-bottom: 1px dashed red;
}
.sidebar a.message:hover {
    cursor: help;
    background-color: #ff0000;
    color: #ffffff !important;
    text-decoration: none !important;
}

a:hover {
    text-decoration: underline !important;
    color: blue !important;
}
<!--
  Render a Planet-merged Atom feed as an XHTML page: a banner, one block
  per entry grouped under per-day date headings, and a sidebar listing the
  planet:subscription elements.  Comments in this stylesheet are ignored
  by XSLT processors and are not copied to the output.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
    xmlns:atom="http://www.w3.org/2005/Atom"
    xmlns:planet="http://planet.intertwingly.net/"
    xmlns="http://www.w3.org/1999/xhtml">

  <!-- Page shell: head, banner, entries, and the subscription sidebar. -->
  <xsl:template match="atom:feed">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head>
        <link rel="stylesheet" href="planet.css" type="text/css" />
        <title><xsl:value-of select="atom:title"/></title>
      </head>
      <body>
        <h1><xsl:value-of select="atom:title"/></h1>
        <xsl:apply-templates select="atom:entry"/>
        <div class="sidebar">
          <img src="images/logo.png" width="136" height="136" alt=""/>
          <h2>Subscriptions</h2>
          <ul>
            <!-- One item per subscription, sorted by name, each carrying
                 a feed icon linked to the subscription's self link. -->
            <xsl:for-each select="planet:subscription">
              <xsl:sort select="planet:name"/>
              <li>
                <a href="{atom:link[@rel='self']/@href}" title="subscribe">
                  <img src="images/feed-icon-10x10.png" alt="(feed)"/>
                </a>
                <xsl:value-of select="planet:name"/>
              </li>
            </xsl:for-each>
          </ul>
        </div>
      </body>
    </html>
  </xsl:template>

  <!-- A single entry: optional date heading, title bar, then content. -->
  <xsl:template match="atom:entry">
    <!-- Emit the date heading only for the first entry of each day. -->
    <xsl:variable name="date" select="substring(atom:updated,1,10)"/>
    <xsl:if test="not(preceding-sibling::atom:entry
        [substring(atom:updated,1,10) = $date])">
      <h2 class="date"><xsl:value-of select="$date"/></h2>
    </xsl:if>

    <!-- Title bar: source name, an em dash, then the entry title. -->
    <h3>
      <a href="{atom:source/atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:source/planet:name"/>
      </a>
      &#x2014;
      <a href="{atom:link[@rel='alternate']/@href}">
        <xsl:value-of select="atom:title"/>
      </a>
    </h3>

    <div class="content">
      <!-- Prefer atom:content; fall back to atom:summary. -->
      <xsl:choose>
        <xsl:when test="atom:content">
          <p><xsl:copy-of select="atom:content/*"/></p>
        </xsl:when>
        <xsl:otherwise>
          <p><xsl:copy-of select="atom:summary/*"/></p>
        </xsl:otherwise>
      </xsl:choose>
    </div>
  </xsl:template>
</xsl:stylesheet>
This diff is collapsed.
logger = None

def getLogger(level):
    """Return the shared 'planet.runner' logger, configured at *level*.

    level -- a logging level name such as "DEBUG" or "WARNING".

    The logger is created once and cached in the module-global `logger`;
    subsequent calls return the cached object regardless of *level*.
    """
    global logger
    if logger: return logger

    try:
        import logging
    except ImportError:
        # pre-2.3 Pythons lack the logging module; use the bundled shim
        import compat_logging as logging

    logging.basicConfig()
    logging.getLogger().setLevel(logging.getLevelName(level))
    logger = logging.getLogger("planet.runner")

    # ancient logging modules only provide warn(), not warning()
    try:
        logger.warning
    except AttributeError:
        logger.warning = logger.warn

    return logger
def setTimeout(timeout):
    """Time out rather than hang forever on ultra-slow servers.

    timeout -- the desired socket timeout in seconds; any value that
    cannot be converted to float is logged and ignored.
    """
    if timeout:
        try:
            timeout = float(timeout)
        except (ValueError, TypeError):
            # was a bare except; only conversion failures are expected here
            logger.warning("Timeout set to invalid value '%s', skipping", timeout)
            timeout = None

    if timeout:
        try:
            # prefer the bundled timeoutsocket module when available
            from planet import timeoutsocket
            timeoutsocket.setDefaultSocketTimeout(timeout)
            logger.debug("Socket timeout set to %d seconds", timeout)
        except ImportError:
            import socket
            if hasattr(socket, 'setdefaulttimeout'):
                # Python 2.3+ has a native default-timeout mechanism
                logger.debug("timeoutsocket not found, using python function")
                socket.setdefaulttimeout(timeout)
                logger.debug("Socket timeout set to %d seconds", timeout)
            else:
                logger.error("Unable to set timeout to %d seconds", timeout)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
"""
Planet Configuration
This module encapsulates all planet configuration. This is not a generic
configuration parser, it knows everything about configuring a planet - from
the structure of the ini file, to knowledge of data types, even down to
what are the defaults.
Usage:
from planet import config
config.load('config.ini')
# administrative / structural information
print config.templates()
print config.feeds()
# planet wide configuration
print config.name()
print config.link()
# per template configuration
print config.days_per_page('atom.xml.tmpl')
print config.encoding('index.html.tmpl')
Todo:
* error handling (example: no planet section)
"""
import sys
from ConfigParser import ConfigParser
parser = ConfigParser()          # module-wide parser; replaced by load()

# option names registered via define_planet(); these planet-level settings
# are excluded from per-feed option dictionaries (see feed_options)
planet_predefined_options = []

def __init__():
    """define the structure of an ini file"""
    from planet import config

    def get(section, option, default):
        # look the option up in the given section, falling back first to
        # the [Planet] section and then to the supplied default
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        elif parser.has_option('Planet', option):
            return parser.get('Planet', option)
        else:
            return default

    def define_planet(name, default):
        # expose a planet-wide option as config.<name>()
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    def define_tmpl(name, default):
        # expose a per-template option as config.<name>(section)
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    def define_tmpl_int(name, default):
        # integer-valued variant of define_tmpl
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', "Unconfigured Planet")
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('feed_timeout', 20)

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl('encoding', 'utf-8')

    # prevent re-initialization
    setattr(config, '__init__', lambda: None)
def load(file):
    """ initialize and load a configuration

    file -- path (or list of paths) passed straight to ConfigParser.read
    """
    # (re)install the planet-level accessors, then parse the ini file into
    # a fresh module-wide parser
    __init__()
    global parser
    parser = ConfigParser()
    parser.read(file)
def template_files():
    """ list the templates defined

    Returns the whitespace-separated values of [Planet] template_files.
    """
    # split() with no argument tolerates runs of spaces, tabs and newlines;
    # split(' ') produced spurious empty entries on consecutive spaces
    return parser.get('Planet','template_files').split()
def feeds():
    """ list the feeds defined

    Every ini section is a feed except [Planet] itself and any section
    that names a template file.
    """
    return [section for section in parser.sections()
        if section != 'Planet' and section not in template_files()]
def planet_options():
    """ dictionary of planet wide options"""
    # collect every option found in the [Planet] section
    options = {}
    for option in parser.options('Planet'):
        options[option] = parser.get('Planet', option)
    return options
def feed_options(section):
    """ dictionary of feed specific options

    section -- the feed's ini section name (its URL).

    Starts from the planet-wide options minus the predefined planet
    settings, then overlays anything from the feed's own section.
    """
    # (removed an unused "from planet import config" import)
    options = dict([(key,value) for key,value in planet_options().items()
        if key not in planet_predefined_options])
    if parser.has_section(section):
        for option in parser.options(section):
            options[option] = parser.get(section, option)
    return options
def template_options(section):
    """ dictionary of template specific options"""
    # template sections are resolved exactly like feed sections
    return feed_options(section)
def write(file=sys.stdout):
    """ write out an updated configuration file

    file -- a writable file object (defaults to stdout).
    """
    # ConfigParser.write() returns None; the original "print parser.write(file)"
    # therefore printed a stray "None" to stdout in addition to writing
    parser.write(file)
This diff is collapsed.
"""
Reconstitute an entry document from the output of the Universal Feed Parser.
The main entry point is called 'reconstitute'. Input parameters are:
results: this is the entire hash table returned by the UFP
entry: this is the entry in the hash that you want reconstituted
The value returned is an XML DOM. Every effort is made to convert
everything to unicode, and text fields into either plain text or
well formed XHTML.
Todo:
* extension elements
"""
import re, time, md5, sgmllib
from xml.sax.saxutils import escape
from xml.dom import minidom
from BeautifulSoup import BeautifulSoup
from xml.parsers.expat import ExpatError
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
def createTextElement(parent, name, value):
    """ utility function to create a child element with the specified text"""
    # nothing to do for None / empty values
    if not value:
        return
    document = parent.ownerDocument
    child = document.createElement(name)
    text = document.createTextNode(value)
    child.appendChild(text)
    parent.appendChild(child)
def invalidate(c):
    """ replace invalid characters """
    # render the offending character as its U+XXXX codepoint followed by
    # the UTF-8 encoded Unicode replacement character
    digits = '%04x' % ord(c.group(0))
    return '<acronym title="U+%s">\xef\xbf\xbd</acronym>' % digits
def ncr2c(value):
    """ convert numeric character references to characters """
    ref = value.group(1)
    # hexadecimal references look like x1F; everything else is decimal
    if ref.startswith('x'):
        return unichr(int(ref[1:], 16))
    return unichr(int(ref))
def normalize(text, bozo):
    """ convert everything to well formed XML

    text -- a UFP text construct (dict-like with 'type' and 'value' keys)
    bozo -- the feed's bozo flag; truthy when the source was not well formed
    """
    if text.has_key('type'):
        # anything that is not already html gets escaped and relabelled
        if text.type.lower().find('html')<0:
            text['value'] = escape(text.value)
            text['type'] = 'text/html'
        if text.type.lower() == 'text/html' or bozo:
            # tidy the markup with BeautifulSoup, then scrub attribute
            # values: expand numeric character references and replace
            # characters that are illegal in XML with U+FFFD
            dom=BeautifulSoup(text.value,convertEntities="html")
            for tag in dom.findAll(True):
                for attr,value in tag.attrs:
                    value=sgmllib.charref.sub(ncr2c,value)
                    value=illegal_xml_chars.sub(u'\uFFFD',value)
                    tag[attr]=value
            # in text content, make illegal characters visible instead
            # (see invalidate)
            text['value'] = illegal_xml_chars.sub(invalidate, str(dom))
    return text
def id(xentry, entry):
    """ copy or compute an id for the entry

    Returns the id (or None when none can be determined) and, when an
    xentry element is supplied, records it as a child <id> element.
    """
    # hashlib replaces the md5 module (deprecated since Python 2.5,
    # removed in Python 3); fall back to md5 on older interpreters
    try:
        from hashlib import md5 as _md5
    except ImportError:
        from md5 import new as _md5

    if entry.has_key("id"):
        entry_id = entry.id
    elif entry.has_key("link"):
        entry_id = entry.link
    elif entry.has_key("title"):
        entry_id = (entry.title_detail.base + "/" +
            _md5(entry.title).hexdigest())
    elif entry.has_key("summary"):
        entry_id = (entry.summary_detail.base + "/" +
            _md5(entry.summary).hexdigest())
    elif entry.has_key("content"):
        entry_id = (entry.content[0].base + "/" +
            _md5(entry.content[0].value).hexdigest())
    else:
        return None

    if xentry:
        createTextElement(xentry, 'id', entry_id)
    return entry_id
def links(xentry, entry):
    """ copy links to the entry """
    # entries without any links contribute nothing
    if not entry.has_key('links'):
        return
    document = xentry.ownerDocument
    for link in entry.links:
        element = document.createElement('link')
        element.setAttribute('type', link.type)
        element.setAttribute('href', link.href)
        element.setAttribute('rel', link.rel)
        xentry.appendChild(element)
def date(xentry, name, parsed):
    """ insert a date-formatted element into the entry

    parsed -- a time.struct_time (or None), rendered as ISO 8601 UTC
    """
    if parsed:
        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsed)
        createTextElement(xentry, name, stamp)
def author(xentry, name, detail):
    """ insert an author-like element into the entry

    detail -- a UFP author detail dict (name/email/href), or None
    """
    if not detail: return
    document = xentry.ownerDocument
    person = document.createElement(name)
    # map the UFP detail keys onto Atom person-construct children
    for child, key in (('name', 'name'), ('email', 'email'), ('uri', 'href')):
        createTextElement(person, child, detail.get(key, None))
    xentry.appendChild(person)
def content(xentry, name, detail, bozo):
    """ insert a content-like element into the entry

    xentry -- the <entry> DOM element to append to
    name   -- tag name to create ('title', 'summary', 'content', ...)
    detail -- a UFP text construct, or None
    bozo   -- the feed's bozo flag, passed through to normalize()
    """
    if not detail or not detail.value: return
    normalize(detail, bozo)
    xdoc = xentry.ownerDocument
    xcontent = xdoc.createElement(name)
    try:
        # see if the resulting text is a well-formed XML fragment
        div = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
        if isinstance(detail.value,unicode):
            detail.value=detail.value.encode('utf-8')
        data = minidom.parseString(div % detail.value).documentElement
        if detail.value.find('<') < 0:
            # no markup at all: emit just the text node, untyped
            xcontent.appendChild(data.firstChild)
        else:
            # markup present: emit the wrapping div as type="xhtml"
            xcontent.setAttribute('type', 'xhtml')
            xcontent.appendChild(data)
    except ExpatError:
        # leave as html
        xcontent.setAttribute('type', 'html')
        xcontent.appendChild(xdoc.createTextNode(detail.value.decode('utf-8')))
    if detail.language:
        # NOTE(review): assumes detail always exposes a 'language'
        # attribute (possibly None) -- confirm against the feedparser API
        xcontent.setAttribute('xml:lang', detail.language)
    xentry.appendChild(xcontent)
def source(xentry, source, bozo):
    """ copy source information to the entry """
    document = xentry.ownerDocument
    node = document.createElement('source')

    # simple text children copied straight across
    for simple in ('id', 'icon', 'logo'):
        createTextElement(node, simple, source.get(simple, None))

    author(node, 'author', source.get('author_detail',None))
    for person in source.get('contributors',[]):
        author(node, 'contributor', person)

    links(node, source)

    content(node, 'rights', source.get('rights_detail',None), bozo)
    content(node, 'subtitle', source.get('subtitle_detail',None), bozo)
    content(node, 'title', source.get('title_detail',None), bozo)
    date(node, 'updated', source.get('updated_parsed',None))

    # propagate planet inserted information
    for key, value in source.items():
        if key.startswith('planet:'):
            createTextElement(node, key, value)

    xentry.appendChild(node)
def reconstitute(feed, entry):
    """ create an entry document from a parsed feed

    feed  -- the complete UFP result hash
    entry -- the UFP entry to reconstitute
    Returns a minidom document whose root is an Atom <entry>.
    """
    document = minidom.parseString('<entry xmlns="http://www.w3.org/2005/Atom"/>\n')
    target = document.documentElement
    target.setAttribute('xmlns:planet','http://planet.intertwingly.net/')

    id(target, entry)
    links(target, entry)

    bozo = feed.bozo
    content(target, 'title', entry.get('title_detail',None), bozo)
    content(target, 'summary', entry.get('summary_detail',None), bozo)
    content(target, 'content', entry.get('content',[None])[0], bozo)
    content(target, 'rights', entry.get('rights_detail',None), bozo)

    date(target, 'updated', entry.get('updated_parsed',time.gmtime()))
    date(target, 'published', entry.get('published_parsed',None))

    author(target, 'author', entry.get('author_detail',None))
    for person in entry.get('contributors',[]):
        author(target, 'contributor', person)

    source(target, entry.get('source', feed.feed), bozo)
    return document
"""
Fetch either a single feed, or a set of feeds, normalize to Atom and XHTML,
and write each as a set of entries in a cache directory.
"""
from planet import config, feedparser, reconstitute
import time, calendar, re, os
try:
from xml.dom.ext import PrettyPrint
except:
PrettyPrint = None
# Regular expressions to sanitise cache filenames
re_url_scheme    = re.compile(r'^[^:]*://')
re_slash         = re.compile(r'[?/]+')
re_initial_cruft = re.compile(r'^[,.]*')
re_final_cruft   = re.compile(r'[,.]*$')

def filename(directory, filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    try:
        # best-effort IDNA encoding of the host part; URLs whose path
        # contains characters invalid in IDNA labels simply fall through
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                filename=filename.decode('utf-8').encode('idna')
            else:
                filename=filename.encode('idna')
    except (UnicodeError, LookupError, AttributeError):
        # was a bare except; these are the failures IDNA encoding raises
        pass

    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)
    filename = re_initial_cruft.sub("", filename)
    filename = re_final_cruft.sub("", filename)
    return os.path.join(directory, filename)
def spiderFeed(feed):
    """ Spider (fetch) a single feed

    feed -- the feed URL (also its config section name).  Each entry is
    reconstituted and written to the cache directory, with the file's
    mtime set to the entry's updated time.
    """
    data = feedparser.parse(feed)
    cache = config.cache_directory()

    # capture data from the planet configuration file
    for name, value in config.feed_options(feed).items():
        data.feed['planet:'+name] = value

    for entry in data.entries:
        # ensure each entry has an id; skip entries where none can be found
        if not entry.has_key('id'):
            entry['id'] = reconstitute.id(None, entry)
        if not entry['id']: continue

        out = filename(cache, entry.id)

        # determine the entry's modification time, falling back to the
        # existing cache file's mtime and then to "now"
        if entry.has_key('updated_parsed'):
            mtime = calendar.timegm(entry.updated_parsed)
        else:
            try:
                mtime = os.stat(out).st_mtime
            except OSError:
                mtime = time.time()
            entry['updated_parsed'] = time.gmtime(mtime)

        # reconstitute once and reuse; the original recomputed the document
        # for each output path and discarded the first result
        xml = reconstitute.reconstitute(data, entry)
        output = open(out,'w')
        try:
            if PrettyPrint:
                PrettyPrint(xml, output)
            else:
                output.write(xml.toxml('utf-8'))
        finally:
            # close even if the write fails, so the descriptor never leaks
            output.close()
        os.utime(out, (mtime, mtime))
def spiderPlanet(configFile):
    """ Spider (fetch) an entire planet

    configFile -- path to the planet's ini file.
    """
    import planet

    # load the configuration, then set up logging and socket timeouts
    config.load(configFile)
    log = planet.getLogger(config.log_level())
    planet.setTimeout(config.feed_timeout())

    # fetch every configured feed in turn
    for feed in config.feeds():
        log.info("Updating feed %s", feed)
        spiderFeed(feed)
""" Splice together a planet from a cache of feed entries """
import glob, os
from planet import config
from xml.dom import minidom
from reconstitute import createTextElement
def splice(configFile):
""" Splice together a planet from a cache of entries """
import planet
config.load(configFile)
log = planet.getLogger(config.log_level())
cache = config.cache_directory()
dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")]
dir.sort()
dir.reverse()
items=max([config.items_per_page(templ)
for templ in config.template_files()])
doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
feed = doc.documentElement
# insert feed information
createTextElement(feed, 'title', config.name())
# insert entry information
for mtime,file in dir[:items]:
entry=minidom.parse(file)
feed.appendChild(entry