1.1 --- a/MoinShare.py Sun Jan 26 00:42:49 2014 +0100
1.2 +++ b/MoinShare.py Mon Mar 31 23:54:42 2014 +0200
1.3 @@ -9,15 +9,19 @@
1.4 """
1.5
1.6 from ContentTypeSupport import getContentPreferences
1.7 -from DateSupport import getCurrentTime, getDateTimeFromRFC2822
1.8 +from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
1.9 + getDateTimeFromISO8601, DateTime
1.10 from MoinSupport import *
1.11 +from MoinRemoteSupport import *
1.12 from ItemSupport import ItemStore
1.13 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
1.14 from MoinMessageSupport import get_homedir, get_username_for_fingerprint
1.15 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
1.16 from MoinMoin import wikiutil
1.17 from email.parser import Parser
1.18 +from email.utils import parsedate
1.19 from codecs import getwriter
1.20 +import xml.dom.pulldom
1.21
1.22 try:
1.23 from cStringIO import StringIO
1.24 @@ -28,6 +32,35 @@
1.25
1.26 __version__ = "0.1"
1.27
1.28 +ATOM_NS = "http://www.w3.org/2005/Atom"
1.29 +
1.30 +# Utility functions.
1.31 +
def text(element):

    "Return the concatenated character data of 'element', ignoring child elements."

    return "".join(node.nodeValue
                   for node in element.childNodes
                   if node.nodeType == node.TEXT_NODE)
1.38 +
def children(element):

    "Return the serialised XML of the child nodes of 'element', markup included."

    return "".join(node.toxml() for node in element.childNodes)
1.44 +
def unescape(text):

    """
    Return 'text' with the XML entity references for the angle brackets and
    the ampersand replaced by the literal characters. The ampersand entity is
    replaced last so that a sequence such as "&amp;lt;" decodes to "&lt;"
    rather than being decoded twice to "<".
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
1.47 +
def linktext(element, feed_type):

    """
    Return the link location from 'element'. RSS feeds supply the location as
    the element's text; otherwise (Atom) it is the "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")
    return text(element)
1.53 +
def need_content(show_content, tagname):

    """
    Return whether full content is requested by 'show_content' and whether
    'tagname' names a content-bearing element ("content" in Atom,
    "description" in RSS).
    """

    content_requested = show_content in ("content", "description")
    content_element = tagname in ("content", "description")
    return content_requested and content_element
1.56 +
1.57 # More Moin 1.9 compatibility functions.
1.58
1.59 def has_member(request, groupname, username):
1.60 @@ -138,6 +171,160 @@
1.61 update.path.append(part_number)
1.62 return update
1.63
1.64 +# Error classes.
1.65 +
class FeedError(Exception):

    "Base class for errors retrieving or interpreting feeds."
1.68 +
class FeedMissingError(FeedError):

    "Raised when a feed resource cannot be obtained."
1.71 +
class FeedContentTypeError(FeedError):

    "Raised when a feed resource does not have a supported content type."
1.74 +
1.75 +# Feed retrieval from URLs.
1.76 +
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised if the resource cannot be obtained;
    FeedContentTypeError is raised if the resource is not an Atom, RSS or
    generic XML document.
    """

    feed_updates = []

    # Obtain the resource, using a cached version if appropriate. The cache
    # period (in seconds) is configurable via moin_share_max_cache_age.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    feed = StringIO(data)
    _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)

    if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
        raise FeedContentTypeError

    try:
        # Parse each node from the feed, pulling events instead of building
        # the whole document in memory.

        channel_title = channel_link = None

        feed_type = None
        update = None
        in_source = False

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            # Elements inside an Atom source declaration describe the entry's
            # origin, not the entry itself, and are therefore skipped.

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type and items.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements. Before any item has been seen, title
                # and link elements describe the channel itself.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary (which never overwrites content already seen).

                    if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                        if feed_type == "atom":
                            update.content_type = value.getAttribute("type") or "text"

                            # Normalise the content types and extract the
                            # content.

                            if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                                update.content = children(value)
                                update.content_type = "application/xhtml+xml"
                            elif update.content_type in ("html", "text/html"):
                                update.content = text(value)
                                update.content_type = "text/html"
                            else:
                                update.content = text(value)
                                update.content_type = "text/plain"
                        else:
                            # RSS descriptions are conventionally HTML.
                            update.content_type = "text/html"
                            update.content = text(value)

                elif feed_type == "atom" and tagname == "updated" or \
                    feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    if update:
                        if feed_type == "atom":
                            value = getDateTimeFromISO8601(text(value))
                        else:
                            # NOTE(review): parsedate returns None for
                            # malformed dates - confirm DateTime handles that.
                            value = DateTime(parsedate(text(value)))
                        update.updated = value

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                if feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    feed_updates.append(update)

                    update = None

                    # Honour the documented 'max_entries' limit: stop parsing
                    # once enough entries have been collected.

                    if max_entries is not None and len(feed_updates) >= max_entries:
                        break

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates
1.217 +
1.218 # Update retrieval from pages.
1.219
1.220 def getUpdatesFromPage(page, request):