1.1 --- a/MoinShare.py Sun Jan 26 00:42:49 2014 +0100
1.2 +++ b/MoinShare.py Mon Mar 31 23:54:42 2014 +0200
1.3 @@ -9,15 +9,19 @@
1.4 """
1.5
1.6 from ContentTypeSupport import getContentPreferences
1.7 -from DateSupport import getCurrentTime, getDateTimeFromRFC2822
1.8 +from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
1.9 + getDateTimeFromISO8601, DateTime
1.10 from MoinSupport import *
1.11 +from MoinRemoteSupport import *
1.12 from ItemSupport import ItemStore
1.13 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
1.14 from MoinMessageSupport import get_homedir, get_username_for_fingerprint
1.15 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
1.16 from MoinMoin import wikiutil
1.17 from email.parser import Parser
1.18 +from email.utils import parsedate
1.19 from codecs import getwriter
1.20 +import xml.dom.pulldom
1.21
1.22 try:
1.23 from cStringIO import StringIO
1.24 @@ -28,6 +32,35 @@
1.25
1.26 __version__ = "0.1"
1.27
1.28 +ATOM_NS = "http://www.w3.org/2005/Atom"
1.29 +
1.30 +# Utility functions.
1.31 +
1.32 +def text(element):
1.33 + nodes = []
1.34 + for node in element.childNodes:
1.35 + if node.nodeType == node.TEXT_NODE:
1.36 + nodes.append(node.nodeValue)
1.37 + return "".join(nodes)
1.38 +
1.39 +def children(element):
1.40 + nodes = []
1.41 + for node in element.childNodes:
1.42 + nodes.append(node.toxml())
1.43 + return "".join(nodes)
1.44 +
1.45 +def unescape(text):
1.46 +    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
1.47 +
1.48 +def linktext(element, feed_type):
1.49 + if feed_type == "rss":
1.50 + return text(element)
1.51 + else:
1.52 + return element.getAttribute("href")
1.53 +
1.54 +def need_content(show_content, tagname):
1.55 + return show_content in ("content", "description") and tagname in ("content", "description")
1.56 +
1.57 # More Moin 1.9 compatibility functions.
1.58
1.59 def has_member(request, groupname, username):
1.60 @@ -138,6 +171,160 @@
1.61 update.path.append(part_number)
1.62 return update
1.63
1.64 +# Error classes.
1.65 +
1.66 +class FeedError(Exception):
1.67 + pass
1.68 +
1.69 +class FeedMissingError(FeedError):
1.70 + pass
1.71 +
1.72 +class FeedContentTypeError(FeedError):
1.73 + pass
1.74 +
1.75 +# Feed retrieval from URLs.
1.76 +
1.77 +def getUpdates(request, feed_url, max_entries, show_content):
1.78 +
1.79 + """
1.80 + Using the given 'request', retrieve from 'feed_url' up to the given number
1.81 + 'max_entries' of update entries. The 'show_content' parameter can indicate
1.82 + that a "summary" is to be obtained for each update, that the "content" of
1.83 + each update is to be obtained (falling back to a summary if no content is
1.84 + provided), or no content (indicated by a false value) is to be obtained.
1.85 +
1.86 + A tuple of the form ((feed_type, channel_title, channel_link), updates) is
1.87 + returned.
1.88 + """
1.89 +
1.90 + feed_updates = []
1.91 +
1.92 + # Obtain the resource, using a cached version if appropriate.
1.93 +
1.94 + max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
1.95 + data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
1.96 + if not data:
1.97 + raise FeedMissingError
1.98 +
1.99 + # Interpret the cached feed.
1.100 +
1.101 + feed = StringIO(data)
1.102 + _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)
1.103 +
1.104 + if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
1.105 + raise FeedContentTypeError
1.106 +
1.107 + try:
1.108 + # Parse each node from the feed.
1.109 +
1.110 + channel_title = channel_link = None
1.111 +
1.112 + feed_type = None
1.113 + update = None
1.114 + in_source = False
1.115 +
1.116 + events = xml.dom.pulldom.parse(feed)
1.117 +
1.118 + for event, value in events:
1.119 +
1.120 + if not in_source and event == xml.dom.pulldom.START_ELEMENT:
1.121 + tagname = value.localName
1.122 +
1.123 + # Detect the feed type and items.
1.124 +
1.125 + if tagname == "feed" and value.namespaceURI == ATOM_NS:
1.126 + feed_type = "atom"
1.127 +
1.128 + elif tagname == "rss":
1.129 + feed_type = "rss"
1.130 +
1.131 + # Detect items.
1.132 +
1.133 + elif feed_type == "rss" and tagname == "item" or \
1.134 + feed_type == "atom" and tagname == "entry":
1.135 +
1.136 + update = Update()
1.137 +
1.138 + # Detect source declarations.
1.139 +
1.140 + elif feed_type == "atom" and tagname == "source":
1.141 + in_source = True
1.142 +
1.143 + # Handle item elements.
1.144 +
1.145 + elif tagname == "title":
1.146 + events.expandNode(value)
1.147 + if update:
1.148 + update.title = text(value)
1.149 + else:
1.150 + channel_title = text(value)
1.151 +
1.152 + elif tagname == "link":
1.153 + events.expandNode(value)
1.154 + if update:
1.155 + update.link = linktext(value, feed_type)
1.156 + else:
1.157 + channel_link = linktext(value, feed_type)
1.158 +
1.159 + elif show_content and (
1.160 + feed_type == "atom" and tagname in ("content", "summary") or
1.161 + feed_type == "rss" and tagname == "description"):
1.162 +
1.163 + events.expandNode(value)
1.164 +
1.165 + # Obtain content where requested or, failing that, a
1.166 + # summary.
1.167 +
1.168 + if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
1.169 + if feed_type == "atom":
1.170 + update.content_type = value.getAttribute("type") or "text"
1.171 +
1.172 + # Normalise the content types and extract the
1.173 + # content.
1.174 +
1.175 + if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
1.176 + update.content = children(value)
1.177 + update.content_type = "application/xhtml+xml"
1.178 + elif update.content_type in ("html", "text/html"):
1.179 + update.content = text(value)
1.180 + update.content_type = "text/html"
1.181 + else:
1.182 + update.content = text(value)
1.183 + update.content_type = "text/plain"
1.184 + else:
1.185 + update.content_type = "text/html"
1.186 + update.content = text(value)
1.187 +
1.188 + elif feed_type == "atom" and tagname == "updated" or \
1.189 + feed_type == "rss" and tagname == "pubDate":
1.190 +
1.191 + events.expandNode(value)
1.192 +
1.193 + if update:
1.194 + if feed_type == "atom":
1.195 + value = getDateTimeFromISO8601(text(value))
1.196 + else:
1.197 + value = DateTime(parsedate(text(value)))
1.198 + update.updated = value
1.199 +
1.200 + elif event == xml.dom.pulldom.END_ELEMENT:
1.201 + tagname = value.localName
1.202 +
1.203 + if feed_type == "rss" and tagname == "item" or \
1.204 + feed_type == "atom" and tagname == "entry":
1.205 +
1.206 + feed_updates.append(update)
1.207 +
1.208 + update = None
1.209 +
1.210 + elif feed_type == "atom" and tagname == "source":
1.211 + in_source = False
1.212 +
1.213 + finally:
1.214 + feed.close()
1.215 +
1.216 + return (feed_type, channel_title, channel_link), feed_updates
1.217 +
1.218 # Update retrieval from pages.
1.219
1.220 def getUpdatesFromPage(page, request):
2.1 --- a/macros/SharedContent.py Sun Jan 26 00:42:49 2014 +0100
2.2 +++ b/macros/SharedContent.py Mon Mar 31 23:54:42 2014 +0200
2.3 @@ -2,210 +2,19 @@
2.4 """
2.5 MoinMoin - SharedContent macro, based on the FeedReader macro
2.6
2.7 - @copyright: 2008, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
2.8 + @copyright: 2008, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
2.9 @license: GNU GPL (v2 or later), see COPYING.txt for details.
2.10 """
2.11
2.12 -from DateSupport import getDateTimeFromISO8601, DateTime
2.13 from MoinMoin.Page import Page
2.14 -from MoinRemoteSupport import *
2.15 from MoinSupport import parseMacroArguments
2.16 -from MoinShare import getUpdateSources, getUpdatesFromPage, \
2.17 - getUpdatesFromStore, formatUpdate, \
2.18 - Update
2.19 -from email.utils import parsedate
2.20 -import xml.dom.pulldom
2.21 -
2.22 -try:
2.23 - from cStringIO import StringIO
2.24 -except ImportError:
2.25 - from StringIO import StringIO
2.26 +from MoinShare import getUpdateSources, getUpdates, \
2.27 + getUpdatesFromPage, getUpdatesFromStore, \
2.28 + formatUpdate, Update
2.29
2.30 Dependencies = ["time"]
2.31
2.32 MAX_ENTRIES = 5
2.33 -ATOM_NS = "http://www.w3.org/2005/Atom"
2.34 -
2.35 -# Utility functions.
2.36 -
2.37 -def text(element):
2.38 - nodes = []
2.39 - for node in element.childNodes:
2.40 - if node.nodeType == node.TEXT_NODE:
2.41 - nodes.append(node.nodeValue)
2.42 - return "".join(nodes)
2.43 -
2.44 -def children(element):
2.45 - nodes = []
2.46 - for node in element.childNodes:
2.47 - nodes.append(node.toxml())
2.48 - return "".join(nodes)
2.49 -
2.50 -def unescape(text):
2.51 -    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
2.52 -
2.53 -def linktext(element, feed_type):
2.54 - if feed_type == "rss":
2.55 - return text(element)
2.56 - else:
2.57 - return element.getAttribute("href")
2.58 -
2.59 -def need_content(show_content, tagname):
2.60 - return show_content in ("content", "description") and tagname in ("content", "description")
2.61 -
2.62 -# Error classes.
2.63 -
2.64 -class FeedError(Exception):
2.65 - pass
2.66 -
2.67 -class FeedMissingError(FeedError):
2.68 - pass
2.69 -
2.70 -class FeedContentTypeError(FeedError):
2.71 - pass
2.72 -
2.73 -# Feed retrieval.
2.74 -
2.75 -def getUpdates(request, feed_url, max_entries, show_content):
2.76 -
2.77 - """
2.78 - Using the given 'request', retrieve from 'feed_url' up to the given number
2.79 - 'max_entries' of update entries. The 'show_content' parameter can indicate
2.80 - that a "summary" is to be obtained for each update, that the "content" of
2.81 - each update is to be obtained (falling back to a summary if no content is
2.82 - provided), or no content (indicated by a false value) is to be obtained.
2.83 -
2.84 - A tuple of the form ((feed_type, channel_title, channel_link), updates) is
2.85 - returned.
2.86 - """
2.87 -
2.88 - feed_updates = []
2.89 -
2.90 - # Obtain the resource, using a cached version if appropriate.
2.91 -
2.92 - max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
2.93 - data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
2.94 - if not data:
2.95 - raise FeedMissingError
2.96 -
2.97 - # Interpret the cached feed.
2.98 -
2.99 - feed = StringIO(data)
2.100 - _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)
2.101 -
2.102 - if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
2.103 - raise FeedContentTypeError
2.104 -
2.105 - try:
2.106 - # Parse each node from the feed.
2.107 -
2.108 - channel_title = channel_link = None
2.109 -
2.110 - feed_type = None
2.111 - update = None
2.112 - in_source = False
2.113 -
2.114 - events = xml.dom.pulldom.parse(feed)
2.115 -
2.116 - for event, value in events:
2.117 -
2.118 - if not in_source and event == xml.dom.pulldom.START_ELEMENT:
2.119 - tagname = value.localName
2.120 -
2.121 - # Detect the feed type and items.
2.122 -
2.123 - if tagname == "feed" and value.namespaceURI == ATOM_NS:
2.124 - feed_type = "atom"
2.125 -
2.126 - elif tagname == "rss":
2.127 - feed_type = "rss"
2.128 -
2.129 - # Detect items.
2.130 -
2.131 - elif feed_type == "rss" and tagname == "item" or \
2.132 - feed_type == "atom" and tagname == "entry":
2.133 -
2.134 - update = Update()
2.135 -
2.136 - # Detect source declarations.
2.137 -
2.138 - elif feed_type == "atom" and tagname == "source":
2.139 - in_source = True
2.140 -
2.141 - # Handle item elements.
2.142 -
2.143 - elif tagname == "title":
2.144 - events.expandNode(value)
2.145 - if update:
2.146 - update.title = text(value)
2.147 - else:
2.148 - channel_title = text(value)
2.149 -
2.150 - elif tagname == "link":
2.151 - events.expandNode(value)
2.152 - if update:
2.153 - update.link = linktext(value, feed_type)
2.154 - else:
2.155 - channel_link = linktext(value, feed_type)
2.156 -
2.157 - elif show_content and (
2.158 - feed_type == "atom" and tagname in ("content", "summary") or
2.159 - feed_type == "rss" and tagname == "description"):
2.160 -
2.161 - events.expandNode(value)
2.162 -
2.163 - # Obtain content where requested or, failing that, a
2.164 - # summary.
2.165 -
2.166 - if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
2.167 - if feed_type == "atom":
2.168 - update.content_type = value.getAttribute("type") or "text"
2.169 -
2.170 - # Normalise the content types and extract the
2.171 - # content.
2.172 -
2.173 - if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
2.174 - update.content = children(value)
2.175 - update.content_type = "application/xhtml+xml"
2.176 - elif update.content_type in ("html", "text/html"):
2.177 - update.content = text(value)
2.178 - update.content_type = "text/html"
2.179 - else:
2.180 - update.content = text(value)
2.181 - update.content_type = "text/plain"
2.182 - else:
2.183 - update.content_type = "text/html"
2.184 - update.content = text(value)
2.185 -
2.186 - elif feed_type == "atom" and tagname == "updated" or \
2.187 - feed_type == "rss" and tagname == "pubDate":
2.188 -
2.189 - events.expandNode(value)
2.190 -
2.191 - if update:
2.192 - if feed_type == "atom":
2.193 - value = getDateTimeFromISO8601(text(value))
2.194 - else:
2.195 - value = DateTime(parsedate(text(value)))
2.196 - update.updated = value
2.197 -
2.198 - elif event == xml.dom.pulldom.END_ELEMENT:
2.199 - tagname = value.localName
2.200 -
2.201 - if feed_type == "rss" and tagname == "item" or \
2.202 - feed_type == "atom" and tagname == "entry":
2.203 -
2.204 - feed_updates.append(update)
2.205 -
2.206 - update = None
2.207 -
2.208 - elif feed_type == "atom" and tagname == "source":
2.209 - in_source = False
2.210 -
2.211 - finally:
2.212 - feed.close()
2.213 -
2.214 - return (feed_type, channel_title, channel_link), feed_updates
2.215
2.216 # The macro itself.
2.217