1.1 --- a/MoinShare.py Sun Jan 26 00:42:49 2014 +0100
1.2 +++ b/MoinShare.py Mon Mar 31 23:54:42 2014 +0200
1.3 @@ -9,15 +9,19 @@
1.4 """
1.5
1.6 from ContentTypeSupport import getContentPreferences
1.7 -from DateSupport import getCurrentTime, getDateTimeFromRFC2822
1.8 +from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
1.9 + getDateTimeFromISO8601, DateTime
1.10 from MoinSupport import *
1.11 +from MoinRemoteSupport import *
1.12 from ItemSupport import ItemStore
1.13 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
1.14 from MoinMessageSupport import get_homedir, get_username_for_fingerprint
1.15 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
1.16 from MoinMoin import wikiutil
1.17 from email.parser import Parser
1.18 +from email.utils import parsedate
1.19 from codecs import getwriter
1.20 +import xml.dom.pulldom
1.21
1.22 try:
1.23 from cStringIO import StringIO
1.24 @@ -28,6 +32,35 @@
1.25
1.26 __version__ = "0.1"
1.27
1.28 +ATOM_NS = "http://www.w3.org/2005/Atom"
1.29 +
1.30 +# Utility functions.
1.31 +
def text(element):

    "Return the concatenated character data of 'element', ignoring child elements."

    return "".join(node.nodeValue
                   for node in element.childNodes
                   if node.nodeType == node.TEXT_NODE)
1.38 +
def children(element):

    "Return the serialised XML of the child nodes of 'element', markup included."

    return "".join(node.toxml() for node in element.childNodes)
1.44 +
def unescape(text):

    """
    Return 'text' with the XML entity references for the angle brackets and
    the ampersand replaced by the literal characters. The ampersand entity is
    replaced last so that a sequence such as "&amp;lt;" decodes to "&lt;"
    rather than being decoded twice to "<".
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
1.47 +
def linktext(element, feed_type):

    """
    Return the link location from 'element'. RSS feeds supply the location as
    the element's text; otherwise (Atom) it is the "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")
    return text(element)
1.53 +
def need_content(show_content, tagname):

    """
    Return whether full content is requested by 'show_content' and whether
    'tagname' names a content-bearing element ("content" in Atom,
    "description" in RSS).
    """

    content_requested = show_content in ("content", "description")
    content_element = tagname in ("content", "description")
    return content_requested and content_element
1.56 +
1.57 # More Moin 1.9 compatibility functions.
1.58
1.59 def has_member(request, groupname, username):
1.60 @@ -138,6 +171,160 @@
1.61 update.path.append(part_number)
1.62 return update
1.63
1.64 +# Error classes.
1.65 +
class FeedError(Exception):

    "Base class for errors retrieving or interpreting feeds."
1.68 +
class FeedMissingError(FeedError):

    "Raised when a feed resource cannot be obtained."
1.71 +
class FeedContentTypeError(FeedError):

    "Raised when a feed resource does not have a supported content type."
1.74 +
1.75 +# Feed retrieval from URLs.
1.76 +
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised if the resource cannot be obtained;
    FeedContentTypeError is raised if the resource is not an Atom, RSS or
    generic XML document.
    """

    feed_updates = []

    # Obtain the resource, using a cached version if appropriate. The cache
    # period (in seconds) is configurable via moin_share_max_cache_age.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    feed = StringIO(data)
    _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)

    if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
        raise FeedContentTypeError

    try:
        # Parse each node from the feed, pulling events instead of building
        # the whole document in memory.

        channel_title = channel_link = None

        feed_type = None
        update = None
        in_source = False

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            # Elements inside an Atom source declaration describe the entry's
            # origin, not the entry itself, and are therefore skipped.

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type and items.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements. Before any item has been seen, title
                # and link elements describe the channel itself.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary (which never overwrites content already seen).

                    if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                        if feed_type == "atom":
                            update.content_type = value.getAttribute("type") or "text"

                            # Normalise the content types and extract the
                            # content.

                            if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                                update.content = children(value)
                                update.content_type = "application/xhtml+xml"
                            elif update.content_type in ("html", "text/html"):
                                update.content = text(value)
                                update.content_type = "text/html"
                            else:
                                update.content = text(value)
                                update.content_type = "text/plain"
                        else:
                            # RSS descriptions are conventionally HTML.
                            update.content_type = "text/html"
                            update.content = text(value)

                elif feed_type == "atom" and tagname == "updated" or \
                    feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    if update:
                        if feed_type == "atom":
                            value = getDateTimeFromISO8601(text(value))
                        else:
                            # NOTE(review): parsedate returns None for
                            # malformed dates - confirm DateTime handles that.
                            value = DateTime(parsedate(text(value)))
                        update.updated = value

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                if feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    feed_updates.append(update)

                    update = None

                    # Honour the documented 'max_entries' limit: stop parsing
                    # once enough entries have been collected.

                    if max_entries is not None and len(feed_updates) >= max_entries:
                        break

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates
1.217 +
1.218 # Update retrieval from pages.
1.219
1.220 def getUpdatesFromPage(page, request):