1.1 --- a/MoinShare.py Sun Jan 26 00:42:49 2014 +0100
1.2 +++ b/MoinShare.py Mon Mar 31 23:54:42 2014 +0200
1.3 @@ -9,15 +9,19 @@
1.4 """
1.5
1.6 from ContentTypeSupport import getContentPreferences
1.7 -from DateSupport import getCurrentTime, getDateTimeFromRFC2822
1.8 +from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
1.9 + getDateTimeFromISO8601, DateTime
1.10 from MoinSupport import *
1.11 +from MoinRemoteSupport import *
1.12 from ItemSupport import ItemStore
1.13 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
1.14 from MoinMessageSupport import get_homedir, get_username_for_fingerprint
1.15 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
1.16 from MoinMoin import wikiutil
1.17 from email.parser import Parser
1.18 +from email.utils import parsedate
1.19 from codecs import getwriter
1.20 +import xml.dom.pulldom
1.21
1.22 try:
1.23 from cStringIO import StringIO
1.24 @@ -28,6 +32,35 @@
1.25
1.26 __version__ = "0.1"
1.27
1.28 +ATOM_NS = "http://www.w3.org/2005/Atom"
1.29 +
1.30 +# Utility functions.
1.31 +
1.32 +def text(element):
1.33 + nodes = []
1.34 + for node in element.childNodes:
1.35 + if node.nodeType == node.TEXT_NODE:
1.36 + nodes.append(node.nodeValue)
1.37 + return "".join(nodes)
1.38 +
1.39 +def children(element):
1.40 + nodes = []
1.41 + for node in element.childNodes:
1.42 + nodes.append(node.toxml())
1.43 + return "".join(nodes)
1.44 +
1.45 +def unescape(text):
1.46 +    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
1.47 +
1.48 +def linktext(element, feed_type):
1.49 + if feed_type == "rss":
1.50 + return text(element)
1.51 + else:
1.52 + return element.getAttribute("href")
1.53 +
1.54 +def need_content(show_content, tagname):
1.55 + return show_content in ("content", "description") and tagname in ("content", "description")
1.56 +
1.57 # More Moin 1.9 compatibility functions.
1.58
1.59 def has_member(request, groupname, username):
1.60 @@ -138,6 +171,160 @@
1.61 update.path.append(part_number)
1.62 return update
1.63
1.64 +# Error classes.
1.65 +
1.66 +class FeedError(Exception):
1.67 + pass
1.68 +
1.69 +class FeedMissingError(FeedError):
1.70 + pass
1.71 +
1.72 +class FeedContentTypeError(FeedError):
1.73 + pass
1.74 +
1.75 +# Feed retrieval from URLs.
1.76 +
1.77 +def getUpdates(request, feed_url, max_entries, show_content):
1.78 +
1.79 + """
1.80 + Using the given 'request', retrieve from 'feed_url' up to the given number
1.81 + 'max_entries' of update entries. The 'show_content' parameter can indicate
1.82 + that a "summary" is to be obtained for each update, that the "content" of
1.83 + each update is to be obtained (falling back to a summary if no content is
1.84 + provided), or no content (indicated by a false value) is to be obtained.
1.85 +
1.86 + A tuple of the form ((feed_type, channel_title, channel_link), updates) is
1.87 + returned.
1.88 + """
1.89 +
1.90 + feed_updates = []
1.91 +
1.92 + # Obtain the resource, using a cached version if appropriate.
1.93 +
1.94 + max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
1.95 + data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
1.96 + if not data:
1.97 + raise FeedMissingError
1.98 +
1.99 + # Interpret the cached feed.
1.100 +
1.101 + feed = StringIO(data)
1.102 + _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)
1.103 +
1.104 + if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
1.105 + raise FeedContentTypeError
1.106 +
1.107 + try:
1.108 + # Parse each node from the feed.
1.109 +
1.110 + channel_title = channel_link = None
1.111 +
1.112 + feed_type = None
1.113 + update = None
1.114 + in_source = False
1.115 +
1.116 + events = xml.dom.pulldom.parse(feed)
1.117 +
1.118 + for event, value in events:
1.119 +
1.120 + if not in_source and event == xml.dom.pulldom.START_ELEMENT:
1.121 + tagname = value.localName
1.122 +
1.123 + # Detect the feed type and items.
1.124 +
1.125 + if tagname == "feed" and value.namespaceURI == ATOM_NS:
1.126 + feed_type = "atom"
1.127 +
1.128 + elif tagname == "rss":
1.129 + feed_type = "rss"
1.130 +
1.131 + # Detect items.
1.132 +
1.133 + elif feed_type == "rss" and tagname == "item" or \
1.134 + feed_type == "atom" and tagname == "entry":
1.135 +
1.136 + update = Update()
1.137 +
1.138 + # Detect source declarations.
1.139 +
1.140 + elif feed_type == "atom" and tagname == "source":
1.141 + in_source = True
1.142 +
1.143 + # Handle item elements.
1.144 +
1.145 + elif tagname == "title":
1.146 + events.expandNode(value)
1.147 + if update:
1.148 + update.title = text(value)
1.149 + else:
1.150 + channel_title = text(value)
1.151 +
1.152 + elif tagname == "link":
1.153 + events.expandNode(value)
1.154 + if update:
1.155 + update.link = linktext(value, feed_type)
1.156 + else:
1.157 + channel_link = linktext(value, feed_type)
1.158 +
1.159 + elif show_content and (
1.160 + feed_type == "atom" and tagname in ("content", "summary") or
1.161 + feed_type == "rss" and tagname == "description"):
1.162 +
1.163 + events.expandNode(value)
1.164 +
1.165 + # Obtain content where requested or, failing that, a
1.166 + # summary.
1.167 +
1.168 + if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
1.169 + if feed_type == "atom":
1.170 + update.content_type = value.getAttribute("type") or "text"
1.171 +
1.172 + # Normalise the content types and extract the
1.173 + # content.
1.174 +
1.175 + if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
1.176 + update.content = children(value)
1.177 + update.content_type = "application/xhtml+xml"
1.178 + elif update.content_type in ("html", "text/html"):
1.179 + update.content = text(value)
1.180 + update.content_type = "text/html"
1.181 + else:
1.182 + update.content = text(value)
1.183 + update.content_type = "text/plain"
1.184 + else:
1.185 + update.content_type = "text/html"
1.186 + update.content = text(value)
1.187 +
1.188 + elif feed_type == "atom" and tagname == "updated" or \
1.189 + feed_type == "rss" and tagname == "pubDate":
1.190 +
1.191 + events.expandNode(value)
1.192 +
1.193 + if update:
1.194 + if feed_type == "atom":
1.195 + value = getDateTimeFromISO8601(text(value))
1.196 + else:
1.197 + value = DateTime(parsedate(text(value)))
1.198 + update.updated = value
1.199 +
1.200 + elif event == xml.dom.pulldom.END_ELEMENT:
1.201 + tagname = value.localName
1.202 +
1.203 + if feed_type == "rss" and tagname == "item" or \
1.204 + feed_type == "atom" and tagname == "entry":
1.205 +
1.206 + feed_updates.append(update)
1.207 +
1.208 + update = None
1.209 +
1.210 + elif feed_type == "atom" and tagname == "source":
1.211 + in_source = False
1.212 +
1.213 + finally:
1.214 + feed.close()
1.215 +
1.216 + return (feed_type, channel_title, channel_link), feed_updates
1.217 +
1.218 # Update retrieval from pages.
1.219
1.220 def getUpdatesFromPage(page, request):
2.1 --- a/macros/SharedContent.py Sun Jan 26 00:42:49 2014 +0100
2.2 +++ b/macros/SharedContent.py Mon Mar 31 23:54:42 2014 +0200
2.3 @@ -2,210 +2,19 @@
2.4 """
2.5 MoinMoin - SharedContent macro, based on the FeedReader macro
2.6
2.7 - @copyright: 2008, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
2.8 + @copyright: 2008, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
2.9 @license: GNU GPL (v2 or later), see COPYING.txt for details.
2.10 """
2.11
2.12 -from DateSupport import getDateTimeFromISO8601, DateTime
2.13 from MoinMoin.Page import Page
2.14 -from MoinRemoteSupport import *
2.15 from MoinSupport import parseMacroArguments
2.16 -from MoinShare import getUpdateSources, getUpdatesFromPage, \
2.17 - getUpdatesFromStore, formatUpdate, \
2.18 - Update
2.19 -from email.utils import parsedate
2.20 -import xml.dom.pulldom
2.21 -
2.22 -try:
2.23 - from cStringIO import StringIO
2.24 -except ImportError:
2.25 - from StringIO import StringIO
2.26 +from MoinShare import getUpdateSources, getUpdates, \
2.27 + getUpdatesFromPage, getUpdatesFromStore, \
2.28 + formatUpdate, Update
2.29
2.30 Dependencies = ["time"]
2.31
2.32 MAX_ENTRIES = 5
2.33 -ATOM_NS = "http://www.w3.org/2005/Atom"
2.34 -
2.35 -# Utility functions.
2.36 -
2.37 -def text(element):
2.38 - nodes = []
2.39 - for node in element.childNodes:
2.40 - if node.nodeType == node.TEXT_NODE:
2.41 - nodes.append(node.nodeValue)
2.42 - return "".join(nodes)
2.43 -
2.44 -def children(element):
2.45 - nodes = []
2.46 - for node in element.childNodes:
2.47 - nodes.append(node.toxml())
2.48 - return "".join(nodes)
2.49 -
2.50 -def unescape(text):
2.51 -    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
2.52 -
2.53 -def linktext(element, feed_type):
2.54 - if feed_type == "rss":
2.55 - return text(element)
2.56 - else:
2.57 - return element.getAttribute("href")
2.58 -
2.59 -def need_content(show_content, tagname):
2.60 - return show_content in ("content", "description") and tagname in ("content", "description")
2.61 -
2.62 -# Error classes.
2.63 -
2.64 -class FeedError(Exception):
2.65 - pass
2.66 -
2.67 -class FeedMissingError(FeedError):
2.68 - pass
2.69 -
2.70 -class FeedContentTypeError(FeedError):
2.71 - pass
2.72 -
2.73 -# Feed retrieval.
2.74 -
2.75 -def getUpdates(request, feed_url, max_entries, show_content):
2.76 -
2.77 - """
2.78 - Using the given 'request', retrieve from 'feed_url' up to the given number
2.79 - 'max_entries' of update entries. The 'show_content' parameter can indicate
2.80 - that a "summary" is to be obtained for each update, that the "content" of
2.81 - each update is to be obtained (falling back to a summary if no content is
2.82 - provided), or no content (indicated by a false value) is to be obtained.
2.83 -
2.84 - A tuple of the form ((feed_type, channel_title, channel_link), updates) is
2.85 - returned.
2.86 - """
2.87 -
2.88 - feed_updates = []
2.89 -
2.90 - # Obtain the resource, using a cached version if appropriate.
2.91 -
2.92 - max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
2.93 - data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
2.94 - if not data:
2.95 - raise FeedMissingError
2.96 -
2.97 - # Interpret the cached feed.
2.98 -
2.99 - feed = StringIO(data)
2.100 - _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)
2.101 -
2.102 - if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
2.103 - raise FeedContentTypeError
2.104 -
2.105 - try:
2.106 - # Parse each node from the feed.
2.107 -
2.108 - channel_title = channel_link = None
2.109 -
2.110 - feed_type = None
2.111 - update = None
2.112 - in_source = False
2.113 -
2.114 - events = xml.dom.pulldom.parse(feed)
2.115 -
2.116 - for event, value in events:
2.117 -
2.118 - if not in_source and event == xml.dom.pulldom.START_ELEMENT:
2.119 - tagname = value.localName
2.120 -
2.121 - # Detect the feed type and items.
2.122 -
2.123 - if tagname == "feed" and value.namespaceURI == ATOM_NS:
2.124 - feed_type = "atom"
2.125 -
2.126 - elif tagname == "rss":
2.127 - feed_type = "rss"
2.128 -
2.129 - # Detect items.
2.130 -
2.131 - elif feed_type == "rss" and tagname == "item" or \
2.132 - feed_type == "atom" and tagname == "entry":
2.133 -
2.134 - update = Update()
2.135 -
2.136 - # Detect source declarations.
2.137 -
2.138 - elif feed_type == "atom" and tagname == "source":
2.139 - in_source = True
2.140 -
2.141 - # Handle item elements.
2.142 -
2.143 - elif tagname == "title":
2.144 - events.expandNode(value)
2.145 - if update:
2.146 - update.title = text(value)
2.147 - else:
2.148 - channel_title = text(value)
2.149 -
2.150 - elif tagname == "link":
2.151 - events.expandNode(value)
2.152 - if update:
2.153 - update.link = linktext(value, feed_type)
2.154 - else:
2.155 - channel_link = linktext(value, feed_type)
2.156 -
2.157 - elif show_content and (
2.158 - feed_type == "atom" and tagname in ("content", "summary") or
2.159 - feed_type == "rss" and tagname == "description"):
2.160 -
2.161 - events.expandNode(value)
2.162 -
2.163 - # Obtain content where requested or, failing that, a
2.164 - # summary.
2.165 -
2.166 - if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
2.167 - if feed_type == "atom":
2.168 - update.content_type = value.getAttribute("type") or "text"
2.169 -
2.170 - # Normalise the content types and extract the
2.171 - # content.
2.172 -
2.173 - if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
2.174 - update.content = children(value)
2.175 - update.content_type = "application/xhtml+xml"
2.176 - elif update.content_type in ("html", "text/html"):
2.177 - update.content = text(value)
2.178 - update.content_type = "text/html"
2.179 - else:
2.180 - update.content = text(value)
2.181 - update.content_type = "text/plain"
2.182 - else:
2.183 - update.content_type = "text/html"
2.184 - update.content = text(value)
2.185 -
2.186 - elif feed_type == "atom" and tagname == "updated" or \
2.187 - feed_type == "rss" and tagname == "pubDate":
2.188 -
2.189 - events.expandNode(value)
2.190 -
2.191 - if update:
2.192 - if feed_type == "atom":
2.193 - value = getDateTimeFromISO8601(text(value))
2.194 - else:
2.195 - value = DateTime(parsedate(text(value)))
2.196 - update.updated = value
2.197 -
2.198 - elif event == xml.dom.pulldom.END_ELEMENT:
2.199 - tagname = value.localName
2.200 -
2.201 - if feed_type == "rss" and tagname == "item" or \
2.202 - feed_type == "atom" and tagname == "entry":
2.203 -
2.204 - feed_updates.append(update)
2.205 -
2.206 - update = None
2.207 -
2.208 - elif feed_type == "atom" and tagname == "source":
2.209 - in_source = False
2.210 -
2.211 - finally:
2.212 - feed.close()
2.213 -
2.214 - return (feed_type, channel_title, channel_link), feed_updates
2.215
2.216 # The macro itself.
2.217