# HG changeset patch
# User Paul Boddie
# Date 1396475879 -7200
# Node ID 006442452766085502241f74d53a3eb244439998
# Parent be8948aeef47e5fa9fcc8c606cc856e221e85595
Made URL retrieval slightly more general, supporting retrieval from IMAP mailboxes.

diff -r be8948aeef47 -r 006442452766 MoinShare.py
--- a/MoinShare.py	Wed Apr 02 00:41:37 2014 +0200
+++ b/MoinShare.py	Wed Apr 02 23:57:59 2014 +0200
@@ -202,10 +202,16 @@
     if feed_url.startswith("file:"):
         raise FeedMissingError
 
+    elif feed_url.startswith("imap"):
+        reader = imapreader
+
+    else:
+        reader = None
+
     # Obtain the resource, using a cached version if appropriate.
 
     max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
 
-    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
+    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
     if not data:
         raise FeedMissingError
 
@@ -217,6 +223,10 @@
 
     if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
         return getUpdatesFromFeed(f, max_entries, show_content)
+
+    elif content_type == "multipart/mixed":
+        return getUpdatesFromMailbox(f, max_entries, show_content, request)
+
     else:
         raise FeedContentTypeError
 
@@ -240,117 +250,150 @@
 
     feed_updates = []
 
-    try:
-        # Parse each node from the feed.
+    # Parse each node from the feed.
 
-        channel_title = channel_link = None
+    channel_title = channel_link = None
 
-        feed_type = None
-        update = None
-        in_source = False
+    feed_type = None
+    update = None
+    in_source = False
 
-        events = xml.dom.pulldom.parse(feed)
+    events = xml.dom.pulldom.parse(feed)
 
-        for event, value in events:
+    for event, value in events:
 
-            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
-                tagname = value.localName
+        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
+            tagname = value.localName
 
-                # Detect the feed type and items.
+            # Detect the feed type and items.
 
-                if tagname == "feed" and value.namespaceURI == ATOM_NS:
-                    feed_type = "atom"
+            if tagname == "feed" and value.namespaceURI == ATOM_NS:
+                feed_type = "atom"
 
-                elif tagname == "rss":
-                    feed_type = "rss"
+            elif tagname == "rss":
+                feed_type = "rss"
 
-                # Detect items.
+            # Detect items.
 
-                elif feed_type == "rss" and tagname == "item" or \
-                     feed_type == "atom" and tagname == "entry":
+            elif feed_type == "rss" and tagname == "item" or \
+                 feed_type == "atom" and tagname == "entry":
 
-                    update = Update()
+                update = Update()
 
-                # Detect source declarations.
+            # Detect source declarations.
 
-                elif feed_type == "atom" and tagname == "source":
-                    in_source = True
+            elif feed_type == "atom" and tagname == "source":
+                in_source = True
 
-                # Handle item elements.
+            # Handle item elements.
 
-                elif tagname == "title":
-                    events.expandNode(value)
-                    if update:
-                        update.title = text(value)
-                    else:
-                        channel_title = text(value)
+            elif tagname == "title":
+                events.expandNode(value)
+                if update:
+                    update.title = text(value)
+                else:
+                    channel_title = text(value)
 
-                elif tagname == "link":
-                    events.expandNode(value)
-                    if update:
-                        update.link = linktext(value, feed_type)
-                    else:
-                        channel_link = linktext(value, feed_type)
+            elif tagname == "link":
+                events.expandNode(value)
+                if update:
+                    update.link = linktext(value, feed_type)
+                else:
+                    channel_link = linktext(value, feed_type)
 
-                elif show_content and (
-                    feed_type == "atom" and tagname in ("content", "summary") or
-                    feed_type == "rss" and tagname == "description"):
-
-                    events.expandNode(value)
+            elif show_content and (
+                feed_type == "atom" and tagname in ("content", "summary") or
+                feed_type == "rss" and tagname == "description"):
 
-                    # Obtain content where requested or, failing that, a
-                    # summary.
+                events.expandNode(value)
 
-                    if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
-                        if feed_type == "atom":
-                            update.content_type = value.getAttribute("type") or "text"
+                # Obtain content where requested or, failing that, a
+                # summary.
 
-                            # Normalise the content types and extract the
-                            # content.
+                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
+                    if feed_type == "atom":
+                        update.content_type = value.getAttribute("type") or "text"
+
+                        # Normalise the content types and extract the
+                        # content.
 
-                            if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
-                                update.content = children(value)
-                                update.content_type = "application/xhtml+xml"
-                            elif update.content_type in ("html", "text/html"):
-                                update.content = text(value)
-                                update.content_type = "text/html"
-                            else:
-                                update.content = text(value)
-                                update.content_type = "text/plain"
+                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
+                            update.content = children(value)
+                            update.content_type = "application/xhtml+xml"
+                        elif update.content_type in ("html", "text/html"):
+                            update.content = text(value)
+                            update.content_type = "text/html"
                         else:
-                            update.content_type = "text/html"
                             update.content = text(value)
-
-                elif feed_type == "atom" and tagname == "updated" or \
-                     feed_type == "rss" and tagname == "pubDate":
-
-                    events.expandNode(value)
+                            update.content_type = "text/plain"
+                    else:
+                        update.content_type = "text/html"
+                        update.content = text(value)
 
-                    if update:
-                        if feed_type == "atom":
-                            value = getDateTimeFromISO8601(text(value))
-                        else:
-                            value = DateTime(parsedate(text(value)))
-                        update.updated = value
+            elif feed_type == "atom" and tagname == "updated" or \
+                 feed_type == "rss" and tagname == "pubDate":
+
+                events.expandNode(value)
 
-            elif event == xml.dom.pulldom.END_ELEMENT:
-                tagname = value.localName
+                if update:
+                    if feed_type == "atom":
+                        value = getDateTimeFromISO8601(text(value))
+                    else:
+                        value = DateTime(parsedate(text(value)))
+                    update.updated = value
 
-                if feed_type == "rss" and tagname == "item" or \
-                   feed_type == "atom" and tagname == "entry":
-
-                    feed_updates.append(update)
+        elif event == xml.dom.pulldom.END_ELEMENT:
+            tagname = value.localName
 
-                    update = None
+            if feed_type == "rss" and tagname == "item" or \
+               feed_type == "atom" and tagname == "entry":
+
+                feed_updates.append(update)
 
-                elif feed_type == "atom" and tagname == "source":
-                    in_source = False
+                update = None
 
-    finally:
-        feed.close()
+            elif feed_type == "atom" and tagname == "source":
+                in_source = False
 
     return (feed_type, channel_title, channel_link), feed_updates
 
+# Update retrieval from mailboxes and multipart messages.
+
+def getUpdatesFromMailbox(feed, max_entries, show_content, request):
+
+    """
+    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
+    The 'show_content' parameter can indicate that a "summary" is to be obtained
+    for each update, that the "content" of each update is to be obtained
+    (falling back to a summary if no content is provided), or no content
+    (indicated by a false value) is to be obtained.
+
+    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
+    returned.
+    """
+
+    mailbox = Parser().parse(feed)
+
+    feed_updates = []
+
+    # Parse each message from the feed as a separate update.
+
+    for message_number, part in enumerate(mailbox.get_payload()):
+        update = Update()
+        update.fragment = update.updated = getDateTimeFromRFC2822(part.get("date"))
+        update.title = part.get("subject", "Update #%d" % message_number)
+        update.message_number = message_number
+
+        update.content, update.content_type, update.parts, actual_author = \
+            getUpdateContentFromPart(part, request)
+
+        if actual_author:
+            update.author = actual_author
+
+        feed_updates.append(update)
+
+    return ("mbox", None, None), feed_updates
+
 # Update retrieval from pages.
 
 def getUpdatesFromPage(page, request):