# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1368813904 -7200
# Node ID 145f3765afdc8e6a9c890c48caac2fce38e65bdc
# Parent  885085e0371597d2a9af3fc32d1880d2674f4953
Added support for presenting multipart/related messages obtained from message stores.
To support incoming HTML messages, attachment/component references are rewritten
using an extended version of the usual Moin HTML parsing framework.

diff -r 885085e03715 -r 145f3765afdc MoinShare.py
--- a/MoinShare.py	Thu May 16 15:34:49 2013 +0200
+++ b/MoinShare.py	Fri May 17 20:05:04 2013 +0200
@@ -3,22 +3,23 @@
     MoinMoin - MoinShare library
 
     @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
+    @copyright: 2003-2006 Edgewall Software
+    @copyright: 2006 MoinMoin:AlexanderSchremmer
     @license: GNU GPL (v2 or later), see COPYING.txt for details.
 """
 
 from ContentTypeSupport import getContentPreferences
 from DateSupport import getCurrentTime, getDateTimeFromRFC2822
 from MoinSupport import *
+from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup
 from MoinMoin import wikiutil
 from email.parser import Parser
-import re
 
 try:
     from cStringIO import StringIO
 except ImportError:
     from StringIO import StringIO
 
-escape = wikiutil.escape
 _getFragments = getFragments
 
 __version__ = "0.1"
@@ -121,8 +122,13 @@
 
         # Message-related attributes.
 
+        self.message_number = None
         self.parts = None
 
+        # Message- and page-related attributes.
+
+        self.page = None
+
     def __cmp__(self, other):
         if self.updated is None and other.updated is not None:
             return 1
@@ -187,6 +193,7 @@
 
             update.content_type = "text/html"
 
+        update.page = page
         update.link = page.url(request)
         update.updated = updated
 
@@ -220,13 +227,30 @@
         update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
         update.title = message.get("subject", "Update #%d" % n)
 
+        update.page = page
+        update.message_number = n
+
         # Determine whether the message has several representations.
 
+        # For a single part, use it as the update content.
+
         if not message.is_multipart():
             update.content = message.get_payload()
             update.content_type = message.get_content_type()
+
+        # For a collection of related parts, use the first as the update content
+        # and assume that the formatter will reference the other parts.
+
+        elif message.get_content_subtype() == "related":
+            main_part = message.get_payload()[0]
+            update.content = main_part.get_payload()
+            update.content_type = main_part.get_content_type()
+
+        # Otherwise, just obtain the parts for separate display.
+
         else:
             update.parts = message.get_payload()
+            update.content_type = message.get_content_type()
 
         updates.append(update)
 
@@ -276,4 +300,94 @@
 
     return parameters
 
+# HTML parsing support.
+
+class IncomingHTMLSanitizer(HTMLSanitizer):
+
+    "An HTML sanitizer that redirects 'cid:' references to message part links."
+
+    def __init__(self, out, request, page, message_number):
+        HTMLSanitizer.__init__(self, out)
+        self.page = page
+        self.request = request
+        self.message_number = message_number
+
+    def rewrite_reference(self, ref):
+
+        "Return a ReadMessage action link for a 'cid:' reference, else 'ref' itself."
+
+        prefix = "cid:"
+        if not ref.startswith(prefix):
+            return ref
+        query = {"action" : "ReadMessage", "doit" : "1",
+                 "message" : self.message_number, "part" : ref[len(prefix):]}
+        return self.page.url(self.request, query)
+
+    def handle_starttag(self, tag, attrs):
+        # Rewrite only attributes listed in uri_attrs, then defer to the base class.
+        rewritten = []
+        for name, value in attrs:
+            if name in self.uri_attrs:
+                value = self.rewrite_reference(value)
+            rewritten.append((name, value))
+        HTMLSanitizer.handle_starttag(self, tag, rewritten)
+
+class IncomingMarkup(Markup):
+
+    "Markup for incoming HTML whose sanitising also rewrites message references."
+
+    def sanitize(self, request, page, message_number):
+        output = StringIO()
+        rewriter = IncomingHTMLSanitizer(output, request, page, message_number)
+        rewriter.feed(self.stripentities(keepxmlentities=True))
+        return IncomingMarkup(output.getvalue())
+
+class IncomingHTMLParser:
+
+    "Filters and rewrites incoming HTML content, writing the result to the request."
+
+    def __init__(self, raw, request, **kw):
+        self.raw = raw
+        self.request = request
+        self.message_number = None  # left unset here; assigned by whoever creates the parser
+        self.page = None            # left unset here; assigned by whoever creates the parser
+
+    def format(self, formatter, **kw):
+
+        "Write the sanitised raw HTML, or a system message if the HTML cannot be parsed."
+
+        from HTMLParser import HTMLParseError  # NOTE(review): no module-level import is visible for this name
+        try:
+            markup = IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)
+            self.request.write(formatter.rawHTML(markup))
+        except HTMLParseError, e:
+            line = self.raw.splitlines()[e.lineno - 1].strip()
+            self.request.write(formatter.sysmsg(1) + formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, line)) + formatter.sysmsg(0))
+
+class MakeIncomingHTMLParser:
+
+    "A factory producing parsers bound to a particular page and message."
+
+    def __init__(self, page, message_number):
+
+        "Remember the 'page' and 'message_number' to be given to each new parser."
+
+        self.page = page
+        self.message_number = message_number
+
+    def __call__(self, *args, **kw):
+        configured = IncomingHTMLParser(*args, **kw)
+        for name in ("message_number", "page"):
+            setattr(configured, name, getattr(self, name))
+        return configured
+
+def get_make_parser(page, message_number):
+
+    """
+    Return a callable making parsers for message 'message_number' on 'page'.
+    """
+
+    factory = MakeIncomingHTMLParser(page, message_number)
+    return factory
+
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 885085e03715 -r 145f3765afdc docs/COPYING.txt
--- a/docs/COPYING.txt	Thu May 16 15:34:49 2013 +0200
+++ b/docs/COPYING.txt	Fri May 17 20:05:04 2013 +0200
@@ -3,6 +3,12 @@
 
 Copyright (C) 2008, 2011, 2012, 2013 Paul Boddie <paul@boddie.org.uk>
 
+Small HTML parsing-related fragments incorporated from
+MoinMoin.parser.text_html and MoinMoin.support.htmlmarkup:
+
+Copyright (C) 2003-2006 Edgewall Software
+Copyright (C) 2006 MoinMoin:AlexanderSchremmer
+
 This software is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License as
 published by the Free Software Foundation; either version 2 of
diff -r 885085e03715 -r 145f3765afdc macros/SharedContent.py
--- a/macros/SharedContent.py	Thu May 16 15:34:49 2013 +0200
+++ b/macros/SharedContent.py	Fri May 17 20:05:04 2013 +0200
@@ -10,7 +10,8 @@
 from MoinMoin.Page import Page
 from MoinRemoteSupport import *
 from MoinSupport import parseMacroArguments, getParsersForContentType, formatText
-from MoinShare import getUpdateSources, getUpdatesFromPage, getUpdatesFromStore, Update
+from MoinShare import getUpdateSources, getUpdatesFromPage, \
+                      getUpdatesFromStore, Update, get_make_parser
 from email.utils import parsedate
 import xml.dom.pulldom
 
@@ -333,7 +334,10 @@
             append(fmt.div(on=1, css_class="moinshare-content"))
 
             if update.content:
-                parsers = getParsersForContentType(request.cfg, update.content_type)
+                if update.content_type == "text/html" and update.message_number is not None:
+                    parsers = [get_make_parser(update.page, update.message_number)]
+                else:
+                    parsers = getParsersForContentType(request.cfg, update.content_type)
 
                 if parsers:
                     for parser_cls in parsers: