# HG changeset patch # User Paul Boddie # Date 1368813904 -7200 # Node ID 145f3765afdc8e6a9c890c48caac2fce38e65bdc # Parent 885085e0371597d2a9af3fc32d1880d2674f4953 Added support for presenting multipart/related messages obtained from message stores. To support incoming HTML messages, attachment/component references are rewritten using an extended version of the usual Moin HTML parsing framework. diff -r 885085e03715 -r 145f3765afdc MoinShare.py --- a/MoinShare.py Thu May 16 15:34:49 2013 +0200 +++ b/MoinShare.py Fri May 17 20:05:04 2013 +0200 @@ -3,22 +3,23 @@ MoinMoin - MoinShare library @copyright: 2011, 2012, 2013 by Paul Boddie + @copyright: 2003-2006 Edgewall Software + @copyright: 2006 MoinMoin:AlexanderSchremmer @license: GNU GPL (v2 or later), see COPYING.txt for details. """ from ContentTypeSupport import getContentPreferences from DateSupport import getCurrentTime, getDateTimeFromRFC2822 from MoinSupport import * +from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup from MoinMoin import wikiutil from email.parser import Parser -import re try: from cStringIO import StringIO except ImportError: from StringIO import StringIO -escape = wikiutil.escape _getFragments = getFragments __version__ = "0.1" @@ -121,8 +122,13 @@ # Message-related attributes. + self.message_number = None self.parts = None + # Message- and page-related attributes. + + self.page = None + def __cmp__(self, other): if self.updated is None and other.updated is not None: return 1 @@ -187,6 +193,7 @@ update.content_type = "text/html" + update.page = page update.link = page.url(request) update.updated = updated @@ -220,13 +227,30 @@ update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date")) update.title = message.get("subject", "Update #%d" % n) + update.page = page + update.message_number = n + # Determine whether the message has several representations. + # For a single part, use it as the update content. + if not message.is_multipart(): update.content = message.get_payload() update.content_type = message.get_content_type() + + # For a collection of related parts, use the first as the update content + # and assume that the formatter will reference the other parts. + + elif message.get_content_subtype() == "related": + main_part = message.get_payload()[0] + update.content = main_part.get_payload() + update.content_type = main_part.get_content_type() + + # Otherwise, just obtain the parts for separate display. + else: update.parts = message.get_payload() + update.content_type = message.get_content_type() updates.append(update) @@ -276,4 +300,94 @@ return parameters +# HTML parsing support. + +class IncomingHTMLSanitizer(HTMLSanitizer): + + "An HTML parser that rewrites references to attachments." + + def __init__(self, out, request, page, message_number): + HTMLSanitizer.__init__(self, out) + self.request = request + self.message_number = message_number + self.page = page + + def rewrite_reference(self, ref): + if ref.startswith("cid:"): + part = ref[len("cid:"):] + action_link = self.page.url(self.request, { + "action" : "ReadMessage", "doit" : "1", + "message" : self.message_number, "part" : part + }) + return action_link + else: + return ref + + def handle_starttag(self, tag, attrs): + new_attrs = [] + for attrname, attrvalue in attrs: + if attrname in self.uri_attrs: + new_attrs.append((attrname, self.rewrite_reference(attrvalue))) + else: + new_attrs.append((attrname, attrvalue)) + HTMLSanitizer.handle_starttag(self, tag, new_attrs) + +class IncomingMarkup(Markup): + + "A special markup processor for incoming HTML." + + def sanitize(self, request, page, message_number): + out = StringIO() + sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) + sanitizer.feed(self.stripentities(keepxmlentities=True)) + return IncomingMarkup(out.getvalue()) + +class IncomingHTMLParser: + + "Filters and rewrites incoming HTML content." + + def __init__(self, raw, request, **kw): + self.raw = raw + self.request = request + self.message_number = None + self.page = None + + def format(self, formatter, **kw): + + "Send the text." + + try: + self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) + except HTMLParseError, e: + self.request.write(formatter.sysmsg(1) + + formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, + self.raw.splitlines()[e.lineno - 1].strip())) + + formatter.sysmsg(0)) + +class MakeIncomingHTMLParser: + + "A class that makes parsers configured for messages." + + def __init__(self, page, message_number): + + "Initialise with state that is used to configure instantiated parsers." + + self.message_number = message_number + self.page = page + + def __call__(self, *args, **kw): + parser = IncomingHTMLParser(*args, **kw) + parser.message_number = self.message_number + parser.page = self.page + return parser + +def get_make_parser(page, message_number): + + """ + Return a callable that will return a parser configured for the message from + the given 'page' with the given 'message_number'. + """ + + return MakeIncomingHTMLParser(page, message_number) + # vim: tabstop=4 expandtab shiftwidth=4 diff -r 885085e03715 -r 145f3765afdc docs/COPYING.txt --- a/docs/COPYING.txt Thu May 16 15:34:49 2013 +0200 +++ b/docs/COPYING.txt Fri May 17 20:05:04 2013 +0200 @@ -3,6 +3,12 @@ Copyright (C) 2008, 2011, 2012, 2013 Paul Boddie +Small HTML parsing-related fragments incorporated from +MoinMoin.parser.text_html and MoinMoin.support.htmlmarkup: + +Copyright (C) 2003-2006 Edgewall Software +Copyright (C) 2006 MoinMoin:AlexanderSchremmer + This software is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of diff -r 885085e03715 -r 145f3765afdc macros/SharedContent.py --- a/macros/SharedContent.py Thu May 16 15:34:49 2013 +0200 +++ b/macros/SharedContent.py Fri May 17 20:05:04 2013 +0200 @@ -10,7 +10,8 @@ from MoinMoin.Page import Page from MoinRemoteSupport import * from MoinSupport import parseMacroArguments, getParsersForContentType, formatText -from MoinShare import getUpdateSources, getUpdatesFromPage, getUpdatesFromStore, Update +from MoinShare import getUpdateSources, getUpdatesFromPage, \ + getUpdatesFromStore, Update, get_make_parser from email.utils import parsedate import xml.dom.pulldom @@ -333,7 +334,10 @@ append(fmt.div(on=1, css_class="moinshare-content")) if update.content: - parsers = getParsersForContentType(request.cfg, update.content_type) + if update.content_type == "text/html" and update.message_number is not None: + parsers = [get_make_parser(update.page, update.message_number)] + else: + parsers = getParsersForContentType(request.cfg, update.content_type) if parsers: for parser_cls in parsers: