1.1 --- a/MoinShare.py Thu May 16 15:34:49 2013 +0200
1.2 +++ b/MoinShare.py Fri May 17 20:05:04 2013 +0200
1.3 @@ -3,22 +3,23 @@
1.4 MoinMoin - MoinShare library
1.5
1.6 @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
1.7 + @copyright: 2003-2006 Edgewall Software
1.8 + @copyright: 2006 MoinMoin:AlexanderSchremmer
1.9 @license: GNU GPL (v2 or later), see COPYING.txt for details.
1.10 """
1.11
1.12 from ContentTypeSupport import getContentPreferences
1.13 from DateSupport import getCurrentTime, getDateTimeFromRFC2822
1.14 from MoinSupport import *
1.15 +from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup
1.16 from MoinMoin import wikiutil
1.17 from email.parser import Parser
1.18 -import re
1.19
1.20 try:
1.21 from cStringIO import StringIO
1.22 except ImportError:
1.23 from StringIO import StringIO
1.24
1.25 -escape = wikiutil.escape
1.26 _getFragments = getFragments
1.27
1.28 __version__ = "0.1"
1.29 @@ -121,8 +122,13 @@
1.30
1.31 # Message-related attributes.
1.32
1.33 + self.message_number = None
1.34 self.parts = None
1.35
1.36 + # Message- and page-related attributes.
1.37 +
1.38 + self.page = None
1.39 +
1.40 def __cmp__(self, other):
1.41 if self.updated is None and other.updated is not None:
1.42 return 1
1.43 @@ -187,6 +193,7 @@
1.44
1.45 update.content_type = "text/html"
1.46
1.47 + update.page = page
1.48 update.link = page.url(request)
1.49 update.updated = updated
1.50
1.51 @@ -220,13 +227,30 @@
1.52 update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
1.53 update.title = message.get("subject", "Update #%d" % n)
1.54
1.55 + update.page = page
1.56 + update.message_number = n
1.57 +
1.58 # Determine whether the message has several representations.
1.59
1.60 + # For a single part, use it as the update content.
1.61 +
1.62 if not message.is_multipart():
1.63 update.content = message.get_payload()
1.64 update.content_type = message.get_content_type()
1.65 +
1.66 + # For a collection of related parts, use the first as the update content
1.67 + # and assume that the formatter will reference the other parts.
1.68 +
1.69 + elif message.get_content_subtype() == "related":
1.70 + main_part = message.get_payload()[0]
1.71 + update.content = main_part.get_payload()
1.72 + update.content_type = main_part.get_content_type()
1.73 +
1.74 + # Otherwise, just obtain the parts for separate display.
1.75 +
1.76 else:
1.77 update.parts = message.get_payload()
1.78 + update.content_type = message.get_content_type()
1.79
1.80 updates.append(update)
1.81
1.82 @@ -276,4 +300,94 @@
1.83
1.84 return parameters
1.85
1.86 +# HTML parsing support.
1.87 +
1.88 +class IncomingHTMLSanitizer(HTMLSanitizer):
1.89 +    """An HTML sanitizer that rewrites "cid:" references found in URI
1.90 +    attributes into links to the ReadMessage action of the given page."""
1.91 +
1.92 +    def __init__(self, out, request, page, message_number):
1.93 +        HTMLSanitizer.__init__(self, out) # 'out' is handed to the base sanitizer
1.94 +        self.request = request
1.95 +        self.message_number = message_number
1.96 +        self.page = page
1.97 +
1.98 +    def rewrite_reference(self, ref): # returns a ReadMessage link for "cid:" refs, else 'ref' unchanged
1.99 +        if ref and ref[:4].lower() == "cid:": # schemes are case-insensitive; valueless attributes give None
1.100 +            part = ref[len("cid:"):]
1.101 +            action_link = self.page.url(self.request, {
1.102 +                "action" : "ReadMessage", "doit" : "1",
1.103 +                "message" : self.message_number, "part" : part
1.104 +                })
1.105 +            return action_link
1.106 +        else:
1.107 +            return ref
1.108 +
1.109 +    def handle_starttag(self, tag, attrs): # rewrite URI attributes, then defer to the base sanitizer
1.110 +        new_attrs = []
1.111 +        for attrname, attrvalue in attrs:
1.112 +            if attrname in self.uri_attrs:
1.113 +                new_attrs.append((attrname, self.rewrite_reference(attrvalue)))
1.114 +            else:
1.115 +                new_attrs.append((attrname, attrvalue))
1.116 +        HTMLSanitizer.handle_starttag(self, tag, new_attrs)
1.117 +
1.118 +class IncomingMarkup(Markup):
1.119 +
1.120 +    "Markup for incoming HTML, sanitized using IncomingHTMLSanitizer."
1.121 +
1.122 +    def sanitize(self, request, page, message_number):
1.123 +        buf = StringIO() # receives the sanitizer's output
1.124 +        rewriter = IncomingHTMLSanitizer(buf, request, page, message_number)
1.125 +        rewriter.feed(self.stripentities(keepxmlentities=True))
1.126 +        return IncomingMarkup(buf.getvalue())
1.127 +
1.128 +class IncomingHTMLParser:
1.129 +
1.130 +    "Filters and rewrites incoming HTML content; 'message_number' and 'page' are set after construction."
1.131 +
1.132 +    def __init__(self, raw, request, **kw):
1.133 +        self.raw = raw
1.134 +        self.request = request
1.135 +        self.message_number = None
1.136 +        self.page = None
1.137 +
1.138 +    def format(self, formatter, **kw):
1.139 +        "Write the sanitized HTML to the request, reporting a parse error as a system message."
1.140 +
1.141 +        from HTMLParser import HTMLParseError # Python 2 stdlib; no import visible in this patch provides the name
1.142 +        try:
1.143 +            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
1.144 +        except HTMLParseError, e:
1.145 +            self.request.write(formatter.sysmsg(1) +
1.146 +                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
1.147 +                                  self.raw.splitlines()[e.lineno - 1].strip())) +
1.148 +                formatter.sysmsg(0))
1.149 +
1.150 +class MakeIncomingHTMLParser:
1.151 +
1.152 +    "A callable that creates IncomingHTMLParser objects set up for one message."
1.153 +
1.154 +    def __init__(self, page, message_number):
1.155 +
1.156 +        "Remember the 'page' and 'message_number' to be given to each new parser."
1.157 +
1.158 +        self.page = page
1.159 +        self.message_number = message_number
1.160 +
1.161 +    def __call__(self, *args, **kw):
1.162 +        # Build the parser from the caller's arguments, then attach the message details.
1.163 +        incoming = IncomingHTMLParser(*args, **kw)
1.164 +        incoming.page, incoming.message_number = self.page, self.message_number
1.165 +        return incoming
1.166 +
1.167 +def get_make_parser(page, message_number):
1.168 +
1.169 +    """
1.170 +    Return a parser factory: a callable producing parsers configured for the
1.171 +    message identified by 'message_number' from the given 'page'.
1.172 +    """
1.173 +
1.174 +    return MakeIncomingHTMLParser(page=page, message_number=message_number)
1.175 +
1.176 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/docs/COPYING.txt Thu May 16 15:34:49 2013 +0200
2.2 +++ b/docs/COPYING.txt Fri May 17 20:05:04 2013 +0200
2.3 @@ -3,6 +3,12 @@
2.4
2.5 Copyright (C) 2008, 2011, 2012, 2013 Paul Boddie <paul@boddie.org.uk>
2.6
2.7 +Small HTML parsing-related fragments incorporated from
2.8 +MoinMoin.parser.text_html and MoinMoin.support.htmlmarkup:
2.9 +
2.10 +Copyright (C) 2003-2006 Edgewall Software
2.11 +Copyright (C) 2006 MoinMoin:AlexanderSchremmer
2.12 +
2.13 This software is free software; you can redistribute it and/or
2.14 modify it under the terms of the GNU General Public License as
2.15 published by the Free Software Foundation; either version 2 of
3.1 --- a/macros/SharedContent.py Thu May 16 15:34:49 2013 +0200
3.2 +++ b/macros/SharedContent.py Fri May 17 20:05:04 2013 +0200
3.3 @@ -10,7 +10,8 @@
3.4 from MoinMoin.Page import Page
3.5 from MoinRemoteSupport import *
3.6 from MoinSupport import parseMacroArguments, getParsersForContentType, formatText
3.7 -from MoinShare import getUpdateSources, getUpdatesFromPage, getUpdatesFromStore, Update
3.8 +from MoinShare import getUpdateSources, getUpdatesFromPage, \
3.9 + getUpdatesFromStore, Update, get_make_parser
3.10 from email.utils import parsedate
3.11 import xml.dom.pulldom
3.12
3.13 @@ -333,7 +334,10 @@
3.14 append(fmt.div(on=1, css_class="moinshare-content"))
3.15
3.16 if update.content:
3.17 - parsers = getParsersForContentType(request.cfg, update.content_type)
3.18 + if update.content_type == "text/html" and update.message_number is not None:
3.19 + parsers = [get_make_parser(update.page, update.message_number)]
3.20 + else:
3.21 + parsers = getParsersForContentType(request.cfg, update.content_type)
3.22
3.23 if parsers:
3.24 for parser_cls in parsers: