# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1371165583 -7200
# Node ID f933c3145dcb1837072ddb56a8b6b989ed85f773
# Parent  8bf0bbc9c1790be45472d1c9d73b42744a7933ec
Fixed the charset and transfer encoding support for updates.

diff -r 8bf0bbc9c179 -r f933c3145dcb MoinShare.py
--- a/MoinShare.py	Fri May 17 23:08:31 2013 +0200
+++ b/MoinShare.py	Fri Jun 14 01:19:43 2013 +0200
@@ -11,9 +11,10 @@
 from ContentTypeSupport import getContentPreferences
 from DateSupport import getCurrentTime, getDateTimeFromRFC2822
 from MoinSupport import *
-from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup
+from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
 from MoinMoin import wikiutil
 from email.parser import Parser
+from codecs import getwriter
 
 try:
     from cStringIO import StringIO
@@ -46,21 +47,6 @@
             fragments.append((format, attributes, body))
     return fragments
 
-def getOutputTypes(request, format):
-
-    """
-    Using the 'request' and the 'format' of a fragment, return the media types
-    available for the fragment.
-    """
-
-    # This uses an extended parser API method if available.
-
-    parser = getParserClass(request, format)
-    if hasattr(parser, "getOutputTypes"):
-        return parser.getOutputTypes()
-    else:
-        return ["text/html"]
-
 def getPreferredOutputTypes(request, mimetypes):
 
     """
@@ -194,7 +180,11 @@
             update.content_type = "text/html"
 
         update.page = page
-        update.link = page.url(request)
+
+        # NOTE: The page does not yet provide this anchor: doing so requires
+        # NOTE: formatter modifications for the regions providing updates.
+
+        update.link = page.url(request, anchor=update.fragment)
         update.updated = updated
 
         updates.append(update)
@@ -235,7 +225,9 @@
         # For a single part, use it as the update content.
 
         if not message.is_multipart():
-            update.content = message.get_payload()
+            charset = message.get_content_charset()
+            payload = message.get_payload(decode=True)
+            update.content = charset and unicode(payload, charset) or payload
             update.content_type = message.get_content_type()
 
         # For a collection of related parts, use the first as the update content
@@ -243,7 +235,9 @@
 
         elif message.get_content_subtype() == "related":
             main_part = message.get_payload()[0]
-            update.content = main_part.get_payload()
+            charset = main_part.get_content_charset()
+            payload = main_part.get_payload(decode=True)
+            update.content = charset and unicode(payload, charset) or payload
             update.content_type = main_part.get_content_type()
 
         # Otherwise, just obtain the parts for separate display.
@@ -337,10 +331,10 @@
     "A special markup processor for incoming HTML."
 
     def sanitize(self, request, page, message_number):
-        out = StringIO()
+        out = getwriter("utf-8")(StringIO())
         sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
         sanitizer.feed(self.stripentities(keepxmlentities=True))
-        return IncomingMarkup(out.getvalue())
+        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))
 
 class IncomingHTMLParser: