paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - MoinShare library |
paul@0 | 4 | |
paul@17 | 5 | @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk> |
paul@34 | 6 | @copyright: 2003-2006 Edgewall Software |
paul@34 | 7 | @copyright: 2006 MoinMoin:AlexanderSchremmer |
paul@0 | 8 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 9 | """ |
paul@0 | 10 | |
paul@17 | 11 | from ContentTypeSupport import getContentPreferences |
paul@33 | 12 | from DateSupport import getCurrentTime, getDateTimeFromRFC2822 |
paul@0 | 13 | from MoinSupport import * |
paul@34 | 14 | from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup |
paul@15 | 15 | from MoinMoin import wikiutil |
paul@33 | 16 | from email.parser import Parser |
paul@0 | 17 | |
paul@25 | 18 | try: |
paul@25 | 19 | from cStringIO import StringIO |
paul@25 | 20 | except ImportError: |
paul@25 | 21 | from StringIO import StringIO |
paul@25 | 22 | |
# Keep a reference to the getFragments function already in scope at this point
# (presumably supplied by the MoinSupport wildcard import - confirm), because
# this module defines its own getFragments further down which calls it.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"
paul@0 | 26 | |
paul@0 | 27 | # More Moin 1.9 compatibility functions. |
paul@0 | 28 | |
def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname', consulting
    the group definitions exposed as the 'dicts' attribute of the 'request'.

    Where the group definitions provide their own has_member method, it is
    used; otherwise, the group is looked up and membership is tested directly.
    """

    groups = request.dicts

    # Fall back to a plain lookup where no dedicated method is provided.

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0 | 34 | |
# Fragments employ a "moinshare" attribute: only fragments whose attributes
# include this name are returned by the getFragments function in this module.

fragment_attribute = "moinshare"
paul@2 | 38 | |
def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    Each fragment is a (format, attributes, body) tuple as produced by the
    underlying _getFragments function; only those whose attributes include
    the name given by 'fragment_attribute' are retained, in their original
    order.
    """

    # Membership is tested using "in" rather than the deprecated has_key method.

    return [
        (format, attributes, body)
        for format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2 | 48 | |
def getOutputTypes(request, format):

    """
    Return the media types that can be produced for a fragment having the
    given 'format', using the 'request' to find the parser for that format.

    Parsers not offering a getOutputTypes method are taken to produce only
    "text/html".
    """

    parser_cls = getParserClass(request, format)

    # Only parsers supporting the extended API can report their own types.

    if not hasattr(parser_cls, "getOutputTypes"):
        return ["text/html"]

    return parser_cls.getOutputTypes()
paul@9 | 63 | |
def getPreferredOutputTypes(request, mimetypes):

    """
    Negotiate content types for the 'request', returning those of the given
    'mimetypes' (the types available for a fragment) that are preferred
    according to the Accept header sent by the client.

    Where no Accept header is available, 'mimetypes' is returned unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")

    # Without any stated preferences, everything on offer is acceptable.

    if not accept_header:
        return mimetypes

    preferences = getContentPreferences(accept_header)
    return preferences.get_preferred_types(mimetypes)
paul@9 | 78 | |
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value found in the given 'metadata', or the
    current time where no such value is present (or where it is empty).
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    explicit_time = metadata.get("last-modified")

    if not explicit_time:
        return getCurrentTime()

    return explicit_time
paul@9 | 93 | |
def getUpdateSources(request, sources_page):

    """
    Return the update sources defined on the page named by 'sources_page',
    using the 'request' to access the wiki.
    """

    # NOTE: A later definition of getUpdateSources in this module takes its
    # NOTE: arguments in the opposite order and replaces this one when the
    # NOTE: module is loaded.

    # Remote sources are accessed via dictionary page definitions.

    source_definitions = getWikiDict(sources_page, request)
    return source_definitions
paul@25 | 104 | |
paul@30 | 105 | # Entry/update classes. |
paul@30 | 106 | |
class Update:

    """
    A feed update entry.

    Updates are ordered by their 'updated' attribute, with updates lacking
    such a value (None) ordered after all updates that have one.
    """

    def __init__(self):

        "Initialise an update with all details unset."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

    def _compare(self, other):

        """
        Return -1, 0 or 1 according to whether this update is ordered before,
        together with, or after the 'other' update.
        """

        if self.updated is None:
            if other.updated is None:
                return 0
            return 1
        elif other.updated is None:
            return -1

        # Equivalent to the cmp built-in without depending on it, since that
        # function is not available in Python 3.

        return (self.updated > other.updated) - (self.updated < other.updated)

    def __cmp__(self, other):

        "Compare this update with 'other' (used by Python 2)."

        return self._compare(other)

    def __lt__(self, other):

        """
        Return whether this update is ordered before 'other'. Sorting relies on
        this method where __cmp__ is not consulted.
        """

        return self._compare(other) < 0
paul@30 | 139 | |
paul@30 | 140 | # Update retrieval from pages. |
paul@30 | 141 | |
def getUpdatesFromPage(page, request):

    """
    Return a list of update objects, one for each MoinShare fragment found in
    the raw body of the given 'page', using the 'request' to negotiate content
    types and to format fragment content.
    """

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    page_updated = getUpdatedTime(getMetadata(page))

    # Get the fragment regions for the page.

    fragments = getFragments(page.get_raw_body())

    entries = []

    for index, (format, attributes, body) in enumerate(fragments):

        entry = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        entry.fragment = attributes.get("fragment", str(index))
        entry.title = attributes.get("summary", "Update #%d" % index)

        # Get the preferred content types available for the fragment.

        available = getOutputTypes(request, format)
        entry.preferred = getPreferredOutputTypes(request, available)

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        entry.content = None

        if "text/html" in entry.preferred:
            parser_cls = getParserClass(request, format)
            parser = parser_cls(body, request)

            # HTML fragments are used as they are.

            if format == "html":
                html = body

            # Parsers supporting the extended API write the HTML themselves.

            elif hasattr(parser, "formatForOutputType"):
                buf = StringIO()
                parser.formatForOutputType("text/html", write=buf.write)
                html = buf.getvalue()

            # Otherwise, the request's HTML formatter is employed.

            else:
                formatter = request.html_formatter
                formatter.setPage(page)
                html = formatText(body, request, formatter, parser_cls)

            entry.content = html
            entry.content_type = "text/html"

        entry.page = page
        entry.link = page.url(request)
        entry.updated = page_updated

        entries.append(entry)

    return entries
paul@25 | 203 | |
paul@33 | 204 | # Update retrieval from message stores. |
paul@33 | 205 | |
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, one for each message
    in the store, in the order in which the store yields them.

    Each update's time is taken from the Date header of its message. Where a
    message has no Date header, the updated time of the page is used instead.
    """

    updates = []

    # Obtain the page's own updated time as a fallback for undated messages.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    store = ItemStore(page, "messages", "message-locks")
    message_parser = Parser()

    for n, message_text in enumerate(store):

        update = Update()
        message = message_parser.parse(StringIO(message_text))

        # Produce a fragment identifier from the message date.
        # NOTE: Undated messages all share the page's updated time and thus
        # NOTE: the same identifier.

        date = message.get("date")

        if date:
            update.updated = getDateTimeFromRFC2822(date)
        else:
            update.updated = updated

        update.fragment = update.updated
        update.title = message.get("subject", "Update #%d" % n)

        update.page = page
        update.message_number = n

        # Determine whether the message has several representations.

        # For a single part, use it as the update content.
        # NOTE(review): The payload is not decoded here, so content having a
        # NOTE(review): transfer encoding is presumably passed on encoded.

        if not message.is_multipart():
            update.content = message.get_payload()
            update.content_type = message.get_content_type()

        # For a collection of related parts, use the first as the update content
        # and assume that the formatter will reference the other parts.

        elif message.get_content_subtype() == "related":
            main_part = message.get_payload()[0]
            update.content = main_part.get_payload()
            update.content_type = main_part.get_content_type()

        # Otherwise, just obtain the parts for separate display.

        else:
            update.parts = message.get_payload()
            update.content_type = message.get_content_type()

        updates.append(update)

    return updates
paul@33 | 258 | |
paul@31 | 259 | # Source management. |
paul@31 | 260 | |
def getUpdateSources(pagename, request):

    """
    Return a dictionary mapping source names to their parameters, obtained
    from the dictionary page named 'pagename' using the 'request'. An empty
    dictionary is returned where no definitions are found.
    """

    definitions = getWikiDict(pagename, request)

    if not definitions:
        return {}

    # Each definition is a string that is parsed into a parameter dictionary.

    return dict([
        (name, getSourceParameters(definition))
        for name, definition in definitions.items()
        ])
paul@31 | 274 | |
def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string as a
    dictionary mapping parameter names to string values.

    The definition is split on whitespace. Arguments of the form name=value
    having a purely alphabetic name are stored under that name. All other
    arguments are unqualified: each is assigned to the first of "type" and
    "location" not already set, and is discarded if both are set.
    """

    parameters = {}
    unqualified = ("type", "location")

    for arg in source_definition.split():
        try:
            # Arguments without "=" cause a ValueError upon unpacking.

            argname, argvalue = arg.split("=", 1)

            # Detect unlikely parameter names, such as a URL containing a
            # query string, and treat the whole argument as unqualified.

            if not argname.isalpha():
                raise ValueError

            parameters[argname] = argvalue

        # Unqualified parameters are assumed to be one of a recognised set.

        except ValueError:
            for name in unqualified:
                if name not in parameters:
                    parameters[name] = arg
                    break

    return parameters
paul@31 | 302 | |
paul@34 | 303 | # HTML parsing support. |
paul@34 | 304 | |
class IncomingHTMLSanitizer(HTMLSanitizer):

    """
    An HTML sanitizer that rewrites "cid:" references in URI attributes so
    that they refer to parts of a message held for a page.
    """

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' needed to construct rewritten links.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.page = page
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a link to the ReadMessage action for the message part named by
        'ref' where it is a "cid:" reference; otherwise, return 'ref' itself.
        """

        prefix = "cid:"

        if not ref.startswith(prefix):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of any URI attributes in 'attrs' before passing the
        'tag' and its attributes on to the base class.
        """

        rewritten = [
            (name, name in self.uri_attrs and self.rewrite_reference(value) or value)
            if False else
            (name, self.rewrite_reference(value) if name in self.uri_attrs else value)
            for name, value in attrs
            ]
        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34 | 334 | |
class IncomingMarkup(Markup):

    "Markup for incoming HTML that is sanitized with reference rewriting."

    def sanitize(self, request, page, message_number):

        """
        Return a new IncomingMarkup object holding the output obtained by
        feeding this markup, with entities stripped (but XML entities kept),
        through an IncomingHTMLSanitizer configured with the given 'request',
        'page' and 'message_number'.
        """

        buf = StringIO()
        filter_ = IncomingHTMLSanitizer(buf, request, page, message_number)
        text = self.stripentities(keepxmlentities=True)
        filter_.feed(text)
        return IncomingMarkup(buf.getvalue())
paul@34 | 344 | |
class IncomingHTMLParser:

    """
    Filters and rewrites incoming HTML content.

    The 'message_number' and 'page' attributes are initially None and are
    expected to be set before the content is formatted.
    """

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. Any
        additional keyword arguments are accepted and ignored.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text: write the sanitized HTML to the request using the given
        'formatter', or write a system message describing the problem where
        the HTML cannot be parsed.
        """

        # HTMLParseError is not imported by name at module level, so obtain it
        # here to ensure that the handler below can actually be evaluated.

        from HTMLParser import HTMLParseError

        try:
            markup = IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)
            self.request.write(formatter.rawHTML(markup))
        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self.raw.splitlines()[e.lineno - 1].strip())) +
                formatter.sysmsg(0))
paul@34 | 366 | |
class MakeIncomingHTMLParser:

    """
    A factory for parsers configured for a particular message. Instances are
    callable in the same way as the IncomingHTMLParser class itself.
    """

    def __init__(self, page, message_number):

        """
        Retain the 'page' and 'message_number' to be given to each parser
        subsequently instantiated.
        """

        self.page = page
        self.message_number = message_number

    def __call__(self, *args, **kw):

        """
        Return a new IncomingHTMLParser made with the given arguments and then
        configured with the retained message number and page.
        """

        configured = IncomingHTMLParser(*args, **kw)
        configured.message_number = self.message_number
        configured.page = self.page
        return configured
paul@34 | 383 | |
def get_make_parser(page, message_number):

    """
    Return a callable that produces parsers configured for the message having
    the given 'message_number' from the given 'page'.
    """

    make_parser = MakeIncomingHTMLParser(page, message_number)
    return make_parser
paul@34 | 392 | |
paul@0 | 393 | # vim: tabstop=4 expandtab shiftwidth=4 |