paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - MoinShare library |
paul@0 | 4 | |
paul@17 | 5 | @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk> |
paul@34 | 6 | @copyright: 2003-2006 Edgewall Software |
paul@34 | 7 | @copyright: 2006 MoinMoin:AlexanderSchremmer |
paul@0 | 8 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 9 | """ |
paul@0 | 10 | |
paul@17 | 11 | from ContentTypeSupport import getContentPreferences |
paul@33 | 12 | from DateSupport import getCurrentTime, getDateTimeFromRFC2822 |
paul@0 | 13 | from MoinSupport import * |
paul@37 | 14 | from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup |
paul@15 | 15 | from MoinMoin import wikiutil |
paul@33 | 16 | from email.parser import Parser |
paul@37 | 17 | from codecs import getwriter |
paul@0 | 18 | |
paul@25 | 19 | try: |
paul@25 | 20 | from cStringIO import StringIO |
paul@25 | 21 | except ImportError: |
paul@25 | 22 | from StringIO import StringIO |
paul@25 | 23 | |
# Keep a reference to the getFragments function already in scope at this point
# (presumably supplied by the MoinSupport star import), because this module
# defines its own getFragments function which wraps and replaces that name.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"
paul@0 | 27 | |
paul@0 | 28 | # More Moin 1.9 compatibility functions. |
paul@0 | 29 | |
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group called 'groupname', using
    the group information found in the 'dicts' attribute of the 'request'.

    Where the group information offers a 'has_member' method, that method is
    asked directly. Otherwise, the group is looked up by name and tested for
    the presence of the username, with an unknown group having no members.
    """

    groups = request.dicts

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0 | 35 | |
# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    The fragments are obtained from the underlying _getFragments function and
    each one is a (format, attributes, body) tuple. Only those whose attributes
    contain the name given by 'fragment_attribute' are kept, in their original
    order.
    """

    # Membership is tested using "in" rather than the deprecated has_key method.

    return [
        (fmt, attributes, body)
        for fmt, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2 | 49 | |
def getPreferredOutputTypes(request, mimetypes):

    """
    Perform content negotiation for the given 'request', returning those of the
    given 'mimetypes' (the types available for a fragment) that are preferred
    according to the Accept header sent by the client.

    Where the request provides no Accept header value, the 'mimetypes' are
    returned unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")

    # Without any stated preferences, everything on offer is acceptable.

    if not accept:
        return mimetypes

    return getContentPreferences(accept).get_preferred_types(mimetypes)
paul@9 | 64 | |
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value from the given 'metadata' where one is
    present and not empty, or the current time otherwise.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9 | 79 | |
paul@30 | 80 | # Entry/update classes. |
paul@30 | 81 | |
class Update:

    "A single entry in a feed of updates."

    def __init__(self):

        "Initialise an update with all of its details unset."

        # General details of the entry.

        self.title = self.link = self.updated = None
        self.content = self.content_type = None

        # Details used by updates originating from pages.

        self.fragment = self.preferred = None

        # Details used by updates originating from messages.

        self.message_number = self.parts = None

        # The page involved, used for both kinds of update.

        self.page = None

        # The part numbers leading to this update, extended by the copy method.

        self.path = []

    def unique_id(self):

        """
        Return an identifier string built from the message number and the
        elements of the path, all joined by hyphens.
        """

        path = "-".join([str(step) for step in self.path])
        return "moinshare-tab-%s-%s" % (self.message_number, path)

    def __cmp__(self, other):

        """
        Compare this update with 'other' using their updated times. An update
        having no updated time is ordered after one that has such a time.
        """

        mine, theirs = self.updated, other.updated

        # Exactly one of the updates lacks a time: it is the greater one.

        if (mine is None) != (theirs is None):
            return 1 if mine is None else -1

        return cmp(mine, theirs)

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's title, link, updated time,
        fragment, preferred types, message number and page, together with a
        separate copy of the path. The content, content type and parts are not
        carried over and remain unset in the new update.

        Where 'part_number' is given, it is added to the end of the new
        update's path.
        """

        duplicate = Update()

        for name in ("title", "link", "updated", "fragment", "preferred",
                     "message_number", "page"):
            setattr(duplicate, name, getattr(self, name))

        # Copy the path so that extending it leaves this update untouched.

        duplicate.path = self.path[:]
        if part_number is not None:
            duplicate.path.append(part_number)

        return duplicate
paul@40 | 135 | |
paul@30 | 136 | # Update retrieval from pages. |
paul@30 | 137 | |
def getUpdatesFromPage(page, request):

    """
    Return a list of update objects, one for each MoinShare fragment found in
    the raw body of the given 'page', in the order in which the fragments are
    found. The 'request' is used to negotiate content types and to format the
    fragment content.
    """

    # NOTE: Every update is given the updated time obtained for the page.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    page_updated = getUpdatedTime(getMetadata(page))

    entries = []

    for position, region in enumerate(getFragments(page.get_raw_body())):
        format_name, attributes, body = region

        entry = Update()

        # Identify and label the fragment, falling back on its position
        # amongst the fragments where the attributes give no details.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        entry.fragment = attributes.get("fragment", str(position))
        entry.title = attributes.get("summary", "Update #%d" % position)

        # Negotiate the content types available for the fragment.

        available = getOutputTypes(request, format_name)
        entry.preferred = getPreferredOutputTypes(request, available)

        # Content is only produced where HTML is acceptable; otherwise, the
        # content and content type of the entry remain unset.
        # NOTE: Could potentially get a summary for the fragment.
        # NOTE(review): the content type is assumed to be recorded only when
        # NOTE(review): HTML content has been produced - confirm.

        if "text/html" in entry.preferred:
            parser_cls = getParserClass(request, format_name)

            # HTML fragments are used as they are.

            if format_name == "html":
                html = body

            # Parsers able to target output types produce the HTML directly.

            elif hasattr(parser_cls, "formatForOutputType"):
                html = formatTextForOutputType(body, request, parser_cls, "text/html")

            # Other parsers are combined with the request's HTML formatter.

            else:
                formatter = request.html_formatter
                formatter.setPage(page)
                html = formatText(body, request, formatter, parser_cls)

            entry.content = html
            entry.content_type = "text/html"

        entry.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        entry.link = page.url(request, anchor=entry.fragment)
        entry.updated = page_updated

        entries.append(entry)

    return entries
paul@25 | 200 | |
paul@33 | 201 | # Update retrieval from message stores. |
paul@33 | 202 | |
def getUpdatesFromStore(page, request):

    """
    Return a list of update objects, one for each message held in the
    "messages" item store associated with the given 'page', in the order in
    which the store provides the messages.

    The 'request' is accepted but is not consulted by this function.
    """

    # NOTE(review): the page-level updated time is obtained here but nothing
    # NOTE(review): below reads it; each update takes its time from the date
    # NOTE(review): header of its message - confirm whether this is intended.

    updated = getUpdatedTime(getMetadata(page))

    messages = ItemStore(page, "messages", "message-locks")

    entries = []

    for number, text in enumerate(messages):

        entry = Update()
        message = Parser().parse(StringIO(text))

        # The date of the message serves both as the fragment identifier and
        # as the updated time of the entry.

        when = getDateTimeFromRFC2822(message.get("date"))
        entry.fragment = when
        entry.updated = when

        # Use the subject as the title, or a numbered title without one.

        entry.title = message.get("subject", "Update #%d" % number)

        entry.page = page
        entry.message_number = number

        # Obtain the content, its type and any subparts from the message.

        details = getUpdateContentFromPart(message)
        entry.content, entry.content_type, entry.parts = details

        entries.append(entry)

    return entries
paul@33 | 235 | |
def getUpdateContentFromPart(part):

    """
    Return a tuple of the form (content, content type, subparts) for the given
    message 'part'.

    A part that is not multipart yields its decoded content and content type
    with no subparts. A multipart/related part yields the decoded content and
    content type of its first subpart, with that subpart given as the only
    element of the subparts list. Any other multipart yields no content, its
    own content type, and all of its subparts.
    """

    # A single part provides the update content itself.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None

    subparts = part.get_payload()

    # Anything other than related parts is left for separate display.

    if part.get_content_subtype() != "related":
        return None, part.get_content_type(), subparts

    # For related parts, the first provides the update content, and it is
    # assumed that the formatter will reference the other parts.

    main_part = subparts[0]
    content, content_type = getPartContent(main_part)
    return content, content_type, [main_part]
paul@40 | 263 | |
def getPartContent(part):

    """
    Decode the 'part', returning a tuple containing the decoded payload and the
    content type of the part.

    The payload is obtained with any transfer encoding removed. Where the part
    declares a charset, the payload is additionally decoded to Unicode using
    that charset; otherwise, it is returned as obtained. A part providing no
    payload at all yields None as the payload.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Test the charset explicitly instead of using "and ... or ...", which
    # would hand back the undecoded payload whenever the decoded text is empty.
    # A missing payload is left alone rather than causing a decoding error.

    if charset and payload is not None:
        payload = payload.decode(charset)

    return payload, part.get_content_type()
paul@40 | 271 | |
def getUpdateFromPart(parent, part, part_number):

    """
    Return a new update object for the given message 'part', made as a copy of
    the 'parent' update with 'part_number' added to its path, and having the
    content, content type and subparts obtained from the part.
    """

    child = parent.copy(part_number)
    details = getUpdateContentFromPart(part)
    child.content, child.content_type, child.parts = details
    return child
paul@40 | 279 | |
paul@31 | 280 | # Source management. |
paul@31 | 281 | |
def getUpdateSources(pagename, request):

    """
    Return a dictionary mapping source names to source parameters, using the
    definitions found in the wiki dictionary obtained for the given 'pagename'
    and 'request'. Where no definitions are available, the dictionary is empty.
    """

    definitions = getWikiDict(pagename, request)

    if not definitions:
        return {}

    return dict([
        (name, getSourceParameters(definition))
        for name, definition in definitions.items()
        ])
paul@31 | 295 | |
def getSourceParameters(source_definition):

    """
    Return the parameters found in the given 'source_definition' string, as
    produced by parseDictEntry when given the names "type" and "location".
    """

    parameter_names = ("type", "location")
    return parseDictEntry(source_definition, parameter_names)
paul@31 | 301 | |
paul@34 | 302 | # HTML parsing support. |
paul@34 | 303 | |
class IncomingHTMLSanitizer(HTMLSanitizer):

    "A sanitiser for incoming HTML that redirects references to message parts."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitiser to write to 'out', remembering the 'request',
        'page' and 'message_number' needed when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request, self.page = request, page
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. A reference starting with
        "cid:" becomes a URL on the page invoking the ReadMessage action for
        this message, with the remainder of the reference as the part. Any
        other reference is returned unchanged.
        """

        prefix = "cid:"

        if not ref.startswith(prefix):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of 'tag' with the given 'attrs', rewriting the values
        of those attributes named in 'uri_attrs' before handing the tag over to
        the superclass implementation.
        """

        rewritten = []

        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))

        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34 | 333 | |
class IncomingMarkup(Markup):

    "Markup for incoming HTML that is sanitised with rewritten references."

    def sanitize(self, request, page, message_number):

        """
        Return a new IncomingMarkup object holding the result of feeding this
        markup, with entities stripped but XML entities kept, through an
        IncomingHTMLSanitizer configured with the given 'request', 'page' and
        'message_number'.
        """

        # The sanitiser writes through a UTF-8 encoding writer, and so the
        # collected output is decoded from UTF-8 afterwards.

        collected = StringIO()
        writer = getwriter("utf-8")(collected)

        sanitizer = IncomingHTMLSanitizer(writer, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        return IncomingMarkup(collected.getvalue().decode("utf-8"))
paul@34 | 343 | |
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. Any
        additional keyword arguments are accepted but not used.
        """

        self.raw = raw
        self.request = request

        # These are unset initially and are read when formatting.

        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text: write the sanitised HTML to the request as raw HTML
        produced using the given 'formatter'. Should the HTML fail to parse, a
        system message describing the error is written instead. Any additional
        keyword arguments are accepted but not used.
        """

        try:
            markup = IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)
            self.request.write(formatter.rawHTML(markup))

        # The "as" form is the syntax also accepted by later Python versions.

        except HTMLParseError as e:

            # Quote the offending line only where the reported line number
            # identifies a line of the raw text, so that reporting the error
            # cannot itself raise an exception.

            lines = self.raw.splitlines()
            lineno = getattr(e, "lineno", None)

            if lineno is not None and 1 <= lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))
paul@34 | 365 | |
class MakeIncomingHTMLParser:

    "A factory producing parsers that are configured for a particular message."

    def __init__(self, page, message_number):

        """
        Remember the 'page' and 'message_number' to be given to each parser
        that is subsequently made.
        """

        self.page = page
        self.message_number = message_number

    def __call__(self, *args, **kw):

        """
        Return a new IncomingHTMLParser made using the given arguments and
        then given the remembered page and message number.
        """

        configured = IncomingHTMLParser(*args, **kw)
        configured.message_number, configured.page = self.message_number, self.page
        return configured
paul@34 | 382 | |
def get_make_parser(page, message_number):

    """
    Return a callable which, when called, returns a parser configured for the
    message having the given 'message_number' from the given 'page'.
    """

    factory = MakeIncomingHTMLParser(page, message_number)
    return factory
paul@34 | 391 | |
paul@0 | 392 | # vim: tabstop=4 expandtab shiftwidth=4 |