paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - MoinShare library |
paul@0 | 4 | |
paul@56 | 5 | @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk> |
paul@34 | 6 | @copyright: 2003-2006 Edgewall Software |
paul@34 | 7 | @copyright: 2006 MoinMoin:AlexanderSchremmer |
paul@0 | 8 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 9 | """ |
paul@0 | 10 | |
paul@17 | 11 | from ContentTypeSupport import getContentPreferences |
paul@33 | 12 | from DateSupport import getCurrentTime, getDateTimeFromRFC2822 |
paul@0 | 13 | from MoinSupport import * |
paul@51 | 14 | from ItemSupport import ItemStore |
paul@50 | 15 | from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError |
paul@54 | 16 | from MoinMessageSupport import get_homedir, get_username_for_fingerprint |
paul@37 | 17 | from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup |
paul@15 | 18 | from MoinMoin import wikiutil |
paul@33 | 19 | from email.parser import Parser |
paul@37 | 20 | from codecs import getwriter |
paul@0 | 21 | |
paul@25 | 22 | try: |
paul@25 | 23 | from cStringIO import StringIO |
paul@25 | 24 | except ImportError: |
paul@25 | 25 | from StringIO import StringIO |
paul@25 | 26 | |
# Keep a reference to the getFragments function brought in by the wildcard
# import above (presumably from MoinSupport - confirm), because this module
# defines its own getFragments further down which filters the results of the
# original function.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"
paul@0 | 30 | |
paul@0 | 31 | # More Moin 1.9 compatibility functions. |
paul@0 | 32 | |
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group called 'groupname' according
    to the group information provided by the 'request'.

    Where the request's "dicts" object offers a "has_member" method, that method
    is asked directly. Otherwise, the object is treated as a mapping from group
    names to collections of members.
    """

    groups = request.dicts

    # Without a dedicated method, look the group up and test for membership.

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0 | 38 | |
# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    Each fragment is a (format, attributes, body) tuple as produced by the
    underlying fragment extraction function. Fragments whose attributes do not
    include the MoinShare attribute are omitted from the returned list.
    """

    # Membership is tested using "in" instead of the deprecated has_key method
    # which is not available on dictionaries in later Python versions.

    return [
        (region_format, attributes, body)
        for region_format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2 | 52 | |
def getPreferredOutputTypes(request, mimetypes):

    """
    Negotiate content using the 'request', returning those of the given
    'mimetypes' (the types available for a fragment) that are acceptable to the
    client according to its Accept header.

    Where the client provides no Accept header, the given 'mimetypes' are
    returned unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")

    # No stated preferences: everything on offer is acceptable.

    if not accept_header:
        return mimetypes

    preferences = getContentPreferences(accept_header)
    return preferences.get_preferred_types(mimetypes)
paul@9 | 67 | |
def getUpdatedTime(metadata):

    """
    Return the last updated time recorded in the given 'metadata' under the
    "last-modified" key. Where no such time is recorded (or where it is empty),
    the current time is returned instead.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9 | 82 | |
paul@30 | 83 | # Entry/update classes. |
paul@30 | 84 | |
class Update:

    "A feed update entry, originating from a page fragment or a stored message."

    def __init__(self):

        "Initialise an empty update with all details unset."

        # General details.

        self.title = self.link = None
        self.content = self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update within its
        # message.

        self.path = []

    def unique_id(self):

        """
        Return an identifier built from the message number and the path of part
        numbers, suitable for use as a document element identifier.
        """

        path_text = "-".join([str(number) for number in self.path])
        return "moinshare-tab-%s-%s" % (self.message_number, path_text)

    def __cmp__(self, other):

        """
        Compare this update with 'other' using their updated times. An update
        without an updated time is ordered after one having such a time.
        """

        mine, theirs = self.updated, other.updated

        # Exactly one of the times is missing: the update lacking it is
        # regarded as the greater of the two.

        if (mine is None) != (theirs is None):
            if mine is None:
                return 1
            return -1

        return cmp(mine, theirs)

    def copy(self, part_number=None):

        """
        Return a new update sharing the descriptive details of this one but
        without its content, content type or parts. Where 'part_number' is
        given, it is added to the end of the new update's path.
        """

        duplicate = Update()

        for name in ("title", "link", "updated", "author", "fragment",
            "preferred", "message_number", "page"):

            setattr(duplicate, name, getattr(self, name))

        # The path is copied so that the original is not affected by additions.

        duplicate.path = self.path[:]
        if part_number is not None:
            duplicate.path.append(part_number)

        return duplicate
paul@40 | 140 | |
paul@30 | 141 | # Update retrieval from pages. |
paul@30 | 142 | |
def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.

    One update is produced for each fragment found in the raw body of the page
    by getFragments. Every update is given the same updated time, obtained from
    the page metadata, and a link to the page employing the fragment identifier
    as an anchor. Content is only generated where "text/html" is amongst the
    preferred output types; otherwise, the content of the update remains None.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        # Without explicit "fragment" and "summary" attributes, the position of
        # the fragment in the page is used for both the identifier and title.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)

            # HTML fragments are used verbatim, parsers able to produce specific
            # output types are asked for HTML, and other parsers are used with
            # the request's HTML formatter.

            if format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            # NOTE(review): the nesting of this statement was reconstructed; it
            # NOTE(review): is assumed to apply only when HTML is produced.

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates
paul@25 | 205 | |
paul@33 | 206 | # Update retrieval from message stores. |
paul@33 | 207 | |
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, with one update for
    each stored message, ordered by the keys of the messages in the store.
    """

    # NOTE(review): The updated time is obtained from the page metadata but is
    # NOTE(review): not used by the code below.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    store = ItemStore(page, "messages", "message-locks")
    collected = []

    # Visit the messages in key order, with each key serving as the message
    # number of the resulting update.

    for number in sorted(store.keys()):
        item = getUpdateFromMessageText(store[number], number, request)
        item.page = page
        collected.append(item)

    return collected
paul@33 | 232 | |
def getUpdateFromMessageText(message_text, message_number, request):

    """
    Return an update for the given 'message_text' and 'message_number', using
    the 'request' when obtaining the content of the message.

    The update's title is taken from the subject header (with a title built
    from the message number used in its absence), and its updated time and
    fragment identifier are both taken from the date header. The author is the
    identity reported when obtaining the content or, failing that, the value of
    the "moin-user" header.
    """

    update = Update()
    message = Parser().parsestr(message_text)

    # The message date serves as both the fragment identifier and the time of
    # the update.

    timestamp = getDateTimeFromRFC2822(message.get("date"))
    update.fragment = timestamp
    update.updated = timestamp

    update.title = message.get("subject", "Update #%d" % message_number)
    update.message_number = message_number

    # Start with the author stated in the headers.

    author = message.get("moin-user")

    content, content_type, parts, actual_author = \
        getUpdateContentFromPart(message, request)

    update.content = content
    update.content_type = content_type
    update.parts = parts

    # Prefer any author identity established from the content itself.

    if actual_author:
        author = actual_author

    update.author = author
    return update
paul@46 | 255 | |
def getUpdateContentFromPart(part, request):

    """
    Return a tuple of the form (content, content type, subparts, author) for
    the given 'part', using the 'request' when decrypting content.

    The content is None where the part only provides subparts for separate
    display, the subparts are None where the part is not a multipart, and the
    author is None unless an identity was obtained when decrypting the part.
    """

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated, so decrypt it and
    # examine the result, retaining any author identity found on decryption.

    if subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
        except MoinMessageError:

            # On failure, the part is handled like any other multipart below.

            pass
        else:
            return content, content_type, parts, author

    # Otherwise, just obtain the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None
paul@40 | 293 | |
def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' using the GPG home directory found for the
    'request', returning a tuple of the form (content, author).

    Where the decrypted part is signed and the signature is verified, the
    content is that provided by the verification and the author is the username
    found for the signing key's fingerprint. Otherwise, the part itself
    (decrypted if it was encrypted) is returned with an author of None.
    """

    gpg = GPG(get_homedir(request))

    # Decrypt the part.

    if is_encrypted(part):
        part = Parser().parsestr(gpg.decryptMessage(part))

    # Without a signature, no identity can be established.

    if not is_signed(part):
        return part, None

    # Extract any signature details.

    verification = gpg.verifyMessage(part)
    if not verification:
        return part, None

    fingerprint, identity, content = verification
    return content, get_username_for_fingerprint(request, fingerprint)
paul@47 | 316 | |
def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.

    The payload is obtained with any content transfer encoding removed. Where
    the part declares a character set, the payload is also converted to Unicode
    text using that character set; otherwise, it is returned as a byte string.
    A part without a decodable payload yields None as the payload.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # An explicit test is used instead of the "and ... or" idiom, which would
    # discard an empty decoded result in favour of the undecoded payload. The
    # decode method is used so that no Python 2-only built-in is required.

    if charset and payload is not None:
        payload = payload.decode(charset)

    return payload, part.get_content_type()
paul@40 | 324 | |
def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part'
    having the given 'part_number', using the 'request' to obtain the content.

    The new update shares the descriptive details of the parent and has the
    part number added to its path. Its content, content type and subparts are
    those obtained from the part.
    """

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts, author = \
        getUpdateContentFromPart(part, request)

    # Only replace the author copied from the parent where the part provides
    # an identity of its own, as is done when making updates from messages.
    # Otherwise, an identity established for the parent would be lost.

    if author:
        update.author = author

    return update
paul@40 | 332 | |
def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', using the 'request'
    when obtaining updates for any parts.

    An update without parts is returned on its own. Otherwise, an update is
    made for each part and processed in the same way, with the results being
    combined into a single flat list in part order.
    """

    # Nothing to expand: the update is formatted as it is.

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    flattened = []

    for index, part in enumerate(update.parts):
        part_update = getUpdateFromPart(update, part, index, request)
        flattened.extend(getUpdatesForFormatting(part_update, request))

    return flattened
paul@46 | 349 | |
paul@46 | 350 | # Update formatting. |
paul@46 | 351 | |
def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'.

    None is returned where the update has no content or where no parser is
    available for the content type of the update.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML content from messages is handled by a parser configured for the
    # page and message involved; other content uses the configured parsers.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    # Only the first available parser is ever used, so the result is returned
    # as soon as a parser has been obtained.

    for parser_cls in parsers or ():
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")

        return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None
paul@46 | 378 | |
def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string.

    Where the update yields several updates for formatting, a collection of
    navigation links is produced first, one for each alternative labelled with
    its content type, and each alternative is then enclosed in its own region
    identified by the unique identifier of the alternative. A single update is
    formatted without any such decoration.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist, marking the first as the
        # default alternative.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"

            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)
paul@46 | 427 | |
def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning a
    string in which the formatted content is enclosed in a content region.

    Where no formatted content can be obtained, a translated message stating
    the content type of the update is shown instead.
    """

    _ = request.getText

    # Encapsulate the content.

    opening = fmt.div(on=1, css_class="moinshare-content")

    body = getFormattedUpdate(update, request, fmt)
    if not body:
        body = fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)

    closing = fmt.div(on=0)

    return "".join([opening, body, closing])
paul@46 | 448 | |
paul@31 | 449 | # Source management. |
paul@31 | 450 | |
def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.

    The result is a dictionary mapping each source name found in the wiki
    dictionary for the page to the parameters obtained from its definition.
    An empty dictionary is returned where no definitions are available.
    """

    definitions = getWikiDict(pagename, request)

    if not definitions:
        return {}

    return dict(
        (name, getSourceParameters(definition))
        for name, definition in definitions.items()
        )
paul@31 | 464 | |
def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string, as
    obtained by parsing it with "type" and "location" as the parameter names
    supplied to the parsing function.
    """

    parameter_names = ("type", "location")
    return parseDictEntry(source_definition, parameter_names)
paul@31 | 470 | |
paul@34 | 471 | # HTML parsing support. |
paul@34 | 472 | |
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML sanitizer that rewrites references to message parts."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.page = page
        self.message_number = message_number
        self.request = request

    def rewrite_reference(self, ref):

        """
        Return a replacement for 'ref': a reference using the "cid:" scheme
        becomes a link to the ReadMessage action on the page, indicating the
        message and the referenced part. Other references are returned as they
        are.
        """

        scheme = "cid:"

        if not ref.startswith(scheme):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(scheme):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of 'tag' with the given 'attrs', rewriting the values
        of attributes that hold references before passing the tag on to the
        base class.
        """

        rewritten = []

        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))

        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34 | 502 | |
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return new markup produced by passing this markup through a sanitizer
        configured with the given 'request', 'page' and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 encoded text and decoded
        # again once the markup has been processed.

        buffer = StringIO()
        writer = getwriter("utf-8")(buffer)

        sanitizer = IncomingHTMLSanitizer(writer, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        return IncomingMarkup(unicode(buffer.getvalue(), "utf-8"))
paul@34 | 512 | |
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. The
        message number and page are initially unset and are expected to be
        provided by whatever creates the parser.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text: the raw HTML is sanitized and written to the request
        using the 'formatter'. Where the HTML cannot be parsed, a system
        message describing the error is written instead.
        """

        try:
            markup = IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)
            self.request.write(formatter.rawHTML(markup))

        # The "as" form is used since it is accepted by Python 2.6 and later
        # including Python 3, unlike the comma form.

        except HTMLParseError as e:

            # Only quote the offending line where the reported line number
            # actually refers to a line of the text, so that reporting the
            # error cannot itself fail.

            lines = self.raw.splitlines()
            lineno = getattr(e, "lineno", None)

            if lineno is not None and 1 <= lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))
paul@34 | 534 | |
class MakeIncomingHTMLParser:

    "A callable that makes HTML parsers configured for a particular message."

    def __init__(self, page, message_number):

        """
        Initialise with the 'page' and 'message_number' that are used to
        configure instantiated parsers.
        """

        self.page = page
        self.message_number = message_number

    def __call__(self, *args, **kw):

        """
        Return a parser created using the given arguments and then configured
        with the retained page and message number.
        """

        configured = IncomingHTMLParser(*args, **kw)
        configured.message_number, configured.page = self.message_number, self.page
        return configured
paul@34 | 551 | |
def get_make_parser(page, message_number):

    """
    Return a callable that can be used in place of a parser class, producing
    parsers configured for the message with the given 'message_number' from the
    given 'page'.
    """

    parser_factory = MakeIncomingHTMLParser(page, message_number)
    return parser_factory
paul@34 | 560 | |
paul@0 | 561 | # vim: tabstop=4 expandtab shiftwidth=4 |