# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinShare library

    @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
    @copyright: 2003-2006 Edgewall Software
    @copyright: 2006 MoinMoin:AlexanderSchremmer
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822
from MoinSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from codecs import getwriter

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the imported fragment function before this module
# defines its own function of the same name.

_getFragments = getFragments

__version__ = "0.1"

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname', using the
    group information provided by 'request.dicts'.

    Where 'request.dicts' offers a "has_member" method, that method is used.
    Otherwise, the group is obtained using 'request.dicts.get' and membership
    is tested on the result, with a missing group being treated as empty.
    """

    dicts = request.dicts

    if hasattr(dicts, "has_member"):
        return dicts.has_member(groupname, username)

    return username in dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute. Each
    fragment is a (format, attributes, body) tuple as provided by the
    underlying fragment function.
    """

    # The local is not called "format" so that the builtin is not shadowed.

    return [
        (fragment_format, attributes, body)
        for fragment_format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header).

    Where the request provides no Accept header, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")

    if not accept:
        return mimetypes

    prefs = getContentPreferences(accept)
    return prefs.get_preferred_types(mimetypes)

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")

    # An absent or empty value causes the current time to be used.

    if latest_timestamp:
        return latest_timestamp

    return getCurrentTime()

# Entry/update classes.

class Update:

    """
    A feed update entry.

    Instances are plain records whose attributes are all initialised to None
    (or an empty list for 'path') and are populated by the functions that
    produce updates.
    """

    def __init__(self):

        "Initialise an empty update."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update, appended
        # to by 'copy' when a part number is given.

        self.path = []

    def unique_id(self):

        """
        Return an identifier string built from the message number and the
        hyphen-joined elements of the path.
        """

        path_text = "-".join(map(str, self.path))
        return "moinshare-tab-%s-%s" % (self.message_number, path_text)

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times. An update
        without an updated time is ordered after one having such a time.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's descriptive details, with
        'part_number' (if not None) appended to the new update's own copy of
        the path.

        The content, content type and parts are not copied: they retain the
        None values given to a newly initialised update.
        """

        update = Update()

        for name in ("title", "link", "updated", "author", "fragment",
                     "preferred", "message_number", "page"):
            setattr(update, name, getattr(self, name))

        # Copy the path so that extending it does not affect this update.

        update.path = self.path[:]

        if part_number is not None:
            update.path.append(part_number)

        return update

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned, one for each MoinShare fragment found in the raw page
    body.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    fragments = getFragments(page.get_raw_body())

    for n, (region_format, attributes, body) in enumerate(fragments):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, region_format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, region_format)

            # HTML regions are used directly; other regions are formatted.

            if region_format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered by the sorted
    store keys.
    """

    store = ItemStore(page, "messages", "message-locks")

    updates = []

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    """
    Return an update for the given 'message_text' and 'message_number', using
    the 'request' when obtaining the message content.

    The title is taken from the Subject header, the initial author from the
    "moin-user" header, and the fragment identifier and updated time from the
    Date header. An author obtained from the message content replaces the
    author given in the header.
    """

    update = Update()
    message = Parser().parsestr(message_text)

    # Produce a fragment identifier. A message lacking a Date header leaves
    # the fragment and updated time as None instead of asking the date
    # function to interpret a missing value.

    date = message.get("date")

    if date:
        update.fragment = update.updated = getDateTimeFromRFC2822(date)

    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part', using the 'request' when decrypting
    encrypted parts.

    Where no single piece of content can be provided, the content is None and
    the subparts are the payload of the part.
    """

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        subparts = part.get_payload()

        # A collection without any parts has no main part to present and is
        # handled like any other multipart below.

        if subparts:
            main_part = subparts[0]
            content, content_type = getPartContent(main_part)
            return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    elif subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author

        # Where processing fails, present the parts without decoded content.

        except MoinMessageError:
            pass

    # Otherwise, just obtain the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' using the 'request' to locate the GPG home
    directory, returning a tuple containing the resulting part and the author.

    Where the (decrypted) part is signed and verification yields a result, the
    verified content is returned together with the username found for the
    signing fingerprint. Otherwise, the part is returned with None as author.
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        text = gpg.decryptMessage(part)
        part = Parser().parsestr(text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.

    Where the part declares a character set, the payload is returned as a
    Unicode object decoded using that character set; otherwise, the payload is
    returned as obtained from the part.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # A conditional expression is used so that an empty decoded payload is
    # still returned as Unicode.

    content = unicode(payload, charset) if charset else payload
    return content, part.get_content_type()

def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part'
    having the given 'part_number', using the 'request' to obtain the content.
    """

    update = parent.copy(part_number)

    update.content, update.content_type, update.parts, author = \
        getUpdateContentFromPart(part, request)

    # Retain the author copied from the parent unless the part supplies its
    # own, as is done when creating updates from message text.

    if author:
        update.author = author

    return update

def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', using the 'request'
    to obtain the content of any parts.

    An update without parts is returned as the only element of the list.
    Otherwise, the list contains the updates obtained recursively from each
    of the parts.
    """

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates.extend(getUpdatesForFormatting(update_part, request))

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for the content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever used.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string.

    Where the update yields several updates for formatting, navigation tabs
    are produced and each alternative is enclosed in its own division, the
    first being marked as the default.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.

        if not single:
            css_class = "moinshare-default" if first else "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the content enclosed in a division. A translated notice mentioning the
    content type is shown where no formatted content can be obtained.
    """

    _ = request.getText

    text = getFormattedUpdate(update, request, fmt)

    if not text:
        text = fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)

    # Encapsulate the content.

    return "".join([
        fmt.div(on=1, css_class="moinshare-content"),
        text,
        fmt.div(on=0)
        ])

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    The result is a dictionary mapping each source name to the parameters
    parsed from its definition. An empty dictionary is returned where no
    definitions are found.
    """

    source_definitions = getWikiDict(pagename, request)

    if not source_definitions:
        return {}

    return dict([
        (name, getSourceParameters(value))
        for name, value in source_definitions.items()
        ])

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.

class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. A "cid:" reference is
        replaced by a link to the ReadMessage action on the page, identifying
        the message and the referenced part. Any other reference is returned
        unchanged.
        """

        prefix = "cid:"

        # Attributes given without a value provide None as the reference.

        if not ref or not ref.startswith(prefix):
            return ref

        return self.page.url(self.request, {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            })

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of the element 'tag' having the given 'attrs',
        rewriting the values of attributes listed in 'uri_attrs' before
        passing the element to the superclass.
        """

        new_attrs = []

        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))

        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object containing the result of feeding this
        markup through a sanitizer configured with the given 'request',
        'page' and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are initially None and are expected to be
        set on the instance before formatting.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text, sanitized and written to the request as raw HTML using
        the 'formatter'. A system message is written instead where the text
        cannot be parsed.
        """

        try:
            markup = IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)
            self.request.write(formatter.rawHTML(markup))

        except HTMLParseError as e:

            # Show the offending line only where the error identifies a line
            # actually present in the text, so that reporting the error
            # cannot itself fail.

            lines = self.raw.splitlines()
            lineno = e.lineno

            if lineno is not None and 1 <= lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the stored message number and page.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4