# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the getFragments function provided by MoinSupport, since
# this module redefines the name below.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    "Return the concatenated text node content of 'element'."

    nodes = []
    for node in element.childNodes:
        if node.nodeType == node.TEXT_NODE:
            nodes.append(node.nodeValue)
    return "".join(nodes)

def children(element):

    "Return the serialised markup of the child nodes of 'element'."

    nodes = []
    for node in element.childNodes:
        nodes.append(node.toxml())
    return "".join(nodes)

def unescape(text):

    """
    Replace the basic XML entity references in 'text' with the characters they
    represent. The ampersand entity is replaced last so that the earlier
    replacements cannot expose new entity references for a second round of
    substitution (for example, "&amp;lt;" correctly becomes "&lt;", not "<").
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link location from 'element' according to 'feed_type': RSS
    supplies the URL as text content, whereas Atom supplies it in the href
    attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content was requested (via 'show_content') and is
    provided by an element with the given 'tagname'.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
65 66 def has_member(request, groupname, username): 67 if hasattr(request.dicts, "has_member"): 68 return request.dicts.has_member(groupname, username) 69 else: 70 return username in request.dicts.get(groupname, []) 71 72 # Fragments employ a "moinshare" attribute. 73 74 fragment_attribute = "moinshare" 75 76 def getFragments(s): 77 78 "Return all fragments in 's' having the MoinShare fragment attribute." 79 80 fragments = [] 81 for format, attributes, body in _getFragments(s): 82 if attributes.has_key(fragment_attribute): 83 fragments.append((format, attributes, body)) 84 return fragments 85 86 def getPreferredOutputTypes(request, mimetypes): 87 88 """ 89 Using the 'request', perform content negotiation, obtaining mimetypes common 90 to the fragment (given by 'mimetypes') and the client (found in the Accept 91 header). 92 """ 93 94 accept = getHeader(request, "Accept", "HTTP") 95 if accept: 96 prefs = getContentPreferences(accept) 97 return prefs.get_preferred_types(mimetypes) 98 else: 99 return mimetypes 100 101 def getUpdatedTime(metadata): 102 103 """ 104 Return the last updated time based on the given 'metadata', using the 105 current time if no explicit last modified time is specified. 106 """ 107 108 # NOTE: We could attempt to get the last edit time of a fragment. 109 110 latest_timestamp = metadata.get("last-modified") 111 if latest_timestamp: 112 return latest_timestamp 113 else: 114 return getCurrentTime() 115 116 # Entry/update classes. 117 118 class Update: 119 120 "A feed update entry." 121 122 def __init__(self): 123 self.title = None 124 self.link = None 125 self.content = None 126 self.content_type = None 127 self.updated = None 128 self.author = None 129 130 # Page-related attributes. 131 132 self.fragment = None 133 self.preferred = None 134 135 # Message-related attributes. 136 137 self.message_number = None 138 self.parts = None 139 140 # Message- and page-related attributes. 141 142 self.page = None 143 144 # Identification. 
145 146 self.path = [] 147 148 def unique_id(self): 149 return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path))) 150 151 def __cmp__(self, other): 152 if self.updated is None and other.updated is not None: 153 return 1 154 elif self.updated is not None and other.updated is None: 155 return -1 156 else: 157 return cmp(self.updated, other.updated) 158 159 def copy(self, part_number=None): 160 update = Update() 161 update.title = self.title 162 update.link = self.link 163 update.updated = self.updated 164 update.author = self.author 165 update.fragment = self.fragment 166 update.preferred = self.preferred 167 update.message_number = self.message_number 168 update.page = self.page 169 update.path = self.path[:] 170 if part_number is not None: 171 update.path.append(part_number) 172 return update 173 174 # Error classes. 175 176 class FeedError(Exception): 177 pass 178 179 class FeedMissingError(FeedError): 180 pass 181 182 class FeedContentTypeError(FeedError): 183 pass 184 185 # Feed retrieval from URLs. 186 187 def getUpdates(request, feed_url, max_entries, show_content): 188 189 """ 190 Using the given 'request', retrieve from 'feed_url' up to the given number 191 'max_entries' of update entries. The 'show_content' parameter can indicate 192 that a "summary" is to be obtained for each update, that the "content" of 193 each update is to be obtained (falling back to a summary if no content is 194 provided), or no content (indicated by a false value) is to be obtained. 195 196 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 197 returned. 198 """ 199 200 feed_updates = [] 201 202 # Obtain the resource, using a cached version if appropriate. 203 204 max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) 205 data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age) 206 if not data: 207 raise FeedMissingError 208 209 # Interpret the cached feed. 
210 211 feed = StringIO(data) 212 _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed) 213 214 if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"): 215 raise FeedContentTypeError 216 217 try: 218 # Parse each node from the feed. 219 220 channel_title = channel_link = None 221 222 feed_type = None 223 update = None 224 in_source = False 225 226 events = xml.dom.pulldom.parse(feed) 227 228 for event, value in events: 229 230 if not in_source and event == xml.dom.pulldom.START_ELEMENT: 231 tagname = value.localName 232 233 # Detect the feed type and items. 234 235 if tagname == "feed" and value.namespaceURI == ATOM_NS: 236 feed_type = "atom" 237 238 elif tagname == "rss": 239 feed_type = "rss" 240 241 # Detect items. 242 243 elif feed_type == "rss" and tagname == "item" or \ 244 feed_type == "atom" and tagname == "entry": 245 246 update = Update() 247 248 # Detect source declarations. 249 250 elif feed_type == "atom" and tagname == "source": 251 in_source = True 252 253 # Handle item elements. 254 255 elif tagname == "title": 256 events.expandNode(value) 257 if update: 258 update.title = text(value) 259 else: 260 channel_title = text(value) 261 262 elif tagname == "link": 263 events.expandNode(value) 264 if update: 265 update.link = linktext(value, feed_type) 266 else: 267 channel_link = linktext(value, feed_type) 268 269 elif show_content and ( 270 feed_type == "atom" and tagname in ("content", "summary") or 271 feed_type == "rss" and tagname == "description"): 272 273 events.expandNode(value) 274 275 # Obtain content where requested or, failing that, a 276 # summary. 277 278 if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): 279 if feed_type == "atom": 280 update.content_type = value.getAttribute("type") or "text" 281 282 # Normalise the content types and extract the 283 # content. 
284 285 if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): 286 update.content = children(value) 287 update.content_type = "application/xhtml+xml" 288 elif update.content_type in ("html", "text/html"): 289 update.content = text(value) 290 update.content_type = "text/html" 291 else: 292 update.content = text(value) 293 update.content_type = "text/plain" 294 else: 295 update.content_type = "text/html" 296 update.content = text(value) 297 298 elif feed_type == "atom" and tagname == "updated" or \ 299 feed_type == "rss" and tagname == "pubDate": 300 301 events.expandNode(value) 302 303 if update: 304 if feed_type == "atom": 305 value = getDateTimeFromISO8601(text(value)) 306 else: 307 value = DateTime(parsedate(text(value))) 308 update.updated = value 309 310 elif event == xml.dom.pulldom.END_ELEMENT: 311 tagname = value.localName 312 313 if feed_type == "rss" and tagname == "item" or \ 314 feed_type == "atom" and tagname == "entry": 315 316 feed_updates.append(update) 317 318 update = None 319 320 elif feed_type == "atom" and tagname == "source": 321 in_source = False 322 323 finally: 324 feed.close() 325 326 return (feed_type, channel_title, channel_link), feed_updates 327 328 # Update retrieval from pages. 329 330 def getUpdatesFromPage(page, request): 331 332 """ 333 Get updates from the given 'page' using the 'request'. A list of update 334 objects is returned. 335 """ 336 337 updates = [] 338 339 # NOTE: Use the updated datetime from the page for updates. 340 # NOTE: The published and updated details would need to be deduced from 341 # NOTE: the page history instead of being taken from the page as a whole. 342 343 metadata = getMetadata(page) 344 updated = getUpdatedTime(metadata) 345 346 # Get the fragment regions for the page. 347 348 for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())): 349 350 update = Update() 351 352 # Produce a fragment identifier. 
353 # NOTE: Choose a more robust identifier where none is explicitly given. 354 355 update.fragment = attributes.get("fragment", str(n)) 356 update.title = attributes.get("summary", "Update #%d" % n) 357 358 # Get the preferred content types available for the fragment. 359 360 update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format)) 361 362 # Try and obtain some suitable content for the entry. 363 # NOTE: Could potentially get a summary for the fragment. 364 365 update.content = None 366 367 if "text/html" in update.preferred: 368 parser_cls = getParserClass(request, format) 369 370 if format == "html": 371 update.content = body 372 elif hasattr(parser_cls, "formatForOutputType"): 373 update.content = formatTextForOutputType(body, request, parser_cls, "text/html") 374 else: 375 fmt = request.html_formatter 376 fmt.setPage(page) 377 update.content = formatText(body, request, fmt, parser_cls) 378 379 update.content_type = "text/html" 380 381 update.page = page 382 383 # NOTE: The anchor would be supported in the page, but this requires 384 # NOTE: formatter modifications for the regions providing updates. 385 386 update.link = page.url(request, anchor=update.fragment) 387 update.updated = updated 388 389 updates.append(update) 390 391 return updates 392 393 # Update retrieval from message stores. 394 395 def getUpdatesFromStore(page, request): 396 397 """ 398 Get updates from the message store associated with the given 'page' using 399 the 'request'. A list of update objects is returned. 
400 """ 401 402 updates = [] 403 404 metadata = getMetadata(page) 405 updated = getUpdatedTime(metadata) 406 407 store = ItemStore(page, "messages", "message-locks") 408 409 keys = store.keys() 410 keys.sort() 411 412 for key in keys: 413 message_text = store[key] 414 update = getUpdateFromMessageText(message_text, key, request) 415 update.page = page 416 updates.append(update) 417 418 return updates 419 420 def getUpdateFromMessageText(message_text, message_number, request): 421 422 "Return an update for the given 'message_text' and 'message_number'." 423 424 update = Update() 425 message = Parser().parsestr(message_text) 426 427 # Produce a fragment identifier. 428 429 update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date")) 430 update.title = message.get("subject", "Update #%d" % message_number) 431 update.author = message.get("moin-user") 432 433 update.message_number = message_number 434 435 update.content, update.content_type, update.parts, actual_author = \ 436 getUpdateContentFromPart(message, request) 437 438 if actual_author: 439 update.author = actual_author 440 441 return update 442 443 def getUpdateContentFromPart(part, request): 444 445 """ 446 Return decoded content, the content type, any subparts, and any author 447 identity in a tuple for a given 'part'. 448 """ 449 450 # Determine whether the part has several representations. 451 452 # For a single part, use it as the update content. 453 454 if not part.is_multipart(): 455 content, content_type = getPartContent(part) 456 return content, content_type, None, None 457 458 # For a collection of related parts, use the first as the update content 459 # and assume that the formatter will reference the other parts. 460 461 elif part.get_content_subtype() == "related": 462 main_part = part.get_payload()[0] 463 content, content_type = getPartContent(main_part) 464 return content, content_type, [main_part], None 465 466 # Encrypted content cannot be meaningfully separated. 
467 468 elif part.get_content_subtype() == "encrypted": 469 try: 470 part, author = getDecryptedParts(part, request) 471 content, content_type, parts, _author = getUpdateContentFromPart(part, request) 472 return content, content_type, parts, author 473 except MoinMessageError: 474 return None, part.get_content_type(), part.get_payload(), None 475 476 # Otherwise, just obtain the parts for separate display. 477 478 else: 479 return None, part.get_content_type(), part.get_payload(), None 480 481 def getDecryptedParts(part, request): 482 483 "Decrypt the given 'part', returning the decoded content." 484 485 homedir = get_homedir(request) 486 gpg = GPG(homedir) 487 488 # Decrypt the part. 489 490 if is_encrypted(part): 491 text = gpg.decryptMessage(part) 492 part = Parser().parsestr(text) 493 494 # Extract any signature details. 495 496 if is_signed(part): 497 result = gpg.verifyMessage(part) 498 if result: 499 fingerprint, identity, content = result 500 return content, get_username_for_fingerprint(request, fingerprint) 501 502 return part, None 503 504 def getPartContent(part): 505 506 "Decode the 'part', returning the decoded payload and the content type." 507 508 charset = part.get_content_charset() 509 payload = part.get_payload(decode=True) 510 return (charset and unicode(payload, charset) or payload), part.get_content_type() 511 512 def getUpdateFromPart(parent, part, part_number, request): 513 514 "Using the 'parent' update, return an update object for the given 'part'." 515 516 update = parent.copy(part_number) 517 update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request) 518 return update 519 520 def getUpdatesForFormatting(update, request): 521 522 "Get a list of updates for formatting given 'update'." 523 524 updates = [] 525 526 # Handle multipart/alternative and other non-related multiparts. 
527 528 if update.parts: 529 for n, part in enumerate(update.parts): 530 update_part = getUpdateFromPart(update, part, n, request) 531 updates += getUpdatesForFormatting(update_part, request) 532 else: 533 updates.append(update) 534 535 return updates 536 537 # Update formatting. 538 539 def getFormattedUpdate(update, request, fmt): 540 541 """ 542 Return the formatted form of the given 'update' using the given 'request' 543 and 'fmt'. 544 """ 545 546 # NOTE: Some control over the HTML and XHTML should be exercised. 547 548 if update.content: 549 if update.content_type == "text/html" and update.message_number is not None: 550 parsers = [get_make_parser(update.page, update.message_number)] 551 else: 552 parsers = getParsersForContentType(request.cfg, update.content_type) 553 554 if parsers: 555 for parser_cls in parsers: 556 if hasattr(parser_cls, "formatForOutputType"): 557 return formatTextForOutputType(update.content, request, parser_cls, "text/html") 558 else: 559 return formatText(update.content, request, fmt, parser_cls=parser_cls) 560 break 561 else: 562 return None 563 else: 564 return None 565 566 def formatUpdate(update, request, fmt): 567 568 "Format the given 'update' using the given 'request' and 'fmt'." 569 570 result = [] 571 append = result.append 572 573 updates = getUpdatesForFormatting(update, request) 574 single = len(updates) == 1 575 576 # Format some navigation tabs. 577 578 if not single: 579 append(fmt.div(on=1, css_class="moinshare-alternatives")) 580 581 first = True 582 583 for update_part in updates: 584 append(fmt.url(1, "#%s" % update_part.unique_id())) 585 append(fmt.text(update_part.content_type)) 586 append(fmt.url(0)) 587 588 first = False 589 590 append(fmt.div(on=0)) 591 592 # Format the content. 593 594 first = True 595 596 for update_part in updates: 597 598 # Encapsulate each alternative if many exist. 
599 600 if not single: 601 css_class = first and "moinshare-default" or "moinshare-other" 602 append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id())) 603 604 # Include the content. 605 606 append(formatUpdatePart(update_part, request, fmt)) 607 608 if not single: 609 append(fmt.div(on=0)) 610 611 first = False 612 613 return "".join(result) 614 615 def formatUpdatePart(update, request, fmt): 616 617 "Format the given 'update' using the given 'request' and 'fmt'." 618 619 _ = request.getText 620 621 result = [] 622 append = result.append 623 624 # Encapsulate the content. 625 626 append(fmt.div(on=1, css_class="moinshare-content")) 627 text = getFormattedUpdate(update, request, fmt) 628 if text: 629 append(text) 630 else: 631 append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)) 632 append(fmt.div(on=0)) 633 634 return "".join(result) 635 636 # Source management. 637 638 def getUpdateSources(pagename, request): 639 640 "Return the update sources from the given 'pagename' using the 'request'." 641 642 sources = {} 643 644 source_definitions = getWikiDict(pagename, request) 645 646 if source_definitions: 647 for name, value in source_definitions.items(): 648 sources[name] = getSourceParameters(value) 649 650 return sources 651 652 def getSourceParameters(source_definition): 653 654 "Return the parameters from the given 'source_definition' string." 655 656 return parseDictEntry(source_definition, ("type", "location")) 657 658 # HTML parsing support. 659 660 class IncomingHTMLSanitizer(HTMLSanitizer): 661 662 "An HTML parser that rewrites references to attachments." 
663 664 def __init__(self, out, request, page, message_number): 665 HTMLSanitizer.__init__(self, out) 666 self.request = request 667 self.message_number = message_number 668 self.page = page 669 670 def rewrite_reference(self, ref): 671 if ref.startswith("cid:"): 672 part = ref[len("cid:"):] 673 action_link = self.page.url(self.request, { 674 "action" : "ReadMessage", "doit" : "1", 675 "message" : self.message_number, "part" : part 676 }) 677 return action_link 678 else: 679 return ref 680 681 def handle_starttag(self, tag, attrs): 682 new_attrs = [] 683 for attrname, attrvalue in attrs: 684 if attrname in self.uri_attrs: 685 new_attrs.append((attrname, self.rewrite_reference(attrvalue))) 686 else: 687 new_attrs.append((attrname, attrvalue)) 688 HTMLSanitizer.handle_starttag(self, tag, new_attrs) 689 690 class IncomingMarkup(Markup): 691 692 "A special markup processor for incoming HTML." 693 694 def sanitize(self, request, page, message_number): 695 out = getwriter("utf-8")(StringIO()) 696 sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) 697 sanitizer.feed(self.stripentities(keepxmlentities=True)) 698 return IncomingMarkup(unicode(out.getvalue(), "utf-8")) 699 700 class IncomingHTMLParser: 701 702 "Filters and rewrites incoming HTML content." 703 704 def __init__(self, raw, request, **kw): 705 self.raw = raw 706 self.request = request 707 self.message_number = None 708 self.page = None 709 710 def format(self, formatter, **kw): 711 712 "Send the text." 713 714 try: 715 self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) 716 except HTMLParseError, e: 717 self.request.write(formatter.sysmsg(1) + 718 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, 719 self.raw.splitlines()[e.lineno - 1].strip())) + 720 formatter.sysmsg(0)) 721 722 class MakeIncomingHTMLParser: 723 724 "A class that makes parsers configured for messages." 
725 726 def __init__(self, page, message_number): 727 728 "Initialise with state that is used to configure instantiated parsers." 729 730 self.message_number = message_number 731 self.page = page 732 733 def __call__(self, *args, **kw): 734 parser = IncomingHTMLParser(*args, **kw) 735 parser.message_number = self.message_number 736 parser.page = self.page 737 return parser 738 739 def get_make_parser(page, message_number): 740 741 """ 742 Return a callable that will return a parser configured for the message from 743 the given 'page' with the given 'message_number'. 744 """ 745 746 return MakeIncomingHTMLParser(page, message_number) 747 748 # vim: tabstop=4 expandtab shiftwidth=4