# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Retain the imported getFragments function, since this module defines its own
# function of the same name.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    """
    Return the concatenated values of the text nodes that are direct children
    of the given DOM 'element'. Text inside nested elements is not included.
    """

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])

def children(element):

    """
    Return the serialised XML of all child nodes of the given DOM 'element',
    concatenated in document order.
    """

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return the given 'text' with the predefined entity references for "<",
    ">" and "&" replaced by the characters they represent.
    """

    # The ampersand entity is handled last so that the result of decoding it
    # cannot be decoded a second time as part of another entity reference.
    # The references are assembled from their names so that the literals
    # survive any tool that decodes entities in this source text.

    for name, character in (("lt", "<"), ("gt", ">"), ("amp", "&")):
        text = text.replace("&%s;" % name, character)
    return text

def linktext(element, feed_type):

    """
    Return the link target described by the given link 'element' for the given
    'feed_type': the element text for "rss", the "href" attribute otherwise.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether 'show_content' requests full content ("content" or
    "description") and 'tagname' is an element providing such content.
    """

    content_names = ("content", "description")
    return show_content in content_names and tagname in content_names

# More Moin 1.9 compatibility functions.
65 66 def has_member(request, groupname, username): 67 if hasattr(request.dicts, "has_member"): 68 return request.dicts.has_member(groupname, username) 69 else: 70 return username in request.dicts.get(groupname, []) 71 72 # Fragments employ a "moinshare" attribute. 73 74 fragment_attribute = "moinshare" 75 76 def getFragments(s): 77 78 "Return all fragments in 's' having the MoinShare fragment attribute." 79 80 fragments = [] 81 for format, attributes, body in _getFragments(s): 82 if attributes.has_key(fragment_attribute): 83 fragments.append((format, attributes, body)) 84 return fragments 85 86 def getPreferredOutputTypes(request, mimetypes): 87 88 """ 89 Using the 'request', perform content negotiation, obtaining mimetypes common 90 to the fragment (given by 'mimetypes') and the client (found in the Accept 91 header). 92 """ 93 94 accept = getHeader(request, "Accept", "HTTP") 95 if accept: 96 prefs = getContentPreferences(accept) 97 return prefs.get_preferred_types(mimetypes) 98 else: 99 return mimetypes 100 101 def getUpdatedTime(metadata): 102 103 """ 104 Return the last updated time based on the given 'metadata', using the 105 current time if no explicit last modified time is specified. 106 """ 107 108 # NOTE: We could attempt to get the last edit time of a fragment. 109 110 latest_timestamp = metadata.get("last-modified") 111 if latest_timestamp: 112 return latest_timestamp 113 else: 114 return getCurrentTime() 115 116 # Entry/update classes. 117 118 class Update: 119 120 "A feed update entry." 121 122 def __init__(self): 123 self.title = None 124 self.link = None 125 self.content = None 126 self.content_type = None 127 self.updated = None 128 self.author = None 129 130 # Page-related attributes. 131 132 self.fragment = None 133 self.preferred = None 134 135 # Message-related attributes. 136 137 self.message_number = None 138 self.parts = None 139 140 # Message- and page-related attributes. 141 142 self.page = None 143 144 # Identification. 
145 146 self.path = [] 147 148 def unique_id(self): 149 return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path))) 150 151 def __cmp__(self, other): 152 if self.updated is None and other.updated is not None: 153 return 1 154 elif self.updated is not None and other.updated is None: 155 return -1 156 else: 157 return cmp(self.updated, other.updated) 158 159 def copy(self, part_number=None): 160 update = Update() 161 update.title = self.title 162 update.link = self.link 163 update.updated = self.updated 164 update.author = self.author 165 update.fragment = self.fragment 166 update.preferred = self.preferred 167 update.message_number = self.message_number 168 update.page = self.page 169 update.path = self.path[:] 170 if part_number is not None: 171 update.path.append(part_number) 172 return update 173 174 # Error classes. 175 176 class FeedError(Exception): 177 pass 178 179 class FeedMissingError(FeedError): 180 pass 181 182 class FeedContentTypeError(FeedError): 183 pass 184 185 # Update retrieval from URLs. 186 187 def getUpdates(request, feed_url, max_entries, show_content): 188 189 """ 190 Using the given 'request', retrieve from 'feed_url' up to the given number 191 'max_entries' of update entries. The 'show_content' parameter can indicate 192 that a "summary" is to be obtained for each update, that the "content" of 193 each update is to be obtained (falling back to a summary if no content is 194 provided), or no content (indicated by a false value) is to be obtained. 195 196 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 197 returned. 198 """ 199 200 # Prevent local file access. 201 202 if feed_url.startswith("file:"): 203 raise FeedMissingError 204 205 elif feed_url.startswith("imap"): 206 reader = imapreader 207 208 else: 209 reader = None 210 211 # Obtain the resource, using a cached version if appropriate. 
212 213 max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) 214 data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader) 215 if not data: 216 raise FeedMissingError 217 218 # Interpret the cached feed. 219 220 f = StringIO(data) 221 try: 222 _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f) 223 224 if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"): 225 return getUpdatesFromFeed(f, max_entries, show_content) 226 227 elif content_type == "multipart/mixed": 228 return getUpdatesFromMailbox(f, max_entries, show_content, request) 229 230 else: 231 raise FeedContentTypeError 232 233 finally: 234 f.close() 235 236 # Update retrieval from feeds. 237 238 def getUpdatesFromFeed(feed, max_entries, show_content): 239 240 """ 241 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 242 The 'show_content' parameter can indicate that a "summary" is to be obtained 243 for each update, that the "content" of each update is to be obtained 244 (falling back to a summary if no content is provided), or no content 245 (indicated by a false value) is to be obtained. 246 247 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 248 returned. 249 """ 250 251 feed_updates = [] 252 253 # Parse each node from the feed. 254 255 channel_title = channel_link = None 256 257 feed_type = None 258 update = None 259 in_source = False 260 261 events = xml.dom.pulldom.parse(feed) 262 263 for event, value in events: 264 265 if not in_source and event == xml.dom.pulldom.START_ELEMENT: 266 tagname = value.localName 267 268 # Detect the feed type and items. 269 270 if tagname == "feed" and value.namespaceURI == ATOM_NS: 271 feed_type = "atom" 272 273 elif tagname == "rss": 274 feed_type = "rss" 275 276 # Detect items. 
277 278 elif feed_type == "rss" and tagname == "item" or \ 279 feed_type == "atom" and tagname == "entry": 280 281 update = Update() 282 283 # Detect source declarations. 284 285 elif feed_type == "atom" and tagname == "source": 286 in_source = True 287 288 # Handle item elements. 289 290 elif tagname == "title": 291 events.expandNode(value) 292 if update: 293 update.title = text(value) 294 else: 295 channel_title = text(value) 296 297 elif tagname == "link": 298 events.expandNode(value) 299 if update: 300 update.link = linktext(value, feed_type) 301 else: 302 channel_link = linktext(value, feed_type) 303 304 elif show_content and ( 305 feed_type == "atom" and tagname in ("content", "summary") or 306 feed_type == "rss" and tagname == "description"): 307 308 events.expandNode(value) 309 310 # Obtain content where requested or, failing that, a 311 # summary. 312 313 if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): 314 if feed_type == "atom": 315 update.content_type = value.getAttribute("type") or "text" 316 317 # Normalise the content types and extract the 318 # content. 
319 320 if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): 321 update.content = children(value) 322 update.content_type = "application/xhtml+xml" 323 elif update.content_type in ("html", "text/html"): 324 update.content = text(value) 325 update.content_type = "text/html" 326 else: 327 update.content = text(value) 328 update.content_type = "text/plain" 329 else: 330 update.content_type = "text/html" 331 update.content = text(value) 332 333 elif feed_type == "atom" and tagname == "updated" or \ 334 feed_type == "rss" and tagname == "pubDate": 335 336 events.expandNode(value) 337 338 if update: 339 if feed_type == "atom": 340 value = getDateTimeFromISO8601(text(value)) 341 else: 342 value = DateTime(parsedate(text(value))) 343 update.updated = value 344 345 elif event == xml.dom.pulldom.END_ELEMENT: 346 tagname = value.localName 347 348 if feed_type == "rss" and tagname == "item" or \ 349 feed_type == "atom" and tagname == "entry": 350 351 feed_updates.append(update) 352 353 update = None 354 355 elif feed_type == "atom" and tagname == "source": 356 in_source = False 357 358 return (feed_type, channel_title, channel_link), feed_updates 359 360 # Update retrieval from mailboxes and multipart messages. 361 362 def getUpdatesFromMailbox(feed, max_entries, show_content, request): 363 364 """ 365 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 366 The 'show_content' parameter can indicate that a "summary" is to be obtained 367 for each update, that the "content" of each update is to be obtained 368 (falling back to a summary if no content is provided), or no content 369 (indicated by a false value) is to be obtained. 370 371 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 372 returned. 373 """ 374 375 mailbox = Parser().parse(feed) 376 377 feed_updates = [] 378 379 # Parse each message from the feed as a separate update. 
380 381 for message_number, part in enumerate(mailbox.get_payload()): 382 update = Update() 383 update.fragment = update.updated = getDateTimeFromRFC2822(part.get("date")) 384 update.title = part.get("subject", "Update #%d" % message_number) 385 update.message_number = message_number 386 387 update.content, update.content_type, update.parts, actual_author = \ 388 getUpdateContentFromPart(part, request) 389 390 if actual_author: 391 update.author = actual_author 392 393 feed_updates.append(update) 394 395 return ("mbox", None, None), feed_updates 396 397 # Update retrieval from pages. 398 399 def getUpdatesFromPage(page, request): 400 401 """ 402 Get updates from the given 'page' using the 'request'. A list of update 403 objects is returned. 404 """ 405 406 updates = [] 407 408 # NOTE: Use the updated datetime from the page for updates. 409 # NOTE: The published and updated details would need to be deduced from 410 # NOTE: the page history instead of being taken from the page as a whole. 411 412 metadata = getMetadata(page) 413 updated = getUpdatedTime(metadata) 414 415 # Get the fragment regions for the page. 416 417 for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())): 418 419 update = Update() 420 421 # Produce a fragment identifier. 422 # NOTE: Choose a more robust identifier where none is explicitly given. 423 424 update.fragment = attributes.get("fragment", str(n)) 425 update.title = attributes.get("summary", "Update #%d" % n) 426 427 # Get the preferred content types available for the fragment. 428 429 update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format)) 430 431 # Try and obtain some suitable content for the entry. 432 # NOTE: Could potentially get a summary for the fragment. 
433 434 update.content = None 435 436 if "text/html" in update.preferred: 437 parser_cls = getParserClass(request, format) 438 439 if format == "html": 440 update.content = body 441 elif hasattr(parser_cls, "formatForOutputType"): 442 update.content = formatTextForOutputType(body, request, parser_cls, "text/html") 443 else: 444 fmt = request.html_formatter 445 fmt.setPage(page) 446 update.content = formatText(body, request, fmt, parser_cls) 447 448 update.content_type = "text/html" 449 450 update.page = page 451 452 # NOTE: The anchor would be supported in the page, but this requires 453 # NOTE: formatter modifications for the regions providing updates. 454 455 update.link = page.url(request, anchor=update.fragment) 456 update.updated = updated 457 458 updates.append(update) 459 460 return updates 461 462 # Update retrieval from message stores. 463 464 def getUpdatesFromStore(page, request): 465 466 """ 467 Get updates from the message store associated with the given 'page' using 468 the 'request'. A list of update objects is returned. 469 """ 470 471 updates = [] 472 473 metadata = getMetadata(page) 474 updated = getUpdatedTime(metadata) 475 476 store = ItemStore(page, "messages", "message-locks") 477 478 keys = store.keys() 479 keys.sort() 480 481 for key in keys: 482 message_text = store[key] 483 update = getUpdateFromMessageText(message_text, key, request) 484 update.page = page 485 updates.append(update) 486 487 return updates 488 489 def getUpdateFromMessageText(message_text, message_number, request): 490 491 "Return an update for the given 'message_text' and 'message_number'." 492 493 update = Update() 494 message = Parser().parsestr(message_text) 495 496 # Produce a fragment identifier. 
497 498 update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date")) 499 update.title = message.get("subject", "Update #%d" % message_number) 500 update.author = message.get("moin-user") 501 502 update.message_number = message_number 503 504 update.content, update.content_type, update.parts, actual_author = \ 505 getUpdateContentFromPart(message, request) 506 507 if actual_author: 508 update.author = actual_author 509 510 return update 511 512 def getUpdateContentFromPart(part, request): 513 514 """ 515 Return decoded content, the content type, any subparts, and any author 516 identity in a tuple for a given 'part'. 517 """ 518 519 # Determine whether the part has several representations. 520 521 # For a single part, use it as the update content. 522 523 if not part.is_multipart(): 524 content, content_type = getPartContent(part) 525 return content, content_type, None, None 526 527 # For a collection of related parts, use the first as the update content 528 # and assume that the formatter will reference the other parts. 529 530 elif part.get_content_subtype() == "related": 531 main_part = part.get_payload()[0] 532 content, content_type = getPartContent(main_part) 533 return content, content_type, [main_part], None 534 535 # Encrypted content cannot be meaningfully separated. 536 537 elif part.get_content_subtype() == "encrypted": 538 try: 539 part, author = getDecryptedParts(part, request) 540 content, content_type, parts, _author = getUpdateContentFromPart(part, request) 541 return content, content_type, parts, author 542 except MoinMessageError: 543 return None, part.get_content_type(), part.get_payload(), None 544 545 # Otherwise, just obtain the parts for separate display. 546 547 else: 548 return None, part.get_content_type(), part.get_payload(), None 549 550 def getDecryptedParts(part, request): 551 552 "Decrypt the given 'part', returning the decoded content." 
553 554 homedir = get_homedir(request) 555 gpg = GPG(homedir) 556 557 # Decrypt the part. 558 559 if is_encrypted(part): 560 text = gpg.decryptMessage(part) 561 part = Parser().parsestr(text) 562 563 # Extract any signature details. 564 565 if is_signed(part): 566 result = gpg.verifyMessage(part) 567 if result: 568 fingerprint, identity, content = result 569 return content, get_username_for_fingerprint(request, fingerprint) 570 571 return part, None 572 573 def getPartContent(part): 574 575 "Decode the 'part', returning the decoded payload and the content type." 576 577 charset = part.get_content_charset() 578 payload = part.get_payload(decode=True) 579 return (charset and unicode(payload, charset) or payload), part.get_content_type() 580 581 def getUpdateFromPart(parent, part, part_number, request): 582 583 "Using the 'parent' update, return an update object for the given 'part'." 584 585 update = parent.copy(part_number) 586 update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request) 587 return update 588 589 def getUpdatesForFormatting(update, request): 590 591 "Get a list of updates for formatting given 'update'." 592 593 updates = [] 594 595 # Handle multipart/alternative and other non-related multiparts. 596 597 if update.parts: 598 for n, part in enumerate(update.parts): 599 update_part = getUpdateFromPart(update, part, n, request) 600 updates += getUpdatesForFormatting(update_part, request) 601 else: 602 updates.append(update) 603 604 return updates 605 606 # Update formatting. 607 608 def getFormattedUpdate(update, request, fmt): 609 610 """ 611 Return the formatted form of the given 'update' using the given 'request' 612 and 'fmt'. 613 """ 614 615 # NOTE: Some control over the HTML and XHTML should be exercised. 
616 617 if update.content: 618 if update.content_type == "text/html" and update.message_number is not None: 619 parsers = [get_make_parser(update.page, update.message_number)] 620 else: 621 parsers = getParsersForContentType(request.cfg, update.content_type) 622 623 if parsers: 624 for parser_cls in parsers: 625 if hasattr(parser_cls, "formatForOutputType"): 626 return formatTextForOutputType(update.content, request, parser_cls, "text/html") 627 else: 628 return formatText(update.content, request, fmt, parser_cls=parser_cls) 629 break 630 else: 631 return None 632 else: 633 return None 634 635 def formatUpdate(update, request, fmt): 636 637 "Format the given 'update' using the given 'request' and 'fmt'." 638 639 result = [] 640 append = result.append 641 642 updates = getUpdatesForFormatting(update, request) 643 single = len(updates) == 1 644 645 # Format some navigation tabs. 646 647 if not single: 648 append(fmt.div(on=1, css_class="moinshare-alternatives")) 649 650 first = True 651 652 for update_part in updates: 653 append(fmt.url(1, "#%s" % update_part.unique_id())) 654 append(fmt.text(update_part.content_type)) 655 append(fmt.url(0)) 656 657 first = False 658 659 append(fmt.div(on=0)) 660 661 # Format the content. 662 663 first = True 664 665 for update_part in updates: 666 667 # Encapsulate each alternative if many exist. 668 669 if not single: 670 css_class = first and "moinshare-default" or "moinshare-other" 671 append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id())) 672 673 # Include the content. 674 675 append(formatUpdatePart(update_part, request, fmt)) 676 677 if not single: 678 append(fmt.div(on=0)) 679 680 first = False 681 682 return "".join(result) 683 684 def formatUpdatePart(update, request, fmt): 685 686 "Format the given 'update' using the given 'request' and 'fmt'." 687 688 _ = request.getText 689 690 result = [] 691 append = result.append 692 693 # Encapsulate the content. 
694 695 append(fmt.div(on=1, css_class="moinshare-content")) 696 text = getFormattedUpdate(update, request, fmt) 697 if text: 698 append(text) 699 else: 700 append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)) 701 append(fmt.div(on=0)) 702 703 return "".join(result) 704 705 # Source management. 706 707 def getUpdateSources(pagename, request): 708 709 "Return the update sources from the given 'pagename' using the 'request'." 710 711 sources = {} 712 713 source_definitions = getWikiDict(pagename, request) 714 715 if source_definitions: 716 for name, value in source_definitions.items(): 717 sources[name] = getSourceParameters(value) 718 719 return sources 720 721 def getSourceParameters(source_definition): 722 723 "Return the parameters from the given 'source_definition' string." 724 725 return parseDictEntry(source_definition, ("type", "location")) 726 727 # HTML parsing support. 728 729 class IncomingHTMLSanitizer(HTMLSanitizer): 730 731 "An HTML parser that rewrites references to attachments." 732 733 def __init__(self, out, request, page, message_number): 734 HTMLSanitizer.__init__(self, out) 735 self.request = request 736 self.message_number = message_number 737 self.page = page 738 739 def rewrite_reference(self, ref): 740 if ref.startswith("cid:"): 741 part = ref[len("cid:"):] 742 action_link = self.page.url(self.request, { 743 "action" : "ReadMessage", "doit" : "1", 744 "message" : self.message_number, "part" : part 745 }) 746 return action_link 747 else: 748 return ref 749 750 def handle_starttag(self, tag, attrs): 751 new_attrs = [] 752 for attrname, attrvalue in attrs: 753 if attrname in self.uri_attrs: 754 new_attrs.append((attrname, self.rewrite_reference(attrvalue))) 755 else: 756 new_attrs.append((attrname, attrvalue)) 757 HTMLSanitizer.handle_starttag(self, tag, new_attrs) 758 759 class IncomingMarkup(Markup): 760 761 "A special markup processor for incoming HTML." 
762 763 def sanitize(self, request, page, message_number): 764 out = getwriter("utf-8")(StringIO()) 765 sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) 766 sanitizer.feed(self.stripentities(keepxmlentities=True)) 767 return IncomingMarkup(unicode(out.getvalue(), "utf-8")) 768 769 class IncomingHTMLParser: 770 771 "Filters and rewrites incoming HTML content." 772 773 def __init__(self, raw, request, **kw): 774 self.raw = raw 775 self.request = request 776 self.message_number = None 777 self.page = None 778 779 def format(self, formatter, **kw): 780 781 "Send the text." 782 783 try: 784 self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) 785 except HTMLParseError, e: 786 self.request.write(formatter.sysmsg(1) + 787 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, 788 self.raw.splitlines()[e.lineno - 1].strip())) + 789 formatter.sysmsg(0)) 790 791 class MakeIncomingHTMLParser: 792 793 "A class that makes parsers configured for messages." 794 795 def __init__(self, page, message_number): 796 797 "Initialise with state that is used to configure instantiated parsers." 798 799 self.message_number = message_number 800 self.page = page 801 802 def __call__(self, *args, **kw): 803 parser = IncomingHTMLParser(*args, **kw) 804 parser.message_number = self.message_number 805 parser.page = self.page 806 return parser 807 808 def get_make_parser(page, message_number): 809 810 """ 811 Return a callable that will return a parser configured for the message from 812 the given 'page' with the given 'message_number'. 813 """ 814 815 return MakeIncomingHTMLParser(page, message_number) 816 817 # vim: tabstop=4 expandtab shiftwidth=4