# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the imported getFragments before this module defines its
# own function of the same name.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    """
    Return the concatenated values of the text nodes that are direct children
    of the given DOM 'element'. Text inside nested elements is not included.
    """

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])

def children(element):

    """
    Return the serialised XML of all direct child nodes of the given DOM
    'element', concatenated in document order.
    """

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return 'text' with the XML entities for "<", ">" and "&" replaced by the
    characters themselves.
    """

    # The ampersand entity is decoded last so that doubly-escaped input such as
    # "&amp;lt;" yields "&lt;" and is not decoded twice.

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link target held by the given link 'element': for a 'feed_type'
    of "rss" this is the element's text, otherwise it is the value of the
    element's "href" attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether both 'show_content' and 'tagname' are one of "content" or
    "description".
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname', using the
    'has_member' method of 'request.dicts' where it exists, and otherwise
    testing for 'username' in the group obtained from 'request.dicts'.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    fragments = []
    for format_name, attributes, body in _getFragments(s):
        if fragment_attribute in attributes:
            fragments.append((format_name, attributes, body))
    return fragments

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header).

    Where the request provides no Accept header, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if accept:
        prefs = getContentPreferences(accept)
        return prefs.get_preferred_types(mimetypes)
    else:
        return mimetypes

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")
    if latest_timestamp:
        return latest_timestamp
    else:
        return getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an update with every attribute unset."

        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Message-related attributes.

        self.parts = None

        # Feed-related attributes.

        self.title = None
        self.link = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Store-related attributes.

        self.message_number = None

        # Store- and page-related attributes.

        self.page = None

        # Identification.
        # The path holds the part numbers leading to this update within a
        # multipart message (see the copy method).

        self.path = []

    def unique_id(self):

        """
        A unique identifier used for anchors to parts of presented updates.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times. An update
        without an updated time is ordered after one that has such a time.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a new update carrying this update's descriptive attributes and a
        copy of its path, with 'part_number' appended to the new path if given.

        The content, content type and parts are not copied: the new update is
        left with these unset.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page
        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Error classes.

class FeedError(Exception):

    "The base class of errors raised when retrieving feeds."

    pass

class FeedMissingError(FeedError):

    "Raised when a feed is not permitted or no data could be obtained for it."

    pass

class FeedContentTypeError(FeedError):

    "Raised when the content type of a retrieved feed is not supported."

    pass

# Update retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for "file:" URLs and where no data is obtained;
    FeedContentTypeError is raised where the content type of the resource is
    not one handled here.
    """

    # Prevent local file access.
    # URL schemes are case-insensitive, and URL openers commonly ignore
    # surrounding whitespace, so the URL is normalised before being tested.
    # Otherwise, "FILE:///..." would evade this check.

    normalised_url = feed_url.strip().lower()

    if normalised_url.startswith("file:"):
        raise FeedMissingError

    elif feed_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()

# Update retrieval from feeds.

def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.
    """

    # NOTE(review): 'max_entries' is not applied below: every entry found in
    # NOTE(review): the feed is returned. Confirm whether callers truncate the
    # NOTE(review): result before enforcing the limit here.

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    feed_type = None
    update = None
    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, value in events:

        # Start tags are ignored while inside an Atom source element.

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = value.localName

            # Detect the feed type and items.

            if tagname == "feed" and value.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements.
            # Outside an item, titles and links describe the channel.

            elif tagname == "title":
                events.expandNode(value)
                if update:
                    update.title = text(value)
                else:
                    channel_title = text(value)

            elif tagname == "link":
                events.expandNode(value)
                if update:
                    update.link = linktext(value, feed_type)
                else:
                    channel_link = linktext(value, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(value)

                # Obtain content where requested or, failing that, a
                # summary.

                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                    if feed_type == "atom":
                        update.content_type = value.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(value)
                            update.content_type = "application/xhtml+xml"
                        elif update.content_type in ("html", "text/html"):
                            update.content = text(value)
                            update.content_type = "text/html"
                        else:
                            update.content = text(value)
                            update.content_type = "text/plain"
                    else:
                        update.content_type = "text/html"
                        update.content = text(value)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(value)

                if update:
                    if feed_type == "atom":
                        updated = getDateTimeFromISO8601(text(value))
                    else:
                        updated = DateTime(parsedate(text(value)))
                    update.updated = updated

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = value.localName

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                # Only record an update that was actually started.

                if update is not None:
                    feed_updates.append(update)

                update = None

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates

# Update retrieval from mailboxes and multipart messages.

def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with the feed type being "mbox" and the channel details being
    None.
    """

    # NOTE(review): neither 'max_entries' nor 'show_content' is consulted
    # NOTE(review): below: every message yields an update with its content.

    mailbox = Parser().parse(feed)

    feed_updates = []

    # Parse each message from the feed as a separate update.

    for message_number, part in enumerate(mailbox.get_payload()):
        update = Update()
        update.updated = getDateTimeFromRFC2822(part.get("date"))
        update.title = part.get("subject", "Update #%d" % message_number)
        update.message_number = message_number

        update.content, update.content_type, update.parts, actual_author = \
            getUpdateContentFromPart(part, request)

        if actual_author:
            update.author = actual_author

        feed_updates.append(update)

    return ("mbox", None, None), feed_updates

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format_name, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format_name))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.
        # Without HTML amongst the preferred types, the content remains unset.

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format_name)

            if format_name == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered according to
    the sorted keys of the store.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    for key in sorted(store.keys()):
        message_text = store[key]
        update = getUpdateFromMessageText(message_text, key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    "Return an update for the given 'message_text' and 'message_number'."

    update = Update()
    message = Parser().parsestr(message_text)

    # Take the descriptive details from the message headers.

    update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # An author identity obtained from the content overrides the header value.

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part'.
    """

    # Determine whether the part has several representations.

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    elif part.get_content_subtype() == "related":
        subparts = part.get_payload()

        # Without any subparts, there is no content to present.

        if not subparts:
            return None, part.get_content_type(), subparts, None

        main_part = subparts[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    elif part.get_content_subtype() == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:
            return None, part.get_content_type(), part.get_payload(), None

    # Otherwise, just obtain the parts for separate display.

    else:
        return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part', returning a tuple containing the resulting part
    and the username associated with any verified signature (or None where no
    such signature is found).
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    "Decode the 'part', returning the decoded payload and the content type."

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Only convert to Unicode where a character set is declared.

    if charset:
        content = unicode(payload, charset)
    else:
        content = payload

    return content, part.get_content_type()

def getUpdateFromPart(parent, part, part_number, request):

    "Using the 'parent' update, return an update object for the given 'part'."

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request)
    return update

def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update'. An update without
    parts is returned on its own; otherwise, the updates obtained recursively
    from each of its parts are returned.
    """

    updates = []

    # Handle multipart/alternative and other non-related multiparts.

    if update.parts:
        for n, part in enumerate(update.parts):
            update_part = getUpdateFromPart(update, part, n, request)
            updates.extend(getUpdatesForFormatting(update_part, request))
    else:
        updates.append(update)

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    # Only the first parser is ever used.

    for parser_cls in parsers or []:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the output as a string.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.
    # This only occurs for multipart updates.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.
        # The first alternative is marked as the default.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the output as a string. Where the update cannot be formatted, a message
    mentioning its content type is produced instead.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))
    formatted = getFormattedUpdate(update, request, fmt)
    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))
    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    A dictionary mapping source names to source parameters is returned, being
    empty where no source definitions are found.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.

class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for the rewriting of references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a replacement for the given 'ref'. A "cid:" reference is
        replaced by a link to the ReadMessage action on the page, indicating
        the message and the referenced part. Other references are returned
        unchanged.
        """

        if ref.startswith("cid:"):
            part = ref[len("cid:"):]
            action_link = self.page.url(self.request, {
                "action" : "ReadMessage", "doit" : "1",
                "message" : self.message_number, "part" : part
                })
            return action_link
        else:
            return ref

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of an element having the given 'tag' and 'attrs',
        rewriting the values of attributes listed in 'uri_attrs' before
        passing the element on to the base class.
        """

        new_attrs = []
        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                new_attrs.append((attrname, self.rewrite_reference(attrvalue)))
            else:
                new_attrs.append((attrname, attrvalue))
        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object containing this markup as processed by a
        sanitizer configured with the given 'request', 'page' and
        'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are unset until assigned by the creator of
        the parser.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text, writing the sanitized HTML to the request using the
        given 'formatter', or writing a system message where the HTML cannot
        be parsed.
        """

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self.raw.splitlines()[e.lineno - 1].strip())) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the stored page and message number.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4