# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinShare library

    @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
    @copyright: 2003-2006 Edgewall Software
    @copyright: 2006 MoinMoin:AlexanderSchremmer
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Retain the fragment retrieval function obtained from the imports above
# (presumably one of the star imports) under a private name, since this module
# defines a getFragments function of its own.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# The entity names handled by unescape together with their replacement
# characters. The "amp" entity must remain last so that text which has been
# escaped twice is only unescaped by one level.

_UNESCAPED_ENTITIES = (("lt", "<"), ("gt", ">"), ("amp", "&"))

# Utility functions.

def text(element):

    "Return the concatenated values of the text nodes directly inside 'element'."

    return "".join([node.nodeValue for node in element.childNodes
        if node.nodeType == node.TEXT_NODE])

def children(element):

    "Return the serialised XML of all the child nodes of 'element'."

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return 'text' with the entity references for the "<", ">" and "&"
    characters replaced by the characters themselves.
    """

    # The references are built from the entity names so that this source text
    # contains no literal entity references that could be decoded in transit.

    for name, character in _UNESCAPED_ENTITIES:
        text = text.replace("&%s;" % name, character)
    return text

def linktext(element, feed_type):

    """
    Return the link target from the given link 'element' for a feed of the
    given 'feed_type'. For "rss" feeds, the element text is used; otherwise,
    the "href" attribute is used.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether 'show_content' requests full content (as opposed to a
    summary) and 'tagname' is an element providing such content.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname' according
    to the dictionaries provided by the 'request'. Where the dictionaries
    object has no "has_member" method, the group is obtained and tested for
    membership directly.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    return [(format, attributes, body)
        for (format, attributes, body) in _getFragments(s)
        if fragment_attribute in attributes]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Where no Accept header is available, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if not accept:
        return mimetypes

    return getContentPreferences(accept).get_preferred_types(mimetypes)

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an update with all of its details unset."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification.

        self.path = []

    def unique_id(self):

        """
        Return an identifier for this update made from the message number and
        the part numbers in the path.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times, ordering an
        update without an updated time after one having such a time.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a copy of this update without the content, content type and
        parts. If 'part_number' is given, it is appended to the path of the
        copy, leaving the path of this update unchanged.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page
        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Error classes.

class FeedError(Exception):

    "A general feed retrieval or interpretation error."

class FeedMissingError(FeedError):

    "An error raised when a feed is refused or no feed data is obtained."

class FeedContentTypeError(FeedError):

    "An error raised when a feed has an unsupported content type."

# Update retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for "file:" URLs and where no data is obtained
    for the feed. FeedContentTypeError is raised where the resource does not
    have a recognised feed content type.
    """

    # Prevent local file access. URL schemes are case-insensitive and URL
    # openers may ignore surrounding whitespace, so the URL is normalised
    # before being tested.

    if feed_url.strip().lower().startswith("file:"):
        raise FeedMissingError

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
            raise FeedContentTypeError

        return getUpdatesFromFeed(f, max_entries, show_content)

    finally:
        f.close()

# Update retrieval from feeds.

def _isFeedItem(feed_type, tagname):

    "Return whether 'tagname' denotes an update entry in a 'feed_type' feed."

    return feed_type == "rss" and tagname == "item" or \
        feed_type == "atom" and tagname == "entry"

def _getFeedContent(element, feed_type):

    """
    Return a (content, content type) tuple for the expanded content 'element'
    from a feed of the given 'feed_type', normalising the content type.
    """

    # Content from feeds other than Atom feeds is treated as HTML.

    if feed_type != "atom":
        return text(element), "text/html"

    content_type = element.getAttribute("type") or "text"

    # Normalise the content types and extract the content.

    if content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
        return children(element), "application/xhtml+xml"
    elif content_type in ("html", "text/html"):
        return text(element), "text/html"
    else:
        return text(element), "text/plain"

def _getFeedDate(element, feed_type):

    """
    Return the date and time given by the expanded 'element' from a feed of the
    given 'feed_type'.
    """

    if feed_type == "atom":
        return getDateTimeFromISO8601(text(element))
    else:
        return DateTime(parsedate(text(element)))

def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    The 'feed' is closed when this function completes.

    NOTE: The 'max_entries' parameter is accepted but is not applied here: all
    NOTE: entries found in the feed are returned.
    """

    feed_updates = []

    try:
        # Parse each node from the feed.

        channel_title = channel_link = None

        feed_type = None
        update = None
        in_source = False

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            # Elements opened inside Atom source declarations are ignored.

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type and items.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif _isFeedItem(feed_type, tagname):
                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements, with details found outside items
                # being regarded as channel details.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary.

                    if update and (need_content(show_content, tagname) or
                        tagname == "summary" and not update.content):

                        update.content, update.content_type = _getFeedContent(value, feed_type)

                elif feed_type == "atom" and tagname == "updated" or \
                    feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    if update:
                        update.updated = _getFeedDate(value, feed_type)

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                # Complete any item, recording the update.

                if _isFeedItem(feed_type, tagname):
                    feed_updates.append(update)
                    update = None

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)

            if format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered according to
    the sorted keys of the store.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    "Return an update for the given 'message_text' and 'message_number'."

    update = Update()
    message = Parser().parsestr(message_text)

    # Produce a fragment identifier.

    update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # Any author identity obtained from the content replaces the declared one.

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part'.
    """

    # Determine whether the part has several representations.

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    elif subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author

        # Where the content cannot be processed, provide the parts as they are.

        except MoinMessageError:
            return None, part.get_content_type(), part.get_payload(), None

    # Otherwise, just obtain the parts for separate display.

    else:
        return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part', returning a tuple containing the resulting part
    and the username associated with any verified signature. Where no signature
    is verified, the username is given as None.
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    "Decode the 'part', returning the decoded payload and the content type."

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Only payloads with a declared character set are converted to Unicode.

    if charset:
        content = unicode(payload, charset)
    else:
        content = payload

    return content, part.get_content_type()

def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part'
    having the given 'part_number'. The author of the parent is retained unless
    an author identity is obtained from the part itself.
    """

    update = parent.copy(part_number)

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(part, request)

    # Avoid discarding the inherited author where the part provides none.

    if actual_author:
        update.author = actual_author

    return update

def getUpdatesForFormatting(update, request):

    "Get a list of updates for formatting given 'update'."

    # Updates without parts are formatted as they are.

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates += getUpdatesForFormatting(update_part, request)

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for the content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    # Only the first parser is ever used.

    for parser_cls in parsers or []:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string. Where the update yields many alternatives,
    navigation tabs are produced and each alternative is encapsulated.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"

            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string. Where the update cannot be formatted, a
    message indicating the content type is provided instead.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))

    formatted = getFormattedUpdate(update, request, fmt)

    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))

    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    A dictionary mapping source names to source parameters is returned.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.

class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer with the 'out' stream, together with the
        'request', 'page' and 'message_number' used to rewrite references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a link to the ReadMessage action on the page for any 'ref'
        employing the "cid:" scheme, with the remainder of the reference
        indicating the message part. Other references are returned unchanged.
        """

        if not ref.startswith("cid:"):
            return ref

        part = ref[len("cid:"):]

        return self.page.url(self.request, {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : part
            })

    def handle_starttag(self, tag, attrs):

        """
        Handle the start 'tag' with the given 'attrs', rewriting the values of
        attributes that hold URIs before passing them to the base class.
        """

        new_attrs = []

        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))

        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a sanitized version of this markup, using the 'request', 'page'
        and 'message_number' to rewrite references found in the markup.
        """

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are left unset for later configuration.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        "Send the text."

        # The exception is obtained using sys.exc_info so that no exception
        # binding syntax specific to particular Python versions is needed.

        import sys

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError:
            e = sys.exc_info()[1]
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self.raw.splitlines()[e.lineno - 1].strip())) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a parser instantiated using the given arguments and configured
        with the message number and page held by this object.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4