MoinShare (file MoinShare.py at 4233e4fa5ee6)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - MoinShare library     4      5     @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>     6     @copyright: 2003-2006 Edgewall Software     7     @copyright: 2006 MoinMoin:AlexanderSchremmer     8     @license: GNU GPL (v2 or later), see COPYING.txt for details.     9 """    10     11 from ContentTypeSupport import getContentPreferences    12 from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \    13                         getDateTimeFromISO8601, DateTime    14 from MoinSupport import *    15 from MoinRemoteSupport import *    16 from ItemSupport import ItemStore    17 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError    18 from MoinMessageSupport import get_homedir, get_username_for_fingerprint    19 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup    20 from MoinMoin import wikiutil    21 from email.parser import Parser    22 from email.utils import parsedate    23 from codecs import getwriter    24 import xml.dom.pulldom    25     26 try:    27     from cStringIO import StringIO    28 except ImportError:    29     from StringIO import StringIO    30     31 _getFragments = getFragments    32     33 __version__ = "0.1"    34     35 ATOM_NS = "http://www.w3.org/2005/Atom"    36     37 # Utility functions.    
def text(element):

    """
    Return the concatenated values of the text nodes that are direct children
    of 'element'. Text inside nested elements is not included.
    """

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])

def children(element):

    "Return the serialised XML of all child nodes of 'element' as one string."

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return 'text' with the "&lt;", "&gt;" and "&amp;" entities replaced by the
    characters they stand for. The ampersand entity is replaced last so that
    text such as "&amp;lt;" yields "&lt;" and not "<".
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link target held by 'element': its text for the "rss"
    'feed_type', or its "href" attribute for any other feed type.
    """

    if feed_type == "rss":
        return text(element)
    return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether 'show_content' requests full content and 'tagname' names an
    element providing it: both must be one of "content" or "description".
    """

    wanted = ("content", "description")
    return show_content in wanted and tagname in wanted

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group 'groupname', using the
    has_member method of request.dicts where it exists, or a membership test
    on the group obtained from request.dicts otherwise.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    # Each fragment is a (format, attributes, body) tuple.

    return [fragment for fragment in _getFragments(s)
            if fragment_attribute in fragment[1]]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Without an Accept header, 'mimetypes' is returned unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if not accept:
        return mimetypes

    prefs = getContentPreferences(accept)
    return prefs.get_preferred_types(mimetypes)

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")
    if latest_timestamp:
        return latest_timestamp
    return getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Message-related attributes.

        self.parts = None

        # Feed-related attributes.

        self.title = None
        self.link = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Store-related attributes.

        self.message_number = None

        # Store- and page-related attributes.

        self.page = None

        # Identification.

        self.path = []

    def unique_id(self):

        """
        A unique identifier used for anchors to parts of presented updates.
        It combines the message number with the path of part numbers.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Order updates by their updated time, with updates lacking such a time
        being placed after those having one.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's descriptive details but not
        its content, content type or parts. If 'part_number' is given, it is
        appended to the path of the new update.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page

        # Copy the path so that the original is not extended.

        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Error classes.

class FeedError(Exception):

    "The base class of errors raised when obtaining feeds."

    pass

class FeedMissingError(FeedError):

    "Raised when a feed is not permitted or provides no data."

    pass

class FeedContentTypeError(FeedError):

    "Raised when a feed has a content type that is not supported."

    pass

# Update retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for "file:" URLs and where no data is obtained.
    FeedContentTypeError is raised for unsupported content types.
    """

    # Prevent local file access. URL schemes are case-insensitive and URL
    # openers may ignore leading whitespace, so normalise before testing.

    if feed_url.lstrip().lower().startswith("file:"):
        raise FeedMissingError

    elif feed_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()

# Update retrieval from feeds.

def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve update entries from the file-like object 'feed' providing an Atom
    or RSS document. The 'show_content' parameter can indicate that a "summary"
    is to be obtained for each update, that the "content" of each update is to
    be obtained (falling back to a summary if no content is provided), or no
    content (indicated by a false value) is to be obtained.

    NOTE: The 'max_entries' parameter is accepted but is not applied here: all
    NOTE: entries found in the feed are returned.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.
    """

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    feed_type = None
    update = None
    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, value in events:

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = value.localName

            # Detect the feed type and items.

            if tagname == "feed" and value.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif (feed_type == "rss" and tagname == "item") or \
                (feed_type == "atom" and tagname == "entry"):

                update = Update()

            # Detect source declarations. Elements inside them are ignored
            # until the source element ends.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements. Outside items, titles and links describe
            # the channel.

            elif tagname == "title":
                events.expandNode(value)
                if update:
                    update.title = text(value)
                else:
                    channel_title = text(value)

            elif tagname == "link":
                events.expandNode(value)
                if update:
                    update.link = linktext(value, feed_type)
                else:
                    channel_link = linktext(value, feed_type)

            elif show_content and (
                (feed_type == "atom" and tagname in ("content", "summary")) or
                (feed_type == "rss" and tagname == "description")):

                events.expandNode(value)

                # Obtain content where requested or, failing that, a
                # summary.

                if update and (need_content(show_content, tagname) or
                    (tagname == "summary" and not update.content)):

                    if feed_type == "atom":
                        declared_type = value.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if declared_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(value)
                            update.content_type = "application/xhtml+xml"
                        elif declared_type in ("html", "text/html"):
                            update.content = text(value)
                            update.content_type = "text/html"
                        else:
                            update.content = text(value)
                            update.content_type = "text/plain"
                    else:
                        update.content_type = "text/html"
                        update.content = text(value)

            elif (feed_type == "atom" and tagname == "updated") or \
                (feed_type == "rss" and tagname == "pubDate"):

                events.expandNode(value)

                if update:
                    if feed_type == "atom":
                        update.updated = getDateTimeFromISO8601(text(value))
                    else:
                        # parsedate returns None for dates it cannot parse:
                        # leave such updates without a time instead of
                        # attempting to construct a DateTime from None.

                        parsed = parsedate(text(value))
                        if parsed is not None:
                            update.updated = DateTime(parsed)

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = value.localName

            if (feed_type == "rss" and tagname == "item") or \
                (feed_type == "atom" and tagname == "entry"):

                # Only record items whose start was actually seen.

                if update is not None:
                    feed_updates.append(update)

                update = None

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates

# Update retrieval from mailboxes and multipart messages.
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Retrieve update entries from the file-like object 'feed' providing a
    multipart message, treating each of its parts as a separate update and
    using the 'request' when obtaining the content of each part.

    NOTE: The 'max_entries' and 'show_content' parameters are accepted but are
    NOTE: not applied here: all messages are returned with their content.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with "mbox" as the feed type and no channel details.
    """

    mailbox = Parser().parse(feed)

    feed_updates = []

    # Parse each message from the feed as a separate update.

    for message_number, part in enumerate(mailbox.get_payload()):
        update = Update()
        update.updated = getDateTimeFromRFC2822(part.get("date"))
        update.title = part.get("subject", "Update #%d" % message_number)
        update.message_number = message_number

        update.content, update.content_type, update.parts, actual_author = \
            getUpdateContentFromPart(part, request)

        if actual_author:
            update.author = actual_author

        feed_updates.append(update)

    return ("mbox", None, None), feed_updates

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned, one for each MoinShare fragment found in the page.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format_name, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format_name))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.
        # Where HTML is not preferred, the update is left without content.

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format_name)

            if format_name == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered by store key.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    "Return an update for the given 'message_text' and 'message_number'."

    update = Update()
    message = Parser().parsestr(message_text)

    # Take the descriptive details from the message headers.

    update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # An author identity obtained from the content takes precedence over the
    # one given in the headers.

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part'. The content is None where the part
    is a container whose subparts are to be shown separately.
    """

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        subparts = part.get_payload()

        # Guard against containers providing no parts at all.

        if not subparts:
            return None, part.get_content_type(), subparts, None

        main_part = subparts[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    elif subtype == "encrypted":
        try:
            decrypted, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(decrypted, request)
            return content, content_type, parts, author

        # Where decryption fails, present the parts as they are.

        except MoinMessageError:
            return None, part.get_content_type(), part.get_payload(), None

    # Otherwise, just obtain the parts for separate display.

    else:
        return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part', returning a tuple containing the resulting
    content and the username associated with any verified signature, or None
    where no signature could be verified.
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.
    Where the part declares a known charset, the payload is converted to
    Unicode with undecodable data replaced; otherwise it is returned as it is.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)
    content_type = part.get_content_type()

    if charset and payload is not None:
        try:
            return unicode(payload, charset, "replace"), content_type

        # Messages may declare charsets unknown to Python: fall back to the
        # undecoded payload in such cases.

        except LookupError:
            pass

    return payload, content_type

def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part'
    having the given 'part_number'. The author of the parent is retained unless
    the part itself provides an author identity.
    """

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(part, request)

    if actual_author:
        update.author = actual_author

    return update

def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update'. Updates having parts
    are replaced by the updates obtained recursively from those parts.
    """

    # Updates without parts are formatted as they are.

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates += getUpdatesForFormatting(update_part, request)

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt', or None if the update has no content or if no parser is
    available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first available parser is ever used.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the output as a string. Where the update yields several updates for
    formatting, navigation tabs are produced and each alternative is
    encapsulated separately.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.
    # This only occurs for multipart updates.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist, marking the first as
        # the default.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the content, or a message where the content cannot be shown, enclosed in a
    content region.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))
    formatted = getFormattedUpdate(update, request, fmt)
    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))
    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request' as
    a dictionary mapping source names to source parameters.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for the rewriting of references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return 'ref' rewritten as a link to the ReadMessage action on the page
        if it is a "cid:" reference to a message part. Other references,
        including absent (None) attribute values, are returned unchanged.
        """

        if ref and ref.startswith("cid:"):
            part = ref[len("cid:"):]
            action_link = self.page.url(self.request, {
                "action" : "ReadMessage", "doit" : "1",
                "message" : self.message_number, "part" : part
                })
            return action_link
        else:
            return ref

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of URI-bearing attributes in 'attrs' before passing
        the 'tag' and its attributes on to the base sanitizer.
        """

        new_attrs = []
        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                new_attrs.append((attrname, self.rewrite_reference(attrvalue)))
            else:
                new_attrs.append((attrname, attrvalue))
        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object holding this markup as processed by a
        sanitizer configured with the 'request', 'page' and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are expected to be set afterwards.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        "Send the text, or a system message if it cannot be parsed as HTML."

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))

        # NOTE: The "as" form of the except clause requires Python 2.6 or later.

        except HTMLParseError as e:

            # Quote the offending line only where the reported line number
            # refers to a line actually present in the text.

            lines = self.raw.splitlines()
            lineno = getattr(e, "lineno", None)

            if lineno and 0 < lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a parser instantiated with the given arguments and configured
        with the stored page and message number.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4