# MoinShare (file MoinShare.py at f8d989e2f62f)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - MoinShare library     4      5     @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>     6     @copyright: 2003-2006 Edgewall Software     7     @copyright: 2006 MoinMoin:AlexanderSchremmer     8     @license: GNU GPL (v2 or later), see COPYING.txt for details.     9 """    10     11 from ContentTypeSupport import getContentPreferences    12 from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \    13                         getDateTimeFromISO8601, DateTime    14 from MoinSupport import *    15 from MoinRemoteSupport import *    16 from ItemSupport import ItemStore    17 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError    18 from MoinMessageSupport import get_homedir, get_username_for_fingerprint    19 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup    20 from MoinMoin import wikiutil    21 from email.parser import Parser    22 from email.utils import parsedate    23 from codecs import getwriter    24 import xml.dom.pulldom    25     26 try:    27     from cStringIO import StringIO    28 except ImportError:    29     from StringIO import StringIO    30     31 _getFragments = getFragments    32     33 __version__ = "0.1"    34     35 ATOM_NS = "http://www.w3.org/2005/Atom"    36     37 # Utility functions.    
def text(element):

    "Return the concatenated text of the text nodes directly inside 'element'."

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])

def children(element):

    "Return the serialised XML of all the child nodes of 'element'."

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return 'text' with the "&lt;", "&gt;" and "&amp;" references replaced by
    the characters they stand for.
    """

    # The ampersand is replaced last so that "&amp;lt;" yields "&lt;" rather
    # than being unescaped twice.

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link held by 'element': its text for the "rss" 'feed_type' and
    its "href" attribute for any other feed type.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether 'show_content' requests full content and 'tagname' names an
    element providing such content.
    """

    full_content = ("content", "description")
    return show_content in full_content and tagname in full_content

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group 'groupname', using the
    has_member method of the dicts object of 'request' where it exists and
    otherwise testing for the username in the group's entry.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    return [(fragment_format, attributes, body)
            for (fragment_format, attributes, body) in _getFragments(s)
            if fragment_attribute in attributes]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Where no Accept header is found, 'mimetypes' is returned as given.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if accept:
        prefs = getContentPreferences(accept)
        return prefs.get_preferred_types(mimetypes)
    else:
        return mimetypes

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")
    if latest_timestamp:
        return latest_timestamp
    else:
        return getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):
        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update within a
        # message, as extended by the copy method.

        self.path = []

    def unique_id(self):

        """
        Return an identifier for this update built from the message number
        and the part numbers in the path.
        """

        return "moinshare-tab-%s-%s" % (
            self.message_number, "-".join([str(n) for n in self.path]))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times, ordering
        any update without such a time after updates having one.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a copy of this update for describing a part of it, adding any
        given 'part_number' to the path of the copy. The content, content
        type and parts are not copied.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page
        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Error classes.

class FeedError(Exception):

    "The base class for errors raised when obtaining feeds."

    pass

class FeedMissingError(FeedError):

    "Raised where a feed may not be accessed or yields no data."

    pass

class FeedContentTypeError(FeedError):

    "Raised where a retrieved resource does not have a feed content type."

    pass

# Update retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for "file:" URLs and where no data is obtained
    for the feed. FeedContentTypeError is raised where the resource does not
    have one of the supported feed content types.
    """

    # Prevent local file access. URL schemes are case-insensitive, so the URL
    # is normalised before being tested in order that variants such as "FILE:"
    # or a URL preceded by whitespace are also rejected.

    if feed_url.strip().lower().startswith("file:"):
        raise FeedMissingError

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)
        else:
            raise FeedContentTypeError

    finally:
        f.close()

# Update retrieval from feeds.

def _isEntryTag(feed_type, tagname):

    "Return whether 'tagname' denotes an entry in a feed of 'feed_type'."

    return feed_type == "rss" and tagname == "item" or \
        feed_type == "atom" and tagname == "entry"

def _getFeedContent(element, feed_type):

    """
    Return a tuple (content, content type) for the expanded content 'element'
    found in a feed of the given 'feed_type', normalising the content type.
    """

    # RSS descriptions are treated as HTML.

    if feed_type != "atom":
        return text(element), "text/html"

    # Atom content indicates its type, with plain text being the default.

    content_type = element.getAttribute("type") or "text"

    if content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
        return children(element), "application/xhtml+xml"
    elif content_type in ("html", "text/html"):
        return text(element), "text/html"
    else:
        return text(element), "text/plain"

def _getFeedDate(element, feed_type):

    """
    Return the date and time described by the expanded 'element' found in a
    feed of the given 'feed_type', or None where an RSS date cannot be parsed.
    """

    date_text = text(element)

    if feed_type == "atom":
        return getDateTimeFromISO8601(date_text)

    # The parsedate function returns None for text it cannot interpret, and
    # such entries are left without a time instead of None being passed on to
    # DateTime.

    parsed = parsedate(date_text)
    if parsed is None:
        return None
    return DateTime(parsed)

def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve update entries from 'feed', a file-like object which is closed
    before this function returns. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    NOTE: The 'max_entries' parameter is accepted but is not currently applied
    NOTE: here: every entry found in the feed is returned, in document order,
    NOTE: and any limit must be imposed on the result by the caller.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.
    """

    feed_updates = []

    try:
        channel_title = channel_link = None
        feed_type = None

        # The entry currently being populated, if any.

        update = None

        # Whether the parser is inside an Atom source element, whose details
        # describe another feed and are therefore ignored.

        in_source = False

        # Parse each node from the feed.

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif _isEntryTag(feed_type, tagname):
                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements, with titles and links found outside
                # items being recorded for the channel.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary.

                    if update and (need_content(show_content, tagname) or
                        tagname == "summary" and not update.content):

                        update.content, update.content_type = \
                            _getFeedContent(value, feed_type)

                elif feed_type == "atom" and tagname == "updated" or \
                    feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    if update:
                        update.updated = _getFeedDate(value, feed_type)

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                # Complete items.

                if _isEntryTag(feed_type, tagname):
                    feed_updates.append(update)
                    update = None

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    fragments = getFragments(page.get_raw_body())

    for n, (fragment_format, attributes, body) in enumerate(fragments):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request,
            getOutputTypes(request, fragment_format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, fragment_format)

            if fragment_format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects, ordered by message store key, is
    returned.
    """

    store = ItemStore(page, "messages", "message-locks")

    updates = []

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    "Return an update for the given 'message_text' and 'message_number'."

    update = Update()
    message = Parser().parsestr(message_text)

    # Produce a fragment identifier.

    update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))

    # Only produce a default title where the message has no subject.

    title = message.get("subject")
    if title is None:
        title = "Update #%d" % message_number

    update.title = title
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # An author established from the message content takes precedence over
    # the author given in the message headers.

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part'.
    """

    # Determine whether the part has several representations.

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    elif part.get_content_subtype() == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    elif part.get_content_subtype() == "encrypted":
        try:
            # NOTE: The part is deliberately rebound here, meaning that the
            # NOTE: handler below describes the decrypted part if an error
            # NOTE: occurs after decryption has succeeded.

            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:
            return None, part.get_content_type(), part.get_payload(), None

    # Otherwise, just obtain the parts for separate display.

    else:
        return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' using the 'request' to locate the GPG home
    directory, returning a tuple (part, author).

    Where a signature is present and verifyMessage yields a result, the
    content from that result is returned together with the username found
    for the signing fingerprint. Otherwise, the part is returned (decrypted
    if it was encrypted) with None as the author.
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.
    Where the part declares a character set, the payload is converted to
    Unicode.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Parts without a decodable payload provide None, which cannot be
    # converted.

    if charset and payload is not None:
        payload = unicode(payload, charset)

    return payload, part.get_content_type()

def getUpdateFromPart(parent, part, part_number, request):

    "Using the 'parent' update, return an update object for the given 'part'."

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(part, request)

    # Retain the author copied from the parent unless the part itself
    # provides one, as is done when obtaining updates from message text.

    if actual_author:
        update.author = actual_author

    return update

def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', producing an update
    for each part, recursively, where the update has parts, and producing only
    the update itself otherwise.
    """

    # Handle multipart/alternative and other non-related multiparts.

    if not update.parts:
        return [update]

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates += getUpdatesForFormatting(update_part, request)

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt', or None if the update has no content or if no parser is
    available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever used.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string. Where the update provides several
    alternatives, navigation links and a container for each alternative are
    produced.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist, marking the first as
        # the default.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"

            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the content within a container, or a message in place of the content
    where the content cannot be formatted.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))

    formatted = getFormattedUpdate(update, request, fmt)

    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))

    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request' as
    a dictionary mapping source names to source parameters.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.
685    686 class IncomingHTMLSanitizer(HTMLSanitizer):   687    688     "An HTML parser that rewrites references to attachments."   689    690     def __init__(self, out, request, page, message_number):   691         HTMLSanitizer.__init__(self, out)   692         self.request = request   693         self.message_number = message_number   694         self.page = page   695    696     def rewrite_reference(self, ref):   697         if ref.startswith("cid:"):   698             part = ref[len("cid:"):]   699             action_link = self.page.url(self.request, {   700                 "action" : "ReadMessage", "doit" : "1",   701                 "message" : self.message_number, "part" : part   702                 })   703             return action_link   704         else:   705             return ref   706    707     def handle_starttag(self, tag, attrs):   708         new_attrs = []   709         for attrname, attrvalue in attrs:   710             if attrname in self.uri_attrs:   711                 new_attrs.append((attrname, self.rewrite_reference(attrvalue)))   712             else:   713                 new_attrs.append((attrname, attrvalue))   714         HTMLSanitizer.handle_starttag(self, tag, new_attrs)   715    716 class IncomingMarkup(Markup):   717    718     "A special markup processor for incoming HTML."   719    720     def sanitize(self, request, page, message_number):   721         out = getwriter("utf-8")(StringIO())   722         sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)   723         sanitizer.feed(self.stripentities(keepxmlentities=True))   724         return IncomingMarkup(unicode(out.getvalue(), "utf-8"))   725    726 class IncomingHTMLParser:   727    728     "Filters and rewrites incoming HTML content."   
729    730     def __init__(self, raw, request, **kw):   731         self.raw = raw   732         self.request = request   733         self.message_number = None   734         self.page = None   735    736     def format(self, formatter, **kw):   737    738         "Send the text."   739    740         try:   741             self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))   742         except HTMLParseError, e:   743             self.request.write(formatter.sysmsg(1) +   744                 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,   745                                   self.raw.splitlines()[e.lineno - 1].strip())) +   746                 formatter.sysmsg(0))   747    748 class MakeIncomingHTMLParser:   749    750     "A class that makes parsers configured for messages."   751    752     def __init__(self, page, message_number):   753    754         "Initialise with state that is used to configure instantiated parsers."   755    756         self.message_number = message_number   757         self.page = page   758    759     def __call__(self, *args, **kw):   760         parser = IncomingHTMLParser(*args, **kw)   761         parser.message_number = self.message_number   762         parser.page = self.page   763         return parser   764    765 def get_make_parser(page, message_number):   766    767     """   768     Return a callable that will return a parser configured for the message from   769     the given 'page' with the given 'message_number'.   770     """   771    772     return MakeIncomingHTMLParser(page, message_number)   773    774 # vim: tabstop=4 expandtab shiftwidth=4