# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the getFragments function provided by MoinSupport, since
# this module redefines the name below.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    "Return the concatenated text node content of 'element'."

    nodes = []
    for node in element.childNodes:
        if node.nodeType == node.TEXT_NODE:
            nodes.append(node.nodeValue)
    return "".join(nodes)

def children(element):

    "Return the serialised markup of the child nodes of 'element'."

    nodes = []
    for node in element.childNodes:
        nodes.append(node.toxml())
    return "".join(nodes)

def unescape(text):

    """
    Replace the basic XML entity references in 'text' with the characters they
    represent. The ampersand entity is replaced last so that the earlier
    replacements cannot expose new entity references for a second round of
    substitution (for example, "&amp;lt;" correctly becomes "&lt;", not "<").
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link location from 'element' according to 'feed_type': RSS
    supplies the URL as text content, whereas Atom supplies it in the href
    attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content was requested (via 'show_content') and is
    provided by an element with the given 'tagname'.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
65 66 def has_member(request, groupname, username): 67 if hasattr(request.dicts, "has_member"): 68 return request.dicts.has_member(groupname, username) 69 else: 70 return username in request.dicts.get(groupname, []) 71 72 # Fragments employ a "moinshare" attribute. 73 74 fragment_attribute = "moinshare" 75 76 def getFragments(s): 77 78 "Return all fragments in 's' having the MoinShare fragment attribute." 79 80 fragments = [] 81 for format, attributes, body in _getFragments(s): 82 if attributes.has_key(fragment_attribute): 83 fragments.append((format, attributes, body)) 84 return fragments 85 86 def getPreferredOutputTypes(request, mimetypes): 87 88 """ 89 Using the 'request', perform content negotiation, obtaining mimetypes common 90 to the fragment (given by 'mimetypes') and the client (found in the Accept 91 header). 92 """ 93 94 accept = getHeader(request, "Accept", "HTTP") 95 if accept: 96 prefs = getContentPreferences(accept) 97 return prefs.get_preferred_types(mimetypes) 98 else: 99 return mimetypes 100 101 def getUpdatedTime(metadata): 102 103 """ 104 Return the last updated time based on the given 'metadata', using the 105 current time if no explicit last modified time is specified. 106 """ 107 108 # NOTE: We could attempt to get the last edit time of a fragment. 109 110 latest_timestamp = metadata.get("last-modified") 111 if latest_timestamp: 112 return latest_timestamp 113 else: 114 return getCurrentTime() 115 116 # Entry/update classes. 117 118 class Update: 119 120 "A feed update entry." 121 122 def __init__(self): 123 self.title = None 124 self.link = None 125 self.content = None 126 self.content_type = None 127 self.updated = None 128 self.author = None 129 130 # Page-related attributes. 131 132 self.fragment = None 133 self.preferred = None 134 135 # Message-related attributes. 136 137 self.message_number = None 138 self.parts = None 139 140 # Message- and page-related attributes. 141 142 self.page = None 143 144 # Identification. 
145 146 self.path = [] 147 148 def unique_id(self): 149 return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path))) 150 151 def __cmp__(self, other): 152 if self.updated is None and other.updated is not None: 153 return 1 154 elif self.updated is not None and other.updated is None: 155 return -1 156 else: 157 return cmp(self.updated, other.updated) 158 159 def copy(self, part_number=None): 160 update = Update() 161 update.title = self.title 162 update.link = self.link 163 update.updated = self.updated 164 update.author = self.author 165 update.fragment = self.fragment 166 update.preferred = self.preferred 167 update.message_number = self.message_number 168 update.page = self.page 169 update.path = self.path[:] 170 if part_number is not None: 171 update.path.append(part_number) 172 return update 173 174 # Error classes. 175 176 class FeedError(Exception): 177 pass 178 179 class FeedMissingError(FeedError): 180 pass 181 182 class FeedContentTypeError(FeedError): 183 pass 184 185 # Feed retrieval from URLs. 186 187 def getUpdates(request, feed_url, max_entries, show_content): 188 189 """ 190 Using the given 'request', retrieve from 'feed_url' up to the given number 191 'max_entries' of update entries. The 'show_content' parameter can indicate 192 that a "summary" is to be obtained for each update, that the "content" of 193 each update is to be obtained (falling back to a summary if no content is 194 provided), or no content (indicated by a false value) is to be obtained. 195 196 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 197 returned. 198 """ 199 200 feed_updates = [] 201 202 # Obtain the resource, using a cached version if appropriate. 203 204 max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) 205 data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age) 206 if not data: 207 raise FeedMissingError 208 209 # Interpret the cached feed. 
210 211 feed = StringIO(data) 212 _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed) 213 214 if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"): 215 raise FeedContentTypeError 216 217 try: 218 # Parse each node from the feed. 219 220 channel_title = channel_link = None 221 222 feed_type = None 223 update = None 224 in_source = False 225 226 events = xml.dom.pulldom.parse(feed) 227 228 for event, value in events: 229 230 if not in_source and event == xml.dom.pulldom.START_ELEMENT: 231 tagname = value.localName 232 233 # Detect the feed type and items. 234 235 if tagname == "feed" and value.namespaceURI == ATOM_NS: 236 feed_type = "atom" 237 238 elif tagname == "rss": 239 feed_type = "rss" 240 241 # Detect items. 242 243 elif feed_type == "rss" and tagname == "item" or \ 244 feed_type == "atom" and tagname == "entry": 245 246 update = Update() 247 248 # Detect source declarations. 249 250 elif feed_type == "atom" and tagname == "source": 251 in_source = True 252 253 # Handle item elements. 254 255 elif tagname == "title": 256 events.expandNode(value) 257 if update: 258 update.title = text(value) 259 else: 260 channel_title = text(value) 261 262 elif tagname == "link": 263 events.expandNode(value) 264 if update: 265 update.link = linktext(value, feed_type) 266 else: 267 channel_link = linktext(value, feed_type) 268 269 elif show_content and ( 270 feed_type == "atom" and tagname in ("content", "summary") or 271 feed_type == "rss" and tagname == "description"): 272 273 events.expandNode(value) 274 275 # Obtain content where requested or, failing that, a 276 # summary. 277 278 if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): 279 if feed_type == "atom": 280 update.content_type = value.getAttribute("type") or "text" 281 282 # Normalise the content types and extract the 283 # content. 
284 285 if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): 286 update.content = children(value) 287 update.content_type = "application/xhtml+xml" 288 elif update.content_type in ("html", "text/html"): 289 update.content = text(value) 290 update.content_type = "text/html" 291 else: 292 update.content = text(value) 293 update.content_type = "text/plain" 294 else: 295 update.content_type = "text/html" 296 update.content = text(value) 297 298 elif feed_type == "atom" and tagname == "updated" or \ 299 feed_type == "rss" and tagname == "pubDate": 300 301 events.expandNode(value) 302 303 if update: 304 if feed_type == "atom": 305 value = getDateTimeFromISO8601(text(value)) 306 else: 307 value = DateTime(parsedate(text(value))) 308 update.updated = value 309 310 elif event == xml.dom.pulldom.END_ELEMENT: 311 tagname = value.localName 312 313 if feed_type == "rss" and tagname == "item" or \ 314 feed_type == "atom" and tagname == "entry": 315 316 feed_updates.append(update) 317 318 update = None 319 320 elif feed_type == "atom" and tagname == "source": 321 in_source = False 322 323 finally: 324 feed.close() 325 326 return (feed_type, channel_title, channel_link), feed_updates 327 328 # Update retrieval from pages. 329 330 def getUpdatesFromPage(page, request): 331 332 """ 333 Get updates from the given 'page' using the 'request'. A list of update 334 objects is returned. 335 """ 336 337 updates = [] 338 339 # NOTE: Use the updated datetime from the page for updates. 340 # NOTE: The published and updated details would need to be deduced from 341 # NOTE: the page history instead of being taken from the page as a whole. 342 343 metadata = getMetadata(page) 344 updated = getUpdatedTime(metadata) 345 346 # Get the fragment regions for the page. 347 348 for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())): 349 350 update = Update() 351 352 # Produce a fragment identifier. 
353 # NOTE: Choose a more robust identifier where none is explicitly given. 354 355 update.fragment = attributes.get("fragment", str(n)) 356 update.title = attributes.get("summary", "Update #%d" % n) 357 358 # Get the preferred content types available for the fragment. 359 360 update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format)) 361 362 # Try and obtain some suitable content for the entry. 363 # NOTE: Could potentially get a summary for the fragment. 364 365 update.content = None 366 367 if "text/html" in update.preferred: 368 parser_cls = getParserClass(request, format) 369 370 if format == "html": 371 update.content = body 372 elif hasattr(parser_cls, "formatForOutputType"): 373 update.content = formatTextForOutputType(body, request, parser_cls, "text/html") 374 else: 375 fmt = request.html_formatter 376 fmt.setPage(page) 377 update.content = formatText(body, request, fmt, parser_cls) 378 379 update.content_type = "text/html" 380 381 update.page = page 382 383 # NOTE: The anchor would be supported in the page, but this requires 384 # NOTE: formatter modifications for the regions providing updates. 385 386 update.link = page.url(request, anchor=update.fragment) 387 update.updated = updated 388 389 updates.append(update) 390 391 return updates 392 393 # Update retrieval from message stores. 394 395 def getUpdatesFromStore(page, request): 396 397 """ 398 Get updates from the message store associated with the given 'page' using 399 the 'request'. A list of update objects is returned. 
400 """ 401 402 updates = [] 403 404 metadata = getMetadata(page) 405 updated = getUpdatedTime(metadata) 406 407 store = ItemStore(page, "messages", "message-locks") 408 409 keys = store.keys() 410 keys.sort() 411 412 for key in keys: 413 message_text = store[key] 414 update = getUpdateFromMessageText(message_text, key, request) 415 update.page = page 416 updates.append(update) 417 418 return updates 419 420 def getUpdateFromMessageText(message_text, message_number, request): 421 422 "Return an update for the given 'message_text' and 'message_number'." 423 424 update = Update() 425 message = Parser().parsestr(message_text) 426 427 # Produce a fragment identifier. 428 429 update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date")) 430 update.title = message.get("subject", "Update #%d" % message_number) 431 update.author = message.get("moin-user") 432 433 update.message_number = message_number 434 435 update.content, update.content_type, update.parts, actual_author = \ 436 getUpdateContentFromPart(message, request) 437 438 if actual_author: 439 update.author = actual_author 440 441 return update 442 443 def getUpdateContentFromPart(part, request): 444 445 """ 446 Return decoded content, the content type, any subparts, and any author 447 identity in a tuple for a given 'part'. 448 """ 449 450 # Determine whether the part has several representations. 451 452 # For a single part, use it as the update content. 453 454 if not part.is_multipart(): 455 content, content_type = getPartContent(part) 456 return content, content_type, None, None 457 458 # For a collection of related parts, use the first as the update content 459 # and assume that the formatter will reference the other parts. 460 461 elif part.get_content_subtype() == "related": 462 main_part = part.get_payload()[0] 463 content, content_type = getPartContent(main_part) 464 return content, content_type, [main_part], None 465 466 # Encrypted content cannot be meaningfully separated. 
467 468 elif part.get_content_subtype() == "encrypted": 469 try: 470 part, author = getDecryptedParts(part, request) 471 content, content_type, parts, _author = getUpdateContentFromPart(part, request) 472 return content, content_type, parts, author 473 except MoinMessageError: 474 return None, part.get_content_type(), part.get_payload(), None 475 476 # Otherwise, just obtain the parts for separate display. 477 478 else: 479 return None, part.get_content_type(), part.get_payload(), None 480 481 def getDecryptedParts(part, request): 482 483 "Decrypt the given 'part', returning the decoded content." 484 485 homedir = get_homedir(request) 486 gpg = GPG(homedir) 487 488 # Decrypt the part. 489 490 if is_encrypted(part): 491 text = gpg.decryptMessage(part) 492 part = Parser().parsestr(text) 493 494 # Extract any signature details. 495 496 if is_signed(part): 497 result = gpg.verifyMessage(part) 498 if result: 499 fingerprint, identity, content = result 500 return content, get_username_for_fingerprint(request, fingerprint) 501 502 return part, None 503 504 def getPartContent(part): 505 506 "Decode the 'part', returning the decoded payload and the content type." 507 508 charset = part.get_content_charset() 509 payload = part.get_payload(decode=True) 510 return (charset and unicode(payload, charset) or payload), part.get_content_type() 511 512 def getUpdateFromPart(parent, part, part_number, request): 513 514 "Using the 'parent' update, return an update object for the given 'part'." 515 516 update = parent.copy(part_number) 517 update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request) 518 return update 519 520 def getUpdatesForFormatting(update, request): 521 522 "Get a list of updates for formatting given 'update'." 523 524 updates = [] 525 526 # Handle multipart/alternative and other non-related multiparts. 
527 528 if update.parts: 529 for n, part in enumerate(update.parts): 530 update_part = getUpdateFromPart(update, part, n, request) 531 updates += getUpdatesForFormatting(update_part, request) 532 else: 533 updates.append(update) 534 535 return updates 536 537 # Update formatting. 538 539 def getFormattedUpdate(update, request, fmt): 540 541 """ 542 Return the formatted form of the given 'update' using the given 'request' 543 and 'fmt'. 544 """ 545 546 # NOTE: Some control over the HTML and XHTML should be exercised. 547 548 if update.content: 549 if update.content_type == "text/html" and update.message_number is not None: 550 parsers = [get_make_parser(update.page, update.message_number)] 551 else: 552 parsers = getParsersForContentType(request.cfg, update.content_type) 553 554 if parsers: 555 for parser_cls in parsers: 556 if hasattr(parser_cls, "formatForOutputType"): 557 return formatTextForOutputType(update.content, request, parser_cls, "text/html") 558 else: 559 return formatText(update.content, request, fmt, parser_cls=parser_cls) 560 break 561 else: 562 return None 563 else: 564 return None 565 566 def formatUpdate(update, request, fmt): 567 568 "Format the given 'update' using the given 'request' and 'fmt'." 569 570 result = [] 571 append = result.append 572 573 updates = getUpdatesForFormatting(update, request) 574 single = len(updates) == 1 575 576 # Format some navigation tabs. 577 578 if not single: 579 append(fmt.div(on=1, css_class="moinshare-alternatives")) 580 581 first = True 582 583 for update_part in updates: 584 append(fmt.url(1, "#%s" % update_part.unique_id())) 585 append(fmt.text(update_part.content_type)) 586 append(fmt.url(0)) 587 588 first = False 589 590 append(fmt.div(on=0)) 591 592 # Format the content. 593 594 first = True 595 596 for update_part in updates: 597 598 # Encapsulate each alternative if many exist. 
599 600 if not single: 601 css_class = first and "moinshare-default" or "moinshare-other" 602 append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id())) 603 604 # Include the content. 605 606 append(formatUpdatePart(update_part, request, fmt)) 607 608 if not single: 609 append(fmt.div(on=0)) 610 611 first = False 612 613 return "".join(result) 614 615 def formatUpdatePart(update, request, fmt): 616 617 "Format the given 'update' using the given 'request' and 'fmt'." 618 619 _ = request.getText 620 621 result = [] 622 append = result.append 623 624 # Encapsulate the content. 625 626 append(fmt.div(on=1, css_class="moinshare-content")) 627 text = getFormattedUpdate(update, request, fmt) 628 if text: 629 append(text) 630 else: 631 append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)) 632 append(fmt.div(on=0)) 633 634 return "".join(result) 635 636 # Source management. 637 638 def getUpdateSources(pagename, request): 639 640 "Return the update sources from the given 'pagename' using the 'request'." 641 642 sources = {} 643 644 source_definitions = getWikiDict(pagename, request) 645 646 if source_definitions: 647 for name, value in source_definitions.items(): 648 sources[name] = getSourceParameters(value) 649 650 return sources 651 652 def getSourceParameters(source_definition): 653 654 "Return the parameters from the given 'source_definition' string." 655 656 return parseDictEntry(source_definition, ("type", "location")) 657 658 # HTML parsing support. 659 660 class IncomingHTMLSanitizer(HTMLSanitizer): 661 662 "An HTML parser that rewrites references to attachments." 
663 664 def __init__(self, out, request, page, message_number): 665 HTMLSanitizer.__init__(self, out) 666 self.request = request 667 self.message_number = message_number 668 self.page = page 669 670 def rewrite_reference(self, ref): 671 if ref.startswith("cid:"): 672 part = ref[len("cid:"):] 673 action_link = self.page.url(self.request, { 674 "action" : "ReadMessage", "doit" : "1", 675 "message" : self.message_number, "part" : part 676 }) 677 return action_link 678 else: 679 return ref 680 681 def handle_starttag(self, tag, attrs): 682 new_attrs = [] 683 for attrname, attrvalue in attrs: 684 if attrname in self.uri_attrs: 685 new_attrs.append((attrname, self.rewrite_reference(attrvalue))) 686 else: 687 new_attrs.append((attrname, attrvalue)) 688 HTMLSanitizer.handle_starttag(self, tag, new_attrs) 689 690 class IncomingMarkup(Markup): 691 692 "A special markup processor for incoming HTML." 693 694 def sanitize(self, request, page, message_number): 695 out = getwriter("utf-8")(StringIO()) 696 sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) 697 sanitizer.feed(self.stripentities(keepxmlentities=True)) 698 return IncomingMarkup(unicode(out.getvalue(), "utf-8")) 699 700 class IncomingHTMLParser: 701 702 "Filters and rewrites incoming HTML content." 703 704 def __init__(self, raw, request, **kw): 705 self.raw = raw 706 self.request = request 707 self.message_number = None 708 self.page = None 709 710 def format(self, formatter, **kw): 711 712 "Send the text." 713 714 try: 715 self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) 716 except HTMLParseError, e: 717 self.request.write(formatter.sysmsg(1) + 718 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, 719 self.raw.splitlines()[e.lineno - 1].strip())) + 720 formatter.sysmsg(0)) 721 722 class MakeIncomingHTMLParser: 723 724 "A class that makes parsers configured for messages." 
725 726 def __init__(self, page, message_number): 727 728 "Initialise with state that is used to configure instantiated parsers." 729 730 self.message_number = message_number 731 self.page = page 732 733 def __call__(self, *args, **kw): 734 parser = IncomingHTMLParser(*args, **kw) 735 parser.message_number = self.message_number 736 parser.page = self.page 737 return parser 738 739 def get_make_parser(page, message_number): 740 741 """ 742 Return a callable that will return a parser configured for the message from 743 the given 'page' with the given 'message_number'. 744 """ 745 746 return MakeIncomingHTMLParser(page, message_number) 747 748 # vim: tabstop=4 expandtab shiftwidth=4