MoinShare (file MoinShare.py at 006442452766)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - MoinShare library     4      5     @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>     6     @copyright: 2003-2006 Edgewall Software     7     @copyright: 2006 MoinMoin:AlexanderSchremmer     8     @license: GNU GPL (v2 or later), see COPYING.txt for details.     9 """    10     11 from ContentTypeSupport import getContentPreferences    12 from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \    13                         getDateTimeFromISO8601, DateTime    14 from MoinSupport import *    15 from MoinRemoteSupport import *    16 from ItemSupport import ItemStore    17 from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError    18 from MoinMessageSupport import get_homedir, get_username_for_fingerprint    19 from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup    20 from MoinMoin import wikiutil    21 from email.parser import Parser    22 from email.utils import parsedate    23 from codecs import getwriter    24 import xml.dom.pulldom    25     26 try:    27     from cStringIO import StringIO    28 except ImportError:    29     from StringIO import StringIO    30     31 _getFragments = getFragments    32     33 __version__ = "0.1"    34     35 ATOM_NS = "http://www.w3.org/2005/Atom"    36     37 # Utility functions.    
def text(element):

    "Return the concatenated values of the text nodes directly in 'element'."

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])

def children(element):

    "Return the serialised XML of all child nodes of 'element'."

    return "".join([node.toxml() for node in element.childNodes])

def unescape(text):

    """
    Return 'text' with the "&lt;", "&gt;" and "&amp;" entities replaced by the
    characters they represent. The ampersand is handled last so that text such
    as "&amp;lt;" yields "&lt;" and not "<".
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link target given by 'element': the text content for the "rss"
    'feed_type', the "href" attribute for any other feed type.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content has been requested by 'show_content' and
    'tagname' is an element providing such content.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group 'groupname', using the
    has_member method of the dicts object on 'request' where it exists, and
    testing membership of the group's entry otherwise.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    return [(format, attributes, body)
            for format, attributes, body in _getFragments(s)
            if fragment_attribute in attributes]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Where no Accept header is provided, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if not accept:
        return mimetypes

    prefs = getContentPreferences(accept)
    return prefs.get_preferred_types(mimetypes)

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")
    if latest_timestamp:
        return latest_timestamp
    else:
        return getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):
        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification.

        self.path = []

    def unique_id(self):

        """
        Return an identifier built from the message number and the path of
        part numbers leading to this update.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Order updates by their updated time, placing updates without such a
        time after those having one.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a copy of this update, appending any given 'part_number' to the
        copy's path. The content, content type and parts are not copied.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page
        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Error classes.

class FeedError(Exception):

    "The base class of errors raised when retrieving feeds."

    pass

class FeedMissingError(FeedError):

    "Raised when a feed cannot be obtained or may not be accessed."

    pass

class FeedContentTypeError(FeedError):

    "Raised when a retrieved resource has an unsupported content type."

    pass

# Update retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for local file URLs and where no data can be
    obtained; FeedContentTypeError is raised where the resource has a content
    type that is not handled here.
    """

    # Prevent local file access. URL schemes are case-insensitive and may be
    # preceded by whitespace, so the test is made on a normalised form.

    if feed_url.strip().lower().startswith("file:"):
        raise FeedMissingError

    elif feed_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()

# Update retrieval from feeds.

def _getEntryLimit(max_entries):

    """
    Return 'max_entries' as a positive integer limit, or None where no usable
    limit is given (a missing, non-numeric, zero or negative value).
    """

    try:
        limit = int(max_entries)
    except (TypeError, ValueError):
        return None

    if limit > 0:
        return limit
    else:
        return None

def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    Entries are kept in document order, with those beyond the limit being
    discarded. Where 'max_entries' does not provide a positive number, all
    entries are returned.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.
    """

    limit = _getEntryLimit(max_entries)

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    feed_type = None
    update = None
    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, node in events:

        # Elements inside an Atom source declaration are ignored.

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = node.localName

            # Detect the feed type and items.

            if tagname == "feed" and node.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements. Outside any item, titles and links
            # describe the channel itself.

            elif tagname == "title":
                events.expandNode(node)
                if update:
                    update.title = text(node)
                else:
                    channel_title = text(node)

            elif tagname == "link":
                events.expandNode(node)
                if update:
                    update.link = linktext(node, feed_type)
                else:
                    channel_link = linktext(node, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(node)

                # Obtain content where requested or, failing that, a
                # summary.

                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                    if feed_type == "atom":
                        update.content_type = node.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(node)
                            update.content_type = "application/xhtml+xml"
                        elif update.content_type in ("html", "text/html"):
                            update.content = text(node)
                            update.content_type = "text/html"
                        else:
                            update.content = text(node)
                            update.content_type = "text/plain"
                    else:
                        update.content_type = "text/html"
                        update.content = text(node)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(node)

                if update:
                    if feed_type == "atom":
                        update.updated = getDateTimeFromISO8601(text(node))
                    else:

                        # parsedate returns None for dates it cannot
                        # interpret, leaving the update without a time.

                        parsed = parsedate(text(node))
                        if parsed is not None:
                            update.updated = DateTime(parsed)
                        else:
                            update.updated = None

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = node.localName

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                # Only retain actual updates and only up to any limit.

                if update is not None and (limit is None or len(feed_updates) < limit):
                    feed_updates.append(update)

                update = None

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates

# Update retrieval from mailboxes and multipart messages.
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Retrieve update entries from 'feed', this being a file-like object
    providing a multipart message whose parts are each interpreted as a
    separate message and thus a separate update. The 'request' is used when
    obtaining the content of each message.

    NOTE: The 'max_entries' and 'show_content' parameters are accepted for
    NOTE: compatibility with getUpdatesFromFeed but are not consulted here:
    NOTE: all messages are returned together with any content obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with the feed type being "mbox" and no channel details being
    provided.
    """

    mailbox = Parser().parse(feed)

    feed_updates = []

    # Parse each message from the feed as a separate update.

    for message_number, part in enumerate(mailbox.get_payload()):
        update = Update()
        update.fragment = update.updated = getDateTimeFromRFC2822(part.get("date"))
        update.title = part.get("subject", "Update #%d" % message_number)
        update.message_number = message_number

        update.content, update.content_type, update.parts, actual_author = \
            getUpdateContentFromPart(part, request)

        if actual_author:
            update.author = actual_author

        feed_updates.append(update)

    return ("mbox", None, None), feed_updates

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)

            if format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered according to
    the sorted keys of the store.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number, request):

    "Return an update for the given 'message_text' and 'message_number'."

    update = Update()
    message = Parser().parsestr(message_text)

    # Produce a fragment identifier.

    update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # Any author identity obtained from the content replaces the declared
    # author.

    if actual_author:
        update.author = actual_author

    return update

def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part'.
    """

    # Determine whether the part has several representations.

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    if subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:

            # Present the parts as they are if decryption is not possible.

            pass

    # Otherwise, just obtain the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None

def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' using the 'request' to locate key details,
    returning a tuple containing the resulting message part and the username
    associated with any verified signature (or None if no such identity was
    established).
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None

def getPartContent(part):

    "Decode the 'part', returning the decoded payload and the content type."

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Only convert to Unicode where the character set is known.

    if charset:
        content = unicode(payload, charset)
    else:
        content = payload

    return content, part.get_content_type()

def getUpdateFromPart(parent, part, part_number, request):

    "Using the 'parent' update, return an update object for the given 'part'."

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request)
    return update

def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', using the 'request'
    when obtaining the content of any parts. An update without parts is
    returned as the only member of the list.
    """

    # Handle multipart/alternative and other non-related multiparts.

    if not update.parts:
        return [update]

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates.extend(getUpdatesForFormatting(update_part, request))

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for the content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever employed.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string. Where the update provides several
    alternatives, navigation tabs are produced and each alternative is
    wrapped in its own region.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.

        if not single:
            css_class = first and "moinshare-default" or "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the content (or a message where the content cannot be shown) inside a
    content region.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))
    formatted = getFormattedUpdate(update, request, fmt)
    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))
    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    A dictionary mapping source names to their parameters is returned.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', using the 'request', 'page'
        and 'message_number' when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a link to the ReadMessage action on the page for any 'ref'
        employing the "cid:" scheme, identifying the message and the
        referenced part. Other references are returned unchanged.
        """

        # Attributes without values provide no reference to inspect.

        if ref and ref.startswith("cid:"):
            part = ref[len("cid:"):]
            return self.page.url(self.request, {
                "action" : "ReadMessage", "doit" : "1",
                "message" : self.message_number, "part" : part
                })
        else:
            return ref

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of URI-bearing attributes in 'attrs' before passing
        'tag' and the attributes on to the sanitizer proper.
        """

        new_attrs = []
        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))
        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a sanitized copy of this markup, with references rewritten
        using the given 'request', 'page' and 'message_number'.
        """

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are set separately after instantiation.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        "Send the text."

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:

            # Quote the offending line only where the error identifies one,
            # so that reporting the error cannot itself fail.

            lines = self.raw.splitlines()
            lineno = getattr(e, "lineno", None)

            if lineno is not None and 1 <= lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the stored message number and page.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4