# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinShare library

    @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
    @copyright: 2003-2006 Edgewall Software
    @copyright: 2006 MoinMoin:AlexanderSchremmer
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822
from MoinSupport import *
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from codecs import getwriter

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the getFragments made available by the wildcard import
# above, since this module defines its own getFragments which wraps it.

_getFragments = getFragments

__version__ = "0.1"

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname', using the
    dictionaries/groups object found on the 'request'.

    Where that object provides a 'has_member' method, it is used directly;
    otherwise, the group is looked up with 'get' and tested for membership.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    Each fragment is a (format, attributes, body) tuple as produced by the
    underlying fragment extraction function.
    """

    # A membership test is used instead of the deprecated has_key method.

    return [
        (format_name, attributes, body)
        for format_name, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header).

    Where the request provides no Accept header, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if accept:
        prefs = getContentPreferences(accept)
        return prefs.get_preferred_types(mimetypes)
    else:
        return mimetypes

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")
    if latest_timestamp:
        return latest_timestamp
    else:
        return getCurrentTime()

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an empty update with all details unset."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update, extended
        # by 'copy' when a part number is given.

        self.path = []

    def unique_id(self):

        """
        Return an identifier string built from the message number and the
        path of part numbers, joined using hyphens.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times. An update
        without an updated time is ordered after one having an updated time.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's title, link, updated time,
        fragment, preferred types, message number and page, together with a
        copy of the path. If 'part_number' is not None, it is appended to the
        new update's path.

        The content, content type and parts are not copied and remain unset on
        the new update.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page
        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.

    One update is produced for each MoinShare fragment found in the raw body
    of the page. Content is only provided for an update where "text/html" is
    amongst the preferred output types; otherwise, the content remains unset.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format_name, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format_name))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format_name)

            # HTML fragments are used as they are; other formats are converted
            # using the parser's own output type support where available, or
            # by formatting with the request's HTML formatter otherwise.

            if format_name == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned.

    Each update takes its details from the stored message text, with the
    message number being the position of the message in the store.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    for n, message_text in enumerate(store):
        update = getUpdateFromMessageText(message_text, n)
        update.page = page
        updates.append(update)

    return updates

def getUpdateFromMessageText(message_text, message_number):

    """
    Return an update for the given 'message_text' and 'message_number'.

    The message's date header provides both the fragment identifier and the
    updated time; the subject header provides the title, with a numbered
    default being used where no subject is given.
    """

    update = Update()
    message = Parser().parse(StringIO(message_text))

    # Produce a fragment identifier.

    update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)

    update.message_number = message_number

    update.content, update.content_type, update.parts = getUpdateContentFromPart(message)
    return update

def getUpdateContentFromPart(part):

    """
    Return decoded content, the content type and any subparts in a tuple for a
    given 'part'.

    Either the content or the subparts may be None, depending on the nature of
    the part.
    """

    # Determine whether the part has several representations.

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    elif part.get_content_subtype() == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part]

    # Encrypted content cannot be meaningfully separated.

    elif part.get_content_subtype() == "encrypted":
        return part.as_string(), part.get_content_type(), None

    # Otherwise, just obtain the parts for separate display.

    else:
        return None, part.get_content_type(), part.get_payload()

def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.

    Where the part declares a character set, the payload is converted to a
    Unicode object; otherwise, the payload is returned as obtained from the
    part.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Decode explicitly instead of using "charset and ... or payload": that
    # form yields the undecoded payload whenever the decoded text is empty.
    # An absent payload (None) is passed through instead of being decoded.

    if charset and payload is not None:
        payload = unicode(payload, charset)

    return payload, part.get_content_type()

def getUpdateFromPart(parent, part, part_number):

    """
    Using the 'parent' update, return an update object for the given 'part'.
    The 'part_number' is added to the path identifying the new update.
    """

    update = parent.copy(part_number)
    update.content, update.content_type, update.parts = getUpdateContentFromPart(part)
    return update

def getUpdatesForFormatting(update):

    """
    Get a list of updates for formatting given 'update'.

    An update having parts is replaced by the updates obtained recursively from
    those parts; an update without parts is returned as the only list member.
    """

    updates = []

    # Handle multipart/alternative and other non-related multiparts.

    if update.parts:
        for n, part in enumerate(update.parts):
            update_part = getUpdateFromPart(update, part, n)
            updates += getUpdatesForFormatting(update_part)
    else:
        updates.append(update)

    return updates

# Update formatting.

def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'.

    None is returned where the update has no content or where no parser is
    available for the content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    # Only the first available parser is ever used.

    for parser_cls in parsers or []:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        else:
            return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None

def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string.

    Where the update yields several updates for formatting, navigation tabs
    are produced and each alternative is wrapped in its own region, with only
    the first alternative being given the default style.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update)
    single = len(updates) == 1

    # Format some navigation tabs.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.

        if not single:
            css_class = "moinshare-default" if first else "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)

def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string.

    Where no formatted content can be obtained, a message indicating the
    content type is provided instead.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))
    text = getFormattedUpdate(update, request, fmt)
    if text:
        append(text)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))
    append(fmt.div(on=0))

    return "".join(result)

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.

    A dictionary is returned mapping each source name to the parameters parsed
    from its definition. An empty dictionary is returned where no definitions
    are found.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    "Return the parameters from the given 'source_definition' string."

    return parseDictEntry(source_definition, ("type", "location"))

# HTML parsing support.

class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. A "cid:" reference is
        replaced by a link to the ReadMessage action on the page, indicating
        the message number and the referenced part. Any other reference is
        returned unchanged.
        """

        # Attributes given without a value provide None as the reference, and
        # this is passed through unchanged.

        if ref and ref.startswith("cid:"):
            part = ref[len("cid:"):]
            action_link = self.page.url(self.request, {
                "action" : "ReadMessage", "doit" : "1",
                "message" : self.message_number, "part" : part
                })
            return action_link
        else:
            return ref

    def handle_starttag(self, tag, attrs):

        """
        Handle the start 'tag' with the given 'attrs', rewriting the values of
        URI-bearing attributes before passing the tag to the base sanitizer.
        """

        new_attrs = []
        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))
        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a sanitized copy of this markup, using the 'request', 'page'
        and 'message_number' to rewrite references found in the markup.
        """

        # The sanitizer output is collected as UTF-8 bytes and converted back
        # to Unicode for the resulting markup object.

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' content and the 'request'. The
        message number and page are unset until assigned by the code creating
        the parser.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text, writing the sanitized content to the request using the
        'formatter'. Where the content cannot be parsed, a system message
        describing the parsing error is written instead.
        """

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:

            # Quote the offending line only where the reported line number
            # refers to an actual line, so that reporting the error cannot
            # itself fail and hide the parsing problem.

            lines = self.raw.splitlines()
            lineno = getattr(e, "lineno", None)

            if lineno and 0 < lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the retained page and message number.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4