# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinShare library

    @copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
    @copyright: 2003-2006 Edgewall Software
    @copyright: 2006 MoinMoin:AlexanderSchremmer
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822
from MoinSupport import *
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from codecs import getwriter

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the general fragment function provided by the star import
# above, since this module redefines getFragments below.

_getFragments = getFragments

__version__ = "0.1"

# More Moin 1.9 compatibility functions.

def has_member(request, groupname, username):

    """
    Using the 'request', return whether 'username' is a member of the group
    called 'groupname'. The has_member method of request.dicts is used where
    available; otherwise, membership is tested against the collection obtained
    for 'groupname', with a missing group being treated as empty.
    """

    if hasattr(request.dicts, "has_member"):
        return request.dicts.has_member(groupname, username)
    else:
        return username in request.dicts.get(groupname, [])

# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute. Each
    fragment is a (format, attributes, body) tuple.
    """

    return [(format, attributes, body)
            for format, attributes, body in _getFragments(s)
            if fragment_attribute in attributes]

def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Where the client supplies no Accept header, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")
    if accept:
        prefs = getContentPreferences(accept)
        return prefs.get_preferred_types(mimetypes)
    else:
        return mimetypes

def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()

# NOTE: This definition is shadowed by the later definition of getUpdateSources
# NOTE: in this module, which takes (pagename, request). It is retained only
# NOTE: so that nothing is removed; callers always reach the later definition.

def getUpdateSources(request, sources_page):

    """
    Using the 'request', return the update sources defined on the given
    'sources_page'.
    """

    # Remote sources are accessed via dictionary page definitions.

    return getWikiDict(sources_page, request)

# Entry/update classes.

class Update:

    "A feed update entry."

    def __init__(self):
        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times. An update
        without an updated time is ordered after one with an updated time.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

# Update retrieval from pages.

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned, one for each fragment on the page bearing the
    MoinShare fragment attribute.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry. The content and
        # content type remain None unless HTML is an acceptable output type.
        # NOTE: Could potentially get a summary for the fragment.

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)
            parser = parser_cls(body, request)

            if format == "html":
                update.content = body
            elif hasattr(parser, "formatForOutputType"):

                # Collect the parser output as UTF-8 and convert it back to
                # Unicode. Only getwriter is imported from codecs by this
                # module, so it is referenced directly.

                s = getwriter("utf-8")(StringIO())
                parser.formatForOutputType("text/html", write=s.write)
                update.content = unicode(s.getvalue(), "utf-8")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates

# Update retrieval from message stores.

def _getDecodedPayload(part):

    """
    Return the transfer-decoded payload of the given message 'part'. Where the
    part declares a character set, a Unicode object is returned; otherwise, the
    payload is returned as provided by the part.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Without a declared character set or any payload, nothing can be decoded.

    if not charset or payload is None:
        return payload

    # Messages originate elsewhere, so tolerate incorrectly declared character
    # sets instead of failing for the entire collection of updates.

    try:
        return unicode(payload, charset, "replace")
    except LookupError:
        return payload

def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned.
    """

    updates = []

    # NOTE: The page's updated time is obtained here but is not currently used:
    # NOTE: each update takes its time from the message's date header.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    store = ItemStore(page, "messages", "message-locks")

    for n, message_text in enumerate(iter(store)):

        update = Update()
        message = Parser().parse(StringIO(message_text))

        # Produce a fragment identifier.

        update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
        update.title = message.get("subject", "Update #%d" % n)

        update.page = page
        update.message_number = n

        # Determine whether the message has several representations.

        # For a single part, use it as the update content.

        if not message.is_multipart():
            update.content = _getDecodedPayload(message)
            update.content_type = message.get_content_type()

        # For a collection of related parts, use the first as the update content
        # and assume that the formatter will reference the other parts.

        elif message.get_content_subtype() == "related":
            main_part = message.get_payload()[0]
            update.content = _getDecodedPayload(main_part)
            update.content_type = main_part.get_content_type()

        # Otherwise, just obtain the parts for separate display.

        else:
            update.parts = message.get_payload()
            update.content_type = message.get_content_type()

        updates.append(update)

    return updates

# Source management.

def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    The result is a dictionary mapping each source name to a dictionary of
    parameters obtained from the source's definition string.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if source_definitions:
        for name, value in source_definitions.items():
            sources[name] = getSourceParameters(value)

    return sources

def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string as a
    dictionary. The string consists of whitespace-separated arguments. Each
    argument of the form name=value, where the name is purely alphabetic,
    defines the named parameter. Any other argument is assigned to the first
    of "type" and "location" not yet defined, and is ignored if both are
    already defined.
    """

    parameters = {}
    unqualified = ("type", "location")

    for arg in source_definition.split():
        argname, separator, argvalue = arg.partition("=")

        # Only accept qualified parameters with likely parameter names.

        if separator and argname.isalpha():
            parameters[argname] = argvalue
            continue

        # Unqualified parameters are assumed to be one of a recognised set.

        for argname in unqualified:
            if argname not in parameters:
                parameters[argname] = arg
                break

    return parameters

# HTML parsing support.

class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for the rewriting of references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. A reference using the
        "cid:" scheme is replaced by a link to the ReadMessage action on the
        page, identifying the message and the referenced part. Any other
        reference is returned unchanged.
        """

        # Attributes without values are presented by the parser as None.

        if ref and ref.startswith("cid:"):
            part = ref[len("cid:"):]
            action_link = self.page.url(self.request, {
                "action" : "ReadMessage", "doit" : "1",
                "message" : self.message_number, "part" : part
                })
            return action_link
        else:
            return ref

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of any URI-bearing attributes in 'attrs' before
        passing 'tag' and the attributes on to the superclass.
        """

        new_attrs = []
        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                new_attrs.append((attrname, self.rewrite_reference(attrvalue)))
            else:
                new_attrs.append((attrname, attrvalue))
        HTMLSanitizer.handle_starttag(self, tag, new_attrs)

class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object containing this markup after processing by
        the incoming HTML sanitizer configured with the 'request', 'page' and
        'message_number'.
        """

        out = getwriter("utf-8")(StringIO())
        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))
        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))

class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' text and the 'request'. The
        message number and page are left unset for later configuration.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        "Send the text."

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self.raw.splitlines()[e.lineno - 1].strip())) +
                formatter.sysmsg(0))

class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the message number and page held by this object.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser

def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)

# vim: tabstop=4 expandtab shiftwidth=4