MoinShare (annotate MoinShare.py in 145f3765afdc)

MoinShare

Annotated MoinShare.py

34:145f3765afdc

2013-05-17

Paul Boddie

Added support for presenting multipart/related messages obtained from message stores. To support incoming HTML messages, attachment/component references are rewritten using an extended version of the usual Moin HTML parsing framework.

paul@0	1	# -*- coding: iso-8859-1 -*-
paul@0	2	"""
paul@0	3	MoinMoin - MoinShare library
paul@0	4
paul@17	5	@copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
paul@34	6	@copyright: 2003-2006 Edgewall Software
paul@34	7	@copyright: 2006 MoinMoin:AlexanderSchremmer
paul@0	8	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	9	"""
paul@0	10
paul@17	11	from ContentTypeSupport import getContentPreferences
paul@33	12	from DateSupport import getCurrentTime, getDateTimeFromRFC2822
paul@0	13	from MoinSupport import *
paul@34	14	from MoinMoin.support.htmlmarkup import HTMLSanitizer, Markup
paul@15	15	from MoinMoin import wikiutil
paul@33	16	from email.parser import Parser
paul@0	17
paul@25	18	try:
paul@25	19	from cStringIO import StringIO
paul@25	20	except ImportError:
paul@25	21	from StringIO import StringIO
paul@25	22
# Keep a reference to the getFragments function provided by the imports above
# (presumably the MoinSupport star import - TODO confirm), since this module
# defines its own getFragments function which filters the original's results.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"
paul@0	26
paul@0	27	# More Moin 1.9 compatibility functions.
paul@0	28
def has_member(request, groupname, username):

    """
    Return whether 'username' is a member of the group 'groupname' according
    to the dicts object of the given 'request'.

    Where request.dicts provides a has_member method, that method is used.
    Otherwise, the group is looked up using request.dicts.get and membership
    is tested on the result, with a missing group being treated as empty.
    """

    groups = request.dicts

    # Fall back to a mapping-style lookup where has_member is not provided.

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0	34
# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    The fragments are obtained from the function referenced by '_getFragments'
    and each is a (format, attributes, body) tuple. Only those whose
    attributes contain the 'fragment_attribute' name are returned, in the
    order in which they were obtained.
    """

    # Membership is tested using "in" since the has_key method is deprecated
    # and is not provided by all mapping types.

    return [
        (format, attributes, body)
        for format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2	48
def getOutputTypes(request, format):

    """
    Return the media types available for a fragment of the given 'format',
    using the 'request' to find the parser class for that format.

    Where the parser class does not provide a getOutputTypes method, only
    "text/html" is reported as being available.
    """

    parser_cls = getParserClass(request, format)

    # The getOutputTypes method is an extension to the parser API and may not
    # be present.

    if not hasattr(parser_cls, "getOutputTypes"):
        return ["text/html"]

    return parser_cls.getOutputTypes()
paul@9	63
def getPreferredOutputTypes(request, mimetypes):

    """
    Perform content negotiation using the 'request', returning the result of
    matching the given 'mimetypes' (those available for a fragment) against
    the preferences found in the client's Accept header.

    Where no Accept header value is available, 'mimetypes' is returned
    unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")

    # Without any stated preferences, everything offered is acceptable.

    if not accept_header:
        return mimetypes

    return getContentPreferences(accept_header).get_preferred_types(mimetypes)
paul@9	78
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value from the given 'metadata' mapping, or the
    result of getCurrentTime where that value is absent or otherwise false.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9	93
def getUpdateSources(request, sources_page):

    """
    Using the 'request', return the update sources defined on the given
    'sources_page' as provided by getWikiDict.

    NOTE: This module later defines another getUpdateSources function taking
    NOTE: (pagename, request), and that later definition is the one bound to
    NOTE: the name once the module has been loaded.
    """

    # Remote sources are accessed via dictionary page definitions.

    source_definitions = getWikiDict(sources_page, request)
    return source_definitions
paul@25	104
paul@30	105	# Entry/update classes.
paul@30	106
class Update:

    """
    A feed update entry.

    All attributes are initialised to None and are expected to be populated by
    the code creating the update.
    """

    def __init__(self):

        "Initialise an update having no details set."

        # General attributes.

        self.title = self.link = None
        self.content = self.content_type = None
        self.updated = None

        # Page-related attributes.

        self.fragment = self.preferred = None

        # Message-related attributes.

        self.message_number = self.parts = None

        # Message- and page-related attributes.

        self.page = None

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated attributes.

        An update without an updated value compares as greater than one having
        such a value. Otherwise, the result of comparing the two updated
        values using cmp is returned.
        """

        self_missing = self.updated is None
        other_missing = other.updated is None

        # Where both or neither have values, compare the values directly.

        if self_missing == other_missing:
            return cmp(self.updated, other.updated)

        if self_missing:
            return 1

        return -1
paul@30	139
paul@30	140	# Update retrieval from pages.
paul@30	141
def _getFragmentHTML(page, request, format, body):

    """
    Return HTML content for the fragment 'body' having the given 'format',
    found on the given 'page', using the 'request'.
    """

    parser_cls = getParserClass(request, format)

    # The parser is instantiated regardless of the format.

    parser = parser_cls(body, request)

    # HTML fragments are used as they are.

    if format == "html":
        return body

    # Use the extended parser API method where available.

    if hasattr(parser, "formatForOutputType"):
        out = StringIO()
        parser.formatForOutputType("text/html", write=out.write)
        return out.getvalue()

    # Otherwise, format the text using the request's HTML formatter.

    formatter = request.html_formatter
    formatter.setPage(page)
    return formatText(body, request, formatter, parser_cls)

def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned, one for each fragment found in the raw body of the
    page, in the order in which the fragments are found.

    Each update only has content and a content type where "text/html" is
    amongst the preferred output types for the fragment.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    updated = getUpdatedTime(getMetadata(page))

    # Get the fragment regions for the page.

    for n, fragment in enumerate(getFragments(page.get_raw_body())):
        format, attributes, body = fragment

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        available = getOutputTypes(request, format)
        update.preferred = getPreferredOutputTypes(request, available)

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.
        # The content remains unset, as initialised, unless HTML is wanted.

        if "text/html" in update.preferred:
            update.content = _getFragmentHTML(page, request, format, body)
            update.content_type = "text/html"

        update.page = page
        update.link = page.url(request)
        update.updated = updated

        updates.append(update)

    return updates
paul@25	203
paul@33	204	# Update retrieval from message stores.
paul@33	205
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, one for each message
    obtained from the store, in the order in which the messages are obtained.
    """

    updates = []

    # NOTE(review): The page-level updated time is obtained here but is not
    # NOTE(review): used below: each update uses the date of its message.

    updated = getUpdatedTime(getMetadata(page))

    store = ItemStore(page, "messages", "message-locks")

    for n, message_text in enumerate(store):

        update = Update()
        message = Parser().parse(StringIO(message_text))

        # Use the message date as both the fragment identifier and the
        # updated time.

        message_date = getDateTimeFromRFC2822(message.get("date"))
        update.fragment = message_date
        update.updated = message_date
        update.title = message.get("subject", "Update #%d" % n)

        update.page = page
        update.message_number = n

        # Determine whether the message has several representations.

        if message.is_multipart():

            # For a collection of related parts, use the first as the update
            # content and assume that the formatter will reference the other
            # parts.

            if message.get_content_subtype() == "related":
                main_part = message.get_payload()[0]
                update.content = main_part.get_payload()
                update.content_type = main_part.get_content_type()

            # Otherwise, just obtain the parts for separate display.

            else:
                update.parts = message.get_payload()
                update.content_type = message.get_content_type()

        # For a single part, use it as the update content.

        else:
            update.content = message.get_payload()
            update.content_type = message.get_content_type()

        updates.append(update)

    return updates
paul@33	258
paul@31	259	# Source management.
paul@31	260
def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.

    The result is a dictionary mapping each name defined by the wiki
    dictionary for the page to the parameters obtained from the associated
    definition by getSourceParameters. Where no definitions are available, an
    empty dictionary is returned.
    """

    source_definitions = getWikiDict(pagename, request)
    sources = {}

    if not source_definitions:
        return sources

    for name, definition in source_definitions.items():
        sources[name] = getSourceParameters(definition)

    return sources
paul@31	274
def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string as a
    dictionary.

    The definition is split on whitespace. Each argument of the form
    name=value, where the name is purely alphabetic, defines the parameter of
    that name. Any other argument is unqualified and is assigned to the first
    of "type" and "location" not already defined, being discarded if both are
    already defined.
    """

    parameters = {}
    unqualified = ("type", "location")

    for arg in source_definition.split():
        argname, separator, argvalue = arg.partition("=")

        # Only accept likely parameter names: values such as URLs may contain
        # "=" without being qualified parameters.

        if separator and argname.isalpha():
            parameters[argname] = argvalue
            continue

        # Unqualified parameters are assumed to be one of a recognised set.

        for candidate in unqualified:
            if candidate not in parameters:
                parameters[candidate] = arg
                break

    return parameters
paul@31	302
paul@34	303	# HTML parsing support.
paul@34	304
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.page = page
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a URL for the given 'ref'. A reference starting with "cid:" is
        replaced by a link to the ReadMessage action on the page, indicating
        the message number and the part given by the rest of the reference.
        Any other reference is returned unchanged.
        """

        prefix = "cid:"

        if not ref.startswith(prefix):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of 'tag' with the given 'attrs', rewriting the values
        of attributes listed in uri_attrs before passing all attributes on to
        the superclass.
        """

        rewritten = []

        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))

        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34	334
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return new markup made by feeding this markup, with entities stripped
        but XML entities kept, through a sanitizer configured with the given
        'request', 'page' and 'message_number'.
        """

        buf = StringIO()
        IncomingHTMLSanitizer(buf, request, page, message_number).feed(
            self.stripentities(keepxmlentities=True))
        return IncomingMarkup(buf.getvalue())
paul@34	344
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. Any
        keyword arguments are accepted but ignored.

        The message_number and page attributes are initially unset and are
        passed to the sanitizer when formatting.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text: write the sanitized form of the raw HTML to the request
        using the rawHTML method of the given 'formatter'. Where the HTML
        cannot be parsed, a system message describing the error and showing
        the offending line is written instead.
        """

        # Make sure that the exception class is available: no other import in
        # this module is known to provide it, and without it a parsing error
        # would cause a NameError instead of the message below.

        from HTMLParser import HTMLParseError

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self.raw.splitlines()[e.lineno - 1].strip())) +
                formatter.sysmsg(0))
paul@34	366
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, args, *rest, **kw):

        """
        Return a new IncomingHTMLParser for the raw text given by 'args',
        configured with the message number and page held by this object.

        Any additional positional and keyword arguments are passed on to the
        parser, so that instances can be called in the same way as the parser
        class itself, which accepts keyword arguments.
        """

        parser = IncomingHTMLParser(args, *rest, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser
paul@34	383
def get_make_parser(page, message_number):

    """
    Return a callable that makes parsers configured for the message having the
    given 'message_number' on the given 'page'.
    """

    make_parser = MakeIncomingHTMLParser(page, message_number)
    return make_parser
paul@34	392
paul@0	393	# vim: tabstop=4 expandtab shiftwidth=4