MoinShare (annotate MoinShare.py in c32d2ccbc23f)

MoinShare

Annotated MoinShare.py

43:c32d2ccbc23f

2013-06-22

Paul Boddie

Use parseDictEntry from MoinSupport to parse source details.

paul@0	1	# -*- coding: iso-8859-1 -*-
paul@0	2	"""
paul@0	3	MoinMoin - MoinShare library
paul@0	4
paul@17	5	@copyright: 2011, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
paul@34	6	@copyright: 2003-2006 Edgewall Software
paul@34	7	@copyright: 2006 MoinMoin:AlexanderSchremmer
paul@0	8	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	9	"""
paul@0	10
paul@17	11	from ContentTypeSupport import getContentPreferences
paul@33	12	from DateSupport import getCurrentTime, getDateTimeFromRFC2822
paul@0	13	from MoinSupport import *
paul@37	14	from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
paul@15	15	from MoinMoin import wikiutil
paul@33	16	from email.parser import Parser
paul@37	17	from codecs import getwriter
paul@0	18
paul@25	19	try:
paul@25	20	from cStringIO import StringIO
paul@25	21	except ImportError:
paul@25	22	from StringIO import StringIO
paul@25	23
# Keep a reference to the getFragments function already in scope (presumably
# provided by the MoinSupport wildcard import), since this module defines its
# own getFragments function which wraps it.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"
paul@0	27
paul@0	28	# More Moin 1.9 compatibility functions.
paul@0	29
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group called 'groupname'
    according to the group dictionaries provided by the 'request'.
    """

    dicts = request.dicts

    # Prefer the dedicated membership test where the dictionaries offer one.

    if hasattr(dicts, "has_member"):
        return dicts.has_member(groupname, username)

    # Otherwise, look for the user amongst the members of the group, treating
    # an unknown group as having no members.

    members = dicts.get(groupname, [])
    return username in members
paul@0	35
# Fragments employ a "moinshare" attribute: getFragments only returns those
# fragments whose attributes include this name.

fragment_attribute = "moinshare"
paul@2	39
def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    Each fragment is a (format, attributes, body) tuple as produced by the
    underlying fragment retrieval function. Only fragments whose attributes
    include 'fragment_attribute' are returned, in their original order.
    """

    # Use the 'in' operator rather than the deprecated has_key method, which
    # is not available on dictionaries in later versions of Python.

    return [
        (fragment_format, attributes, body)
        for fragment_format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2	49
def getPreferredOutputTypes(request, mimetypes):

    """
    Perform content negotiation using the 'request', returning the types
    common to the fragment (given by 'mimetypes') and the client (as stated in
    its Accept header). Where no Accept header is available, the 'mimetypes'
    are returned unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")

    # Without stated client preferences, everything on offer is retained.

    if not accept_header:
        return mimetypes

    preferences = getContentPreferences(accept_header)
    return preferences.get_preferred_types(mimetypes)
paul@9	64
def getUpdatedTime(metadata):

    """
    Return the last updated time found under "last-modified" in the given
    'metadata', or the current time where no such value is present (or where
    the value is empty).
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9	79
paul@30	80	# Entry/update classes.
paul@30	81
class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an update with all of its details unset."

        # General details.

        self.title = self.link = None
        self.content = self.content_type = None
        self.updated = None

        # Page-related attributes.

        self.fragment = self.preferred = None

        # Message-related attributes.

        self.message_number = self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update.

        self.path = []

    def unique_id(self):

        """
        Return an identifier string combining the message number with the
        elements of the path, each separated by a hyphen.
        """

        path_elements = [str(element) for element in self.path]
        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(path_elements))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using their updated times, ordering
        an update without an updated time after one having such a time.
        """

        mine, theirs = self.updated, other.updated

        if mine is None and theirs is not None:
            return 1
        if mine is not None and theirs is None:
            return -1
        return cmp(mine, theirs)

    def copy(self, part_number=None):

        """
        Return a new update sharing the descriptive details of this one, but
        not its content, content type or parts. The new update has its own
        copy of the path, extended with 'part_number' if this is not None.
        """

        duplicate = Update()

        for name in ("title", "link", "updated", "fragment", "preferred",
                     "message_number", "page"):
            setattr(duplicate, name, getattr(self, name))

        # Copy the path so that extending it does not affect this update.

        duplicate.path = self.path[:]
        if part_number is not None:
            duplicate.path.append(part_number)

        return duplicate
paul@40	135
paul@30	136	# Update retrieval from pages.
paul@30	137
def getUpdatesFromPage(page, request):

    """
    Return a list of update objects for the given 'page', one for each
    MoinShare fragment found in the raw body of the page, using the 'request'
    to negotiate content types and to format the content of each fragment.
    """

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    page_updated = getUpdatedTime(getMetadata(page))

    # Visit the fragment regions for the page, numbering them from zero.

    results = []

    for index, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        entry = Update()

        # Produce a fragment identifier and a title, using the position of
        # the fragment where these are not given as attributes.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        entry.fragment = attributes.get("fragment", str(index))
        entry.title = attributes.get("summary", "Update #%d" % index)

        # Get the preferred content types available for the fragment.

        available_types = getOutputTypes(request, format)
        entry.preferred = getPreferredOutputTypes(request, available_types)

        # Try and obtain some suitable content for the entry: only HTML is
        # produced, with the content remaining unset otherwise.
        # NOTE: Could potentially get a summary for the fragment.

        entry.content = None

        if "text/html" in entry.preferred:
            parser_cls = getParserClass(request, format)

            # HTML fragments are used as they are.

            if format == "html":
                html = body

            # Parsers able to target output types are asked for HTML.

            elif hasattr(parser_cls, "formatForOutputType"):
                html = formatTextForOutputType(body, request, parser_cls, "text/html")

            # Other parsers are used with the request's HTML formatter.

            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                html = formatText(body, request, fmt, parser_cls)

            entry.content = html
            entry.content_type = "text/html"

        entry.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        entry.link = page.url(request, anchor=entry.fragment)
        entry.updated = page_updated

        results.append(entry)

    return results
paul@25	200
paul@33	201	# Update retrieval from message stores.
paul@33	202
def getUpdatesFromStore(page, request):

    """
    Return a list of update objects, one for each message found in the
    message store associated with the given 'page'. The 'request' is accepted
    but is not consulted by this function.
    """

    # NOTE(review): The page's updated time is obtained here but is not used
    # NOTE(review): below: each update takes its time from the message date.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    store = ItemStore(page, "messages", "message-locks")

    results = []

    for number, text in enumerate(store):

        message = Parser().parse(StringIO(text))
        entry = Update()

        # Use the message date both as the fragment identifier and as the
        # updated time, and the subject (if any) as the title.

        sent = getDateTimeFromRFC2822(message.get("date"))
        entry.fragment = sent
        entry.updated = sent
        entry.title = message.get("subject", "Update #%d" % number)

        entry.page = page
        entry.message_number = number

        # Obtain the content details from the message itself.

        content, content_type, parts = getUpdateContentFromPart(message)
        entry.content = content
        entry.content_type = content_type
        entry.parts = parts

        results.append(entry)

    return results
paul@33	235
def getUpdateContentFromPart(part):

    """
    Return a tuple of the form (content, content type, subparts) for the given
    'part', where the content is decoded and where the subparts are None for
    a part that is not a collection of parts.
    """

    if part.is_multipart():

        # For a collection of related parts, use the first as the update
        # content and assume that the formatter will reference the other
        # parts.

        if part.get_content_subtype() == "related":
            main_part = part.get_payload()[0]
            content, content_type = getPartContent(main_part)
            return content, content_type, [main_part]

        # Otherwise, just obtain the parts for separate display, providing no
        # content for the collection itself.

        return None, part.get_content_type(), part.get_payload()

    # For a single part, use it as the update content.

    content, content_type = getPartContent(part)
    return content, content_type, None
paul@40	263
def getPartContent(part):

    """
    Decode the 'part', returning a tuple containing the decoded payload and
    the content type.

    The payload is obtained with any content transfer encoding removed. Where
    the part declares a character set, the payload is converted to a Unicode
    string using that character set; otherwise, the payload is returned as
    obtained from the part. A part without a decodable payload (such as a
    collection of parts) yields None as its payload.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Test the conditions explicitly instead of using "charset and ... or ...",
    # which would discard a decoded but empty payload and return the undecoded
    # payload in its place. Also avoid attempting to decode an absent payload.

    if charset and payload is not None:
        payload = payload.decode(charset)

    return payload, part.get_content_type()
paul@40	271
def getUpdateFromPart(parent, part, part_number):

    """
    Return a new update object for the given 'part', copied from the 'parent'
    update with 'part_number' added to its path, and having the content,
    content type and subparts obtained from the part.
    """

    child = parent.copy(part_number)

    content, content_type, subparts = getUpdateContentFromPart(part)
    child.content = content
    child.content_type = content_type
    child.parts = subparts

    return child
paul@40	279
paul@31	280	# Source management.
paul@31	281
def getUpdateSources(pagename, request):

    """
    Return a dictionary mapping source names to source parameters, using the
    source definitions obtained for the given 'pagename' and 'request'.
    """

    definitions = getWikiDict(pagename, request)

    # An absent or empty collection of definitions provides no sources.

    if not definitions:
        return {}

    return dict([
        (name, getSourceParameters(definition))
        for name, definition in definitions.items()
        ])
paul@31	295
def getSourceParameters(source_definition):

    """
    Return the parameters obtained by parsing the given 'source_definition'
    string as a dictionary entry.
    """

    # NOTE(review): These names are passed to parseDictEntry, presumably to
    # NOTE(review): label values given without explicit names - to confirm.

    names = ("type", "location")
    return parseDictEntry(source_definition, names)
paul@31	301
paul@34	302	# HTML parsing support.
paul@34	303
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' used when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.page = page
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref': a "cid:" reference
        becomes a link to the ReadMessage action on the page, indicating the
        message number and the referenced part; any other reference is
        returned unchanged.
        """

        prefix = "cid:"

        if not ref.startswith(prefix):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Handle the start 'tag' with the given 'attrs' (a sequence of name and
        value pairs), rewriting the values of attributes named in 'uri_attrs'
        before passing the tag on to the base sanitizer.
        """

        rewritten = []

        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))

        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34	333
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new IncomingMarkup object containing this markup as processed
        by an IncomingHTMLSanitizer configured with the given 'request',
        'page' and 'message_number'.
        """

        # Collect the sanitizer's output as UTF-8 encoded text.

        encoded = StringIO()
        out = getwriter("utf-8")(encoded)

        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        # Convert the collected output back to Unicode for the new markup.

        sanitized = unicode(out.getvalue(), "utf-8")
        return IncomingMarkup(sanitized)
paul@34	343
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. Any
        additional keyword arguments are accepted but ignored. The message
        number and page are left unset for the caller to configure.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def _get_error_line(self, exc):

        """
        Return the stripped line of the raw text indicated by the line number
        of the parsing error 'exc', or an empty string where the error
        provides no line number or one not found in the text.
        """

        lineno = getattr(exc, "lineno", None)
        lines = self.raw.splitlines()

        # Parsing errors need not carry a position, and a position of zero
        # would otherwise select the last line of the text.

        if lineno is None or not 1 <= lineno <= len(lines):
            return ""

        return lines[lineno - 1].strip()

    def format(self, formatter, **kw):

        """
        Send the sanitized text to the request using the given 'formatter',
        writing a system message instead where the HTML cannot be parsed. Any
        additional keyword arguments are accepted but ignored.
        """

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))

        # Use the "as" syntax, which is also understood by later versions of
        # Python, and report the problem without failing on the line lookup.

        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
                    self._get_error_line(e))) +
                formatter.sysmsg(0))
paul@34	365
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, args, *kw, **options):

        """
        Return a new IncomingHTMLParser configured with the page and message
        number held by this object.

        The first argument, 'args', is the raw text for the parser; further
        positional arguments (collected as 'kw') and any keyword arguments
        (collected as 'options') are passed on to the parser unchanged. The
        original parameter names are retained for compatibility, with keyword
        arguments now being accepted since the parser itself accepts them.
        """

        parser = IncomingHTMLParser(args, *kw, **options)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser
paul@34	382
def get_make_parser(page, message_number):

    """
    Return a callable which, when invoked like a parser class, provides a
    parser configured for the message having the given 'message_number' found
    via the given 'page'.
    """

    make_parser = MakeIncomingHTMLParser(page, message_number)
    return make_parser
paul@34	391
paul@0	392	# vim: tabstop=4 expandtab shiftwidth=4