ImprovedTableParser (annotate ImprovedTableParser.py in 2faa4def44ae)

ImprovedTableParser

Annotated ImprovedTableParser.py

3:197226a815ca

2:2faa4def44ae

2012-01-17

Paul Boddie

Added copyright and licensing information.

paul@0	1	# -*- coding: iso-8859-1 -*-
paul@0	2	"""
paul@0	3	MoinMoin - ImprovedTableParser library
paul@0	4
paul@0	5	@copyright: 2012 by Paul Boddie <paul@boddie.org.uk>
paul@0	6	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	7	"""
paul@0	8
paul@0	9	from MoinMoin import wikiutil
paul@0	10	from shlex import shlex
paul@0	11	from StringIO import StringIO
paul@0	12	import re
paul@0	13
paul@0	14	# Regular expressions.
paul@0	15
# Each entry maps a pattern name to (regular expression source, extra flags).
# re.UNICODE is added to every pattern when it is compiled below.

syntax = {
    # For section markers: three identical braces, each preceded by the same
    # run of one or more backslashes (for example \{\{\{ or \}\}\}).
    "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),

    # A run of backslashes; captured so that split() keeps the runs.
    "marker"    : (r"(\\+)", 0),

    # At start of line:
    # (captured so that split() alternates exposed text with sections)
    "sections"  : (r"(^\s*{{{.*?^\s*}}})", re.MULTILINE | re.DOTALL),  # {{{ ... }}}
    "rows"      : (r"^==", re.MULTILINE),                              # ==

    # Within text:
    "columns"   : (r"\|\|[ \t]*", 0),                                  # || ws-excl-nl

    # At start of column text:
    "column"    : (r"^\s*<(.*?)>\s*(.*)", re.DOTALL),                  # ws < attributes > ws
    }

# Compiled forms of the above, keyed by the same names.

patterns = {}
for name, (value, flags) in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | flags)
paul@0	35
paul@0	36	# Functions.
paul@0	37
def parse(s):

    """
    Parse 's', returning a table definition.

    The result is a tuple (table_attrs, rows) where 'table_attrs' is a
    dictionary of table-level attributes and 'rows' is a list of
    (row_attrs, columns) tuples. Each entry in 'columns' is a two-element
    list of the form [column_attrs, column_text].

    Section markers in 's' are first converted using replaceMarkers. The text
    is then divided into exposed text, in which row and column boundaries are
    recognised, and sections, which are appended verbatim to the current
    column.
    """

    s = replaceMarkers(s)

    table_attrs = {}
    rows = []

    # The following will be redefined upon the construction of the first row.

    row_attrs = {}
    columns = []

    # Process exposed text and sections.
    # The "sections" pattern has a capturing group, so split() yields exposed
    # text and section text alternately, starting with exposed text.

    exposed = True

    # Initially, start a new row.

    row_continued = False

    for region in patterns["sections"].split(s):

        # Only look for table features in exposed text.

        if exposed:

            # Extract each row from the definition.

            for row_text in patterns["rows"].split(region):

                # Only create a new row when a boundary has been found.

                if not row_continued:

                    # Finish the previous row by moving any row- and
                    # table-level attributes out of its first column.

                    if columns:
                        extractAttributes(columns[0][0], row_attrs, table_attrs)

                    # The same dictionary and list objects are stored in
                    # 'rows', so later changes to them appear in the result.

                    row_attrs = {}
                    columns = []
                    rows.append((row_attrs, columns))
                    column_continued = False

                # Extract each column from the row.

                for text in patterns["columns"].split(row_text):

                    # Only create a new column when a boundary has been found.

                    if not column_continued:

                        # Extract the attribute and text sections.

                        match = patterns["column"].search(text)
                        if match:
                            attribute_text, text = match.groups()
                            columns.append([parseAttributes(attribute_text, True), text])
                        else:
                            columns.append([{}, text])

                    # Otherwise, this text continues the column that was
                    # being written before a section interrupted it.

                    else:
                        columns[-1][1] += text

                    # Permit columns immediately following this one.

                    column_continued = False

                # Permit a continuation of the current column.
                # This only has an effect when the row is also continued,
                # since starting a new row resets the flag.

                column_continued = True

                # Permit rows immediately following this one.

                row_continued = False

            # Permit a continuation of the current row.
            # The exposed text following a section resumes the row and column
            # that were current before the section.

            row_continued = True

        # Write any section into the current column.

        else:
            columns[-1][1] += region

        exposed = not exposed

    # Finish the last row in the same way as the others.

    if columns:
        extractAttributes(columns[0][0], row_attrs, table_attrs)

    return table_attrs, rows
paul@0	128
def extractAttributes(attrs, row_attrs, table_attrs):

    """
    Extract row- and table-level attributes from 'attrs', storing them in
    'row_attrs' and 'table_attrs' respectively.

    Entries whose names start with "row" are moved to 'row_attrs' and entries
    whose names start with "table" are moved to 'table_attrs'; moved entries
    are removed from 'attrs'. The "rowspan" attribute describes a cell, not a
    row, and is therefore left in 'attrs'.

    All three dictionaries are modified in place and nothing is returned.
    """

    # Iterate over a snapshot of the items because entries are deleted from
    # 'attrs' inside the loop.

    for name, value in list(attrs.items()):
        if name.startswith("row") and name != "rowspan":
            row_attrs[name] = value
            del attrs[name]
        elif name.startswith("table"):
            table_attrs[name] = value
            del attrs[name]
paul@1	143
def replaceMarkers(s):

    """
    Convert the section notation in 's', returning the converted text.

    Every match of the "markers" pattern is rewritten with one backslash
    removed from each run of backslashes it contains; all other text is
    returned unchanged.
    """

    def convert(match):

        "Return the text of 'match' with each backslash run shortened by one."

        # Splitting on the capturing "marker" pattern alternates ordinary
        # text (even positions) with backslash runs (odd positions).

        converted = []
        for position, piece in enumerate(patterns["marker"].split(match.group())):
            if position % 2:
                converted.append(piece[:-1])
            else:
                converted.append(piece)

        return "".join(converted)

    return patterns["markers"].sub(convert, s)
paul@1	174
def parseAttributes(s, escape=True):

    """
    Parse the table attributes string 's', returning a mapping of names to
    values. If 'escape' is set to a true value, the attributes will be suitable
    for use with the formatter API.

    Names are stored in lower case. An attribute given as a name alone (not
    followed by "=" and a value) is recorded with the value "true" if 'escape'
    is set and with True otherwise; this includes a name appearing at the very
    end of 's'. A name followed by "=" but no value is not recorded.
    """

    attrs = {}
    f = StringIO(s)
    name = None
    need_value = False

    # The value recorded for name-only attributes.

    if escape:
        flag_value = "true"
    else:
        flag_value = True

    for token in shlex(f):

        # Capture the name if needed.

        if name is None:
            if escape:
                name = wikiutil.escape(token)
            else:
                name = token

        # Detect either an equals sign or another name.

        elif not need_value:
            if token == "=":
                need_value = True
            else:
                attrs[name.lower()] = flag_value

                # The token starts a new attribute; treat its name exactly
                # like any other name, honouring 'escape'.

                if escape:
                    name = wikiutil.escape(token)
                else:
                    name = token

        # Otherwise, capture a value.

        else:
            # Quoting of attributes done similarly to parseAttributes.
            # NOTE(review): presumably this refers to MoinMoin's
            # wikiutil.parseAttributes - confirm.

            if escape and token:
                if token[0] in ("'", '"'):
                    token = wikiutil.escape(token)
                else:
                    token = '"%s"' % wikiutil.escape(token, 1)

            attrs[name.lower()] = token
            name = None
            need_value = False

    # Record any name-only attribute left over at the end of the string.

    if name is not None and not need_value:
        attrs[name.lower()] = flag_value

    return attrs
paul@0	220
paul@0	221	# Formatting of embedded content.
paul@0	222	# NOTE: Borrowed from EventAggregator.
paul@0	223
def getParserClass(request, format):

    """
    Return the parser class for the given 'format', looked up using the
    configuration of 'request'. Where 'format' is empty or no parser plugin
    can be found for it, the plain text parser class is returned instead.
    """

    wanted = format or "plain"

    try:
        parser_cls = wikiutil.searchAndImportPlugin(request.cfg, "parser", wanted)

    # Fall back to the plain text parser for unknown formats.

    except wikiutil.PluginMissingError:
        parser_cls = wikiutil.searchAndImportPlugin(request.cfg, "parser", "plain")

    return parser_cls
paul@0	235
def formatText(text, request, fmt):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt',
    returning the result of the redirected output call. The parser is chosen
    according to the format given in the processing instructions of the
    request's page.
    """

    page_format = request.page.pi["format"]
    parser = getParserClass(request, page_format)(text, request, line_anchors=False)

    # Have the request run the parser so that its output is returned rather
    # than handled by the caller.

    return request.redirectedOutput(parser.format, fmt, inhibit_p=True)
paul@0	243
paul@0	244	# Common formatting functions.
paul@0	245
def formatTable(text, request, fmt):

    """
    Parse the given 'text' as a table definition and write the formatted table
    to the specified 'request' using the formatter 'fmt'. The text of each cell
    is formatted using formatText.
    """

    table_attrs, rows = parse(text)

    # Open the table.

    request.write(fmt.table(1, table_attrs))

    for row_attrs, cells in rows:

        # Open the row, emit each of its cells, then close it.

        request.write(fmt.table_row(1, row_attrs))

        for cell in cells:
            cell_attrs, cell_text = cell
            request.write(fmt.table_cell(1, cell_attrs))
            request.write(formatText(cell_text, request, fmt))
            request.write(fmt.table_cell(0))

        request.write(fmt.table_row(0))

    # Close the table.

    request.write(fmt.table(0))
paul@0	265
paul@0	266	# vim: tabstop=4 expandtab shiftwidth=4