MoinLight (annotate moinformat/__init__.py)

MoinLight

Annotated moinformat/__init__.py

16:3e5b1b8cb456

2017-04-30

Paul Boddie

Added initial definition list element support. Consolidated common serialisation routines.

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Moin wiki format parser.
paul@0	5
paul@11	6	Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
paul@0	7
paul@0	8	This program is free software; you can redistribute it and/or modify it under
paul@0	9	the terms of the GNU General Public License as published by the Free Software
paul@0	10	Foundation; either version 3 of the License, or (at your option) any later
paul@0	11	version.
paul@0	12
paul@0	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	16	details.
paul@0	17
paul@0	18	You should have received a copy of the GNU General Public License along with
paul@0	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	20	"""
paul@0	21
paul@16	22	from moinformat.tree import Block, Break, DefItem, DefTerm, Heading, ListItem, Region, Rule, Text
paul@0	23	import re
paul@0	24
paul@0	25	# Regular expressions.
paul@0	26
# Each entry maps a pattern name to its regular expression source. By
# convention, group 1 carries the pertinent text of a match, with any further
# groups exposing sections of it to the handlers.

syntax = {
    # Page regions:
    "regionstart"   : r"((^\s*)([{]{3,}))",                 # {{{...
    "regionend"     : r"^\s*([}]{3,})",                     # }}}...
    "header"        : r"#!(.*?)\n",                         # #! char-excl-nl

    # Region contents:
    # Line-oriented patterns:
    # blank line
    "break"         : r"^(\s*?)\n",
    # ws... expecting text ::
    "defterm"       : r"^(\s+)(?=.+?::)",
    # ws... expecting :: ws...
    "defterm_empty" : r"^(\s+)(?=::\s+)",
    # [ws...] =... ws... expecting headingend
    # (optional leading whitespace; the lookahead requires a closing run of
    # "=" of the same length as the opening run before the end of the line)
    "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",
    # ws... list-item [ws...]
    # (the list-item marker is a literal asterisk)
    "listitem"      : r"^(\s+)(\*)(\s*)",
    # ws... number-item ws...
    "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",
    # ws... alpha-item ws...
    "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",
    # ws... roman-item ws...
    "listitem_roman": r"^(\s+)([iI]\.)(\s+)",
    # ws... dot-item [ws...]
    "listitem_dot"  : r"^(\s+)(\.)(\s*)",

    # Region contents:
    # Inline patterns:
    "rule"          : r"(-----*)",                          # ----...

    # Heading contents:
    "headingend"    : r"(\s+)(=+)(\s*\n)",                  # ws... =... [ws...] nl

    # List contents:
    "deftermend"    : r"::(\s*?\n)",
    "deftermsep"    : r"::(\s+)",
    "listitemend"   : r"^",                                 # next line
    }

# Define patterns for the regular expressions.
# MULTILINE is needed because many patterns anchor on "^" at line starts
# within the page text.

patterns = {}
for name, value in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
paul@0	72
paul@0	73
paul@0	74
paul@2	75	# Tokenising functions.
paul@2	76
class TokenStream:

    "A stream of tokens taken from a string."

    def __init__(self, s):

        "Initialise the stream to read from the start of the string 's'."

        self.s = s
        self.pos = 0

        # The most recent match object and the name of the pattern that
        # produced it, both reset by each call to read_until.

        self.match = None
        self.matching = None

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The stream position is not changed by this method: the match found, if
        any, is recorded in the 'match' and 'matching' attributes for a
        subsequent call to read_match.
        """

        first = None
        self.matching = None

        # Discard any earlier match so that read_match cannot rewind the
        # stream to a stale position when nothing is found this time.

        self.match = None

        # Find the first matching pattern. Where several patterns match at the
        # same position, the one listed earliest in 'pattern_names' wins.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match is None:
                continue

            start = match.start()
            if self.matching is None or start < first:
                first = start
                self.matching = pattern_name
                self.match = match

        if self.matching is not None:
            return self.s[self.pos:first]

        if remaining:
            return self.s[self.pos:]

        return None

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the match has no such group. Without any
        current match, the position is moved to the end of the string and None
        is returned.
        """

        if self.match is None:
            self.pos = len(self.s)
            return None

        # Continue reading from the end of the entire match, regardless of the
        # group requested.

        self.pos = self.match.end()

        try:
            return self.match.group(group)
        except IndexError:
            return ""
paul@2	135
paul@2	136
paul@2	137
paul@0	138	# Parser functions.
paul@0	139
def parse_page(s):

    """
    Parse the page text 's', returning the top-level region produced from it.
    Pages consist of regions delimited by markers.
    """

    stream = TokenStream(s)
    return parse_region(stream)
paul@1	147
def parse_region(items, level=0, indent=0):

    """
    Create a region with the given 'level' and 'indent', populate it using the
    data provided by 'items', and return it.
    """

    region = Region([], level, indent)

    # The header is read first since it sets the region type.

    parse_region_header(items, region)

    # Transparent regions are parsed as wiki content; all others are opaque.

    if region.is_transparent():
        parse_body = parse_region_wiki
    else:
        parse_body = parse_region_opaque

    parse_body(items, region)
    return region
paul@6	169
def parse_region_header(items, region):

    """
    Read any region header found at the current position in 'items', storing
    it as the type of the given 'region'.
    """

    # An empty string means that the header starts exactly at the current
    # position; None means that no header was found at all.

    preceding = items.read_until(["header"], False)

    if preceding == "":
        region.type = items.read_match()
paul@6	178
def parse_region_wiki(items, region):

    """
    Populate a wiki 'region' using the data provided by 'items', starting
    with a new block and searching for all wiki features.
    """

    # The order is significant: where patterns match at the same position, the
    # earliest pattern listed takes precedence.

    wiki_patterns = [
        "break", "heading",
        "defterm", "defterm_empty",
        "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
        "listitem_roman",
        "regionstart", "regionend", "rule",
        ]

    new_block(region)
    parse_region_details(items, region, wiki_patterns)
paul@0	190
def parse_region_opaque(items, region):

    """
    Populate an opaque 'region' using the data provided by 'items'. Only the
    end-of-region marker is searched for within such regions.
    """

    opaque_patterns = ["regionend"]
    parse_region_details(items, region, opaque_patterns)
paul@1	196
def parse_region_details(items, region, pattern_names):

    """
    Populate 'region' from 'items', searching for features using the given
    'pattern_names'. Parsing stops at the end of the input or when a handler
    raises StopIteration, after which the region is normalised.
    """

    try:
        while True:

            # Text found before the next feature (or before the end of the
            # input) is added to the region as it is.

            text = items.read_until(pattern_names)
            if text:
                region.append_text(Text(text))

            # Nothing matched: the input is exhausted.

            if not items.matching:
                break

            # Consume the feature and find out how it should be handled.

            feature = items.read_match()
            handler = handlers.get(items.matching)

            # Features without handlers are preserved as plain text.

            if not handler:
                region.append_text(Text(feature))
            else:
                handler(items, region)

    except StopIteration:
        pass

    region.normalise()
paul@0	231
def end_region(items, region):

    """
    End the parsing of 'region' by raising StopIteration. Neither 'items' nor
    'region' is inspected.
    """

    raise StopIteration()
paul@7	237
def parse_break(items, region):

    """
    Handle a paragraph break within 'region', adding a break node followed by
    a new block.
    """

    paragraph_break = Break()
    region.add(paragraph_break)
    new_block(region)
paul@2	244
def parse_defitem(items, region, extra=""):

    """
    Handle a definition item within 'region', using group 1 of the current
    match as padding and recording any 'extra' text with the item. The item
    extends to the start of the next line.
    """

    definition = DefItem([], items.read_match(1), extra)
    parse_region_details(items, definition, ["listitemend"])

    region.append(definition)
    new_block(region)
paul@16	254
def parse_defterm(items, region):

    """
    Handle a definition term within 'region', using group 1 of the current
    match as padding. A definition item is parsed after the term if the term
    was ended by a separator.
    """

    definition_term = DefTerm([], items.read_match(1))
    parse_region_details(items, definition_term, ["deftermend", "deftermsep"])
    region.append(definition_term)

    # Only a separator introduces a definition on the same line.

    ended_by_separator = items.matching == "deftermsep"
    if ended_by_separator:
        parse_defitem(items, region)
paul@16	265
def parse_defterm_empty(items, region):

    """
    Handle an empty definition term within 'region'. Group 1 of the current
    match is passed to the definition item as extra text once the separator
    has been read.
    """

    leading = items.read_match(1)

    # Read up to and including the separator before parsing the item itself.

    parse_region_details(items, region, ["deftermsep"])
    parse_defitem(items, region, extra=leading)
paul@16	273
# Both ways of terminating a definition term merely end the parsing of the
# term: these names are registered for "deftermend" and "deftermsep" in the
# handlers mapping.

parse_defterm_end = end_region
parse_defterm_sep = end_region
paul@16	276
def parse_heading(items, region):

    """
    Handle a heading within 'region'. The heading level is the length of the
    marker in group 2 of the current match, with groups 1 and 3 providing the
    text before and after the marker.
    """

    before_marker = items.read_match(1)
    marker = items.read_match(2)
    after_marker = items.read_match(3)

    heading = Heading([], len(marker), before_marker, after_marker)
    parse_region_details(items, heading, ["headingend"])

    region.append(heading)
    new_block(region)
paul@9	288
def parse_heading_end(items, heading):

    """
    Handle a potential end of 'heading'. Where the closing marker has the same
    length as the heading level, the padding around the marker is recorded on
    the heading and StopIteration is raised to end the heading.
    """

    closing_marker = items.read_match(2)

    # A marker of a different length does not end this heading.

    if len(closing_marker) != heading.level:
        return

    heading.end_pad = items.read_match(1)
    heading.end_extra = items.read_match(3)
    raise StopIteration
paul@9	298
def parse_listitem(items, region):

    """
    Handle a list item marker within 'region'. Groups 1 to 3 of the current
    match provide the indentation, the marker and the space following it. The
    item extends to the start of the next line.
    """

    leading = items.read_match(1)
    marker = items.read_match(2)
    space = items.read_match(3)

    list_item = ListItem([], len(leading), marker, space)
    parse_region_details(items, list_item, ["listitemend"])

    region.append(list_item)
    new_block(region)
paul@9	310
# Reaching the next line merely ends the parsing of a list item: this name is
# registered for "listitemend" in the handlers mapping.

parse_listitem_end = end_region
paul@13	312
def parse_rule(items, region):

    """
    Handle a horizontal rule within 'region', recording the number of
    characters in the rule marker, then start a new block.
    """

    marker = items.read_match(1)
    region.append(Rule(len(marker)))
    new_block(region)
paul@12	321
def parse_section(items, region):

    """
    Handle the start of a new section within 'region'. The section is parsed
    as a nested region whose indent and level are the lengths of groups 2 and
    3 of the current match.
    """

    leading = items.read_match(2)
    marker = items.read_match(3)

    # Parse the section and start a new block after the section.

    section = parse_region(items, len(marker), len(leading))
    region.append(section)
    new_block(region)
paul@2	332
def parse_section_end(items, region):

    """
    Handle a potential end of section marker within 'region'. StopIteration is
    raised if the region accepts the marker as its end; otherwise, the marker
    is added to the region as text.
    """

    marker = items.read_match()

    if not region.have_end(marker):
        region.append_text(Text(marker))
        return

    raise StopIteration
paul@2	342
paul@8	343	# Pattern handlers.
paul@2	344
# Each pattern name is mapped to the function handling matches of that
# pattern. Handlers are called with the token stream and the current region.

handlers = {
    # No pattern at all.
    None                : end_region,

    # Features starting nested structures.
    "break"             : parse_break,
    "heading"           : parse_heading,
    "defterm"           : parse_defterm,
    "defterm_empty"     : parse_defterm_empty,
    "regionstart"       : parse_section,
    "rule"              : parse_rule,

    # All list item variants share one handler.
    "listitem"          : parse_listitem,
    "listitem_alpha"    : parse_listitem,
    "listitem_dot"      : parse_listitem,
    "listitem_num"      : parse_listitem,
    "listitem_roman"    : parse_listitem,

    # Features ending structures.
    "deftermend"        : parse_defterm_end,
    "deftermsep"        : parse_defterm_sep,
    "headingend"        : parse_heading_end,
    "listitemend"       : parse_listitem_end,
    "regionend"         : parse_section_end,
    }
paul@2	364
def new_block(region):

    "Add a new, empty block to 'region'."

    region.add(Block([]))
paul@0	371
paul@1	372
paul@1	373
paul@1	374	# Top-level functions.
paul@0	375
# The parse function is an alias for parse_page, accepting page text and
# returning the region produced by parsing it.

parse = parse_page
paul@0	377
paul@0	378	# vim: tabstop=4 expandtab shiftwidth=4

MoinLight

Annotated moinformat/__init__.py

Annotated moinformat/init.py