MoinLight (annotate moinformat/__init__.py)

MoinLight

Annotated moinformat/__init__.py

15:81089c4cb6eb

2017-04-30

Paul Boddie

Introduced an explicit paragraph break node, removing the final indicator from block nodes. Distinguished between node appending and node adding, the latter incorporating the replacement of empty nodes by new nodes.

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Moin wiki format parser.
paul@0	5
paul@11	6	Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
paul@0	7
paul@0	8	This program is free software; you can redistribute it and/or modify it under
paul@0	9	the terms of the GNU General Public License as published by the Free Software
paul@0	10	Foundation; either version 3 of the License, or (at your option) any later
paul@0	11	version.
paul@0	12
paul@0	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	16	details.
paul@0	17
paul@0	18	You should have received a copy of the GNU General Public License along with
paul@0	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	20	"""
paul@0	21
paul@15	22	from moinformat.tree import Block, Break, Heading, ListItem, Region, Rule, Text
paul@0	23	import re
paul@0	24
# Regular expressions.
#
# Each expression is written so that group 1 holds the text of interest, since
# TokenStream.read_match returns group 1 by default. Further groups expose the
# individual parts (indent, marker, padding) used by the handlers.

syntax = {
    # Page regions:
    "regionstart"    : r"((^\s*)([{]{3,}))",    # {{{...   (2: indent, 3: braces)
    "regionend"      : r"^\s*([}]{3,})",        # }}}...
    "header"         : r"#!(.*?)\n",            # #! char-excl-nl

    # Region contents:
    # Line-oriented patterns:
    #   blank line
    "break"          : r"^(\s*?)\n",
    #   [ws...] =... ws... expecting headingend
    #   The lookahead demands a closing run of '=' of the same length (group
    #   'x') before the end of the line, without consuming the heading text.
    "heading"        : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",
    #   indent list-item [ws...]
    "listitem"       : r"^(\s+)(\*)(\s*)",
    #   indent number-item ws...
    "listitem_num"   : r"^(\s+)(\d+\.)(\s+)",
    #   indent alpha-item ws...
    "listitem_alpha" : r"^(\s+)([aA]\.)(\s+)",
    #   indent roman-item ws...
    "listitem_roman" : r"^(\s+)([iI]\.)(\s+)",
    #   indent dot-item [ws...]
    "listitem_dot"   : r"^(\s+)(\.)(\s*)",

    # Region contents:
    # Inline patterns:
    "rule"           : r"(-----*)",             # ----...

    # Heading contents:
    "headingend"     : r"(\s+)(=+)(\s*\n)",     # ws... =... [ws...] nl

    # List contents:
    "listitemend"    : r"^",                    # next line
    }

# Define patterns for the regular expressions.
# MULTILINE lets '^' anchor at the start of every line of the page text.

patterns = {}
for name, value in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
paul@0	66
paul@0	67
paul@0	68
paul@2	69	# Tokenising functions.
paul@2	70
class TokenStream:

    """
    A stream of tokens taken from a string.

    Attributes:

    s           the complete text being tokenised
    pos         the current read position within 's'
    match       the match object found by the most recent 'read_until', or
                None if that search found nothing
    matching    the name of the pattern responsible for 'match', or None
    """

    def __init__(self, s):

        "Initialise the stream at the start of the string 's'."

        self.s = s
        self.pos = 0
        self.match = None
        self.matching = None

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The position in the stream is not changed: it is only advanced by
        'read_match'. Where several patterns match at the same position, the
        one listed first in 'pattern_names' is chosen.
        """

        first = None
        self.matching = None

        # Forget any earlier match so that a failed search cannot leave stale
        # data behind for read_match to act upon.

        self.match = None

        # Find the first matching pattern.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match and (first is None or match.start() < first):
                first = match.start()
                self.matching = pattern_name
                self.match = match

        if first is not None:
            return self.s[self.pos:first]

        return self.s[self.pos:] if remaining else None

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the pattern defines no such group. If
        there is no current match, the position is moved to the end of the
        text and None is returned.
        """

        if not self.match:
            self.pos = len(self.s)
            return None

        # Move past the whole match, regardless of the group requested, making
        # repeated calls for different groups safe.

        self.pos = self.match.end()

        try:
            return self.match.group(group)
        except IndexError:
            return ""
paul@2	129
paul@2	130
paul@2	131
paul@0	132	# Parser functions.
paul@0	133
def parse_page(s):

    """
    Parse the page text 's', returning the top-level region obtained by
    parsing a token stream over that text.
    """

    stream = TokenStream(s)
    return parse_region(stream)
paul@1	141
def parse_region(items, level=0, indent=0):

    """
    Create a region with the given 'level' and 'indent', populate it from the
    token stream 'items', and return it.
    """

    region = Region([], level, indent)

    # The header is read first since it sets the region type.

    parse_region_header(items, region)

    # Regions reporting themselves as transparent are parsed as wiki markup;
    # all others are only scanned for their end marker.

    parse_body = parse_region_wiki if region.is_transparent() else parse_region_opaque
    parse_body(items, region)

    return region
paul@6	163
def parse_region_header(items, region):

    """
    Read any header found at the current position in 'items' and record it as
    the type of the given 'region'.
    """

    preceding = items.read_until(["header"], False)

    # None indicates no header at all; non-empty text indicates a header that
    # only appears later on. Neither belongs to this region's opening.

    if preceding != "":
        return

    region.type = items.read_match()
paul@6	172
def parse_region_wiki(items, region):

    "Populate the wiki 'region' using the data provided by 'items'."

    # All features recognised within wiki content.

    wiki_patterns = [
        "break", "heading", "listitem", "listitem_num", "listitem_alpha",
        "listitem_roman", "listitem_dot", "regionstart", "regionend", "rule",
        ]

    new_block(region)
    parse_region_details(items, region, wiki_patterns)
paul@0	181
def parse_region_opaque(items, region):

    "Populate the opaque 'region' using the data provided by 'items'."

    # Only the end-of-region marker is searched for in opaque content.

    end_markers = ["regionend"]
    parse_region_details(items, region, end_markers)
paul@1	187
def parse_region_details(items, region, pattern_names):

    """
    Populate 'region' from 'items', searching using 'pattern_names'. Parsing
    stops at the end of the input or when a handler raises StopIteration. The
    region is normalised afterwards in either case.
    """

    try:
        while True:

            # Plain text up to the next feature (or the end of the input).

            text = items.read_until(pattern_names)
            if text:
                region.append_text(Text(text))

            # No feature found: the input is exhausted.

            if not items.matching:
                break

            # Consume the feature and find out how to deal with it.

            feature = items.read_match()
            handle = handlers.get(items.matching)

            # Features without a handler are retained as plain text.

            if not handle:
                region.append_text(Text(feature))
                continue

            handle(items, region)

    except StopIteration:
        pass

    region.normalise()
paul@0	222
def end_region(items, region):

    """
    Terminate the parsing of 'region' by raising StopIteration. Neither
    'items' nor 'region' is inspected.
    """

    raise StopIteration()
paul@7	228
def parse_break(items, region):

    """
    Record a paragraph break in 'region' and open a fresh block for whatever
    follows it.
    """

    paragraph_break = Break()
    region.add(paragraph_break)
    new_block(region)
paul@2	235
def parse_heading(items, region):

    """
    Handle a heading start marker from 'items', parsing the heading content
    and appending the resulting heading to 'region'.
    """

    # Groups of the current match: leading text, '=' marker, padding.

    start_extra, marker, start_pad = [items.read_match(n) for n in (1, 2, 3)]

    # The heading level is the length of the marker.

    heading = Heading([], len(marker), start_extra, start_pad)
    parse_region_details(items, heading, ["headingend"])

    region.append(heading)
    new_block(region)
paul@9	247
def parse_heading_end(items, heading):

    """
    Handle a potential end marker for 'heading'. Where the marker has the same
    length as the heading level, the trailing details are recorded and
    StopIteration is raised to end the heading. Otherwise, nothing is done.
    """

    marker = items.read_match(2)

    if heading.level != len(marker):
        return

    heading.end_pad = items.read_match(1)
    heading.end_extra = items.read_match(3)
    raise StopIteration
paul@9	257
def parse_listitem(items, region):

    """
    Handle a list item marker from 'items', parsing the item content and
    appending the resulting item to 'region'.
    """

    # Groups of the current match: indentation, item marker, spacing.

    indentation, marker, space = [items.read_match(n) for n in (1, 2, 3)]

    # The indent level is the length of the leading whitespace.

    item = ListItem([], len(indentation), marker, space)
    parse_region_details(items, item, ["listitemend"])

    region.append(item)
    new_block(region)
paul@9	269
def parse_listitem_end(items, item):

    """
    End the parsing of a list 'item' by raising StopIteration. Neither 'items'
    nor 'item' is inspected.
    """

    raise StopIteration()
paul@13	275
def parse_rule(items, region):

    """
    Append a horizontal rule to 'region', its length being that of the
    matched text, and open a fresh block after it.
    """

    dashes = items.read_match(1)
    region.append(Rule(len(dashes)))
    new_block(region)
paul@12	284
def parse_section(items, region):

    """
    Handle the start of a nested section within 'region', parsing it as a
    region of its own and opening a fresh block after it.
    """

    # Groups of the current match: 2 is the indentation, 3 the opening braces.

    indentation = items.read_match(2)
    braces = items.read_match(3)

    # The nesting level is given by the number of braces.

    section = parse_region(items, len(braces), len(indentation))
    region.append(section)
    new_block(region)
paul@2	295
def parse_section_end(items, region):

    """
    Handle a potential end marker for 'region'. StopIteration is raised if the
    region accepts the marker as its end; otherwise, the marker is retained as
    text.
    """

    marker = items.read_match()

    if not region.have_end(marker):
        region.append_text(Text(marker))
        return

    raise StopIteration
paul@2	305
# Pattern handlers.
#
# A mapping from pattern names to the functions handling matches of those
# patterns. Each handler is called with the token stream and current region.

# All forms of list item share a single handler.

handlers = dict.fromkeys(
    ["listitem", "listitem_alpha", "listitem_dot", "listitem_num",
     "listitem_roman"],
    parse_listitem)

handlers.update({
    None            : end_region,
    "break"         : parse_break,
    "heading"       : parse_heading,
    "headingend"    : parse_heading_end,
    "listitemend"   : parse_listitem_end,
    "regionstart"   : parse_section,
    "regionend"     : parse_section_end,
    "rule"          : parse_rule,
    })
paul@2	323
def new_block(region):

    "Add a new, empty block to 'region'."

    region.add(Block([]))
paul@0	330
paul@1	331
paul@1	332
# Top-level functions.
#
# 'parse' is the public entry point: an alias for parse_page.

parse = parse_page
paul@0	336
paul@0	337	# vim: tabstop=4 expandtab shiftwidth=4

MoinLight

Annotated moinformat/__init__.py

Annotated moinformat/__init__.py