MoinLight (annotate moinformat/__init__.py)

MoinLight

Annotated moinformat/__init__.py

14:70b10d6aaad3

2017-04-30

Paul Boddie

Added support for other list node types, breaking the pattern for list items up into specific patterns.

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Moin wiki format parser.
paul@0	5
paul@11	6	Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
paul@0	7
paul@0	8	This program is free software; you can redistribute it and/or modify it under
paul@0	9	the terms of the GNU General Public License as published by the Free Software
paul@0	10	Foundation; either version 3 of the License, or (at your option) any later
paul@0	11	version.
paul@0	12
paul@0	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	16	details.
paul@0	17
paul@0	18	You should have received a copy of the GNU General Public License along with
paul@0	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	20	"""
paul@0	21
paul@13	22	from moinformat.tree import Block, Heading, ListItem, Region, Rule, Text
paul@0	23	import re
paul@0	24
# Regular expressions.
#
# Every pattern is compiled with re.MULTILINE, so "^" anchors at the start of
# each line. By convention group 1 holds the pertinent text of a match, with
# further groups exposing sections of it.

syntax = {
    # Page regions:
    "regionstart"   : r"((^\s*)([{]{3,}))",     # {{{...
    "regionend"     : r"^\s*([}]{3,})",         # }}}...
    "header"        : r"#!(.*?)\n",             # #! char-excl-nl

    # Region contents:
    # Line-oriented patterns:
    # blank line
    "break"         : r"^(\s*?)\n",
    # [ws...] =... ws... expecting headingend
    # (the lookahead requires a closing run of "=" equal to the opening run)
    "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",
    # indent (list-item or number-item or alpha-item or roman-item or dot-item)
    # groups: 1 = indent, 2 = marker, 3 = space following the marker
    "listitem"      : r"^(\s+)(\*)(\s*)",
    "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",
    "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",
    "listitem_roman": r"^(\s+)([iI]\.)(\s+)",
    "listitem_dot"  : r"^(\s+)(\.)(\s*)",

    # Region contents:
    # Inline patterns:
    "rule"          : r"(-----*)",              # ----...

    # Heading contents:
    "headingend"    : r"(\s+)(=+)(\s*\n)",      # ws... =... [ws...] nl

    # List contents:
    "listitemend"   : r"^",                     # next line
    }

# Define patterns for the regular expressions.

patterns = {}
for name, value in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
paul@0	62
paul@0	63
paul@0	64
paul@2	65	# Tokenising functions.
paul@2	66
class TokenStream:

    "A stream of tokens taken from a string."

    def __init__(self, s):

        "Initialise the stream to read from the string 's'."

        self.s = s
        self.pos = 0            # offset at which the next search starts
        self.match = None       # match object found by the latest search
        self.matching = None    # name of the pattern that produced 'match'

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The earliest-starting match wins; where several patterns match at the
        same position, the one listed first in 'pattern_names' is chosen. The
        stream position is not changed: use read_match to consume the match.
        """

        first = None

        # Forget any previous result so that a failed search cannot leave a
        # stale match behind for read_match to return.

        self.match = None
        self.matching = None

        # Find the first matching pattern.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match:
                start = match.start()
                if self.matching is None or start < first:
                    first = start
                    self.matching = pattern_name
                    self.match = match

        if self.matching is None:
            if remaining:
                return self.s[self.pos:]
            else:
                return None
        else:
            return self.s[self.pos:first]

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the pattern does not define 'group'.
        Without a current match, the position moves to the end of the input
        and None is returned.
        """

        if self.match:
            self.pos = self.match.end()

            # Patterns need not define every group: treat absent groups as
            # empty text.

            try:
                return self.match.group(group)
            except IndexError:
                return ""
        else:
            self.pos = len(self.s)
            return None
paul@2	125
paul@2	126
paul@2	127
paul@0	128	# Parser functions.
paul@0	129
def parse_page(s):

    """
    Tokenise the page text 's' and return the top-level region parsed from it.
    A page is made up of regions delimited by markers.
    """

    stream = TokenStream(s)
    return parse_region(stream)
paul@1	137
def parse_region(items, level=0, indent=0):

    """
    Build and return a region of the given 'level' and 'indent', populated from
    the token stream 'items'.
    """

    region = Region([], level, indent)

    # The header is read first since it sets the region type.

    parse_region_header(items, region)

    # Choose the body parser: wiki markup for transparent regions, otherwise
    # uninterpreted content.

    if region.is_transparent():
        parse_body = parse_region_wiki
    else:
        parse_body = parse_region_opaque

    parse_body(items, region)
    return region
paul@6	159
def parse_region_header(items, region):

    """
    Read any header found at the current position in 'items', recording it as
    the type of the given 'region'.
    """

    preceding = items.read_until(["header"], False)

    # None means no header at all; non-empty text means a header only occurs
    # further on, which does not count.

    if preceding != "":
        return

    region.type = items.read_match()
paul@6	168
def parse_region_wiki(items, region):

    "Populate the wiki 'region' using the data provided by 'items'."

    # Patterns recognised inside wiki regions. The order matters: earlier
    # names take priority when matches start at the same position.

    wiki_patterns = [
        "break", "heading",
        "listitem", "listitem_num", "listitem_alpha", "listitem_roman",
        "listitem_dot",
        "regionstart", "regionend", "rule",
        ]

    new_block(region)
    parse_region_details(items, region, wiki_patterns)
paul@0	177
def parse_region_opaque(items, region):

    "Populate the opaque 'region' using the data provided by 'items'."

    # Only the end-of-region marker is searched for within opaque content.

    opaque_patterns = ["regionend"]
    parse_region_details(items, region, opaque_patterns)
paul@1	183
def parse_region_details(items, region, pattern_names):

    """
    Populate 'region' from 'items', searching for features using the given
    'pattern_names'. Parsing stops at the end of the input or when a handler
    raises StopIteration, after which the region is normalised.
    """

    try:
        while True:

            # Text ahead of the next feature (or all remaining text) is kept.

            leading = items.read_until(pattern_names)
            if leading:
                region.append_text(Text(leading))

            # Nothing matched: the input is exhausted.

            if not items.matching:
                break

            # Consume the feature and find out who deals with it.

            feature = items.read_match()
            handler = handlers.get(items.matching)

            # Features without handlers are retained as plain text.

            if not handler:
                region.append_text(Text(feature))
                continue

            handler(items, region)

    except StopIteration:
        pass

    region.normalise()
paul@0	218
def end_region(items, region):

    "Stop the parsing of 'region' by signalling StopIteration."

    raise StopIteration()
paul@7	224
def parse_break(items, region):

    "Respond to a paragraph break in 'region' by starting a new block."

    # A block followed by a break is no longer the final one in its sequence.

    previous = region.last()
    if isinstance(previous, Block):
        previous.final = False

    new_block(region)
paul@2	235
def parse_heading(items, region):

    "Parse a heading from 'items', adding it to 'region'."

    # Groups of the heading pattern: 1 = leading text, 2 = level marker,
    # 3 = padding after the marker.

    leading = items.read_match(1)
    marker = items.read_match(2)
    padding = items.read_match(3)

    heading = Heading([], len(marker), leading, padding)

    # Collect the heading content up to its end marker.

    parse_region_details(items, heading, ["headingend"])

    region.append(heading)
    new_block(region)
paul@9	247
def parse_heading_end(items, heading):

    """
    Handle a potential end-of-heading marker read from 'items'.

    If the marker has the same level as 'heading', the padding before it and
    the text after it are recorded on the heading and StopIteration is raised
    to finish the heading. Otherwise, the marker does not close this heading
    and its text is kept as part of the heading content.
    """

    # Groups of the end pattern: 1 = padding, 2 = level marker, 3 = trailing
    # text including the newline.

    level = len(items.read_match(2))

    if heading.level == level:
        heading.end_pad = items.read_match(1)
        heading.end_extra = items.read_match(3)
        raise StopIteration

    # The match has already been consumed from the stream: retain its complete
    # text (group 0) rather than silently losing it.

    heading.append_text(Text(items.read_match(0)))
paul@9	257
def parse_listitem(items, region):

    "Parse a list item introduced by a marker, adding it to 'region'."

    # Groups of the list item patterns: 1 = indent, 2 = marker, 3 = space.

    indent_text = items.read_match(1)
    marker = items.read_match(2)
    space = items.read_match(3)

    item = ListItem([], len(indent_text), marker, space)

    # The item content extends to the start of the next line.

    parse_region_details(items, item, ["listitemend"])

    region.append(item)
    new_block(region)
paul@9	269
def parse_listitem_end(items, item):

    "Finish the list 'item' by signalling StopIteration."

    raise StopIteration()
paul@13	275
def parse_rule(items, region):

    "Add a horizontal rule to 'region', starting a new block after it."

    # The rule records the number of characters in the matched marker.

    marker = items.read_match(1)
    region.append(Rule(len(marker)))
    new_block(region)
paul@12	284
def parse_section(items, region):

    "Parse a new section opened within 'region', adding it to the region."

    # Groups of the region start pattern: 2 = indent, 3 = opening marker.

    indent_text = items.read_match(2)
    marker = items.read_match(3)

    # The marker length gives the level of the nested region.

    section = parse_region(items, len(marker), len(indent_text))
    region.append(section)

    # Content following the section belongs to a new block.

    new_block(region)
paul@2	295
def parse_section_end(items, region):

    "Handle a potential end-of-section marker within 'region'."

    marker = items.read_match()

    # A marker that does not close this region is ordinary text.

    if not region.have_end(marker):
        region.append_text(Text(marker))
        return

    raise StopIteration
paul@2	305
# Pattern handlers: a mapping from pattern names to the functions invoked with
# the token stream and the current node when the pattern has been matched.

handlers = {
    # No pattern.
    None                : end_region,

    # Regions.
    "regionstart"       : parse_section,
    "regionend"         : parse_section_end,

    # Paragraph breaks and rules.
    "break"             : parse_break,
    "rule"              : parse_rule,

    # Headings.
    "heading"           : parse_heading,
    "headingend"        : parse_heading_end,

    # List items: all marker types share a handler.
    "listitem"          : parse_listitem,
    "listitem_num"      : parse_listitem,
    "listitem_alpha"    : parse_listitem,
    "listitem_roman"    : parse_listitem,
    "listitem_dot"      : parse_listitem,
    "listitemend"       : parse_listitem_end,
    }
paul@2	323
def new_block(region):

    "Append a new, empty block to 'region'."

    region.append(Block([]))
paul@0	330
paul@1	331
paul@1	332
# Top-level functions.

# The module's parsing entry point is an alias for the page parser.

parse = parse_page
paul@0	336
paul@0	337	# vim: tabstop=4 expandtab shiftwidth=4

MoinLight

Annotated moinformat/__init__.py

Annotated moinformat/init.py