MoinLight (annotate moinformat/__init__.py)

MoinLight

Annotated moinformat/__init__.py

13:7825ca4d0357

2017-04-29

Paul Boddie

Added support for headings.

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Moin wiki format parser.
paul@0	5
paul@11	6	Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
paul@0	7
paul@0	8	This program is free software; you can redistribute it and/or modify it under
paul@0	9	the terms of the GNU General Public License as published by the Free Software
paul@0	10	Foundation; either version 3 of the License, or (at your option) any later
paul@0	11	version.
paul@0	12
paul@0	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	16	details.
paul@0	17
paul@0	18	You should have received a copy of the GNU General Public License along with
paul@0	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	20	"""
paul@0	21
paul@13	22	from moinformat.tree import Block, Heading, ListItem, Region, Rule, Text
paul@0	23	import re
paul@0	24
# Regular expressions.
#
# Each entry maps a pattern name to the source of a regular expression. Group 1
# of each pattern holds the text returned by default from
# TokenStream.read_match, with further groups exposing sections of the match.

syntax = {
    # Page regions:
    "regionstart"   : r"((^\s*)([{]{3,}))",     # {{{...
    "regionend"     : r"^\s*([}]{3,})",         # }}}...
    "header"        : r"#!(.*?)\n",             # #! char-excl-nl

    # Region contents:
    # Line-oriented patterns:
    "break"         : r"^(\s*?)\n",             # blank line

    # [ws...] =... ws... expecting headingend
    # The lookahead requires a closing run of "=" of the same length as the
    # opening run (named group x) before the heading is recognised.
    "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",

    "listitem"      : r"^((\s+)([*]|\d+[.]))",  # indent (list-item or number-item)

    # Region contents:
    # Inline patterns:
    "rule"          : r"(-----*)",              # ----...

    # Heading contents:
    "headingend"    : r"(\s+)(=+)(\s*\n)",      # ws... =... [ws...] nl

    # List contents:
    "listitemend"   : r"^",                     # next line
    }

# Define patterns for the regular expressions.
#
# MULTILINE makes "^" match at the start of every line, which the
# line-oriented patterns above depend upon.

patterns = {}
for name, value in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
paul@0	55
paul@0	56
paul@0	57
paul@2	58	# Tokenising functions.
paul@2	59
class TokenStream:

    """
    A stream of tokens taken from a string.

    The stream records the current position in the string ('pos'), the name of
    the pattern found by the most recent search ('matching') and the
    corresponding match object ('match').
    """

    def __init__(self, s):

        "Initialise the stream for the string 's', starting at its beginning."

        self.s = s
        self.pos = 0
        self.match = None
        self.matching = None

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The position in the stream is not changed. The 'matching' and 'match'
        attributes are updated to describe the earliest match, or are both set
        to None if nothing matched.
        """

        first = None
        self.matching = None

        # Discard any match left over from an earlier search so that a
        # following read_match reports the end of the input instead of
        # rewinding to a stale match.

        self.match = None

        # Find the first matching pattern. Where several patterns match at the
        # same position, the one listed earliest in 'pattern_names' wins.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match:
                start = match.start()
                if self.matching is None or start < first:
                    first = start
                    self.matching = pattern_name
                    self.match = match

        if self.matching is None:
            if remaining:
                return self.s[self.pos:]
            return None

        return self.s[self.pos:first]

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the match has no such group. Without any
        current match, the position is moved to the end of the string and None
        is returned.
        """

        if self.match is None:
            self.pos = len(self.s)
            return None

        # The position always moves to the end of the whole match, regardless
        # of the group requested.

        self.pos = self.match.end()

        try:
            return self.match.group(group)
        except IndexError:
            return ""
paul@2	118
paul@2	119
paul@2	120
paul@0	121	# Parser functions.
paul@0	122
def parse_page(s):

    """
    Parse the page text 's', returning the top-level region produced for it.
    A page is treated as a region which may itself contain further regions
    delimited by markers.
    """

    stream = TokenStream(s)
    return parse_region(stream)
paul@1	130
def parse_region(items, level=0, indent=0):

    """
    Create a region having the given 'level' and 'indent', populate it using
    the data provided by 'items', and return it.
    """

    region = Region([], level, indent)

    # Any header must be read first because it sets the region type.

    parse_region_header(items, region)

    # Choose the body parser: transparent regions are parsed as wiki content,
    # all others as opaque content.

    if region.is_transparent():
        parse_body = parse_region_wiki
    else:
        parse_body = parse_region_opaque

    parse_body(items, region)
    return region
paul@6	152
def parse_region_header(items, region):

    """
    Read any region header from 'items' and record it as the type of the given
    'region'. The region is left untouched if no header starts at the current
    position.
    """

    preceding = items.read_until(["header"], False)

    # An empty string means a header starts exactly at the current position.
    # None means no header was found at all, and any other text means the
    # header found is located further on in the input.

    if preceding == "":
        region.type = items.read_match()
paul@6	161
def parse_region_wiki(items, region):

    """
    Populate the wiki 'region' from 'items', recognising all of the wiki
    features: breaks, headings, list items, nested regions and rules.
    """

    wiki_patterns = [
        "break", "heading", "listitem", "regionstart", "regionend", "rule",
        ]

    # Start an initial block in the region before parsing its details.

    new_block(region)
    parse_region_details(items, region, wiki_patterns)
paul@0	168
def parse_region_opaque(items, region):

    """
    Populate the opaque 'region' from 'items'. Only the end of a region is
    searched for, so everything else is kept as plain text.
    """

    opaque_patterns = ["regionend"]
    parse_region_details(items, region, opaque_patterns)
paul@1	174
def parse_region_details(items, region, pattern_names):

    """
    Populate 'region' from 'items', searching for the features identified by
    'pattern_names'. Text found between features is added to the region, and
    each feature is passed to its handler. Parsing stops at the end of the
    input or when a handler raises StopIteration, after which the region is
    normalised.
    """

    try:
        while True:

            # Keep any text found before the next feature, or the rest of the
            # input if there are no more features.

            text = items.read_until(pattern_names)
            if text:
                region.append_text(Text(text))

            # Without a matching pattern, the input is exhausted.

            if not items.matching:
                break

            # Consume the feature and dispatch it to a handler, retaining the
            # feature as plain text if no handler is registered for it.

            matched = items.read_match()
            handle = handlers.get(items.matching)

            if not handle:
                region.append_text(Text(matched))
            else:
                handle(items, region)

    # Handlers signal the end of the region being populated using this
    # exception.

    except StopIteration:
        pass

    region.normalise()
paul@0	209
def end_region(items, region):

    """
    Stop the parsing of 'region' by raising StopIteration. Neither 'items' nor
    'region' is inspected.
    """

    raise StopIteration()
paul@7	215
def parse_break(items, region):

    """
    Handle a paragraph break within 'region' by closing off the current block
    and starting a new one.
    """

    # The most recent node is no longer the final one in its sequence because
    # another block is about to follow it.

    region.nodes[-1].final = False
    new_block(region)
paul@2	225
def parse_heading(items, region):

    """
    Handle the start of a heading within 'region', parsing the heading content
    from 'items' and adding the heading to the region, followed by a new block.
    """

    # The groups of the heading pattern provide the leading whitespace, the
    # run of "=" characters giving the level, and the padding that follows.

    extra = items.read_match(1)
    marker = items.read_match(2)
    pad = items.read_match(3)

    heading = Heading([], len(marker), extra, pad)

    # Populate the heading itself until its end marker is found.

    parse_region_details(items, heading, ["headingend"])

    region.append(heading)
    new_block(region)
paul@9	237
def parse_heading_end(items, heading):

    """
    Handle a potential end of 'heading' described by the current match in
    'items'.

    If the run of "=" characters in the match has the same length as the level
    of the heading, the padding before the run and the text after it are
    recorded on the heading, and StopIteration is raised to end the parsing of
    the heading. Otherwise, the matched text is retained as part of the heading
    content.
    """

    level = len(items.read_match(2))

    if heading.level == level:
        heading.end_pad = items.read_match(1)
        heading.end_extra = items.read_match(3)
        raise StopIteration

    # A marker of a different level does not close this heading. The match has
    # already been consumed from the stream, so keep its complete text (group
    # 0) instead of silently dropping it from the heading.

    heading.append_text(Text(items.read_match(0)))
paul@9	247
def parse_listitem(items, region):

    """
    Handle a list item marker within 'region', parsing the item content from
    'items' and adding the item to the region, followed by a new block.
    """

    listitem = ListItem([])

    # Populate the item until the pattern ending a list item is found.

    parse_region_details(items, listitem, ["listitemend"])

    region.append(listitem)
    new_block(region)
paul@9	256
def parse_listitem_end(items, item):

    """
    Handle the end of a list item by raising StopIteration. Neither 'items' nor
    'item' is inspected.
    """

    raise StopIteration()
paul@13	262
def parse_rule(items, region):

    """
    Handle a horizontal rule within 'region', adding a rule recording the
    length of the matched text, followed by a new block.
    """

    dashes = items.read_match(1)
    region.append(Rule(len(dashes)))
    new_block(region)
paul@12	271
def parse_section(items, region):

    """
    Handle the start of a new section within 'region', parsing the section from
    'items' as a nested region and adding it to 'region', followed by a new
    block.
    """

    # The groups of the region start pattern provide the leading whitespace
    # and the run of "{" characters, whose lengths give the indent and level.

    leading = items.read_match(2)
    marker = items.read_match(3)

    section = parse_region(items, len(marker), len(leading))

    # Add the section and start a new block after it.

    region.append(section)
    new_block(region)
paul@2	282
def parse_section_end(items, region):

    """
    Handle a potential end of the section 'region' described by the current
    match in 'items'. StopIteration is raised if the region accepts the matched
    marker as its end. Otherwise, the marker is added to the region as text.
    """

    marker = items.read_match()

    if not region.have_end(marker):
        region.append_text(Text(marker))
        return

    raise StopIteration
paul@2	292
# Pattern handlers.
#
# Each handler is called with the token stream and the region being populated
# once the text matching the named pattern has been read.

handlers = {
    None            : end_region,

    # Features starting a new node:
    "break"         : parse_break,
    "heading"       : parse_heading,
    "listitem"      : parse_listitem,
    "regionstart"   : parse_section,
    "rule"          : parse_rule,

    # Features potentially ending the current node:
    "headingend"    : parse_heading_end,
    "listitemend"   : parse_listitem_end,
    "regionend"     : parse_section_end,
    }
paul@2	306
def new_block(region):

    "Add a new, empty block to the given 'region'."

    region.append(Block([]))
paul@0	313
paul@1	314
paul@1	315
# Top-level functions.

# The public entry point is an alias for the page parser.

parse = parse_page
paul@0	319
paul@0	320	# vim: tabstop=4 expandtab shiftwidth=4

MoinLight

Annotated moinformat/__init__.py

Annotated moinformat/__init__.py