# HG changeset patch # User Paul Boddie # Date 1494016711 -7200 # Node ID 7b9f5d3e242dd07f51c4f15058b8eb6209771bec # Parent 29f986a016f0f158141cf66f128ddff71dd6b7bc Introduced a necessary mechanism for transferring state between tokenisers. Added table node insertion around collections of table rows. diff -r 29f986a016f0 -r 7b9f5d3e242d moinformat/__init__.py --- a/moinformat/__init__.py Fri May 05 17:39:31 2017 +0200 +++ b/moinformat/__init__.py Fri May 05 22:38:31 2017 +0200 @@ -19,12 +19,13 @@ this program. If not, see <http://www.gnu.org/licenses/>. """ -from moinformat.parsing import ParserBase, TokenStream, get_patterns, new_block +from moinformat.parsing import ParserBase, TokenStream, get_patterns, \ + init_formats, new_block from moinformat.serialisers import serialise from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \ Larger, ListItem, Monospace, Region, Rule, Smaller, \ - Subscript, Superscript, TableAttr, TableAttrs, \ - TableCell, TableRow, Text, Underline + Subscript, Superscript, Table, TableAttr, \ + TableAttrs, TableCell, TableRow, Text, Underline # Regular expressions. @@ -150,11 +151,11 @@ # Principal parser methods. - def get_items(self, s): + def get_items(self, s, pos=0): - "Return a sequence of token items for 's'." + "Return a sequence of token items for 's' and 'pos'." - return TokenStream(s, self.patterns) + return TokenStream(s, self.patterns, pos) def parse(self, s): @@ -183,9 +184,19 @@ "Parse the data provided by 'items' to populate a wiki 'region'." + # Obtain a suitable token stream. + + items = self.replace_items(items) + + # Define a block to hold text and start parsing. + new_block(region) self.parse_region_details(items, region, self.region_pattern_names) + # Update the previous token stream. + + self.update_items(items) + # Parser methods supporting different page features. def parse_attrname(self, items, attrs): @@ -389,6 +400,16 @@ "Handle the start of a table row within 'region'." + # Identify any active table. 
+ + table = region.node(-2) + block = region.node(-1) + + if not (isinstance(table, Table) and block.empty()): + new_table = table = Table([]) + else: + new_table = None + row = TableRow([]) while True: @@ -424,7 +445,12 @@ row.append(cell) - region.add(row) + # Add the row to the table and any new table to the region. + + table.add(row) + if new_table: + region.add(new_table) + new_block(region) def parse_valign(self, items, attrs): @@ -544,6 +570,6 @@ # Top-level functions. def parse(s, formats=None): - return Parser(formats).parse(s) + return Parser(init_formats(formats)).parse(s) # vim: tabstop=4 expandtab shiftwidth=4 diff -r 29f986a016f0 -r 7b9f5d3e242d moinformat/parsing.py --- a/moinformat/parsing.py Fri May 05 17:39:31 2017 +0200 +++ b/moinformat/parsing.py Fri May 05 22:38:31 2017 +0200 @@ -24,12 +24,19 @@ # Pattern management. +ws_excl_nl = r"[ \f\r\t\v]" + def get_patterns(syntax): - "Define patterns for the regular expressions in the 'syntax' mapping." + """ + Define patterns for the regular expressions in the 'syntax' mapping. In each + pattern, replace \N with a pattern for matching whitespace excluding + newlines. + """ patterns = {} for name, value in syntax.items(): + value = value.replace(r"\N", ws_excl_nl) patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE) return patterns @@ -37,10 +44,18 @@ "Combine 'patterns' with those defined by the given 'syntax' mapping." - p = {} - p.update(patterns) - p.update(get_patterns(syntax)) - return p + return combine_dicts([patterns, get_patterns(syntax)]) + +def combine_dicts(dicts): + + "Combine the given 'dicts'." + + combined = {} + for d in dicts: + combined.update(d) + return combined + + # Tokenising functions. @@ -48,10 +63,10 @@ "A stream of tokens taken from a string." 
- def __init__(self, s, patterns): + def __init__(self, s, patterns, pos=0): self.s = s self.patterns = patterns - self.pos = 0 + self.pos = pos self.match = None self.matching = None @@ -136,13 +151,27 @@ """ self.formats = formats + self.replaced_items = None - def get_items(self, s): + def get_items(self, s, pos=0): - "Return a sequence of token items for 's'." + "Return a sequence of token items for 's' and 'pos'." raise NotImplementedError + def replace_items(self, items): + + "Replace the given 'items' with a sequence employing the same state." + + self.replaced_items = items + return self.get_items(items.s, items.pos) + + def update_items(self, items): + + "Update the state of the replaced items with that of 'items'." + + self.replaced_items.pos = items.pos + def parse(self, s): """ @@ -249,4 +278,20 @@ raise StopIteration + +# Format mapping initialisation. + +def init_formats(formats): + + """ + Convert the given 'formats' mapping from a name-to-class mapping to a + name-to-instance mapping with each parser instance employing the format + mapping itself. Return the converted mapping. + """ + + d = {} + for name, cls in formats.items(): + d[name] = cls(d) + return d + # vim: tabstop=4 expandtab shiftwidth=4 diff -r 29f986a016f0 -r 7b9f5d3e242d moinformat/serialisers.py --- a/moinformat/serialisers.py Fri May 05 17:39:31 2017 +0200 +++ b/moinformat/serialisers.py Fri May 05 22:38:31 2017 +0200 @@ -126,6 +126,12 @@ def end_superscript(self): self.out("^") + def start_table(self): + pass + + def end_table(self): + pass + def start_table_attrs(self): self.out("<") @@ -269,6 +275,12 @@ def end_superscript(self): self.out("</sup>") + def start_table(self): + self.out("<table>") + + def end_table(self): + self.out("</table>")
+ def start_table_attrs(self): pass diff -r 29f986a016f0 -r 7b9f5d3e242d moinformat/tree.py --- a/moinformat/tree.py Fri May 05 17:39:31 2017 +0200 +++ b/moinformat/tree.py Fri May 05 22:38:31 2017 +0200 @@ -296,6 +296,22 @@ self._to_string(out) out.end_table_attrs() +class Table(Container): + + "A table." + + def __repr__(self): + return "Table(%r)" % self.nodes + + def prettyprint(self, indent=""): + l = ["%sTable:" % indent] + return self._prettyprint(l, indent) + + def to_string(self, out): + out.start_table() + self._to_string(out) + out.end_table() + class TableCell(Container): "A table cell." diff -r 29f986a016f0 -r 7b9f5d3e242d tests/test_parser.py --- a/tests/test_parser.py Fri May 05 17:39:31 2017 +0200 +++ b/tests/test_parser.py Fri May 05 22:38:31 2017 +0200 @@ -1,6 +1,7 @@ #!/usr/bin/env python from moinformat import parse +from moinformat.parsers import table from moinformat.serialisers import serialise, HTMLSerialiser from glob import glob from os.path import join, split @@ -8,8 +9,12 @@ dirname = split(sys.argv[0])[0] +formats = { + "table" : table.TableParser, + } + def test_input(s): - d = parse(s) + d = parse(s, formats) o = serialise(d) print o == s