# HG changeset patch # User Paul Boddie # Date 1513099028 -3600 # Node ID 427c66773470d19622ffe643443d770417ea578e # Parent 743a15ed73aaca182c85bc971f87d841cf8083db Reorganised the parsers and serialisers, introducing the missing table parser for the test program. Added a quiet option in the test program. diff -r 743a15ed73aa -r 427c66773470 moinformat/parsers/__init__.py diff -r 743a15ed73aa -r 427c66773470 moinformat/parsers/table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/parsers/table.py Tue Dec 12 18:17:08 2017 +0100 @@ -0,0 +1,121 @@ +#!/usr/bin/env python + +""" +Moin wiki table parser. + +Copyright (C) 2017 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.parsing import get_patterns +from moinformat.tree import Table, TableAttrs, TableCell, TableRow, Text +from moinformat import Parser + + + +# Parser functionality. + +class TableParser(Parser): + + "A parser for improved table syntax." + + # Principal parser methods. + + def parse_region_content(self, items, region): + + "Parse the data provided by 'items' to populate the given 'region'." + + self.set_region(items, region) + self.parse_table_region() + + def parse_table_region(self): + + # Start to populate table rows. + + cell = TableCell([]) + row = TableRow([cell]) + table = Table([row]) + self.region.append(table) + + while True: + self.parse_region_details(cell, self.table_region_pattern_names) + + # Detect the end of the table. + + if self.read_matching() == "regionend": + break + + if self.read_matching() == "columnsep": + cell = TableCell([]) + row.append(cell) + + elif self.read_matching() == "rowsep": + row = TableRow([]) + table.append(row) + cell = TableCell([]) + row.append(cell) + + # Parser handler methods. + + def parse_continuation(self, cell): + pass + + def parse_table_end(self, cell): + + "Handle the end of a region within 'cell'." + + feature = self.read_match() + if self.region.have_end(feature): + raise StopIteration + else: + cell.append_inline(Text(feature)) + + # Regular expressions. + + syntax = {} + syntax.update(Parser.syntax) + syntax.update({ + # At start of line: + "rowsep" : r"^==(?!.*==\s*?$)(?=\N*?)", # == not-heading ws-excl-nl + "continuation" : r"^(\N*)\.\.(?!\.)(?=\N)", # .. ws-excl-nl or .. not-dot + + # Within text: + "columnsep" : r"\|\|(?!\|)(?=\N)", # || ws-excl-nl or || not-pipe + }) + + patterns = get_patterns(syntax) + + + + # Pattern details. + + table_region_pattern_names = Parser.region_pattern_names + [ + "columnsep", "continuation", "regionend", "rowsep", + ] + + + + # Pattern handlers. + + handlers = {} + handlers.update(Parser.handlers) + handlers.update({ + "columnsep" : Parser.end_region, + "continuation" : parse_continuation, + "rowsep" : Parser.end_region, + "regionend" : parse_table_end, + }) + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 moinformat/serialisers.py --- a/moinformat/serialisers.py Fri May 12 00:51:20 2017 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,331 +0,0 @@ -#!/usr/bin/env python - -""" -Moin wiki serialisers. - -Copyright (C) 2017 Paul Boddie - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see . -""" - -def escape_text(s): - - "Escape XML document text." - - return s.replace("&", "&").replace("<", "<").replace(">", ">") - -def escape_attr(s): - - "Escape XML document attribute." - - return escape_text(s).replace("'", "'").replace('"', """) - -class Serialiser: - - "General serialisation support." - - def __init__(self, out): - self.out = out - -class MoinSerialiser(Serialiser): - - "Serialisation of the page." - - def start_region(self, level, indent, type): - out = self.out - if level: - out(" " * indent + "{" * level) - if type and level: - out("#!%s\n" % type) - - def end_region(self, level, indent, type): - out = self.out - if level: - out("}" * level) - - def start_block(self): - pass - - def end_block(self): - pass - - def start_defitem(self, pad, extra): - self.out((extra and "\n" + extra + "::" or "") + pad) - - def end_defitem(self, pad, extra): - pass - - def start_defterm(self, pad): - self.out(pad) - - def end_defterm(self, pad): - self.out("::") - - def start_emphasis(self): - self.out("''") - - def end_emphasis(self): - self.out("''") - - def start_heading(self, level, extra, pad): - self.out(extra + "=" * level + pad) - - def end_heading(self, level, pad, extra): - self.out(pad + "=" * level + extra) - - def start_larger(self): - self.out("~+") - - def end_larger(self): - self.out("+~") - - def start_listitem(self, indent, marker, space): - self.out("%s%s%s" % (indent * " ", marker, space)) - - def end_listitem(self, indent, marker): - pass - - def start_monospace(self): - self.out("`") - - def end_monospace(self): - self.out("`") - - def start_smaller(self): - self.out("~-") - - def end_smaller(self): - self.out("-~") - - def start_strong(self): - self.out("'''") - - def end_strong(self): - self.out("'''") - - def start_subscript(self): - self.out(",,") - - def end_subscript(self): - self.out(",,") - - def start_superscript(self): - self.out("^") - - def end_superscript(self): - self.out("^") - - def start_table(self): - pass - - def end_table(self): - pass - - def start_table_attrs(self): - self.out("<") - - def end_table_attrs(self): - self.out(">") - - def start_table_cell(self, attrs): - self.out("||") - if attrs and not attrs.empty(): - attrs.to_string(self) - - def end_table_cell(self): - pass - - def start_table_row(self): - pass - - def end_table_row(self, trailing): - self.out("||") - self.out(trailing) - - def start_underline(self): - self.out("__") - - def end_underline(self): - self.out("__") - - def break_(self): - self.out("\n") - - def rule(self, length): - self.out("-" * length) - - def table_attr(self, name, value, concise, quote): - if concise: - if name == "colour": self.out(value) - elif name == "colspan": self.out("-%s" % value) - elif name == "halign" : self.out(value == "left" and "(" or value == "right" and ")" or ":") - elif name == "rowspan": self.out("|%s" % value) - elif name == "valign" : self.out(value == "top" and "^" or "v") - elif name == "width" : self.out(value) - else: - self.out("%s%s" % (escape_text(name), value is not None and - "=%s%s%s" % (quote or '"', escape_attr(value), quote or '"') or "")) - - def text(self, s): - self.out(s) - -class HTMLSerialiser(Serialiser): - - "Serialisation of the page." - - def start_region(self, level, indent, type): - l = [] - out = l.append - if level: - out("level-%d" % level) - - if indent: - out("indent-%d" % indent) - - # NOTE: Encode type details for CSS. - - if type: - out("type-%s" % escape_attr(type)) - - self.out("" % " ".join(l)) - - def end_region(self, level, indent, type): - self.out("") - - def start_block(self): - self.out("

") - - def end_block(self): - self.out("

") - - def start_defitem(self, pad, extra): - self.out("
") - - def end_defitem(self, pad, extra): - self.out("
") - - def start_defterm(self, pad): - self.out("
") - - def end_defterm(self, pad): - self.out("
") - - def start_emphasis(self): - self.out("") - - def end_emphasis(self): - self.out("") - - def start_heading(self, level, extra, pad): - self.out("" % level) - - def end_heading(self, level, pad, extra): - self.out("" % level) - - def start_larger(self): - self.out("") - - def end_larger(self): - self.out("") - - def start_listitem(self, indent, marker, space): - self.out("
  • ") - - def end_listitem(self, indent, marker): - self.out("
  • ") - - def start_monospace(self): - self.out("") - - def end_monospace(self): - self.out("") - - def start_smaller(self): - self.out("") - - def end_smaller(self): - self.out("") - - def start_strong(self): - self.out("") - - def end_strong(self): - self.out("") - - def start_subscript(self): - self.out("") - - def end_subscript(self): - self.out("") - - def start_superscript(self): - self.out("") - - def end_superscript(self): - self.out("") - - def start_table(self): - self.out("") - - def end_table(self): - self.out("
    ") - - def start_table_attrs(self): - pass - - def end_table_attrs(self): - pass - - def start_table_cell(self, attrs): - self.out("") - - def end_table_cell(self): - self.out("") - - def start_table_row(self): - self.out("") - - def end_table_row(self, trailing): - self.out("") - - def start_underline(self): - self.out("") - - def end_underline(self): - self.out("") - - def break_(self): - pass - - def rule(self, length): - self.out("
    " % min(length, 10)) - - def table_attr(self, name, value, concise, quote): - self.out(" %s%s" % (escape_text(name), value is not None and - "='%s'" % escape_attr(value) or "")) - - def text(self, s): - self.out(escape_text(s)) - -# Top-level functions. - -def serialise(doc, serialiser=MoinSerialiser): - l = [] - doc.to_string(serialiser(l.append)) - return "".join(l) - -# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 moinformat/serialisers/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/__init__.py Tue Dec 12 18:17:08 2017 +0100 @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +""" +Moin wiki serialisers. + +Copyright (C) 2017 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.moin import MoinSerialiser + +# Top-level functions. + +def serialise(doc, serialiser=MoinSerialiser): + l = [] + doc.to_string(serialiser(l.append)) + return "".join(l) + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 moinformat/serialisers/common.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/common.py Tue Dec 12 18:17:08 2017 +0100 @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +""" +Moin serialiser support. + +Copyright (C) 2017 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +class Serialiser: + + "General serialisation support." + + def __init__(self, out): + self.out = out + +def escape_attr(s): + + "Escape XML document attribute." + + return escape_text(s).replace("'", "'").replace('"', """) + +def escape_text(s): + + "Escape XML document text." + + return s.replace("&", "&").replace("<", "<").replace(">", ">") + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 moinformat/serialisers/html.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/html.py Tue Dec 12 18:17:08 2017 +0100 @@ -0,0 +1,165 @@ +#!/usr/bin/env python + +""" +HTML serialiser. + +Copyright (C) 2017 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.common import escape_attr, escape_text, Serialiser + +class HTMLSerialiser(Serialiser): + + "Serialisation of the page." + + def start_region(self, level, indent, type): + l = [] + out = l.append + if level: + out("level-%d" % level) + + if indent: + out("indent-%d" % indent) + + # NOTE: Encode type details for CSS. + + if type: + out("type-%s" % escape_attr(type)) + + self.out("" % " ".join(l)) + + def end_region(self, level, indent, type): + self.out("") + + def start_block(self): + self.out("

    ") + + def end_block(self): + self.out("

    ") + + def start_defitem(self, pad, extra): + self.out("
    ") + + def end_defitem(self, pad, extra): + self.out("
    ") + + def start_defterm(self, pad): + self.out("
    ") + + def end_defterm(self, pad): + self.out("
    ") + + def start_emphasis(self): + self.out("") + + def end_emphasis(self): + self.out("") + + def start_heading(self, level, extra, pad): + self.out("" % level) + + def end_heading(self, level, pad, extra): + self.out("" % level) + + def start_larger(self): + self.out("") + + def end_larger(self): + self.out("") + + def start_listitem(self, indent, marker, space): + self.out("
  • ") + + def end_listitem(self, indent, marker): + self.out("
  • ") + + def start_monospace(self): + self.out("") + + def end_monospace(self): + self.out("") + + def start_smaller(self): + self.out("") + + def end_smaller(self): + self.out("") + + def start_strong(self): + self.out("") + + def end_strong(self): + self.out("") + + def start_subscript(self): + self.out("") + + def end_subscript(self): + self.out("") + + def start_superscript(self): + self.out("") + + def end_superscript(self): + self.out("") + + def start_table(self): + self.out("") + + def end_table(self): + self.out("
    ") + + def start_table_attrs(self): + pass + + def end_table_attrs(self): + pass + + def start_table_cell(self, attrs): + self.out("") + + def end_table_cell(self): + self.out("") + + def start_table_row(self): + self.out("") + + def end_table_row(self, trailing): + self.out("") + + def start_underline(self): + self.out("") + + def end_underline(self): + self.out("") + + def break_(self): + pass + + def rule(self, length): + self.out("
    " % min(length, 10)) + + def table_attr(self, name, value, concise, quote): + self.out(" %s%s" % (escape_text(name), value is not None and + "='%s'" % escape_attr(value) or "")) + + def text(self, s): + self.out(escape_text(s)) + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 moinformat/serialisers/moin.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/moin.py Tue Dec 12 18:17:08 2017 +0100 @@ -0,0 +1,166 @@ +#!/usr/bin/env python + +""" +Moin wiki text serialiser. + +Copyright (C) 2017 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.common import escape_attr, escape_text, Serialiser + +class MoinSerialiser(Serialiser): + + "Serialisation of the page." + + def start_region(self, level, indent, type): + out = self.out + if level: + out(" " * indent + "{" * level) + if type and level: + out("#!%s\n" % type) + + def end_region(self, level, indent, type): + out = self.out + if level: + out("}" * level) + + def start_block(self): + pass + + def end_block(self): + pass + + def start_defitem(self, pad, extra): + self.out((extra and "\n" + extra + "::" or "") + pad) + + def end_defitem(self, pad, extra): + pass + + def start_defterm(self, pad): + self.out(pad) + + def end_defterm(self, pad): + self.out("::") + + def start_emphasis(self): + self.out("''") + + def end_emphasis(self): + self.out("''") + + def start_heading(self, level, extra, pad): + self.out(extra + "=" * level + pad) + + def end_heading(self, level, pad, extra): + self.out(pad + "=" * level + extra) + + def start_larger(self): + self.out("~+") + + def end_larger(self): + self.out("+~") + + def start_listitem(self, indent, marker, space): + self.out("%s%s%s" % (indent * " ", marker, space)) + + def end_listitem(self, indent, marker): + pass + + def start_monospace(self): + self.out("`") + + def end_monospace(self): + self.out("`") + + def start_smaller(self): + self.out("~-") + + def end_smaller(self): + self.out("-~") + + def start_strong(self): + self.out("'''") + + def end_strong(self): + self.out("'''") + + def start_subscript(self): + self.out(",,") + + def end_subscript(self): + self.out(",,") + + def start_superscript(self): + self.out("^") + + def end_superscript(self): + self.out("^") + + def start_table(self): + pass + + def end_table(self): + pass + + def start_table_attrs(self): + self.out("<") + + def end_table_attrs(self): + self.out(">") + + def start_table_cell(self, attrs): + self.out("||") + if attrs and not attrs.empty(): + attrs.to_string(self) + + def end_table_cell(self): + pass + + def start_table_row(self): + pass + + def end_table_row(self, trailing): + self.out("||") + self.out(trailing) + + def start_underline(self): + self.out("__") + + def end_underline(self): + self.out("__") + + def break_(self): + self.out("\n") + + def rule(self, length): + self.out("-" * length) + + def table_attr(self, name, value, concise, quote): + if concise: + if name == "colour": self.out(value) + elif name == "colspan": self.out("-%s" % value) + elif name == "halign" : self.out(value == "left" and "(" or value == "right" and ")" or ":") + elif name == "rowspan": self.out("|%s" % value) + elif name == "valign" : self.out(value == "top" and "^" or "v") + elif name == "width" : self.out(value) + else: + self.out("%s%s" % (escape_text(name), value is not None and + "=%s%s%s" % (quote or '"', escape_attr(value), quote or '"') or "")) + + def text(self, s): + self.out(s) + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 743a15ed73aa -r 427c66773470 tests/test_parser.py --- a/tests/test_parser.py Fri May 12 00:51:20 2017 +0200 +++ b/tests/test_parser.py Tue Dec 12 18:17:08 2017 +0100 @@ -2,7 +2,8 @@ from moinformat import parse from moinformat.parsers import table -from moinformat.serialisers import serialise, HTMLSerialiser +from moinformat.serialisers import serialise +from moinformat.serialisers.html import HTMLSerialiser from glob import glob from os.path import join, split import sys @@ -18,6 +19,10 @@ o = serialise(d) print o == s + + if quiet: + return + print "-" * 60 print o if o != s: @@ -30,7 +35,11 @@ print if __name__ == "__main__": - filenames = sys.argv[1:] or glob(join(dirname, "test*.txt")) + args = sys.argv[1:] + quiet = "-q" in args + if quiet: + del args[args.index("-q")] + filenames = args or glob(join(dirname, "test*.txt")) filenames.sort() for filename in filenames: