# HG changeset patch # User Paul Boddie # Date 1688078439 -7200 # Node ID 9736f88a89849a5c1012f56d2d5e052c9711d6a4 # Parent 98caaf6eb2bd6db26fc3bb6ae64f72a93f03f0be Introduced a prettyprinting serialiser and document tree summary parsing. Fixed encoding issues with the serialisation of HTML in the test program. diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/metadata.py --- a/moinformat/metadata.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/metadata.py Fri Jun 30 00:40:39 2023 +0200 @@ -3,7 +3,7 @@ """ Metadata for document conversion. -Copyright (C) 2018, 2019, 2021 Paul Boddie +Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -43,13 +43,13 @@ } effects = { - "input_context" : "input", - "input_format" : "parser", - "input_separator" : "input", - "link_format" : "linker", - "output_context" : "output", - "output_format" : "serialiser", - "theme_name" : "theme", + "input_context" : ["input"], + "input_format" : ["parser", "serialiser"], + "input_separator" : ["input"], + "link_format" : ["linker"], + "output_context" : ["output"], + "output_format" : ["serialiser"], + "theme_name" : ["theme"], } def __init__(self, parameters=None): @@ -92,12 +92,14 @@ self.parameters[name] = value - # Invalidate any affected setting. + # Invalidate any affected settings. affected = self.effects.get(name) - if affected and self.has_key(affected): - del self.parameters[affected] + if affected: + for affected_name in affected: + if self.has_key(affected_name): + del self.parameters[affected_name] # Set any default values. @@ -106,36 +108,43 @@ if affected and not self.get(affected): self.set(affected, value) - def make_object(self, name, fn, typename, typevalue=None): + def get_update(self, name, value=None): """ - Make an object to be stored in the setting 'name', using 'fn' to - acquire the object class, with the object type being retrieved from the - 'typename' setting, this being overwritten by 'typevalue' if specified. - Return None if no class is obtained. + Obtain the 'name' setting, this being overwritten by 'value' if + specified. Return the updated setting. """ - # Return any existing object if not reset. + # Overwrite any existing setting. - if not typevalue: - obj = self.get(name) - if obj: - return obj + if value: + self.set(name, value) + return value + else: + return self.get(name) + + def make_object(self, name, cls): - # Overwrite any existing typename setting. + """ + Make an object to be stored in the setting 'name', using 'cls' as the + object class. + """ - else: - self.set(typename, typevalue) + # Return any existing, preserved object. Since updates to various + # properties will discard objects, any preserved object should still be + # applicable. - # Obtain the class. + obj = self.get(name) + if obj: + return obj - cls = fn(self.get(typename)) + # Without any object class, return None. if not cls: self.set(name, None) return None - # Instantiate the class. + # Instantiate the class and record the object. obj = cls(self) self.set(name, obj) @@ -148,7 +157,9 @@ "input_context" setting which will be replaced by any given 'name'. """ - return self.make_object("input", get_input, "input_context", name) + cls = get_input(self.get_update("input_context", name)) + + return self.make_object("input", cls) def get_linker(self, name=None): @@ -157,7 +168,9 @@ "link_format" setting which will be replaced by any given 'name'. """ - return self.make_object("linker", get_linker, "link_format", name) + cls = get_linker(self.get_update("link_format", name)) + + return self.make_object("linker", cls) def get_output(self, name=None): @@ -166,7 +179,9 @@ "output_context" setting which will be replaced by any given 'name'. """ - return self.make_object("output", get_output, "output_context", name) + cls = get_output(self.get_update("output_context", name)) + + return self.make_object("output", cls) def get_parser(self, name=None): @@ -175,7 +190,9 @@ "input_format" setting which will be replaced by any given 'name'. """ - parser = self.make_object("parser", get_parser, "input_format", name) + cls = get_parser(self.get_update("input_format", name)) + + parser = self.make_object("parser", cls) parser.parsers = parsers return parser @@ -186,8 +203,10 @@ "output_format" setting which will be replaced by any given 'name'. """ - serialiser = self.make_object("serialiser", get_serialiser, - "output_format", name) + cls = get_serialiser(self.get_update("output_format", name), + self.get("input_format")) + + serialiser = self.make_object("serialiser", cls) serialiser.serialisers = serialisers return serialiser @@ -198,6 +217,8 @@ setting which will be replaced by any given 'name'. """ - return self.make_object("theme", get_theme, "theme_name", name) + cls = get_theme(self.get_update("theme_name", name)) + + return self.make_object("theme", cls) # vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/output/common.py --- a/moinformat/output/common.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/output/common.py Fri Jun 30 00:40:39 2023 +0200 @@ -3,7 +3,7 @@ """ Output context common functionality. -Copyright (C) 2018 Paul Boddie +Copyright (C) 2018, 2023 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -40,9 +40,10 @@ def reset(self): - "Set up an output collector." + "Set up an output collector and output state." self.output = [] + self.indent = "" def encode(self, text): diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/parsers/pretty.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/parsers/pretty.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +""" +Prettyprinted document tree parser. + +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.tree.pretty import Node + +class PrettyParser: + + "A prettyprinted document tree parser." + + formats = ["pretty"] + + def __init__(self, metadata): + self.metadata = metadata + + def parse(self, s): + + "Parse the tree structure representation in 's'." + + indent = 0 + branches = [] + + for line in s.split("\n"): + line = line.rstrip() + if not line: + continue + + new_indent = line.rfind(" ") + 1 + node = Node(line[new_indent:]) + + # Establish a branch to add nodes to. + + if not branches: + branches.append(node) + else: + # Note the current node as outermost branch. + + if new_indent > indent: + branches.append(node) + else: + # Reduced indent involves obtaining an inner branch again. + + while indent > new_indent: + del branches[-1] + indent -= 2 + + # Note the current node as outermost branch. + + branches[-1] = node + + # Append the current node to the parent branch. + + branches[-2].append(node) + + indent = new_indent + + return branches[0] + +parser = PrettyParser + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/__init__.py --- a/moinformat/serialisers/__init__.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/serialisers/__init__.py Fri Jun 30 00:40:39 2023 +0200 @@ -23,11 +23,15 @@ # Top-level functions. -def get_serialiser(name): +def get_serialiser(name, doctype=None): - "Return the main serialiser class for the format having the given 'name'." + """ + Return the main serialiser class for the format having the given 'name'. + If 'doctype' is indicated, obtain a serialiser class specific to that + document type. Otherwise, a general Moin serialiser class is obtained. + """ - return serialisers["%s.moin" % name] + return serialisers["%s.%s" % (name, doctype or "moin")] def make_serialiser(metadata, format=None): diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/__init__.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +""" +A package of modules containing prettyprinting serialisers. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/common.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/common.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +""" +Generic prettyprinted text serialiser. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.common import Serialiser as CommonSerialiser + +class Serialiser(CommonSerialiser): + + "Serialisation of nodes for inspection." + + def container(self, container): + + "Visit all nodes in 'container'." + + if container.nodes: + self.output.indent += " " + for node in container.nodes: + self.visit(node) + self.output.indent = self.output.indent[:-2] + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/graphviz.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/graphviz.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +""" +Prettyprinted text serialiser for Graphviz nodes. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.pretty.common import Serialiser + +class GraphvizSerialiser(Serialiser): + + "Serialisation of Graphviz nodes for inspection." + + input_formats = ["dot", "graphviz"] + formats = ["pretty"] + + # Node handler methods. + + def directive(self, directive): + self.out("%sDirective: key=%r value=%r directive=%r\n" % ( + self.output.indent, directive.key, directive.value, + directive.directive)) + +serialiser = GraphvizSerialiser + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/moin.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/moin.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +""" +Prettyprinted text serialiser for Moin nodes. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.pretty.common import Serialiser + +class MoinSerialiser(Serialiser): + + "Serialisation of Moin nodes for inspection." + + input_formats = ["moin", "wiki"] + formats = ["pretty"] + + # Node handler methods. + + def region(self, region): + self.out("%sRegion: level=%d indent=%d type=%s args=%r extra=%r\n" % ( + self.output.indent, region.level, region.indent, region.type, region.args, + region.extra)) + self.visit_region(region) + + # Block node methods. + + def block(self, block): + self.out("%sBlock\n" % self.output.indent) + self.container(block) + + def defitem(self, defitem): + self.out("%sDefItem: pad=%r extra=%r\n" % (self.output.indent, + defitem.pad, defitem.extra)) + self.container(defitem) + + def defterm(self, defterm): + self.out("%sDefTerm: pad=%r extra=%r\n" % (self.output.indent, + defterm.pad, defterm.extra)) + self.container(defterm) + + def fontstyle(self, fontstyle): + self.out("%sFontStyle: emphasis=%r strong=%r\n" % (self.output.indent, + fontstyle.emphasis, fontstyle.strong)) + self.container(fontstyle) + + def heading(self, heading): + self.out("%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r" + " end_extra=%r identifier=%r\n" % ( + self.output.indent, heading.level, heading.start_extra, + heading.start_pad, heading.end_pad, heading.end_extra, + heading.identifier)) + self.container(heading) + + def link_label(self, link_label): + self.out("%sLinkLabel\n" % self.output.indent) + self.container(link_label) + + def link_parameter(self, link_parameter): + self.out("%sLinkParameter\n" % self.output.indent) + self.container(link_parameter) + + def list(self, list): + self.out("%sList: indent=%r marker=%r num=%r\n" % ( + self.output.indent, list.indent, list.marker, list.num)) + self.container(list) + + def listitem(self, listitem): + self.out("%sListItem: indent=%d marker=%r space=%r num=%r\n" % ( + self.output.indent, listitem.indent, listitem.marker, listitem.space, listitem.num)) + self.container(listitem) + + def table(self, table): + self.out("%sTable:\n" % self.output.indent) + self.container(table) + + def table_attrs(self, table_attrs): + self.out("%sTableAttrs:\n" % self.output.indent) + self.container(table_attrs) + + def table_cell(self, table_cell): + self.out("%sTableCell: leading=%r padding=%r\n" % ( + self.output.indent, table_cell.leading, table_cell.padding)) + self.container(table_cell) + + def table_row(self, table_row): + self.out("%sTableRow: trailing=%r leading=%r padding=%r\n" % ( + self.output.indent, table_row.trailing, table_row.leading, + table_row.padding)) + self.container(table_row) + + def inline(self, inline): + self.out("%s%s\n" % (self.output.indent, inline.__class__.__name__)) + + # Inline nodes with children. + + def inline_container(self, inline): + self.inline(inline) + self.container(inline) + + larger = inline_container + + def link(self, link): + self.out("%sLink: target=%r\n" % (self.output.indent, link.target)) + self.container(link) + + def macro(self, macro): + self.out("%sMacro: name=%r args=%r\n" % (self.output.indent, macro.name, macro.args)) + self.container(macro) + + monospace = inline_container + smaller = inline_container + strikethrough = inline_container + subscript = inline_container + superscript = inline_container + + def transclusion(self, transclusion): + self.out("%sTransclusion: target=%r\n" % (self.output.indent, transclusion.target)) + self.container(transclusion) + + underline = inline_container + + # Inline nodes without children. + + def anchor(self, anchor): + self.out("%sAnchor: target=%r\n" % (self.output.indent, anchor.target)) + + break_ = inline + + def comment(self, comment): + self.out("%sComment: comment=%r extra=%r\n" % (self.output.indent, comment.comment, comment.extra)) + + def directive(self, directive): + self.out("%sDirective: directive=%r extra=%r\n" % (self.output.indent, directive.directive, directive.extra)) + + linebreak = inline + nbsp = inline + + def rule(self, rule): + self.out("%sRule: height=%d\n" % (self.output.indent, rule.height)) + + def table_attr(self, table_attr): + self.out("%sTableAttr: name=%r value=%r concise=%r quote=%r\n" % ( + self.output.indent, table_attr.name, table_attr.value, + table_attr.concise, table_attr.quote)) + + def text(self, text): + self.out("%sText: %r\n" % (self.output.indent, text.s)) + + def verbatim(self, verbatim): + self.out("%sVerbatim: text=%r\n" % (self.output.indent, verbatim.text)) + +serialiser = MoinSerialiser + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/pretty.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/pretty.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +""" +Prettyprinted document node prettyprinter. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.pretty.common import Serialiser + +class PrettySerialiser(Serialiser): + + "Serialisation of prettyprinted document nodes for inspection." + + input_formats = ["pretty"] + formats = ["pretty"] + + def node(self, node): + self.out("%s%s%s\n" % (self.output.indent, node.name, + len(node.nodes) and " nodes=%d" % len(node.nodes) or "")) + self.container(node) + +serialiser = PrettySerialiser + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/serialisers/pretty/table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/serialisers/pretty/table.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +""" +Moin wiki table prettyprinter. + +Copyright (C) 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.serialisers.pretty.moin import MoinSerialiser + +class MoinTableSerialiser(MoinSerialiser): + + "Serialisation of table nodes for inspection." + + input_formats = ["table"] + + def continuation(self, continuation): + self.out("%sContinuation: %r\n" % (self.output.indent, continuation.text)) + +serialiser = MoinTableSerialiser + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/tree/graphviz.py --- a/moinformat/tree/graphviz.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/tree/graphviz.py Fri Jun 30 00:40:39 2023 +0200 @@ -39,9 +39,6 @@ def __repr__(self): return "Directive(%r, %r, %r)" % (self.key, self.value, self.directive) - def prettyprint(self, indent=""): - return "%sDirective: key=%r value=%r directive=%r" % (indent, self.key, self.value, self.directive) - def visit(self, visitor): visitor.directive(self) diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/tree/moin.py --- a/moinformat/tree/moin.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/tree/moin.py Fri Jun 30 00:40:39 2023 +0200 @@ -175,14 +175,6 @@ return not self.text_content().strip() - def __str__(self): - return self.prettyprint() - - def _prettyprint(self, l, indent=""): - for node in self.nodes: - l.append(node.prettyprint(indent + " ")) - return "\n".join(l) - class Region(Container): "A region of the page." @@ -213,11 +205,6 @@ return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type, self.args, self.transparent, self.extra) - def prettyprint(self, indent=""): - l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent, - self.level, self.indent, self.type, self.args, self.extra)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.region(self) @@ -232,10 +219,6 @@ def __repr__(self): return "Block(%r)" % self.nodes - def prettyprint(self, indent=""): - l = ["%sBlock" % indent] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.block(self) @@ -251,10 +234,6 @@ def __repr__(self): return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra) - def prettyprint(self, indent=""): - l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.defitem(self) @@ -270,10 +249,6 @@ def __repr__(self): return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra) - def prettyprint(self, indent=""): - l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.defterm(self) @@ -303,10 +278,6 @@ def __repr__(self): return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong) - def prettyprint(self, indent=""): - l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.fontstyle(self) @@ -329,13 +300,6 @@ self.nodes, self.level, self.start_extra, self.start_pad, self.end_pad, self.end_extra, self.identifier) - def prettyprint(self, indent=""): - l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r" - " end_extra=%r identifier=%r" % ( - indent, self.level, self.start_extra, self.start_pad, self.end_pad, - self.end_extra, self.identifier)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.heading(self) @@ -346,10 +310,6 @@ def __repr__(self): return "LinkLabel(%r)" % self.nodes - def prettyprint(self, indent=""): - l = ["%sLinkLabel" % indent] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.link_label(self) @@ -360,10 +320,6 @@ def __repr__(self): return "LinkParameter(%r)" % self.nodes - def prettyprint(self, indent=""): - l = ["%sLinkParameter" % indent] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.link_parameter(self) @@ -384,12 +340,6 @@ def __repr__(self): return "List(%r)" % self.nodes - def prettyprint(self, indent=""): - if not self.first: - self.init() - l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)] - return self._prettyprint(l, indent) - def visit(self, visitor): if not self.first: self.init() @@ -413,12 +363,18 @@ def __repr__(self): return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num) - def prettyprint(self, indent=""): - l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)] - return self._prettyprint(l, indent) + def visit(self, visitor): + visitor.listitem(self) + +class Table(Container): + + "A table." + + def __repr__(self): + return "Table(%r)" % self.nodes def visit(self, visitor): - visitor.listitem(self) + visitor.table(self) class TableAttrs(Container): @@ -436,27 +392,9 @@ def __repr__(self): return "TableAttrs(%r)" % self.nodes - def prettyprint(self, indent=""): - l = ["%sTableAttrs:" % indent] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.table_attrs(self) -class Table(Container): - - "A table." - - def __repr__(self): - return "Table(%r)" % self.nodes - - def prettyprint(self, indent=""): - l = ["%sTable:" % indent] - return self._prettyprint(l, indent) - - def visit(self, visitor): - visitor.table(self) - class TableCell(Container): "A table cell." @@ -471,11 +409,6 @@ return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs, self.leading, self.padding) - def prettyprint(self, indent=""): - l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading, - self.padding)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.table_cell(self) @@ -493,11 +426,6 @@ return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing, self.leading, self.padding) - def prettyprint(self, indent=""): - l = ["%sTableRow: trailing=%r leading=%r padding=%r" % ( - indent, self.trailing, self.leading, self.padding)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.table_row(self) @@ -512,10 +440,6 @@ def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.nodes) - def prettyprint(self, indent=""): - l = ["%s%s" % (indent, self.__class__.__name__)] - return self._prettyprint(l, indent) - class Larger(Inline): "Larger text." @@ -534,10 +458,6 @@ def __repr__(self): return "Link(%r, %r)" % (self.nodes, self.target) - def prettyprint(self, indent=""): - l = ["%sLink: target=%r" % (indent, self.target)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.link(self) @@ -558,10 +478,6 @@ self.parent, self.region, self.nodes, self.inline) - def prettyprint(self, indent=""): - l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.macro(self) @@ -611,10 +527,6 @@ def __repr__(self): return "Transclusion(%r, %r)" % (self.nodes, self.target) - def prettyprint(self, indent=""): - l = ["%sTransclusion: target=%r" % (indent, self.target)] - return self._prettyprint(l, indent) - def visit(self, visitor): visitor.transclusion(self) @@ -646,9 +558,6 @@ def __repr__(self): return "Anchor(%r)" % self.target - def prettyprint(self, indent=""): - return "%sAnchor: target=%r" % (indent, self.target) - def visit(self, visitor): visitor.anchor(self) @@ -659,9 +568,6 @@ def __repr__(self): return "Break()" - def prettyprint(self, indent=""): - return "%sBreak" % indent - def visit(self, visitor): visitor.break_(self) @@ -676,9 +582,6 @@ def __repr__(self): return "Comment(%r, %r)" % (self.comment, self.extra) - def prettyprint(self, indent=""): - return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra) - def visit(self, visitor): visitor.comment(self) @@ -693,9 +596,6 @@ def __repr__(self): return "Directive(%r, %r)" % (self.directive, self.extra) - def prettyprint(self, indent=""): - return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra) - def visit(self, visitor): visitor.directive(self) @@ -706,9 +606,6 @@ def __repr__(self): return "LineBreak()" - def prettyprint(self, indent=""): - return "%sLineBreak" % indent - def visit(self, visitor): visitor.linebreak(self) @@ -719,9 +616,6 @@ def __repr__(self): return "NonBreakingSpace()" - def prettyprint(self, indent=""): - return "%sNonBreakingSpace" % indent - def visit(self, visitor): visitor.nbsp(self) @@ -735,9 +629,6 @@ def __repr__(self): return "Rule(%d)" % self.height - def prettyprint(self, indent=""): - return "%sRule: height=%d" % (indent, self.height) - def visit(self, visitor): visitor.rule(self) @@ -754,9 +645,6 @@ def __repr__(self): return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote) - def prettyprint(self, indent=""): - return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote) - def visit(self, visitor): visitor.table_attr(self) @@ -779,9 +667,6 @@ def __repr__(self): return "Text(%r)" % self.s - def prettyprint(self, indent=""): - return "%sText: %r" % (indent, self.s) - def visit(self, visitor): visitor.text(self) @@ -795,9 +680,6 @@ def __repr__(self): return "Verbatim(%r)" % self.text - def prettyprint(self, indent=""): - return "%sVerbatim: text=%r" % (indent, self.text) - def visit(self, visitor): visitor.verbatim(self) diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/tree/pretty.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/tree/pretty.py Fri Jun 30 00:40:39 2023 +0200 @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +""" +Prettyprinted document tree nodes. + +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + +from moinformat.tree.moin import Container + +class Node: + + "A simplified tree node representation." + + def __init__(self, name): + self.name = name + self.nodes = [] + + def __repr__(self): + return "Node(%r, %r)" % (self.name, self.nodes) + + def visit(self, visitor): + visitor.node(self) + + def append(self, node): + self.nodes.append(node) + + def test(self, other): + + """ + Test whether this node is considered equivalent to 'other', where + 'other' is a moinparser.tree node. + + Return any failing tree nodes or None. + """ + + if other.__class__.__name__ != self.name: + return self, other, "name" + + if isinstance(other, Container): + for node, other_node in map(None, self.nodes, other.nodes): + if node is None or other_node is None: + return self, other, node is None and "simple" or "document" + t = node.test(other_node) + if t: + return t + elif self.nodes: + return self, other, "empty" + + return None + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 98caaf6eb2bd -r 9736f88a8984 moinformat/tree/table.py --- a/moinformat/tree/table.py Fri Jun 30 00:37:36 2023 +0200 +++ b/moinformat/tree/table.py Fri Jun 30 00:40:39 2023 +0200 @@ -31,9 +31,6 @@ def __repr__(self): return "Continuation(%r)" % self.text - def prettyprint(self, indent=""): - return "%sContinuation: %r" % (indent, self.text) - def visit(self, visitor): visitor.continuation(self) diff -r 98caaf6eb2bd -r 9736f88a8984 tests/test_parser.py --- a/tests/test_parser.py Fri Jun 30 00:37:36 2023 +0200 +++ b/tests/test_parser.py Fri Jun 30 00:40:39 2023 +0200 @@ -1,5 +1,24 @@ #!/usr/bin/env python +""" +Test document parsing and serialisation. + +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . +""" + from os import listdir from os.path import abspath, split import sys @@ -17,10 +36,12 @@ # Import specific objects. -from moinformat import Metadata, make_input, make_output, make_parser, \ - make_serialiser, parse, serialise +from moinformat import get_parser, Metadata, make_input, make_output, \ + make_parser, make_serialiser, parse, serialise from moinformat.tree.moin import Container + + def test_input(d, s): "Compare serialised output from 'd' with its original form 's'." @@ -61,7 +82,8 @@ metadata.set("output_format", "html") metadata.set("mapping", {"MoinMoin" : "https://moinmo.in/"}) - print serialise(d, make_serialiser(metadata)) + result = serialise(d, make_serialiser(metadata)) + print output.encode(result) print "-" * 60 print @@ -69,7 +91,9 @@ def test_tree(d, t, ts): - "Compare tree structure 'd' with simplified, expected form 't' from 'ts'." + """ + Compare tree structure 'd' with simplified, expected form 't' from 'ts'. + """ failing = t.test(d) @@ -78,9 +102,12 @@ # Show tree versus expected forms. + moin_prettyprinter = make_serialiser(Metadata({"input_format" : "moin"}), "pretty") + tree_prettyprinter = make_serialiser(Metadata({"input_format" : "pretty"}), "pretty") + print not failing print "-" * 60 - print d.prettyprint() + print serialise(d, moin_prettyprinter) if failing: print "-" * 60 print ts @@ -90,102 +117,14 @@ print repr(simple) print repr(tree) print "-" * 60 - print tree.prettyprint() + print serialise(tree, tree_prettyprinter) print "-" * 60 - print simple.prettyprint() + print serialise(simple, tree_prettyprinter) print "-" * 60 print return not failing -class Node: - - "A simplified tree node representation." - - def __init__(self, name): - self.name = name - self.nodes = [] - - def __repr__(self): - return "Node(%r, %r)" % (self.name, self.nodes) - - def prettyprint(self, indent=""): - l = [] - l.append("%s%s%s" % (indent, self.name, len(self.nodes) and " nodes=%d" % len(self.nodes) or "")) - for node in self.nodes: - l.append(node.prettyprint(indent + " ")) - return "\n".join(l) - - def append(self, node): - self.nodes.append(node) - - def test(self, other): - - """ - Test whether this node is considered equivalent to 'other', where - 'other' is a moinparser.tree node. - - Return any failing tree nodes or None. - """ - - if other.__class__.__name__ != self.name: - return self, other, "name" - - if isinstance(other, Container): - for node, other_node in map(None, self.nodes, other.nodes): - if node is None or other_node is None: - return self, other, node is None and "simple" or "document" - t = node.test(other_node) - if t: - return t - elif self.nodes: - return self, other, "empty" - - return None - -def parse_tree(s): - - "Parse the tree structure representation in 's'." - - indent = 0 - branches = [] - - for line in s.split("\n"): - line = line.rstrip() - if not line: - continue - - new_indent = line.rfind(" ") + 1 - node = Node(line[new_indent:]) - - # Establish a branch to add nodes to. - - if not branches: - branches.append(node) - else: - # Note the current node as outermost branch. - - if new_indent > indent: - branches.append(node) - else: - # Reduced indent involves obtaining an inner branch again. - - while indent > new_indent: - del branches[-1] - indent -= 2 - - # Note the current node as outermost branch. - - branches[-1] = node - - # Append the current node to the parent branch. - - branches[-2].append(node) - - indent = new_indent - - return branches[0] - def get_filename(filename): "Using 'filename', return the core text filename and any encoding." @@ -206,10 +145,14 @@ if input.dir.exists(tree_filename): ts = input.readfile(tree_filename) - return ts, parse_tree(ts) + return ts, parse(ts, make_parser(Metadata(), "pretty")) else: return None, None + + +# Main program. + if __name__ == "__main__": args = sys.argv[1:]