1.1 --- a/moinconvert Tue Jun 20 18:58:47 2023 +0200
1.2 +++ b/moinconvert Fri Aug 18 00:18:42 2023 +0200
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Moin wiki format converter.
1.6
1.7 -Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -36,6 +36,10 @@
1.13
1.14 To indicate pagenames within an input directory, omit any --pagename flags."""
1.15
1.16 +message_tree_format_usage = """\
1.17 +The --tree option cannot be used together with the --format or --output-format
1.18 +options since the --tree option indicates use of the "pretty" format."""
1.19 +
1.20
1.21
1.22 # Options management.
1.23 @@ -88,7 +92,7 @@
1.24 attachments_dir = []
1.25 document_indexes = []
1.26 filenames = []
1.27 - formats = []
1.28 + input_formats = []
1.29 input_dir_types = []
1.30 input_dirs = []
1.31 input_encodings = []
1.32 @@ -96,6 +100,7 @@
1.33 mappings = []
1.34 output_dirs = []
1.35 output_encodings = []
1.36 + output_formats = []
1.37 output_page_seps = []
1.38 pagenames = []
1.39 root_pagenames = []
1.40 @@ -152,6 +157,9 @@
1.41 # Detect tree output.
1.42
1.43 elif arg == "--tree":
1.44 + if output_formats:
1.45 + print >>sys.stderr, message_tree_format_usage
1.46 + sys.exit(1)
1.47 tree = True
1.48
1.49 # Options with following arguments.
1.50 @@ -170,8 +178,11 @@
1.51
1.52 # Switch to collecting formats.
1.53
1.54 - elif arg == "--format":
1.55 - l = formats
1.56 + elif arg in ("--format", "--output-format"):
1.57 + if tree:
1.58 + print >>sys.stderr, message_tree_format_usage
1.59 + sys.exit(1)
1.60 + l = output_formats
1.61 continue
1.62
1.63 # Switch to collecting input locations.
1.64 @@ -192,6 +203,12 @@
1.65 l = input_encodings
1.66 continue
1.67
1.68 + # Switch to collecting input formats.
1.69 +
1.70 + elif arg == "--input-format":
1.71 + l = input_formats
1.72 + continue
1.73 +
1.74 # Switch to collecting input page hierarchy separators.
1.75
1.76 elif arg == "--input-page-sep":
1.77 @@ -254,7 +271,8 @@
1.78
1.79 l = filenames
1.80
1.81 - format = formats and formats[0] or "html"
1.82 + input_format = input_formats and input_formats[0] or "moin"
1.83 + output_format = tree and "pretty" or output_formats and output_formats[0] or "html"
1.84 input_dir = getvalue(input_dirs)
1.85 output_dir = getvalue(output_dirs)
1.86
1.87 @@ -265,23 +283,25 @@
1.88 "bundle" : bundle,
1.89 "common_attachments": common,
1.90 "document_index" : getvalue(document_indexes),
1.91 + "fragment" : fragment,
1.92 "input_context" : input_dir and \
1.93 getvalue(input_dir_types, "directory") or \
1.94 "standalone",
1.95 "input_encoding" : getvalue(input_encodings),
1.96 "input_filename" : input_dir,
1.97 + "input_format" : input_format,
1.98 "input_separator" : getvalue(input_page_seps),
1.99 - "link_format" : format,
1.100 + "link_format" : output_format,
1.101 "mapping" : getmapping(mappings),
1.102 "no_inline" : no_inline,
1.103 "output_context" : output_dir and "directory" or "standalone",
1.104 "output_encoding" : getvalue(output_encodings),
1.105 - "output_format" : format,
1.106 + "output_format" : output_format,
1.107 "output_filename" : output_dir,
1.108 "output_separator" : getvalue(output_page_seps),
1.109 "root_pagename" : getvalue(root_pagenames, "FrontPage"),
1.110 "theme_name" : not fragment and \
1.111 - "%s.%s" % (getvalue(theme_names, "default"), format) or None,
1.112 + "%s.%s" % (getvalue(theme_names, "default"), output_format) or None,
1.113 })
1.114
1.115 # Define the input context and theme.
1.116 @@ -333,19 +353,17 @@
1.117
1.118 p.update_metadata(metadata)
1.119
1.120 - # Show a document tree for debugging purposes, if requested.
1.121 -
1.122 - if tree:
1.123 - print d.prettyprint()
1.124 - continue
1.125 -
1.126 - # Otherwise, serialise the document.
1.127 -
1.128 # Obtain a serialiser using the configuration.
1.129
1.130 serialiser = make_serialiser(metadata)
1.131 outtext = serialise(d, serialiser)
1.132
1.133 + # Show a document tree for debugging purposes, if requested.
1.134 +
1.135 + if tree:
1.136 + print outtext
1.137 + continue
1.138 +
1.139 # With a theme, apply it to the text.
1.140
1.141 if theme:
1.142 @@ -386,11 +404,13 @@
1.143
1.144 --common Obtain attachments from a common directory for all pages,
1.145 rather than each page having its own subdirectory of a
1.146 - top-level attachments directory.
1.147 + top-level attachments directory
1.148 --input-dir Indicate an input directory containing document files
1.149 --input-dir-type Indicate the type of input directory involved
1.150 (default: directory)
1.151 --input-encoding Indicate the character encoding used in document files
1.152 +--input-format Indicate the format of the parsed documents
1.153 + (default: moin)
1.154 --input-page-sep Indicate the separator used in filenames to encode
1.155 hierarchical relationships (subpages and descendant pages)
1.156 --pagename Indicate the page name corresponding to an indicated
1.157 @@ -401,20 +421,24 @@
1.158
1.159 --bundle Bundle resources such as stylesheets within every document,
1.160 useful for publishing documents that need to be copied or
1.161 - distributed individually.
1.162 + distributed individually
1.163 --document-index Provide a "DocumentIndex" filename to be used in links in
1.164 HTML format output, useful for local file browsing instead
1.165 of Web-published content
1.166 ---format Indicate the format to be used for serialised documents
1.167 +--format Indicate the format to be used for serialised documents;
1.168 + equivalent to --output-format
1.169 (default: html)
1.170 --fragment Indicates that an output fragment, not an entire document,
1.171 is to be generated, skipping any theming activities
1.172 --no-inline Suppress inline objects in serialised documents, linking to
1.173 - separate objects instead.
1.174 + separate objects instead
1.175 --output-dir Indicate an output directory to contain serialised document
1.176 files
1.177 --output-encoding Indicate the character encoding used in serialised document
1.178 files
1.179 +--output-format Indicate the format to be used for serialised documents;
1.180 + equivalent to --format
1.181 + (default: html)
1.182 --output-page-sep Indicate the separator used in filenames to encode
1.183 hierarchical relationships (subpages and descendant pages)
1.184 --theme Indicate a theme for serialised documents, typically
2.1 --- a/moinformat/metadata.py Tue Jun 20 18:58:47 2023 +0200
2.2 +++ b/moinformat/metadata.py Fri Aug 18 00:18:42 2023 +0200
2.3 @@ -3,7 +3,7 @@
2.4 """
2.5 Metadata for document conversion.
2.6
2.7 -Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
2.8 +Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
2.9
2.10 This program is free software; you can redistribute it and/or modify it under
2.11 the terms of the GNU General Public License as published by the Free Software
2.12 @@ -43,13 +43,13 @@
2.13 }
2.14
2.15 effects = {
2.16 - "input_context" : "input",
2.17 - "input_format" : "parser",
2.18 - "input_separator" : "input",
2.19 - "link_format" : "linker",
2.20 - "output_context" : "output",
2.21 - "output_format" : "serialiser",
2.22 - "theme_name" : "theme",
2.23 + "input_context" : ["input"],
2.24 + "input_format" : ["parser", "serialiser"],
2.25 + "input_separator" : ["input"],
2.26 + "link_format" : ["linker"],
2.27 + "output_context" : ["output"],
2.28 + "output_format" : ["serialiser"],
2.29 + "theme_name" : ["theme"],
2.30 }
2.31
2.32 def __init__(self, parameters=None):
2.33 @@ -92,12 +92,14 @@
2.34
2.35 self.parameters[name] = value
2.36
2.37 - # Invalidate any affected setting.
2.38 + # Invalidate any affected settings.
2.39
2.40 affected = self.effects.get(name)
2.41
2.42 - if affected and self.has_key(affected):
2.43 - del self.parameters[affected]
2.44 + if affected:
2.45 + for affected_name in affected:
2.46 + if self.has_key(affected_name):
2.47 + del self.parameters[affected_name]
2.48
2.49 # Set any default values.
2.50
2.51 @@ -106,36 +108,43 @@
2.52 if affected and not self.get(affected):
2.53 self.set(affected, value)
2.54
2.55 - def make_object(self, name, fn, typename, typevalue=None):
2.56 + def get_update(self, name, value=None):
2.57
2.58 """
2.59 - Make an object to be stored in the setting 'name', using 'fn' to
2.60 - acquire the object class, with the object type being retrieved from the
2.61 - 'typename' setting, this being overwritten by 'typevalue' if specified.
2.62 - Return None if no class is obtained.
2.63 + Obtain the 'name' setting, this being overwritten by 'value' if
2.64 + specified. Return the updated setting.
2.65 """
2.66
2.67 - # Return any existing object if not reset.
2.68 + # Overwrite any existing setting.
2.69
2.70 - if not typevalue:
2.71 - obj = self.get(name)
2.72 - if obj:
2.73 - return obj
2.74 + if value:
2.75 + self.set(name, value)
2.76 + return value
2.77 + else:
2.78 + return self.get(name)
2.79 +
2.80 + def make_object(self, name, cls):
2.81
2.82 - # Overwrite any existing typename setting.
2.83 + """
2.84 + Make an object to be stored in the setting 'name', using 'cls' as the
2.85 + object class.
2.86 + """
2.87
2.88 - else:
2.89 - self.set(typename, typevalue)
2.90 + # Return any existing, preserved object. Since updates to various
2.91 + # properties will discard objects, any preserved object should still be
2.92 + # applicable.
2.93
2.94 - # Obtain the class.
2.95 + obj = self.get(name)
2.96 + if obj:
2.97 + return obj
2.98
2.99 - cls = fn(self.get(typename))
2.100 + # Without any object class, return None.
2.101
2.102 if not cls:
2.103 self.set(name, None)
2.104 return None
2.105
2.106 - # Instantiate the class.
2.107 + # Instantiate the class and record the object.
2.108
2.109 obj = cls(self)
2.110 self.set(name, obj)
2.111 @@ -148,7 +157,9 @@
2.112 "input_context" setting which will be replaced by any given 'name'.
2.113 """
2.114
2.115 - return self.make_object("input", get_input, "input_context", name)
2.116 + cls = get_input(self.get_update("input_context", name))
2.117 +
2.118 + return self.make_object("input", cls)
2.119
2.120 def get_linker(self, name=None):
2.121
2.122 @@ -157,7 +168,9 @@
2.123 "link_format" setting which will be replaced by any given 'name'.
2.124 """
2.125
2.126 - return self.make_object("linker", get_linker, "link_format", name)
2.127 + cls = get_linker(self.get_update("link_format", name))
2.128 +
2.129 + return self.make_object("linker", cls)
2.130
2.131 def get_output(self, name=None):
2.132
2.133 @@ -166,7 +179,9 @@
2.134 "output_context" setting which will be replaced by any given 'name'.
2.135 """
2.136
2.137 - return self.make_object("output", get_output, "output_context", name)
2.138 + cls = get_output(self.get_update("output_context", name))
2.139 +
2.140 + return self.make_object("output", cls)
2.141
2.142 def get_parser(self, name=None):
2.143
2.144 @@ -175,7 +190,9 @@
2.145 "input_format" setting which will be replaced by any given 'name'.
2.146 """
2.147
2.148 - parser = self.make_object("parser", get_parser, "input_format", name)
2.149 + cls = get_parser(self.get_update("input_format", name))
2.150 +
2.151 + parser = self.make_object("parser", cls)
2.152 parser.parsers = parsers
2.153 return parser
2.154
2.155 @@ -186,8 +203,10 @@
2.156 "output_format" setting which will be replaced by any given 'name'.
2.157 """
2.158
2.159 - serialiser = self.make_object("serialiser", get_serialiser,
2.160 - "output_format", name)
2.161 + cls = get_serialiser(self.get_update("output_format", name),
2.162 + self.get("input_format"))
2.163 +
2.164 + serialiser = self.make_object("serialiser", cls)
2.165 serialiser.serialisers = serialisers
2.166 return serialiser
2.167
2.168 @@ -198,6 +217,8 @@
2.169 setting which will be replaced by any given 'name'.
2.170 """
2.171
2.172 - return self.make_object("theme", get_theme, "theme_name", name)
2.173 + cls = get_theme(self.get_update("theme_name", name))
2.174 +
2.175 + return self.make_object("theme", cls)
2.176
2.177 # vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/moinformat/output/common.py Tue Jun 20 18:58:47 2023 +0200
3.2 +++ b/moinformat/output/common.py Fri Aug 18 00:18:42 2023 +0200
3.3 @@ -3,7 +3,7 @@
3.4 """
3.5 Output context common functionality.
3.6
3.7 -Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
3.8 +Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
3.9
3.10 This program is free software; you can redistribute it and/or modify it under
3.11 the terms of the GNU General Public License as published by the Free Software
3.12 @@ -40,9 +40,10 @@
3.13
3.14 def reset(self):
3.15
3.16 - "Set up an output collector."
3.17 + "Set up an output collector and output state."
3.18
3.19 self.output = []
3.20 + self.indent = ""
3.21
3.22 def encode(self, text):
3.23
4.1 --- a/moinformat/parsers/__init__.py Tue Jun 20 18:58:47 2023 +0200
4.2 +++ b/moinformat/parsers/__init__.py Fri Aug 18 00:18:42 2023 +0200
4.3 @@ -3,7 +3,7 @@
4.4 """
4.5 Moin wiki parsers.
4.6
4.7 -Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>
4.8 +Copyright (C) 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>
4.9
4.10 This program is free software; you can redistribute it and/or modify it under
4.11 the terms of the GNU General Public License as published by the Free Software
4.12 @@ -24,13 +24,13 @@
4.13
4.14 # Top-level functions.
4.15
4.16 -def get_parser(name="moin"):
4.17 +def get_parser(name=None):
4.18
4.19 "Return the parser class supporting the format with the given 'name'."
4.20
4.21 - return parsers[name]
4.22 + return parsers[name or "moin"]
4.23
4.24 -def make_parser(metadata, name="moin"):
4.25 +def make_parser(metadata, name=None):
4.26
4.27 "Return a parser instance using the given 'metadata' and optional 'name'."
4.28
5.1 --- a/moinformat/parsers/common.py Tue Jun 20 18:58:47 2023 +0200
5.2 +++ b/moinformat/parsers/common.py Fri Aug 18 00:18:42 2023 +0200
5.3 @@ -3,7 +3,7 @@
5.4 """
5.5 Moin wiki parsing functionality.
5.6
5.7 -Copyright (C) 2017, 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
5.8 +Copyright (C) 2017, 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
5.9
5.10 This program is free software; you can redistribute it and/or modify it under
5.11 the terms of the GNU General Public License as published by the Free Software
5.12 @@ -279,6 +279,9 @@
5.13 self.parsers = parsers
5.14 self.root = root
5.15
5.16 + def update_metadata(self, metadata):
5.17 + pass
5.18 +
5.19 def get_parser(self, format_type):
5.20
5.21 """
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/moinformat/parsers/html.py Fri Aug 18 00:18:42 2023 +0200
6.3 @@ -0,0 +1,82 @@
6.4 +#!/usr/bin/env python
6.5 +
6.6 +"""
6.7 +HTML document fragment parser.
6.8 +
6.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
6.10 +
6.11 +This program is free software; you can redistribute it and/or modify it under
6.12 +the terms of the GNU General Public License as published by the Free Software
6.13 +Foundation; either version 3 of the License, or (at your option) any later
6.14 +version.
6.15 +
6.16 +This program is distributed in the hope that it will be useful, but WITHOUT
6.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
6.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
6.19 +details.
6.20 +
6.21 +You should have received a copy of the GNU General Public License along with
6.22 +this program. If not, see <http://www.gnu.org/licenses/>.
6.23 +"""
6.24 +
6.25 +from moinformat.parsers.common import ParserBase
6.26 +from moinformat.tree.html import Element, Fragment
6.27 +from moinformat.utils.htmlparse import Parser
6.28 +
6.29 +class HTMLParser(ParserBase):
6.30 +
6.31 + "An HTML document fragment parser."
6.32 +
6.33 + formats = ["html"]
6.34 +
6.35 + def __init__(self, metadata):
6.36 + self.metadata = metadata
6.37 +
6.38 + def parse(self, s):
6.39 +
6.40 + "Parse the HTML document text in 's'."
6.41 +
6.42 + doc = Parser(s).parse()
6.43 +
6.44 + # If a fragment is requested or a theme name is set, find the body
6.45 + # node and return its children in a fragment.
6.46 +
6.47 + if self.metadata.get("fragment") or self.metadata.get("theme_name"):
6.48 + body = self._find_body(doc)
6.49 +
6.50 + if body:
6.51 + return Fragment(body.nodes)
6.52 + else:
6.53 + return None
6.54 +
6.55 + # Otherwise, return the top-level node.
6.56 +
6.57 + else:
6.58 + return doc
6.59 +
6.60 + def _find_body(self, node):
6.61 +
6.62 + """
6.63 + Find the body element from 'node', returning the element if found or
6.64 + None otherwise.
6.65 + """
6.66 +
6.67 + # Search all nodes with children.
6.68 +
6.69 + if isinstance(node, Fragment):
6.70 +
6.71 + # Return the node if it is a body element.
6.72 +
6.73 + if isinstance(node, Element) and node.name == "body":
6.74 + return node
6.75 +
6.76 + for n in node.nodes:
6.77 + body = self._find_body(n)
6.78 + if body:
6.79 + return body
6.80 +
6.81 + return None
6.82 +
6.83 +parser = HTMLParser
6.84 +
6.85 +# vim: tabstop=4 expandtab shiftwidth=4
7.1 --- a/moinformat/parsers/moin.py Tue Jun 20 18:58:47 2023 +0200
7.2 +++ b/moinformat/parsers/moin.py Fri Aug 18 00:18:42 2023 +0200
7.3 @@ -56,15 +56,14 @@
7.4
7.5 formats = ["moin", "wiki"]
7.6
7.7 - def __init__(self, metadata, parsers=None, root=None):
7.8 + # Principal parser methods.
7.9 +
7.10 + def parse(self, s):
7.11
7.12 """
7.13 - Initialise the parser with the given 'metadata' and optional 'parsers'.
7.14 - An optional 'root' indicates the document-level parser.
7.15 + Parse page text 's'. Pages consist of regions delimited by markers.
7.16 """
7.17
7.18 - ParserBase.__init__(self, metadata, parsers, root)
7.19 -
7.20 # Record certain node occurrences for later evaluation.
7.21
7.22 self.macros = []
7.23 @@ -77,13 +76,7 @@
7.24
7.25 self.link_targets = []
7.26
7.27 - # Principal parser methods.
7.28 -
7.29 - def parse(self, s):
7.30 -
7.31 - """
7.32 - Parse page text 's'. Pages consist of regions delimited by markers.
7.33 - """
7.34 + # Obtain the token stream and a region to populate.
7.35
7.36 self.items = self.get_items(s)
7.37 self.region = Region([], type="moin")
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
8.2 +++ b/moinformat/parsers/pretty.py Fri Aug 18 00:18:42 2023 +0200
8.3 @@ -0,0 +1,78 @@
8.4 +#!/usr/bin/env python
8.5 +
8.6 +"""
8.7 +Prettyprinted document tree parser.
8.8 +
8.9 +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
8.10 +
8.11 +This program is free software; you can redistribute it and/or modify it under
8.12 +the terms of the GNU General Public License as published by the Free Software
8.13 +Foundation; either version 3 of the License, or (at your option) any later
8.14 +version.
8.15 +
8.16 +This program is distributed in the hope that it will be useful, but WITHOUT
8.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
8.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
8.19 +details.
8.20 +
8.21 +You should have received a copy of the GNU General Public License along with
8.22 +this program. If not, see <http://www.gnu.org/licenses/>.
8.23 +"""
8.24 +
8.25 +from moinformat.tree.pretty import Node
8.26 +
8.27 +class PrettyParser:
8.28 +
8.29 + "A prettyprinted document tree parser."
8.30 +
8.31 + formats = ["pretty"]
8.32 +
8.33 + def __init__(self, metadata):
8.34 + self.metadata = metadata
8.35 +
8.36 + def parse(self, s):
8.37 +
8.38 + "Parse the tree structure representation in 's'."
8.39 +
8.40 + indent = 0
8.41 + branches = []
8.42 +
8.43 + for line in s.split("\n"):
8.44 + line = line.rstrip()
8.45 + if not line:
8.46 + continue
8.47 +
8.48 + new_indent = line.rfind(" ") + 1
8.49 + node = Node(line[new_indent:])
8.50 +
8.51 + # Establish a branch to add nodes to.
8.52 +
8.53 + if not branches:
8.54 + branches.append(node)
8.55 + else:
8.56 + # Note the current node as outermost branch.
8.57 +
8.58 + if new_indent > indent:
8.59 + branches.append(node)
8.60 + else:
8.61 + # Reduced indent involves obtaining an inner branch again.
8.62 +
8.63 + while indent > new_indent:
8.64 + del branches[-1]
8.65 + indent -= 2
8.66 +
8.67 + # Note the current node as outermost branch.
8.68 +
8.69 + branches[-1] = node
8.70 +
8.71 + # Append the current node to the parent branch.
8.72 +
8.73 + branches[-2].append(node)
8.74 +
8.75 + indent = new_indent
8.76 +
8.77 + return branches[0]
8.78 +
8.79 +parser = PrettyParser
8.80 +
8.81 +# vim: tabstop=4 expandtab shiftwidth=4
9.1 --- a/moinformat/serialisers/__init__.py Tue Jun 20 18:58:47 2023 +0200
9.2 +++ b/moinformat/serialisers/__init__.py Fri Aug 18 00:18:42 2023 +0200
9.3 @@ -3,7 +3,7 @@
9.4 """
9.5 Moin wiki serialisers.
9.6
9.7 -Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>
9.8 +Copyright (C) 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>
9.9
9.10 This program is free software; you can redistribute it and/or modify it under
9.11 the terms of the GNU General Public License as published by the Free Software
9.12 @@ -23,11 +23,15 @@
9.13
9.14 # Top-level functions.
9.15
9.16 -def get_serialiser(name):
9.17 +def get_serialiser(name, doctype=None):
9.18
9.19 - "Return the main serialiser class for the format having the given 'name'."
9.20 + """
9.21 + Return the main serialiser class for the format having the given 'name'.
9.22 + If 'doctype' is indicated, obtain a serialiser class specific to that
9.23 + document type. Otherwise, a general Moin serialiser class is obtained.
9.24 + """
9.25
9.26 - return serialisers["%s.moin" % name]
9.27 + return serialisers["%s.%s" % (name, doctype or "moin")]
9.28
9.29 def make_serialiser(metadata, format=None):
9.30
9.31 @@ -43,7 +47,7 @@
9.32 "Serialise 'doc' using the given 'serialiser' instance."
9.33
9.34 serialiser.reset()
9.35 - doc.to_string(serialiser)
9.36 + doc.visit(serialiser)
9.37 return serialiser.get_output()
9.38
9.39 # vim: tabstop=4 expandtab shiftwidth=4
10.1 --- a/moinformat/serialisers/common.py Tue Jun 20 18:58:47 2023 +0200
10.2 +++ b/moinformat/serialisers/common.py Fri Aug 18 00:18:42 2023 +0200
10.3 @@ -3,7 +3,7 @@
10.4 """
10.5 Moin serialiser support.
10.6
10.7 -Copyright (C) 2017, 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
10.8 +Copyright (C) 2017, 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
10.9
10.10 This program is free software; you can redistribute it and/or modify it under
10.11 the terms of the GNU General Public License as published by the Free Software
10.12 @@ -92,6 +92,36 @@
10.13 else:
10.14 return cls(self.metadata, self.serialisers)
10.15
10.16 + # Serialisation visitor methods.
10.17 +
10.18 + def visit(self, node):
10.19 +
10.20 + "Visit the 'node' to invoke the appropriate serialisation handler."
10.21 +
10.22 + node.visit(self)
10.23 +
10.24 + def visit_region(self, region):
10.25 +
10.26 + """
10.27 + Obtain a serialiser for the region from the same format family. Retain
10.28 + the same serialiser if no appropriate serialiser could be obtained.
10.29 + """
10.30 +
10.31 + serialiser_name = self.formats and "%s.%s" % (self.formats[0], region.type) or None
10.32 + serialiser = self.get_serialiser(serialiser_name)
10.33 +
10.34 + # Serialise the region.
10.35 +
10.36 + serialiser.container(region)
10.37 +
10.38 + def container(self, container):
10.39 +
10.40 + "Visit all nodes in 'container'."
10.41 +
10.42 + if container.nodes:
10.43 + for node in container.nodes:
10.44 + self.visit(node)
10.45 +
10.46 def escape_attr(s):
10.47
10.48 "Escape XML document attribute."
11.1 --- a/moinformat/serialisers/html/graphviz.py Tue Jun 20 18:58:47 2023 +0200
11.2 +++ b/moinformat/serialisers/html/graphviz.py Fri Aug 18 00:18:42 2023 +0200
11.3 @@ -3,7 +3,7 @@
11.4 """
11.5 Graphviz serialiser, generating content for embedding in HTML documents.
11.6
11.7 -Copyright (C) 2018, 2019, 2022 Paul Boddie <paul@boddie.org.uk>
11.8 +Copyright (C) 2018, 2019, 2022, 2023 Paul Boddie <paul@boddie.org.uk>
11.9
11.10 This program is free software; you can redistribute it and/or modify it under
11.11 the terms of the GNU General Public License as published by the Free Software
11.12 @@ -55,19 +55,16 @@
11.13 def init(self):
11.14 self.directives = {}
11.15
11.16 - def start_block(self):
11.17 - pass
11.18 + def block(self, block):
11.19 + self.container(block)
11.20
11.21 - def end_block(self):
11.22 - pass
11.23 -
11.24 - def directive(self, key, value, directive):
11.25 - if not self.directives.has_key(key):
11.26 - self.directives[key] = []
11.27 - self.directives[key].append(value)
11.28 + def directive(self, directive):
11.29 + if not self.directives.has_key(directive.key):
11.30 + self.directives[directive.key] = []
11.31 + self.directives[directive.key].append(directive.value)
11.32
11.33 def text(self, text):
11.34 - self.process_graph(text)
11.35 + self.process_graph(text.s)
11.36
11.37
11.38
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
12.2 +++ b/moinformat/serialisers/html/html.py Fri Aug 18 00:18:42 2023 +0200
12.3 @@ -0,0 +1,72 @@
12.4 +#!/usr/bin/env python
12.5 +
12.6 +"""
12.7 +HTML serialiser.
12.8 +
12.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
12.10 +
12.11 +This program is free software; you can redistribute it and/or modify it under
12.12 +the terms of the GNU General Public License as published by the Free Software
12.13 +Foundation; either version 3 of the License, or (at your option) any later
12.14 +version.
12.15 +
12.16 +This program is distributed in the hope that it will be useful, but WITHOUT
12.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12.19 +details.
12.20 +
12.21 +You should have received a copy of the GNU General Public License along with
12.22 +this program. If not, see <http://www.gnu.org/licenses/>.
12.23 +"""
12.24 +
12.25 +from moinformat.serialisers.common import Serialiser
12.26 +
12.27 +
12.28 +
12.29 +# The serialiser class.
12.30 +
12.31 +class HTMLSerialiser(Serialiser):
12.32 +
12.33 + "Serialisation of HTML fragments."
12.34 +
12.35 + input_formats = ["html"]
12.36 + formats = ["html"]
12.37 +
12.38 + def attribute(self, attribute):
12.39 + self.out(attribute.name)
12.40 + if attribute.value is not None:
12.41 + self.out("=")
12.42 + self.visit(attribute.value)
12.43 +
12.44 + def attribute_value(self, attribute_value):
12.45 + self.out("%s%s%s" % (attribute_value.quote, attribute_value.value, attribute_value.quote))
12.46 +
12.47 + def element(self, element):
12.48 + self.out("<%s" % element.name)
12.49 + for attribute in element.attributes:
12.50 + self.out(" ")
12.51 + self.visit(attribute)
12.52 + self.out(">")
12.53 + self.container(element)
12.54 + self.out("</%s>" % element.name)
12.55 +
12.56 + def comment(self, comment):
12.57 + self.out("<%s>" % comment.value)
12.58 +
12.59 + def directive(self, directive):
12.60 + self.out("<%s>" % directive.value)
12.61 +
12.62 + def inclusion(self, inclusion):
12.63 + self.out("<%s>" % inclusion.value)
12.64 +
12.65 + def node(self, node):
12.66 + self.out(node.value)
12.67 +
12.68 + text = node
12.69 +
12.70 + def fragment(self, fragment):
12.71 + self.container(fragment)
12.72 +
12.73 +serialiser = HTMLSerialiser
12.74 +
12.75 +# vim: tabstop=4 expandtab shiftwidth=4
13.1 --- a/moinformat/serialisers/html/moin.py Tue Jun 20 18:58:47 2023 +0200
13.2 +++ b/moinformat/serialisers/html/moin.py Fri Aug 18 00:18:42 2023 +0200
13.3 @@ -3,7 +3,8 @@
13.4 """
13.5 HTML serialiser.
13.6
13.7 -Copyright (C) 2017, 2018, 2019, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
13.8 +Copyright (C) 2017, 2018, 2019, 2021, 2022,
13.9 + 2023 Paul Boddie <paul@boddie.org.uk>
13.10
13.11 This program is free software; you can redistribute it and/or modify it under
13.12 the terms of the GNU General Public License as published by the Free Software
13.13 @@ -30,90 +31,7 @@
13.14 input_formats = ["moin", "wiki"]
13.15 formats = ["html"]
13.16
13.17 - def _region_tag(self, type):
13.18 -
13.19 - # NOTE: Need to support types in general.
13.20 -
13.21 - type = type and type.split()[0]
13.22 -
13.23 - if type == "inline":
13.24 - return "tt"
13.25 - elif type in (None, "python"):
13.26 - return "pre"
13.27 - else:
13.28 - return "span"
13.29 -
13.30 - def start_region(self, level, indent, type, args, extra):
13.31 -
13.32 - # Generate attributes, joining them when preparing the tag.
13.33 -
13.34 - l = []
13.35 - out = l.append
13.36 -
13.37 - if level:
13.38 - out("level-%d" % level)
13.39 -
13.40 - if indent:
13.41 - out("indent-%d" % indent)
13.42 -
13.43 - # NOTE: Encode type details for CSS.
13.44 -
13.45 - out("type-%s" % escape_attr(type or "opaque"))
13.46 -
13.47 - tag = self._region_tag(type)
13.48 -
13.49 - # Inline regions must preserve "indent" as space in the text.
13.50 -
13.51 - if type == "inline" and indent:
13.52 - self.out(" " * indent)
13.53 -
13.54 - self.out("<%s class='%s'>" % (tag, " ".join(l)))
13.55 -
13.56 - def end_region(self, level, indent, type, args, extra):
13.57 - tag = self._region_tag(type)
13.58 - self.out("</%s>" % tag)
13.59 -
13.60 - def start_block(self):
13.61 - self.out("<p>")
13.62 -
13.63 - def end_block(self):
13.64 - self.out("</p>")
13.65 -
13.66 - def start_defitem(self, pad, extra):
13.67 - self.out("<dd>")
13.68 -
13.69 - def end_defitem(self, pad, extra):
13.70 - self.out("</dd>")
13.71 -
13.72 - def start_defterm(self, pad, extra):
13.73 - self.out("<dt>")
13.74 -
13.75 - def end_defterm(self, pad, extra):
13.76 - self.out("</dt>")
13.77 -
13.78 - def start_emphasis(self):
13.79 - self.out("<em>")
13.80 -
13.81 - def end_emphasis(self):
13.82 - self.out("</em>")
13.83 -
13.84 - def start_heading(self, level, extra, pad, identifier):
13.85 - self.out("<h%d id='%s'>" % (level, escape_attr(self.linker.make_id(identifier))))
13.86 -
13.87 - def end_heading(self, level, pad, extra):
13.88 - self.out("</h%d>" % level)
13.89 -
13.90 - def start_larger(self):
13.91 - self.out("<big>")
13.92 -
13.93 - def end_larger(self):
13.94 - self.out("</big>")
13.95 -
13.96 - def start_linktext(self):
13.97 - pass
13.98 -
13.99 - def end_linktext(self):
13.100 - pass
13.101 + # Support methods.
13.102
13.103 list_tags = {
13.104 "i" : "lower-roman",
13.105 @@ -132,141 +50,246 @@
13.106
13.107 return "ul", None
13.108
13.109 - def start_list(self, indent, marker, num):
13.110 - tag, style_type = self._get_list_tag(marker)
13.111 - style = style_type and ' style="list-style-type: %s"' % escape_attr(style_type) or ""
13.112 - start = style_type and num is not None and ' start="%s"' % escape_attr(num) or ""
13.113 - self.out("<%s%s%s>" % (tag, style, start))
13.114 + def _link(self, target, nodes, tag, attr):
13.115 + link = self.linker and self.linker.translate(target) or None
13.116 +
13.117 + self.out('<%s %s="%s"' % (tag, attr, escape_attr(link.get_target())))
13.118 +
13.119 + # Provide link parameters as attributes.
13.120 +
13.121 + if nodes:
13.122 + for node in nodes:
13.123 + if isinstance(node, LinkParameter):
13.124 + self.out(" ")
13.125 + node.visit(self)
13.126 +
13.127 + # Close the tag if an image.
13.128 +
13.129 + if tag == "img":
13.130 + self.out(" />")
13.131 +
13.132 + # Provide the link label if specified. Otherwise, use a generated
13.133 + # default for the label.
13.134 +
13.135 + else:
13.136 + self.out(">")
13.137 +
13.138 + for node in nodes or []:
13.139 + if isinstance(node, LinkLabel):
13.140 + node.visit(self)
13.141 + break
13.142 + else:
13.143 + self.out(escape_text(link.get_label()))
13.144 +
13.145 + self.out("</%s>" % tag)
13.146 +
13.147 + def _region_tag(self, type):
13.148 +
13.149 + # NOTE: Need to support types in general.
13.150 +
13.151 + type = type and type.split()[0]
13.152
13.153 - def end_list(self, indent, marker, num):
13.154 - tag, style = self._get_list_tag(marker)
13.155 + if type == "inline":
13.156 + return "tt"
13.157 + elif type in (None, "python"):
13.158 + return "pre"
13.159 + else:
13.160 + return "span"
13.161 +
13.162 + # Node handler methods.
13.163 +
13.164 + def region(self, region):
13.165 + tag = self._region_tag(region.type)
13.166 +
13.167 + # Generate attributes, joining them when preparing the tag.
13.168 +
13.169 + attrs = []
13.170 + attr = attrs.append
13.171 +
13.172 + if region.level:
13.173 + attr("region-level-%d" % region.level)
13.174 +
13.175 + if region.indent:
13.176 + attr("region-indent-%d" % region.indent)
13.177 +
13.178 + # NOTE: Encode type details for CSS.
13.179 +
13.180 + attr("region-type-%s" % escape_attr(region.type or "opaque"))
13.181 +
13.182 + # Inline regions must preserve "indent" as space in the text.
13.183 +
13.184 + if region.type == "inline" and region.indent:
13.185 + self.out(" " * region.indent)
13.186 +
13.187 + self.out("<%s class='%s'>" % (tag, " ".join(attrs)))
13.188 +
13.189 + # Serialise the region content.
13.190 +
13.191 + self.visit_region(region)
13.192 +
13.193 + # End the region with the previous serialiser.
13.194 +
13.195 self.out("</%s>" % tag)
13.196
13.197 - def start_listitem(self, indent, marker, space, num):
13.198 + # Block node methods.
13.199 +
13.200 + def block(self, block):
13.201 + self.out("<p>")
13.202 + self.container(block)
13.203 + self.out("</p>")
13.204 +
13.205 + def defitem(self, defitem):
13.206 + self.out("<dd>")
13.207 + self.container(defitem)
13.208 + self.out("</dd>")
13.209 +
13.210 + def defterm(self, defterm):
13.211 + self.out("<dt>")
13.212 + self.container(defterm)
13.213 + self.out("</dt>")
13.214 +
13.215 + def fontstyle(self, fontstyle):
13.216 + if fontstyle.emphasis:
13.217 + self.out("<em>")
13.218 + elif fontstyle.strong:
13.219 + self.out("<strong>")
13.220 + self.container(fontstyle)
13.221 + if fontstyle.emphasis:
13.222 + self.out("</em>")
13.223 + elif fontstyle.strong:
13.224 + self.out("</strong>")
13.225 +
13.226 + def heading(self, heading):
13.227 + self.out("<h%d id='%s'>" % (
13.228 + heading.level,
13.229 + escape_attr(self.linker.make_id(heading.identifier))))
13.230 + self.container(heading)
13.231 + self.out("</h%d>" % heading.level)
13.232 +
13.233 + def larger(self, larger):
13.234 + self.out("<big>")
13.235 + self.container(larger)
13.236 + self.out("</big>")
13.237 +
13.238 + def list(self, list):
13.239 + tag, style_type = self._get_list_tag(list.marker)
13.240 + style = style_type and \
13.241 + ' style="list-style-type: %s"' % escape_attr(style_type) or ""
13.242 + start = style_type and \
13.243 + list.num is not None and ' start="%s"' % escape_attr(list.num) or ""
13.244 + self.out("<%s%s%s>" % (tag, style, start))
13.245 + self.container(list)
13.246 + self.out("</%s>" % tag)
13.247 +
13.248 + def listitem(self, listitem):
13.249 self.out("<li>")
13.250 -
13.251 - def end_listitem(self, indent, marker, space, num):
13.252 + self.container(listitem)
13.253 self.out("</li>")
13.254
13.255 - def start_macro(self, name, args, nodes, inline):
13.256 + def macro(self, macro):
13.257
13.258 # Special case of a deliberately unexpanded macro.
13.259
13.260 - if nodes is None:
13.261 + if macro.nodes is None:
13.262 return
13.263
13.264 - tag = inline and "span" or "div"
13.265 - self.out("<%s class='macro %s'>" % (tag, escape_text(name)))
13.266 + tag = macro.inline and "span" or "div"
13.267 + self.out("<%s class='macro %s'>" % (tag, escape_text(macro.name)))
13.268
13.269 # Fallback case for when macros are not replaced.
13.270
13.271 - if not nodes:
13.272 + if not macro.nodes:
13.273 self.out(escape_text("<<"))
13.274 - self.out("<span class='name'>%s</span>" % escape_text(name))
13.275 - if args:
13.276 + self.out("<span class='name'>%s</span>" % escape_text(macro.name))
13.277 + if macro.args:
13.278 self.out("(")
13.279 first = True
13.280 - for arg in args:
13.281 + for arg in macro.args:
13.282 if not first:
13.283 self.out(",")
13.284 self.out("<span class='arg'>%s</span>" % escape_text(arg))
13.285 first = False
13.286 - if args:
13.287 + if macro.args:
13.288 self.out(")")
13.289 self.out(escape_text(">>"))
13.290
13.291 - def end_macro(self, inline):
13.292 - tag = inline and "span" or "div"
13.293 + # Produce the expanded macro content.
13.294 +
13.295 + else:
13.296 + self.container(macro)
13.297 +
13.298 + tag = macro.inline and "span" or "div"
13.299 self.out("</%s>" % tag)
13.300
13.301 - def start_monospace(self):
13.302 + def monospace(self, monospace):
13.303 self.out("<tt>")
13.304 -
13.305 - def end_monospace(self):
13.306 + self.container(monospace)
13.307 self.out("</tt>")
13.308
13.309 - def start_smaller(self):
13.310 + def smaller(self, smaller):
13.311 self.out("<small>")
13.312 -
13.313 - def end_smaller(self):
13.314 + self.container(smaller)
13.315 self.out("</small>")
13.316
13.317 - def start_strikethrough(self):
13.318 + def strikethrough(self, strikethrough):
13.319 self.out("<del>")
13.320 -
13.321 - def end_strikethrough(self):
13.322 + self.container(strikethrough)
13.323 self.out("</del>")
13.324
13.325 - def start_strong(self):
13.326 - self.out("<strong>")
13.327 -
13.328 - def end_strong(self):
13.329 - self.out("</strong>")
13.330 -
13.331 - def start_subscript(self):
13.332 + def subscript(self, subscript):
13.333 self.out("<sub>")
13.334 -
13.335 - def end_subscript(self):
13.336 + self.container(subscript)
13.337 self.out("</sub>")
13.338
13.339 - def start_superscript(self):
13.340 + def superscript(self, superscript):
13.341 self.out("<sup>")
13.342 -
13.343 - def end_superscript(self):
13.344 + self.container(superscript)
13.345 self.out("</sup>")
13.346
13.347 - def start_table(self):
13.348 + def table(self, table):
13.349 self.out("<table>")
13.350 -
13.351 - def end_table(self):
13.352 + self.container(table)
13.353 self.out("</table>")
13.354
13.355 - def start_table_attrs(self):
13.356 - pass
13.357 -
13.358 - def end_table_attrs(self):
13.359 - pass
13.360 -
13.361 - def start_table_cell(self, attrs, leading, padding):
13.362 + def table_cell(self, table_cell):
13.363 self.out("<td")
13.364
13.365 # Handle the attributes separately from their container.
13.366
13.367 - if attrs and not attrs.empty():
13.368 - for attr in attrs.nodes:
13.369 - attr.to_string(self)
13.370 + if table_cell.attrs and not table_cell.attrs.empty():
13.371 + for attr in table_cell.attrs.nodes:
13.372 + attr.visit(self)
13.373
13.374 self.out(">")
13.375 -
13.376 - def end_table_cell(self):
13.377 + self.container(table_cell)
13.378 self.out("</td>")
13.379
13.380 - def start_table_row(self, leading, padding):
13.381 + def table_row(self, table_row):
13.382 self.out("<tr>")
13.383 -
13.384 - def end_table_row(self, trailing):
13.385 + self.container(table_row)
13.386 self.out("</tr>")
13.387
13.388 - def start_underline(self):
13.389 + def underline(self, underline):
13.390 self.out("<span style='text-decoration: underline'>")
13.391 -
13.392 - def end_underline(self):
13.393 + self.container(underline)
13.394 self.out("</span>")
13.395
13.396 - def anchor(self, target):
13.397 - self.out("<a name='%s' />" % escape_attr(self.linker.make_id(target)))
13.398 + # Inline node methods.
13.399
13.400 - def break_(self):
13.401 + def anchor(self, anchor):
13.402 + self.out("<a name='%s' />" % escape_attr(self.linker.make_id(anchor.target)))
13.403 +
13.404 + def break_(self, break_):
13.405 pass
13.406
13.407 - def comment(self, comment, extra):
13.408 + def comment(self, comment):
13.409 pass
13.410
13.411 - def directive(self, directive, extra):
13.412 + def directive(self, directive):
13.413
13.414 # Obtain a blank value if the value is missing.
13.415
13.416 - name, text = (directive.split(None, 1) + [""])[:2]
13.417 + name, text = (directive.directive.split(None, 1) + [""])[:2]
13.418
13.419 # Produce a readable redirect.
13.420
13.421 @@ -281,80 +304,51 @@
13.422
13.423 self.end_block()
13.424
13.425 - def linebreak(self):
13.426 + def linebreak(self, linebreak):
13.427 self.out("<br />")
13.428
13.429 - def _link(self, target, nodes, tag, attr):
13.430 - link = self.linker and self.linker.translate(target) or None
13.431 -
13.432 - self.out('<%s %s="%s"' % (tag, attr, escape_attr(link.get_target())))
13.433 -
13.434 - # Provide link parameters as attributes.
13.435 -
13.436 - if nodes:
13.437 - for node in nodes:
13.438 - if isinstance(node, LinkParameter):
13.439 - self.out(" ")
13.440 - node.to_string(self)
13.441 -
13.442 - # Close the tag if an image.
13.443 -
13.444 - if tag == "img":
13.445 - self.out(" />")
13.446 -
13.447 - # Provide the link label if specified. Otherwise, use a generated
13.448 - # default for the label.
13.449 + def link(self, link):
13.450 + self._link(link.target, link.nodes, "a", "href")
13.451
13.452 - else:
13.453 - self.out(">")
13.454 -
13.455 - for node in nodes or []:
13.456 - if isinstance(node, LinkLabel):
13.457 - node.to_string(self)
13.458 - break
13.459 - else:
13.460 - self.out(escape_text(link.get_label()))
13.461 + def link_label(self, link_label):
13.462 + self.container(link_label)
13.463
13.464 - self.out("</%s>" % tag)
13.465 -
13.466 - def link(self, target, nodes):
13.467 - self._link(target, nodes, "a", "href")
13.468 + def link_parameter(self, link_parameter):
13.469 + s = link_parameter.text_content()
13.470 + key_value = s.split("=", 1)
13.471
13.472 - def link_label(self, nodes):
13.473 - for node in nodes:
13.474 - node.to_string(self)
13.475 -
13.476 - def link_parameter(self, key_value):
13.477 if len(key_value) == 1:
13.478 self.out(key_value[0])
13.479 else:
13.480 key, value = key_value
13.481 self.out("%s='%s'" % (key, escape_attr(value)))
13.482
13.483 - def nbsp(self):
13.484 + def nbsp(self, nbsp):
13.485 self.out(" ")
13.486
13.487 - def rule(self, height):
13.488 - self.out("<hr style='height: %dpt' />" % min(height, 10))
13.489 + def rule(self, rule):
13.490 + self.out("<hr style='height: %dpt' />" % min(rule.height, 10))
13.491
13.492 - def table_attrs(self, nodes):
13.493 + def table_attrs(self, table_attrs):
13.494
13.495 # Skip the attributes in their original form.
13.496
13.497 pass
13.498
13.499 - def table_attr(self, name, value, concise, quote):
13.500 - self.out(" %s%s" % (escape_text(name), value is not None and
13.501 - "='%s'" % escape_attr(value) or ""))
13.502 + def table_attr(self, table_attr):
13.503 + self.out(" %s%s" % (
13.504 + escape_text(table_attr.name),
13.505 + table_attr.value is not None and
13.506 + "='%s'" % escape_attr(table_attr.value) or ""))
13.507
13.508 - def text(self, s):
13.509 - self.out(escape_text(s))
13.510 + def text(self, text):
13.511 + self.out(escape_text(text.s))
13.512
13.513 - def transclusion(self, target, nodes):
13.514 - self._link(target, nodes, "img", "src")
13.515 + def transclusion(self, transclusion):
13.516 + self._link(transclusion.target, transclusion.nodes, "img", "src")
13.517
13.518 - def verbatim(self, s):
13.519 - self.text(s)
13.520 + def verbatim(self, verbatim):
13.521 + self.out(escape_text(verbatim.text))
13.522
13.523 serialiser = HTMLSerialiser
13.524
14.1 --- a/moinformat/serialisers/html/table.py Tue Jun 20 18:58:47 2023 +0200
14.2 +++ b/moinformat/serialisers/html/table.py Fri Aug 18 00:18:42 2023 +0200
14.3 @@ -28,7 +28,7 @@
14.4
14.5 input_formats = ["table"]
14.6
14.7 - def continuation(self, text):
14.8 + def continuation(self, continuation):
14.9 self.out(" ")
14.10
14.11 serialiser = HTMLTableSerialiser
15.1 --- a/moinformat/serialisers/moin/graphviz.py Tue Jun 20 18:58:47 2023 +0200
15.2 +++ b/moinformat/serialisers/moin/graphviz.py Fri Aug 18 00:18:42 2023 +0200
15.3 @@ -3,7 +3,7 @@
15.4 """
15.5 Moin Graphviz region serialiser.
15.6
15.7 -Copyright (C) 2018, 2021 Paul Boddie <paul@boddie.org.uk>
15.8 +Copyright (C) 2018, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
15.9
15.10 This program is free software; you can redistribute it and/or modify it under
15.11 the terms of the GNU General Public License as published by the Free Software
15.12 @@ -28,20 +28,19 @@
15.13 input_formats = ["graphviz", "dot"]
15.14 formats = ["moin", "wiki"]
15.15
15.16 - def start_block(self):
15.17 - pass
15.18 -
15.19 - def end_block(self):
15.20 - pass
15.21 + def block(self, block):
15.22 + self.container(block)
15.23
15.24 - def directive(self, key, value, directive):
15.25 - if directive:
15.26 - self.out("#%s\n" % directive)
15.27 + def directive(self, directive):
15.28 + if directive.directive:
15.29 + self.out("#%s\n" % directive.directive)
15.30 else:
15.31 - self.out("//%s%s\n" % (value and "%s=" % key or key, value or ""))
15.32 + self.out("//%s%s\n" % (
15.33 + directive.value and "%s=" % directive.key or directive.key,
15.34 + directive.value or ""))
15.35
15.36 def text(self, text):
15.37 - self.out(text)
15.38 + self.out(text.s)
15.39
15.40 serialiser = MoinGraphvizSerialiser
15.41
16.1 --- a/moinformat/serialisers/moin/moin.py Tue Jun 20 18:58:47 2023 +0200
16.2 +++ b/moinformat/serialisers/moin/moin.py Fri Aug 18 00:18:42 2023 +0200
16.3 @@ -3,7 +3,7 @@
16.4 """
16.5 Moin wiki text serialiser.
16.6
16.7 -Copyright (C) 2017, 2018, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
16.8 +Copyright (C) 2017, 2018, 2021, 2022, 2023 Paul Boddie <paul@boddie.org.uk>
16.9
16.10 This program is free software; you can redistribute it and/or modify it under
16.11 the terms of the GNU General Public License as published by the Free Software
16.12 @@ -28,224 +28,216 @@
16.13 input_formats = ["moin", "wiki"]
16.14 formats = ["moin", "wiki"]
16.15
16.16 - def start_region(self, level, indent, type, args, extra):
16.17 + # Node handler methods.
16.18 +
16.19 + def region(self, region):
16.20 out = self.out
16.21 - if level:
16.22 - out(" " * indent + "{" * level)
16.23
16.24 - # Produce a header for regions within a top-level region.
16.25 + if region.level:
16.26 + out(" " * region.indent + "{" * region.level)
16.27
16.28 - if type and type != "inline" and level:
16.29 + # Produce a header for regions within a top-level region.
16.30
16.31 - # Obtain individual arguments, excluding the region type.
16.32 + if region.type and region.type != "inline":
16.33 +
16.34 + # Obtain individual arguments, excluding the region type.
16.35
16.36 - args = args.split(" ")[1:]
16.37 - args_str = args and (" %s" % " ".join(args)) or ""
16.38 + args = region.args and region.args.split(" ")[1:] or None
16.39 + args_str = args and (" %s" % " ".join(args)) or ""
16.40
16.41 - out("#!%s%s\n" % (type, args_str))
16.42 + out("#!%s%s\n" % (region.type, args_str))
16.43 +
16.44 + # Serialise the region content.
16.45
16.46 - def end_region(self, level, indent, type, args, extra):
16.47 - out = self.out
16.48 - if level:
16.49 - out("%s%s" % ("}" * level, extra or ""))
16.50 + self.visit_region(region)
16.51
16.52 - def start_block(self):
16.53 - pass
16.54 + if region.level:
16.55 + out("%s%s" % ("}" * region.level, region.extra or ""))
16.56
16.57 - def end_block(self):
16.58 - pass
16.59 + # Block node methods.
16.60
16.61 - def start_defitem(self, pad, extra):
16.62 - self.out((extra and extra + "::" or "") + pad)
16.63 -
16.64 - def end_defitem(self, pad, extra):
16.65 - pass
16.66 + def block(self, block):
16.67 + self.container(block)
16.68
16.69 - def start_defterm(self, pad, extra):
16.70 - self.out(pad)
16.71 + def defitem(self, defitem):
16.72 + self.out((defitem.extra and defitem.extra + "::" or "") + defitem.pad)
16.73 + self.container(defitem)
16.74
16.75 - def end_defterm(self, pad, extra):
16.76 - self.out("::" + extra)
16.77 + def defterm(self, defterm):
16.78 + self.out(defterm.pad)
16.79 + self.container(defterm)
16.80 + self.out("::" + defterm.extra)
16.81
16.82 - def start_emphasis(self):
16.83 - self.out("''")
16.84 -
16.85 - def end_emphasis(self):
16.86 - self.out("''")
16.87 + def fontstyle(self, fontstyle):
16.88 + if fontstyle.emphasis:
16.89 + self.out("''")
16.90 + elif fontstyle.strong:
16.91 + self.out("'''")
16.92 + self.container(fontstyle)
16.93 + if fontstyle.emphasis:
16.94 + self.out("''")
16.95 + elif fontstyle.strong:
16.96 + self.out("'''")
16.97
16.98 - def start_heading(self, level, extra, pad, identifier):
16.99 - self.out(extra + "=" * level + pad)
16.100 + def heading(self, heading):
16.101 + self.out(heading.start_extra + "=" * heading.level + heading.start_pad)
16.102 + self.container(heading)
16.103 + self.out(heading.end_pad + "=" * heading.level + heading.end_extra)
16.104
16.105 - def end_heading(self, level, pad, extra):
16.106 - self.out(pad + "=" * level + extra)
16.107 -
16.108 - def start_larger(self):
16.109 + def larger(self, larger):
16.110 self.out("~+")
16.111 -
16.112 - def end_larger(self):
16.113 + self.container(larger)
16.114 self.out("+~")
16.115
16.116 - def start_list(self, indent, marker, num):
16.117 - pass
16.118 -
16.119 - def end_list(self, indent, marker, num):
16.120 - pass
16.121 + def list(self, list):
16.122 + self.container(list)
16.123
16.124 - def start_listitem(self, indent, marker, space, num):
16.125 - self.out("%s%s%s%s" % (indent * " ", marker, num and "#%s" % num or "", space))
16.126 + def listitem(self, listitem):
16.127 + self.out("%s%s%s%s" % (
16.128 + listitem.indent * " ",
16.129 + listitem.marker,
16.130 + listitem.num and "#%s" % listitem.num or "",
16.131 + listitem.space))
16.132 + self.container(listitem)
16.133
16.134 - def end_listitem(self, indent, marker, space, num):
16.135 - pass
16.136 -
16.137 - def start_macro(self, name, args, nodes, inline):
16.138 + def macro(self, macro):
16.139
16.140 # Special case of a deliberately unexpanded macro.
16.141
16.142 - if nodes is None:
16.143 + if macro.nodes is None:
16.144 return
16.145
16.146 # Fallback case for when macros are not replaced.
16.147
16.148 - if not nodes:
16.149 - self.out("<<%s%s>>" % (name, args and "(%s)" % ",".join(args) or ""))
16.150 + if not macro.nodes:
16.151 + self.out("<<%s%s>>" % (macro.name, macro.args and "(%s)" % ",".join(macro.args) or ""))
16.152
16.153 - def end_macro(self, inline):
16.154 - pass
16.155 -
16.156 - def start_monospace(self):
16.157 + def monospace(self, monospace):
16.158 + self.out("`")
16.159 + self.container(monospace)
16.160 self.out("`")
16.161
16.162 - def end_monospace(self):
16.163 - self.out("`")
16.164 -
16.165 - def start_smaller(self):
16.166 + def smaller(self, smaller):
16.167 self.out("~-")
16.168 -
16.169 - def end_smaller(self):
16.170 + self.container(smaller)
16.171 self.out("-~")
16.172
16.173 - def start_strong(self):
16.174 - self.out("'''")
16.175 -
16.176 - def end_strong(self):
16.177 - self.out("'''")
16.178 -
16.179 - def start_strikethrough(self):
16.180 + def strikethrough(self, strikethrough):
16.181 self.out("--(")
16.182 -
16.183 - def end_strikethrough(self):
16.184 + self.container(strikethrough)
16.185 self.out(")--")
16.186
16.187 - def start_subscript(self):
16.188 + def subscript(self, subscript):
16.189 + self.out(",,")
16.190 + self.container(subscript)
16.191 self.out(",,")
16.192
16.193 - def end_subscript(self):
16.194 - self.out(",,")
16.195 -
16.196 - def start_superscript(self):
16.197 + def superscript(self, superscript):
16.198 self.out("^")
16.199 -
16.200 - def end_superscript(self):
16.201 + self.container(superscript)
16.202 self.out("^")
16.203
16.204 - def start_table(self):
16.205 - pass
16.206 -
16.207 - def end_table(self):
16.208 - pass
16.209 + def table(self, table):
16.210 + self.container(table)
16.211
16.212 - def start_table_attrs(self):
16.213 - self.out("<")
16.214 -
16.215 - def end_table_attrs(self):
16.216 - self.out(">")
16.217 + def table_cell(self, table_cell):
16.218 + self.out("||")
16.219 + self.container(table_cell)
16.220
16.221 - def start_table_cell(self, attrs, leading, padding):
16.222 + def table_row(self, table_row):
16.223 + self.container(table_row)
16.224 self.out("||")
16.225 -
16.226 - def end_table_cell(self):
16.227 - pass
16.228 + self.out(table_row.trailing)
16.229
16.230 - def start_table_row(self, leading, padding):
16.231 - pass
16.232 -
16.233 - def end_table_row(self, trailing):
16.234 - self.out("||")
16.235 - self.out(trailing)
16.236 -
16.237 - def start_underline(self):
16.238 + def underline(self, underline):
16.239 + self.out("__")
16.240 + self.container(underline)
16.241 self.out("__")
16.242
16.243 - def end_underline(self):
16.244 - self.out("__")
16.245 + # Inline node methods.
16.246
16.247 - def anchor(self, target):
16.248 - self.out("((%s))" % target)
16.249 + def anchor(self, anchor):
16.250 + self.out("((%s))" % anchor.target)
16.251
16.252 - def break_(self):
16.253 + def break_(self, break_):
16.254 self.out("\n")
16.255
16.256 - def comment(self, comment, extra):
16.257 - self.out("##%s%s" % (comment, extra))
16.258 + def comment(self, comment):
16.259 + self.out("##%s%s" % (comment.comment, comment.extra))
16.260
16.261 - def directive(self, directive, extra):
16.262 - self.out("#%s%s" % (directive, extra))
16.263 + def directive(self, directive):
16.264 + self.out("#%s%s" % (directive.directive, directive.extra))
16.265
16.266 - def linebreak(self):
16.267 + def linebreak(self, linebreak):
16.268 self.out(r"\\")
16.269
16.270 - def link(self, target, nodes):
16.271 - self.out("[[%s" % target)
16.272 - for node in nodes:
16.273 + def link(self, link):
16.274 + self.out("[[%s" % link.target)
16.275 + for node in link.nodes:
16.276 self.out("|")
16.277 - node.to_string(self)
16.278 + node.visit(self)
16.279 self.out("]]")
16.280
16.281 - def link_label(self, nodes):
16.282 - for node in nodes:
16.283 - node.to_string(self)
16.284 + def link_label(self, link_label):
16.285 + self.container(link_label)
16.286
16.287 - def link_parameter(self, key_value):
16.288 + def link_parameter(self, link_parameter):
16.289 + s = link_parameter.text_content()
16.290 + key_value = s.split("=", 1)
16.291 +
16.292 if len(key_value) == 1:
16.293 self.out(key_value[0])
16.294 else:
16.295 self.out("=".join(key_value))
16.296
16.297 - def nbsp(self):
16.298 + def nbsp(self, nbsp):
16.299 self.out(r"\_")
16.300
16.301 - def rule(self, height):
16.302 - self.out("-" * (height + 4))
16.303 + def rule(self, rule):
16.304 + self.out("-" * (rule.height + 4))
16.305
16.306 - def table_attrs(self, nodes):
16.307 - for node in nodes:
16.308 - node.to_string(self)
16.309 + def table_attrs(self, table_attrs):
16.310 + self.out("<")
16.311 + self.container(table_attrs)
16.312 + if not table_attrs.incomplete:
16.313 + self.out(">")
16.314
16.315 - def table_attr(self, name, value, concise, quote):
16.316 - if concise:
16.317 - if name == "bgcolor": self.out(value)
16.318 - elif name == "colspan": self.out("-%s" % value)
16.319 - elif name == "align" : self.out(value == "left" and "(" or value == "right" and ")" or ":")
16.320 - elif name == "rowspan": self.out("|%s" % value)
16.321 - elif name == "valign" : self.out(value == "top" and "^" or "v")
16.322 - elif name == "width" : self.out(value)
16.323 + def table_attr(self, table_attr):
16.324 + if table_attr.concise:
16.325 + if table_attr.name == "bgcolor":
16.326 + self.out(table_attr.value)
16.327 + elif table_attr.name == "colspan":
16.328 + self.out("-%s" % table_attr.value)
16.329 + elif table_attr.name == "align":
16.330 + self.out(table_attr.value == "left" and "(" or table_attr.value == "right" and ")" or ":")
16.331 + elif table_attr.name == "rowspan":
16.332 + self.out("|%s" % table_attr.value)
16.333 + elif table_attr.name == "valign":
16.334 + self.out(table_attr.value == "top" and "^" or "v")
16.335 + elif table_attr.name == "width":
16.336 + self.out(table_attr.value)
16.337 else:
16.338 - self.out("%s%s" % (escape_text(name), value is not None and
16.339 - "=%s%s%s" % (quote or '"', escape_attr(value), quote or '"') or ""))
16.340 -
16.341 - def text(self, s):
16.342 - self.out(s)
16.343 + self.out("%s%s" % (
16.344 + escape_text(table_attr.name),
16.345 + table_attr.value is not None and "=%s%s%s" % (
16.346 + table_attr.quote or '"',
16.347 + escape_attr(table_attr.value),
16.348 + table_attr.quote or '"')
16.349 + or ""))
16.350
16.351 - def transclusion(self, target, nodes):
16.352 - self.out("{{%s" % target)
16.353 - for node in nodes:
16.354 + def text(self, text):
16.355 + self.out(text.s)
16.356 +
16.357 + def transclusion(self, transclusion):
16.358 + self.out("{{%s" % transclusion.target)
16.359 + for node in transclusion.nodes:
16.360 self.out("|")
16.361 - node.to_string(self)
16.362 + node.visit(self)
16.363 self.out("}}")
16.364
16.365 - def verbatim(self, text):
16.366 + def verbatim(self, verbatim):
16.367 self.out("<<<")
16.368 - self.out(text)
16.369 + self.out(verbatim.text)
16.370 self.out(">>>")
16.371
16.372 serialiser = MoinSerialiser
17.1 --- a/moinformat/serialisers/moin/table.py Tue Jun 20 18:58:47 2023 +0200
17.2 +++ b/moinformat/serialisers/moin/table.py Fri Aug 18 00:18:42 2023 +0200
17.3 @@ -3,7 +3,7 @@
17.4 """
17.5 Moin wiki table serialiser.
17.6
17.7 -Copyright (C) 2017, 2018, 2021 Paul Boddie <paul@boddie.org.uk>
17.8 +Copyright (C) 2017, 2018, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
17.9
17.10 This program is free software; you can redistribute it and/or modify it under
17.11 the terms of the GNU General Public License as published by the Free Software
17.12 @@ -31,31 +31,35 @@
17.13 self.first_cell = False
17.14 self.first_row = False
17.15
17.16 - def start_table(self):
17.17 + def table(self, table):
17.18 self.first_row = True
17.19 + self.container(table)
17.20
17.21 - def start_table_cell(self, attrs, leading, padding):
17.22 + def table_cell(self, table_cell):
17.23 if not self.first_cell:
17.24 - self.out(leading)
17.25 + self.out(table_cell.leading)
17.26 self.out("||")
17.27 else:
17.28 self.first_cell = False
17.29 - self.out(padding)
17.30
17.31 - def start_table_row(self, leading, padding):
17.32 + self.out(table_cell.padding)
17.33 + self.container(table_cell)
17.34 +
17.35 + def table_row(self, table_row):
17.36 self.first_cell = True
17.37 +
17.38 if not self.first_row:
17.39 - self.out(leading)
17.40 + self.out(table_row.leading)
17.41 self.out("==")
17.42 - self.out(padding)
17.43 + self.out(table_row.padding)
17.44 else:
17.45 self.first_row = False
17.46
17.47 - def end_table_row(self, trailing):
17.48 - self.out(trailing)
17.49 + self.container(table_row)
17.50 + self.out(table_row.trailing)
17.51
17.52 - def continuation(self, text):
17.53 - self.out(text)
17.54 + def continuation(self, continuation):
17.55 + self.out(continuation.text)
17.56
17.57 serialiser = MoinTableSerialiser
17.58
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
18.2 +++ b/moinformat/serialisers/pretty/__init__.py Fri Aug 18 00:18:42 2023 +0200
18.3 @@ -0,0 +1,22 @@
18.4 +#!/usr/bin/env python
18.5 +
18.6 +"""
18.7 +A package of modules containing prettyprinting serialisers.
18.8 +
18.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
18.10 +
18.11 +This program is free software; you can redistribute it and/or modify it under
18.12 +the terms of the GNU General Public License as published by the Free Software
18.13 +Foundation; either version 3 of the License, or (at your option) any later
18.14 +version.
18.15 +
18.16 +This program is distributed in the hope that it will be useful, but WITHOUT
18.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18.19 +details.
18.20 +
18.21 +You should have received a copy of the GNU General Public License along with
18.22 +this program. If not, see <http://www.gnu.org/licenses/>.
18.23 +"""
18.24 +
18.25 +# vim: tabstop=4 expandtab shiftwidth=4
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
19.2 +++ b/moinformat/serialisers/pretty/common.py Fri Aug 18 00:18:42 2023 +0200
19.3 @@ -0,0 +1,38 @@
19.4 +#!/usr/bin/env python
19.5 +
19.6 +"""
19.7 +Generic prettyprinted text serialiser.
19.8 +
19.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
19.10 +
19.11 +This program is free software; you can redistribute it and/or modify it under
19.12 +the terms of the GNU General Public License as published by the Free Software
19.13 +Foundation; either version 3 of the License, or (at your option) any later
19.14 +version.
19.15 +
19.16 +This program is distributed in the hope that it will be useful, but WITHOUT
19.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19.19 +details.
19.20 +
19.21 +You should have received a copy of the GNU General Public License along with
19.22 +this program. If not, see <http://www.gnu.org/licenses/>.
19.23 +"""
19.24 +
19.25 +from moinformat.serialisers.common import Serialiser as CommonSerialiser
19.26 +
19.27 +class Serialiser(CommonSerialiser):
19.28 +
19.29 + "Serialisation of nodes for inspection."
19.30 +
19.31 + def container(self, container):
19.32 +
19.33 + "Visit all nodes in 'container'."
19.34 +
19.35 + if container.nodes:
19.36 + self.output.indent += " "
19.37 + for node in container.nodes:
19.38 + self.visit(node)
19.39 + self.output.indent = self.output.indent[:-2]
19.40 +
19.41 +# vim: tabstop=4 expandtab shiftwidth=4
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
20.2 +++ b/moinformat/serialisers/pretty/graphviz.py Fri Aug 18 00:18:42 2023 +0200
20.3 @@ -0,0 +1,47 @@
20.4 +#!/usr/bin/env python
20.5 +
20.6 +"""
20.7 +Prettyprinted text serialiser for Graphviz nodes.
20.8 +
20.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
20.10 +
20.11 +This program is free software; you can redistribute it and/or modify it under
20.12 +the terms of the GNU General Public License as published by the Free Software
20.13 +Foundation; either version 3 of the License, or (at your option) any later
20.14 +version.
20.15 +
20.16 +This program is distributed in the hope that it will be useful, but WITHOUT
20.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
20.19 +details.
20.20 +
20.21 +You should have received a copy of the GNU General Public License along with
20.22 +this program. If not, see <http://www.gnu.org/licenses/>.
20.23 +"""
20.24 +
20.25 +from moinformat.serialisers.pretty.common import Serialiser
20.26 +
20.27 +class GraphvizSerialiser(Serialiser):
20.28 +
20.29 + "Serialisation of Graphviz nodes for inspection."
20.30 +
20.31 + input_formats = ["dot", "graphviz"]
20.32 + formats = ["pretty"]
20.33 +
20.34 + # Node handler methods.
20.35 +
20.36 + def block(self, block):
20.37 + self.out("%sBlock\n" % self.output.indent)
20.38 + self.container(block)
20.39 +
20.40 + def directive(self, directive):
20.41 + self.out("%sDirective: key=%r value=%r directive=%r\n" % (
20.42 + self.output.indent, directive.key, directive.value,
20.43 + directive.directive))
20.44 +
20.45 + def text(self, text):
20.46 + self.out("%sText: %r\n" % (self.output.indent, text.s))
20.47 +
20.48 +serialiser = GraphvizSerialiser
20.49 +
20.50 +# vim: tabstop=4 expandtab shiftwidth=4
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
21.2 +++ b/moinformat/serialisers/pretty/html.py Fri Aug 18 00:18:42 2023 +0200
21.3 @@ -0,0 +1,63 @@
21.4 +#!/usr/bin/env python
21.5 +
21.6 +"""
21.7 +Prettyprinted text serialiser for HTML document nodes.
21.8 +
21.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
21.10 +
21.11 +This program is free software; you can redistribute it and/or modify it under
21.12 +the terms of the GNU General Public License as published by the Free Software
21.13 +Foundation; either version 3 of the License, or (at your option) any later
21.14 +version.
21.15 +
21.16 +This program is distributed in the hope that it will be useful, but WITHOUT
21.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
21.19 +details.
21.20 +
21.21 +You should have received a copy of the GNU General Public License along with
21.22 +this program. If not, see <http://www.gnu.org/licenses/>.
21.23 +"""
21.24 +
21.25 +from moinformat.serialisers.pretty.common import Serialiser
21.26 +
21.27 +class HTMLSerialiser(Serialiser):
21.28 +
21.29 + "Serialisation of HTML document nodes for inspection."
21.30 +
21.31 + input_formats = ["html"]
21.32 + formats = ["pretty"]
21.33 +
21.34 + def attribute(self, attribute):
21.35 + self.out("%sAttribute: %s" % (self.output.indent, attribute.name))
21.36 + if attribute.value is not None:
21.37 + self.out("=")
21.38 + self.visit(attribute.value)
21.39 + self.out("\n")
21.40 +
21.41 + def attribute_value(self, attribute_value):
21.42 + self.out("%s%s%s" % (attribute_value.quote, attribute_value.value, attribute_value.quote))
21.43 +
21.44 + def element(self, element):
21.45 + self.out("%sElement: name=%r\n" % (self.output.indent, element.name))
21.46 + self.output.indent += " "
21.47 + for attribute in element.attributes:
21.48 + self.visit(attribute)
21.49 + self.output.indent = self.output.indent[:-2]
21.50 + self.container(element)
21.51 +
21.52 + def node(self, node):
21.53 + self.out("%s%s: %r\n" % (self.output.indent, node.__class__.__name__, node.value))
21.54 +
21.55 + comment = node
21.56 + directive = node
21.57 + inclusion = node
21.58 + text = node
21.59 +
21.60 + def fragment(self, fragment):
21.61 + self.out("%s%s\n" % (self.output.indent, fragment.__class__.__name__))
21.62 + self.container(fragment)
21.63 +
21.64 +serialiser = HTMLSerialiser
21.65 +
21.66 +# vim: tabstop=4 expandtab shiftwidth=4
22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
22.2 +++ b/moinformat/serialisers/pretty/moin.py Fri Aug 18 00:18:42 2023 +0200
22.3 @@ -0,0 +1,168 @@
22.4 +#!/usr/bin/env python
22.5 +
22.6 +"""
22.7 +Prettyprinted text serialiser for Moin nodes.
22.8 +
22.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
22.10 +
22.11 +This program is free software; you can redistribute it and/or modify it under
22.12 +the terms of the GNU General Public License as published by the Free Software
22.13 +Foundation; either version 3 of the License, or (at your option) any later
22.14 +version.
22.15 +
22.16 +This program is distributed in the hope that it will be useful, but WITHOUT
22.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
22.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
22.19 +details.
22.20 +
22.21 +You should have received a copy of the GNU General Public License along with
22.22 +this program. If not, see <http://www.gnu.org/licenses/>.
22.23 +"""
22.24 +
22.25 +from moinformat.serialisers.pretty.common import Serialiser
22.26 +
22.27 +class MoinSerialiser(Serialiser):
22.28 +
22.29 + "Serialisation of Moin nodes for inspection."
22.30 +
22.31 + input_formats = ["moin", "wiki"]
22.32 + formats = ["pretty"]
22.33 +
22.34 + # Node handler methods.
22.35 +
22.36 + def region(self, region):
22.37 + self.out("%sRegion: level=%d indent=%d type=%s args=%r extra=%r\n" % (
22.38 + self.output.indent, region.level, region.indent, region.type, region.args,
22.39 + region.extra))
22.40 + self.visit_region(region)
22.41 +
22.42 + # Block node methods.
22.43 +
22.44 + def block(self, block):
22.45 + self.out("%sBlock\n" % self.output.indent)
22.46 + self.container(block)
22.47 +
22.48 + def defitem(self, defitem):
22.49 + self.out("%sDefItem: pad=%r extra=%r\n" % (self.output.indent,
22.50 + defitem.pad, defitem.extra))
22.51 + self.container(defitem)
22.52 +
22.53 + def defterm(self, defterm):
22.54 + self.out("%sDefTerm: pad=%r extra=%r\n" % (self.output.indent,
22.55 + defterm.pad, defterm.extra))
22.56 + self.container(defterm)
22.57 +
22.58 + def fontstyle(self, fontstyle):
22.59 + self.out("%sFontStyle: emphasis=%r strong=%r\n" % (self.output.indent,
22.60 + fontstyle.emphasis, fontstyle.strong))
22.61 + self.container(fontstyle)
22.62 +
22.63 + def heading(self, heading):
22.64 + self.out("%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
22.65 + " end_extra=%r identifier=%r\n" % (
22.66 + self.output.indent, heading.level, heading.start_extra,
22.67 + heading.start_pad, heading.end_pad, heading.end_extra,
22.68 + heading.identifier))
22.69 + self.container(heading)
22.70 +
22.71 + def link_label(self, link_label):
22.72 + self.out("%sLinkLabel\n" % self.output.indent)
22.73 + self.container(link_label)
22.74 +
22.75 + def link_parameter(self, link_parameter):
22.76 + self.out("%sLinkParameter\n" % self.output.indent)
22.77 + self.container(link_parameter)
22.78 +
22.79 + def list(self, list):
22.80 + self.out("%sList: indent=%r marker=%r num=%r\n" % (
22.81 + self.output.indent, list.indent, list.marker, list.num))
22.82 + self.container(list)
22.83 +
22.84 + def listitem(self, listitem):
22.85 + self.out("%sListItem: indent=%d marker=%r space=%r num=%r\n" % (
22.86 + self.output.indent, listitem.indent, listitem.marker, listitem.space, listitem.num))
22.87 + self.container(listitem)
22.88 +
22.89 + def table(self, table):
22.90 + self.out("%sTable:\n" % self.output.indent)
22.91 + self.container(table)
22.92 +
22.93 + def table_attrs(self, table_attrs):
22.94 + self.out("%sTableAttrs:\n" % self.output.indent)
22.95 + self.container(table_attrs)
22.96 +
22.97 + def table_cell(self, table_cell):
22.98 + self.out("%sTableCell: leading=%r padding=%r\n" % (
22.99 + self.output.indent, table_cell.leading, table_cell.padding))
22.100 + self.container(table_cell)
22.101 +
22.102 + def table_row(self, table_row):
22.103 + self.out("%sTableRow: trailing=%r leading=%r padding=%r\n" % (
22.104 + self.output.indent, table_row.trailing, table_row.leading,
22.105 + table_row.padding))
22.106 + self.container(table_row)
22.107 +
22.108 + def inline(self, inline):
22.109 + self.out("%s%s\n" % (self.output.indent, inline.__class__.__name__))
22.110 +
22.111 + # Inline nodes with children.
22.112 +
22.113 + def inline_container(self, inline):
22.114 + self.inline(inline)
22.115 + self.container(inline)
22.116 +
22.117 + larger = inline_container
22.118 +
22.119 + def link(self, link):
22.120 + self.out("%sLink: target=%r\n" % (self.output.indent, link.target))
22.121 + self.container(link)
22.122 +
22.123 + def macro(self, macro):
22.124 + self.out("%sMacro: name=%r args=%r\n" % (self.output.indent, macro.name, macro.args))
22.125 + self.container(macro)
22.126 +
22.127 + monospace = inline_container
22.128 + smaller = inline_container
22.129 + strikethrough = inline_container
22.130 + subscript = inline_container
22.131 + superscript = inline_container
22.132 +
22.133 + def transclusion(self, transclusion):
22.134 + self.out("%sTransclusion: target=%r\n" % (self.output.indent, transclusion.target))
22.135 + self.container(transclusion)
22.136 +
22.137 + underline = inline_container
22.138 +
22.139 + # Inline nodes without children.
22.140 +
22.141 + def anchor(self, anchor):
22.142 + self.out("%sAnchor: target=%r\n" % (self.output.indent, anchor.target))
22.143 +
22.144 + break_ = inline
22.145 +
22.146 + def comment(self, comment):
22.147 + self.out("%sComment: comment=%r extra=%r\n" % (self.output.indent, comment.comment, comment.extra))
22.148 +
22.149 + def directive(self, directive):
22.150 + self.out("%sDirective: directive=%r extra=%r\n" % (self.output.indent, directive.directive, directive.extra))
22.151 +
22.152 + linebreak = inline
22.153 + nbsp = inline
22.154 +
22.155 + def rule(self, rule):
22.156 + self.out("%sRule: height=%d\n" % (self.output.indent, rule.height))
22.157 +
22.158 + def table_attr(self, table_attr):
22.159 + self.out("%sTableAttr: name=%r value=%r concise=%r quote=%r\n" % (
22.160 + self.output.indent, table_attr.name, table_attr.value,
22.161 + table_attr.concise, table_attr.quote))
22.162 +
22.163 + def text(self, text):
22.164 + self.out("%sText: %r\n" % (self.output.indent, text.s))
22.165 +
22.166 + def verbatim(self, verbatim):
22.167 + self.out("%sVerbatim: text=%r\n" % (self.output.indent, verbatim.text))
22.168 +
22.169 +serialiser = MoinSerialiser
22.170 +
22.171 +# vim: tabstop=4 expandtab shiftwidth=4
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
23.2 +++ b/moinformat/serialisers/pretty/pretty.py Fri Aug 18 00:18:42 2023 +0200
23.3 @@ -0,0 +1,38 @@
23.4 +#!/usr/bin/env python
23.5 +
23.6 +"""
23.7 +Prettyprinted document node prettyprinter.
23.8 +
23.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
23.10 +
23.11 +This program is free software; you can redistribute it and/or modify it under
23.12 +the terms of the GNU General Public License as published by the Free Software
23.13 +Foundation; either version 3 of the License, or (at your option) any later
23.14 +version.
23.15 +
23.16 +This program is distributed in the hope that it will be useful, but WITHOUT
23.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
23.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
23.19 +details.
23.20 +
23.21 +You should have received a copy of the GNU General Public License along with
23.22 +this program. If not, see <http://www.gnu.org/licenses/>.
23.23 +"""
23.24 +
23.25 +from moinformat.serialisers.pretty.common import Serialiser
23.26 +
23.27 +class PrettySerialiser(Serialiser):
23.28 +
23.29 + "Serialisation of prettyprinted document nodes for inspection."
23.30 +
23.31 + input_formats = ["pretty"]
23.32 + formats = ["pretty"]
23.33 +
23.34 + def node(self, node):
23.35 + self.out("%s%s%s\n" % (self.output.indent, node.name,
23.36 + len(node.nodes) and " nodes=%d" % len(node.nodes) or ""))
23.37 + self.container(node)
23.38 +
23.39 +serialiser = PrettySerialiser
23.40 +
23.41 +# vim: tabstop=4 expandtab shiftwidth=4
24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
24.2 +++ b/moinformat/serialisers/pretty/table.py Fri Aug 18 00:18:42 2023 +0200
24.3 @@ -0,0 +1,35 @@
24.4 +#!/usr/bin/env python
24.5 +
24.6 +"""
24.7 +Moin wiki table prettyprinter.
24.8 +
24.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
24.10 +
24.11 +This program is free software; you can redistribute it and/or modify it under
24.12 +the terms of the GNU General Public License as published by the Free Software
24.13 +Foundation; either version 3 of the License, or (at your option) any later
24.14 +version.
24.15 +
24.16 +This program is distributed in the hope that it will be useful, but WITHOUT
24.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
24.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
24.19 +details.
24.20 +
24.21 +You should have received a copy of the GNU General Public License along with
24.22 +this program. If not, see <http://www.gnu.org/licenses/>.
24.23 +"""
24.24 +
24.25 +from moinformat.serialisers.pretty.moin import MoinSerialiser
24.26 +
24.27 +class MoinTableSerialiser(MoinSerialiser):
24.28 +
24.29 + "Serialisation of table nodes for inspection."
24.30 +
24.31 + input_formats = ["table"]
24.32 +
24.33 + def continuation(self, continuation):
24.34 + self.out("%sContinuation: %r\n" % (self.output.indent, continuation.text))
24.35 +
24.36 +serialiser = MoinTableSerialiser
24.37 +
24.38 +# vim: tabstop=4 expandtab shiftwidth=4
25.1 --- a/moinformat/tree/graphviz.py Tue Jun 20 18:58:47 2023 +0200
25.2 +++ b/moinformat/tree/graphviz.py Fri Aug 18 00:18:42 2023 +0200
25.3 @@ -3,7 +3,7 @@
25.4 """
25.5 Graphviz document tree nodes.
25.6
25.7 -Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
25.8 +Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
25.9
25.10 This program is free software; you can redistribute it and/or modify it under
25.11 the terms of the GNU General Public License as published by the Free Software
25.12 @@ -39,10 +39,7 @@
25.13 def __repr__(self):
25.14 return "Directive(%r, %r, %r)" % (self.key, self.value, self.directive)
25.15
25.16 - def prettyprint(self, indent=""):
25.17 - return "%sDirective: key=%r value=%r directive=%r" % (indent, self.key, self.value, self.directive)
25.18 -
25.19 - def to_string(self, out):
25.20 - out.directive(self.key, self.value, self.directive)
25.21 + def visit(self, visitor):
25.22 + return visitor.directive(self)
25.23
25.24 # vim: tabstop=4 expandtab shiftwidth=4
26.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
26.2 +++ b/moinformat/tree/html.py Fri Aug 18 00:18:42 2023 +0200
26.3 @@ -0,0 +1,26 @@
26.4 +#!/usr/bin/env python
26.5 +
26.6 +"""
26.7 +HTML document nodes.
26.8 +
26.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
26.10 +
26.11 +This program is free software; you can redistribute it and/or modify it under
26.12 +the terms of the GNU General Public License as published by the Free Software
26.13 +Foundation; either version 3 of the License, or (at your option) any later
26.14 +version.
26.15 +
26.16 +This program is distributed in the hope that it will be useful, but WITHOUT
26.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
26.19 +details.
26.20 +
26.21 +You should have received a copy of the GNU General Public License along with
26.22 +this program. If not, see <http://www.gnu.org/licenses/>.
26.23 +"""
26.24 +
26.25 +from moinformat.utils.htmlparse.tree import Attribute, AttributeValue, \
26.26 + Comment, Directive, Element, \
26.27 + Fragment, Node, Inclusion, Text
26.28 +
26.29 +# vim: tabstop=4 expandtab shiftwidth=4
27.1 --- a/moinformat/tree/moin.py Tue Jun 20 18:58:47 2023 +0200
27.2 +++ b/moinformat/tree/moin.py Fri Aug 18 00:18:42 2023 +0200
27.3 @@ -3,7 +3,8 @@
27.4 """
27.5 Moin wiki format document tree nodes.
27.6
27.7 -Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
27.8 +Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022,
27.9 + 2023 Paul Boddie <paul@boddie.org.uk>
27.10
27.11 This program is free software; you can redistribute it and/or modify it under
27.12 the terms of the GNU General Public License as published by the Free Software
27.13 @@ -174,18 +175,6 @@
27.14
27.15 return not self.text_content().strip()
27.16
27.17 - def __str__(self):
27.18 - return self.prettyprint()
27.19 -
27.20 - def _prettyprint(self, l, indent=""):
27.21 - for node in self.nodes:
27.22 - l.append(node.prettyprint(indent + " "))
27.23 - return "\n".join(l)
27.24 -
27.25 - def _to_string(self, out):
27.26 - for node in self.nodes:
27.27 - node.to_string(out)
27.28 -
27.29 class Region(Container):
27.30
27.31 "A region of the page."
27.32 @@ -216,28 +205,8 @@
27.33 return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
27.34 self.indent, self.type, self.args, self.transparent, self.extra)
27.35
27.36 - def prettyprint(self, indent=""):
27.37 - l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
27.38 - self.level, self.indent, self.type, self.args, self.extra)]
27.39 - return self._prettyprint(l, indent)
27.40 -
27.41 - def to_string(self, out):
27.42 - out.start_region(self.level, self.indent, self.type, self.args, self.extra)
27.43 -
27.44 - # Obtain a serialiser for the region from the same format family.
27.45 - # Retain the same serialiser if no appropriate serialiser could be
27.46 - # obtained.
27.47 -
27.48 - serialiser_name = "%s.%s" % (out.formats[0], self.type)
27.49 - serialiser = out.get_serialiser(serialiser_name)
27.50 -
27.51 - # Serialise the region.
27.52 -
27.53 - self._to_string(serialiser)
27.54 -
27.55 - # End the region with the previous serialiser.
27.56 -
27.57 - out.end_region(self.level, self.indent, self.type, self.args, self.extra)
27.58 + def visit(self, visitor):
27.59 + return visitor.region(self)
27.60
27.61
27.62
27.63 @@ -250,14 +219,8 @@
27.64 def __repr__(self):
27.65 return "Block(%r)" % self.nodes
27.66
27.67 - def prettyprint(self, indent=""):
27.68 - l = ["%sBlock" % indent]
27.69 - return self._prettyprint(l, indent)
27.70 -
27.71 - def to_string(self, out):
27.72 - out.start_block()
27.73 - self._to_string(out)
27.74 - out.end_block()
27.75 + def visit(self, visitor):
27.76 + return visitor.block(self)
27.77
27.78 class DefItem(Container):
27.79
27.80 @@ -271,14 +234,8 @@
27.81 def __repr__(self):
27.82 return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)
27.83
27.84 - def prettyprint(self, indent=""):
27.85 - l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
27.86 - return self._prettyprint(l, indent)
27.87 -
27.88 - def to_string(self, out):
27.89 - out.start_defitem(self.pad, self.extra)
27.90 - self._to_string(out)
27.91 - out.end_defitem(self.pad, self.extra)
27.92 + def visit(self, visitor):
27.93 + return visitor.defitem(self)
27.94
27.95 class DefTerm(Container):
27.96
27.97 @@ -292,14 +249,8 @@
27.98 def __repr__(self):
27.99 return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)
27.100
27.101 - def prettyprint(self, indent=""):
27.102 - l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
27.103 - return self._prettyprint(l, indent)
27.104 -
27.105 - def to_string(self, out):
27.106 - out.start_defterm(self.pad, self.extra)
27.107 - self._to_string(out)
27.108 - out.end_defterm(self.pad, self.extra)
27.109 + def visit(self, visitor):
27.110 + return visitor.defterm(self)
27.111
27.112 class FontStyle(Container):
27.113
27.114 @@ -327,20 +278,8 @@
27.115 def __repr__(self):
27.116 return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)
27.117
27.118 - def prettyprint(self, indent=""):
27.119 - l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
27.120 - return self._prettyprint(l, indent)
27.121 -
27.122 - def to_string(self, out):
27.123 - if self.emphasis:
27.124 - out.start_emphasis()
27.125 - elif self.strong:
27.126 - out.start_strong()
27.127 - self._to_string(out)
27.128 - if self.emphasis:
27.129 - out.end_emphasis()
27.130 - elif self.strong:
27.131 - out.end_strong()
27.132 + def visit(self, visitor):
27.133 + return visitor.fontstyle(self)
27.134
27.135 class Heading(Container):
27.136
27.137 @@ -361,17 +300,8 @@
27.138 self.nodes, self.level, self.start_extra, self.start_pad,
27.139 self.end_pad, self.end_extra, self.identifier)
27.140
27.141 - def prettyprint(self, indent=""):
27.142 - l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
27.143 - " end_extra=%r identifier=%r" % (
27.144 - indent, self.level, self.start_extra, self.start_pad, self.end_pad,
27.145 - self.end_extra, self.identifier)]
27.146 - return self._prettyprint(l, indent)
27.147 -
27.148 - def to_string(self, out):
27.149 - out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
27.150 - self._to_string(out)
27.151 - out.end_heading(self.level, self.end_pad, self.end_extra)
27.152 + def visit(self, visitor):
27.153 + return visitor.heading(self)
27.154
27.155 class LinkLabel(Container):
27.156
27.157 @@ -380,12 +310,8 @@
27.158 def __repr__(self):
27.159 return "LinkLabel(%r)" % self.nodes
27.160
27.161 - def prettyprint(self, indent=""):
27.162 - l = ["%sLinkLabel" % indent]
27.163 - return self._prettyprint(l, indent)
27.164 -
27.165 - def to_string(self, out):
27.166 - out.link_label(self.nodes)
27.167 + def visit(self, visitor):
27.168 + return visitor.link_label(self)
27.169
27.170 class LinkParameter(Container):
27.171
27.172 @@ -394,14 +320,8 @@
27.173 def __repr__(self):
27.174 return "LinkParameter(%r)" % self.nodes
27.175
27.176 - def prettyprint(self, indent=""):
27.177 - l = ["%sLinkParameter" % indent]
27.178 - return self._prettyprint(l, indent)
27.179 -
27.180 - def to_string(self, out):
27.181 - s = self.text_content()
27.182 - t = s.split("=", 1)
27.183 - out.link_parameter(t)
27.184 + def visit(self, visitor):
27.185 + return visitor.link_parameter(self)
27.186
27.187 class List(Container):
27.188
27.189 @@ -420,18 +340,10 @@
27.190 def __repr__(self):
27.191 return "List(%r)" % self.nodes
27.192
27.193 - def prettyprint(self, indent=""):
27.194 + def visit(self, visitor):
27.195 if not self.first:
27.196 self.init()
27.197 - l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
27.198 - return self._prettyprint(l, indent)
27.199 -
27.200 - def to_string(self, out):
27.201 - if not self.first:
27.202 - self.init()
27.203 - out.start_list(self.indent, self.marker, self.num)
27.204 - self._to_string(out)
27.205 - out.end_list(self.indent, self.marker, self.num)
27.206 + return visitor.list(self)
27.207
27.208 class ListItem(Container):
27.209
27.210 @@ -451,14 +363,18 @@
27.211 def __repr__(self):
27.212 return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)
27.213
27.214 - def prettyprint(self, indent=""):
27.215 - l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
27.216 - return self._prettyprint(l, indent)
27.217 + def visit(self, visitor):
27.218 + return visitor.listitem(self)
27.219 +
27.220 +class Table(Container):
27.221
27.222 - def to_string(self, out):
27.223 - out.start_listitem(self.indent, self.marker, self.space, self.num)
27.224 - self._to_string(out)
27.225 - out.end_listitem(self.indent, self.marker, self.space, self.num)
27.226 + "A table."
27.227 +
27.228 + def __repr__(self):
27.229 + return "Table(%r)" % self.nodes
27.230 +
27.231 + def visit(self, visitor):
27.232 + return visitor.table(self)
27.233
27.234 class TableAttrs(Container):
27.235
27.236 @@ -476,31 +392,8 @@
27.237 def __repr__(self):
27.238 return "TableAttrs(%r)" % self.nodes
27.239
27.240 - def prettyprint(self, indent=""):
27.241 - l = ["%sTableAttrs:" % indent]
27.242 - return self._prettyprint(l, indent)
27.243 -
27.244 - def to_string(self, out):
27.245 - out.start_table_attrs()
27.246 - out.table_attrs(self.nodes)
27.247 - if not self.incomplete:
27.248 - out.end_table_attrs()
27.249 -
27.250 -class Table(Container):
27.251 -
27.252 - "A table."
27.253 -
27.254 - def __repr__(self):
27.255 - return "Table(%r)" % self.nodes
27.256 -
27.257 - def prettyprint(self, indent=""):
27.258 - l = ["%sTable:" % indent]
27.259 - return self._prettyprint(l, indent)
27.260 -
27.261 - def to_string(self, out):
27.262 - out.start_table()
27.263 - self._to_string(out)
27.264 - out.end_table()
27.265 + def visit(self, visitor):
27.266 + return visitor.table_attrs(self)
27.267
27.268 class TableCell(Container):
27.269
27.270 @@ -516,15 +409,8 @@
27.271 return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
27.272 self.leading, self.padding)
27.273
27.274 - def prettyprint(self, indent=""):
27.275 - l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading,
27.276 - self.padding)]
27.277 - return self._prettyprint(l, indent)
27.278 -
27.279 - def to_string(self, out):
27.280 - out.start_table_cell(self.attrs, self.leading, self.padding)
27.281 - self._to_string(out)
27.282 - out.end_table_cell()
27.283 + def visit(self, visitor):
27.284 + return visitor.table_cell(self)
27.285
27.286 class TableRow(Container):
27.287
27.288 @@ -540,15 +426,8 @@
27.289 return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
27.290 self.leading, self.padding)
27.291
27.292 - def prettyprint(self, indent=""):
27.293 - l = ["%sTableRow: trailing=%r leading=%r padding=%r" % (
27.294 - indent, self.trailing, self.leading, self.padding)]
27.295 - return self._prettyprint(l, indent)
27.296 -
27.297 - def to_string(self, out):
27.298 - out.start_table_row(self.leading, self.padding)
27.299 - self._to_string(out)
27.300 - out.end_table_row(self.trailing)
27.301 + def visit(self, visitor):
27.302 + return visitor.table_row(self)
27.303
27.304
27.305
27.306 @@ -561,18 +440,12 @@
27.307 def __repr__(self):
27.308 return "%s(%r)" % (self.__class__.__name__, self.nodes)
27.309
27.310 - def prettyprint(self, indent=""):
27.311 - l = ["%s%s" % (indent, self.__class__.__name__)]
27.312 - return self._prettyprint(l, indent)
27.313 -
27.314 class Larger(Inline):
27.315
27.316 "Larger text."
27.317
27.318 - def to_string(self, out):
27.319 - out.start_larger()
27.320 - self._to_string(out)
27.321 - out.end_larger()
27.322 + def visit(self, visitor):
27.323 + return visitor.larger(self)
27.324
27.325 class Link(Container):
27.326
27.327 @@ -585,12 +458,8 @@
27.328 def __repr__(self):
27.329 return "Link(%r, %r)" % (self.nodes, self.target)
27.330
27.331 - def prettyprint(self, indent=""):
27.332 - l = ["%sLink: target=%r" % (indent, self.target)]
27.333 - return self._prettyprint(l, indent)
27.334 -
27.335 - def to_string(self, out):
27.336 - out.link(self.target, self.nodes)
27.337 + def visit(self, visitor):
27.338 + return visitor.link(self)
27.339
27.340 class Macro(Container):
27.341
27.342 @@ -609,60 +478,43 @@
27.343 self.parent, self.region,
27.344 self.nodes, self.inline)
27.345
27.346 - def prettyprint(self, indent=""):
27.347 - l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
27.348 - return self._prettyprint(l, indent)
27.349 -
27.350 - def to_string(self, out):
27.351 - out.start_macro(self.name, self.args, self.nodes, self.inline)
27.352 - if self.nodes:
27.353 - self._to_string(out)
27.354 - out.end_macro(self.inline)
27.355 + def visit(self, visitor):
27.356 + return visitor.macro(self)
27.357
27.358 class Monospace(Inline):
27.359
27.360 "Monospaced text."
27.361
27.362 - def to_string(self, out):
27.363 - out.start_monospace()
27.364 - self._to_string(out)
27.365 - out.end_monospace()
27.366 + def visit(self, visitor):
27.367 + return visitor.monospace(self)
27.368
27.369 class Smaller(Inline):
27.370
27.371 "Smaller text."
27.372
27.373 - def to_string(self, out):
27.374 - out.start_smaller()
27.375 - self._to_string(out)
27.376 - out.end_smaller()
27.377 + def visit(self, visitor):
27.378 + return visitor.smaller(self)
27.379
27.380 class Strikethrough(Inline):
27.381
27.382 - "Crossed-out text."
27.383 + "Crossed-out text."
27.384
27.385 - def to_string(self, out):
27.386 - out.start_strikethrough()
27.387 - self._to_string(out)
27.388 - out.end_strikethrough()
27.389 + def visit(self, visitor):
27.390 + return visitor.strikethrough(self)
27.391
27.392 class Subscript(Inline):
27.393
27.394 "Subscripted text."
27.395
27.396 - def to_string(self, out):
27.397 - out.start_subscript()
27.398 - self._to_string(out)
27.399 - out.end_subscript()
27.400 + def visit(self, visitor):
27.401 + return visitor.subscript(self)
27.402
27.403 class Superscript(Inline):
27.404
27.405 "Superscripted text."
27.406
27.407 - def to_string(self, out):
27.408 - out.start_superscript()
27.409 - self._to_string(out)
27.410 - out.end_superscript()
27.411 + def visit(self, visitor):
27.412 + return visitor.superscript(self)
27.413
27.414 class Transclusion(Container):
27.415
27.416 @@ -675,21 +527,15 @@
27.417 def __repr__(self):
27.418 return "Transclusion(%r, %r)" % (self.nodes, self.target)
27.419
27.420 - def prettyprint(self, indent=""):
27.421 - l = ["%sTransclusion: target=%r" % (indent, self.target)]
27.422 - return self._prettyprint(l, indent)
27.423 -
27.424 - def to_string(self, out):
27.425 - out.transclusion(self.target, self.nodes)
27.426 + def visit(self, visitor):
27.427 + return visitor.transclusion(self)
27.428
27.429 class Underline(Inline):
27.430
27.431 "Underlined text."
27.432
27.433 - def to_string(self, out):
27.434 - out.start_underline()
27.435 - self._to_string(out)
27.436 - out.end_underline()
27.437 + def visit(self, visitor):
27.438 + return visitor.underline(self)
27.439
27.440
27.441
27.442 @@ -712,11 +558,8 @@
27.443 def __repr__(self):
27.444 return "Anchor(%r)" % self.target
27.445
27.446 - def prettyprint(self, indent=""):
27.447 - return "%sAnchor: target=%r" % (indent, self.target)
27.448 -
27.449 - def to_string(self, out):
27.450 - out.anchor(self.target)
27.451 + def visit(self, visitor):
27.452 + return visitor.anchor(self)
27.453
27.454 class Break(Node):
27.455
27.456 @@ -725,11 +568,8 @@
27.457 def __repr__(self):
27.458 return "Break()"
27.459
27.460 - def prettyprint(self, indent=""):
27.461 - return "%sBreak" % indent
27.462 -
27.463 - def to_string(self, out):
27.464 - out.break_()
27.465 + def visit(self, visitor):
27.466 + return visitor.break_(self)
27.467
27.468 class Comment(Node):
27.469
27.470 @@ -742,11 +582,8 @@
27.471 def __repr__(self):
27.472 return "Comment(%r, %r)" % (self.comment, self.extra)
27.473
27.474 - def prettyprint(self, indent=""):
27.475 - return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)
27.476 -
27.477 - def to_string(self, out):
27.478 - out.comment(self.comment, self.extra)
27.479 + def visit(self, visitor):
27.480 + return visitor.comment(self)
27.481
27.482 class Directive(Node):
27.483
27.484 @@ -759,11 +596,8 @@
27.485 def __repr__(self):
27.486 return "Directive(%r, %r)" % (self.directive, self.extra)
27.487
27.488 - def prettyprint(self, indent=""):
27.489 - return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)
27.490 -
27.491 - def to_string(self, out):
27.492 - out.directive(self.directive, self.extra)
27.493 + def visit(self, visitor):
27.494 + return visitor.directive(self)
27.495
27.496 class LineBreak(Node):
27.497
27.498 @@ -772,11 +606,8 @@
27.499 def __repr__(self):
27.500 return "LineBreak()"
27.501
27.502 - def prettyprint(self, indent=""):
27.503 - return "%sLineBreak" % indent
27.504 -
27.505 - def to_string(self, out):
27.506 - out.linebreak()
27.507 + def visit(self, visitor):
27.508 + return visitor.linebreak(self)
27.509
27.510 class NonBreakingSpace(Node):
27.511
27.512 @@ -785,11 +616,8 @@
27.513 def __repr__(self):
27.514 return "NonBreakingSpace()"
27.515
27.516 - def prettyprint(self, indent=""):
27.517 - return "%sNonBreakingSpace" % indent
27.518 -
27.519 - def to_string(self, out):
27.520 - out.nbsp()
27.521 + def visit(self, visitor):
27.522 + return visitor.nbsp(self)
27.523
27.524 class Rule(Node):
27.525
27.526 @@ -801,11 +629,8 @@
27.527 def __repr__(self):
27.528 return "Rule(%d)" % self.height
27.529
27.530 - def prettyprint(self, indent=""):
27.531 - return "%sRule: height=%d" % (indent, self.height)
27.532 -
27.533 - def to_string(self, out):
27.534 - out.rule(self.height)
27.535 + def visit(self, visitor):
27.536 + return visitor.rule(self)
27.537
27.538 class TableAttr(Node):
27.539
27.540 @@ -820,11 +645,8 @@
27.541 def __repr__(self):
27.542 return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)
27.543
27.544 - def prettyprint(self, indent=""):
27.545 - return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)
27.546 -
27.547 - def to_string(self, out):
27.548 - out.table_attr(self.name, self.value, self.concise, self.quote)
27.549 + def visit(self, visitor):
27.550 + return visitor.table_attr(self)
27.551
27.552 class Text(Node):
27.553
27.554 @@ -845,11 +667,8 @@
27.555 def __repr__(self):
27.556 return "Text(%r)" % self.s
27.557
27.558 - def prettyprint(self, indent=""):
27.559 - return "%sText: %r" % (indent, self.s)
27.560 -
27.561 - def to_string(self, out):
27.562 - out.text(self.s)
27.563 + def visit(self, visitor):
27.564 + return visitor.text(self)
27.565
27.566 class Verbatim(Node):
27.567
27.568 @@ -861,10 +680,7 @@
27.569 def __repr__(self):
27.570 return "Verbatim(%r)" % self.text
27.571
27.572 - def prettyprint(self, indent=""):
27.573 - return "%sVerbatim: text=%r" % (indent, self.text)
27.574 -
27.575 - def to_string(self, out):
27.576 - out.verbatim(self.text)
27.577 + def visit(self, visitor):
27.578 + return visitor.verbatim(self)
27.579
27.580 # vim: tabstop=4 expandtab shiftwidth=4
28.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
28.2 +++ b/moinformat/tree/pretty.py Fri Aug 18 00:18:42 2023 +0200
28.3 @@ -0,0 +1,65 @@
28.4 +#!/usr/bin/env python
28.5 +
28.6 +"""
28.7 +Prettyprinted document tree nodes.
28.8 +
28.9 +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
28.10 +
28.11 +This program is free software; you can redistribute it and/or modify it under
28.12 +the terms of the GNU General Public License as published by the Free Software
28.13 +Foundation; either version 3 of the License, or (at your option) any later
28.14 +version.
28.15 +
28.16 +This program is distributed in the hope that it will be useful, but WITHOUT
28.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
28.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
28.19 +details.
28.20 +
28.21 +You should have received a copy of the GNU General Public License along with
28.22 +this program. If not, see <http://www.gnu.org/licenses/>.
28.23 +"""
28.24 +
28.25 +from moinformat.tree.moin import Container
28.26 +
class Node:

    "A simplified tree node: a name together with a list of child nodes."

    def __init__(self, name):
        self.nodes = []
        self.name = name

    def __repr__(self):
        return "Node(%r, %r)" % (self.name, self.nodes)

    def visit(self, visitor):

        "Dispatch to 'visitor' by calling its node method with this node."

        return visitor.node(self)

    def append(self, node):

        "Add 'node' as the last child of this node."

        self.nodes.append(node)

    def test(self, other):

        """
        Compare this node with 'other', a node from a parsed document tree.

        The nodes are equivalent when the class name of 'other' equals this
        node's name and, where 'other' is a Container, when the child nodes
        are equivalent pairwise. Where 'other' is not a Container, this node
        must have no child nodes.

        Return None if equivalent. Otherwise, return a tuple of (simplified
        node, document node, reason) for the first difference found, the
        reason being "name", "simple", "document" or "empty".
        """

        if self.name != other.__class__.__name__:
            return self, other, "name"

        # Nodes without children in the document must have none here.

        if not isinstance(other, Container):
            if self.nodes:
                return self, other, "empty"
            return None

        # Pair up the children, padding the shorter collection with None.

        for node, other_node in map(None, self.nodes, other.nodes):

            # A missing simplified node means the document has extra nodes.

            if node is None:
                return self, other, "simple"

            # A missing document node means the document has too few nodes.

            if other_node is None:
                return self, other, "document"

            failing = node.test(other_node)
            if failing:
                return failing

        return None
28.67 +
28.68 +# vim: tabstop=4 expandtab shiftwidth=4
29.1 --- a/moinformat/tree/table.py Tue Jun 20 18:58:47 2023 +0200
29.2 +++ b/moinformat/tree/table.py Fri Aug 18 00:18:42 2023 +0200
29.3 @@ -3,7 +3,7 @@
29.4 """
29.5 Extended table syntax document tree nodes.
29.6
29.7 -Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
29.8 +Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
29.9
29.10 This program is free software; you can redistribute it and/or modify it under
29.11 the terms of the GNU General Public License as published by the Free Software
29.12 @@ -31,10 +31,7 @@
29.13 def __repr__(self):
29.14 return "Continuation(%r)" % self.text
29.15
29.16 - def prettyprint(self, indent=""):
29.17 - return "%sContinuation: %r" % (indent, self.text)
29.18 -
29.19 - def to_string(self, out):
29.20 - out.continuation(self.text)
29.21 + def visit(self, visitor):
29.22 + return visitor.continuation(self)
29.23
29.24 # vim: tabstop=4 expandtab shiftwidth=4
30.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
30.2 +++ b/moinformat/utils/htmlparse/__init__.py Fri Aug 18 00:18:42 2023 +0200
30.3 @@ -0,0 +1,24 @@
30.4 +#!/usr/bin/env python
30.5 +
30.6 +"""
30.7 +HTML parsing modules.
30.8 +
30.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
30.10 +
30.11 +This program is free software; you can redistribute it and/or modify it under
30.12 +the terms of the GNU General Public License as published by the Free Software
30.13 +Foundation; either version 3 of the License, or (at your option) any later
30.14 +version.
30.15 +
30.16 +This program is distributed in the hope that it will be useful, but WITHOUT
30.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
30.19 +details.
30.20 +
30.21 +You should have received a copy of the GNU General Public License along with
30.22 +this program. If not, see <http://www.gnu.org/licenses/>.
30.23 +"""
30.24 +
30.25 +from moinformat.utils.htmlparse.parse import Parser
30.26 +
30.27 +# vim: tabstop=4 expandtab shiftwidth=4
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
31.2 +++ b/moinformat/utils/htmlparse/lex.py Fri Aug 18 00:18:42 2023 +0200
31.3 @@ -0,0 +1,234 @@
31.4 +#!/usr/bin/env python
31.5 +
31.6 +"""
31.7 +Lexical partitioning of HTML document content.
31.8 +
31.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
31.10 +
31.11 +This program is free software; you can redistribute it and/or modify it under
31.12 +the terms of the GNU General Public License as published by the Free Software
31.13 +Foundation; either version 3 of the License, or (at your option) any later
31.14 +version.
31.15 +
31.16 +This program is distributed in the hope that it will be useful, but WITHOUT
31.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
31.19 +details.
31.20 +
31.21 +You should have received a copy of the GNU General Public License along with
31.22 +this program. If not, see <http://www.gnu.org/licenses/>.
31.23 +"""
31.24 +
31.25 +# Lexical analysis state transition handler functions.
31.26 +
def tag_or_similar(text, pos):

    """
    Return the span class for the markup starting with "<" at 'pos' in 'text'.

    A "<![" prefix selects IN_INCLUSION (as in "<![CDATA[ ... ]]>"), a "<!--"
    prefix selects IN_COMMENT, any other "<!" prefix selects IN_DIRECTIVE, and
    everything else is treated as a tag, selecting IN_TAG.
    """

    if text[pos:pos+2] != "<!":
        return IN_TAG

    # With "<" at pos and "!" at pos+1, the distinguishing characters start at
    # pos+2. Slices are used so that reaching the end of the text is harmless.

    if text[pos+2:pos+3] == "[":
        return IN_INCLUSION
    elif text[pos+2:pos+4] == "--":
        return IN_COMMENT
    else:
        return IN_DIRECTIVE
31.40 +
def at_attribute_value(text, pos):

    "Select the AT_ATTRIBUTE_VALUE state, ignoring 'text' and 'pos'."

    return AT_ATTRIBUTE_VALUE
31.43 +
def in_dq_attribute_value(text, pos):

    "Select the IN_DQ_ATTRIBUTE_VALUE state, ignoring 'text' and 'pos'."

    return IN_DQ_ATTRIBUTE_VALUE
31.46 +
def in_sq_attribute_value(text, pos):

    "Select the IN_SQ_ATTRIBUTE_VALUE state, ignoring 'text' and 'pos'."

    return IN_SQ_ATTRIBUTE_VALUE
31.49 +
def after_attribute_value(text, pos):

    "Select the AFTER_ATTRIBUTE_VALUE state, ignoring 'text' and 'pos'."

    return AFTER_ATTRIBUTE_VALUE
31.52 +
def end_of_standalone_tag(text, pos):

    "Select the AT_END_OF_TAG state, ignoring 'text' and 'pos'."

    return AT_END_OF_TAG
31.55 +
def end_of_tag(text, pos):

    "Select the BETWEEN_TAGS state, ignoring 'text' and 'pos'."

    return BETWEEN_TAGS
31.58 +
31.59 +
31.60 +
31.61 +# Lexical analysis states/spans.
31.62 +
class Span:

    "A piece of text, with subclasses indicating the lexical state involved."

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.text)

    def empty(self):

        "Return whether the span has no text at all."

        if self.text:
            return False

        return True
31.72 +
class AT_END_OF_TAG(Span):

    "A marker span for the state selected by end_of_standalone_tag."

    # A token of None makes the transition occur without consuming any text.

    transitions = [(None, "", end_of_tag)]

    def visit(self, visitor):
        return visitor.at_end_of_tag(self)

    def empty(self):

        "Always report the span as not empty, whatever its text."

        return False
31.81 +
class BETWEEN_TAGS(Span):

    "A span of text ending at the next '<' character."

    transitions = [("<", "", tag_or_similar)]

    def visit(self, visitor):
        return visitor.between_tags(self)
31.87 +
class IN_TAG(Span):

    "A span of tag text ending at the next '=', '/>' or '>'."

    transitions = [
        ("=", "", at_attribute_value),
        ("/>", "", end_of_standalone_tag),
        (">", "", end_of_tag),
        ]

    def visit(self, visitor):
        return visitor.in_tag(self)
31.97 +
class IN_COMMENT(Span):

    "A span ending at the next '-->', keeping the '--' as part of its text."

    transitions = [("-->", "--", end_of_tag)]

    def visit(self, visitor):
        return visitor.in_comment(self)
31.103 +
class IN_DIRECTIVE(Span):

    "A span ending at the next '>' character."

    transitions = [(">", "", end_of_tag)]

    def visit(self, visitor):
        return visitor.in_directive(self)
31.109 +
class IN_INCLUSION(Span):

    "A span ending at the next ']]>', keeping the ']]' as part of its text."

    transitions = [("]]>", "]]", end_of_tag)]

    def visit(self, visitor):
        return visitor.in_inclusion(self)
31.115 +
class AFTER_ATTRIBUTE_VALUE(Span):

    "A span of tag text following the closing quote of an attribute value."

    transitions = [
        ("=", "", at_attribute_value),
        ("/>", "", end_of_standalone_tag),
        (">", "", end_of_tag),
        ]

    def visit(self, visitor):
        return visitor.after_attribute_value(self)

    def empty(self):

        "Return whether the span has no text other than whitespace."

        stripped = self.text.strip()
        return not stripped
31.128 +
class AT_ATTRIBUTE_VALUE(Span):

    """
    A span of tag text following "=", ending at the next "=", quote character,
    "/>" or ">".
    """

    transitions = [
        ("=", "", at_attribute_value),
        ('"', "", in_dq_attribute_value),
        ("'", "", in_sq_attribute_value),
        ("/>", "", end_of_standalone_tag),
        (">", "", end_of_tag),
        ]

    def visit(self, visitor):
        return visitor.at_attribute_value(self)

    def empty(self):

        "Return whether the span has no text other than whitespace."

        stripped = self.text.strip()
        return not stripped
31.143 +
class IN_DQ_ATTRIBUTE_VALUE(Span):

    "A span of text ending at the next double quote character."

    transitions = [('"', "", after_attribute_value)]

    def visit(self, visitor):
        return visitor.in_dq_attribute_value(self)
31.149 +
class IN_SQ_ATTRIBUTE_VALUE(Span):

    "A span of text ending at the next single quote character."

    transitions = [("'", "", after_attribute_value)]

    def visit(self, visitor):
        return visitor.in_sq_attribute_value(self)
31.155 +
31.156 +
31.157 +
31.158 +# Utility functions.
31.159 +
def find_one(text, pos, choices):

    """
    Search 'text' from 'pos' for the earliest occurrence of any token given in
    'choices', a list of (token string, extra string, state) tuples.

    The token string marks the start of the next span, the extra string is
    the part of the token to be added to the end of the current span, and the
    state is whatever the caller associates with the next span.

    A token of None matches at 'pos' itself without consuming any text, and
    does so as soon as it is encountered in 'choices'.

    Return a tuple of (state, position of the occurrence, extra string,
    position following the occurrence). All four are None if nothing matches.
    """

    best = (None, None, None, None)

    for token, extra, state in choices:

        # Handle the unconditional transition.

        if token is None:
            return state, pos, extra, pos

        found_pos = text.find(token, pos)

        if found_pos == -1:
            continue

        # Only a strictly earlier occurrence replaces an existing candidate,
        # meaning that earlier choices are preferred at the same position.

        if best[0] is None or found_pos < best[1]:
            best = (state, found_pos, extra, found_pos + len(token))

    return best
31.193 +
31.194 +
31.195 +
31.196 +# Lexical partitioning.
31.197 +
class Lexer:

    "An iterator dividing text into spans, each created by the current state."

    def __init__(self, text):
        self.text = text
        self.pos = 0
        self.state = BETWEEN_TAGS

    def __iter__(self):
        return self

    def _span(self, text):

        "Return a span of the current state's class holding 'text'."

        return self.state(text)

    def _end_of_input(self):

        "Return the remaining text as a span, recording that no text is left."

        remaining = self.text[self.pos:]
        self.pos = None
        return self._span(remaining)

    def next(self):

        """
        Return the next span from the text, raising StopIteration if the final
        span has already been returned.
        """

        if self.pos is None:
            raise StopIteration

        # Find the token causing a transition, obtaining the handler choosing
        # the next state together with the token's start and end positions.

        handler, pos, extra, next_pos = find_one(self.text, self.pos, self.state.transitions)

        # Without any such token, all remaining text belongs to this state.

        if handler is None:
            return self._end_of_input()

        # Make the span using the current state before the state is changed.

        span = self._span(self.text[self.pos:pos] + extra)

        self.state = handler(self.text, pos)
        self.pos = next_pos

        return span
31.236 +
31.237 +# vim: tabstop=4 expandtab shiftwidth=4
32.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
32.2 +++ b/moinformat/utils/htmlparse/parse.py Fri Aug 18 00:18:42 2023 +0200
32.3 @@ -0,0 +1,110 @@
32.4 +#!/usr/bin/env python
32.5 +
32.6 +"""
32.7 +An absurdly minimal HTML parser.
32.8 +
32.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
32.10 +
32.11 +This program is free software; you can redistribute it and/or modify it under
32.12 +the terms of the GNU General Public License as published by the Free Software
32.13 +Foundation; either version 3 of the License, or (at your option) any later
32.14 +version.
32.15 +
32.16 +This program is distributed in the hope that it will be useful, but WITHOUT
32.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
32.19 +details.
32.20 +
32.21 +You should have received a copy of the GNU General Public License along with
32.22 +this program. If not, see <http://www.gnu.org/licenses/>.
32.23 +"""
32.24 +
32.25 +from moinformat.utils.htmlparse.token import Tokeniser
32.26 +from moinformat.utils.htmlparse.tree import Attribute, AttributeValue, \
32.27 + Comment, Directive, Element, \
32.28 + Fragment, Inclusion, Node, Text
32.29 +
32.30 +
32.31 +
32.32 +# Token processing employing the tokens from tokenisation.
32.33 +
class Visitor:

    """
    A token visitor building a document tree. The root of the tree is a
    Fragment kept at the bottom of the stack of open nodes, with the current
    node being the node at the top of the stack.

    Tokens that cannot be placed in the tree cause ValueError to be raised.
    """

    def __init__(self):
        self.node = Fragment()
        self.stack = [self.node]

    def append(self, node):

        "Add 'node' as a child of the current node."

        self.node.nodes.append(node)

    def push(self, node):

        "Add 'node' as a child of the current node and make it current."

        self.stack.append(node)
        self.append(node)
        self.node = node

    def pop(self):

        """
        Make the parent of the current node the current node. ValueError is
        raised if only the root node remains, leaving the stack untouched.
        """

        if len(self.stack) < 2:
            raise ValueError("no open element to close")

        self.stack.pop()
        self.node = self.stack[-1]

    def visit(self, token):

        "Dispatch 'token' to the appropriate handler method."

        token.visit(self)

    # Specific handler methods.

    def attribute(self, token):

        "Add an attribute named by 'token' to the current element."

        if not isinstance(self.node, Element):
            raise ValueError(token)

        self.node.attributes.append(Attribute(token.value))

    def attribute_value(self, token):

        "Set the value of the current element's last attribute from 'token'."

        # A value needs an element and an attribute to be associated with.

        if not isinstance(self.node, Element) or not self.node.attributes:
            raise ValueError(token)

        self.node.attributes[-1].value = AttributeValue(token.value, token.quote)

    def comment(self, token):
        self.append(Comment(token.value))

    def directive(self, token):
        self.append(Directive(token.value))

    def inclusion(self, token):
        self.append(Inclusion(token.value))

    def tag(self, token):

        """
        Open a new element for a start tag 'token'. For an end tag, close the
        current element if its name matches that of the tag.
        """

        if not token.is_end():
            self.push(Element(token.tag_name()))

        # Only elements have names: the root fragment cannot be closed.

        elif isinstance(self.node, Element) and self.node.name == token.tag_name():
            self.pop()
        else:
            raise ValueError(token)

    def tag_close(self, token):

        "Close the current element."

        if not isinstance(self.node, Element):
            raise ValueError(token)

        self.pop()

    def text(self, token):
        self.append(Text(token.value))
32.90 +
32.91 +
32.92 +
32.93 +# Parsing and document construction.
32.94 +
class Parser:

    "A parser passing tokens from a tokeniser to a tree-building visitor."

    def __init__(self, text):
        self.tokeniser = Tokeniser(text)
        self.visitor = Visitor()

    def __iter__(self):
        return self

    def next(self):

        """
        Process the next token, returning None. StopIteration from the
        tokeniser is propagated to indicate the end of the tokens.
        """

        token = self.tokeniser.next()
        self.visitor.visit(token)

    # Support the iterator protocol method name used by Python 3.

    __next__ = next

    def parse(self):

        "Process all tokens and return the root node of the document."

        for _none in self:
            pass

        # The root is at the bottom of the visitor's stack. The current node
        # is only the root if no elements have been left open.

        return self.visitor.stack[0]
32.112 +
32.113 +# vim: tabstop=4 expandtab shiftwidth=4
33.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
33.2 +++ b/moinformat/utils/htmlparse/token.py Fri Aug 18 00:18:42 2023 +0200
33.3 @@ -0,0 +1,160 @@
33.4 +#!/usr/bin/env python
33.5 +
33.6 +"""
33.7 +An absurdly minimal HTML tokeniser.
33.8 +
33.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
33.10 +
33.11 +This program is free software; you can redistribute it and/or modify it under
33.12 +the terms of the GNU General Public License as published by the Free Software
33.13 +Foundation; either version 3 of the License, or (at your option) any later
33.14 +version.
33.15 +
33.16 +This program is distributed in the hope that it will be useful, but WITHOUT
33.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
33.19 +details.
33.20 +
33.21 +You should have received a copy of the GNU General Public License along with
33.22 +this program. If not, see <http://www.gnu.org/licenses/>.
33.23 +"""
33.24 +
33.25 +from collections import deque
33.26 +from moinformat.utils.htmlparse.lex import Lexer
33.27 +
33.28 +
33.29 +
33.30 +# Document token classes.
33.31 +
class Token:

    "A document token holding a value."

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        name = self.__class__.__name__
        return "%s(%r)" % (name, self.value)
33.38 +
class Attribute(Token):

    "A token handled by the attribute method of a visitor."

    def visit(self, visitor):
        return visitor.attribute(self)
33.42 +
class AttributeValue(Token):

    "A token holding an attribute value together with the quoting used."

    def __init__(self, value, quote):
        self.value = value
        self.quote = quote

    def __repr__(self):
        details = (self.__class__.__name__, self.value, self.quote)
        return "%s(%r, %r)" % details

    def visit(self, visitor):
        return visitor.attribute_value(self)
33.53 +
class Comment(Token):

    "A token handled by the comment method of a visitor."

    def visit(self, visitor):
        return visitor.comment(self)
33.57 +
class Directive(Token):

    "A token handled by the directive method of a visitor."

    def visit(self, visitor):
        return visitor.directive(self)
33.61 +
class Inclusion(Token):

    "A token handled by the inclusion method of a visitor."

    def visit(self, visitor):
        return visitor.inclusion(self)
33.65 +
class Tag(Token):

    "A token for a start or end tag, the value of an end tag starting with '/'."

    def visit(self, visitor):
        return visitor.tag(self)

    def is_end(self):

        "Return whether the value starts with '/', indicating an end tag."

        return self.value.startswith("/")

    def tag_name(self):

        """
        Return the value without the leading "/" of any end tag. Where nothing
        follows the "/", the value is returned unchanged.
        """

        if self.is_end():
            name = self.value[1:]
            if name:
                return name

        return self.value
33.75 +
class TagClose:

    "A token without a value, handled by the tag_close method of a visitor."

    def __repr__(self):
        return "%s()" % self.__class__.__name__

    def visit(self, visitor):
        return visitor.tag_close(self)
33.82 +
class Text(Token):

    "A token handled by the text method of a visitor."

    def visit(self, visitor):
        return visitor.text(self)
33.86 +
33.87 +
33.88 +
33.89 +# Tidying visitor employing the spans from lexical partitioning.
33.90 +
class Visitor:

    """
    A span visitor returning a token for each span visited, queueing any
    additional tokens found in the same span for later retrieval.
    """

    def __init__(self):
        self.queued = deque()

    def visit(self, span):

        "Dispatch 'span' to the appropriate handler, returning its token."

        return span.visit(self)

    def _queue_attributes(self, tokens):

        "Queue an attribute token for each of the strings in 'tokens'."

        self.queued.extend(Attribute(token) for token in tokens)

    # Specific handler methods.

    def between_tags(self, span):
        return Text(span.text)

    def in_comment(self, span):
        return Comment(span.text)

    def in_directive(self, span):
        return Directive(span.text)

    def in_inclusion(self, span):
        return Inclusion(span.text)

    def in_tag(self, span):

        # The first word provides the tag, the rest being attribute names.

        words = span.text.split()
        attributes = words[1:]
        self._queue_attributes(attributes)
        return Tag(words[0])

    def at_end_of_tag(self, span):
        return TagClose()

    def after_attribute_value(self, span):

        # All words are attribute names, the first being returned immediately.

        self._queue_attributes(span.text.split())
        return self.queued.popleft()

    def at_attribute_value(self, span):

        # The first word is an unquoted value, the rest being attribute names.

        words = span.text.split()
        attributes = words[1:]
        self._queue_attributes(attributes)
        return AttributeValue(words[0], "")

    def in_dq_attribute_value(self, span):
        return AttributeValue(span.text, '"')

    def in_sq_attribute_value(self, span):
        return AttributeValue(span.text, "'")
33.139 +
33.140 +
33.141 +
33.142 +# Tokenising.
33.143 +
class Tokeniser:

    "An iterator producing tokens from the lexical spans found in text."

    def __init__(self, text):
        self.lexer = Lexer(text)
        self.visitor = Visitor()

    def __iter__(self):
        return self

    def next(self):

        """
        Return the next token. StopIteration from the lexer is propagated to
        indicate the end of the tokens.
        """

        # Tokens queued when handling an earlier span are returned first.

        pending = self.visitor.queued

        if pending:
            return pending.popleft()

        # Obtain the next span not regarded as empty.

        span = self.lexer.next()

        while span.empty():
            span = self.lexer.next()

        return self.visitor.visit(span)
33.162 +
33.163 +# vim: tabstop=4 expandtab shiftwidth=4
34.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
34.2 +++ b/moinformat/utils/htmlparse/tree.py Fri Aug 18 00:18:42 2023 +0200
34.3 @@ -0,0 +1,102 @@
34.4 +#!/usr/bin/env python
34.5 +
34.6 +"""
34.7 +HTML document nodes.
34.8 +
34.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
34.10 +
34.11 +This program is free software; you can redistribute it and/or modify it under
34.12 +the terms of the GNU General Public License as published by the Free Software
34.13 +Foundation; either version 3 of the License, or (at your option) any later
34.14 +version.
34.15 +
34.16 +This program is distributed in the hope that it will be useful, but WITHOUT
34.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
34.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34.19 +details.
34.20 +
34.21 +You should have received a copy of the GNU General Public License along with
34.22 +this program. If not, see <http://www.gnu.org/licenses/>.
34.23 +"""
34.24 +
34.25 +# Element attributes.
34.26 +
class Attribute:

    "An element attribute having a name and an optional value."

    def __init__(self, name, value=None):
        self.value = value
        self.name = name

    def __repr__(self):
        details = (self.__class__.__name__, self.name, self.value)
        return "%s(%r, %r)" % details

    def visit(self, visitor):
        return visitor.attribute(self)
34.37 +
34.38 +
34.39 +
34.40 +# Nodes containing other nodes.
34.41 +
class Fragment:

    "A node containing a list of other nodes."

    def __init__(self, nodes=None):

        # Any false value, including an empty list, yields a new list.

        if not nodes:
            nodes = []

        self.nodes = nodes

    def __repr__(self):
        name = self.__class__.__name__
        return "%s(%r)" % (name, self.nodes)

    def visit(self, visitor):
        return visitor.fragment(self)
34.51 +
class Element(Fragment):

    "A fragment having a name and a list of attributes."

    def __init__(self, name, attributes=None, nodes=None):
        Fragment.__init__(self, nodes)

        # Any false value, including an empty list, yields a new list.

        if not attributes:
            attributes = []

        self.name = name
        self.attributes = attributes

    def __repr__(self):
        details = (self.__class__.__name__, self.name, self.attributes, self.nodes)
        return "%s(%r, %r, %r)" % details

    def visit(self, visitor):
        return visitor.element(self)
34.63 +
34.64 +
34.65 +
34.66 +# Nodes having values.
34.67 +
class Node:

    "A node having a value instead of containing other nodes."

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        name = self.__class__.__name__
        return "%s(%r)" % (name, self.value)

    def visit(self, visitor):
        return visitor.node(self)
34.77 +
class AttributeValue(Node):

    "A node holding an attribute value together with the quoting used."

    def __init__(self, value, quote):
        Node.__init__(self, value)
        self.quote = quote

    def __repr__(self):
        details = (self.__class__.__name__, self.value, self.quote)
        return "%s(%r, %r)" % details

    def visit(self, visitor):
        return visitor.attribute_value(self)
34.88 +
class Comment(Node):

    "A node handled by the comment method of a visitor."

    def visit(self, visitor):
        return visitor.comment(self)
34.92 +
class Directive(Node):

    "A node handled by the directive method of a visitor."

    def visit(self, visitor):
        return visitor.directive(self)
34.96 +
class Inclusion(Node):

    "A node handled by the inclusion method of a visitor."

    def visit(self, visitor):
        return visitor.inclusion(self)
34.100 +
class Text(Node):

    "A node handled by the text method of a visitor."

    def visit(self, visitor):
        return visitor.text(self)
34.104 +
34.105 +# vim: tabstop=4 expandtab shiftwidth=4
35.1 --- a/tests/test_parser.py Tue Jun 20 18:58:47 2023 +0200
35.2 +++ b/tests/test_parser.py Fri Aug 18 00:18:42 2023 +0200
35.3 @@ -1,5 +1,24 @@
35.4 #!/usr/bin/env python
35.5
35.6 +"""
35.7 +Test document parsing and serialisation.
35.8 +
35.9 +Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
35.10 +
35.11 +This program is free software; you can redistribute it and/or modify it under
35.12 +the terms of the GNU General Public License as published by the Free Software
35.13 +Foundation; either version 3 of the License, or (at your option) any later
35.14 +version.
35.15 +
35.16 +This program is distributed in the hope that it will be useful, but WITHOUT
35.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
35.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
35.19 +details.
35.20 +
35.21 +You should have received a copy of the GNU General Public License along with
35.22 +this program. If not, see <http://www.gnu.org/licenses/>.
35.23 +"""
35.24 +
35.25 from os import listdir
35.26 from os.path import abspath, split
35.27 import sys
35.28 @@ -17,10 +36,12 @@
35.29
35.30 # Import specific objects.
35.31
35.32 -from moinformat import Metadata, make_input, make_output, make_parser, \
35.33 - make_serialiser, parse, serialise
35.34 +from moinformat import get_parser, Metadata, make_input, make_output, \
35.35 + make_parser, make_serialiser, parse, serialise
35.36 from moinformat.tree.moin import Container
35.37
35.38 +
35.39 +
35.40 def test_input(d, s):
35.41
35.42 "Compare serialised output from 'd' with its original form 's'."
35.43 @@ -61,7 +82,8 @@
35.44 metadata.set("output_format", "html")
35.45 metadata.set("mapping", {"MoinMoin" : "https://moinmo.in/"})
35.46
35.47 - print serialise(d, make_serialiser(metadata))
35.48 + result = serialise(d, make_serialiser(metadata))
35.49 + print output.encode(result)
35.50 print "-" * 60
35.51 print
35.52
35.53 @@ -69,7 +91,9 @@
35.54
35.55 def test_tree(d, t, ts):
35.56
35.57 - "Compare tree structure 'd' with simplified, expected form 't' from 'ts'."
35.58 + """
35.59 + Compare tree structure 'd' with simplified, expected form 't' from 'ts'.
35.60 + """
35.61
35.62 failing = t.test(d)
35.63
35.64 @@ -78,9 +102,12 @@
35.65
35.66 # Show tree versus expected forms.
35.67
35.68 + moin_prettyprinter = make_serialiser(Metadata({"input_format" : "moin"}), "pretty")
35.69 + tree_prettyprinter = make_serialiser(Metadata({"input_format" : "pretty"}), "pretty")
35.70 +
35.71 print not failing
35.72 print "-" * 60
35.73 - print d.prettyprint()
35.74 + print serialise(d, moin_prettyprinter)
35.75 if failing:
35.76 print "-" * 60
35.77 print ts
35.78 @@ -90,102 +117,14 @@
35.79 print repr(simple)
35.80 print repr(tree)
35.81 print "-" * 60
35.82 - print tree.prettyprint()
35.83 + print serialise(tree, tree_prettyprinter)
35.84 print "-" * 60
35.85 - print simple.prettyprint()
35.86 + print serialise(simple, tree_prettyprinter)
35.87 print "-" * 60
35.88 print
35.89
35.90 return not failing
35.91
35.92 -class Node:
35.93 -
35.94 - "A simplified tree node representation."
35.95 -
35.96 - def __init__(self, name):
35.97 - self.name = name
35.98 - self.nodes = []
35.99 -
35.100 - def __repr__(self):
35.101 - return "Node(%r, %r)" % (self.name, self.nodes)
35.102 -
35.103 - def prettyprint(self, indent=""):
35.104 - l = []
35.105 - l.append("%s%s%s" % (indent, self.name, len(self.nodes) and " nodes=%d" % len(self.nodes) or ""))
35.106 - for node in self.nodes:
35.107 - l.append(node.prettyprint(indent + " "))
35.108 - return "\n".join(l)
35.109 -
35.110 - def append(self, node):
35.111 - self.nodes.append(node)
35.112 -
35.113 - def test(self, other):
35.114 -
35.115 - """
35.116 - Test whether this node is considered equivalent to 'other', where
35.117 - 'other' is a moinparser.tree node.
35.118 -
35.119 - Return any failing tree nodes or None.
35.120 - """
35.121 -
35.122 - if other.__class__.__name__ != self.name:
35.123 - return self, other, "name"
35.124 -
35.125 - if isinstance(other, Container):
35.126 - for node, other_node in map(None, self.nodes, other.nodes):
35.127 - if node is None or other_node is None:
35.128 - return self, other, node is None and "simple" or "document"
35.129 - t = node.test(other_node)
35.130 - if t:
35.131 - return t
35.132 - elif self.nodes:
35.133 - return self, other, "empty"
35.134 -
35.135 - return None
35.136 -
35.137 -def parse_tree(s):
35.138 -
35.139 - "Parse the tree structure representation in 's'."
35.140 -
35.141 - indent = 0
35.142 - branches = []
35.143 -
35.144 - for line in s.split("\n"):
35.145 - line = line.rstrip()
35.146 - if not line:
35.147 - continue
35.148 -
35.149 - new_indent = line.rfind(" ") + 1
35.150 - node = Node(line[new_indent:])
35.151 -
35.152 - # Establish a branch to add nodes to.
35.153 -
35.154 - if not branches:
35.155 - branches.append(node)
35.156 - else:
35.157 - # Note the current node as outermost branch.
35.158 -
35.159 - if new_indent > indent:
35.160 - branches.append(node)
35.161 - else:
35.162 - # Reduced indent involves obtaining an inner branch again.
35.163 -
35.164 - while indent > new_indent:
35.165 - del branches[-1]
35.166 - indent -= 2
35.167 -
35.168 - # Note the current node as outermost branch.
35.169 -
35.170 - branches[-1] = node
35.171 -
35.172 - # Append the current node to the parent branch.
35.173 -
35.174 - branches[-2].append(node)
35.175 -
35.176 - indent = new_indent
35.177 -
35.178 - return branches[0]
35.179 -
35.180 def get_filename(filename):
35.181
35.182 "Using 'filename', return the core text filename and any encoding."
35.183 @@ -206,10 +145,14 @@
35.184
35.185 if input.dir.exists(tree_filename):
35.186 ts = input.readfile(tree_filename)
35.187 - return ts, parse_tree(ts)
35.188 + return ts, parse(ts, make_parser(Metadata(), "pretty"))
35.189 else:
35.190 return None, None
35.191
35.192 +
35.193 +
35.194 +# Main program.
35.195 +
35.196 if __name__ == "__main__":
35.197 args = sys.argv[1:]
35.198
35.199 @@ -222,7 +165,7 @@
35.200
35.201 -q Suppress test output, reporting only success or failure
35.202 --quiet Equivalent to -q
35.203 -"""
35.204 +""" % sys.argv[0]
35.205 sys.exit(1)
35.206
35.207 for arg in ["-q", "--quiet"]: