# HG changeset patch # User Paul Boddie # Date 1532615617 -7200 # Node ID b295c92a7933f05c4c70cc2d0d1b30083b9c082a # Parent 8f0697b0a53dea3c3589c9f5b2afd89de790afcb Introduced support for link translation and different linking schemes, initially supporting HTML-based linking. Added a page name option to the conversion script to control link translation. diff -r 8f0697b0a53d -r b295c92a7933 convert.py --- a/convert.py Tue Jul 24 23:36:13 2018 +0200 +++ b/convert.py Thu Jul 26 16:33:37 2018 +0200 @@ -10,6 +10,7 @@ l = filenames = [] formats = [] + pagenames = [] tree = False macros = False @@ -25,12 +26,18 @@ elif arg == "--macros": macros = True - # Switch to collecting formats + # Switch to collecting formats. elif arg == "--format": l = formats continue + # Switch to collecting page names. + + elif arg == "--pagename": + l = pagenames + continue + # Collect options and arguments. else: @@ -40,7 +47,12 @@ l = filenames + format = formats and formats[0] or "html" + + # Derive the page name from the filename if not specified. + filename = filenames[0] + pagename = pagenames and pagenames[0] or split(filename)[-1] f = open(filename) try: @@ -53,7 +65,7 @@ if tree: print d.prettyprint() else: - format = formats and formats[0] or "html" + p.translate_links(format, pagename) print serialise(d, get_serialiser(format)) finally: f.close() diff -r 8f0697b0a53d -r b295c92a7933 moinformat/links/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/links/__init__.py Thu Jul 26 16:33:37 2018 +0200 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +""" +Linking scheme implementations. + +Copyright (C) 2018 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. 
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.links.manifest import linkers
+
+# Top-level functions.
+
+def get_linker(name):
+
+    """
+    Return the linking scheme handler with the given 'name' or None if no such
+    handler is found.
+    """
+
+    return linkers.get(name)
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 8f0697b0a53d -r b295c92a7933 moinformat/links/common.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/links/common.py Thu Jul 26 16:33:37 2018 +0200
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+
+"""
+Common linking scheme functionality.
+
+Copyright (C) 2018 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+class Linker:
+
+    "Translate Moin links into other forms."
+
+    def __init__(self, pagename, mapping=None):
+
+        """
+        Initialise the linker with the 'pagename' and optional interwiki
+        'mapping'.
+        """
+
+        self.pagename = pagename
+        self.mapping = mapping or {}
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 8f0697b0a53d -r b295c92a7933 moinformat/links/html.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/links/html.py Thu Jul 26 16:33:37 2018 +0200
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+
+"""
+HTML linking scheme.
+
+Copyright (C) 2018 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.links.common import Linker
+from urlparse import urlparse
+
+class HTMLLinker(Linker):
+
+    "Translate Moin links into HTML links."
+
+    name = "html"
+
+    def get_top_level(self):
+
+        "Return a relative link to the top level, empty for top-level pages."
+
+        levels = self.pagename.count("/")
+        return "/".join([".."] * levels)
+
+    def is_url(self, link):
+
+        "Return whether the 'link' references a URL."
+
+        scheme, host, path, params, query, fragment = urlparse(link.target)
+        return scheme
+
+    def normalise(self, path):
+
+        "Return a normalised form of 'path'."
+
+        return not path.endswith("/") and "%s/" % path or path
+
+    def translate(self, link):
+
+        "Translate the 'link', rewriting the target."
+
+        target = link.target.rstrip("/")
+
+        # Sub-pages.
+
+        if target.startswith("/"):
+            self.translate_subpage(link, target)
+
+        # Sibling (of ancestor) pages.
+
+        elif target.startswith("../"):
+            self.translate_relative(link, target)
+
+        # Attachment or interwiki link.
+
+        elif self.translate_qualified_link(link, target):
+            pass
+
+        # Plain URL.
+
+        elif self.is_url(link):
+            pass
+
+        # Top-level pages.
+
+        else:
+            top_level = self.get_top_level()
+            link.target = "%s%s" % (top_level and "%s/" % top_level or "", target)
+
+    def translate_qualified_link(self, link, target):
+
+        """
+        Translate a possible qualified 'link', returning whether translation
+        occurred.
+        """
+
+        t = target.split(":", 1)
+        if len(t) != 2:
+            return False
+
+        prefix, target = t
+
+        # Attachment links.
+
+        if prefix == "attachment":
+            self.translate_attachment(link, target)
+            return True
+
+        # Interwiki links.
+
+        url = self.mapping.get(prefix)
+        if url:
+            self.translate_interwiki(link, url, target)
+            return True
+
+        return False
+
+    # Specific link translators.
+
+    def translate_attachment(self, link, target):
+
+        "Update 'link' for the given attachment 'target'."
+
+        # Add a separator after any top-level prefix, as for top-level pages,
+        # so that sub-pages do not produce targets like "..attachments/...".
+
+        top_level = self.get_top_level()
+        link.target = "%sattachments/%s/%s" % (
+            top_level and "%s/" % top_level or "", self.pagename, target)
+
+    def translate_interwiki(self, link, url, target):
+
+        "Update 'link' for the given interwiki 'target'."
+
+        link.target = "%s%s" % (self.normalise(url), target)
+
+    def translate_relative(self, link, target):
+
+        "Update 'link' for the given relative 'target'."
+
+        link.target = target[len("../"):]
+
+    def translate_subpage(self, link, target):
+
+        "Update 'link' for the given subpage 'target'."
+
+        link.target = ".%s" % target
+
+linker = HTMLLinker
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 8f0697b0a53d -r b295c92a7933 moinformat/links/manifest.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/links/manifest.py Thu Jul 26 16:33:37 2018 +0200
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+"""
+Linking scheme implementation manifest.
+
+Copyright (C) 2017, 2018 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.imports import get_extensions
+from os.path import split
+
+reserved = ["__init__", "common", "manifest"]
+
+# Obtain details of this module's package.
+
+dirname = split(__file__)[0]
+package = __name__.rsplit(".", 1)[0]
+
+# Define an attribute mapping names to modules.
+
+modules = {}
+get_extensions(dirname, package, modules, reserved)
+
+# Collect the linkers exposed by the modules, indexed by scheme name.
+
+linkers = {}
+
+# Use names declared in each handler to register the handlers:
+# linker.name -> linker
+
+for module in modules.values():
+    linkers[module.linker.name] = module.linker
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 8f0697b0a53d -r b295c92a7933 moinformat/parsers/moin.py
--- a/moinformat/parsers/moin.py Tue Jul 24 23:36:13 2018 +0200
+++ b/moinformat/parsers/moin.py Thu Jul 26 16:33:37 2018 +0200
@@ -19,11 +19,23 @@
 this program. If not, see <http://www.gnu.org/licenses/>.
 """
 
+# Document transformations.
+
+from moinformat.links import get_linker
 from moinformat.macros import get_macro
+
+# Parser functionality and pattern definition.
+
 from moinformat.parsers.common import ParserBase, get_patterns, \
                                       excl, expect, group, optional, recur, \
                                       repeat
+
+# Serialisation.
+
 from moinformat.serialisers import serialise
+
+# Document tree nodes.
+
 from moinformat.tree.moin import Break, DefItem, DefTerm, FontStyle, Heading, \
                                  Larger, Link, List, ListItem, Macro, \
                                  Monospace, Region, Rule, Smaller, \
@@ -53,9 +65,10 @@
 
         ParserBase.__init__(self, default_formats, root)
 
-        # Record macro occurrences for later evaluation.
+        # Record certain node occurrences for later evaluation.
 
         self.macros = []
+        self.links = []
 
     # Principal parser methods.
 
@@ -106,6 +119,35 @@
             macro = macro_cls(node, self.region)
             macro.evaluate()
 
+    # Link translation.
+
+    def translate_links(self, scheme, name, base=""):
+
+        """
+        Translate the link nodes in the document for the given 'scheme' and
+        employing the given document 'name' and 'base'.
+
+        The 'scheme' selects a linker class using get_linker. Where no linker
+        is found for the scheme, the recorded links are left untouched.
+        """
+
+        # Obtain a class for the named linker.
+
+        linker_cls = get_linker(scheme)
+        if not linker_cls:
+            return
+
+        # Instantiate the class with document metadata.
+        # NOTE(review): 'base' is passed in the position of the 'mapping'
+        # parameter of moinformat.links.common.Linker - confirm the intent.
+
+        linker = linker_cls(name, base)
+
+        for node in self.links:
+
+            # Translate the link.
+
+            linker.translate(node)
 
     # Parser methods supporting different page features.
 
@@ -480,6 +522,10 @@
         link = Link(text and [Text(text)], target)
         region.append_inline(link)
 
+        # Record the link for later processing.
+ + self.root.links.append(link) + def parse_macro(self, region): name = self.match_group("name") args = self.match_group("args") diff -r 8f0697b0a53d -r b295c92a7933 tests/test_links.tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_links.tree Thu Jul 26 16:33:37 2018 +0200 @@ -0,0 +1,24 @@ +Region + Block + Text + Link + Text + Text + Link + Text + Text + Link + Text + Text + Link + Text + Text + Link + Text + Text + Link + Text + Text + Link + Text + Text diff -r 8f0697b0a53d -r b295c92a7933 tests/test_links.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_links.txt Thu Jul 26 16:33:37 2018 +0200 @@ -0,0 +1,3 @@ +Links: [[TopLevel|top-level]], [[/SubPage|sub-page]], [[/Sub/SubPage|sub-sub-page]], +[[../Sibling|sibling]], [[../../ParentSibling|sibling of parent]], +[[http://www.python.org/|URL]], [[attachment:image.png|attachment]].