# HG changeset patch
# User Paul Boddie
# Date 1555106865 -7200
# Node ID 90e7ce5b6cefb8c607b78bd131bb0d108faf0c7a
# Parent 2149d41d53561f03d24db91873f8c4c886a71a39
Separated link target parsing from translation; added a link target abstraction.
Changed HTML link translation to work with this new abstraction. Added
aggregation of link targets in the parser for further processing.

diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/links/common.py
--- a/moinformat/links/common.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/links/common.py	Sat Apr 13 00:07:45 2019 +0200
@@ -23,13 +23,19 @@
 
     "A link abstraction."
 
-    def __init__(self, target, label, type):
+    def __init__(self, target, label, link_target=None):
 
-        "Initialise the link with the given 'target', 'label' and 'type'."
+        """
+        Initialise the link with the given 'target' and 'label' and
+        'link_target' object.
+        """
 
         self.target = target
         self.label = label
-        self.type = type
+        self.link_target = link_target
+
+    def __repr__(self):
+        return "Link(%r, %r, %r)" % (self.target, self.label, self.link_target)
 
     def get_target(self):
         return self.target
@@ -37,8 +43,8 @@
     def get_label(self):
         return self.label or self.target
 
-    def get_type(self):
-        return self.type
+    def get_link_target(self):
+        return self.link_target
 
 class Linker:
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/links/html.py
--- a/moinformat/links/html.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/links/html.py	Sat Apr 13 00:07:45 2019 +0200
@@ -21,7 +21,6 @@
 
 from moinformat.links.common import Link, Linker, resolve
 from urllib import quote, quote_plus
-from urlparse import urlparse
 
 class HTMLLinker(Linker):
 
@@ -45,13 +44,6 @@
         levels = pagename.count("/") + 1
         return "/".join([".."] * levels)
 
-    def is_url(self, target):
-
-        "Return whether the 'target' references a URL."
-
-        scheme, host, path, params, query, fragment = urlparse(target)
-        return scheme and target or None
-
     def normalise(self, path):
 
         "Return a normalised form of 'path'."
@@ -61,51 +53,51 @@
     def translate(self, target):
 
         """
-        Translate the 'target', returning a tuple containing the rewritten
-        target string and a suitable default label.
+        Translate the 'target', returning a link object containing the rewritten
+        target and a suitable default label.
         """
 
-        target = target.rstrip("/")
+        identifier = target.get_identifier()
+        text = target.get_text()
+        type = target.get_type()
 
-        # Fragments. Remove the leading hash for the label.
+        # Fragments.
 
-        if target.startswith("#"):
-            return Link(self.quote(target), target.lstrip("#"), "fragment")
+        if type == "fragment":
+            return Link(self.quote(text), identifier, target)
 
         # Sub-pages. Remove the leading slash for the label.
 
-        if target.startswith("/"):
-            return Link(self.translate_pagename(target), target.lstrip("/"), "page")
+        if type == "sub-page":
+            return Link(self.translate_pagename(text), identifier, target)
 
         # Sibling (of ancestor) pages.
 
-        if target.startswith("../"):
-            return Link(self.translate_pagename(target), None, "page")
-
-        # Attachment or interwiki link.
-
-        rewritten = self.translate_qualified_link(target)
-        if rewritten:
-            return rewritten # includes label
+        if type == "sibling-page":
+            return Link(self.translate_pagename(text), identifier, target)
 
         # Plain URL.
 
-        rewritten = self.is_url(target)
-        if rewritten:
-            return Link(rewritten, None, "url")
+        if type == "url":
+            return Link(text, identifier, target)
 
         # Top-level pages.
 
-        return Link(self.translate_pagename(target), None, "page")
+        if type == "page":
+            return Link(self.translate_pagename(text), identifier, target)
+
+        # Attachment or interwiki link.
 
-    def translate_pagename(self, target):
+        return self.translate_qualified_link(target)
 
-        "Translate the pagename in 'target'."
+    def translate_pagename(self, text):
+
+        "Translate the pagename in 'text'."
 
         # Obtain the target pagename and the fragment.
         # Split the pagename into path components.
 
-        t = target.split("#", 1)
+        t = text.split("#", 1)
 
         # Determine the actual pagename referenced.
         # Replace the root pagename if it appears.
@@ -136,22 +128,20 @@
         Return None if the link is not suitable.
         """
 
-        t = target.split(":", 1)
-        if len(t) != 2:
-            return None
-
-        prefix, target = t
+        identifier = target.get_identifier()
+        text = target.get_text()
+        type = target.get_type()
 
         # Attachment links.
 
-        if prefix == "attachment":
-            return Link(self.translate_attachment(target), target, "attachment")
+        if type == "attachment":
+            return Link(self.translate_attachment(identifier), identifier, target)
 
         # Interwiki links.
 
-        url = self.mapping.get(prefix)
+        url = self.mapping.get(type)
         if url:
-            return Link(self.translate_interwiki(url, target), target, "interwiki")
+            return Link(self.translate_interwiki(url, identifier), identifier, target)
 
         return None
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/parsers/moin.py
--- a/moinformat/parsers/moin.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/parsers/moin.py	Sat Apr 13 00:07:45 2019 +0200
@@ -44,6 +44,10 @@
                                  TableCell, TableRow, Text, Transclusion, \
                                  Underline, Verbatim
 
+# Link parsing.
+
+from moinformat.utils.links import parse_link_target
+
 join = "".join
 
 class MoinParser(ParserBase):
@@ -69,6 +73,10 @@
 
         self.headings = []
 
+        # Record link targets for resource identification.
+
+        self.link_targets = []
+
     # Principal parser methods.
 
     def parse(self, s):
@@ -565,7 +573,13 @@
         target = self.match_group("target")
         end = self.match_group("end")
 
-        span = cls([], target)
+        # Obtain an object for the link target.
+
+        link_target = parse_link_target(target, self.metadata)
+
+        # Obtain an object for the node.
+
+        span = cls([], link_target)
 
         # Obtain the extra details.
 
@@ -586,6 +600,10 @@
 
         region.append_inline(span)
 
+        # Record the link target for later processing.
+
+        self.root.link_targets.append(link_target)
+
     def parse_link(self, region):
         self._parse_link(region, Link, self.link_pattern_names)
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/utils/links.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/links.py	Sat Apr 13 00:07:45 2019 +0200
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+"""
+Link target parsing.
+
+Copyright (C) 2018, 2019 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from urlparse import urlparse
+
+class LinkTarget:
+
+    "A link target abstraction."
+
+    def __init__(self, type, text, identifier=None):
+
+        "Initialise the link with the given 'type', 'text' and 'identifier'."
+
+        self.type = type
+        self.text = text
+        self.identifier = identifier
+
+    def __repr__(self):
+        return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)
+
+    def __str__(self):
+        return self.text
+
+    __unicode__ = __str__
+
+    def get_identifier(self):
+        return self.identifier or self.text
+
+    def get_text(self):
+        return self.text
+
+    def get_type(self):
+        return self.type
+
+# Parsing and recognition functions.
+
+def is_url(target):
+
+    "Return whether the 'target' references a URL."
+
+    scheme, host, path, params, query, fragment = urlparse(target)
+    return scheme and target or None
+
+def parse_link_target(target, metadata=None):
+
+    """
+    Parse a link 'target', returning a link target object. Use any 'metadata'
+    to identify certain link types.
+    """
+
+    # Fragments.
+
+    if target.startswith("#"):
+        return LinkTarget("fragment", target, target.lstrip("#"))
+
+    # Sub-pages.
+
+    if target.startswith("/"):
+        return LinkTarget("sub-page", target, target.lstrip("/").rstrip("/"))
+
+    # Sibling (of ancestor) pages.
+
+    if target.startswith("../"):
+        return LinkTarget("sibling-page", target, target.rstrip("/"))
+
+    # Attachment or interwiki link.
+
+    result = parse_qualified_link_target(target, metadata)
+    if result:
+        return result
+
+    # Plain URL.
+
+    if is_url(target):
+        return LinkTarget("url", target)
+
+    # Top-level pages.
+
+    return LinkTarget("page", target)
+
+def parse_qualified_link_target(target, metadata=None):
+
+    """
+    Parse a possible qualified link 'target', returning a link target object or
+    None if the target is not suitable. Use any 'metadata' to identify certain
+    link types.
+    """
+
+    t = target.split(":", 1)
+
+    if len(t) != 2:
+        return None
+
+    prefix, identifier = t
+
+    mapping = metadata and metadata.get("mapping")
+
+    if prefix == "attachment" or mapping and mapping.get(prefix):
+        return LinkTarget(prefix, target, identifier)
+
+    return None
+
+# vim: tabstop=4 expandtab shiftwidth=4
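A usage sketch follows (not part of the changeset above): it exercises the new
parse_link_target() function and the LinkTarget accessors that the HTML linker
and the parser now rely on. The sample targets, the "MoinMoin" interwiki entry
and the plain dictionary standing in for the parser's metadata object are
illustrative assumptions; parse_qualified_link_target() only calls
.get("mapping") on whatever metadata it is given, so a dictionary suffices
here. The code is Python 2, matching the urlparse and urllib imports used by
the modules in the patch.

    from moinformat.utils.links import parse_link_target

    # Illustrative stand-in for the parser's metadata: only the "mapping"
    # entry is consulted when qualified (interwiki) targets are parsed.
    metadata = {"mapping" : {"MoinMoin" : "https://moinmo.in/"}}

    targets = [
        "#section",               # fragment
        "/SubPage",               # sub-page
        "../SiblingPage",         # sibling (of ancestor) page
        "attachment:notes.txt",   # attachment
        "MoinMoin:HelpContents",  # interwiki, via the mapping
        "http://example.com/",    # plain URL
        "TopLevelPage",           # top-level page
        ]

    # Each target is classified once by the utility; linkers such as
    # HTMLLinker then read the type, text and identifier from the object.
    for target in targets:
        link_target = parse_link_target(target, metadata)
        print "%-24s type=%-12s identifier=%s" % (
            target, link_target.get_type(), link_target.get_identifier())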