# HG changeset patch
# User Paul Boddie
# Date 1555106865 -7200
# Node ID 90e7ce5b6cefb8c607b78bd131bb0d108faf0c7a
# Parent 2149d41d53561f03d24db91873f8c4c886a71a39
Separated link target parsing from translation; added a link target abstraction.
Changed HTML link translation to work with this new abstraction. Added
aggregation of link targets in the parser for further processing.

diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/links/common.py
--- a/moinformat/links/common.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/links/common.py	Sat Apr 13 00:07:45 2019 +0200
@@ -23,13 +23,19 @@
 
     "A link abstraction."
 
-    def __init__(self, target, label, type):
+    def __init__(self, target, label, link_target=None):
 
-        "Initialise the link with the given 'target', 'label' and 'type'."
+        """
+        Initialise the link with the given 'target' and 'label' and
+        'link_target' object.
+        """
 
         self.target = target
         self.label = label
-        self.type = type
+        self.link_target = link_target
+
+    def __repr__(self):
+        return "Link(%r, %r, %r)" % (self.target, self.label, self.link_target)
 
     def get_target(self):
         return self.target
@@ -37,8 +43,8 @@
     def get_label(self):
         return self.label or self.target
 
-    def get_type(self):
-        return self.type
+    def get_link_target(self):
+        return self.link_target
 
 class Linker:
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/links/html.py
--- a/moinformat/links/html.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/links/html.py	Sat Apr 13 00:07:45 2019 +0200
@@ -21,7 +21,6 @@
 
 from moinformat.links.common import Link, Linker, resolve
 from urllib import quote, quote_plus
-from urlparse import urlparse
 
 class HTMLLinker(Linker):
 
@@ -45,13 +44,6 @@
         levels = pagename.count("/") + 1
         return "/".join([".."] * levels)
 
-    def is_url(self, target):
-
-        "Return whether the 'target' references a URL."
-
-        scheme, host, path, params, query, fragment = urlparse(target)
-        return scheme and target or None
-
     def normalise(self, path):
 
         "Return a normalised form of 'path'."
@@ -61,51 +53,51 @@
     def translate(self, target):
 
         """
-        Translate the 'target', returning a tuple containing the rewritten
-        target string and a suitable default label.
+        Translate the 'target', returning a link object containing the rewritten
+        target and a suitable default label.
         """
 
-        target = target.rstrip("/")
+        identifier = target.get_identifier()
+        text = target.get_text()
+        type = target.get_type()
 
-        # Fragments. Remove the leading hash for the label.
+        # Fragments.
 
-        if target.startswith("#"):
-            return Link(self.quote(target), target.lstrip("#"), "fragment")
+        if type == "fragment":
+            return Link(self.quote(text), identifier, target)
 
         # Sub-pages. Remove the leading slash for the label.
 
-        if target.startswith("/"):
-            return Link(self.translate_pagename(target), target.lstrip("/"), "page")
+        if type == "sub-page":
+            return Link(self.translate_pagename(text), identifier, target)
 
         # Sibling (of ancestor) pages.
 
-        if target.startswith("../"):
-            return Link(self.translate_pagename(target), None, "page")
-
-        # Attachment or interwiki link.
-
-        rewritten = self.translate_qualified_link(target)
-        if rewritten:
-            return rewritten # includes label
+        if type == "sibling-page":
+            return Link(self.translate_pagename(text), identifier, target)
 
         # Plain URL.
 
-        rewritten = self.is_url(target)
-        if rewritten:
-            return Link(rewritten, None, "url")
+        if type == "url":
+            return Link(text, identifier, target)
 
         # Top-level pages.
 
-        return Link(self.translate_pagename(target), None, "page")
+        if type == "page":
+            return Link(self.translate_pagename(text), identifier, target)
+
+        # Attachment or interwiki link.
 
-    def translate_pagename(self, target):
+        return self.translate_qualified_link(target)
 
-        "Translate the pagename in 'target'."
+    def translate_pagename(self, text):
+
+        "Translate the pagename in 'text'."
 
         # Obtain the target pagename and the fragment.
         # Split the pagename into path components.
 
-        t = target.split("#", 1)
+        t = text.split("#", 1)
 
         # Determine the actual pagename referenced.
         # Replace the root pagename if it appears.
@@ -136,22 +128,20 @@
         Return None if the link is not suitable.
         """
 
-        t = target.split(":", 1)
-        if len(t) != 2:
-            return None
-
-        prefix, target = t
+        identifier = target.get_identifier()
+        text = target.get_text()
+        type = target.get_type()
 
         # Attachment links.
 
-        if prefix == "attachment":
-            return Link(self.translate_attachment(target), target, "attachment")
+        if type == "attachment":
+            return Link(self.translate_attachment(identifier), identifier, target)
 
         # Interwiki links.
 
-        url = self.mapping.get(prefix)
+        url = self.mapping.get(type)
         if url:
-            return Link(self.translate_interwiki(url, target), target, "interwiki")
+            return Link(self.translate_interwiki(url, identifier), identifier, target)
 
         return None
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/parsers/moin.py
--- a/moinformat/parsers/moin.py	Sat Apr 13 00:04:21 2019 +0200
+++ b/moinformat/parsers/moin.py	Sat Apr 13 00:07:45 2019 +0200
@@ -44,6 +44,10 @@
                                  TableCell, TableRow, Text, Transclusion, \
                                  Underline, Verbatim
 
+# Link parsing.
+
+from moinformat.utils.links import parse_link_target
+
 join = "".join
 
 class MoinParser(ParserBase):
@@ -69,6 +73,10 @@
 
         self.headings = []
 
+        # Record link targets for resource identification.
+
+        self.link_targets = []
+
     # Principal parser methods.
 
     def parse(self, s):
@@ -565,7 +573,13 @@
         target = self.match_group("target")
         end = self.match_group("end")
 
-        span = cls([], target)
+        # Obtain an object for the link target.
+
+        link_target = parse_link_target(target, self.metadata)
+
+        # Obtain an object for the node.
+
+        span = cls([], link_target)
 
         # Obtain the extra details.
 
@@ -586,6 +600,10 @@
 
         region.append_inline(span)
 
+        # Record the link target for later processing.
+
+        self.root.link_targets.append(link_target)
+
     def parse_link(self, region):
         self._parse_link(region, Link, self.link_pattern_names)
 
diff -r 2149d41d5356 -r 90e7ce5b6cef moinformat/utils/links.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/links.py	Sat Apr 13 00:07:45 2019 +0200
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+"""
+Link target parsing.
+
+Copyright (C) 2018, 2019 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from urlparse import urlparse
+
+class LinkTarget:
+
+    "A link target abstraction."
+
+    def __init__(self, type, text, identifier=None):
+
+        "Initialise the link with the given 'type', 'text' and 'identifier'."
+
+        self.type = type
+        self.text = text
+        self.identifier = identifier
+
+    def __repr__(self):
+        return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)
+
+    def __str__(self):
+        return self.text
+
+    __unicode__ = __str__
+
+    def get_identifier(self):
+        return self.identifier or self.text
+
+    def get_text(self):
+        return self.text
+
+    def get_type(self):
+        return self.type
+
+# Parsing and recognition functions.
+
+def is_url(target):
+
+    "Return whether the 'target' references a URL."
+
+    scheme, host, path, params, query, fragment = urlparse(target)
+    return scheme and target or None
+
+def parse_link_target(target, metadata=None):
+
+    """
+    Parse a link 'target', returning a link target object. Use any 'metadata'
+    to identify certain link types.
+    """
+
+    # Fragments.
+
+    if target.startswith("#"):
+        return LinkTarget("fragment", target, target.lstrip("#"))
+
+    # Sub-pages.
+
+    if target.startswith("/"):
+        return LinkTarget("sub-page", target, target.lstrip("/").rstrip("/"))
+
+    # Sibling (of ancestor) pages.
+
+    if target.startswith("../"):
+        return LinkTarget("sibling-page", target, target.rstrip("/"))
+
+    # Attachment or interwiki link.
+
+    result = parse_qualified_link_target(target, metadata)
+    if result:
+        return result
+
+    # Plain URL.
+
+    if is_url(target):
+        return LinkTarget("url", target)
+
+    # Top-level pages.
+
+    return LinkTarget("page", target)
+
+def parse_qualified_link_target(target, metadata=None):
+
+    """
+    Parse a possible qualified link 'target', returning a link target object or
+    None if the target is not suitable. Use any 'metadata' to identify certain
+    link types.
+    """
+
+    t = target.split(":", 1)
+
+    if len(t) != 2:
+        return None
+
+    prefix, identifier = t
+
+    mapping = metadata and metadata.get("mapping")
+
+    if prefix == "attachment" or mapping and mapping.get(prefix):
+        return LinkTarget(prefix, target, identifier)
+
+    return None
+
+# vim: tabstop=4 expandtab shiftwidth=4
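A usage sketch follows (not part of the changeset above): it exercises the new
parse_link_target() function and the LinkTarget accessors that the HTML linker
and the parser now rely on. The sample targets, the "MoinMoin" interwiki entry
and the plain dictionary standing in for the parser's metadata object are
illustrative assumptions; parse_qualified_link_target() only calls
.get("mapping") on whatever metadata it is given, so a dictionary suffices
here. The code is Python 2, matching the urlparse and urllib imports used by
the modules in the patch.

    from moinformat.utils.links import parse_link_target

    # Illustrative stand-in for the parser's metadata: only the "mapping"
    # entry is consulted when qualified (interwiki) targets are parsed.
    metadata = {"mapping" : {"MoinMoin" : "https://moinmo.in/"}}

    targets = [
        "#section",               # fragment
        "/SubPage",               # sub-page
        "../SiblingPage",         # sibling (of ancestor) page
        "attachment:notes.txt",   # attachment
        "MoinMoin:HelpContents",  # interwiki, via the mapping
        "http://example.com/",    # plain URL
        "TopLevelPage",           # top-level page
        ]

    # Each target is classified once by the utility; linkers such as
    # HTMLLinker then read the type, text and identifier from the object.
    for target in targets:
        link_target = parse_link_target(target, metadata)
        print "%-24s type=%-12s identifier=%s" % (
            target, link_target.get_type(), link_target.get_identifier())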