#!/usr/bin/env python
# MoinLight (file moinformat/links/html.py at 969d5eb58473)

"""
HTML linking scheme.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.links.common import Linker, resolve

# The quoting and URL parsing functions live in urllib/urlparse on Python 2 and
# in urllib.parse on Python 3. The original (Python 2) locations are tried
# first so that behaviour there is unchanged.

try:
    from urllib import quote, quote_plus
    from urlparse import urlparse
except ImportError:
    from urllib.parse import quote, quote_plus, urlparse

class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    The methods below read the following attributes, none of which is defined
    in this module (presumably they are provided by the Linker base class or
    by whoever instantiates the linker - confirm against
    moinformat.links.common):

    metadata                a mapping consulted for the "pagename",
                            "document_index" and "whitespace" entries
    root_pagename           the name compared against the current pagename to
                            detect the root page
    mapping                 a mapping from interwiki prefixes to base URLs
    default_whitespace_map  (old, new) replacement pairs used when the
                            metadata has no "whitespace" entry
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level.

        The result is an empty string for the root page. For any other page it
        is one ".." component per level, joined using "/" and without a
        trailing slash.
        """

        pagename = self.metadata.get("pagename", "")

        # The root page is at the top level already.

        if pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below.

        levels = pagename.count("/") + 1
        return "/".join([".."] * levels)

    def is_url(self, target):

        """
        Return whether the 'target' references a URL.

        The 'target' itself is returned if urlparse finds a scheme in it, with
        None being returned otherwise.
        """

        scheme = urlparse(target)[0]
        return target if scheme else None

    def normalise(self, path):

        "Return a normalised form of 'path', this always ending with a slash."

        if path.endswith("/"):
            return path

        return "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label.

        The label is None where no default label is suggested. Any trailing
        slashes are removed from 'target' before it is classified.
        """

        target = target.rstrip("/")

        # Fragments. Remove the leading hash for the label.

        if target.startswith("#"):
            return self.quote(target), target.lstrip("#")

        # Sub-pages. Remove the leading slash for the label.

        if target.startswith("/"):
            return self.translate_pagename(target), target.lstrip("/")

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_pagename(target), None

        # Attachment or interwiki link.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.translate_pagename(target), None

    def translate_pagename(self, target):

        """
        Translate the pagename in 'target', returning a quoted relative link.

        Any fragment following the first "#" in 'target' is retained and is
        encoded by the quote method.
        """

        # Obtain the target pagename and the fragment.

        parts = target.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        pagename = self.metadata.get("pagename", "")
        resolved = resolve(parts[0], pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename.

        top_level = self.get_top_level()
        prefix = ("%s/" % top_level) if top_level else ""

        # Support an explicit "DocumentIndex" filename for file browsing.

        document_index = self.metadata.get("document_index")
        suffix = ("/%s" % document_index) if document_index else ""

        parts[0] = "%s%s%s" % (prefix, resolved, suffix)

        return self.quote("#".join(parts))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        A qualified link has the form "prefix:rest". The prefix "attachment"
        selects an attachment link; any other prefix is looked up in the
        interwiki mapping.

        Return None if the link is not suitable.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        # Anything else, including plain URLs whose scheme is not a known
        # interwiki prefix, is left for the caller to handle.

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        """
        Return a translation of the given attachment 'target'.

        The result is a quoted link to 'target' below "./attachments/".
        """

        return self.quote("./attachments/%s" % target)

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target'.

        The base 'url' is given a trailing slash if it lacks one and is
        otherwise used as it is; only 'target' is quoted.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Whitespace is replaced first. The text after the first "#", if any, is
        then converted to an identifier using make_id before both parts are
        quoted.
        """

        s = self.replace_whitespace(s)
        parts = s.split("#", 1)

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join([quote(part) for part in parts])

    # Whitespace conversion in pagenames.

    def replace_whitespace(self, pagename):

        """
        Map whitespace in 'pagename' to appropriate characters.

        The replacements are the (old, new) pairs found in the "whitespace"
        metadata entry or, if that is absent, in the default whitespace map.
        They are applied in order.
        """

        wsmap = self.metadata.get("whitespace", self.default_whitespace_map)

        for old, new in wsmap:
            pagename = pagename.replace(old, new)

        return pagename

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification.

        An identifier that would not start with an alphabetical character,
        including the empty identifier produced from empty 's', is prefixed
        with "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7         quote_plus    replace percent and plus
        # :     -> :          -> :          -> :
        # -     -> -          -> -          -> -
        # .     -> .          -> .          -> .
        # %     -> %          -> %25        -> .25
        # +     -> +-         -> %2B-       -> .2B-
        # _     -> _          -> _          -> _
        # space -> space      -> +          -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # A slice is tested instead of an index so that an empty fragment (as in
        # the targets "#" and "Page#") yields "A" rather than raising IndexError.

        if not quoted[:1].isalpha():
            return "A%s" % quoted

        return quoted

linker = HTMLLinker

# vim: tabstop=4 expandtab shiftwidth=4