#!/usr/bin/env python

"""
HTML linking scheme.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.links.common import Linker, resolve
from urllib import quote, quote_plus
from urlparse import urlparse

class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    NOTE(review): the 'metadata', 'root_pagename', 'mapping' and
    'default_whitespace_map' attributes used below are not defined in this
    class and are presumably provided by Linker - confirm against that class.
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level, this being an empty string
        for the root page and a sequence of ".." components otherwise.
        """

        # The root page is at the top level already.

        pagename = self.metadata.get("pagename", "")

        if pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below.

        levels = pagename.count("/") + 1
        return "/".join([".."] * levels)

    def is_url(self, target):

        """
        Return 'target' itself if it references a URL (has a URL scheme), or
        None otherwise.
        """

        scheme = urlparse(target)[0]

        # A non-empty scheme implies a non-empty target.

        return target if scheme else None

    def normalise(self, path):

        "Return a normalised form of 'path', ensuring a trailing slash."

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label. The label is None where
        no specific default is suggested.
        """

        target = target.rstrip("/")

        # Fragments. Remove the leading hash for the label.

        if target.startswith("#"):
            return self.quote(target), target.lstrip("#")

        # Sub-pages. Remove the leading slash for the label.

        if target.startswith("/"):
            return self.translate_pagename(target), target.lstrip("/")

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_pagename(target), None

        # Attachment or interwiki link.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.translate_pagename(target), None

    def translate_pagename(self, target):

        """
        Translate the pagename in 'target', returning a quoted relative link
        that retains any fragment identifier.
        """

        # Obtain the target pagename and the fragment.
        # Split the pagename into path components.

        t = target.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        pagename = self.metadata.get("pagename", "")
        resolved = resolve(t[0], pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename.

        top_level = self.get_top_level()
        prefix = "%s/" % top_level if top_level else ""

        # Support an explicit "DocumentIndex" filename for file browsing.

        document_index = self.metadata.get("document_index")
        suffix = "/%s" % document_index if document_index else ""

        t[0] = "%s%s%s" % (prefix, resolved, suffix)

        return self.quote("#".join(t))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        Return None if the link is not suitable.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        "Return a translation of the given attachment 'target'."

        return self.quote("./attachments/%s" % target)

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', appending the
        quoted target to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.
        """

        s = self.replace_whitespace(s)
        parts = s.split("#", 1)

        # Only the text after the first hash is treated as a fragment.

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join(map(quote, parts))

    # Whitespace conversion in pagenames.

    def replace_whitespace(self, pagename):

        """
        Map whitespace in 'pagename' to appropriate characters, applying each
        (old, new) replacement from the "whitespace" metadata or, if that is
        absent, from the default whitespace map.
        """

        wsmap = self.metadata.get("whitespace", self.default_whitespace_map)

        for old, new in wsmap:
            pagename = pagename.replace(old, new)

        return pagename

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.
        An empty 's' yields the identifier "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #           UTF-7    quote_plus    replace percent and plus
        # :     ->  :     -> :          -> :
        # -     ->  -     -> -          -> -
        # .     ->  .     -> .          -> .
        # %     ->  %     -> %25        -> .25
        # +     ->  +-    -> %2B-       -> .2B-
        # _     ->  _     -> _          -> _
        # space ->  space -> +          -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing tolerates an empty identifier (as produced
        # by a bare "#" link), which would otherwise raise IndexError.

        if not quoted[:1].isalpha():
            return "A%s" % quoted
        else:
            return quoted

linker = HTMLLinker

# vim: tabstop=4 expandtab shiftwidth=4