paul@91 | 1 | #!/usr/bin/env python |
paul@91 | 2 | |
paul@91 | 3 | """ |
paul@91 | 4 | HTML linking scheme. |
paul@91 | 5 | |
paul@319 | 6 | Copyright (C) 2018, 2019, 2022 Paul Boddie <paul@boddie.org.uk> |
paul@91 | 7 | |
paul@91 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@91 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@91 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@91 | 11 | version. |
paul@91 | 12 | |
paul@91 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@91 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@91 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@91 | 16 | details. |
paul@91 | 17 | |
paul@91 | 18 | You should have received a copy of the GNU General Public License along with |
paul@91 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@91 | 20 | """ |
paul@91 | 21 | |
paul@214 | 22 | from moinformat.links.common import Link, Linker, resolve |
paul@128 | 23 | from urllib import quote, quote_plus |
paul@91 | 24 | |
class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    Page links are rewritten as relative paths which first climb to the top
    level and then descend to the resolved pagename. Attachment links are
    rewritten relative to the attachments directory, and interwiki links are
    rewritten using the URL prefixes held by the interwiki mapping.

    The methods rely on the following attributes which are not defined in this
    class and are presumably supplied by the Linker base class (confirm against
    moinformat.links.common): 'metadata', 'root_pagename', 'mapping',
    'attachments_dir' and 'default_whitespace_map'.
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link from the current page to the top level.

        The current page is taken from the "pagename" metadata entry, using
        the empty string if it is absent. The result is the empty string for
        the root page and otherwise one ".." component per level of the
        pagename, joined using "/" and without a trailing slash.
        """

        pagename = self.metadata.get("pagename", "")

        # The root page is at the top level already.

        if pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below, so even a
        # pagename without separators needs a single ".." component.

        depth = pagename.count("/") + 1
        return "/".join([".."] * depth)

    def normalise(self, path):

        "Return 'path' with a trailing slash, adding one only if it is missing."

        if path.endswith("/"):
            return path

        return "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a link object containing the rewritten
        target and a suitable default label.

        The 'target' must provide get_identifier, get_text and get_type. Its
        type selects the translation: "fragment", the page types ("page",
        "sub-page", "sibling-page") and "url" are handled here, whereas any
        other type is treated as a qualified link, for which None is returned
        if no translation is available.
        """

        identifier = target.get_identifier()
        text = target.get_text()
        link_type = target.get_type()

        # Fragments within the current page.

        if link_type == "fragment":
            return Link(self.quote(text), identifier, target)

        # Top-level pages, sub-pages and sibling (of ancestor) pages are all
        # resolved against the current pagename in the same way.

        if link_type in ("page", "sub-page", "sibling-page"):
            return Link(self.translate_pagename(text), identifier, target)

        # Plain URLs are passed through without any rewriting or quoting.

        if link_type == "url":
            return Link(text, identifier, target)

        # Attachment or interwiki link.

        return self.translate_qualified_link(target)

    def translate_pagename(self, text):

        """
        Translate the pagename in 'text', returning a quoted relative link.

        Any fragment following the first "#" in 'text' is retained and is
        encoded as an identifier by the quote method.
        """

        # Separate the target pagename from any fragment.

        parts = text.split("#", 1)

        # Determine the actual pagename referenced, relative to the current
        # pagename and taking the root pagename into account.

        pagename = self.metadata.get("pagename", "")
        resolved = resolve(parts[0], pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then
        # the resolved pagename.

        top_level = self.get_top_level()
        prefix = "%s/" % top_level if top_level else ""

        # Support an explicit "DocumentIndex" filename for file browsing.

        document_index = self.metadata.get("document_index")
        suffix = "/%s" % document_index if document_index else ""

        parts[0] = "%s%s%s" % (prefix, resolved, suffix)

        return self.quote("#".join(parts))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a link object
        retaining a rewritten target and a suitable default label.

        Attachment links are labelled using the target identifier. Interwiki
        links are recognised by finding the target type in the interwiki
        mapping and are labelled using the identifier or, if that is empty,
        the type itself.

        Return None if the link is not suitable.
        """

        identifier = target.get_identifier()
        pagename = target.get_pagename()
        link_type = target.get_type()

        # Attachment links.

        if link_type == "attachment":
            return Link(self.translate_attachment(identifier, pagename),
                        identifier, target)

        # Interwiki links.

        url = self.mapping.get(link_type)

        if url:
            return Link(self.translate_interwiki(url, identifier),
                        identifier or link_type, target)

        return None

    # Specific link translators.

    def translate_attachment(self, target, pagename):

        """
        Return a translation of the given attachment 'target' associated with
        the given 'pagename'.

        The result is a quoted relative link leading to the top level and then
        into the attachments directory. Unless the "common_attachments"
        metadata entry is set, the 'pagename' is used as a directory beneath
        the attachments directory.
        """

        top_level = self.get_top_level()
        prefix = "%s/" % top_level if top_level else ""

        # With common attachments, all files share the attachments directory.

        if self.metadata.get("common_attachments"):
            page_dir = ""
        else:
            page_dir = "%s/" % pagename

        return self.quote("%s%s/%s%s" % (prefix, self.attachments_dir,
                                         page_dir, target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', this being the
        quoted 'target' appended to the normalised 'url'.

        An empty or missing 'target' yields the normalised 'url' alone.
        """

        # The caller falls back to another label when the identifier is empty,
        # so an empty target is anticipated here instead of being allowed to
        # fail inside the quoting.

        return "%s%s" % (self.normalise(url), self.quote(target or ""))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Whitespace is replaced throughout 's' before anything else is done. The
        text after the first "#", if present, is then converted to an HTML
        identifier, and both parts are finally percent-encoded.
        """

        s = self.replace_whitespace(s)
        parts = s.split("#", 1)

        # Only a fragment, if present, is rewritten as an identifier.

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        # Within methods, 'quote' refers to the imported module-level function
        # and not to this method.

        return "#".join([quote(part) for part in parts])

    # Whitespace conversion in pagenames.

    def replace_whitespace(self, pagename):

        """
        Map whitespace in 'pagename' to appropriate characters.

        The (old, new) replacement pairs are taken from the "whitespace"
        metadata entry or, if it is absent, from the default whitespace map.
        They are applied in order.
        """

        wsmap = self.metadata.get("whitespace", self.default_whitespace_map)

        for old, new in wsmap:
            pagename = pagename.replace(old, new)

        return pagename

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.

        The result always starts with an alphabetical character, an "A" being
        prepended where necessary. Consequently, an empty 's' yields "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #            UTF-7      quote_plus    replace percent and plus
        # :     ->   :      ->  :         ->  :
        # -     ->   -      ->  -         ->  -
        # .     ->   .      ->  .         ->  .
        # %     ->   %      ->  %25       ->  .25
        # +     ->   +-     ->  %2B-      ->  .2B-
        # _     ->   _      ->  _         ->  _
        # space ->   space  ->  +         ->  _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing keeps an empty result from raising an
        # exception and gives "A", as the Moin algorithm does.

        if not quoted[:1].isalpha():
            return "A%s" % quoted

        return quoted
paul@91 | 262 | |
# Expose the linker class under the generic module-level name 'linker'
# (presumably so that code loading this module can obtain the class without
# knowing its name - confirm against the code that imports this module).

linker = HTMLLinker
paul@91 | 264 | |
paul@91 | 265 | # vim: tabstop=4 expandtab shiftwidth=4 |