MoinLight (annotate moinformat/links/html.py in 455069f7e3f7)

MoinLight

Annotated moinformat/links/html.py

323:455069f7e3f7

20 months ago

Paul Boddie

Removed tag rel-0-2-1

paul@91	1	#!/usr/bin/env python
paul@91	2
paul@91	3	"""
paul@91	4	HTML linking scheme.
paul@91	5
paul@319	6	Copyright (C) 2018, 2019, 2022 Paul Boddie <paul@boddie.org.uk>
paul@91	7
paul@91	8	This program is free software; you can redistribute it and/or modify it under
paul@91	9	the terms of the GNU General Public License as published by the Free Software
paul@91	10	Foundation; either version 3 of the License, or (at your option) any later
paul@91	11	version.
paul@91	12
paul@91	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@91	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@91	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@91	16	details.
paul@91	17
paul@91	18	You should have received a copy of the GNU General Public License along with
paul@91	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@91	20	"""
paul@91	21
paul@214	22	from moinformat.links.common import Link, Linker, resolve
paul@128	23	from urllib import quote, quote_plus
paul@91	24
class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    The methods below read 'metadata', 'mapping', 'root_pagename',
    'attachments_dir' and 'default_whitespace_map' from the instance. None of
    these is defined in this class; presumably they are provided by the Linker
    base class (to be confirmed against moinformat.links.common).
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level: an empty string for the root
        page, otherwise one ".." component per level of the current pagename.
        """

        pagename = self.metadata.get("pagename", "")

        # The root page is at the top level already.

        if pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below.

        levels = pagename.count("/") + 1
        return "/".join([".."] * levels)

    def _top_level_prefix(self):

        """
        Return the relative link to the top level with a trailing slash, or an
        empty string where the current page is already at the top level.
        """

        top_level = self.get_top_level()
        return "%s/" % top_level if top_level else ""

    def normalise(self, path):

        "Return a normalised form of 'path', always ending with a slash."

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a link object containing the rewritten
        target and a suitable default label.

        Return None for a target that is neither a fragment, page, URL,
        attachment nor a known interwiki link.
        """

        identifier = target.get_identifier()
        text = target.get_text()
        link_type = target.get_type()

        # Fragments.

        if link_type == "fragment":
            return Link(self.quote(text), identifier, target)

        # Sub-pages, sibling (of ancestor) pages and top-level pages are all
        # resolved relative to the current page in the same way.

        if link_type in ("sub-page", "sibling-page", "page"):
            return Link(self.translate_pagename(text), identifier, target)

        # Plain URL: used without any rewriting.

        if link_type == "url":
            return Link(text, identifier, target)

        # Attachment or interwiki link.

        return self.translate_qualified_link(target)

    def translate_pagename(self, text):

        """
        Translate the pagename in 'text', which may carry a "#fragment" suffix,
        returning a quoted relative link.
        """

        # Obtain the target pagename and the fragment.

        parts = text.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        pagename = self.metadata.get("pagename", "")
        resolved = resolve(parts[0], pagename, self.root_pagename)

        # Support an explicit "DocumentIndex" filename for file browsing.

        document_index = self.metadata.get("document_index")
        index_suffix = "/%s" % document_index if document_index else ""

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename.

        parts[0] = "%s%s%s" % (self._top_level_prefix(), resolved, index_suffix)

        return self.quote("#".join(parts))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a link object
        retaining a rewritten target and a suitable default label.

        Return None if the link is not suitable.
        """

        identifier = target.get_identifier()
        pagename = target.get_pagename()
        link_type = target.get_type()

        # Attachment links.

        if link_type == "attachment":
            return Link(self.translate_attachment(identifier, pagename),
                        identifier, target)

        # Interwiki links: the link type selects the base URL in the mapping.
        # The type serves as the label where there is no identifier.

        url = self.mapping.get(link_type)

        if url:
            return Link(self.translate_interwiki(url, identifier),
                        identifier or link_type, target)

        return None

    # Specific link translators.

    def translate_attachment(self, target, pagename):

        """
        Return a translation of the given attachment 'target' associated with
        the given 'pagename'.
        """

        # With common attachments enabled in the metadata, the pagename is
        # omitted from the attachment path.

        if self.metadata.get("common_attachments"):
            page_dir = ""
        else:
            page_dir = "%s/" % pagename

        return self.quote("%s%s/%s%s" % (self._top_level_prefix(),
                                         self.attachments_dir,
                                         page_dir,
                                         target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', appended in
        quoted form to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.
        """

        s = self.replace_whitespace(s)
        parts = s.split("#", 1)

        # Anything after the first "#" is converted to an HTML identifier.

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        # NOTE: Within this method body, "quote" is the module-level function
        # NOTE: imported from urllib, not this method.

        return "#".join([quote(part) for part in parts])

    # Whitespace conversion in pagenames.

    def replace_whitespace(self, pagename):

        "Map whitespace in 'pagename' to appropriate characters."

        # The map is iterated as a sequence of (old, new) pairs, taken from
        # the metadata if present there and from the default map otherwise.

        wsmap = self.metadata.get("whitespace", self.default_whitespace_map)

        for old, new in wsmap:
            pagename = pagename.replace(old, new)

        return pagename

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.

        The result always starts with an alphabetical character: "A" is
        prefixed where necessary, so an empty 's' yields "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{\|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7     quote_plus   replace percent and plus
        # :     -> :      -> :         -> :
        # -     -> -      -> -         -> -
        # .     -> .      -> .         -> .
        # %     -> %      -> %25       -> .25
        # +     -> +-     -> %2B-      -> .2B-
        # _     -> _      -> _         -> _
        # space -> space  -> +         -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing tolerates an empty string, which arises
        # from links with an empty fragment such as "Page#".

        if not quoted[:1].isalpha():
            return "A%s" % quoted

        return quoted
paul@91	262
# Bind the HTML linker class to the module-level name 'linker'.
# NOTE(review): presumably this is the name by which the linking scheme is
# looked up by other code - confirm against the code importing this module.
paul@91	263	linker = HTMLLinker
paul@91	264
paul@91	265	# vim: tabstop=4 expandtab shiftwidth=4