1.1 --- a/moinformat/links/html.py Tue Jul 31 00:07:46 2018 +0200
1.2 +++ b/moinformat/links/html.py Tue Jul 31 19:17:39 2018 +0200
1.3 @@ -20,8 +20,15 @@
1.4 """
1.5
1.6 from moinformat.links.common import Linker
1.7 +from urllib import quote as _quote
1.8 from urlparse import urlparse
1.9
1.10 +def quote(s):
1.11 +
1.12 + "Quote URL path 's', preserving path separators and fragment indicators."
1.13 +
1.14 + return "#".join(map(_quote, s.split("#", 1)))
1.15 +
1.16 class HTMLLinker(Linker):
1.17
1.18 "Translate Moin links into HTML links."
1.19 @@ -79,7 +86,7 @@
1.20 # Top-level pages.
1.21
1.22 top_level = self.get_top_level()
1.23 - return "%s%s" % (top_level and "%s/" % top_level or "", target)
1.24 + return quote("%s%s" % (top_level and "%s/" % top_level or "", target))
1.25
1.26 def translate_qualified_link(self, target):
1.27
1.28 @@ -113,26 +120,26 @@
1.29
1.30 "Return a translation of the given attachment 'target'."
1.31
1.32 - return "%sattachments/%s/%s" % (
1.33 - self.get_top_level(), self.pagename, target)
1.34 + return quote("%sattachments/%s/%s" % (
1.35 + self.get_top_level(), self.pagename, target))
1.36
1.37 def translate_interwiki(self, url, target):
1.38
1.39 "Return a translation of the given interwiki 'target'."
1.40
1.41 - return "%s%s" % (self.normalise(url), target)
1.42 + return "%s%s" % (self.normalise(url), quote(target))
1.43
1.44 def translate_relative(self, target):
1.45
1.46 "Return a translation of the given relative 'target'."
1.47
1.48 - return target[len("../"):]
1.49 + return quote(target[len("../"):])
1.50
1.51 def translate_subpage(self, target):
1.52
1.53 "Return a translation of the given subpage 'target'."
1.54
1.55 - return ".%s" % target
1.56 + return quote(".%s" % target)
1.57
1.58 linker = HTMLLinker
1.59
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/moinformat/macros/anchor.py Tue Jul 31 19:17:39 2018 +0200
2.3 @@ -0,0 +1,46 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +Anchor macro for Moin compatibility.
2.8 +
2.9 +Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
2.10 +
2.11 +This program is free software; you can redistribute it and/or modify it under
2.12 +the terms of the GNU General Public License as published by the Free Software
2.13 +Foundation; either version 3 of the License, or (at your option) any later
2.14 +version.
2.15 +
2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
2.19 +details.
2.20 +
2.21 +You should have received a copy of the GNU General Public License along with
2.22 +this program. If not, see <http://www.gnu.org/licenses/>.
2.23 +"""
2.24 +
2.25 +from moinformat.macros.common import Macro
2.26 +from moinformat.tree.moin import Anchor
2.27 +
2.28 +class AnchorMacro(Macro):
2.29 +
2.30 + "An anchor macro."
2.31 +
2.32 + name = "Anchor"
2.33 +
2.34 + def evaluate(self):
2.35 +
2.36 + "Evaluate the macro, producing an anchor node."
2.37 +
2.38 + # Obtain the concatenated arguments since they might contain commas.
2.39 +
2.40 + argstr = ",".join(self.node.args)
2.41 +
2.42 + # Replace the macro node with the anchor.
2.43 +
2.44 + macro = self.node
2.45 + macro.parent.replace(macro, Anchor(argstr))
2.46 +
2.47 +macro = AnchorMacro
2.48 +
2.49 +# vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/moinformat/parsers/moin.py Tue Jul 31 00:07:46 2018 +0200
3.2 +++ b/moinformat/parsers/moin.py Tue Jul 31 19:17:39 2018 +0200
3.3 @@ -35,12 +35,12 @@
3.4
3.5 # Document tree nodes.
3.6
3.7 -from moinformat.tree.moin import Break, DefItem, DefTerm, FontStyle, Heading, \
3.8 - Larger, LineBreak, Link, List, ListItem, \
3.9 - Macro, Monospace, Region, Rule, Smaller, \
3.10 - Strikethrough, Subscript, Superscript, Table, \
3.11 - TableAttr, TableAttrs, TableCell, TableRow, \
3.12 - Text, Underline
3.13 +from moinformat.tree.moin import Anchor, Break, DefItem, DefTerm, FontStyle, \
3.14 + Heading, Larger, LineBreak, Link, List, \
3.15 + ListItem, Macro, Monospace, Region, Rule, \
3.16 + Smaller, Strikethrough, Subscript, \
3.17 + Superscript, Table, TableAttr, TableAttrs, \
3.18 + TableCell, TableRow, Text, Underline
3.19
3.20 join = "".join
3.21
3.22 @@ -481,13 +481,18 @@
3.23
3.24 # Complete inline pattern handlers.
3.25
3.26 + def parse_anchor(self, region):
3.27 + target = self.match_group("target")
3.28 + anchor = Anchor(target)
3.29 + region.append_inline(anchor)
3.30 +
3.31 def parse_linebreak(self, region):
3.32 region.append_inline(LineBreak())
3.33
3.34 def parse_link(self, region):
3.35 target = self.match_group("target")
3.36 text = self.match_group("text")
3.37 - link = Link(text and [Text(text)], target)
3.38 + link = Link(text and [Text(text)] or [], target)
3.39 region.append_inline(link)
3.40
3.41 def parse_macro(self, region):
3.42 @@ -619,6 +624,10 @@
3.43 # Complete inline patterns are for markup features that do not support
3.44 # arbitrary content within them:
3.45
3.46 + "anchor" : join((r"\(\(", # ((
3.47 + group("target", ".*?"), # target
3.48 + r"\)\)")), # ))
3.49 +
3.50 "linebreak" : r"\\\\", # \\
3.51
3.52 "link" : join((r"\[\[", # [[
3.53 @@ -706,8 +715,9 @@
3.54 ]
3.55
3.56 inline_pattern_names = [
3.57 - "fontstyle", "larger", "linebreak", "link", "macro", "monospace",
3.58 - "regionstart", "smaller", "strike", "sub", "super", "underline",
3.59 + "anchor", "fontstyle", "larger", "linebreak", "link", "macro",
3.60 + "monospace", "regionstart", "smaller", "strike", "sub", "super",
3.61 + "underline",
3.62 ]
3.63
3.64 list_pattern_names = [
3.65 @@ -740,6 +750,7 @@
3.66
3.67 handlers = {
3.68 None : end_region,
3.69 + "anchor" : parse_anchor,
3.70 "attrname" : parse_attrname,
3.71 "break" : parse_break,
3.72 "colour" : parse_colour,
4.1 --- a/moinformat/serialisers/common.py Tue Jul 31 00:07:46 2018 +0200
4.2 +++ b/moinformat/serialisers/common.py Tue Jul 31 19:17:39 2018 +0200
4.3 @@ -19,6 +19,8 @@
4.4 this program. If not, see <http://www.gnu.org/licenses/>.
4.5 """
4.6
4.7 +from urllib import quote_plus
4.8 +
4.9 class Serialiser:
4.10
4.11 "General serialisation support."
4.12 @@ -98,4 +100,61 @@
4.13
4.14 return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
4.15
4.16 +def make_id(s):
4.17 +
4.18 + "Make a suitable identifier for XML element identification."
4.19 +
4.20 + # NOTE: This reproduces the Moin algorithm for compatibility.
4.21 + # NOTE: There may well be improvements possible, possibly by replacing plus
4.22 + # NOTE: with something less cumbersome, even though plus may be unusual in
4.23 + # NOTE: things like headings, anyway.
4.24 +
4.25 + # The desired output is the following pattern:
4.26 +
4.27 + # [A-Za-z][-_:.A-Za-z0-9]*
4.28 +
4.29 + # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
4.30 + # output range as follows (in addition to A-Za-z0-9):
4.31 +
4.32 + # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}
4.33 +
4.34 + # The quote_plus function converts space to plus, preserves -_:. and encodes
4.35 + # all other symbols (including original occurrences of plus and percent) and
4.36 + # non-alphanumeric (ASCII) characters using percent encoding.
4.37 +
4.38 + # With colons preserved, the resulting output is in the following range
4.39 + # (in addition to A-Za-z0-9):
4.40 +
4.41 + # -_:.%+
4.42 +
4.43 + # Percent will only occur as an encoding prefix. Plus will only occur as a
4.44 + # replacement for space.
4.45 +
4.46 + # Combining quote_plus and UTF-7 gives the following range (in addition to
4.47 + # A-Za-z0-9):
4.48 +
4.49 + # -_:.%+
4.50 +
4.51 + # Examples:
4.52 +
4.53 + # UTF-7 quote_plus replace percent and plus
4.54 + # : -> : -> : -> :
4.55 + # - -> - -> - -> -
4.56 + # . -> . -> . -> .
4.57 + # % -> % -> %25 -> .25
4.58 + # + -> +- -> %2B- -> .2B-
4.59 + # _ -> _ -> _ -> _
4.60 + # space -> space -> + -> _
4.61 +
4.62 + # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode
4.63 +
4.64 + quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")
4.65 +
4.66 + # Ensure an alphabetical first character; slicing copes with an empty string.
4.67 +
4.68 + if not quoted[:1].isalpha():
4.69 + return "A%s" % quoted
4.70 + else:
4.71 + return quoted
4.72 +
4.73 # vim: tabstop=4 expandtab shiftwidth=4
5.1 --- a/moinformat/serialisers/html/moin.py Tue Jul 31 00:07:46 2018 +0200
5.2 +++ b/moinformat/serialisers/html/moin.py Tue Jul 31 19:17:39 2018 +0200
5.3 @@ -19,7 +19,8 @@
5.4 this program. If not, see <http://www.gnu.org/licenses/>.
5.5 """
5.6
5.7 -from moinformat.serialisers.common import escape_attr, escape_text, Serialiser
5.8 +from moinformat.serialisers.common import escape_attr, escape_text, make_id, \
5.9 + Serialiser
5.10
5.11 class HTMLSerialiser(Serialiser):
5.12
5.13 @@ -88,8 +89,8 @@
5.14 def end_emphasis(self):
5.15 self.out("</em>")
5.16
5.17 - def start_heading(self, level, extra, pad):
5.18 - self.out("<h%d>" % level)
5.19 + def start_heading(self, level, extra, pad, text):
5.20 + self.out("<h%d id='%s'>" % (level, escape_attr(make_id(text))))
5.21
5.22 def end_heading(self, level, pad, extra):
5.23 self.out("</h%d>" % level)
5.24 @@ -100,10 +101,12 @@
5.25 def end_larger(self):
5.26 self.out("</big>")
5.27
5.28 - def start_link(self, target):
5.29 + def start_link(self, target, nodes):
5.30 if self.linker:
5.31 target = self.linker.translate(target)
5.32 self.out('<a href="%s">' % escape_attr(target))
5.33 + if not nodes:
5.34 + self.out(escape_text(target))
5.35
5.36 def end_link(self):
5.37 self.out("</a>")
5.38 @@ -244,6 +247,9 @@
5.39 def end_underline(self):
5.40 self.out("</span>")
5.41
5.42 + def anchor(self, target):
5.43 + self.out("<a name='%s' />" % escape_attr(make_id(target)))
5.44 +
5.45 def break_(self):
5.46 pass
5.47
6.1 --- a/moinformat/serialisers/moin/moin.py Tue Jul 31 00:07:46 2018 +0200
6.2 +++ b/moinformat/serialisers/moin/moin.py Tue Jul 31 19:17:39 2018 +0200
6.3 @@ -66,7 +66,7 @@
6.4 def end_emphasis(self):
6.5 self.out("''")
6.6
6.7 - def start_heading(self, level, extra, pad):
6.8 + def start_heading(self, level, extra, pad, text):
6.9 self.out(extra + "=" * level + pad)
6.10
6.11 def end_heading(self, level, pad, extra):
6.12 @@ -78,7 +78,7 @@
6.13 def end_larger(self):
6.14 self.out("+~")
6.15
6.16 - def start_link(self, target):
6.17 + def start_link(self, target, nodes):
6.18 self.out("[[%s" % target)
6.19
6.20 def end_link(self):
6.21 @@ -179,6 +179,9 @@
6.22 def end_underline(self):
6.23 self.out("__")
6.24
6.25 + def anchor(self, target):
6.26 + self.out("((%s))" % target)
6.27 +
6.28 def break_(self):
6.29 self.out("\n")
6.30
7.1 --- a/moinformat/tree/moin.py Tue Jul 31 00:07:46 2018 +0200
7.2 +++ b/moinformat/tree/moin.py Tue Jul 31 19:17:39 2018 +0200
7.3 @@ -113,6 +113,23 @@
7.4 i = self.nodes.index(old)
7.5 self.nodes[i] = new
7.6
7.7 + def text_content(self):
7.8 +
7.9 + """
7.10 + Return a string containing the content of text nodes within this
7.11 + container.
7.12 + """
7.13 +
7.14 + l = []
7.15 +
7.16 + for node in self.nodes:
7.17 + if isinstance(node, Text):
7.18 + l.append(node.s)
7.19 + elif isinstance(node, Container):
7.20 + l.append(node.text_content())
7.21 +
7.22 + return "".join(l)
7.23 +
7.24 def __str__(self):
7.25 return self.prettyprint()
7.26
7.27 @@ -307,7 +324,7 @@
7.28 return self._prettyprint(l, indent)
7.29
7.30 def to_string(self, out):
7.31 - out.start_heading(self.level, self.start_extra, self.start_pad)
7.32 + out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content())
7.33 self._to_string(out)
7.34 out.end_heading(self.level, self.end_pad, self.end_extra)
7.35
7.36 @@ -472,7 +489,7 @@
7.37 return self._prettyprint(l, indent)
7.38
7.39 def to_string(self, out):
7.40 - out.start_link(self.target)
7.41 + out.start_link(self.target, self.nodes)
7.42 if self.nodes:
7.43 out.start_linktext()
7.44 self._to_string(out)
7.45 @@ -567,6 +584,22 @@
7.46 def empty(self):
7.47 return False
7.48
7.49 +class Anchor(Node):
7.50 +
7.51 + "Anchor details."
7.52 +
7.53 + def __init__(self, target):
7.54 + self.target = target
7.55 +
7.56 + def __repr__(self):
7.57 + return "Anchor(%r)" % self.target
7.58 +
7.59 + def prettyprint(self, indent=""):
7.60 + return "%sAnchor: target=%r" % (indent, self.target)
7.61 +
7.62 + def to_string(self, out):
7.63 + out.anchor(self.target)
7.64 +
7.65 class Break(Node):
7.66
7.67 "A paragraph break."
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
9.2 +++ b/tests/test_anchors.txt Tue Jul 31 19:17:39 2018 +0200
9.3 @@ -0,0 +1,22 @@
9.4 += Level 1 =
9.5 +
9.6 +((Number #1 anchor))
9.7 +Some text.
9.8 +
9.9 +((Text, anchored.)) Some text referenced elsewhere.
9.10 +
9.11 += Heading __Underlined__ =
9.12 +
9.13 +Some more text.
9.14 +
9.15 +((Anchored!))
9.16 +More anchored text.
9.17 +
9.18 +A [[#Heading Underlined|link]] to the above heading.
9.19 +
9.20 +<<Anchor(Anchored, using macro)>>
9.21 += Macro-Anchored Heading =
9.22 +
9.23 +Yet more text.
9.24 +
9.25 +Link to the [[#Number #1 anchor]].