# HG changeset patch # User Paul Boddie # Date 1533057459 -7200 # Node ID fb199ccdb341071249c5d1ce5b0e533c3aca1ede # Parent 0c98b2e6896a7e8de5bedd7506de70b7c471c3ba# Parent de69e3bc8121c8339a29ab72c2f0a4ab3020f921 Merged changes from the default branch. diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/links/html.py --- a/moinformat/links/html.py Tue Jul 31 00:07:46 2018 +0200 +++ b/moinformat/links/html.py Tue Jul 31 19:17:39 2018 +0200 @@ -20,8 +20,15 @@ """ from moinformat.links.common import Linker +from urllib import quote as _quote from urlparse import urlparse +def quote(s): + + "Quote URL path 's', preserving path separators and fragment indicators." + + return "#".join(map(_quote, s.split("#", 1))) + class HTMLLinker(Linker): "Translate Moin links into HTML links." @@ -79,7 +86,7 @@ # Top-level pages. top_level = self.get_top_level() - return "%s%s" % (top_level and "%s/" % top_level or "", target) + return quote("%s%s" % (top_level and "%s/" % top_level or "", target)) def translate_qualified_link(self, target): @@ -113,26 +120,26 @@ "Return a translation of the given attachment 'target'." - return "%sattachments/%s/%s" % ( - self.get_top_level(), self.pagename, target) + return quote("%sattachments/%s/%s" % ( + self.get_top_level(), self.pagename, target)) def translate_interwiki(self, url, target): "Return a translation of the given interwiki 'target'." - return "%s%s" % (self.normalise(url), target) + return "%s%s" % (self.normalise(url), quote(target)) def translate_relative(self, target): "Return a translation of the given relative 'target'." - return target[len("../"):] + return quote(target[len("../"):]) def translate_subpage(self, target): "Return a translation of the given subpage 'target'." 
- return ".%s" % target + return quote(".%s" % target) linker = HTMLLinker diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/macros/anchor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/moinformat/macros/anchor.py Tue Jul 31 19:17:39 2018 +0200 @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +""" +Anchor macro for Moin compatibility. + +Copyright (C) 2018 Paul Boddie + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +""" + +from moinformat.macros.common import Macro +from moinformat.tree.moin import Anchor + +class AnchorMacro(Macro): + + "An anchor macro." + + name = "Anchor" + + def evaluate(self): + + "Evaluate the macro, producing an anchor node." + + # Obtain the concatenated arguments since they might contain commas. + + argstr = ",".join(self.node.args) + + # Replace the macro node with the anchor. + + macro = self.node + macro.parent.replace(macro, Anchor(argstr)) + +macro = AnchorMacro + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/parsers/moin.py --- a/moinformat/parsers/moin.py Tue Jul 31 00:07:46 2018 +0200 +++ b/moinformat/parsers/moin.py Tue Jul 31 19:17:39 2018 +0200 @@ -35,12 +35,12 @@ # Document tree nodes. 
-from moinformat.tree.moin import Break, DefItem, DefTerm, FontStyle, Heading, \ - Larger, LineBreak, Link, List, ListItem, \ - Macro, Monospace, Region, Rule, Smaller, \ - Strikethrough, Subscript, Superscript, Table, \ - TableAttr, TableAttrs, TableCell, TableRow, \ - Text, Underline +from moinformat.tree.moin import Anchor, Break, DefItem, DefTerm, FontStyle, \ + Heading, Larger, LineBreak, Link, List, \ + ListItem, Macro, Monospace, Region, Rule, \ + Smaller, Strikethrough, Subscript, \ + Superscript, Table, TableAttr, TableAttrs, \ + TableCell, TableRow, Text, Underline join = "".join @@ -481,13 +481,18 @@ # Complete inline pattern handlers. + def parse_anchor(self, region): + target = self.match_group("target") + anchor = Anchor(target) + region.append_inline(anchor) + def parse_linebreak(self, region): region.append_inline(LineBreak()) def parse_link(self, region): target = self.match_group("target") text = self.match_group("text") - link = Link(text and [Text(text)], target) + link = Link(text and [Text(text)] or [], target) region.append_inline(link) def parse_macro(self, region): @@ -619,6 +624,10 @@ # Complete inline patterns are for markup features that do not support # arbitrary content within them: + "anchor" : join((r"\(\(", # (( + group("target", ".*?"), # target + r"\)\)")), # )) + "linebreak" : r"\\\\", # \\ "link" : join((r"\[\[", # [[ @@ -706,8 +715,9 @@ ] inline_pattern_names = [ - "fontstyle", "larger", "linebreak", "link", "macro", "monospace", - "regionstart", "smaller", "strike", "sub", "super", "underline", + "anchor", "fontstyle", "larger", "linebreak", "link", "macro", + "monospace", "regionstart", "smaller", "strike", "sub", "super", + "underline", ] list_pattern_names = [ @@ -740,6 +750,7 @@ handlers = { None : end_region, + "anchor" : parse_anchor, "attrname" : parse_attrname, "break" : parse_break, "colour" : parse_colour, diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/serialisers/common.py --- a/moinformat/serialisers/common.py Tue 
Jul 31 00:07:46 2018 +0200 +++ b/moinformat/serialisers/common.py Tue Jul 31 19:17:39 2018 +0200 @@ -19,6 +19,8 @@ this program. If not, see <http://www.gnu.org/licenses/>. """ +from urllib import quote_plus + class Serialiser: "General serialisation support." @@ -98,4 +100,61 @@ return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") +def make_id(s): + + "Make a suitable identifier for XML element identification." + + # NOTE: This reproduces the Moin algorithm for compatibility. + # NOTE: There may well be improvements possible, possibly by replacing plus + # NOTE: with something less cumbersome, even though plus may be unusual in + # NOTE: things like headings, anyway. + + # The desired output is the following pattern: + + # [A-Za-z][-_:.A-Za-z0-9]* + + # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an + # output range as follows (in addition to A-Za-z0-9): + + # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|} + + # The quote_plus function converts space to plus, preserves -_:. and encodes + # all other symbols (including original occurrences of plus and percent) and + # non-alphanumeric (ASCII) characters using percent encoding. + + # With colons preserved, the resulting output is in the following range + # (in addition to A-Za-z0-9): + + # -_:.%+ + + # Percent will only occur as an encoding prefix. Plus will only occur as a + # replacement for space. + + # Combining quote_plus and UTF-7 gives the following range (in addition to + # A-Za-z0-9): + + # -_:.%+ + + # Examples: + + # UTF-7 quote_plus replace percent and plus + # : -> : -> : -> : + # - -> - -> - -> - + # . -> . -> . -> . + # % -> % -> %25 -> .25 + # + -> +- -> %2B- -> .2B- + # _ -> _ -> _ -> _ + # space -> space -> + -> _ + + # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode + + quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_") + + # Ensure that the identifier starts with an alphabetical character. 
+ + if not quoted[0].isalpha(): + return "A%s" % quoted + else: + return quoted + # vim: tabstop=4 expandtab shiftwidth=4 diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/serialisers/html/moin.py --- a/moinformat/serialisers/html/moin.py Tue Jul 31 00:07:46 2018 +0200 +++ b/moinformat/serialisers/html/moin.py Tue Jul 31 19:17:39 2018 +0200 @@ -19,7 +19,8 @@ this program. If not, see . """ -from moinformat.serialisers.common import escape_attr, escape_text, Serialiser +from moinformat.serialisers.common import escape_attr, escape_text, make_id, \ + Serialiser class HTMLSerialiser(Serialiser): @@ -88,8 +89,8 @@ def end_emphasis(self): self.out("") - def start_heading(self, level, extra, pad): - self.out("" % level) + def start_heading(self, level, extra, pad, text): + self.out("" % (level, escape_attr(make_id(text)))) def end_heading(self, level, pad, extra): self.out("" % level) @@ -100,10 +101,12 @@ def end_larger(self): self.out("") - def start_link(self, target): + def start_link(self, target, nodes): if self.linker: target = self.linker.translate(target) self.out('' % escape_attr(target)) + if not nodes: + self.out(escape_text(target)) def end_link(self): self.out("") @@ -244,6 +247,9 @@ def end_underline(self): self.out("") + def anchor(self, target): + self.out("" % escape_attr(make_id(target))) + def break_(self): pass diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/serialisers/moin/moin.py --- a/moinformat/serialisers/moin/moin.py Tue Jul 31 00:07:46 2018 +0200 +++ b/moinformat/serialisers/moin/moin.py Tue Jul 31 19:17:39 2018 +0200 @@ -66,7 +66,7 @@ def end_emphasis(self): self.out("''") - def start_heading(self, level, extra, pad): + def start_heading(self, level, extra, pad, text): self.out(extra + "=" * level + pad) def end_heading(self, level, pad, extra): @@ -78,7 +78,7 @@ def end_larger(self): self.out("+~") - def start_link(self, target): + def start_link(self, target, nodes): self.out("[[%s" % target) def end_link(self): @@ -179,6 +179,9 @@ def 
end_underline(self): self.out("__") + def anchor(self, target): + self.out("((%s))" % target) + def break_(self): self.out("\n") diff -r 0c98b2e6896a -r fb199ccdb341 moinformat/tree/moin.py --- a/moinformat/tree/moin.py Tue Jul 31 00:07:46 2018 +0200 +++ b/moinformat/tree/moin.py Tue Jul 31 19:17:39 2018 +0200 @@ -113,6 +113,23 @@ i = self.nodes.index(old) self.nodes[i] = new + def text_content(self): + + """ + Return a string containing the content of text nodes within this + container. + """ + + l = [] + + for node in self.nodes: + if isinstance(node, Text): + l.append(node.s) + elif isinstance(node, Container): + l.append(node.text_content()) + + return "".join(l) + def __str__(self): return self.prettyprint() @@ -307,7 +324,7 @@ return self._prettyprint(l, indent) def to_string(self, out): - out.start_heading(self.level, self.start_extra, self.start_pad) + out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content()) self._to_string(out) out.end_heading(self.level, self.end_pad, self.end_extra) @@ -472,7 +489,7 @@ return self._prettyprint(l, indent) def to_string(self, out): - out.start_link(self.target) + out.start_link(self.target, self.nodes) if self.nodes: out.start_linktext() self._to_string(out) @@ -567,6 +584,22 @@ def empty(self): return False +class Anchor(Node): + + "Anchor details." + + def __init__(self, target): + self.target = target + + def __repr__(self): + return "Anchor(%r)" % self.target + + def prettyprint(self, indent=""): + return "%sAnchor: target=%r" % (indent, self.target) + + def to_string(self, out): + out.anchor(self.target) + class Break(Node): "A paragraph break." 
diff -r 0c98b2e6896a -r fb199ccdb341 tests/test_anchors.tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_anchors.tree Tue Jul 31 19:17:39 2018 +0200 @@ -0,0 +1,43 @@ +Region + Heading + Text + Break + Block + Anchor + Text + Break + Block + Anchor + Text + Break + Heading + Text + Underline + Text + Break + Block + Text + Break + Block + Anchor + Text + Break + Block + Text + Link + Text + Text + Break + Block + Macro + Text + Heading + Text + Break + Block + Text + Break + Block + Text + Link + Text diff -r 0c98b2e6896a -r fb199ccdb341 tests/test_anchors.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_anchors.txt Tue Jul 31 19:17:39 2018 +0200 @@ -0,0 +1,22 @@ += Level 1 = + +((Number #1 anchor)) +Some text. + +((Text, anchored.)) Some text referenced elsewhere. + += Heading __Underlined__ = + +Some more text. + +((Anchored!)) +More anchored text. + +A [[#Heading Underlined|link]] to the above heading. + +<> += Macro-Anchored Heading = + +Yet more text. + +Link to the [[#Number #1 anchor]].