# HG changeset patch # User Paul Boddie # Date 1422886699 -3600 # Node ID 11e412862d4515a5305c8e6689827645bfd8f720 # Parent d54fd34e39c0bb0f89369c2031cff1841b52a1bd Fixed handling of links in comment pages, which are represented by subpages in Moin and therefore need deeper relative links. diff -r d54fd34e39c0 -r 11e412862d45 TO_DO.txt --- a/TO_DO.txt Sat Jan 10 15:11:34 2015 +0100 +++ b/TO_DO.txt Mon Feb 02 15:18:19 2015 +0100 @@ -1,3 +1,13 @@ +Recent Issues +============= + +Linking in pages with slash characters in the actual titles. (Difficult to +handle, especially if the pages eventually get renamed.) + +(Linking from comment subpages.) + + + Enhancements ============ diff -r d54fd34e39c0 -r 11e412862d45 convert.py --- a/convert.py Sat Jan 10 15:11:34 2015 +0100 +++ b/convert.py Mon Feb 02 15:18:19 2015 +0100 @@ -164,17 +164,19 @@ if not body: body = "## Empty page." + is_comment_page = content.get("content:class") == "Comment" + # NOTE: Very simple technique employed for guessing the format. if no_translate: - fn = write + fn = notranslate elif body.startswith("<"): fn = xmltranslate else: fn = translate try: - fn(join(versions_dir, content["content"]), body) + fn(join(versions_dir, content["content"]), body, is_comment_page) except: err = codecs.getwriter("utf-8")(sys.stderr) print >>err, "Error parsing", content["content"] @@ -210,7 +212,12 @@ "Record properties in the current content dictionary." - self.content[attributes[-1]["name"]] = text.strip() + property_name = attributes[-1]["name"] + self.content[property_name] = text.strip() + + property_class = attributes[-1].get("class") + if property_class: + self.content["%s:%s" % (property_name, "class")] = property_class.strip() def handle_id(self, name, elements, attributes, all_text, text): @@ -272,7 +279,7 @@ finally: f.close() -def translate(filename, body, fn=None): +def translate(filename, body, is_comment_page, fn=None): """ Write to the file with the given 'filename' a translation of the given @@ -284,12 +291,15 @@ out = codecs.open(filename, "w", encoding="utf-8") try: print >>out, "#pragma page-filename", filename - fn(body, out) + fn(body, out, is_comment_page) finally: out.close() -def xmltranslate(filename, body): - translate(filename, body, xmlparser.parse) +def xmltranslate(filename, body, is_comment_page): + translate(filename, body, is_comment_page, xmlparser.parse) + +def notranslate(filename, body, is_comment_page): + write(filename, body) def sort_comments(pages_dir, pageid): diff -r d54fd34e39c0 -r 11e412862d45 wikiparser.py --- a/wikiparser.py Sat Jan 10 15:11:34 2015 +0100 +++ b/wikiparser.py Mon Feb 02 15:18:19 2015 +0100 @@ -3,7 +3,7 @@ """ Confluence Wiki syntax parsing. -Copyright (C) 2012, 2013 Paul Boddie +Copyright (C) 2012, 2013, 2015 Paul Boddie This software is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -425,7 +425,8 @@ "A parser for Confluence markup." - def __init__(self): + def __init__(self, is_comment_page=False): + self.is_comment_page = is_comment_page self.max_level = self.level = 0 self.in_heading = False self.held_anchors = [] @@ -495,6 +496,8 @@ else: prefix = "../" + if self.is_comment_page: + prefix += "../" # Make the link tidier by making a target if none was given. @@ -804,11 +807,11 @@ def forbids_macros(self): return self.in_heading or self.macro -def parse(s, out): +def parse(s, out, is_comment_page=False): "Parse the content in the string 's', writing a translation to 'out'." - parser = ConfluenceParser() + parser = ConfluenceParser(is_comment_page) out.write(parser.parse_text(s, top=True)) if __name__ == "__main__": diff -r d54fd34e39c0 -r 11e412862d45 xmlparser.py --- a/xmlparser.py Sat Jan 10 15:11:34 2015 +0100 +++ b/xmlparser.py Mon Feb 02 15:18:19 2015 +0100 @@ -139,9 +139,10 @@ "Handle content from Confluence 4 page revisions." - def __init__(self, out): + def __init__(self, out, is_comment_page=False): Parser.__init__(self) self.out = out + self.is_comment_page = is_comment_page # Link target and label information. @@ -302,6 +303,8 @@ prefix = link_target_prefixes.get(attrname) if prefix: target_details.insert(0, prefix) + if self.is_comment_page: + target_details.insert(0, prefix) # Make a link based on the details. @@ -511,7 +514,7 @@ def normalise(self, text, name): return normalise_regexp.sub(self.get_replacement(name), text) -def parse(s, out): +def parse(s, out, is_comment_page=False): "Parse the content in the string 's', writing a translation to 'out'." @@ -530,7 +533,7 @@ f = StringIO(s.encode("utf-8")) try: - parser = ConfluenceXMLParser(out) + parser = ConfluenceXMLParser(out, is_comment_page) parser.parse(f) finally: f.close()