1.1 --- a/parser.py Sun Feb 17 20:33:21 2013 +0100
1.2 +++ b/parser.py Sun Feb 17 20:36:11 2013 +0100
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Confluence Wiki syntax parsing.
1.6
1.7 -Copyright (C) 2012 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2012, 2013 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This software is free software; you can redistribute it and/or
1.11 modify it under the terms of the GNU General Public License as
1.12 @@ -397,15 +397,15 @@
1.13 "blockquote" : " %s",
1.14 "small" : "~-%s-~",
1.15 "big" : "~+%s+~",
1.16 - "p" : "\n%s\n",
1.17 - "ol" : "\n%s",
1.18 - "ul" : "\n%s",
1.19 + "p" : "%s",
1.20 + "ol" : "%s",
1.21 + "ul" : "%s",
1.22 "ac:plain-text-body" : "{{{%s}}}",
1.23 "ac:link" : "[[%s%s|%s]]",
1.24 }
1.25
1.26 for tag, translation in blocktypes.items():
1.27 - tags[tag] = "\n%s\n" % translation
1.28 + tags[tag] = translation
1.29
1.30 simple_tags = {
1.31 # XHTML tag MoinMoin syntax
1.32 @@ -414,8 +414,8 @@
1.33
1.34 list_tags = {
1.35 # XHTML list tag MoinMoin list item syntax
1.36 - "ol" : "1. %s\n",
1.37 - "ul" : "* %s\n",
1.38 + "ol" : "1. %s",
1.39 + "ul" : "* %s",
1.40 }
1.41
1.42 indented_tags = ["li", "p"]
1.43 @@ -438,9 +438,6 @@
1.44 normalise_regexp_str = r"\s+"
1.45 normalise_regexp = re.compile(normalise_regexp_str)
1.46
1.47 -normalise_end_regexp_str = r"\s\s+$"
1.48 -normalise_end_regexp = re.compile(normalise_end_regexp_str)
1.49 -
1.50 class ConfluenceXMLParser(Parser):
1.51
1.52 "Handle content from Confluence 4 page revisions."
1.53 @@ -567,13 +564,16 @@
1.54 # Add the converted text to the end of the parent element's text nodes.
1.55
1.56 if len(self.text) > 1:
1.57 - preceding = "".join(self.text[-2])
1.58 -
1.59 - if not self.is_preformatted():
1.60 - preceding = self.normalise_end(preceding, self.elements[-2])
1.61 -
1.62 - self.text[-2] = [preceding]
1.63 - self.text[-2].append(text)
1.64 + nodes = self.text[-2]
1.65 + if "".join(self.text[-2]):
1.66 + parent = self.elements[-2]
1.67 + if parent == "body":
1.68 + nodes.append("\n\n")
1.69 + elif list_tags.has_key(parent):
1.70 + nodes.append("\n")
1.71 + elif list_tags.has_key(name) and parent == "li":
1.72 + nodes.append("\n")
1.73 + nodes.append(text)
1.74
1.75 # Otherwise, emit the text.
1.76
1.77 @@ -583,23 +583,17 @@
1.78 def is_preformatted(self):
1.79 return reduce(operator.or_, self.states.values(), False)
1.80
1.81 - def get_replacement(self, name, end=False):
1.82 - if list_tags.has_key(name):
1.83 - if end:
1.84 - return "\n"
1.85 - else:
1.86 - return ""
1.87 - elif name == "body":
1.88 - return "\n\n"
1.89 + # Whitespace normalisation.
1.90 +
1.91 + def get_replacement(self, name):
1.92 + if name in ("html", "body") or list_tags.has_key(name):
1.93 + return ""
1.94 else:
1.95 return " "
1.96
1.97 def normalise(self, text, name):
1.98 return normalise_regexp.sub(self.get_replacement(name), text)
1.99
1.100 - def normalise_end(self, text, name):
1.101 - return normalise_end_regexp.sub(self.get_replacement(name, True), text)
1.102 -
1.103 def xmlparse(s, out):
1.104
1.105 "Parse the content in the string 's', writing a translation to 'out'."
1.106 @@ -682,6 +676,9 @@
1.107
1.108 if __name__ == "__main__":
1.109 s = sys.stdin.read()
1.110 - parse(s, sys.stdout)
1.111 + if "--xml" in sys.argv:
1.112 + xmlparse(s, sys.stdout)
1.113 + else:
1.114 + parse(s, sys.stdout)
1.115
1.116 # vim: tabstop=4 expandtab shiftwidth=4