1.1 --- a/xmlparser.py Fri Mar 22 01:37:06 2013 +0100
1.2 +++ b/xmlparser.py Fri Mar 22 01:53:39 2013 +0100
1.3 @@ -26,6 +26,7 @@
1.4 except ImportError:
1.5 from StringIO import StringIO
1.6
1.7 +from MoinMoin import wikiutil
1.8 from common import *
1.9 from xmlread import Parser
1.10 import re
1.11 @@ -45,8 +46,6 @@
1.12 "sup" : "^%s^",
1.13 "sub" : ",,%s,,",
1.14 "code" : "`%s`",
1.15 - "pre" : "{{{%s}}}",
1.16 - "table" : "{{{#!table\n%s\n}}}",
1.17 "tbody" : "%s",
1.18 "tr" : "%s",
1.19 "th" : "'''%s'''",
1.20 @@ -57,7 +56,6 @@
1.21 "p" : "%s",
1.22 "ol" : "%s",
1.23 "ul" : "%s",
1.24 - "ac:plain-text-body" : "{{{%s}}}",
1.25 "ac:link" : "[[%s%s|%s]]",
1.26 "ac:image" : "{{%s%s|%s}}",
1.27 }
1.28 @@ -78,6 +76,10 @@
1.29
1.30 indented_tags = ["li", "p"]
1.31
1.32 +preformatted_tags = ["pre", "ac:plain-text-body"]
1.33 +single_level_tags = ["strong", "em", "u", "del", "sup", "sub", "code"]
1.34 +formatted_tags = ["ac:rich-text-body", "table"]
1.35 +
1.36 link_target_tags = {
1.37 # Confluence element Attribute providing the target
1.38 "ri:page" : "ri:content-title",
1.39 @@ -85,6 +87,15 @@
1.40 "ri:user" : "ri:username",
1.41 }
1.42
1.43 +# NOTE: User links should support the intended user namespace prefix.
1.44 +
1.45 +link_target_types = {
1.46 + # Confluence element MoinMoin link prefix
1.47 + "ri:attachment" : "attachment:",
1.48 + "ri:user" : "",
1.49 + "ac:link-body" : "#",
1.50 + }
1.51 +
1.52 macro_rich_text_styles = {
1.53 # Confluence style MoinMoin admonition style
1.54 "note" : "caution",
1.55 @@ -104,21 +115,24 @@
1.56 Parser.__init__(self)
1.57 self.out = out
1.58
1.59 - # Link target information.
1.60 + # Link target and label information.
1.61
1.62 self.target = None
1.63 self.target_type = None
1.64 + self.label = None
1.65
1.66 # Macro information.
1.67
1.68 self.macro = None
1.69 self.macro_parameters = {}
1.70
1.71 - # Indentation and preformatted states.
1.72 + # Indentation and element nesting states.
1.73
1.74 self.indent = 0
1.75 self.states = {}
1.76 - for name in ("pre", "ac:plain-text-body"):
1.77 + self.max_level = self.level = 0
1.78 +
1.79 + for name in preformatted_tags + single_level_tags:
1.80 self.states[name] = 0
1.81
1.82 # Table states.
1.83 @@ -133,14 +147,28 @@
1.84 self.indent += 1
1.85 elif self.states.has_key(name):
1.86 self.states[name] += 1
1.87 + if name in preformatted_tags or name in formatted_tags:
1.88 + self.level += 1
1.89 + self.max_level = max(self.level, self.max_level)
1.90 +
1.91 Parser.startElement(self, name, attrs)
1.92
1.93 + # Remember macro information for use within the element.
1.94 +
1.95 + if name == "ac:macro":
1.96 + self.macro = self.attributes[-1].get("ac:name")
1.97 +
1.98 def endElement(self, name):
1.99 Parser.endElement(self, name)
1.100 +
1.101 if list_tags.has_key(name):
1.102 self.indent -= 1
1.103 elif self.states.has_key(name):
1.104 self.states[name] -= 1
1.105 + if name in preformatted_tags or name in formatted_tags:
1.106 + self.level -= 1
1.107 + if not self.level:
1.108 + self.max_level = 0
1.109
1.110 def characters(self, content):
1.111 if not self.is_preformatted():
1.112 @@ -155,6 +183,13 @@
1.113 # Parser-related methods.
1.114
1.115 def handleElement(self, name):
1.116 +
1.117 + """
1.118 + Handle the completion of the element with the given 'name'. Any content
1.119 + will either be recorded for later use (by an enclosing element, for
1.120 + example) or emitted in some form.
1.121 + """
1.122 +
1.123 text = "".join(self.text[-1])
1.124
1.125 # Handle state.
1.126 @@ -181,14 +216,55 @@
1.127 self.target_type = name
1.128 text = ""
1.129
1.130 + # For anchor links, just use the raw text and let Moin do the formatting.
1.131 +
1.132 + elif name == "ac:link-body":
1.133 + self.target_type = name
1.134 + self.label = text
1.135 + text = ""
1.136 +
1.137 + # Discard macro state.
1.138 +
1.139 + elif name == "ac:macro":
1.140 + self.macro = None
1.141 + self.macro_parameters = {}
1.142 +
1.143 # Remember macro information.
1.144
1.145 - elif name == "ac:parameter":
1.146 + elif name in ("ac:parameter", "ac:default-parameter"):
1.147 self.macro_parameters[self.attributes[-1].get("ac:name")] = text
1.148 text = ""
1.149
1.150 - elif name == "ac:macro":
1.151 - self.macro = self.attributes[-1].get("ac:name")
1.152 + # Handle single-level tags.
1.153 +
1.154 + elif name in single_level_tags and self.states[name] > 1:
1.155 + conversion = "%s"
1.156 +
1.157 + # Handle preformatted and formatted sections.
1.158 +
1.159 + elif name in preformatted_tags or name in formatted_tags:
1.160 +
1.161 + # Nest the section appropriately.
1.162 +
1.163 + level = 3 + self.max_level - self.level
1.164 + opening = "{" * level
1.165 + closing = "}" * level
1.166 +
1.167 + # Macro name information is used to style rich text body regions.
1.168 +
1.169 + if name != "table" and self.macro and macro_rich_text_styles.has_key(self.macro):
1.170 + details = macro_rich_text_styles[self.macro]
1.171 + title = self.macro_parameters.get("title")
1.172 + if title:
1.173 + details = "%s\n\n%s" % (details, title)
1.174 +
1.175 + conversion = "%s#!wiki %s\n\n%%s\n%s" % (opening, details, closing)
1.176 +
1.177 + elif name == "table":
1.178 + conversion = "%s#!table\n%%s\n%s" % (opening, closing)
1.179 +
1.180 + else:
1.181 + conversion = "%s%%s%s" % (opening, closing)
1.182
1.183 # Handle the common case.
1.184
1.185 @@ -198,29 +274,12 @@
1.186 # Attempt to convert the text.
1.187
1.188 # Links require target information.
1.189 - # NOTE: User links should support the intended user namespace prefix.
1.190
1.191 if name in ("ac:link", "ac:image"):
1.192 - if self.target_type == "ri:attachment":
1.193 - prefix = "attachment:"
1.194 - elif self.target_type == "ri:user":
1.195 - prefix = ""
1.196 - else:
1.197 - prefix = "../"
1.198 -
1.199 - text = conversion % (prefix, self.target, text or self.target)
1.200 - self.target = self.target_type = None
1.201 -
1.202 - # Macro name information is used to style rich text body regions.
1.203 -
1.204 - elif name == "ac:macro" and macro_rich_text_styles.has_key(self.macro):
1.205 - details = macro_rich_text_styles[self.macro]
1.206 - title = self.macro_parameters.get("title")
1.207 - if title:
1.208 - details = "%s\n\n%s" % (details, title)
1.209 - text = "{{{#!wiki %s\n\n%s}}}" % (details, text)
1.210 - self.macro = None
1.211 - self.macro_parameters = {}
1.212 + prefix = link_target_types.get(self.target_type, "../")
1.213 + anchor = self.attributes[-1].get("ac:anchor")
1.214 + text = conversion % (prefix, anchor or self.target, self.label or text or self.target)
1.215 + self.target = self.target_type = self.label = None
1.216
1.217 # Handle the common case.
1.218
1.219 @@ -265,7 +324,7 @@
1.220 self.out.write(text)
1.221
1.222 def is_preformatted(self):
1.223 - return reduce(operator.or_, self.states.values(), False)
1.224 + return reduce(operator.or_, [self.states[tag] for tag in preformatted_tags], False)
1.225
1.226 # Whitespace normalisation.
1.227