1.1 --- a/xmlparser.py Wed Apr 10 16:22:38 2013 +0200
1.2 +++ b/xmlparser.py Wed Apr 10 19:11:26 2013 +0200
1.3 @@ -75,12 +75,13 @@
1.4 "ul" : "* %s",
1.5 }
1.6
1.7 -indented_tags = ["li", "p"]
1.8 -
1.9 preformatted_tags = ["pre", "ac:plain-text-body"]
1.10 single_level_tags = ["strong", "em", "u", "del", "sup", "sub", "code"]
1.11 formatted_tags = ["ac:rich-text-body", "table"]
1.12
1.13 +indented_tags = ["li", "p"] + preformatted_tags + formatted_tags
1.14 +block_tags = indented_tags + blocktypes.keys() + list_tags.keys()
1.15 +
1.16 link_target_tags = {
1.17 # Confluence element Attributes providing the target
1.18 "ri:page" : ("ri:space-key", "ri:content-title"),
1.19 @@ -149,6 +150,10 @@
1.20 self.table_rows = 0
1.21 self.table_columns = 0
1.22
1.23 + # Block states.
1.24 +
1.25 + self.have_block = False
1.26 +
1.27 # ContentHandler-related methods.
1.28
1.29 def startElement(self, name, attrs):
1.30 @@ -314,6 +319,8 @@
1.31 if not conversion:
1.32 conversion = tags.get(name)
1.33
1.34 +
1.35 +
1.36 # Attempt to convert the text.
1.37
1.38 # Links require target information.
1.39 @@ -355,17 +362,36 @@
1.40
1.41 if len(self.text) > 1:
1.42 nodes = self.text[-2]
1.43 - if "".join(self.text[-2]):
1.44 +
1.45 + # Where preceding text exists, add any blank line separators.
1.46 +
1.47 + if "".join(nodes):
1.48 parent = self.elements[-2]
1.49 +
1.50 + # All top-level elements are separated with blank lines.
1.51 +
1.52 if parent == "body":
1.53 - nodes.append("\n\n")
1.54 - elif list_tags.has_key(parent):
1.55 + nodes.append("\n")
1.56 +
1.57 + # Block elements always cause a new line to be started.
1.58 +
1.59 + if name in block_tags or self.have_block:
1.60 nodes.append("\n")
1.61 - elif list_tags.has_key(name):
1.62 - nodes.append("\n")
1.63 +
1.64 + self.have_block = False
1.65 +
1.66 + # Without preceding text, save any block node state so that new line
1.67 + # separators can be added at another level.
1.68 +
1.69 + elif name in block_tags:
1.70 + self.have_block = True
1.71 +
1.72 + else:
1.73 + self.have_block = False
1.74 +
1.75 nodes.append(text)
1.76
1.77 - # Otherwise, emit the text.
1.78 + # Otherwise, emit the text (at the top level of the document).
1.79
1.80 else:
1.81 self.out.write(text)