# HG changeset patch # User Paul Boddie # Date 1365969002 -7200 # Node ID 2e37bd8ea04dc7a41ca9d404d422216095257f6d # Parent e3262eb82f1d7a5978200eeb850163200b441973 Reset indentation within sections/regions, remembering external indentation state. Fixed default parameter handling for macros. Made the test program handle UTF-8 content. diff -r e3262eb82f1d -r 2e37bd8ea04d tests/test_xml_lists_blocks.txt --- a/tests/test_xml_lists_blocks.txt Sun Apr 14 21:47:21 2013 +0200 +++ b/tests/test_xml_lists_blocks.txt Sun Apr 14 21:50:02 2013 +0200 @@ -5,4 +5,9 @@
  • An item with preformatted text and more text
  • An item with a link and following text
  • A link and following text and another link
  • +
  • An item with
  • diff -r e3262eb82f1d -r 2e37bd8ea04d tests/test_xml_macros.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_xml_macros.txt Sun Apr 14 21:50:02 2013 +0200 @@ -0,0 +1,6 @@ +

    This is a test of redcolours.

    + +sections featuring code + +second +

    This is the second paragraph.

    diff -r e3262eb82f1d -r 2e37bd8ea04d xmlparser.py --- a/xmlparser.py Sun Apr 14 21:47:21 2013 +0200 +++ b/xmlparser.py Sun Apr 14 21:50:02 2013 +0200 @@ -139,7 +139,7 @@ # Indentation and element nesting states. - self.indent = 0 + self.indents = [0] self.states = {} self.max_level = self.level = 0 @@ -162,11 +162,11 @@ # Track indentation for lists. if list_tags.has_key(name): - self.indent += 1 + self.indents.append(self.indents[-1] + 1) # Track element nesting. - elif self.states.has_key(name): + if self.states.has_key(name): self.states[name] += 1 # Track cumulative element nesting in order to produce appropriate depth @@ -176,6 +176,10 @@ self.level += 1 self.max_level = max(self.level, self.max_level) + # Reset indentation within regions. + + self.indents.append(0) + Parser.startElement(self, name, attrs) # Remember macro information for use within the element. @@ -184,17 +188,32 @@ self.macro = self.attributes[-1].get("ac:name") def endElement(self, name): + + # Reset the indent for any preformatted/formatted region so that it may + # itself be indented. + + if name in preformatted_tags or name in formatted_tags: + self.indents.pop() + Parser.endElement(self, name) if list_tags.has_key(name): - self.indent -= 1 - elif self.states.has_key(name): + self.indents.pop() + + if self.states.has_key(name): self.states[name] -= 1 + if name in preformatted_tags or name in formatted_tags: self.level -= 1 if not self.level: self.max_level = 0 + # Discard macro state. + + if name == "ac:macro": + self.macro = None + self.macro_parameters = {} + def characters(self, content): if not self.is_preformatted(): content = self.normalise(content, self.elements[-1]) @@ -272,18 +291,16 @@ self.label = text.strip() text = "" - # Discard macro state. - - elif name == "ac:macro": - self.macro = None - self.macro_parameters = {} - # Remember macro information. - elif name in ("ac:parameter", "ac:default-parameter"): + elif name == "ac:parameter": self.macro_parameters[self.attributes[-1].get("ac:name")] = text text = "" + elif name == "ac:default-parameter": + self.macro_parameters[self.attributes[-2].get("ac:name")] = text + text = "" + # Handle single-level tags. elif name in single_level_tags and self.states[name] > 1: @@ -342,13 +359,21 @@ text = conversion % (self.target, self.label or self.target) self.target = self.target_type = self.label = None - # Handle the common case. + # Macros require various kinds of information. + + elif name == "ac:macro": + macro_name = self.attributes[-1]["ac:name"] + + # Handle the common cases for parameterised and unparameterised + # substitutions. elif text and conversion: text = conversion % text elif simple_tags.has_key(name): text = simple_tags[name] + + # Postprocess table columns and rows. if name in ("th", "td"): @@ -360,10 +385,12 @@ text = "\n==\n%s" % text self.table_rows += 1 + + # Normalise leading whitespace and indent the text if appropriate. if name in indented_tags: - text = " " * self.indent + text.lstrip() + text = " " * self.indents[-1] + text.lstrip() # Add the converted text to the end of the parent element's text nodes. @@ -451,7 +478,7 @@ f.close() if __name__ == "__main__": - s = sys.stdin.read() + s = codecs.getreader("utf-8")(sys.stdin).read() out = codecs.getwriter("utf-8")(sys.stdout) parse(s, out)