1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/tests/test_lists.txt Sat Mar 02 19:56:23 2013 +0100
1.3 @@ -0,0 +1,5 @@
1.4 +*Lists* are like this:
1.5 + * First item
1.6 + * Second item
1.7 + ** Sublist item
1.8 + * Final item
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/tests/test_xml_tables.txt Sat Mar 02 19:56:23 2013 +0100
2.3 @@ -0,0 +1,17 @@
2.4 +<table><tbody>
2.5 +<tr>
2.6 +<th><p>Heading 1</p></th>
2.7 +<th><p>Heading 2</p></th>
2.8 +<th><p>Heading 3</p></th>
2.9 +</tr>
2.10 +<tr>
2.11 +<td><p>Cell 1</p></td>
2.12 +<td><p>Cell 2</p></td>
2.13 +<td><p>Cell 3</p></td>
2.14 +</tr>
2.15 +<tr>
2.16 +<td><p>Cell 4</p></td>
2.17 +<td><p>Cell 5</p></td>
2.18 +<td><p>Cell 6</p></td>
2.19 +</tr>
2.20 +</tbody></table>
3.1 --- a/wikiparser.py Tue Feb 26 01:07:26 2013 +0100
3.2 +++ b/wikiparser.py Sat Mar 02 19:56:23 2013 +0100
3.3 @@ -34,6 +34,7 @@
3.4 from common import *
3.5 import re
3.6 import sys
3.7 +import codecs
3.8
3.9 # Section extraction.
3.10
3.11 @@ -74,7 +75,7 @@
3.12
3.13 # Heading, table and list extraction.
3.14
3.15 -list_regexp_str = r"^\s*(?P<listtype>[*#-])[*#-]*.*(\n\s*(?P=listtype).*?)*(?:\n|$)"
3.16 +list_regexp_str = r"^\s*(?P<listtype>[*#-])[*#-]*\s+.*(\n\s*(?P=listtype).*?)*(?:\n|$)"
3.17 table_regexp_str = r"^((?P<celltype>[|]{1,2})((.|\n(?!\n))+?(?P=celltype))+(\n|$))+"
3.18 blocktext_regexp_str = r"^(?P<type>h\d|bq)\.\s+(?P<text>.*)$"
3.19
3.20 @@ -146,7 +147,7 @@
3.21
3.22 # List item inspection.
3.23
3.24 -listitem_regexp_str = r"^(?P<marker> *[-*#]+)\s*(?P<text>.*)$"
3.25 +listitem_regexp_str = r"^(?P<marker> *[-*#]+)\s+(?P<text>.*)$"
3.26 listitem_regexp = re.compile(listitem_regexp_str, re.MULTILINE)
3.27
3.28 def get_list_items(text):
3.29 @@ -503,6 +504,7 @@
3.30
3.31 if __name__ == "__main__":
3.32 s = sys.stdin.read()
3.33 - parse(s, sys.stdout)
3.34 + out = codecs.getwriter("utf-8")(sys.stdout)
3.35 + parse(s, out)
3.36
3.37 # vim: tabstop=4 expandtab shiftwidth=4
4.1 --- a/xmlparser.py Tue Feb 26 01:07:26 2013 +0100
4.2 +++ b/xmlparser.py Sat Mar 02 19:56:23 2013 +0100
4.3 @@ -32,6 +32,7 @@
4.4 import sys
4.5 import operator
4.6 import htmlentitydefs
4.7 +import codecs
4.8
4.9 # XML dialect syntax parsing.
4.10
4.11 @@ -45,6 +46,11 @@
4.12 "sub" : ",,%s,,",
4.13 "code" : "`%s`",
4.14 "pre" : "{{{%s}}}",
4.15 + "table" : "{{{#!table\n%s\n}}}",
4.16 + "tbody" : "%s",
4.17 + "tr" : "%s",
4.18 + "th" : "'''%s'''",
4.19 + "td" : "%s",
4.20 "blockquote" : " %s",
4.21 "small" : "~-%s-~",
4.22 "big" : "~+%s+~",
4.23 @@ -114,6 +120,11 @@
4.24 for name in ("pre", "ac:plain-text-body"):
4.25 self.states[name] = 0
4.26
4.27 + # Table states.
4.28 +
4.29 + self.table_rows = 0
4.30 + self.table_columns = 0
4.31 +
4.32 # ContentHandler-related methods.
4.33
4.34 def startElement(self, name, attrs):
4.35 @@ -143,7 +154,17 @@
4.36 # Parser-related methods.
4.37
4.38 def handleElement(self, name):
4.39 - text = "".join(self.text[-1])
4.40 + text = "".join(self.text[-1]).strip()
4.41 +
4.42 + # Handle state.
4.43 +
4.44 + if name == "table":
4.45 + self.table_rows = 0
4.46 + elif name == "tr":
4.47 + self.table_columns = 0
4.48 +
4.49 + # Find conversions.
4.50 +
4.51 conversion = None
4.52
4.53 # Handle list elements.
4.54 @@ -207,6 +228,17 @@
4.55 elif simple_tags.has_key(name):
4.56 text = simple_tags[name]
4.57
4.58 + # Postprocess table columns and rows.
4.59 +
4.60 + if name in ("th", "td"):
4.61 + if self.table_columns:
4.62 + text = "\n|| %s" % text
4.63 + self.table_columns += 1
4.64 + elif name == "tr":
4.65 + if self.table_rows:
4.66 + text = "\n==\n%s" % text
4.67 + self.table_rows += 1
4.68 +
4.69 # Normalise leading whitespace and indent the text if appropriate.
4.70
4.71 if name in indented_tags:
4.72 @@ -271,6 +303,7 @@
4.73
4.74 if __name__ == "__main__":
4.75 s = sys.stdin.read()
4.76 - parse(s, sys.stdout)
4.77 + out = codecs.getwriter("utf-8")(sys.stdout)
4.78 + parse(s, out)
4.79
4.80 # vim: tabstop=4 expandtab shiftwidth=4