1.1 --- a/wikiparser.py Tue Feb 26 01:07:26 2013 +0100
1.2 +++ b/wikiparser.py Sat Mar 02 19:56:23 2013 +0100
1.3 @@ -34,6 +34,7 @@
1.4 from common import *
1.5 import re
1.6 import sys
1.7 +import codecs
1.8
1.9 # Section extraction.
1.10
1.11 @@ -74,7 +75,7 @@
1.12
1.13 # Heading, table and list extraction.
1.14
1.15 -list_regexp_str = r"^\s*(?P<listtype>[*#-])[*#-]*.*(\n\s*(?P=listtype).*?)*(?:\n|$)"
1.16 +list_regexp_str = r"^\s*(?P<listtype>[*#-])[*#-]*\s+.*(\n\s*(?P=listtype).*?)*(?:\n|$)"
1.17 table_regexp_str = r"^((?P<celltype>[|]{1,2})((.|\n(?!\n))+?(?P=celltype))+(\n|$))+"
1.18 blocktext_regexp_str = r"^(?P<type>h\d|bq)\.\s+(?P<text>.*)$"
1.19
1.20 @@ -146,7 +147,7 @@
1.21
1.22 # List item inspection.
1.23
1.24 -listitem_regexp_str = r"^(?P<marker> *[-*#]+)\s*(?P<text>.*)$"
1.25 +listitem_regexp_str = r"^(?P<marker> *[-*#]+)\s+(?P<text>.*)$"
1.26 listitem_regexp = re.compile(listitem_regexp_str, re.MULTILINE)
1.27
1.28 def get_list_items(text):
1.29 @@ -503,6 +504,7 @@
1.30
1.31 if __name__ == "__main__":
1.32 s = sys.stdin.read()
1.33 - parse(s, sys.stdout)
1.34 + out = codecs.getwriter("utf-8")(sys.stdout)
1.35 + parse(s, out)
1.36
1.37 # vim: tabstop=4 expandtab shiftwidth=4