# HG changeset patch # User Paul Boddie # Date 1370899309 -7200 # Node ID a79c3559e45c9beb52aeae7b74cfd20ed0c68cbe # Parent 5909d1da7230701c18f4ce77911beef78ea98a0e Supported list recognition in region extraction, preventing sections from breaking up lists. diff -r 5909d1da7230 -r a79c3559e45c tests/test_lists.txt --- a/tests/test_lists.txt Mon Jun 10 18:52:25 2013 +0200 +++ b/tests/test_lists.txt Mon Jun 10 23:21:49 2013 +0200 @@ -2,4 +2,5 @@ * First item * Second item ** Sublist item + ** Item in {color:#00ff00}colour{color} * Final item diff -r 5909d1da7230 -r a79c3559e45c tests/test_tables_lists_macros.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_tables_lists_macros.txt Mon Jun 10 23:21:49 2013 +0200 @@ -0,0 +1,9 @@ +A table: + +|| Heading || Heading || +| Cell | Cell with list: +* {color:#ff0000}Red{color} +* {color:#00ff00}Green{color} +* {color:#0000ff}Blue{color} | + +After the table. diff -r 5909d1da7230 -r a79c3559e45c wikiparser.py --- a/wikiparser.py Mon Jun 10 18:52:25 2013 +0200 +++ b/wikiparser.py Mon Jun 10 23:21:49 2013 +0200 @@ -39,7 +39,14 @@ # Section extraction. -sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}|^(?P<rowstart>[|]{1,2})|(?P<rowend>[|]{1,2})(\n|$)" +sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}" \ + r"|" \ + r"^(?P<rowstart>[|]{1,2})" \ + r"|" \ + r"(?P<rowend>[|]{1,2}(\n|$))" \ + r"|" \ + r"^(?P<listitem>\s*[*#-]+\s+.*(\n|$))" + sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE) def get_regions(s): @@ -53,12 +60,14 @@ regions = [""] depth = 0 had_row = False + had_item = False for match in sections_regexp.finditer(s): start, end = match.span() is_start = match.group("options") or match.group("rowstart") is_section = is_section_marker(match.group("type")) is_row = match.group("rowstart") or match.group("rowend") + is_item = match.group("listitem") # The start of a region is either indicated by a marker with options or # by a marker where no region is currently active. 
@@ -86,6 +95,16 @@ regions[-2] += regions[-1] + s[start:end] regions.pop() + # A list item may either continue a list region or start a new + # list region. + + elif is_item: + if (last != start or not had_item): + regions.append(s[start:end]) + else: + regions[-2] += regions[-1] + s[start:end] + regions.pop() + # Certain markers may be standalone macros. else: @@ -131,6 +150,7 @@ depth -= 1 had_row = is_row + had_item = is_item last = end # Where a region is still active, terminate it.