# HG changeset patch # User Paul Boddie # Date 1326759397 -3600 # Node ID cd06464e4a279f1d15d555c1aa17709f27fcfff7 # Parent 0042f967eae05e4e95a33c12aef7e293e5444c48 Added support for separate sections within tables through the use of "quoted" section markers ("{{{" quoted as "\{\{\{" and "}}}" as "\}\}\}") so that it becomes possible to embed preformatted text and even tables within tables. diff -r 0042f967eae0 -r cd06464e4a27 ImprovedTableParser.py --- a/ImprovedTableParser.py Sun Jan 15 23:37:14 2012 +0100 +++ b/ImprovedTableParser.py Tue Jan 17 01:16:37 2012 +0100 @@ -14,12 +14,19 @@ # Regular expressions. syntax = { + # For section markers. + "markers" : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE), + "marker" : (r"(\\+)", 0), + # At start of line: - "rows" : (r"^==", re.MULTILINE), # == + "sections" : (r"(^\s*{{{.*?^\s*}}})", re.MULTILINE | re.DOTALL), # {{{ ... }}} + "rows" : (r"^==", re.MULTILINE), # == + # Within text: - "columns" : (r"\|\|[ \t]*", 0), # || whitespace + "columns" : (r"\|\|[ \t]*", 0), # || ws-excl-nl + # At start of column text: - "column" : (r"^\s*<(.*?)>\s*(.*)", re.DOTALL), # whitespace < attributes > whitespace + "column" : (r"^\s*<(.*?)>\s*(.*)", re.DOTALL), # ws < attributes > ws } patterns = {} @@ -32,46 +39,139 @@ "Parse 's', returning a table definition." - rows = [] + s = replaceMarkers(s) + table_attrs = {} + rows = [] - # Extract each row from the definition. + # The following will be redefined upon the construction of the first row. + + row_attrs = {} + columns = [] + + # Process exposed text and sections. + + exposed = True + + # Initially, start a new row. + + row_continued = False + + for region in patterns["sections"].split(s): - for row_text in patterns["rows"].split(s): - columns = [] + # Only look for table features in exposed text. + + if exposed: + + # Extract each row from the definition. + + for row_text in patterns["rows"].split(region): + + # Only create a new row when a boundary has been found. 
- # Extract each column from the row. + if not row_continued: + if columns: + extractAttributes(columns[0][0], row_attrs, table_attrs) - for text in patterns["columns"].split(row_text): + row_attrs = {} + columns = [] + rows.append((row_attrs, columns)) + column_continued = False - # Extract the attribute and text sections. + # Extract each column from the row. - match = patterns["column"].search(text) - if match: - attribute_text, text = match.groups() - columns.append((parseAttributes(attribute_text, True), text)) - else: - columns.append(({}, text)) + for text in patterns["columns"].split(row_text): + + # Only create a new column when a boundary has been found. + + if not column_continued: + + # Extract the attribute and text sections. - # Extract row- and table-level attributes. + match = patterns["column"].search(text) + if match: + attribute_text, text = match.groups() + columns.append([parseAttributes(attribute_text, True), text]) + else: + columns.append([{}, text]) - row_attrs = {} + else: + columns[-1][1] += text + + # Permit columns immediately following this one. + + column_continued = False - if columns: - attrs, column = columns[0] + # Permit a continuation of the current column. + + column_continued = True + + # Permit rows immediately following this one. + + row_continued = False + + # Permit a continuation of the current row. - for name, value in attrs.items(): - if name.startswith("row"): - row_attrs[name] = value - del attrs[name] - elif name.startswith("table"): - table_attrs[name] = value - del attrs[name] + row_continued = True + + # Write any section into the current column. 
- rows.append((row_attrs, columns)) + else: + columns[-1][1] += region + + exposed = not exposed + + if columns: + extractAttributes(columns[0][0], row_attrs, table_attrs) return table_attrs, rows +def extractAttributes(attrs, row_attrs, table_attrs): + + """ + Extract row- and table-level attributes from 'attrs', storing them in + 'row_attrs' and 'table_attrs' respectively. + """ + + for name, value in attrs.items(): + if name.startswith("row"): + row_attrs[name] = value + del attrs[name] + elif name.startswith("table"): + table_attrs[name] = value + del attrs[name] + +def replaceMarkers(s): + + "Convert the section notation in 's'." + + l = [] + last = 0 + + # Get each marker and convert it. + + for match in patterns["markers"].finditer(s): + start, stop = match.span() + l.append(s[last:start]) + + # Convert the marker. + + marker = [] + brace = True + for text in patterns["marker"].split(match.group()): + if brace: + marker.append(text) + else: + marker.append(text[:-1]) + brace = not brace + + l.append("".join(marker)) + last = stop + else: + l.append(s[last:]) + + return "".join(l) + def parseAttributes(s, escape=True): """ diff -r 0042f967eae0 -r cd06464e4a27 tests/test_sections.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_sections.py Tue Jan 17 01:16:37 2012 +0100 @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from ImprovedTableParser import replaceMarkers + +text = """ +{{{ +Hello +\\{\\{\\{ +Hello again +But not \\{\\{\\{ this \\}\\}\\} +\\\\{\\\\{\\\\{ +And once again +\\\\}\\\\}\\\\} +And again +\\}\\}\\} +again +}}} +""" + +replaced = replaceMarkers(text) + +print text +print +print replaced + +# vim: tabstop=4 expandtab shiftwidth=4 diff -r 0042f967eae0 -r cd06464e4a27 tests/test_table.py --- a/tests/test_table.py Sun Jan 15 23:37:14 2012 +0100 +++ b/tests/test_table.py Tue Jan 17 01:16:37 2012 +0100 @@ -20,6 +20,11 @@ || * Item #A || Not a list +== +\\{\\{\\{ +Some preformatted text. 
+\\}\\}\\} +|| Preformatted text in a separate section """ attrs, rows = parse(table) @@ -27,9 +32,9 @@ print table print attrs print rows -print len(rows) == 5, ": length is", len(rows), "==", 5 +print len(rows) == 6, ": length is", len(rows), "==", 6 print -for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3]): +for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2]): print row_attrs print columns print len(columns) == expected, ": length is", len(columns), "==", expected