# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1326759397 -3600
# Node ID cd06464e4a279f1d15d555c1aa17709f27fcfff7
# Parent  0042f967eae05e4e95a33c12aef7e293e5444c48
Added support for separate sections within tables through the use of "quoted"
section markers ("{{{" quoted as "\{\{\{" and "}}}" as "\}\}\}") so that it
becomes possible to embed preformatted text and even tables within tables.

diff -r 0042f967eae0 -r cd06464e4a27 ImprovedTableParser.py
--- a/ImprovedTableParser.py	Sun Jan 15 23:37:14 2012 +0100
+++ b/ImprovedTableParser.py	Tue Jan 17 01:16:37 2012 +0100
@@ -14,12 +14,19 @@
 # Regular expressions.
 
 syntax = {
+    # For section markers.
+    "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),
+    "marker"    : (r"(\\+)",                                            0),
+
     # At start of line:
-    "rows"      : (r"^==",                  re.MULTILINE),  # ==
+    "sections"  : (r"(^\s*{{{.*?^\s*}}})",  re.MULTILINE | re.DOTALL),  # {{{ ... }}}
+    "rows"      : (r"^==",                  re.MULTILINE),              # ==
+
     # Within text:
-    "columns"   : (r"\|\|[ \t]*",           0),             # || whitespace
+    "columns"   : (r"\|\|[ \t]*",           0),                         # || ws-excl-nl
+
     # At start of column text:
-    "column"    : (r"^\s*<(.*?)>\s*(.*)",   re.DOTALL),     # whitespace < attributes > whitespace
+    "column"    : (r"^\s*<(.*?)>\s*(.*)",   re.DOTALL),                 # ws < attributes > ws
     }
 
 patterns = {}
@@ -32,46 +39,139 @@
 
     "Parse 's', returning a table definition."
 
-    rows = []
+    s = replaceMarkers(s)
+
     table_attrs = {}
+    rows = []
 
-    # Extract each row from the definition.
+    # The following will be redefined upon the construction of the first row.
+
+    row_attrs = {}
+    columns = []
+
+    # Process exposed text and sections.
+
+    exposed = True
+
+    # Initially, start a new row.
+
+    row_continued = False
+
+    for region in patterns["sections"].split(s):
 
-    for row_text in patterns["rows"].split(s):
-        columns = []
+        # Only look for table features in exposed text.
+
+        if exposed:
+
+            # Extract each row from the definition.
+
+            for row_text in patterns["rows"].split(region):
+
+                # Only create a new row when a boundary has been found.
 
-        # Extract each column from the row.
+                if not row_continued:
+                    if columns:
+                        extractAttributes(columns[0][0], row_attrs, table_attrs)
 
-        for text in patterns["columns"].split(row_text):
+                    row_attrs = {}
+                    columns = []
+                    rows.append((row_attrs, columns))
+                    column_continued = False
 
-            # Extract the attribute and text sections.
+                # Extract each column from the row.
 
-            match = patterns["column"].search(text)
-            if match:
-                attribute_text, text = match.groups()
-                columns.append((parseAttributes(attribute_text, True), text))
-            else:
-                columns.append(({}, text))
+                for text in patterns["columns"].split(row_text):
+
+                    # Only create a new column when a boundary has been found.
+
+                    if not column_continued:
+
+                        # Extract the attribute and text sections.
 
-        # Extract row- and table-level attributes.
+                        match = patterns["column"].search(text)
+                        if match:
+                            attribute_text, text = match.groups()
+                            columns.append([parseAttributes(attribute_text, True), text])
+                        else:
+                            columns.append([{}, text])
 
-        row_attrs = {}
+                    else:
+                        columns[-1][1] += text
+
+                    # Permit columns immediately following this one.
+
+                    column_continued = False
 
-        if columns:
-            attrs, column = columns[0]
+                # Permit a continuation of the current column.
+
+                column_continued = True
+
+                # Permit rows immediately following this one.
+
+                row_continued = False
+
+            # Permit a continuation of the current row.
 
-            for name, value in attrs.items():
-                if name.startswith("row"):
-                    row_attrs[name] = value
-                    del attrs[name]
-                elif name.startswith("table"):
-                    table_attrs[name] = value
-                    del attrs[name]
+            row_continued = True
+
+        # Write any section into the current column.
 
-        rows.append((row_attrs, columns))
+        else:
+            columns[-1][1] += region
+
+        exposed = not exposed
+
+    if columns:
+        extractAttributes(columns[0][0], row_attrs, table_attrs)
 
     return table_attrs, rows
 
+def extractAttributes(attrs, row_attrs, table_attrs):
+
+    """
+    Extract row- and table-level attributes from 'attrs', storing them in
+    'row_attrs' and 'table_attrs' respectively.
+    """
+
+    for name, value in attrs.items():
+        if name.startswith("row"):
+            row_attrs[name] = value
+            del attrs[name]
+        elif name.startswith("table"):
+            table_attrs[name] = value
+            del attrs[name]
+
+def replaceMarkers(s):
+
+    "Convert the section notation in 's'."
+
+    l = []
+    last = 0
+
+    # Get each marker and convert it.
+
+    for match in patterns["markers"].finditer(s):
+        start, stop = match.span()
+        l.append(s[last:start])
+
+        # Convert the marker.
+
+        marker = []
+        brace = True
+        for text in patterns["marker"].split(match.group()):
+            if brace:
+                marker.append(text)
+            else:
+                marker.append(text[:-1])
+            brace = not brace
+
+        l.append("".join(marker))
+        last = stop
+    else:
+        l.append(s[last:])
+
+    return "".join(l)
+
 def parseAttributes(s, escape=True):
 
     """
diff -r 0042f967eae0 -r cd06464e4a27 tests/test_sections.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_sections.py	Tue Jan 17 01:16:37 2012 +0100
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+from ImprovedTableParser import replaceMarkers
+
+text = """
+{{{
+Hello
+\\{\\{\\{
+Hello again
+But not \\{\\{\\{ this \\}\\}\\}
+\\\\{\\\\{\\\\{
+And once again
+\\\\}\\\\}\\\\}
+And again
+\\}\\}\\}
+again
+}}}
+"""
+
+replaced = replaceMarkers(text)
+
+print text
+print
+print replaced
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 0042f967eae0 -r cd06464e4a27 tests/test_table.py
--- a/tests/test_table.py	Sun Jan 15 23:37:14 2012 +0100
+++ b/tests/test_table.py	Tue Jan 17 01:16:37 2012 +0100
@@ -20,6 +20,11 @@
 ||
  * Item #A
 || Not a list
+==
+\\{\\{\\{
+Some preformatted text.
+\\}\\}\\}
+||<colspan="2"> Preformatted text in a separate section
 """
 
 attrs, rows = parse(table)
@@ -27,9 +32,9 @@
 print table
 print attrs
 print rows
-print len(rows) == 5, ": length is", len(rows), "==", 5
+print len(rows) == 6, ": length is", len(rows), "==", 6
 print
-for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3]):
+for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2]):
     print row_attrs
     print columns
     print len(columns) == expected, ": length is", len(columns), "==", expected