# HG changeset patch # User Paul Boddie # Date 1333221652 -7200 # Node ID 14a41cb069bf26def25e05f6a66239df0c6a38eb # Parent 9fb26a52b861ab9f67aa88735b75f001713ececf Extract row and table attributes from all columns in each row, thus fixing tables where the first column of a row is absent due to a rowspan being applied. diff -r 9fb26a52b861 -r 14a41cb069bf ImprovedTableParser.py --- a/ImprovedTableParser.py Sat Mar 31 18:10:18 2012 +0200 +++ b/ImprovedTableParser.py Sat Mar 31 21:20:52 2012 +0200 @@ -90,7 +90,7 @@ # Complete any existing row. if columns: - extractAttributes(columns[0][0], row_attrs, table_attrs) + extractAttributes(columns, row_attrs, table_attrs) span_columns(columns, columnnumber) # Replicate the last row to determine column usage. @@ -193,7 +193,7 @@ # Complete any final row. if columns: - extractAttributes(columns[0][0], row_attrs, table_attrs) + extractAttributes(columns, row_attrs, table_attrs) return table_attrs, rows @@ -235,20 +235,22 @@ return columnnumber -def extractAttributes(attrs, row_attrs, table_attrs): +def extractAttributes(columns, row_attrs, table_attrs): """ - Extract row- and table-level attributes from 'attrs', storing them in + Extract row- and table-level attributes from 'columns', storing them in 'row_attrs' and 'table_attrs' respectively. """ - for name, value in attrs.items(): - if name.startswith("row") and name not in ("rowspan", "rowcontinuation"): - row_attrs[name] = value - del attrs[name] - elif name.startswith("table"): - table_attrs[name] = value - del attrs[name] + for column in columns: + attrs = column[0] + for name, value in attrs.items(): + if name.startswith("row") and name not in ("rowspan", "rowcontinuation"): + row_attrs[name] = value + del attrs[name] + elif name.startswith("table"): + table_attrs[name] = value + del attrs[name] def replaceMarkers(s): diff -r 9fb26a52b861 -r 14a41cb069bf tests/test_table.py --- a/tests/test_table.py Sat Mar 31 18:10:18 2012 +0200 +++ b/tests/test_table.py Sat Mar 31 21:20:52 2012 +0200 @@ -34,17 +34,21 @@ And this is the second column. == Some \\{\\{\\{preformatted text\\}\\}\\} || Some `preformatted text` || Observe the region notation inline. +== + 1 || 2 || 3 +== + 2 || 3 """ attrs, rows = parse(table) -expected = 9 +expected = 11 print table print attrs print rows print len(rows) == expected, ": length is", len(rows), "==", expected print -for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2, 2, 2, 3]): +for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 2]): print row_attrs print columns non_continuation_columns = [