ImprovedTableParser

Changeset

22:14a41cb069bf
2012-03-31, Paul Boddie [raw | files | shortlog | changelog | graph]: Extract row and table attributes from all columns in each row, thus fixing tables where the first column of a row is absent due to a rowspan being applied.
ImprovedTableParser.py (file) tests/test_table.py (file)
     1.1 --- a/ImprovedTableParser.py	Sat Mar 31 18:10:18 2012 +0200
     1.2 +++ b/ImprovedTableParser.py	Sat Mar 31 21:20:52 2012 +0200
     1.3 @@ -90,7 +90,7 @@
     1.4                      # Complete any existing row.
     1.5  
     1.6                      if columns:
     1.7 -                        extractAttributes(columns[0][0], row_attrs, table_attrs)
     1.8 +                        extractAttributes(columns, row_attrs, table_attrs)
     1.9                          span_columns(columns, columnnumber)
    1.10  
    1.11                          # Replicate the last row to determine column usage.
    1.12 @@ -193,7 +193,7 @@
    1.13      # Complete any final row.
    1.14  
    1.15      if columns:
    1.16 -        extractAttributes(columns[0][0], row_attrs, table_attrs)
    1.17 +        extractAttributes(columns, row_attrs, table_attrs)
    1.18  
    1.19      return table_attrs, rows
    1.20  
    1.21 @@ -235,20 +235,22 @@
    1.22  
    1.23      return columnnumber
    1.24  
    1.25 -def extractAttributes(attrs, row_attrs, table_attrs):
    1.26 +def extractAttributes(columns, row_attrs, table_attrs):
    1.27  
    1.28      """
    1.29 -    Extract row- and table-level attributes from 'attrs', storing them in
    1.30 +    Extract row- and table-level attributes from 'columns', storing them in
    1.31      'row_attrs' and 'table_attrs' respectively.
    1.32      """
    1.33  
    1.34 -    for name, value in attrs.items():
    1.35 -        if name.startswith("row") and name not in ("rowspan", "rowcontinuation"):
    1.36 -            row_attrs[name] = value
    1.37 -            del attrs[name]
    1.38 -        elif name.startswith("table"):
    1.39 -            table_attrs[name] = value
    1.40 -            del attrs[name]
    1.41 +    for column in columns:
    1.42 +        attrs = column[0]
    1.43 +        for name, value in attrs.items():
    1.44 +            if name.startswith("row") and name not in ("rowspan", "rowcontinuation"):
    1.45 +                row_attrs[name] = value
    1.46 +                del attrs[name]
    1.47 +            elif name.startswith("table"):
    1.48 +                table_attrs[name] = value
    1.49 +                del attrs[name]
    1.50  
    1.51  def replaceMarkers(s):
    1.52  
     2.1 --- a/tests/test_table.py	Sat Mar 31 18:10:18 2012 +0200
     2.2 +++ b/tests/test_table.py	Sat Mar 31 21:20:52 2012 +0200
     2.3 @@ -34,17 +34,21 @@
     2.4  And this is the second column.
     2.5  ==
     2.6  Some \\{\\{\\{preformatted text\\}\\}\\} || Some `preformatted text` || Observe the region notation inline.
     2.7 +==
     2.8 +<rowspan=2> 1 || 2 || 3
     2.9 +==
    2.10 +<rowstyle="background-color: #f33"> 2 || 3
    2.11  """
    2.12  
    2.13  attrs, rows = parse(table)
    2.14 -expected = 9
    2.15 +expected = 11
    2.16  
    2.17  print table
    2.18  print attrs
    2.19  print rows
    2.20  print len(rows) == expected, ": length is", len(rows), "==", expected
    2.21  print
    2.22 -for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2, 2, 2, 3]):
    2.23 +for (row_attrs, columns), expected in zip(rows, [3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 2]):
    2.24      print row_attrs
    2.25      print columns
    2.26      non_continuation_columns = [