1.1 --- a/ImprovedTableParser.py Sat Mar 31 21:20:52 2012 +0200
1.2 +++ b/ImprovedTableParser.py Sun Apr 01 01:40:28 2012 +0200
1.3 @@ -16,15 +16,11 @@
1.4 # Regular expressions.
1.5
1.6 syntax = {
1.7 - # For section markers.
1.8 - "markers" : (r"(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),
1.9 - "marker" : (r"(\\+)", 0),
1.10 -
1.11 # At start of line:
1.12 "rows" : (r"^==(?!.*?==$)", re.MULTILINE), # == not-heading
1.13
1.14 # Within text:
1.15 - "sections" : (r"({{{.*?}}})", re.MULTILINE | re.DOTALL), # {{{ ... }}}
1.16 + "markers" : (r"([{]{3,}|[}]{3,})", re.MULTILINE | re.DOTALL), # {{{... or }}}...
1.17 "columns" : (r"\|\|[ \t]*", 0), # || ws-excl-nl
1.18
1.19 # At start of column text:
1.20 @@ -50,8 +46,6 @@
1.21
1.22 "Parse 's', returning a table definition."
1.23
1.24 - s = replaceMarkers(s)
1.25 -
1.26 table_attrs = {}
1.27 rows = []
1.28
1.29 @@ -67,21 +61,24 @@
1.30
1.31 # Process exposed text and sections.
1.32
1.33 - exposed = True
1.34 + marker = None
1.35 + is_region = True
1.36
1.37 # Initially, start a new row.
1.38
1.39 row_continued = False
1.40
1.41 - for region in patterns["sections"].split(s):
1.42 + for match_text in patterns["markers"].split(s):
1.43
1.44 - # Only look for table features in exposed text.
1.45 + # Only look for table features in exposed text. Where a section is
1.46 + # defined, a marker will have been read and all regions before the
1.47 + # closing marker will not be exposed.
1.48
1.49 - if exposed:
1.50 + if is_region and not marker:
1.51
1.52 # Extract each row from the definition.
1.53
1.54 - for row_text in patterns["rows"].split(region):
1.55 + for row_text in patterns["rows"].split(match_text):
1.56
1.57 # Only create a new row when a boundary has been found.
1.58
1.59 @@ -183,12 +180,31 @@
1.60
1.61 row_continued = True
1.62
1.63 - # Write any section into the current column.
1.64 + else:
1.65 +
1.66 + # Handle section markers.
1.67 +
1.68 + if not is_region:
1.69 +
1.70 + # Interpret the given marker, closing the current section if the
1.71 + # given marker is the corresponding end marker for the current
1.72 + # section.
1.73
1.74 - else:
1.75 - columns[columnnumber][1] += region
1.76 + if marker:
1.77 + if match_text.startswith("}") and len(marker) == len(match_text):
1.78 + marker = None
1.79 +
1.80 + # Without a current marker, start a section if an appropriate marker
1.81 + # is given.
1.82
1.83 - exposed = not exposed
1.84 + elif match_text.startswith("{"):
1.85 + marker = match_text
1.86 +
1.87 + # Markers and section text are incorporated into the current column.
1.88 +
1.89 + columns[columnnumber][1] += match_text
1.90 +
1.91 + is_region = not is_region
1.92
1.93 # Complete any final row.
1.94
1.95 @@ -252,37 +268,6 @@
1.96 table_attrs[name] = value
1.97 del attrs[name]
1.98
1.99 -def replaceMarkers(s):
1.100 -
1.101 - "Convert the section notation in 's'."
1.102 -
1.103 - l = []
1.104 - last = 0
1.105 -
1.106 - # Get each marker and convert it.
1.107 -
1.108 - for match in patterns["markers"].finditer(s):
1.109 - start, stop = match.span()
1.110 - l.append(s[last:start])
1.111 -
1.112 - # Convert the marker.
1.113 -
1.114 - marker = []
1.115 - brace = True
1.116 - for text in patterns["marker"].split(match.group()):
1.117 - if brace:
1.118 - marker.append(text)
1.119 - else:
1.120 - marker.append(text[:-1])
1.121 - brace = not brace
1.122 -
1.123 - l.append("".join(marker))
1.124 - last = stop
1.125 - else:
1.126 - l.append(s[last:])
1.127 -
1.128 - return "".join(l)
1.129 -
1.130 def parseAttributes(s, escape=True):
1.131
1.132 """