ImprovedTableParser (file ImprovedTableParser.py at 75990ca1e4c6)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - ImprovedTableParser library     4      5     @copyright: 2012 by Paul Boddie <paul@boddie.org.uk>     6     @license: GNU GPL (v2 or later), see COPYING.txt for details.     7 """     8      9 from MoinMoin import wikiutil    10 from shlex import shlex    11 from StringIO import StringIO    12 from MoinSupport import *    13 import re    14     15 # Regular expressions.    16     17 syntax = {    18     # For section markers.    19     "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),    20     "marker"    : (r"(\\+)",                                            0),    21     22     # At start of line:    23     "sections"  : (r"(^\s*{{{.*?^\s*}}})",  re.MULTILINE | re.DOTALL),  # {{{ ... }}}    24     "rows"      : (r"^==",                  re.MULTILINE),              # ==    25     26     # Within text:    27     "columns"   : (r"\|\|[ \t]*",           0),                         # || ws-excl-nl    28     29     # At start of column text:    30     "column"    : (r"^\s*<(.*?)>\s*(.*)",   re.DOTALL),                 # ws < attributes > ws    31     }    32     33 patterns = {}    34 for name, (value, flags) in syntax.items():    35     patterns[name] = re.compile(value, re.UNICODE | flags)    36     37 # Other regular expressions.    38     39 leading_number_regexp = re.compile(r"\d*")    40     41 # Functions.    42     43 def parse(s):    44     45     "Parse 's', returning a table definition."    46     47     s = replaceMarkers(s)    48     49     table_attrs = {}    50     rows = []    51     52     # The following will be redefined upon the construction of the first row.    53     54     row_attrs = {}    55     columns = []    56     57     # Process exposed text and sections.    58     59     exposed = True    60     61     # Initially, start a new row.    
62     63     row_continued = False    64     65     for region in patterns["sections"].split(s):    66     67         # Only look for table features in exposed text.    68     69         if exposed:    70     71             # Extract each row from the definition.    72     73             for row_text in patterns["rows"].split(region):    74     75                 # Only create a new row when a boundary has been found.    76     77                 if not row_continued:    78                     if columns:    79                         extractAttributes(columns[0][0], row_attrs, table_attrs)    80     81                     row_attrs = {}    82                     columns = []    83                     rows.append((row_attrs, columns))    84                     column_continued = False    85     86                 # Extract each column from the row.    87     88                 for text in patterns["columns"].split(row_text):    89     90                     # Only create a new column when a boundary has been found.    91     92                     if not column_continued:    93     94                         # Extract the attribute and text sections.    95     96                         match = patterns["column"].search(text)    97                         if match:    98                             attribute_text, text = match.groups()    99                             columns.append([parseAttributes(attribute_text, True), text])   100                         else:   101                             columns.append([{}, text])   102    103                     else:   104                         columns[-1][1] += text   105    106                     # Permit columns immediately following this one.   107    108                     column_continued = False   109    110                 # Permit a continuation of the current column.   111    112                 column_continued = True   113    114                 # Permit rows immediately following this one.   
115    116                 row_continued = False   117    118             # Permit a continuation if the current row.   119    120             row_continued = True   121    122         # Write any section into the current column.   123    124         else:   125             columns[-1][1] += region   126    127         exposed = not exposed   128    129     if columns:   130         extractAttributes(columns[0][0], row_attrs, table_attrs)   131    132     return table_attrs, rows   133    134 def extractAttributes(attrs, row_attrs, table_attrs):   135    136     """   137     Extract row- and table-level attributes from 'attrs', storing them in   138     'row_attrs' and 'table_attrs' respectively.   139     """   140    141     for name, value in attrs.items():   142         if name.startswith("row") and name != "rowspan":   143             row_attrs[name] = value   144             del attrs[name]   145         elif name.startswith("table"):   146             table_attrs[name] = value   147             del attrs[name]   148    149 def replaceMarkers(s):   150    151     "Convert the section notation in 's'."   152    153     l = []   154     last = 0   155    156     # Get each marker and convert it.   157    158     for match in patterns["markers"].finditer(s):   159         start, stop = match.span()   160         l.append(s[last:start])   161    162         # Convert the marker.   
163    164         marker = []   165         brace = True   166         for text in patterns["marker"].split(match.group()):   167             if brace:   168                 marker.append(text)   169             else:   170                 marker.append(text[:-1])   171             brace = not brace   172    173         l.append("".join(marker))   174         last = stop   175     else:   176         l.append(s[last:])   177    178     return "".join(l)   179    180 def parseAttributes(s, escape=True):   181    182     """   183     Parse the table attributes string 's', returning a mapping of names to   184     values. If 'escape' is set to a true value, the attributes will be suitable   185     for use with the formatter API. If 'escape' is set to a false value, the   186     attributes will have any quoting removed.   187     """   188    189     attrs = {}   190     f = StringIO(s)   191     name = None   192     need_value = False   193    194     for token in shlex(f):   195    196         # Capture the name if needed.   197    198         if name is None:   199             name = escape and wikiutil.escape(token) or strip_token(token)   200    201         # Detect either an equals sign or another name.   202    203         elif not need_value:   204             if token == "=":   205                 need_value = True   206             else:   207                 attrs[name.lower()] = escape and "true" or True   208                 name = wikiutil.escape(token)   209    210         # Otherwise, capture a value.   211    212         else:   213             # Quoting of attributes done similarly to wikiutil.parseAttributes.   
214    215             if token:   216                 if escape:   217                     if token[0] in ("'", '"'):   218                         token = wikiutil.escape(token)   219                     else:   220                         token = '"%s"' % wikiutil.escape(token, 1)   221                 else:   222                     token = strip_token(token)   223        224             attrs[name.lower()] = token   225             name = None   226             need_value = False   227    228     return attrs   229    230 def strip_token(token):   231    232     "Return the given 'token' stripped of quoting."   233    234     if token[0] in ("'", '"') and token[-1] == token[0]:   235         return token[1:-1]   236     else:   237         return token   238    239 # Formatting of embedded content.   240 # NOTE: Borrowed from EventAggregator.   241    242 def getParserClass(request, format):   243    244     """   245     Return a parser class using the 'request' for the given 'format', returning   246     a plain text parser if no parser can be found for the specified 'format'.   247     """   248    249     try:   250         return wikiutil.searchAndImportPlugin(request.cfg, "parser", format or "plain")   251     except wikiutil.PluginMissingError:   252         return wikiutil.searchAndImportPlugin(request.cfg, "parser", "plain")   253    254 def formatText(text, request, fmt):   255    256     "Format the given 'text' using the specified 'request' and formatter 'fmt'."   257    258     parser_cls = getParserClass(request, request.page.pi["format"])   259     parser = parser_cls(text, request, line_anchors=False)   260     return request.redirectedOutput(parser.format, fmt, inhibit_p=True)   261    262 # Sorting utilities.   

def get_sort_columns(s, start=0):

    """
    Split the comma-separated string 's', extracting the column specifications
    of the form <column>["n"]["d"] where the suffix "n" indicates an optional
    numeric conversion for that column and the suffix "d" requests descending
    order. Column indexes start from the specified 'start' value (defaulting
    to 0).

    Return a list of (column index, conversion function, ascending) tuples,
    with indexes adjusted to be zero-based and never negative. Specifications
    without a valid integer column are ignored.
    """

    sort_columns = []
    for column_spec in s.split(","):
        column_spec = column_spec.strip()

        # Extract the ordering indicator.

        ascending = True
        if column_spec.endswith("d"):
            column_spec = column_spec[:-1]
            ascending = False

        # Extract the conversion indicator and column index.

        if column_spec.endswith("n"):
            column = column_spec[:-1]
            fn = to_number
        else:
            column = column_spec
            fn = str

        # Ignore badly-specified columns.

        try:
            sort_columns.append((max(0, int(column) - start), fn, ascending))
        except ValueError:
            pass

    return sort_columns

def to_number(s):

    """
    Convert 's' to a number, discarding any non-numeric trailing data. A
    ValueError is raised if 's' does not start with at least one digit.
    """

    # The pattern also matches the empty string, so the absence of digits must
    # be tested explicitly.

    match = leading_number_regexp.match(s)
    digits = match and match.group()
    if not digits:
        raise ValueError(s)
    return int(digits)

class Sorter:

    """
    A sorting helper class providing a comparison function over table rows of
    the form (row_attrs, columns), where 'columns' is a sequence of
    (column_attrs, text) pairs.
    """

    def __init__(self, sort_columns):

        """
        Initialise the sorter with 'sort_columns': a list of (column index,
        conversion function, ascending) tuples as produced by get_sort_columns.
        """

        self.sort_columns = sort_columns

    def __call__(self, row1, row2):

        """
        Compare 'row1' and 'row2', returning a negative, zero or positive
        number according to the first sort column in which they differ.
        """

        row_attrs1, columns1 = row1
        row_attrs2, columns2 = row2

        # Apply the conversions to each column, comparing the results.

        for column, fn, ascending in self.sort_columns:

            # Ignore a column that is absent from either row, as can happen
            # with rows of differing lengths.

            try:
                column_attrs1, text1 = columns1[column]
                column_attrs2, text2 = columns2[column]
            except IndexError:
                continue

            # Ignore a column when a conversion is not possible.

            try:
                text1 = fn(text1)
                text2 = fn(text2)
            except ValueError:
                continue

            # Equivalent to cmp(text1, text2).

            result = (text1 > text2) - (text1 < text2)

            # Where the columns differ, return a result observing the sense
            # (ascending or descending) of the comparison for the column.

            if result != 0:
                if ascending:
                    return result
                else:
                    return -result

        return 0

# Common formatting functions.

def formatTable(text, request, fmt, attrs=None):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt'.
    The optional 'attrs' can be used to control the presentation of the table:
    "name" identifies the table for request parameters, "sortcolumns" gives the
    default sort specification, and "headers" gives the number of leading rows
    excluded from sorting (defaulting to 1).

    The formatted table is written to 'request'; nothing is returned.
    """

    # Permit the omission of 'attrs' as the signature advertises.

    if attrs is None:
        attrs = {}

    # Parse the table region.

    table_attrs, table = parse(text)

    # Override any region arguments with request parameters.

    table_name = attrs.get("name")
    sortcolumns = table_name and getQualifiedParameter(request, table_name, "sortcolumns") or attrs.get("sortcolumns")

    # Sort the rows according to the values in each of the specified columns.

    if sortcolumns:

        # Fall back to a single header row if the setting is not usable, and
        # never count header rows from the end of the table.

        try:
            data_start = max(0, int(attrs.get("headers", "1")))
        except (TypeError, ValueError):
            data_start = 1

        headers = table[:data_start]
        data = table[data_start:]

        # Get the sort columns using Unix sort-like notation.

        sorter = Sorter(get_sort_columns(sortcolumns))
        data.sort(cmp=sorter)

        table = headers + data

    # Write the table.

    request.write(fmt.table(1, table_attrs))

    for row_attrs, columns in table:
        request.write(fmt.table_row(1, row_attrs))

        for column_attrs, column_text in columns:
            request.write(fmt.table_cell(1, column_attrs))
            request.write(formatText(column_text, request, fmt))
            request.write(fmt.table_cell(0))

        request.write(fmt.table_row(0))

    request.write(fmt.table(0))

# vim: tabstop=4 expandtab shiftwidth=4