# ImprovedTableParser (file ImprovedTableParser.py at a5aa61c0be15)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - ImprovedTableParser library     4      5     @copyright: 2012 by Paul Boddie <paul@boddie.org.uk>     6     @license: GNU GPL (v2 or later), see COPYING.txt for details.     7 """     8      9 from MoinMoin import wikiutil    10 from shlex import shlex    11 from StringIO import StringIO    12 from MoinSupport import *    13 import re    14     15 # Regular expressions.    16     17 syntax = {    18     # For section markers.    19     "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),    20     "marker"    : (r"(\\+)",                                            0),    21     22     # At start of line:    23     "sections"  : (r"(^\s*{{{.*?^\s*}}})",  re.MULTILINE | re.DOTALL),  # {{{ ... }}}    24     "rows"      : (r"^==",                  re.MULTILINE),              # ==    25     26     # Within text:    27     "columns"   : (r"\|\|[ \t]*",           0),                         # || ws-excl-nl    28     29     # At start of column text:    30     "column"    : (r"^\s*<(.*?)>\s*(.*)",   re.DOTALL),                 # ws < attributes > ws    31     }    32     33 patterns = {}    34 for name, (value, flags) in syntax.items():    35     patterns[name] = re.compile(value, re.UNICODE | flags)    36     37 # Other regular expressions.    38     39 leading_number_regexp = re.compile(r"\d*")    40     41 # Functions.    42     43 def parse(s):    44     45     "Parse 's', returning a table definition."    46     47     s = replaceMarkers(s)    48     49     table_attrs = {}    50     rows = []    51     52     # The following will be redefined upon the construction of the first row.    53     54     row_attrs = {}    55     columns = []    56     57     # Process exposed text and sections.    58     59     exposed = True    60     61     # Initially, start a new row.    
62     63     row_continued = False    64     65     for region in patterns["sections"].split(s):    66     67         # Only look for table features in exposed text.    68     69         if exposed:    70     71             # Extract each row from the definition.    72     73             for row_text in patterns["rows"].split(region):    74     75                 # Only create a new row when a boundary has been found.    76     77                 if not row_continued:    78                     if columns:    79                         extractAttributes(columns[0][0], row_attrs, table_attrs)    80     81                     row_attrs = {}    82                     columns = []    83                     rows.append((row_attrs, columns))    84                     column_continued = False    85     86                 # Extract each column from the row.    87     88                 for text in patterns["columns"].split(row_text):    89     90                     # Only create a new column when a boundary has been found.    91     92                     if not column_continued:    93     94                         # Extract the attribute and text sections.    95     96                         match = patterns["column"].search(text)    97                         if match:    98                             attribute_text, text = match.groups()    99                             columns.append([parseAttributes(attribute_text, True), text])   100                         else:   101                             columns.append([{}, text])   102    103                     else:   104                         columns[-1][1] += text   105    106                     # Permit columns immediately following this one.   107    108                     column_continued = False   109    110                 # Permit a continuation of the current column.   111    112                 column_continued = True   113    114                 # Permit rows immediately following this one.   
115    116                 row_continued = False   117    118             # Permit a continuation if the current row.   119    120             row_continued = True   121    122         # Write any section into the current column.   123    124         else:   125             columns[-1][1] += region   126    127         exposed = not exposed   128    129     if columns:   130         extractAttributes(columns[0][0], row_attrs, table_attrs)   131    132     return table_attrs, rows   133    134 def extractAttributes(attrs, row_attrs, table_attrs):   135    136     """   137     Extract row- and table-level attributes from 'attrs', storing them in   138     'row_attrs' and 'table_attrs' respectively.   139     """   140    141     for name, value in attrs.items():   142         if name.startswith("row") and name != "rowspan":   143             row_attrs[name] = value   144             del attrs[name]   145         elif name.startswith("table"):   146             table_attrs[name] = value   147             del attrs[name]   148    149 def replaceMarkers(s):   150    151     "Convert the section notation in 's'."   152    153     l = []   154     last = 0   155    156     # Get each marker and convert it.   157    158     for match in patterns["markers"].finditer(s):   159         start, stop = match.span()   160         l.append(s[last:start])   161    162         # Convert the marker.   
163    164         marker = []   165         brace = True   166         for text in patterns["marker"].split(match.group()):   167             if brace:   168                 marker.append(text)   169             else:   170                 marker.append(text[:-1])   171             brace = not brace   172    173         l.append("".join(marker))   174         last = stop   175     else:   176         l.append(s[last:])   177    178     return "".join(l)   179    180 def parseAttributes(s, escape=True):   181    182     """   183     Parse the table attributes string 's', returning a mapping of names to   184     values. If 'escape' is set to a true value, the attributes will be suitable   185     for use with the formatter API. If 'escape' is set to a false value, the   186     attributes will have any quoting removed.   187     """   188    189     attrs = {}   190     f = StringIO(s)   191     name = None   192     need_value = False   193    194     for token in shlex(f):   195    196         # Capture the name if needed.   197    198         if name is None:   199             name = escape and wikiutil.escape(token) or strip_token(token)   200    201         # Detect either an equals sign or another name.   202    203         elif not need_value:   204             if token == "=":   205                 need_value = True   206             else:   207                 attrs[name.lower()] = escape and "true" or True   208                 name = wikiutil.escape(token)   209    210         # Otherwise, capture a value.   211    212         else:   213             # Quoting of attributes done similarly to wikiutil.parseAttributes.   
214    215             if token:   216                 if escape:   217                     if token[0] in ("'", '"'):   218                         token = wikiutil.escape(token)   219                     else:   220                         token = '"%s"' % wikiutil.escape(token, 1)   221                 else:   222                     token = strip_token(token)   223        224             attrs[name.lower()] = token   225             name = None   226             need_value = False   227    228     return attrs   229    230 def strip_token(token):   231    232     "Return the given 'token' stripped of quoting."   233    234     if token[0] in ("'", '"') and token[-1] == token[0]:   235         return token[1:-1]   236     else:   237         return token   238    239 # Formatting of embedded content.   240 # NOTE: Borrowed from EventAggregator.   241    242 def getParserClass(request, format):   243    244     """   245     Return a parser class using the 'request' for the given 'format', returning   246     a plain text parser if no parser can be found for the specified 'format'.   247     """   248    249     try:   250         return wikiutil.searchAndImportPlugin(request.cfg, "parser", format or "plain")   251     except wikiutil.PluginMissingError:   252         return wikiutil.searchAndImportPlugin(request.cfg, "parser", "plain")   253    254 def formatText(text, request, fmt):   255    256     "Format the given 'text' using the specified 'request' and formatter 'fmt'."   257    258     parser_cls = getParserClass(request, request.page.pi["format"])   259     parser = parser_cls(text, request, line_anchors=False)   260     return request.redirectedOutput(parser.format, fmt, inhibit_p=True)   261    262 # Sorting utilities.   
def get_sort_columns(s, start=0):

    """
    Split the comma-separated string 's', extracting the column specifications
    of the form <column>["n"]["d"] where the suffix "n" indicates an optional
    numeric conversion for that column and the suffix "d" indicates descending
    order. Column indexes start from the specified 'start' value (defaulting
    to 0).

    Return a list of (column, conversion function, ascending) tuples with
    zero-based column indexes. Badly-specified columns are ignored, and an
    empty or missing 's' yields an empty list.
    """

    sort_columns = []

    if not s:
        return sort_columns

    for column_spec in s.split(","):
        column_spec = column_spec.strip()

        ascending = True
        if column_spec.endswith("d"):
            column_spec = column_spec[:-1]
            ascending = False

        # Extract the conversion indicator and column index.
        # Ignore badly-specified columns: those without a leading number
        # (ValueError) and those with an unknown suffix (KeyError).

        try:
            column = get_number(column_spec)
            suffix = column_spec[len(column):]
            fn = converters[suffix]
            index = int(column)
        except (ValueError, KeyError):
            continue

        sort_columns.append((max(0, index - start), fn, ascending))

    return sort_columns

def get_column_types(sort_columns):

    """
    Return a dictionary mapping column indexes to (conversion function,
    ascending) tuples for the given 'sort_columns'.
    """

    d = {}
    for column, fn, ascending in sort_columns:
        d[column] = fn, ascending
    return d

def get_number(s):

    "From 's', get any leading number as a string, or an empty string."

    match = leading_number_regexp.match(s)
    if match:
        return match.group()
    else:
        return ""

def to_number(s):

    """
    Convert 's' to a number, discarding any non-numeric trailing data. A
    ValueError is raised if 's' does not start with a digit.
    """

    return int(get_number(s))

class Sorter:

    """
    A sorting helper class providing a comparison function for table rows,
    where each row is a (row_attrs, columns) tuple and each column is an
    (attrs, text) pair.
    """

    def __init__(self, sort_columns):

        "Initialise the sorter with (column, function, ascending) tuples."

        self.sort_columns = sort_columns

    def __call__(self, row1, row2):

        """
        Compare 'row1' and 'row2', returning a negative, zero or positive
        number according to the first sort column where the rows differ.
        """

        row_attrs1, columns1 = row1
        row_attrs2, columns2 = row2

        # Apply the conversions to each column, comparing the results.

        for column, fn, ascending in self.sort_columns:

            # Ignore a column when a conversion is not possible or when either
            # row is too short to provide the column.
            # NOTE(review): under Python 2, str() applied to non-ASCII Unicode
            # text raises UnicodeEncodeError, a ValueError subclass, so such
            # columns appear to be skipped as well - confirm this is intended.

            try:
                value1 = fn(columns1[column][1])
                value2 = fn(columns2[column][1])
            except (ValueError, IndexError):
                continue

            result = cmp(value1, value2)

            # Where the columns differ, return a result observing the sense
            # (ascending or descending) of the comparison for the column.

            if result != 0:
                if ascending:
                    return result
                else:
                    return -result

        return 0

def _escape_html(value):

    """
    Return 'value' converted to text with the HTML special characters and
    double quotes escaped, making it usable in element content and in
    double-quoted attribute values.
    """

    from xml.sax.saxutils import escape
    return escape("%s" % (value,), {'"' : "&quot;"})

def _column_label(columns, column):

    """
    Return the stripped text of the zero-based 'column' in 'columns', or an
    empty string if no such column exists.
    """

    try:
        return columns[column][1].strip()
    except IndexError:
        return ""

def write_sort_control(columnnumber, write, sort_columns, column_types, columns, table_name, data_start, start=0):

    """
    Write a sort control in its own form which provides a list of sort
    descriptions, modifying the 'sort_columns' provided by introducing the given
    column in different positions.

    The zero-based 'columnnumber' identifies the column for which the control
    is written using the 'write' callable. The 'column_types' mapping provides
    the (function, ascending) details to use for that column, 'columns'
    provides the (attrs, text) pairs used for labels, and 'table_name' and
    'data_start' are written into hidden form fields. Column numbers in the
    option values are offset by 'start'.
    """

    option_html = """\
        <option value="%(value)s" %(selected)s>%(label)s</option>
"""

    def make_spec(column, fn, ascending):

        "Return the (number, suffix, direction) parts of a column spec."

        return (column + start, suffixes[fn], not ascending and "d" or "")

    # Start with the existing criteria without this column being involved.
    # Labels are looked up using the zero-based indexes, not the indexes offset
    # by 'start' that appear in the option values.

    current_sort_columns = [make_spec(column, fn, ascending)
        for (column, fn, ascending) in sort_columns]
    other_sort_columns = [(column, fn, ascending)
        for (column, fn, ascending) in sort_columns if column != columnnumber]
    revised_sort_columns = [make_spec(column, fn, ascending)
        for (column, fn, ascending) in other_sort_columns]
    revised_sort_labels = [_column_label(columns, column)
        for (column, fn, ascending) in other_sort_columns]

    values = [revised_sort_columns]
    labels = [revised_sort_labels]

    # Add this column in all possible places in the sorting criteria.

    fn, ascending = column_types.get(columnnumber, (str, True))
    this_spec = make_spec(columnnumber, fn, ascending)
    this_label = _column_label(columns, columnnumber)

    for i in range(len(revised_sort_columns) + 1):
        value = revised_sort_columns[:]
        label = revised_sort_labels[:]
        value.insert(i, this_spec)
        label.insert(i, this_label)
        values.append(value)
        labels.append(label)

    # Make the list of options.
    # Labels come from the page text and must be escaped before being written
    # as HTML.

    options_html = []
    for value, label in zip(values, labels):
        options_html.append(option_html % {
            "value"     : ",".join([("%d%s%s" % spec) for spec in value]),
            "label"     : _escape_html(", ".join(label)),
            "selected"  : value == current_sort_columns and 'selected="selected"' or "",
            })

    # Write the form.

    d = {
        "table_name"    : _escape_html(table_name),
        "options"       : "".join(options_html),
        "data_start"    : _escape_html(data_start),
        }

    write("""\
<form method="post">
    <input name="tablename" value="%(table_name)s" type="hidden" />
    <input name="%(table_name)s-headers" value="%(data_start)s" type="hidden" />
    <select name="%(table_name)s-sortcolumns" onchange="this.form.submit()">
%(options)s
    </select>
</form>
""" % d)

# Sorting-related tables.

converters = {
    "n" : to_number,
    "" : str,
    }

# The reverse mapping from conversion functions to column spec suffixes.

suffixes = {}
for key, value in converters.items():
    suffixes[value] = key

# Common formatting functions.

def formatTable(text, request, fmt, attrs=None):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt'.
    The optional 'attrs' can be used to control the presentation of the table.

    The attributes consulted are "name", "sortcolumns" and "headers" (the
    number of leading rows excluded from sorting, defaulting to 1). For a named
    table, "sortcolumns" and "headers" values obtained using
    getQualifiedParameter take precedence over the region attributes. Output
    is written using the request's write method.
    """

    if attrs is None:
        attrs = {}

    # Parse the table region.

    table_attrs, table = parse(text)

    # Override any region arguments with request parameters.

    table_name = attrs.get("name")

    # Get sorting criteria from the region and the request.

    region_sortcolumns = attrs.get("sortcolumns")
    sortcolumns = table_name and getQualifiedParameter(request, table_name, "sortcolumns") or region_sortcolumns

    # Sort the rows according to the values in each of the specified columns.

    data_start = int(table_name and getQualifiedParameter(request, table_name, "headers") or attrs.get("headers", "1"))

    sort_columns = []
    column_types = {}

    if sortcolumns:
        headers = table[:data_start]
        data = table[data_start:]

        # Get the sort columns using Unix sort-like notation.
        # The region criteria may be absent when sorting is only requested
        # through the request parameters.

        sort_columns = get_sort_columns(sortcolumns)
        region_sort_columns = get_sort_columns(region_sortcolumns)

        sorter = Sorter(sort_columns)
        data.sort(cmp=sorter)

        table = headers + data
        column_types = get_column_types(region_sort_columns)

    # Write the table.

    writing_html = request.page.output_mimetype == "text/html"
    write = request.write
    write(fmt.table(1, table_attrs))

    for rownumber, (row_attrs, columns) in enumerate(table):
        write(fmt.table_row(1, row_attrs))

        for columnnumber, (column_attrs, column_text) in enumerate(columns):
            write(fmt.table_cell(1, column_attrs))
            write(formatText(column_text, request, fmt))

            # Add sorting controls, if appropriate.
            # Controls are only shown in the last header row.

            if writing_html and sortcolumns and rownumber == data_start - 1:
                write_sort_control(columnnumber, write, sort_columns, column_types, columns, table_name, data_start)

            write(fmt.table_cell(0))

        write(fmt.table_row(0))

    write(fmt.table(0))

# vim: tabstop=4 expandtab shiftwidth=4