ImprovedTableParser

ImprovedTableParser.py

43:7172d229dea9
2015-02-23 Paul Boddie Updated release and copyright information.
     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - ImprovedTableParser library     4      5     @copyright: 2012, 2013, 2015 by Paul Boddie <paul@boddie.org.uk>     6     @license: GNU GPL (v2 or later), see COPYING.txt for details.     7 """     8      9 from MoinMoin import wikiutil    10 from MoinSupport import *    11 import re    12     13 __version__ = "0.2.1"    14     15 # Regular expressions.    16     17 syntax = {    18     # At start of line:    19     "rows"      : (r"^==(?!.*?==$)[ \t]?",      re.MULTILINE),          # == not-heading ws-excl-nl    20     "continuations" : (r"^\s*\.\.(?!\.)[ \t]?", re.MULTILINE),          # .. ws-excl-nl or .. not-dot    21     22     # Within text:    23     "markers"   : (r"([{]{3,}|[}]{3,})",    re.MULTILINE | re.DOTALL),  # {{{... or }}}...    24     "columns"   : (r"\|\|[ \t]*",           0),                         # || ws-excl-nl    25     26     # At start of column text:    27     "column"    : (r"^\s*<([^<].*?)>\s*(.*)",   re.DOTALL),             # ws < not-< attributes > ws    28     }    29     30 patterns = {}    31 for name, (value, flags) in syntax.items():    32     patterns[name] = re.compile(value, re.UNICODE | flags)    33     34 # Other regular expressions.    35     36 leading_number_regexp = re.compile(r"\d*")    37     38 # Constants.    39     40 up_arrow = u'\u2191'    41 down_arrow = u'\u2193'    42     43 # Functions.    44     45 def parse(s):    46     47     "Parse 's', returning a table definition."    48     49     table_attrs = {}    50     rows = []    51     52     # The following will be redefined upon the construction of the first row.    53     54     row_attrs = {}    55     columns = []    56     columnnumber = 0    57     58     # The following will be redefined upon the construction of the first column.    59     60     column_attrs = {}    61     62     # Process exposed text and sections.    
63     64     marker = None    65     is_region = True    66     67     # Initially, start a new row.    68     69     row_continued = False    70     71     for match_text in patterns["markers"].split(s):    72     73         # Only look for table features in exposed text. Where a section is    74         # defined, a marker will have been read and all regions before the    75         # closing marker will not be exposed.    76     77         if is_region and not marker:    78     79             # Extract each row from the definition.    80     81             for row_text in patterns["rows"].split(match_text):    82     83                 # Only create a new row when a boundary has been found.    84     85                 if not row_continued:    86     87                     # Complete any existing row.    88     89                     if columns:    90                         extractAttributes(columns, row_attrs, table_attrs)    91                         span_columns(columns, columnnumber)    92     93                         # Replicate the last row to determine column usage.    
94     95                         column_usage = []    96     97                         for column_attrs, text in columns:    98                             try:    99                                 rowspan = int(column_attrs.get("rowspan", "1"))   100                             except ValueError:   101                                 rowspan = 1   102                             if rowspan > 1:   103                                 attrs = {}   104                                 attrs.update(column_attrs)   105                                 attrs["rowspan"] = str(rowspan - 1)   106                                 attrs["rowcontinuation"] = True   107                                 column_usage.append((attrs, text))   108                             else:   109                                 column_usage.append(({}, None))   110    111                         columns = column_usage   112    113                     # Define a new collection of row attributes.   114    115                     row_attrs = {}   116    117                     # Reset the columns and make the list available for the   118                     # addition of new columns, starting a new column   119                     # immediately.   120    121                     rows.append((row_attrs, columns))   122                     column_continued = False   123                     columnnumber = 0   124    125                 # Extract each column from the row.   126    127                 for text in patterns["columns"].split(row_text):   128    129                     # Replace line continuation strings.   130    131                     text = patterns["continuations"].sub("", text)   132    133                     # Only create a new column when a boundary has been found.   134    135                     if not column_continued:   136    137                         # Complete any existing column.   
138    139                         if columns:   140                             columnnumber = span_columns(columns, columnnumber)   141    142                         # Extract the attribute and text sections.   143    144                         match = patterns["column"].search(text)   145                         if match:   146                             attribute_text, text = match.groups()   147                             column_attrs = parseAttributes(attribute_text, False)   148                         else:   149                             column_attrs = {}   150    151                         # Define the new column with a mutable container   152                         # permitting the extension of the text.   153    154                         details = [column_attrs, text]   155    156                         # Find the next gap in the columns.   157    158                         while columnnumber != -1 and columnnumber < len(columns):   159                             attrs, text = columns[columnnumber]   160                             if text is None:   161                                 columns[columnnumber] = details   162                                 break   163                             columnnumber += 1   164    165                         # Or start adding at the end of the row.   166    167                         else:   168                             columnnumber = -1   169                             columns.append(details)   170    171                     else:   172                         columns[columnnumber][1] += text   173    174                     # Permit columns immediately following this one.   175    176                     column_continued = False   177    178                 # Permit a continuation of the current column.   179    180                 column_continued = True   181    182                 # Permit rows immediately following this one.   
183    184                 row_continued = False   185    186             # Permit a continuation if the current row.   187    188             row_continued = True   189    190         else:   191    192             # Handle section markers.   193    194             if not is_region:   195    196                 # Interpret the given marker, closing the current section if the   197                 # given marker is the corresponding end marker for the current   198                 # section.   199    200                 if marker:   201                     if match_text.startswith("}") and len(marker) == len(match_text):   202                         marker = None   203    204                 # Without a current marker, start a section if an appropriate marker   205                 # is given.   206    207                 elif match_text.startswith("{"):   208                     marker = match_text   209    210             # Markers and section text are incorporated into the current column.   211    212             columns[columnnumber][1] += match_text   213    214         is_region = not is_region   215    216     # Complete any final row.   217    218     if columns:   219         extractAttributes(columns, row_attrs, table_attrs)   220         span_columns(columns, columnnumber)   221    222     return table_attrs, rows   223    224 def span_columns(columns, columnnumber):   225    226     """   227     In the 'columns', make the column with the 'columnnumber' span the specified   228     number of columns, returning the next appropriate column number.   229     """   230    231     column_attrs, text = columns[columnnumber]   232    233     # Handle any previous column spanning other columns.   
234    235     if column_attrs.has_key("colspan"):   236         try:   237             colspan = int(column_attrs["colspan"])   238         except ValueError:   239             colspan = 1   240    241         # Duplicate the current column as continuation   242         # columns for as long as the colspan is defined.   243    244         colspan -= 1   245         while colspan > 0:   246             attrs = {}   247             attrs.update(column_attrs)   248             attrs["colspan"] = str(colspan)   249             attrs["colcontinuation"] = True   250    251             if columnnumber != -1:   252                 columnnumber += 1   253                 if columnnumber < len(columns):   254                     columns[columnnumber] = attrs, text   255                 else:   256                     columnnumber = -1   257    258             if columnnumber == -1:   259                 columns.append((attrs, text))   260    261             colspan -= 1   262    263     return columnnumber   264    265 def extractAttributes(columns, row_attrs, table_attrs):   266    267     """   268     Extract row- and table-level attributes from 'columns', storing them in   269     'row_attrs' and 'table_attrs' respectively.   270     """   271    272     for column in columns:   273         attrs = column[0]   274         for name, value in attrs.items():   275             if name.startswith("row") and name not in ("rowspan", "rowcontinuation"):   276                 row_attrs[name] = value   277                 del attrs[name]   278             elif name.startswith("table"):   279                 table_attrs[name] = value   280                 del attrs[name]   281    282 # Sorting utilities.   283    284 def get_sort_columns(s, start=0):   285    286     """   287     Split the comma-separated string 's', extracting the column specifications   288     of the form <column>["n"] where the suffix "n" indicates an optional   289     numeric conversion for that column. 
Column indexes start from the specified   290     'start' value (defaulting to 0).   291     """   292    293     sort_columns = []   294     for column_spec in s.split(","):   295         column_spec = column_spec.strip()   296    297         ascending = True   298         if column_spec.endswith("d"):   299             column_spec = column_spec[:-1]   300             ascending = False   301    302         # Extract the conversion indicator and column index.   303         # Ignore badly-specified columns.   304    305         try:   306             column = get_number(column_spec)   307             suffix = column_spec[len(column):]   308             fn = converters[suffix]   309             sort_columns.append((max(0, int(column) - start), fn, ascending))   310         except ValueError:   311             pass   312    313     return sort_columns   314    315 def get_column_types(sort_columns):   316    317     """   318     Return a dictionary mapping column indexes to conversion functions.   319     """   320    321     d = {}   322     for column, fn, ascending in sort_columns:   323         d[column] = fn, ascending   324     return d   325    326 def get_number(s):   327    328     "From 's', get any leading number."   329    330     match = leading_number_regexp.match(s)   331     if match:   332         return match.group()   333     else:   334         return ""   335    336 def to_number(s, request):   337    338     """   339     Convert 's' to a number, discarding any non-numeric trailing data.   340     Return an empty string if 's' is empty.   341     """   342    343     if s:   344         return int(get_number(to_plain_text(s, request)))   345     else:   346         return s   347    348 def to_plain_text(s, request):   349    350     "Convert 's' to plain text."   
351    352     fmt = getFormatterClass(request, "plain")(request)   353     fmt.setPage(request.page)   354     return formatText(s, request, fmt)   355    356 converters = {   357     "n" : to_number,   358     "" : to_plain_text,   359     }   360    361 suffixes = {}   362 for key, value in converters.items():   363     suffixes[value] = key   364    365 class Sorter:   366    367     "A sorting helper class."   368    369     def __init__(self, sort_columns, request):   370         self.sort_columns = sort_columns   371         self.request = request   372    373     def __call__(self, row1, row2):   374         row_attrs1, columns1 = row1   375         row_attrs2, columns2 = row2   376    377         # Apply the conversions to each column, comparing the results.   378    379         for column, fn, ascending in self.sort_columns:   380             column_attrs1, text1 = columns1[column]   381             column_attrs2, text2 = columns2[column]   382    383             # Ignore a column when a conversion is not possible.   384    385             try:   386                 value1 = fn(text1, self.request)   387                 value2 = fn(text2, self.request)   388    389                 # Avoid empty strings appearing earlier than other values.   390    391                 if value1 == "" and value2 != "":   392                     result = 1   393                 elif value1 != "" and value2 == "":   394                     result = -1   395                 else:   396                     result = cmp(value1, value2)   397    398                 # Where the columns differ, return a result observing the sense   399                 # (ascending or descending) of the comparison for the column.   
400    401                 if result != 0:   402                     return ascending and result or -result   403    404             except ValueError:   405                 pass   406    407         return 0   408    409 def write_sort_control(request, columnnumber, columns, sort_columns, column_types, table_name, start=0, write=None):   410    411     """   412     Using the 'request', write a sort control for the given 'columnnumber' in   413     the collection of 'columns', using the existing 'sort_columns' and   414     'column_types' to construct labels and links that modify the sort criteria,   415     and using the given 'table_name' to parameterise the links.   416    417     If the 'write' parameter is specified, use it to write output; otherwise,   418     write output using the request.   419     """   420    421     fmt = request.formatter   422     write = write or request.write   423     _ = request.getText   424    425     write(fmt.div(1, css_class="sortcolumns"))   426    427     write(fmt.paragraph(1))   428     write(fmt.text(_("Sort by columns...")))   429     write(fmt.paragraph(0))   430    431     # Start with the existing criteria without this column being involved.   432    433     revised_sort_columns = [(column, fn, ascending)   434         for (column, fn, ascending) in sort_columns if column != columnnumber]   435    436     # Get the specification of this column.   437    438     columnfn, columnascending = column_types.get(columnnumber, (to_plain_text, True))   439     newsortcolumn = columnnumber, columnfn, columnascending   440     newsortcolumn_reverse = columnnumber, columnfn, not columnascending   441     newlabel = columns[columnnumber][1].strip()   442    443     # Show this column in all possible places in the sorting criteria.   
444    445     write(fmt.number_list(1))   446    447     just_had_this_column = False   448    449     for i, (column, fn, ascending) in enumerate(sort_columns):   450         new_sort_columns = revised_sort_columns[:]   451         new_sort_columns.insert(i, newsortcolumn)   452         label = columns[column][1].strip()   453    454         arrow = columnascending and down_arrow or up_arrow   455         arrow_reverse = not columnascending and down_arrow or up_arrow   456    457         sortcolumns = get_sort_column_output(new_sort_columns)   458         new_sort_columns[i] = newsortcolumn_reverse   459         sortcolumns_reverse = get_sort_column_output(new_sort_columns)   460    461         # Columns permitting the insertion of the selected column.   462    463         if column != columnnumber and not just_had_this_column:   464             write(fmt.listitem(1, css_class="sortcolumn"))   465    466             # Pop-up element showing the column inserted before the sort column.   467    468             write(fmt.span(1, css_class="sortcolumn-container"))   469             write(fmt.span(1, css_class="newsortcolumn"))   470             write(formatText(newlabel, request, fmt))   471    472             write_sort_link(write, request, fmt, table_name, sortcolumns, arrow, "sortdirection")   473             write_sort_link(write, request, fmt, table_name, sortcolumns_reverse, arrow_reverse, "sortdirection")   474    475             write(fmt.span(0))   476             write(fmt.span(0))   477    478             # Link for selection of the modified sort criteria using the current   479             # column and showing its particular direction.   480    481             arrow = ascending and down_arrow or up_arrow   482             arrow_reverse = not ascending and down_arrow or up_arrow   483             write_sort_link(write, request, fmt, table_name, sortcolumns, u"%s %s" % (label, arrow), "")   484    485         # Columns permitting removal or modification.   
486    487         else:   488             write(fmt.listitem(1))   489    490             # Either show the column without a link, since the column to be   491             # inserted is already before the current column.   492    493             if just_had_this_column:   494                 just_had_this_column = False   495                 arrow = ascending and down_arrow or up_arrow   496                 arrow_reverse = not ascending and down_arrow or up_arrow   497    498                 # Write the current column with its particular direction.   499    500                 write(fmt.span(1, css_class="unlinkedcolumn"))   501                 write(formatText(u"%s %s" % (label, arrow), request, fmt))   502                 write(fmt.span(0))   503    504             # Or show the column with a link for its removal.   505    506             else:   507                 just_had_this_column = True   508                 sortcolumns_revised = get_sort_column_output(revised_sort_columns)   509                 write_sort_link(write, request, fmt, table_name, sortcolumns_revised, u"%s %s" % (label, arrow), "removecolumn")   510    511                 # Alternative sort direction.   512    513                 write_sort_link(write, request, fmt, table_name, sortcolumns_reverse, arrow_reverse, "altdirection")   514    515         write(fmt.listitem(0))   516    517     if not just_had_this_column:   518    519         # Write the sorting criteria with this column at the end.   
520    521         new_sort_columns = revised_sort_columns[:]   522         new_sort_columns.append(newsortcolumn)   523    524         sortcolumns = get_sort_column_output(new_sort_columns)   525         new_sort_columns[-1] = newsortcolumn_reverse   526         sortcolumns_reverse = get_sort_column_output(new_sort_columns)   527    528         arrow = columnascending and down_arrow or up_arrow   529         arrow_reverse = not columnascending and down_arrow or up_arrow   530    531         write(fmt.listitem(1, css_class="appendcolumn"))   532    533         # Pop-up element showing the column inserted before the sort column.   534    535         write(fmt.span(1, css_class="newsortcolumn"))   536         write_sort_link(write, request, fmt, table_name, sortcolumns, newlabel, "")   537         write_sort_link(write, request, fmt, table_name, sortcolumns, arrow, "sortdirection")   538         write_sort_link(write, request, fmt, table_name, sortcolumns_reverse, arrow_reverse, "sortdirection")   539         write(fmt.span(0))   540    541         write(fmt.listitem(0))   542    543     write(fmt.number_list(0))   544    545     write(fmt.div(0))   546    547 def write_sort_link(write, request, fmt, table_name, sortcolumns, label, css_class):   548    549     "Write a link expressing sort criteria."   550    551     write(fmt.url(1, "?%s#%s" % (   552         wikiutil.makeQueryString("%s-sortcolumns=%s" % (table_name, sortcolumns)),   553         fmt.qualify_id(fmt.sanitize_to_id(table_name))   554         ), css_class=css_class))   555     write(formatText(label, request, fmt))   556     write(fmt.url(0))   557    558 def get_sort_column_output(columns, start=0):   559    560     "Return the output criteria for the given 'columns' indexed from 'start'."   561    562     return ",".join([("%d%s%s" % (column + start, suffixes[fn], not ascending and "d" or ""))   563         for (column, fn, ascending) in columns])   564    565 # Common formatting functions.   

def formatTable(text, request, fmt, attrs=None, write=None):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt'.
    The optional 'attrs' can be used to control the presentation of the table:
    the keys consulted are "name", "columntypes", "sortcolumns" and "headers".

    Sorting is only offered when a table name is available, either from 'attrs'
    or from a "tableid" attribute in the table itself. Sort criteria and the
    number of header rows given in the request take precedence over those
    given in 'attrs'.

    If the 'write' parameter is specified, use it to write output; otherwise,
    write output using the request.
    """

    # Permit the omission of the attributes.

    if attrs is None:
        attrs = {}

    # Parse the table region.

    table_attrs, table = parse(text)

    # Define the table name and an anchor attribute.

    table_name = attrs.get("name")
    if table_name:
        table_attrs["tableid"] = table_name
    else:
        table_name = table_attrs.get("tableid")

    # Only attempt to offer sorting capabilities if a table name is specified.

    if table_name:

        # Get the underlying column types.

        column_types = get_column_types(get_sort_columns(attrs.get("columntypes", "")))

        # Get sorting criteria from the region.

        region_sortcolumns = attrs.get("sortcolumns", "")

        # Update the column types from the sort criteria.

        column_types.update(get_column_types(get_sort_columns(region_sortcolumns)))

        # Determine the applicable sort criteria using the request.

        sortcolumns = getQualifiedParameter(request, table_name, "sortcolumns")
        if sortcolumns is None:
            sortcolumns = region_sortcolumns

        # Define the final sort criteria.

        sort_columns = get_sort_columns(sortcolumns)
        data_start = int(getQualifiedParameter(request, table_name, "headers") or attrs.get("headers", "1"))

        # Update the column types from the final sort criteria.

        column_types.update(get_column_types(sort_columns))

        # Sort the rows according to the values in each of the specified columns.

        if sort_columns:
            headers = table[:data_start]
            data = table[data_start:]

            # Perform the sort and reconstruct the table.

            sorter = Sorter(sort_columns, request)
            data.sort(cmp=sorter)
            table = headers + data

    # Otherwise, indicate that no sorting is being performed.

    else:
        sort_columns = None

    # Write the table.

    write = write or request.write
    write(fmt.table(1, table_attrs))

    for rownumber, (row_attrs, columns) in enumerate(table):
        write(fmt.table_row(1, row_attrs))

        # The last header row carries the sort controls. The data start is only
        # defined (and only consulted) where sorting is being offered.

        sortable_heading = sort_columns is not None and rownumber == data_start - 1

        for columnnumber, (column_attrs, column_text) in enumerate(columns):

            # Always skip column continuation cells.

            if column_attrs.get("colcontinuation"):
                continue

            # Where sorting has not occurred, preserve rowspans and do not write
            # cells that continue a rowspan.

            if not sort_columns:
                if column_attrs.get("rowcontinuation"):
                    continue

            # Where sorting has occurred, replicate cell contents and remove any
            # rowspans.

            else:
                if "rowspan" in column_attrs:
                    del column_attrs["rowspan"]

            # Remove any continuation attributes that still apply.

            if "rowcontinuation" in column_attrs:
                del column_attrs["rowcontinuation"]

            write(fmt.table_cell(1, column_attrs))

            if sortable_heading:
                write(fmt.div(1, css_class="sortablecolumn"))

            # Cells left as gaps by the parser have no text.

            write(formatText(column_text or "", request, fmt))

            # Add sorting controls, if appropriate.

            if sortable_heading:
                write_sort_control(request, columnnumber, columns, sort_columns, column_types, table_name, write=write)
                write(fmt.div(0))

            write(fmt.table_cell(0))

        write(fmt.table_row(0))

    write(fmt.table(0))

def formatTableForOutputType(text, request, mimetype, attrs=None, write=None):

    """
    Format the given 'text' using the specified 'request' for the given output
    'mimetype'. Only "text/html" is handled, producing a complete HTML document
    around the table; nothing is written for any other 'mimetype'.

    The optional 'attrs' can be used to control the presentation of the table.

    If the 'write' parameter is specified, use it to write output; otherwise,
    write output using the request.
    """

    write = write or request.write

    if mimetype == "text/html":
        write('<html>')
        write('<body>')
        fmt = request.html_formatter
        fmt.setPage(request.page)
        formatTable(text, request, fmt, attrs, write)
        write('</body>')
        write('</html>')

# vim: tabstop=4 expandtab shiftwidth=4