ImprovedTableParser (file ImprovedTableParser.py at 103ec898398a)

     1 # -*- coding: iso-8859-1 -*-     2 """     3     MoinMoin - ImprovedTableParser library     4      5     @copyright: 2012 by Paul Boddie <paul@boddie.org.uk>     6     @license: GNU GPL (v2 or later), see COPYING.txt for details.     7 """     8      9 from MoinMoin import wikiutil    10 from shlex import shlex    11 from StringIO import StringIO    12 import re    13     14 # Regular expressions.    15     16 syntax = {    17     # For section markers.    18     "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),    19     "marker"    : (r"(\\+)",                                            0),    20     21     # At start of line:    22     "sections"  : (r"(^\s*{{{.*?^\s*}}})",  re.MULTILINE | re.DOTALL),  # {{{ ... }}}    23     "rows"      : (r"^==",                  re.MULTILINE),              # ==    24     25     # Within text:    26     "columns"   : (r"\|\|[ \t]*",           0),                         # || ws-excl-nl    27     28     # At start of column text:    29     "column"    : (r"^\s*<(.*?)>\s*(.*)",   re.DOTALL),                 # ws < attributes > ws    30     }    31     32 patterns = {}    33 for name, (value, flags) in syntax.items():    34     patterns[name] = re.compile(value, re.UNICODE | flags)    35     36 # Other regular expressions.    37     38 leading_number_regexp = re.compile(r"\d*")    39     40 # Functions.    41     42 def parse(s):    43     44     "Parse 's', returning a table definition."    45     46     s = replaceMarkers(s)    47     48     table_attrs = {}    49     rows = []    50     51     # The following will be redefined upon the construction of the first row.    52     53     row_attrs = {}    54     columns = []    55     56     # Process exposed text and sections.    57     58     exposed = True    59     60     # Initially, start a new row.    
61     62     row_continued = False    63     64     for region in patterns["sections"].split(s):    65     66         # Only look for table features in exposed text.    67     68         if exposed:    69     70             # Extract each row from the definition.    71     72             for row_text in patterns["rows"].split(region):    73     74                 # Only create a new row when a boundary has been found.    75     76                 if not row_continued:    77                     if columns:    78                         extractAttributes(columns[0][0], row_attrs, table_attrs)    79     80                     row_attrs = {}    81                     columns = []    82                     rows.append((row_attrs, columns))    83                     column_continued = False    84     85                 # Extract each column from the row.    86     87                 for text in patterns["columns"].split(row_text):    88     89                     # Only create a new column when a boundary has been found.    90     91                     if not column_continued:    92     93                         # Extract the attribute and text sections.    94     95                         match = patterns["column"].search(text)    96                         if match:    97                             attribute_text, text = match.groups()    98                             columns.append([parseAttributes(attribute_text, True), text])    99                         else:   100                             columns.append([{}, text])   101    102                     else:   103                         columns[-1][1] += text   104    105                     # Permit columns immediately following this one.   106    107                     column_continued = False   108    109                 # Permit a continuation of the current column.   110    111                 column_continued = True   112    113                 # Permit rows immediately following this one.   
114    115                 row_continued = False   116    117             # Permit a continuation if the current row.   118    119             row_continued = True   120    121         # Write any section into the current column.   122    123         else:   124             columns[-1][1] += region   125    126         exposed = not exposed   127    128     if columns:   129         extractAttributes(columns[0][0], row_attrs, table_attrs)   130    131     return table_attrs, rows   132    133 def extractAttributes(attrs, row_attrs, table_attrs):   134    135     """   136     Extract row- and table-level attributes from 'attrs', storing them in   137     'row_attrs' and 'table_attrs' respectively.   138     """   139    140     for name, value in attrs.items():   141         if name.startswith("row") and name != "rowspan":   142             row_attrs[name] = value   143             del attrs[name]   144         elif name.startswith("table"):   145             table_attrs[name] = value   146             del attrs[name]   147    148 def replaceMarkers(s):   149    150     "Convert the section notation in 's'."   151    152     l = []   153     last = 0   154    155     # Get each marker and convert it.   156    157     for match in patterns["markers"].finditer(s):   158         start, stop = match.span()   159         l.append(s[last:start])   160    161         # Convert the marker.   
162    163         marker = []   164         brace = True   165         for text in patterns["marker"].split(match.group()):   166             if brace:   167                 marker.append(text)   168             else:   169                 marker.append(text[:-1])   170             brace = not brace   171    172         l.append("".join(marker))   173         last = stop   174     else:   175         l.append(s[last:])   176    177     return "".join(l)   178    179 def parseAttributes(s, escape=True):   180    181     """   182     Parse the table attributes string 's', returning a mapping of names to   183     values. If 'escape' is set to a true value, the attributes will be suitable   184     for use with the formatter API. If 'escape' is set to a false value, the   185     attributes will have any quoting removed.   186     """   187    188     attrs = {}   189     f = StringIO(s)   190     name = None   191     need_value = False   192    193     for token in shlex(f):   194    195         # Capture the name if needed.   196    197         if name is None:   198             name = escape and wikiutil.escape(token) or strip_token(token)   199    200         # Detect either an equals sign or another name.   201    202         elif not need_value:   203             if token == "=":   204                 need_value = True   205             else:   206                 attrs[name.lower()] = escape and "true" or True   207                 name = wikiutil.escape(token)   208    209         # Otherwise, capture a value.   210    211         else:   212             # Quoting of attributes done similarly to wikiutil.parseAttributes.   
213    214             if token:   215                 if escape:   216                     if token[0] in ("'", '"'):   217                         token = wikiutil.escape(token)   218                     else:   219                         token = '"%s"' % wikiutil.escape(token, 1)   220                 else:   221                     token = strip_token(token)   222        223             attrs[name.lower()] = token   224             name = None   225             need_value = False   226    227     return attrs   228    229 def strip_token(token):   230    231     "Return the given 'token' stripped of quoting."   232    233     if token[0] in ("'", '"') and token[-1] == token[0]:   234         return token[1:-1]   235     else:   236         return token   237    238 # Formatting of embedded content.   239 # NOTE: Borrowed from EventAggregator.   240    241 def getParserClass(request, format):   242    243     """   244     Return a parser class using the 'request' for the given 'format', returning   245     a plain text parser if no parser can be found for the specified 'format'.   246     """   247    248     try:   249         return wikiutil.searchAndImportPlugin(request.cfg, "parser", format or "plain")   250     except wikiutil.PluginMissingError:   251         return wikiutil.searchAndImportPlugin(request.cfg, "parser", "plain")   252    253 def formatText(text, request, fmt):   254    255     "Format the given 'text' using the specified 'request' and formatter 'fmt'."   256    257     parser_cls = getParserClass(request, request.page.pi["format"])   258     parser = parser_cls(text, request, line_anchors=False)   259     return request.redirectedOutput(parser.format, fmt, inhibit_p=True)   260    261 # Sorting utilities.   

def get_sort_columns(s, start=0):

    """
    Split the comma-separated string 's', extracting the column specifications
    of the form <column>["n"]["d"] where the suffix "n" indicates an optional
    numeric conversion for that column and the suffix "d" indicates that the
    column is to be sorted in descending order. Column indexes start from the
    specified 'start' value (defaulting to 0).

    Return a list of (column index, conversion function, ascending) tuples
    where the index is zero-based and never negative. Badly-specified columns
    are ignored.
    """

    sort_columns = []
    for column_spec in s.split(","):
        column_spec = column_spec.strip()

        # Extract the ordering indicator.

        ascending = True
        if column_spec.endswith("d"):
            column_spec = column_spec[:-1]
            ascending = False

        # Extract the conversion indicator and column index.

        if column_spec.endswith("n"):
            column = column_spec[:-1]
            fn = to_number
        else:
            column = column_spec
            fn = str

        # Ignore badly-specified columns.

        try:
            index = int(column)
        except ValueError:
            continue

        sort_columns.append((max(0, index - start), fn, ascending))

    return sort_columns

def to_number(s):

    """
    Convert 's' to a number, discarding any non-numeric trailing data.
    Surrounding whitespace is ignored. A ValueError is raised if 's' does not
    start with a digit.
    """

    # Cell text commonly starts with whitespace, which must not prevent a
    # numeric conversion.

    match = leading_number_regexp.match(s.strip())

    # The pattern may match an empty string, which is not a number.

    if match and match.group():
        return int(match.group())
    else:
        raise ValueError(s)

class Sorter:

    """
    A sorting helper class: instances are comparison functions for table rows
    of the form (row_attrs, columns), comparing the text of the columns
    indicated by the (column index, conversion function, ascending) tuples
    given as 'sort_columns'.
    """

    def __init__(self, sort_columns):
        self.sort_columns = sort_columns

    def __call__(self, row1, row2):

        """
        Compare 'row1' with 'row2', returning a negative, zero or positive
        value in the manner of the Python 2 'cmp' built-in function.
        """

        row_attrs1, columns1 = row1
        row_attrs2, columns2 = row2

        # Apply the conversions to each column, comparing the results.

        for column, fn, ascending in self.sort_columns:

            # Ignore a column when a conversion is not possible or when either
            # row is too short to provide the column.

            try:
                text1 = fn(columns1[column][1])
                text2 = fn(columns2[column][1])
                result = cmp(text1, text2)
            except (ValueError, IndexError):
                continue

            # Where the columns differ, return a result observing the sense
            # (ascending or descending) of the comparison for the column.

            if result != 0:
                if ascending:
                    return result
                else:
                    return -result

        return 0

# Common formatting functions.

def formatTable(text, request, fmt, attrs=None):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt'.
    The optional 'attrs' can be used to control the presentation of the table.

    If 'attrs' provides "sortcolumns", the rows following the header rows are
    sorted using that column specification (see get_sort_columns). The number
    of header rows is given by the "headers" attribute, defaulting to 1.

    The formatted table is written to the 'request'.
    """

    # Permit the attributes to be omitted, as the signature advertises.

    if attrs is None:
        attrs = {}

    table_attrs, table = parse(text)

    # Sort the rows according to the values in each of the specified columns.

    if "sortcolumns" in attrs:
        data_start = int(attrs.get("headers", "1"))
        headers = table[:data_start]
        data = table[data_start:]

        # Get the sort columns using Unix sort-like notation.

        sorter = Sorter(get_sort_columns(attrs["sortcolumns"]))
        data.sort(cmp=sorter)

        table = headers + data

    # Write the table.

    request.write(fmt.table(1, table_attrs))

    for row_attrs, columns in table:
        request.write(fmt.table_row(1, row_attrs))

        for column_attrs, column_text in columns:
            request.write(fmt.table_cell(1, column_attrs))
            request.write(formatText(column_text, request, fmt))
            request.write(fmt.table_cell(0))

        request.write(fmt.table_row(0))

    request.write(fmt.table(0))

# vim: tabstop=4 expandtab shiftwidth=4