paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - ImprovedTableParser library |
paul@0 | 4 | |
paul@0 | 5 | @copyright: 2012 by Paul Boddie <paul@boddie.org.uk> |
paul@0 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 7 | """ |
paul@0 | 8 | |
paul@0 | 9 | from MoinMoin import wikiutil |
paul@0 | 10 | from shlex import shlex |
paul@0 | 11 | from StringIO import StringIO |
paul@5 | 12 | from MoinSupport import * |
paul@0 | 13 | import re |
paul@0 | 14 | |
paul@0 | 15 | # Regular expressions. |
paul@0 | 16 | |
syntax = {
    # Escaped section markers: leading whitespace, then three repetitions of
    # the same backslash run followed by the same brace character.
    "markers"   : (r"^\s*(?P<n>\\+)(?P<b>{|})(?P=n)(?P=b)(?P=n)(?P=b)", re.MULTILINE),

    # A single run of backslashes within a marker.
    "marker"    : (r"(\\+)", 0),

    # Anchored at the start of a line:
    "sections"  : (r"(^\s*{{{.*?^\s*}}})", re.MULTILINE | re.DOTALL),  # {{{ ... }}}
    "rows"      : (r"^==", re.MULTILINE),                              # ==

    # Anywhere within text:
    "columns"   : (r"\|\|[ \t]*", 0),                   # || ws-excl-nl

    # Anchored at the start of column text:
    "column"    : (r"^\s*<(.*?)>\s*(.*)", re.DOTALL),   # ws < attributes > ws
    }

# Compile each expression once, always adding the Unicode flag to the flags
# given in the syntax table.

patterns = dict(
    (name, re.compile(value, re.UNICODE | flags))
    for name, (value, flags) in syntax.items()
    )
paul@0 | 36 | |
# Other regular expressions.

# One or more digits: used with match() to find a number at the start of a
# string. At least one digit is required so that text with no leading number
# produces no match at all, instead of an empty match that only fails later
# when converted to an integer.

leading_number_regexp = re.compile(r"\d+")
paul@4 | 40 | |
paul@0 | 41 | # Functions. |
paul@0 | 42 | |
def parse(s):

    """
    Parse 's', returning a table definition.

    The result is a tuple (table_attrs, rows) where 'table_attrs' is a
    dictionary of table-level attributes and 'rows' is a list of
    (row_attrs, columns) tuples. Each 'columns' value is a list of two-element
    [attrs, text] lists, one per column.

    Row- and table-level attributes are taken from the first column of each
    row by extractAttributes and removed from that column's own attributes.
    """

    # Reduce the escaping of any escaped section markers before looking for
    # sections.

    s = replaceMarkers(s)

    table_attrs = {}
    rows = []

    # The following will be redefined upon the construction of the first row.

    row_attrs = {}
    columns = []

    # Process exposed text and sections.
    # The sections pattern has a capturing group, so the split yields exposed
    # text and section text alternately, starting with exposed text.

    exposed = True

    # Initially, start a new row.

    row_continued = False

    for region in patterns["sections"].split(s):

        # Only look for table features in exposed text.

        if exposed:

            # Extract each row from the definition.

            for row_text in patterns["rows"].split(region):

                # Only create a new row when a boundary has been found.

                if not row_continued:

                    # Finish the previous row by moving its row- and
                    # table-level attributes out of its first column.

                    if columns:
                        extractAttributes(columns[0][0], row_attrs, table_attrs)

                    # The same dictionary and list objects are stored in
                    # 'rows' and then filled in as the row is processed.

                    row_attrs = {}
                    columns = []
                    rows.append((row_attrs, columns))
                    column_continued = False

                # Extract each column from the row.

                for text in patterns["columns"].split(row_text):

                    # Only create a new column when a boundary has been found.

                    if not column_continued:

                        # Extract the attribute and text sections.

                        match = patterns["column"].search(text)
                        if match:
                            attribute_text, text = match.groups()
                            columns.append([parseAttributes(attribute_text, True), text])
                        else:
                            columns.append([{}, text])

                    # Otherwise, add the text to the current column.

                    else:
                        columns[-1][1] += text

                    # Permit columns immediately following this one.

                    column_continued = False

                # Permit a continuation of the current column.
                # This only takes effect for the last row of the region, since
                # a new row resets the flag.

                column_continued = True

                # Permit rows immediately following this one.

                row_continued = False

            # Permit a continuation of the current row.
            # Exposed text following a section thus resumes the row and column
            # that were current before the section.

            row_continued = True

        # Write any section into the current column.

        else:
            columns[-1][1] += region

        exposed = not exposed

    # Finish the final row in the same way as the others.

    if columns:
        extractAttributes(columns[0][0], row_attrs, table_attrs)

    return table_attrs, rows
paul@0 | 133 | |
def extractAttributes(attrs, row_attrs, table_attrs):

    """
    Extract row- and table-level attributes from 'attrs', storing them in
    'row_attrs' and 'table_attrs' respectively.

    Attributes whose names start with "row" (except "rowspan", which remains a
    column attribute) are moved to 'row_attrs'; those whose names start with
    "table" are moved to 'table_attrs'. Moved attributes are removed from
    'attrs'. All three dictionaries are modified in place and nothing is
    returned.
    """

    # Iterate over a snapshot of the items because 'attrs' is modified in the
    # loop: deleting from a dictionary while iterating over a live view of it
    # raises RuntimeError.

    for name, value in list(attrs.items()):
        if name.startswith("row") and name != "rowspan":
            row_attrs[name] = value
            del attrs[name]
        elif name.startswith("table"):
            table_attrs[name] = value
            del attrs[name]
paul@1 | 148 | |
def replaceMarkers(s):

    """
    Return 's' with every escaped section marker reduced by one level of
    escaping: each run of backslashes inside a marker loses one backslash.
    Text outside the markers is returned unchanged.
    """

    def shorten(run):

        "Return the matched backslash 'run' without its final backslash."

        return run.group()[:-1]

    def convert(marker):

        "Return the text of the matched 'marker' with each run shortened."

        return patterns["marker"].sub(shorten, marker.group())

    # Only text matched as a marker is rewritten.

    return patterns["markers"].sub(convert, s)
paul@1 | 179 | |
def parseAttributes(s, escape=True):

    """
    Parse the table attributes string 's', returning a mapping of names to
    values. Names are converted to lower case when stored.

    If 'escape' is set to a true value, the attributes will be suitable for use
    with the formatter API: names and values are passed through
    wikiutil.escape, unquoted values are wrapped in double quotes, and
    attributes given without a value are given the string "true".

    If 'escape' is set to a false value, the attributes will have any quoting
    removed, and attributes given without a value are given the value True.
    """

    def convert_name(token):

        "Return the attribute name for 'token', observing 'escape'."

        if escape:
            return wikiutil.escape(token)
        else:
            return strip_token(token)

    attrs = {}
    f = StringIO(s)
    name = None
    need_value = False

    for token in shlex(f):

        # Capture the name if needed.

        if name is None:
            name = convert_name(token)

        # Detect either an equals sign or another name.

        elif not need_value:
            if token == "=":
                need_value = True
            else:
                # The previous name had no value; this token starts the next
                # attribute and is converted like any other name.

                attrs[name.lower()] = escape and "true" or True
                name = convert_name(token)

        # Otherwise, capture a value.

        else:
            # Quoting of attributes done similarly to wikiutil.parseAttributes.

            if token:
                if escape:
                    if token[0] in ("'", '"'):
                        token = wikiutil.escape(token)
                    else:
                        token = '"%s"' % wikiutil.escape(token, 1)
                else:
                    token = strip_token(token)

            attrs[name.lower()] = token
            name = None
            need_value = False

    # Record a name-only attribute at the end of the string, which would
    # otherwise be lost because no further token arrives to complete it.
    # A name followed by an equals sign but no value is still discarded.

    if name is not None and not need_value:
        attrs[name.lower()] = escape and "true" or True

    return attrs
paul@0 | 229 | |
def strip_token(token):

    """
    Return the given 'token' stripped of quoting.

    One pair of matching single or double quotes enclosing the token is
    removed. Any other token, including an empty string or a lone quote
    character, is returned unchanged.
    """

    # At least two characters are needed for an opening and a closing quote.
    # This also avoids indexing into an empty token.

    if len(token) >= 2 and token[0] in ("'", '"') and token[-1] == token[0]:
        return token[1:-1]
    else:
        return token
paul@4 | 238 | |
paul@0 | 239 | # Formatting of embedded content. |
paul@0 | 240 | # NOTE: Borrowed from EventAggregator. |
paul@0 | 241 | |
def getParserClass(request, format):

    """
    Look up the parser plugin for 'format' using the configuration of the given
    'request' and return the result. Where 'format' is empty, or where the
    lookup raises wikiutil.PluginMissingError, the "plain" parser is looked up
    instead.
    """

    wanted = format or "plain"

    try:
        parser_cls = wikiutil.searchAndImportPlugin(request.cfg, "parser", wanted)
    except wikiutil.PluginMissingError:
        parser_cls = wikiutil.searchAndImportPlugin(request.cfg, "parser", "plain")

    return parser_cls
paul@0 | 253 | |
def formatText(text, request, fmt):

    """
    Return the output produced by formatting the given 'text' with the
    formatter 'fmt', using the parser selected by the format of the page of the
    given 'request'.
    """

    # Choose the parser according to the page's "format" processing
    # instruction.

    page_format = request.page.pi["format"]
    parser = getParserClass(request, page_format)(text, request, line_anchors=False)

    # Capture what the parser writes instead of sending it to the request.

    return request.redirectedOutput(parser.format, fmt, inhibit_p=True)
paul@0 | 261 | |
paul@4 | 262 | # Sorting utilities. |
paul@4 | 263 | |
def get_sort_columns(s, start=0):

    """
    Split the comma-separated string 's' into column specifications of the form
    <column>["n"]["d"], returning a list of (index, function, ascending)
    tuples.

    The optional suffix "n" selects numeric conversion (to_number) for the
    column instead of the default (str). The optional suffix "d", which comes
    last, selects descending order. Column indexes in 's' are counted from the
    specified 'start' value (defaulting to 0); returned indexes are zero-based
    and never negative. Specifications without a valid integer column are
    ignored.
    """

    result = []

    for spec in (part.strip() for part in s.split(",")):

        # Remove the ordering suffix first, since it follows any conversion
        # suffix.

        descending = spec.endswith("d")
        if descending:
            spec = spec[:-1]

        numeric = spec.endswith("n")
        if numeric:
            spec = spec[:-1]

        # Ignore badly-specified columns.

        try:
            index = int(spec)
        except ValueError:
            continue

        if numeric:
            convert = to_number
        else:
            convert = str

        result.append((max(0, index - start), convert, not descending))

    return result
paul@4 | 299 | |
def to_number(s):

    """
    Convert 's' to a number, discarding any non-numeric trailing data.

    The leading digits of 's' are returned as an integer. ValueError is raised
    with 's' as its argument where 's' does not start with a digit.
    """

    match = leading_number_regexp.match(s)

    # Treat an empty match like a missing match, so that the error always
    # carries the offending text and never depends on int() rejecting an empty
    # string.

    digits = match and match.group()
    if not digits:
        raise ValueError(s)

    return int(digits)
paul@4 | 309 | |
class Sorter:

    """
    A sorting helper class.

    Instances are comparison functions for table rows, where each row is a
    (row_attrs, columns) tuple and each column is an (attrs, text) pair. They
    are initialised with a list of (column index, conversion function,
    ascending) tuples, which are consulted in order.
    """

    def __init__(self, sort_columns):
        self.sort_columns = sort_columns

    def __call__(self, row1, row2):

        """
        Compare 'row1' and 'row2', returning a negative number, zero or a
        positive number according to the first sort column in which the rows
        differ, or zero where they differ in none.
        """

        row_attrs1, columns1 = row1
        row_attrs2, columns2 = row2

        # Apply the conversions to each column, comparing the results.

        for column, fn, ascending in self.sort_columns:

            # Ignore a column when either row lacks it or when a conversion is
            # not possible. Rows may have differing numbers of columns, and a
            # requested column index may exceed the width of the table.

            try:
                value1 = fn(columns1[column][1])
                value2 = fn(columns2[column][1])
            except (IndexError, ValueError):
                continue

            # Compare without relying on the cmp built-in function.

            result = (value1 > value2) - (value1 < value2)

            # Where the columns differ, return a result observing the sense
            # (ascending or descending) of the comparison for the column.

            if result != 0:
                if ascending:
                    return result
                else:
                    return -result

        return 0
paul@4 | 344 | |
paul@0 | 345 | # Common formatting functions. |
paul@0 | 346 | |
def formatTable(text, request, fmt, attrs=None):

    """
    Format the given 'text' using the specified 'request' and formatter 'fmt'.
    The optional 'attrs' can be used to control the presentation of the table.

    The following entries of 'attrs' are consulted:

    name            the table name used to find a "sortcolumns" request
                    parameter, which takes priority over the attribute below
    sortcolumns     the sort column specification (see get_sort_columns)
    headers         the number of leading rows excluded from sorting
                    (defaulting to 1)

    The table is written to the 'request' and nothing is returned.
    """

    # Permit the omission of the attributes, as the signature suggests.

    if attrs is None:
        attrs = {}

    # Parse the table region.

    table_attrs, table = parse(text)

    # Override any region arguments with request parameters.

    table_name = attrs.get("name")
    sortcolumns = table_name and getQualifiedParameter(request, table_name, "sortcolumns") or attrs.get("sortcolumns")

    # Sort the rows according to the values in each of the specified columns.

    if sortcolumns:

        # Use the default number of header rows where the given value is not a
        # usable number, and do not permit a negative number of rows, which
        # would otherwise be interpreted as counting from the end of the table.

        try:
            data_start = max(0, int(attrs.get("headers", "1")))
        except (TypeError, ValueError):
            data_start = 1

        headers = table[:data_start]
        data = table[data_start:]

        # Get the sort columns using Unix sort-like notation.

        sorter = Sorter(get_sort_columns(sortcolumns))
        data.sort(cmp=sorter)

        table = headers + data

    # Write the table.

    request.write(fmt.table(1, table_attrs))

    for row_attrs, columns in table:
        request.write(fmt.table_row(1, row_attrs))

        for column_attrs, column_text in columns:
            request.write(fmt.table_cell(1, column_attrs))
            request.write(formatText(column_text, request, fmt))
            request.write(fmt.table_cell(0))

        request.write(fmt.table_row(0))

    request.write(fmt.table(0))
paul@0 | 392 | |
paul@0 | 393 | # vim: tabstop=4 expandtab shiftwidth=4 |