1.1 --- a/moinformat/__init__.py Thu May 04 19:13:31 2017 +0200
1.2 +++ b/moinformat/__init__.py Thu May 04 21:41:13 2017 +0200
1.3 @@ -189,48 +189,7 @@
1.4
1.5
1.6
1.7 -# Parsing utilities.
1.8 -
1.9 -def parse_region_details(items, region, pattern_names):
1.10 -
1.11 - "Parse 'items' within 'region' searching using 'pattern_names'."
1.12 -
1.13 - try:
1.14 - while True:
1.15 -
1.16 - # Obtain text before any marker or the end of the input.
1.17 -
1.18 - preceding = items.read_until(pattern_names)
1.19 - if preceding:
1.20 - region.append_inline(Text(preceding))
1.21 -
1.22 - # End of input.
1.23 -
1.24 - if not items.matching:
1.25 - break
1.26 -
1.27 - # Obtain any feature.
1.28 -
1.29 - feature = items.read_match()
1.30 - handler = handlers.get(items.matching)
1.31 -
1.32 - # Handle each feature or add text to the region.
1.33 -
1.34 - if handler:
1.35 - handler(items, region)
1.36 - else:
1.37 - region.append_inline(Text(feature))
1.38 -
1.39 - except StopIteration:
1.40 - pass
1.41 -
1.42 - region.normalise()
1.43 -
1.44 -def end_region(items, region):
1.45 -
1.46 - "End the parsing of 'region', breaking out of the parsing loop."
1.47 -
1.48 - raise StopIteration
1.49 +# Utility functions.
1.50
1.51 def new_block(region):
1.52
1.53 @@ -241,401 +200,478 @@
1.54
1.55
1.56
1.57 -# Parser functions for different page features.
1.58 +# Parser abstraction.
1.59
1.60 -def parse_page(s):
1.61 +class Parser:
1.62 +
1.63 + "An extensible parser."
1.64
1.65 - """
1.66 - Parse page text 's'. Pages consist of regions delimited by markers.
1.67 - """
1.68 + def __init__(self, formats=None):
1.69 + self.formats = formats
1.70 +
1.71 + # Principal parser methods.
1.72
1.73 - return parse_region(TokenStream(s))
1.74 + def parse_page(self, s):
1.75
1.76 -def parse_region(items, level=0, indent=0):
1.77 + """
1.78 + Parse page text 's'. Pages consist of regions delimited by markers.
1.79 + """
1.80
1.81 - """
1.82 - Parse the data provided by 'items' to populate a region with the given
1.83 - 'level' at the given 'indent'.
1.84 - """
1.85 + return self.parse_region(TokenStream(s))
1.86
1.87 - region = Region([], level, indent)
1.88 -
1.89 - # Parse section headers.
1.90 + def parse_region(self, items, level=0, indent=0):
1.91
1.92 - parse_region_header(items, region)
1.93 -
1.94 - # Parse section body.
1.95 + """
1.96 + Parse the data provided by 'items' to populate a region with the given
1.97 + 'level' at the given 'indent'.
1.98 + """
1.99
1.100 - if region.is_transparent():
1.101 - parse_region_wiki(items, region)
1.102 - else:
1.103 - parse_region_opaque(items, region)
1.104 + region = Region([], level, indent)
1.105 +
1.106 + # Parse section headers.
1.107 +
1.108 + self.parse_region_header(items, region)
1.109 +
1.110 + # Parse section body.
1.111
1.112 - return region
1.113 + if region.is_transparent():
1.114 + self.parse_region_wiki(items, region)
1.115 + else:
1.116 + self.parse_region_opaque(items, region)
1.117
1.118 -def parse_region_header(items, region):
1.119 + return region
1.120 +
1.121 + def parse_region_header(self, items, region):
1.122
1.123 - """
1.124 - Parse the region header from the 'items', setting it for the given 'region'.
1.125 - """
1.126 + """
1.127 + Parse the region header from the 'items', setting it for the given 'region'.
1.128 + """
1.129
1.130 - if items.read_until(["header"], False) == "": # None means no header
1.131 - region.type = items.read_match()
1.132 + if items.read_until(["header"], False) == "": # None means no header
1.133 + region.type = items.read_match()
1.134
1.135 -def parse_region_wiki(items, region):
1.136 + def parse_region_wiki(self, items, region):
1.137
1.138 - "Parse the data provided by 'items' to populate a wiki 'region'."
1.139 + "Parse the data provided by 'items' to populate a wiki 'region'."
1.140
1.141 - new_block(region)
1.142 - parse_region_details(items, region, inline_pattern_names + [
1.143 - "break", "heading",
1.144 - "defterm", "defterm_empty",
1.145 - "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
1.146 - "listitem_roman",
1.147 - "regionstart", "regionend",
1.148 - "rule",
1.149 - "tablerow",
1.150 - ])
1.151 + new_block(region)
1.152 + self.parse_region_details(items, region, inline_pattern_names + [
1.153 + "break", "heading",
1.154 + "defterm", "defterm_empty",
1.155 + "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
1.156 + "listitem_roman",
1.157 + "regionstart", "regionend",
1.158 + "rule",
1.159 + "tablerow",
1.160 + ])
1.161
1.162 -def parse_region_opaque(items, region):
1.163 + def parse_region_opaque(self, items, region):
1.164 +
1.165 + "Parse the data provided by 'items' to populate an opaque 'region'."
1.166 +
1.167 + self.parse_region_details(items, region, ["regionend"])
1.168
1.169 - "Parse the data provided by 'items' to populate an opaque 'region'."
1.170 + # Parser methods supporting different page features.
1.171 +
1.172 + def parse_attrname(self, items, attrs):
1.173 +
1.174 + "Handle an attribute name within 'attrs'."
1.175
1.176 - parse_region_details(items, region, ["regionend"])
1.177 + name = items.read_match()
1.178 + attr = TableAttr(name)
1.179
1.180 -def parse_attrname(items, attrs):
1.181 + preceding = items.read_until(["attrvalue"], False)
1.182 + if preceding == "":
1.183 + attr.quote = items.read_match(1)
1.184 + attr.value = items.read_match(2)
1.185
1.186 - "Handle an attribute name within 'attrs'."
1.187 + attrs.append(attr)
1.188 +
1.189 + def parse_break(self, items, region):
1.190
1.191 - name = items.read_match()
1.192 - attr = TableAttr(name)
1.193 + "Handle a paragraph break within 'region'."
1.194 +
1.195 + region.add(Break())
1.196 + new_block(region)
1.197 +
1.198 + def parse_defitem(self, items, region, extra=""):
1.199
1.200 - preceding = items.read_until(["attrvalue"], False)
1.201 - if preceding == "":
1.202 - attr.quote = items.read_match(1)
1.203 - attr.value = items.read_match(2)
1.204 + "Handle a definition item within 'region'."
1.205
1.206 - attrs.append(attr)
1.207 -
1.208 -def parse_break(items, region):
1.209 + pad = items.read_match(1)
1.210 + item = DefItem([], pad, extra)
1.211 + self.parse_region_details(items, item, ["listitemend"])
1.212 + region.add(item)
1.213 + new_block(region)
1.214
1.215 - "Handle a paragraph break within 'region'."
1.216 + def parse_defterm(self, items, region):
1.217 +
1.218 + "Handle a definition term within 'region'."
1.219
1.220 - region.add(Break())
1.221 - new_block(region)
1.222 -
1.223 -def parse_defitem(items, region, extra=""):
1.224 -
1.225 - "Handle a definition item within 'region'."
1.226 + pad = items.read_match(1)
1.227 + term = DefTerm([], pad)
1.228 + self.parse_region_details(items, term, ["deftermend", "deftermsep"])
1.229 + region.add(term)
1.230 + if items.matching == "deftermsep":
1.231 + self.parse_defitem(items, region)
1.232
1.233 - pad = items.read_match(1)
1.234 - item = DefItem([], pad, extra)
1.235 - parse_region_details(items, item, ["listitemend"])
1.236 - region.add(item)
1.237 - new_block(region)
1.238 + def parse_defterm_empty(self, items, region):
1.239 +
1.240 + "Handle an empty definition term within 'region'."
1.241
1.242 -def parse_defterm(items, region):
1.243 + extra = items.read_match(1)
1.244 + self.parse_region_details(items, region, ["deftermsep"])
1.245 + self.parse_defitem(items, region, extra)
1.246
1.247 - "Handle a definition term within 'region'."
1.248 + def parse_fontstyle(self, items, region):
1.249 +
1.250 + "Handle emphasis and strong styles."
1.251
1.252 - pad = items.read_match(1)
1.253 - term = DefTerm([], pad)
1.254 - parse_region_details(items, term, ["deftermend", "deftermsep"])
1.255 - region.add(term)
1.256 - if items.matching == "deftermsep":
1.257 - parse_defitem(items, region)
1.258 + n = len(items.read_match(1))
1.259 +
1.260 + # Handle endings.
1.261
1.262 -def parse_defterm_empty(items, region):
1.263 -
1.264 - "Handle an empty definition term within 'region'."
1.265 -
1.266 - extra = items.read_match(1)
1.267 - parse_region_details(items, region, ["deftermsep"])
1.268 - parse_defitem(items, region, extra)
1.269 + if isinstance(region, FontStyle):
1.270 + emphasis = n in (2, 4, 5)
1.271 + strong = n in (3, 5, 6)
1.272 + active = True
1.273
1.274 -def parse_fontstyle(items, region):
1.275 -
1.276 - "Handle emphasis and strong styles."
1.277 + if region.emphasis and emphasis:
1.278 + active = region.close_emphasis()
1.279 + n -= 2
1.280 + if region.strong and strong:
1.281 + active = region.close_strong()
1.282 + n -= 3
1.283
1.284 - n = len(items.read_match(1))
1.285 + if not active:
1.286 + if n:
1.287 + items.rewind(n)
1.288 + raise StopIteration
1.289
1.290 - # Handle endings.
1.291 + elif not n:
1.292 + return
1.293
1.294 - if isinstance(region, FontStyle):
1.295 + # Handle new styles.
1.296 +
1.297 emphasis = n in (2, 4, 5)
1.298 strong = n in (3, 5, 6)
1.299 - active = True
1.300 + double = n in (4, 6)
1.301 +
1.302 + span = FontStyle([], emphasis, strong)
1.303 + if not double:
1.304 + self.parse_region_details(items, span, inline_pattern_names)
1.305 + region.append_inline(span)
1.306 +
1.307 + def parse_halign(self, items, attrs):
1.308 +
1.309 + "Handle horizontal alignment within 'attrs'."
1.310 +
1.311 + value = items.read_match()
1.312 + attr = TableAttr("halign", value == "(" and "left" or value == ")" and "right" or "center", True)
1.313 + attrs.append(attr)
1.314 +
1.315 + def parse_heading(self, items, region):
1.316
1.317 - if region.emphasis and emphasis:
1.318 - active = region.close_emphasis()
1.319 - n -= 2
1.320 - if region.strong and strong:
1.321 - active = region.close_strong()
1.322 - n -= 3
1.323 + "Handle a heading."
1.324
1.325 - if not active:
1.326 - if n:
1.327 - items.rewind(n)
1.328 + start_extra = items.read_match(1)
1.329 + level = len(items.read_match(2))
1.330 + start_pad = items.read_match(3)
1.331 + heading = Heading([], level, start_extra, start_pad)
1.332 + self.parse_region_details(items, heading, ["headingend"] + inline_pattern_names)
1.333 + region.add(heading)
1.334 + new_block(region)
1.335 +
1.336 + def parse_heading_end(self, items, heading):
1.337 +
1.338 + "Handle the end of 'heading' where the end marker level matches its level."
1.339 +
1.340 + level = len(items.read_match(2))
1.341 + if heading.level == level:
1.342 + heading.end_pad = items.read_match(1)
1.343 + heading.end_extra = items.read_match(3)
1.344 raise StopIteration
1.345
1.346 - elif not n:
1.347 + def parse_listitem(self, items, region):
1.348 +
1.349 + "Handle a list item marker within 'region'."
1.350 +
1.351 + indent = len(items.read_match(1))
1.352 + marker = items.read_match(2)
1.353 + space = items.read_match(3)
1.354 + item = ListItem([], indent, marker, space)
1.355 + self.parse_region_details(items, item, ["listitemend"])
1.356 + region.add(item)
1.357 + new_block(region)
1.358 +
1.359 + def parse_rule(self, items, region):
1.360 +
1.361 + "Handle a horizontal rule within 'region'."
1.362 +
1.363 + length = len(items.read_match(1))
1.364 + rule = Rule(length)
1.365 + region.add(rule)
1.366 + new_block(region)
1.367 +
1.368 + def parse_section(self, items, region):
1.369 +
1.370 + "Handle the start of a new section within 'region'."
1.371 +
1.372 + # Parse the section and start a new block after the section.
1.373 +
1.374 + indent = len(items.read_match(2))
1.375 + level = len(items.read_match(3))
1.376 + region.add(self.parse_region(items, level, indent))
1.377 + new_block(region)
1.378 +
1.379 + def parse_section_end(self, items, region):
1.380 +
1.381 + "Handle a section end marker, ending 'region' or adding the marker as text."
1.382 +
1.383 + feature = items.read_match()
1.384 + if region.have_end(feature):
1.385 + raise StopIteration
1.386 + else:
1.387 + region.append_inline(Text(feature))
1.388 +
1.389 + def parse_table_attrs(self, items, cell):
1.390 +
1.391 + "Handle the start of table attributes within 'cell'."
1.392 +
1.393 + attrs = TableAttrs([])
1.394 + self.parse_region_details(items, attrs, table_pattern_names)
1.395 +
1.396 + # Test the validity of the attributes.
1.397 +
1.398 + last = None
1.399 +
1.400 + for node in attrs.nodes:
1.401 +
1.402 + # Text separator nodes must be whitespace.
1.403 +
1.404 + if isinstance(node, Text):
1.405 + if node.s.strip():
1.406 + break
1.407 +
1.408 + # Named attributes must be preceded by space if not the first.
1.409 +
1.410 + elif last and not node.concise and not isinstance(last, Text):
1.411 + break
1.412 +
1.413 + last = node
1.414 +
1.415 + # All nodes were valid: preserve the collection.
1.416 +
1.417 + else:
1.418 + cell.attrs = attrs
1.419 return
1.420
1.421 - # Handle new styles.
1.422 -
1.423 - emphasis = n in (2, 4, 5)
1.424 - strong = n in (3, 5, 6)
1.425 - double = n in (4, 6)
1.426 + # Invalid nodes were found: serialise the attributes as text.
1.427
1.428 - span = FontStyle([], emphasis, strong)
1.429 - if not double:
1.430 - parse_region_details(items, span, inline_pattern_names)
1.431 - region.append_inline(span)
1.432 -
1.433 -def parse_halign(items, attrs):
1.434 -
1.435 - "Handle horizontal alignment within 'attrs'."
1.436 -
1.437 - value = items.read_match()
1.438 - attr = TableAttr("halign", value == "(" and "left" or value == ")" and "right" or "center", True)
1.439 - attrs.append(attr)
1.440 + cell.append_inline(Text(serialise(attrs)))
1.441
1.442 -def parse_heading(items, region):
1.443 -
1.444 - "Handle a heading."
1.445 + def parse_table_row(self, items, region):
1.446
1.447 - start_extra = items.read_match(1)
1.448 - level = len(items.read_match(2))
1.449 - start_pad = items.read_match(3)
1.450 - heading = Heading([], level, start_extra, start_pad)
1.451 - parse_region_details(items, heading, ["headingend"] + inline_pattern_names)
1.452 - region.add(heading)
1.453 - new_block(region)
1.454 + "Handle the start of a table row within 'region'."
1.455
1.456 -def parse_heading_end(items, heading):
1.457 -
1.458 - "Handle the end of a heading."
1.459 -
1.460 - level = len(items.read_match(2))
1.461 - if heading.level == level:
1.462 - heading.end_pad = items.read_match(1)
1.463 - heading.end_extra = items.read_match(3)
1.464 - raise StopIteration
1.465 + row = TableRow([])
1.466
1.467 -def parse_listitem(items, region):
1.468 -
1.469 - "Handle a list item marker within 'region'."
1.470 + while True:
1.471 + cell = TableCell([])
1.472 + self.parse_region_details(items, cell, ["tableattrs", "tablecell", "tableend"])
1.473
1.474 - indent = len(items.read_match(1))
1.475 - marker = items.read_match(2)
1.476 - space = items.read_match(3)
1.477 - item = ListItem([], indent, marker, space)
1.478 - parse_region_details(items, item, ["listitemend"])
1.479 - region.add(item)
1.480 - new_block(region)
1.481 -
1.482 -def parse_rule(items, region):
1.483 -
1.484 - "Handle a horizontal rule within 'region'."
1.485 -
1.486 - length = len(items.read_match(1))
1.487 - rule = Rule(length)
1.488 - region.add(rule)
1.489 - new_block(region)
1.490 + # Handle the end of the row.
1.491
1.492 -def parse_section(items, region):
1.493 + if items.matching == "tableend":
1.494 + trailing = items.read_match()
1.495
1.496 - "Handle the start of a new section within 'region'."
1.497 -
1.498 - # Parse the section and start a new block after the section.
1.499 + # If no cell was completed or the last cell is unfinished, convert the row into text.
1.500
1.501 - indent = len(items.read_match(2))
1.502 - level = len(items.read_match(3))
1.503 - region.add(parse_region(items, level, indent))
1.504 - new_block(region)
1.505 -
1.506 -def parse_section_end(items, region):
1.507 -
1.508 - "Handle the end of a new section within 'region'."
1.509 -
1.510 - feature = items.read_match()
1.511 - if region.have_end(feature):
1.512 - raise StopIteration
1.513 - else:
1.514 - region.append_inline(Text(feature))
1.515 + if not row.nodes or not cell.empty():
1.516 + for node in row.nodes:
1.517 + region.append_inline(Text(serialise(node)))
1.518 + region.append_inline(Text(serialise(cell)))
1.519 + region.append_inline(Text(trailing))
1.520
1.521 -def parse_table_attrs(items, cell):
1.522 -
1.523 - "Handle the start of table attributes within 'cell'."
1.524 -
1.525 - attrs = TableAttrs([])
1.526 - parse_region_details(items, attrs, table_pattern_names)
1.527 -
1.528 - # Test the validity of the attributes.
1.529 + new_block(region)
1.530 + return
1.531
1.532 - last = None
1.533 -
1.534 - for node in attrs.nodes:
1.535 -
1.536 - # Text separator nodes must be whitespace.
1.537 -
1.538 - if isinstance(node, Text):
1.539 - if node.s.strip():
1.540 - break
1.541 -
1.542 - # Named attributes must be preceded by space if not the first.
1.543 + # Append the final cell, if not empty.
1.544
1.545 - elif last and not node.concise and not isinstance(last, Text):
1.546 - break
1.547 -
1.548 - last = node
1.549 -
1.550 - # All nodes were valid: preserve the collection.
1.551 + else:
1.552 + row.trailing = trailing
1.553
1.554 - else:
1.555 - cell.attrs = attrs
1.556 - return
1.557 -
1.558 - # Invalid nodes were found: serialise the attributes as text.
1.559 -
1.560 - cell.append_inline(Text(serialise(attrs)))
1.561 -
1.562 -def parse_table_row(items, region):
1.563 -
1.564 - "Handle the start of a table row within 'region'."
1.565 -
1.566 - row = TableRow([])
1.567 + if not cell.empty():
1.568 + row.append(cell)
1.569 + break
1.570
1.571 - while True:
1.572 - cell = TableCell([])
1.573 - parse_region_details(items, cell, ["tableattrs", "tablecell", "tableend"])
1.574 -
1.575 - # Handle the end of the row.
1.576 -
1.577 - if items.matching == "tableend":
1.578 - trailing = items.read_match()
1.579 -
1.580 - # If the cell was started but not finished, convert the row into text.
1.581 + # A cell separator has been found.
1.582
1.583 - if not row.nodes or not cell.empty():
1.584 - for node in row.nodes:
1.585 - region.append_inline(Text(serialise(node)))
1.586 - region.append_inline(Text(serialise(cell)))
1.587 - region.append_inline(Text(trailing))
1.588 + row.append(cell)
1.589
1.590 - new_block(region)
1.591 - return
1.592 -
1.593 - # Append the final cell, if not empty.
1.594 + region.add(row)
1.595 + new_block(region)
1.596
1.597 - else:
1.598 - row.trailing = trailing
1.599 + def parse_valign(self, items, attrs):
1.600
1.601 - if not cell.empty():
1.602 - row.append(cell)
1.603 - break
1.604 -
1.605 - # A cell separator has been found.
1.606 -
1.607 - row.append(cell)
1.608 + "Handle vertical alignment within 'attrs'."
1.609
1.610 - region.add(row)
1.611 - new_block(region)
1.612 -
1.613 -def parse_valign(items, attrs):
1.614 -
1.615 - "Handle vertical alignment within 'attrs'."
1.616 -
1.617 - value = items.read_match()
1.618 - attr = TableAttr("valign", value == "^" and "top" or "bottom", True)
1.619 - attrs.append(attr)
1.620 + value = items.read_match()
1.621 + attr = TableAttr("valign", value == "^" and "top" or "bottom", True)
1.622 + attrs.append(attr)
1.623
1.624
1.625
1.626 -# Inline formatting handlers.
1.627 + # Inline formatting handlers.
1.628 +
1.629 + def parse_inline(self, items, region, cls, pattern_name):
1.630
1.631 -def parse_inline(items, region, cls, pattern_name):
1.632 + "Handle an inline region."
1.633 +
1.634 + span = cls([])
1.635 + self.parse_region_details(items, span, inline_patterns_for(pattern_name))
1.636 + region.append_inline(span)
1.637
1.638 - "Handle an inline region."
1.639 + def parse_larger(self, items, region):
1.640 + self.parse_inline(items, region, Larger, "larger")
1.641 +
1.642 + def parse_monospace(self, items, region):
1.643 + self.parse_inline(items, region, Monospace, "monospace")
1.644
1.645 - span = cls([])
1.646 - parse_region_details(items, span, inline_patterns_for(pattern_name))
1.647 - region.append_inline(span)
1.648 + def parse_smaller(self, items, region):
1.649 + self.parse_inline(items, region, Smaller, "smaller")
1.650 +
1.651 + def parse_sub(self, items, region):
1.652 + self.parse_inline(items, region, Subscript, "sub")
1.653 +
1.654 + def parse_super(self, items, region):
1.655 + self.parse_inline(items, region, Superscript, "super")
1.656
1.657 -parse_larger = lambda items, region: parse_inline(items, region, Larger, "larger")
1.658 -parse_monospace = lambda items, region: parse_inline(items, region, Monospace, "monospace")
1.659 -parse_smaller = lambda items, region: parse_inline(items, region, Smaller, "smaller")
1.660 -parse_sub = lambda items, region: parse_inline(items, region, Subscript, "sub")
1.661 -parse_super = lambda items, region: parse_inline(items, region, Superscript, "super")
1.662 -parse_underline = lambda items, region: parse_inline(items, region, Underline, "underline")
1.663 + def parse_underline(self, items, region):
1.664 + self.parse_inline(items, region, Underline, "underline")
1.665 +
1.666 +
1.667
1.668 -# Table attribute handlers.
1.669 + # Table attribute handlers.
1.670 +
1.671 + def parse_table_attr(self, items, attrs, pattern_name):
1.672 +
1.673 + "Handle a table attribute."
1.674
1.675 -def parse_table_attr(items, attrs, pattern_name):
1.676 + value = items.read_match()
1.677 + attrs.append(TableAttr(pattern_name, value, True))
1.678
1.679 - "Handle a table attribute."
1.680 + def parse_colour(self, items, cell):
1.681 + self.parse_table_attr(items, cell, "colour")
1.682
1.683 - value = items.read_match()
1.684 - attrs.append(TableAttr(pattern_name, value, True))
1.685 + def parse_colspan(self, items, cell):
1.686 + self.parse_table_attr(items, cell, "colspan")
1.687
1.688 -parse_colour = lambda items, cell: parse_table_attr(items, cell, "colour")
1.689 -parse_colspan = lambda items, cell: parse_table_attr(items, cell, "colspan")
1.690 -parse_rowspan = lambda items, cell: parse_table_attr(items, cell, "rowspan")
1.691 -parse_width = lambda items, cell: parse_table_attr(items, cell, "width")
1.692 + def parse_rowspan(self, items, cell):
1.693 + self.parse_table_attr(items, cell, "rowspan")
1.694 +
1.695 + def parse_width(self, items, cell):
1.696 + self.parse_table_attr(items, cell, "width")
1.697
1.698
1.699
1.700 -# Pattern handlers.
1.701 + # Parsing utilities.
1.702 +
1.703 + def parse_region_details(self, items, region, pattern_names):
1.704 +
1.705 + "Parse 'items' within 'region' searching using 'pattern_names'."
1.706 +
1.707 + try:
1.708 + while True:
1.709 +
1.710 + # Obtain text before any marker or the end of the input.
1.711 +
1.712 + preceding = items.read_until(pattern_names)
1.713 + if preceding:
1.714 + region.append_inline(Text(preceding))
1.715 +
1.716 + # End of input.
1.717 +
1.718 + if not items.matching:
1.719 + break
1.720 +
1.721 + # Obtain any feature.
1.722 +
1.723 + feature = items.read_match()
1.724 + handler = self.handlers.get(items.matching)
1.725 +
1.726 + # Handle each feature or add text to the region.
1.727 +
1.728 + if handler:
1.729 + handler(self, items, region)
1.730 + else:
1.731 + region.append_inline(Text(feature))
1.732 +
1.733 + except StopIteration:
1.734 + pass
1.735 +
1.736 + region.normalise()
1.737 +
1.738 + def end_region(self, items, region):
1.739 +
1.740 + "End the parsing of 'region', breaking out of the parsing loop."
1.741 +
1.742 + raise StopIteration
1.743 +
1.744 +
1.745
1.746 -handlers = {
1.747 - None : end_region,
1.748 - "attrname" : parse_attrname,
1.749 - "break" : parse_break,
1.750 - "colour" : parse_colour,
1.751 - "colspan" : parse_colspan,
1.752 - "defterm" : parse_defterm,
1.753 - "defterm_empty" : parse_defterm_empty,
1.754 - "deftermend" : end_region,
1.755 - "deftermsep" : end_region,
1.756 - "fontstyle" : parse_fontstyle,
1.757 - "halign" : parse_halign,
1.758 - "heading" : parse_heading,
1.759 - "headingend" : parse_heading_end,
1.760 - "larger" : parse_larger,
1.761 - "largerend" : end_region,
1.762 - "listitemend" : end_region,
1.763 - "listitem" : parse_listitem,
1.764 - "listitem_alpha" : parse_listitem,
1.765 - "listitem_dot" : parse_listitem,
1.766 - "listitem_num" : parse_listitem,
1.767 - "listitem_roman" : parse_listitem,
1.768 - "monospace" : parse_monospace,
1.769 - "monospaceend" : end_region,
1.770 - "regionstart" : parse_section,
1.771 - "regionend" : parse_section_end,
1.772 - "rowspan" : parse_rowspan,
1.773 - "rule" : parse_rule,
1.774 - "smaller" : parse_smaller,
1.775 - "smallerend" : end_region,
1.776 - "sub" : parse_sub,
1.777 - "subend" : end_region,
1.778 - "super" : parse_super,
1.779 - "superend" : end_region,
1.780 - "tableattrs" : parse_table_attrs,
1.781 - "tableattrsend" : end_region,
1.782 - "tablerow" : parse_table_row,
1.783 - "tablecell" : end_region,
1.784 - "tableend" : end_region,
1.785 - "underline" : parse_underline,
1.786 - "underlineend" : end_region,
1.787 - "valign" : parse_valign,
1.788 - "width" : parse_width,
1.789 - }
1.790 + # Pattern handlers.
1.791 +
1.792 + handlers = {
1.793 + None : end_region,
1.794 + "attrname" : parse_attrname,
1.795 + "break" : parse_break,
1.796 + "colour" : parse_colour,
1.797 + "colspan" : parse_colspan,
1.798 + "defterm" : parse_defterm,
1.799 + "defterm_empty" : parse_defterm_empty,
1.800 + "deftermend" : end_region,
1.801 + "deftermsep" : end_region,
1.802 + "fontstyle" : parse_fontstyle,
1.803 + "halign" : parse_halign,
1.804 + "heading" : parse_heading,
1.805 + "headingend" : parse_heading_end,
1.806 + "larger" : parse_larger,
1.807 + "largerend" : end_region,
1.808 + "listitemend" : end_region,
1.809 + "listitem" : parse_listitem,
1.810 + "listitem_alpha" : parse_listitem,
1.811 + "listitem_dot" : parse_listitem,
1.812 + "listitem_num" : parse_listitem,
1.813 + "listitem_roman" : parse_listitem,
1.814 + "monospace" : parse_monospace,
1.815 + "monospaceend" : end_region,
1.816 + "regionstart" : parse_section,
1.817 + "regionend" : parse_section_end,
1.818 + "rowspan" : parse_rowspan,
1.819 + "rule" : parse_rule,
1.820 + "smaller" : parse_smaller,
1.821 + "smallerend" : end_region,
1.822 + "sub" : parse_sub,
1.823 + "subend" : end_region,
1.824 + "super" : parse_super,
1.825 + "superend" : end_region,
1.826 + "tableattrs" : parse_table_attrs,
1.827 + "tableattrsend" : end_region,
1.828 + "tablerow" : parse_table_row,
1.829 + "tablecell" : end_region,
1.830 + "tableend" : end_region,
1.831 + "underline" : parse_underline,
1.832 + "underlineend" : end_region,
1.833 + "valign" : parse_valign,
1.834 + "width" : parse_width,
1.835 + }
1.836
1.837
1.838
1.839 # Top-level functions.
1.840
1.841 -parse = parse_page
1.842 +def parse(s, formats=None):
1.843 + return Parser(formats).parse_page(s)
1.844
1.845 # vim: tabstop=4 expandtab shiftwidth=4