# HG changeset patch # User Paul Boddie # Date 1531675241 -7200 # Node ID 285d1e37c8ad33aa2cca1dd1f1ac8f3d028b42fa # Parent d517824d2df556585a60b5eee7b9469aaf0055cf Introduced group names and pattern generation helper functions. Adjusted the Moin serialiser to work with these changes. diff -r d517824d2df5 -r 285d1e37c8ad moinformat/parsers/common.py --- a/moinformat/parsers/common.py Sun Jul 15 14:19:42 2018 +0200 +++ b/moinformat/parsers/common.py Sun Jul 15 19:20:41 2018 +0200 @@ -26,6 +26,44 @@ # Pattern management. ws_excl_nl = r"[ \f\r\t\v]" +quotes = "['" '"]' # ['"] + +def excl(s): + + "Return a non-matching pattern for 's'." + + return "(?!%s)" % s + +def expect(s): + + "Return a pattern expecting 's'." + + return "(?=%s)" % s + +def group(name, s): + + "Return a pattern group having 'name' and the pattern string 's'." + + return "(?P<%s>%s)" % (name, s) + +def optional(s): + + "Return an optional pattern." + + return "(?:%s)?" % s + +def recur(name): + + "Return a test for a recurrence of group 'name'." + + return "(?P=%s)" % name + +def repeat(s, min=None, max=None): + + "Return a pattern matching 's' for the given 'min' and 'max' limits." + + return "%s{%s,%s}" % (s, min is not None and min or "", + max is not None and max or "") def get_patterns(syntax): @@ -38,6 +76,7 @@ patterns = {} for name, value in syntax.items(): value = value.replace(r"\N", ws_excl_nl) + value = value.replace(r"\Q", quotes) patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE) return patterns @@ -317,7 +356,7 @@ """ if self.read_until(["header"], False) == "": # None means no header - region.type = self.match_group() + region.type = self.match_group("args") def parse_region_opaque(self, region): diff -r d517824d2df5 -r 285d1e37c8ad moinformat/parsers/moin.py --- a/moinformat/parsers/moin.py Sun Jul 15 14:19:42 2018 +0200 +++ b/moinformat/parsers/moin.py Sun Jul 15 19:20:41 2018 +0200 @@ -19,7 +19,8 @@ this program. If not, see . """ -from moinformat.parsers.common import ParserBase, get_patterns, get_subset +from moinformat.parsers.common import ParserBase, get_patterns, \ + excl, expect, group, optional, recur, repeat from moinformat.serialisers import serialise from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \ Larger, Link, List, ListItem, Monospace, Region, \ @@ -27,6 +28,8 @@ Superscript, Table, TableAttr, TableAttrs, \ TableCell, TableRow, Text, Underline +join = "".join + class MoinParser(ParserBase): "A wiki region parser." @@ -82,13 +85,13 @@ "Handle an attribute name within 'attrs'." - name = self.match_group() + name = self.match_group("name") attr = TableAttr(name) preceding = self.read_until(["attrvalue"], False) if preceding == "": - attr.quote = self.match_group(1) - attr.value = self.match_group(2) + attr.quote = self.match_group("quote") + attr.value = self.match_group("value") attrs.append(attr) @@ -103,7 +106,7 @@ "Handle a definition item within 'region'." - pad = self.match_group(1) + pad = self.match_group("pad") item = DefItem([], pad, extra) self.parse_region_details(item, ["listitemend"]) self.add_node(region, item) @@ -113,7 +116,7 @@ "Handle a definition term within 'region'." - pad = self.match_group(1) + pad = self.match_group("pad") term = DefTerm([], pad) self.parse_region_details(term, ["deftermend", "deftermsep"]) self.add_node(region, term) @@ -124,7 +127,7 @@ "Handle an empty definition term within 'region'." - extra = self.match_group(1) + extra = self.match_group("pad") self.parse_region_details(region, ["deftermsep"]) self.parse_defitem(region, extra) @@ -132,7 +135,7 @@ "Handle emphasis and strong styles." - n = len(self.match_group(1)) + n = len(self.match_group("style")) # Handle endings. @@ -171,7 +174,7 @@ "Handle horizontal alignment within 'attrs'." - value = self.match_group() + value = self.match_group("value") attr = TableAttr("halign", value == "(" and "left" or value == ")" and "right" or "center", True) attrs.append(attr) @@ -179,9 +182,9 @@ "Handle a heading." - start_extra = self.match_group(1) - level = len(self.match_group(2)) - start_pad = self.match_group(3) + start_extra = self.match_group("extra") + level = len(self.match_group("level")) + start_pad = self.match_group("pad") heading = Heading([], level, start_extra, start_pad) self.parse_region_details(heading, ["headingend"] + self.inline_pattern_names) self.add_node(region, heading) @@ -191,10 +194,10 @@ "Handle the end of a heading." - level = len(self.match_group(2)) + level = len(self.match_group("level")) if heading.level == level: - heading.end_pad = self.match_group(1) - heading.end_extra = self.match_group(3) + heading.end_pad = self.match_group("pad") + heading.end_extra = self.match_group("extra") raise StopIteration def parse_list(self, item): @@ -209,16 +212,10 @@ "Handle a list item marker within 'region'." - final = len(self.match_groups()) - - indent = len(self.match_group(1)) - marker = self.match_group(2) - space = self.match_group(final) - - if final > 3: - num = self.match_group(3) - else: - num = None + indent = len(self.match_group("indent")) + marker = self.match_group("marker") + num = self.match_group("num") + space = self.match_group("pad") last = region.node(-1) @@ -265,7 +262,7 @@ "Handle a horizontal rule within 'region'." - length = len(self.match_group(1)) + length = len(self.match_group("rule")) rule = Rule(length) self.add_node(region, rule) self.new_block(region) @@ -276,8 +273,8 @@ # Parse the section and start a new block after the section. - indent = len(self.match_group(2)) - level = len(self.match_group(3)) + indent = len(self.match_group("indent")) + level = len(self.match_group("level")) self.add_node(region, self.parse_region(level, indent, "inline")) self.new_block(region) @@ -285,7 +282,7 @@ "Handle the end of a new section within 'region'." - feature = self.match_group() + feature = self.match_group("level") if region.have_end(feature): raise StopIteration else: @@ -350,7 +347,7 @@ # Handle the end of the row. if self.matching_pattern() == "tableend": - trailing = self.match_group() + trailing = self.match_group("extra") # If the cell was started but not finished, convert the row into text. @@ -388,7 +385,7 @@ "Handle vertical alignment within 'attrs'." - value = self.match_group() + value = self.match_group("value") attr = TableAttr("valign", value == "^" and "top" or "bottom", True) attrs.append(attr) @@ -408,8 +405,8 @@ self.parse_inline(region, Larger, "larger") def parse_link(self, region): - target = self.match_group(1) - text = self.match_group(2) + target = self.match_group("target") + text = self.match_group("text") link = Link(text and [Text(text)], target) region.append_inline(link) @@ -439,7 +436,7 @@ "Handle a table attribute." - attrs.append(TableAttr(pattern_name, self.match_group(), True)) + attrs.append(TableAttr(pattern_name, self.match_group("value"), True)) def parse_colour(self, cell): self.parse_table_attr(cell, "colour") @@ -459,80 +456,145 @@ syntax = { # Page regions: - "regionstart" : r"((\N*)([{]{3,}))", # [line-start ws] {{{... - "regionend" : r"(?:\N*)([}]{3,})", # [line-start ws] }}}... - "header" : r"#!(.*?)\n", # #! char-excl-nl + + "regionstart" : join((group("indent", r"\N*"), # ws... (optional) + group("level", repeat("[{]", 3)))), # {{{... + + "regionend" : join((r"\N*", # ws... (optional) + group("level", repeat("[}]", 3)))), # }}}... + + "header" : join(("#!", # #! + group("args", ".*?"), "\n")), # text-excl-nl # Region contents: # Line-oriented patterns: - # blank line - "break" : r"^(\s*?)\n", - # ws... expecting text :: - "defterm" : r"^(\N+)(?=.+?::)", - # ws... expecting :: ws... - "defterm_empty" : r"^(\N+)(?=::\s+)", - # [ws...] =... ws... expecting headingend - "heading" : r"^(\N*)(?P=+)(\s+)(?=.*?\N+(?P=x)\N*$)", - # ws... list-item [ws...] - "listitem" : r"^(\N+)(\*)(\s*)", - # ws... number-item ws... [# number] - "listitem_num" : r"^(\N+)(\d+\.)(?:#(\d+))?(\s+)", - # ws... alpha-item ws... [# number] - "listitem_alpha": r"^(\N+)([aA]\.)(?:#(\d+))?(\s+)", - # ws... roman-item ws... [# number] - "listitem_roman": r"^(\N+)([iI]\.)(?:#(\d+))?(\s+)", - # ws... dot-item [ws...] - "listitem_dot" : r"^(\N+)(\.)(\s*)", - # || - "tablerow" : r"^\|\|", + + "break" : r"^(\s*?)\n", # blank line + + "defterm" : join(("^", + group("pad", r"\N+"), # ws... + expect(".+?::"))), # text :: + + "defterm_empty" : join(("^", + group("pad", r"\N+"), # ws... + expect("::\s+"))), # :: + # ws... (optional) + + "heading" : join(("^", + group("extra", r"\N*"), # ws... (optional) + group("level", "=+"), # =... + group("pad", r"\s+"), # ws... + expect(join((r".*?\N+", # text + recur("level"), # =... + r"\N*$"))))), # ws... (optional) + + "listitem" : join(("^", + group("indent", r"\N+"), # ws... + group("marker", r"\*"), # list-marker + group("pad", r"\s*"))), # ws... (optional) + + "listitem_num" : join(("^", + group("indent", r"\N+"), # ws... + group("marker", r"\d+\."), # decimal-marker + optional(join(("#", group("num", r"\d+")))), # # num (optional) + group("pad", r"\s+"))), # ws... + + "listitem_alpha": join(("^", + group("indent", r"\N+"), # ws... + group("marker", r"[aA]\."), # alpha-marker + optional(join(("#", group("num", r"\d+")))), # # num (optional) + group("pad", r"\s+"))), # ws... + + "listitem_roman": join(("^", + group("indent", r"\N+"), # ws... + group("marker", r"[iI]\."), # roman-marker + optional(join(("#", group("num", r"\d+")))), # # num (optional) + group("pad", r"\s+"))), # ws... + + "listitem_dot" : join(("^", + group("indent", r"\N+"), # ws... + group("marker", r"\."), # dot-marker + group("pad", r"\s*"))), # ws... (optional) + + "tablerow" : r"^\|\|", # || # Region contents: # Inline patterns: - "fontstyle" : r"('{2,6})", - "larger" : r"~\+", - "monospace" : r"`", - "rule" : r"(-----*)", # ----... - "smaller" : r"~-", - "strike" : r"--\(", # --( - "sub" : r",,", - "super" : r"\^", - "underline" : r"__", + + "fontstyle" : group("style", repeat("'", 2, 6)), # ''... + "larger" : r"~\+", # ~+ + "monospace" : r"`", # ` + "rule" : group("rule", "-----*"), # ----... + "smaller" : r"~-", # ~- + "strike" : r"--\(", # --( + "sub" : r",,", # ,, + "super" : r"\^", # ^ + "underline" : r"__", # __ # Complete inline patterns: - "link" : r"\[\[(.*?)(?:\|(.*?))?]]", # [[target]] or [[target|text]] + + "link" : join((r"\[\[", # [[ + group("target", ".*?"), # target + optional(join((r"\|", group("text", ".*?")))), # | text (optional) + "]]")), # ]] # Inline contents: - "largerend" : r"\+~", - "monospaceend" : r"`", - "smallerend" : r"-~", - "strikeend" : r"\)--", # )-- - "subend" : r",,", - "superend" : r"\^", - "underlineend" : r"__", + + "largerend" : r"\+~", # +~ + "monospaceend" : r"`", # ` + "smallerend" : r"-~", # -~ + "strikeend" : r"\)--", # )-- + "subend" : r",,", # ,, + "superend" : r"\^", # ^ + "underlineend" : r"__", # __ # Heading contents: - "headingend" : r"(\N+)(=+)(\N*$)", # ws... =... [ws...] nl + + "headingend" : join((group("pad", r"\N+"), # ws... + group("level", "=+"), # =... + group("extra", r"\N*$"))), # ws (optional) # List contents: - "deftermend" : r"::(\s*?\n)", - "deftermsep" : r"::(\s+)", - "listitemend" : r"^", # next line + + "deftermend" : join(("::", group("pad", r"\s*?\n"))), # :: + # ws... (optional) + # nl + + "deftermsep" : join(("::", group("pad", r"\s+"))), # :: + # ws... (optional) + + "listitemend" : r"^", # next line # Table contents: - "tableattrs" : r"<", - "tablecell" : r"\|\|", - "tableend" : r"(\s*?)^", # [ws...] next line + + "tableattrs" : r"<", # < + "tablecell" : r"\|\|", # || + + "tableend" : join((group("extra", r"\s*?"), # ws... (optional) + "^")), # next line # Table attributes: - "tableattrsend" : r">", - "halign" : r"([(:)])", - "valign" : r"([v^])", - "colour" : r"(\#[0-9A-F]{6})", - "colspan" : r"-(\d+)", - "rowspan" : r"\|(\d+)", - "width" : r"(\d+%)", - "attrname" : r"((?![-\d])[-\w]+)", # not-dash-or-digit dash-or-word-char... - "attrvalue" : r"""=(?P['"])(.*?)(?P=x)""", + + "tableattrsend" : r">", # > + "halign" : group("value", "[(:)]"), # halign-marker + "valign" : group("value", "[v^]"), # valign-marker + "colour" : group("value", join(("\#", # # + repeat("[0-9A-F]", 6, 6)))), # nnnnnn + + "colspan" : join(("-", # - + group("value", "\d+"))), # n... + + "rowspan" : join((r"\|", # | + group("value", "\d+"))), # n... + + "width" : group("value", "\d+%"), # n... % + + "attrname" : join((excl(r"[-\d]"), # not-dash-or-digit + group("name", r"[-\w]+"))), # dash-digit-letter... + + "attrvalue" : join(("=", group("quote", r"\Q"), # quote + group("value", ".*?"), # non-quote... (optional) + recur("quote"))), # quote } patterns = get_patterns(syntax) @@ -560,7 +622,7 @@ region_pattern_names = inline_pattern_names + list_pattern_names + [ "break", "heading", "defterm", "defterm_empty", - "regionstart", "regionend", "rule", "tablerow", + "regionend", "rule", "tablerow", ] table_region_pattern_names = inline_pattern_names + [ diff -r d517824d2df5 -r 285d1e37c8ad moinformat/parsers/table.py --- a/moinformat/parsers/table.py Sun Jul 15 14:19:42 2018 +0200 +++ b/moinformat/parsers/table.py Sun Jul 15 19:20:41 2018 +0200 @@ -19,11 +19,12 @@ this program. If not, see . """ -from moinformat.parsers.common import get_patterns +from moinformat.parsers.common import get_patterns, \ + excl, expect, group from moinformat.parsers.moin import MoinParser from moinformat.tree import Table, TableAttrs, TableCell, TableRow, Text - +join = "".join # Parser functionality. @@ -90,11 +91,22 @@ syntax.update(MoinParser.syntax) syntax.update({ # At start of line: - "rowsep" : r"^==(?!.*==\s*?$)(?=\N*?)", # == not-heading ws-excl-nl - "continuation" : r"^(\N*)\.\.(?!\.)(?=\N)", # .. ws-excl-nl or .. not-dot + + "rowsep" : join(("^==", # == + excl(r".*==\s*?$"), # not-heading + expect(r"\N*?"))), # ws-excl-nl + + "continuation" : join(("^", + group("indent", r"\N*"), # ws... (optional) + r"\.\.", # .. + excl(r"\."), # not-. + expect(r"\N"))), # ws # Within text: - "columnsep" : r"\|\|(?!\|)(?=\N)", # || ws-excl-nl or || not-pipe + + "columnsep" : join((r"\|\|", # || + excl(r"\|"), # not-| + expect(r"\N"))), # ws }) patterns = get_patterns(syntax) @@ -104,7 +116,7 @@ # Pattern details. table_region_pattern_names = MoinParser.region_pattern_names + [ - "columnsep", "continuation", "regionend", "rowsep", + "columnsep", "continuation", "rowsep", ] diff -r d517824d2df5 -r 285d1e37c8ad moinformat/serialisers/moin.py --- a/moinformat/serialisers/moin.py Sun Jul 15 14:19:42 2018 +0200 +++ b/moinformat/serialisers/moin.py Sun Jul 15 19:20:41 2018 +0200 @@ -95,7 +95,7 @@ pass def start_listitem(self, indent, marker, space, num): - self.out("%s%s%s%s" % (indent * " ", marker, num is not None and "#%s" % num or "", space)) + self.out("%s%s%s%s" % (indent * " ", marker, num and "#%s" % num or "", space)) def end_listitem(self, indent, marker, space, num): pass