# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1531692263 -7200
# Node ID 225f92510d629d692de720612d0dac7940855b9d
# Parent  4a05d10e795eeca5c3efcf081c012c1472ed0560
Combine patterns in order to search using a single regular expression.
This requires all group names to be prefixed with the name of their pattern,
and a special null (empty) group named group_<pattern name> to be appended to
each constituent pattern so that the pattern responsible for a match can be
identified. The match groups are then filtered so that only the matching
pattern's groups are retained.

diff -r 4a05d10e795e -r 225f92510d62 moinformat/parsers/common.py
--- a/moinformat/parsers/common.py	Sun Jul 15 23:59:08 2018 +0200
+++ b/moinformat/parsers/common.py	Mon Jul 16 00:04:23 2018 +0200
@@ -42,7 +42,10 @@
 
 def group(name, s):
 
-    "Return a pattern group having 'name' and the pattern string 's'."
+    """
+    Return a pattern for the group having the given 'name' and employing the
+    pattern string 's'.
+    """
 
     return "(?P<%s>%s)" % (name, s)
 
@@ -69,25 +72,44 @@
 
     """
     Define patterns for the regular expressions in the 'syntax' mapping. In each
-    pattern, replace \N with a pattern for matching whitespace excluding
-    newlines.
+    pattern, replace...
+
+    \N with a pattern for matching whitespace excluding newlines
+    \Q with a pattern for matching quotation marks
+
+    Group names are also qualified with a pattern name prefix.
     """
 
     patterns = {}
+
     for name, value in syntax.items():
         value = value.replace(r"\N", ws_excl_nl)
         value = value.replace(r"\Q", quotes)
-        patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
+
+        # Add the name to group names as a prefix.
+
+        value = value.replace("(?P<", "(?P<%s_" % name)
+        value = value.replace("(?P=", "(?P=%s_" % name)
+
+        # Record the updated expression and add an identifying null group.
+
+        patterns[name] = "%s(?P<group_%s>)" % (value, name)
+
     return patterns
 
-def get_subset(d, keys):
+def get_expression(d, keys):
 
-    "Return a subset of 'd' having the given 'keys'."
+    """
+    Return a compiled expression combining patterns in 'd' having the given
+    'keys'.
+    """
 
-    subset = {}
+    subset = []
+
     for key in keys:
-        subset[key] = d[key]
-    return subset
+        subset.append(d[key])
+
+    return re.compile("|".join(subset), re.UNICODE | re.MULTILINE)
 
 
 
@@ -105,7 +127,7 @@
 
         self.match = None
         self.queued = None
-        self.match_start = None
+        self.groups = {}
 
         # Pattern name details.
 
@@ -123,56 +145,73 @@
 
         self.queued = self.match
 
-    def read_until(self, patterns, remaining=True):
+    def read_until(self, expression, remaining=True):
 
         """
-        Find the first match for the given 'patterns'. Return the text preceding
-        any match, the remaining text if no match was found, or None if no match
-        was found and 'remaining' is given as a false value.
+        Find the first match for the given 'expression'. Return the text
+        preceding any match, the remaining text if no match was found, or None
+        if no match was found and 'remaining' is given as a false value.
         """
 
         if self.queued:
             self.match = self.queued
             self.queued = None
         else:
-            self.match_start = None
             self.matching = None
 
             # Find the first matching pattern.
 
-            for pattern_name, pattern in patterns.items():
-                match = pattern.search(self.s, self.pos)
-                if match:
-                    start, end = match.span()
-                    if self.matching is None or start < self.start:
-                        self.start = start
-                        self.matching = pattern_name
+            match = expression.search(self.s, self.pos)
+
+            if match:
+                for name, value in match.groupdict().items():
+
+                    # Use a group with a non-null value to identify the
+                    # matching pattern.
+
+                    if name.startswith("group_") and value is not None:
+                        self.matching = name[len("group_"):]
+                        self.start, self.end = match.span()
                         self.match = match
+                        break
+
+        # Return the remaining text, if appropriate.
 
         if self.matching is None:
+            self.groups = {}
             if remaining:
                 return self.s[self.pos:]
             else:
                 return None
         else:
+            self.groups = self.filter_groups()
             return self.s[self.pos:self.start]
 
-    def match_group(self, group=1):
+    def filter_groups(self):
+
+        "Filter groups from the current match for the matching pattern."
+
+        d = {}
+        for key, value in self.match.groupdict().items():
+            if key.startswith("%s_" % self.matching):
+                d[key] = value
+        return d
+
+    def match_group(self, group=None):
 
         """
         Return the matched text, updating the position in the stream. If 'group'
         is specified, the indicated group in a match will be returned.
-        Typically, group 1 should contain all pertinent data, but groups defined
-        within group 1 can provide sections of the data.
+        Otherwise, the entire match is returned.
         """
 
         self.update_pos()
 
         if self.match:
-            try:
-                return self.match.group(group)
-            except IndexError:
-                return ""
+            if group is None:
+                return self.s[self.start:self.end]
+            else:
+                return self.groups.get("%s_%s" % (self.matching, group))
         else:
             return None
 
@@ -184,9 +223,12 @@
 
         if self.match:
             if groups is None:
-                return self.match.groups()
+                return self.groups
             else:
-                return self.match.groups(groups)
+                l = []
+                for group in groups:
+                    l.append(self.groups.get("%s_%s" % (self.matching, group)))
+                return l
         else:
             return []
 
@@ -233,11 +275,11 @@
         else:
             return None
 
-    def get_patterns(self, pattern_names):
+    def get_expression(self, pattern_names):
 
         "Return a mapping of the given 'pattern_names' to patterns."
 
-        return get_subset(self.patterns, pattern_names)
+        return get_expression(self.patterns, pattern_names)
 
     def get_items(self, s, pos=0):
 
@@ -260,12 +302,13 @@
         or None if no match was found and 'remaining' is given as a false value.
         """
 
-        return self.items.read_until(self.get_patterns(pattern_names))
+        return self.items.read_until(self.get_expression(pattern_names))
 
-    def match_group(self, group=1):
+    def match_group(self, group=None):
 
         """
-        Return the group of the matching pattern with the given 'group' number.
+        Return the group of the matching pattern with the given 'group'
+        identifier. If 'group' is omitted or None, return the entire match.
         """
 
         return self.items.match_group(group)
@@ -407,7 +450,7 @@
 
                 # Obtain any feature.
 
-                feature = self.match_group()
+                feature = self.match_group(None)
                 handler = self.handlers.get(self.matching_pattern())
 
                 # Handle each feature or add text to the region.