# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1493930340 -7200
# Node ID 3993165616f88b901f0897fe41fd0d7115d1454f
# Parent  4ebe552530c805b23503b0919fb70b4acbb60e84
Moved common parsing functionality into a separate module.
Eliminated "transparent" region decisions in the Region class, deciding region
transparency in parser classes instead.

diff -r 4ebe552530c8 -r 3993165616f8 moinformat/__init__.py
--- a/moinformat/__init__.py	Thu May 04 21:41:13 2017 +0200
+++ b/moinformat/__init__.py	Thu May 04 22:39:00 2017 +0200
@@ -19,11 +19,13 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
+from moinformat.parsing import ParserBase, TokenStream, new_block
 from moinformat.serialisers import serialise
-from moinformat.tree import Block, Break, DefItem, DefTerm, FontStyle, Heading, \
+from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \
                             Larger, ListItem, Monospace, Region, Rule, Smaller, \
                             Subscript, Superscript, TableAttr, TableAttrs, \
                             TableCell, TableRow, Text, Underline
+
 import re
 
 # Regular expressions.
@@ -120,141 +122,60 @@
 
 
 
-# Tokenising functions.
-
-class TokenStream:
-
-    "A stream of tokens taken from a string."
+class Parser(ParserBase):
 
-    def __init__(self, s):
-        self.s = s
-        self.pos = 0
-        self.match = None
-        self.matching = None
+    "A wiki region parser."
 
-    def rewind(self, length):
-
-        "Rewind in the string by 'length'."
-
-        self.pos -= min(length, self.pos)
-
-    def read_until(self, pattern_names, remaining=True):
+    def __init__(self, formats=None):
 
         """
-        Find the first match for the given 'pattern_names'. Return the text
-        preceding any match, the remaining text if no match was found, or None
-        if no match was found and 'remaining' is given as a false value.
+        Initialise the parser with any given 'formats' mapping from region type
+        names to parser objects.
         """
 
-        first = None
-        self.matching = None
-
-        # Find the first matching pattern.
-
-        for pattern_name in pattern_names:
-            match = patterns[pattern_name].search(self.s, self.pos)
-            if match:
-                start, end = match.span()
-                if self.matching is None or start < first:
-                    first = start
-                    self.matching = pattern_name
-                    self.match = match
+        supplied = formats          # the caller's mapping, before rebinding
+        formats = {"wiki" : self}   # this parser handles "wiki" regions
+        formats.update(supplied or {})  # caller's parsers take precedence
 
-        if self.matching is None:
-            if remaining:
-                return self.s[self.pos:]
-            else:
-                return None
-        else:
-            return self.s[self.pos:first]
-
-    def read_match(self, group=1):
-
-        """
-        Return the matched text, updating the position in the stream. If 'group'
-        is specified, the indicated group in a match will be returned.
-        Typically, group 1 should contain all pertinent data, but groups defined
-        within group 1 can provide sections of the data.
-        """
+        ParserBase.__init__(self, formats)
 
-        if self.match:
-            _start, self.pos = self.match.span()
-            try:
-                return self.match.group(group)
-            except IndexError:
-                return ""
-        else:
-            self.pos = len(self.s)
-            return None
-
-
-
-# Utility functions.
-
-def new_block(region):
+    def get_items(self, s):
 
-    "Start a new block in 'region'."
-
-    block = Block([])
-    region.add(block)
-
-
+        "Return a sequence of token items for 's'."
 
-# Parser abstraction.
-
-class Parser:
-
-    "An extensible parser."
-
-    def __init__(self, formats=None):
-        self.formats = formats
+        return TokenStream(s, patterns)
 
     # Principal parser methods.
 
-    def parse_page(self, s):
+    def parse(self, s):
 
         """
         Parse page text 's'. Pages consist of regions delimited by markers.
         """
 
-        return self.parse_region(TokenStream(s))
-
-    def parse_region(self, items, level=0, indent=0):
+        items = self.get_items(s)
+        region = Region([])
 
-        """
-        Parse the data provided by 'items' to populate a region with the given
-        'level' at the given 'indent'.
-        """
-
-        region = Region([], level, indent)
-
-        # Parse section headers.
+        # Parse page header.
 
         self.parse_region_header(items, region)
 
-        # Parse section body.
+        # A page whose header gave no type is wiki text for this parser.
+        # Otherwise, dispatch on the declared type to a suitable parser.
 
-        if region.is_transparent():
-            self.parse_region_wiki(items, region)
+        if not region.type:
+            self.parse_region_content(items, region)
         else:
-            self.parse_region_opaque(items, region)
+            self.parse_region_type(items, region)
 
         return region
 
-    def parse_region_header(self, items, region):
-
-        """
-        Parse the region header from the 'items', setting it for the given 'region'.
-        """
-
-        if items.read_until(["header"], False) == "": # None means no header
-            region.type = items.read_match()
-
-    def parse_region_wiki(self, items, region):
+    def parse_region_content(self, items, region):
 
         "Parse the data provided by 'items' to populate a wiki 'region'."
 
         new_block(region)
+
         self.parse_region_details(items, region, inline_pattern_names + [
             "break", "heading",
             "defterm", "defterm_empty",
@@ -265,12 +186,6 @@
             "tablerow",
             ])
 
-    def parse_region_opaque(self, items, region):
-
-        "Parse the data provided by 'items' to populate an opaque 'region'."
-
-        self.parse_region_details(items, region, ["regionend"])
-
     # Parser methods supporting different page features.
 
     def parse_attrname(self, items, attrs):
@@ -575,52 +490,9 @@
 
 
 
-    # Parsing utilities.
-
-    def parse_region_details(self, items, region, pattern_names):
-
-        "Parse 'items' within 'region' searching using 'pattern_names'."
-
-        try:
-            while True:
-
-                # Obtain text before any marker or the end of the input.
-
-                preceding = items.read_until(pattern_names)
-                if preceding:
-                    region.append_inline(Text(preceding))
-
-                # End of input.
-
-                if not items.matching:
-                    break
-
-                # Obtain any feature.
+    # Pattern handlers.
 
-                feature = items.read_match()
-                handler = self.handlers.get(items.matching)
-
-                # Handle each feature or add text to the region.
-
-                if handler:
-                    handler(self, items, region)
-                else:
-                    region.append_inline(Text(feature))
-
-        except StopIteration:
-            pass
-
-        region.normalise()
-
-    def end_region(self, items, region):
-
-        "End the parsing of 'region', breaking out of the parsing loop."
-
-        raise StopIteration
-
-
-
-    # Pattern handlers.
+    end_region = ParserBase.end_region
 
     handlers = {
         None : end_region,
@@ -672,6 +544,6 @@
 # Top-level functions.
 
 def parse(s, formats=None):
-    return Parser(formats).parse_page(s)
+    return Parser(formats).parse(s)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 4ebe552530c8 -r 3993165616f8 moinformat/parsing.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/parsing.py	Thu May 04 22:39:00 2017 +0200
@@ -0,0 +1,231 @@
+#!/usr/bin/env python
+
+"""
+Moin wiki parsing functionality.
+
+Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.tree import Block, Region, Text
+
+# Tokenising functions.
+
+class TokenStream:
+
+    "A cursor over a string that is advanced by matching named patterns."
+
+    def __init__(self, s, patterns):
+
+        "Start at the beginning of 's', searching with 'patterns' (by name)."
+
+        self.s, self.patterns = s, patterns
+        self.pos = 0
+        self.match = self.matching = None
+
+    def rewind(self, length):
+
+        "Move the position back by 'length', stopping at the start."
+
+        self.pos = max(self.pos - length, 0)
+
+    def read_until(self, pattern_names, remaining=True):
+
+        """
+        Search from the current position using each of 'pattern_names' and
+        remember the earliest match. Return the text before that match; with
+        no match, return the rest of the text, or None if 'remaining' is false.
+        """
+
+        earliest = None
+        self.matching = None
+
+        # Try every pattern, keeping whichever starts soonest.
+
+        for name in pattern_names:
+            found = self.patterns[name].search(self.s, self.pos)
+            if not found:
+                continue
+            begin = found.start()
+            if self.matching is None or begin < earliest:
+                earliest, self.matching, self.match = begin, name, found
+
+        # Report the text up to the match, or whatever is left over.
+
+        if self.matching is not None:
+            return self.s[self.pos:earliest]
+        if remaining:
+            return self.s[self.pos:]
+        return None
+
+    def read_match(self, group=1):
+
+        """
+        Consume the remembered match: move the position to its end and return
+        the text of 'group' (group 1 by default), or an empty string if the
+        match has no such group. If no match has been recorded, move to the
+        end of the text and return None.
+        """
+
+        if not self.match:
+            self.pos = len(self.s)
+            return None
+
+        self.pos = self.match.end()
+        try:
+            return self.match.group(group)
+        except IndexError:
+            return ""
+
+
+
+# Utility functions.
+
+def new_block(region):
+
+    "Add a new, empty Block to 'region' (via region.add)."
+
+    region.add(Block([]))
+
+
+
+# Parser abstractions.
+
+class ParserBase:
+
+    "Common parsing methods; subclasses provide get_items and 'handlers'."
+
+    def __init__(self, formats=None):
+
+        """
+        Initialise the parser with any given 'formats' mapping from region type
+        names to parser objects. Without 'formats', no parsers are registered.
+        """
+
+        self.formats = formats if formats is not None else {}
+
+    def get_items(self, s):
+
+        "Return a sequence of token items for 's'. Subclasses must override."
+
+        raise NotImplementedError
+
+    def parse(self, s):
+
+        """
+        Parse page text 's', returning the region populated from its tokens.
+        """
+
+        return self.parse_region(self.get_items(s))
+
+    def parse_region(self, items, level=0, indent=0):
+
+        """
+        Parse the data provided by 'items' to populate a region with the given
+        'level' at the given 'indent'.
+        """
+
+        region = Region([], level, indent)
+
+        # Parse section headers, then parse according to region type.
+
+        self.parse_region_header(items, region)
+        self.parse_region_type(items, region)
+
+        return region
+
+    def parse_region_type(self, items, region):
+
+        """
+        Given data provided by 'items', use configured parsers to parse the
+        'region' based on its type, treating unrecognised types as opaque.
+        """
+
+        # Find an appropriate parser given the type.
+
+        if region.type in self.formats:
+            self.formats[region.type].parse_region_content(items, region)
+
+        # Otherwise, treat the section as opaque.
+
+        else:
+            self.parse_region_opaque(items, region)
+
+    def parse_region_header(self, items, region):
+
+        """
+        Parse the region header from the 'items', setting it for the given 'region'.
+        """
+
+        if items.read_until(["header"], False) == "": # None means no header
+            region.type = items.read_match()
+
+    def parse_region_opaque(self, items, region):
+
+        "Mark 'region' as opaque and parse 'items' for its \"regionend\" only."
+
+        region.transparent = False
+        self.parse_region_details(items, region, ["regionend"])
+
+    def parse_region_content(self, items, region):
+
+        "Populate 'region' from 'items'. A no-op here; subclasses override."
+
+        pass
+
+    # Parsing utilities.
+
+    def parse_region_details(self, items, region, pattern_names):
+
+        "Parse 'items' into 'region', handling matches of 'pattern_names'."
+
+        try:
+            while True:
+
+                # Obtain text before any marker or the end of the input.
+
+                preceding = items.read_until(pattern_names)
+                if preceding:
+                    region.append_inline(Text(preceding))
+
+                # End of input.
+
+                if not items.matching:
+                    break
+
+                # Obtain any feature.
+
+                feature = items.read_match()
+                handler = self.handlers.get(items.matching)
+
+                # Call the handler (passing 'self' explicitly) or add the text.
+
+                if handler:
+                    handler(self, items, region)
+                else:
+                    region.append_inline(Text(feature))
+
+        except StopIteration:   # raised by end_region to leave the loop
+            pass
+
+        region.normalise()
+
+    def end_region(self, items, region):
+
+        "End the parsing of 'region', breaking out of the parsing loop."
+
+        raise StopIteration
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 4ebe552530c8 -r 3993165616f8 moinformat/tree.py
--- a/moinformat/tree.py	Thu May 04 21:41:13 2017 +0200
+++ b/moinformat/tree.py	Thu May 04 22:39:00 2017 +0200
@@ -97,13 +97,12 @@
 
     "A region of the page."
 
-    transparent_region_types = ["wiki"]
-
-    def __init__(self, nodes, level=0, indent=0, type=None):
+    def __init__(self, nodes, level=0, indent=0, type=None, transparent=True):
         Container.__init__(self, nodes)
         self.level = level
         self.indent = indent
         self.type = type
+        self.transparent = transparent
 
     def add(self, node):
         last = self.node(-1)
@@ -113,7 +112,7 @@
             self.append(node)
 
     def append_inline(self, node):
-        if self.is_transparent():
+        if self.transparent:
             self.nodes[-1].append(node)
         else:
             self.append(node)
@@ -121,9 +120,6 @@
     def have_end(self, s):
         return self.level and s.startswith("}") and self.level == len(s)
 
-    def is_transparent(self):
-        return not self.level or self.type in self.transparent_region_types
-
     def __repr__(self):
         return "Region(%r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type)