MoinLight (file moinformat/parsing.py at 8dbedbb8ef8b)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki parsing functionality.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Region, Text    23 import re    24     25 # Pattern management.    26     27 def get_patterns(syntax):    28     29     "Define patterns for the regular expressions in the 'syntax' mapping."    30     31     patterns = {}    32     for name, value in syntax.items():    33         patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)    34     return patterns    35     36 def combine_patterns(patterns, syntax):    37     38     "Combine 'patterns' with those defined by the given 'syntax' mapping."    39     40     p = {}    41     p.update(patterns)    42     p.update(get_patterns(syntax))    43     return p    44     45 # Tokenising functions.    46     47 class TokenStream:    48     49     "A stream of tokens taken from a string."    50     51     def __init__(self, s, patterns):    52         self.s = s    53         self.patterns = patterns    54         self.pos = 0    55         self.match = None    56         self.matching = None    57     58     def rewind(self, length):    59     60         "Rewind in the string by 'length'."    
61     62         self.pos -= min(length, self.pos)    63     64     def read_until(self, pattern_names, remaining=True):    65     66         """    67         Find the first match for the given 'pattern_names'. Return the text    68         preceding any match, the remaining text if no match was found, or None    69         if no match was found and 'remaining' is given as a false value.    70         """    71     72         first = None    73         self.matching = None    74     75         # Find the first matching pattern.    76     77         for pattern_name in pattern_names:    78             match = self.patterns[pattern_name].search(self.s, self.pos)    79             if match:    80                 start, end = match.span()    81                 if self.matching is None or start < first:    82                     first = start    83                     self.matching = pattern_name    84                     self.match = match    85     86         if self.matching is None:    87             if remaining:    88                 return self.s[self.pos:]    89             else:    90                 return None    91         else:    92             return self.s[self.pos:first]    93     94     def read_match(self, group=1):    95     96         """    97         Return the matched text, updating the position in the stream. If 'group'    98         is specified, the indicated group in a match will be returned.    99         Typically, group 1 should contain all pertinent data, but groups defined   100         within group 1 can provide sections of the data.   101         """   102    103         if self.match:   104             _start, self.pos = self.match.span()   105             try:   106                 return self.match.group(group)   107             except IndexError:   108                 return ""   109         else:   110             self.pos = len(self.s)   111             return None   112    113    114    115 # Utility functions.   
def new_block(region):

    "Start a new block in 'region'."

    region.add(Block([]))



# Parser abstractions.

class ParserBase:

    "Common parsing methods."

    def __init__(self, formats=None):

        """
        Initialise the parser with any given 'formats' mapping from region type
        names to parser objects. Without such a mapping, every region is parsed
        as an opaque region.
        """

        self.formats = formats

    def get_items(self, s):

        "Return a sequence of token items for 's'."

        raise NotImplementedError

    def parse(self, s):

        """
        Parse page text 's'. Pages consist of regions delimited by markers.
        Return the region produced for the items obtained from 's'.
        """

        return self.parse_region(self.get_items(s))

    def parse_region(self, items, level=0, indent=0):

        """
        Parse the data provided by 'items' to populate a region with the given
        'level' at the given 'indent'. Return the populated region.
        """

        region = Region([], level, indent)

        # Parse section headers, then parse according to region type.

        self.parse_region_header(items, region)
        self.parse_region_type(items, region)

        return region

    def parse_region_type(self, items, region):

        """
        Given data provided by 'items', use configured parsers to parse the
        'region' based on its type.
        """

        formats = self.formats

        # Find an appropriate parser given the type. A membership test is used
        # instead of has_key, which Python 3 dictionaries do not provide, and
        # an absent (None) mapping is treated as offering no parsers.

        if formats and region.type in formats:
            formats[region.type].parse_region_content(items, region)

        # Otherwise, treat the section as opaque.

        else:
            self.parse_region_opaque(items, region)

    def parse_region_header(self, items, region):

        """
        Parse the region header from the 'items', setting it for the given 'region'.
        The region type is only set where the header pattern matches with no
        text preceding it.
        """

        if items.read_until(["header"], False) == "": # None means no header
            region.type = items.read_match()

    def parse_region_opaque(self, items, region):

        "Parse the data provided by 'items' to populate an opaque 'region'."

        region.transparent = False
        self.parse_region_details(items, region, ["regionend"])

    def parse_region_content(self, items, region):

        "Parse the data provided by 'items' to populate the given 'region'."

        pass

    # Parsing utilities.

    def parse_region_details(self, items, region, pattern_names):

        """
        Parse 'items' within 'region' searching using 'pattern_names'. Text
        found between features is added to the region, and each feature is
        passed to the handler registered for its pattern name or, without such
        a handler, added to the region as text. The region is normalised once
        parsing stops.

        NOTE: the 'handlers' mapping consulted here is not defined by this
        class; presumably subclasses provide it as a mapping from pattern names
        to callables accepting the parser, the items and the region.
        """

        try:
            while True:

                # Obtain text before any marker or the end of the input.

                preceding = items.read_until(pattern_names)
                if preceding:
                    region.append_inline(Text(preceding))

                # End of input.

                if not items.matching:
                    break

                # Obtain any feature.

                feature = items.read_match()
                handler = self.handlers.get(items.matching)

                # Handle each feature or add text to the region.

                if handler:
                    handler(self, items, region)
                else:
                    region.append_inline(Text(feature))

        # Handlers stop the loop by raising StopIteration (see end_region).

        except StopIteration:
            pass

        region.normalise()

    def end_region(self, items, region):

        "End the parsing of 'region', breaking out of the parsing loop."

        raise StopIteration

# vim: tabstop=4 expandtab shiftwidth=4