MoinLight (file moinformat/parsing.py at 3993165616f8)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki parsing functionality.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Region, Text    23     24 # Tokenising functions.    25     26 class TokenStream:    27     28     "A stream of tokens taken from a string."    29     30     def __init__(self, s, patterns):    31         self.s = s    32         self.patterns = patterns    33         self.pos = 0    34         self.match = None    35         self.matching = None    36     37     def rewind(self, length):    38     39         "Rewind in the string by 'length'."    40     41         self.pos -= min(length, self.pos)    42     43     def read_until(self, pattern_names, remaining=True):    44     45         """    46         Find the first match for the given 'pattern_names'. Return the text    47         preceding any match, the remaining text if no match was found, or None    48         if no match was found and 'remaining' is given as a false value.    49         """    50     51         first = None    52         self.matching = None    53     54         # Find the first matching pattern.    55     56         for pattern_name in pattern_names:    57             match = self.patterns[pattern_name].search(self.s, self.pos)    58             if match:    59                 start, end = match.span()    60                 if self.matching is None or start < first:    61                     first = start    62                     self.matching = pattern_name    63                     self.match = match    64     65         if self.matching is None:    66             if remaining:    67                 return self.s[self.pos:]    68             else:    69                 return None    70         else:    71             return self.s[self.pos:first]    72     73     def read_match(self, group=1):    74     75         """    76         Return the matched text, updating the position in the stream. If 'group'    77         is specified, the indicated group in a match will be returned.    78         Typically, group 1 should contain all pertinent data, but groups defined    79         within group 1 can provide sections of the data.    80         """    81     82         if self.match:    83             _start, self.pos = self.match.span()    84             try:    85                 return self.match.group(group)    86             except IndexError:    87                 return ""    88         else:    89             self.pos = len(self.s)    90             return None    91     92     93     94 # Utility functions.    95     96 def new_block(region):    97     98     "Start a new block in 'region'."    99    100     region.add(Block([]))   101    102    103    104 # Parser abstractions.   105    106 class ParserBase:   107    108     "Common parsing methods."   109    110     def __init__(self, formats=None):   111    112         """   113         Initialise the parser with any given 'formats' mapping from region type   114         names to parser objects.   115         """   116    117         self.formats = formats   118    119     def get_items(self, s):   120    121         "Return a sequence of token items for 's'."   122    123         raise NotImplementedError   124    125     def parse(self, s):   126    127         """   128         Parse page text 's'. Pages consist of regions delimited by markers.   129         """   130    131         return self.parse_region(self.get_items(s))   132    133     def parse_region(self, items, level=0, indent=0):   134    135         """   136         Parse the data provided by 'items' to populate a region with the given   137         'level' at the given 'indent'.   138         """   139    140         region = Region([], level, indent)   141    142         # Parse section headers, then parse according to region type.   143    144         self.parse_region_header(items, region)   145         self.parse_region_type(items, region)   146    147         return region   148    149     def parse_region_type(self, items, region):   150    151         """   152         Given data provided by 'items', use configured parsers to parse the   153         'region' based on its type.   154         """   155    156         # Find an appropriate parser given the type.   157    158         if self.formats.has_key(region.type):   159             self.formats[region.type].parse_region_content(items, region)   160    161         # Otherwise, treat the section as opaque.   162    163         else:   164             self.parse_region_opaque(items, region)   165    166     def parse_region_header(self, items, region):   167    168         """   169         Parse the region header from the 'items', setting it for the given 'region'.   170         """   171    172         if items.read_until(["header"], False) == "": # None means no header   173             region.type = items.read_match()   174    175     def parse_region_opaque(self, items, region):   176    177         "Parse the data provided by 'items' to populate an opaque 'region'."   178    179         region.transparent = False   180         self.parse_region_details(items, region, ["regionend"])   181    182     def parse_region_content(self, items, region):   183    184         "Parse the data provided by 'items' to populate the given 'region'."   185    186         pass   187    188     # Parsing utilities.   189    190     def parse_region_details(self, items, region, pattern_names):   191    192         "Parse 'items' within 'region' searching using 'pattern_names'."   193    194         try:   195             while True:   196    197                 # Obtain text before any marker or the end of the input.   198    199                 preceding = items.read_until(pattern_names)   200                 if preceding:   201                     region.append_inline(Text(preceding))   202    203                 # End of input.   204    205                 if not items.matching:   206                     break   207    208                 # Obtain any feature.   209    210                 feature = items.read_match()   211                 handler = self.handlers.get(items.matching)   212    213                 # Handle each feature or add text to the region.   214    215                 if handler:   216                     handler(self, items, region)   217                 else:   218                     region.append_inline(Text(feature))   219    220         except StopIteration:   221             pass   222    223         region.normalise()   224    225     def end_region(self, items, region):   226    227         "End the parsing of 'region', breaking out of the parsing loop."   228    229         raise StopIteration   230    231 # vim: tabstop=4 expandtab shiftwidth=4