MoinLight (file moinformat/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, ListItem, Region, Rule, Text    23 import re    24     25 # Regular expressions.    26     27 syntax = {    28     # Page regions:    29     "regionstart"   : (r"((^\s*)([{]{3,}))",            re.MULTILINE | re.DOTALL),  # {{{...    30     "regionend"     : (r"^\s*([}]{3,})",                re.MULTILINE | re.DOTALL),  # }}}...    31     "header"        : (r"#!(.*?)\n",                    0),                         # #! char-excl-nl    32     33     # Region contents:    34     "break"         : (r"^(\s*?)\n",                    re.MULTILINE),              # blank line    35     "listitem"      : (r"^((\s+)([*]|\d+[.]))",         re.MULTILINE),              # indent (list-item or number-item)    36     "rule"          : (r"(-----*)",                     0),                         # ----...    37     38     # List contents:    39     "listitemend"   : (r"^",                            re.MULTILINE),              # next line    40     }    41     42 # Define patterns for the regular expressions.    
# Compile each (expression, flags) pair from the syntax table once, at import
# time, so that the tokeniser can search by pattern name.

patterns = {}
for name, (value, flags) in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | flags)



# Tokenising functions.

class TokenStream:

    "A stream of tokens taken from a string."

    def __init__(self, s):

        "Initialise the stream to read from the start of the string 's'."

        self.s = s

        # Offset in 's' from which the next search starts.

        self.pos = 0

        # The match object and pattern name found by the latest search, or
        # None where the latest search found nothing.

        self.match = None
        self.matching = None

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The stream position is not changed by this method: 'read_match' must be
        called to consume the match. Where several patterns match at the same
        position, the one listed earliest in 'pattern_names' is chosen.
        """

        first = None
        self.matching = None

        # Forget any match from an earlier search. Otherwise, a failed search
        # would leave the old match in place and 'read_match' would return its
        # text and move the position back to the end of that old match.

        self.match = None

        # Find the first matching pattern.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match:
                start = match.start()
                if self.matching is None or start < first:
                    first = start
                    self.matching = pattern_name
                    self.match = match

        # Without a match, provide the rest of the input if requested.

        if self.matching is None:
            if remaining:
                return self.s[self.pos:]
            else:
                return None

        return self.s[self.pos:first]

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the pattern does not define 'group'.
        Where the latest search found no match, the position is moved to the
        end of the input and None is returned.
        """

        if self.match:

            # Continue reading from the end of the whole match, regardless of
            # which group is being returned.

            _start, self.pos = self.match.span()
            try:
                return self.match.group(group)
            except IndexError:
                return ""
        else:
            self.pos = len(self.s)
            return None



# Parser functions.

def parse_page(s):

    """
    Parse page text 's'. Pages consist of regions delimited by markers.

    Return the top-level region produced by 'parse_region' for the whole text.
    """

    return parse_region(TokenStream(s))

def parse_region(items, level=0, indent=0):

    """
    Parse the data provided by 'items' to populate a region with the given
    'level' at the given 'indent'.

    Return the populated region.
    """

    region = Region([], level, indent)

    # Parse section headers.

    parse_region_header(items, region)

    # Parse section body. The header, if any, has been recorded on the region
    # before it is asked whether it is transparent.

    if region.is_transparent():
        parse_region_wiki(items, region)
    else:
        parse_region_opaque(items, region)

    return region

def parse_region_header(items, region):

    """
    Parse the region header from the 'items', setting it for the given 'region'.

    A header is only recognised where it occurs immediately at the current
    position. The text following "#!" on the header line becomes the region
    type and the header line is consumed.
    """

    # An empty preceding text means that the header starts exactly here.
    # None means that there is no header at all, and any other text means that
    # a header only occurs later in the input.

    if items.read_until(["header"], False) == "":
        region.type = items.read_match()

def parse_region_wiki(items, region):

    "Parse the data provided by 'items' to populate a wiki 'region'."

    # Provide an initial block to hold any text.

    new_block(region)
    parse_region_details(items, region, ["break", "listitem", "regionstart", "regionend", "rule"])

def parse_region_opaque(items, region):

    "Parse the data provided by 'items' to populate an opaque 'region'."

    # Only the end of the region is of interest: everything else is text.

    parse_region_details(items, region, ["regionend"])

def parse_region_details(items, region, pattern_names):

    """
    Parse 'items' within 'region' searching using 'pattern_names'.

    Text found before each match is added to 'region', and each match is then
    passed to the handler registered for its pattern name. Parsing stops at the
    end of the input or when a handler raises StopIteration. The region is
    normalised before returning.
    """

    try:
        while True:

            # Obtain text before any marker or the end of the input.

            preceding = items.read_until(pattern_names)
            if preceding:
                region.append_text(Text(preceding))

            # End of input.

            if not items.matching:
                break

            # Obtain any feature.

            feature = items.read_match()
            handler = handlers.get(items.matching)

            # Handle each feature or add text to the region.

            if handler:
                handler(items, region)
            else:
                region.append_text(Text(feature))

    # Handlers signal the end of the current region or list item by raising
    # StopIteration.

    except StopIteration:
        pass

    region.normalise()

def end_region(items, region):

    "End the parsing of 'region'."

    raise StopIteration

def parse_break(items, region):

    "Handle a paragraph break within 'region'."

    # Mark any previous block as not being the final one in a sequence.

    block = region.nodes[-1]
    block.final = False
    new_block(region)

def parse_listitem_end(items, region):

    "Handle the end of a list."

    raise StopIteration

def parse_listitem(items, region):

    "Handle a list item marker within 'region'."

    # Collect the item's content up to the start of the next line, then add
    # the item to the region and start a new block after it.

    item = ListItem([])
    parse_region_details(items, item, ["listitemend"])
    region.append(item)
    new_block(region)

def parse_rule(items, region):

    "Handle a horizontal rule within 'region'."

    # The number of dashes matched is recorded on the rule.

    length = len(items.read_match(1))
    rule = Rule(length)
    region.append(rule)
    new_block(region)

def parse_section(items, region):

    "Handle the start of a new section within 'region'."

    # Parse the section and start a new block after the section. The indent is
    # the length of the text before the braces and the level is the number of
    # opening braces.

    indent = len(items.read_match(2))
    level = len(items.read_match(3))
    region.append(parse_region(items, level, indent))
    new_block(region)

def parse_section_end(items, region):

    "Handle the end of a new section within 'region'."

    # Only the closing braces are obtained here.
    # NOTE(review): any whitespace matched before the braces has already been
    # consumed and is not added to the region when the braces do not end it.
    # Confirm that this is intended.

    feature = items.read_match()
    if region.have_end(feature):
        raise StopIteration
    else:
        region.append_text(Text(feature))

# Pattern handlers.
#
# Handlers are looked up using the name of the pattern that matched. The None
# entry is not reached from parse_region_details, which stops before any lookup
# when nothing matched.

handlers = {
    None : end_region,
    "break" : parse_break,
    "listitemend" : parse_listitem_end,
    "listitem" : parse_listitem,
    "regionstart" : parse_section,
    "regionend" : parse_section_end,
    "rule" : parse_rule,
    }

def new_block(region):

    "Start a new block in 'region'."

    block = Block([])
    region.append(block)



# Top-level functions.

parse = parse_page

# vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/__init__.py