MoinLight (file moinformat.py at 1f77cbb15c53)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2012, 2013, 2015, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from cgi import escape    23 import re    24     25 # Regular expressions.    26     27 syntax = {    28     # Page regions:    29     "markers"       : (r"^\s*([{]{3,}|[}]{3,})",    re.MULTILINE | re.DOTALL),  # {{{... or }}}...    30     31     # Region contents:    32     "header"        : (r"\A#!(.*?)\n",              0),                         # #! char-excl-nl    33     "region text"   : (r"(^\s*$)",                  re.MULTILINE),              # blank line    34     }    35     36 # Define patterns for the regular expressions.    37     38 patterns = {}    39 for name, (value, flags) in syntax.items():    40     patterns[name] = re.compile(value, re.UNICODE | flags)    41     42     43     44 # Document nodes.    45     46 class Container:    47     48     "A container of document nodes."    49     50     def __init__(self, nodes):    51         self.nodes = nodes    52     53     def append(self, node):    54         self.nodes.append(node)    55     56     def normalise(self):    57     58         "Combine adjacent text nodes."    59     60         nodes = self.nodes    61         self.nodes = []    62         text = None    63     64         for node in nodes:    65     66             # Open a text node or merge text into an open node.    67     68             if isinstance(node, Text):    69                 if not text:    70                     text = node    71                 else:    72                     text.merge(node)    73     74             # Close any open text node and append the current node.    75     76             else:    77                 if text:    78                     self.append(text)    79                     text = None    80                 self.append(node)    81     82         # Add any open text node.    83     84         if text:    85             self.append(text)    86     87 class Region(Container):    88     89     "A region of the page."    90     91     transparent_region_types = ["wiki"]    92     93     def __init__(self, nodes, level=0, type=None):    94         Container.__init__(self, nodes)    95         self.level = level    96         self.type = type    97     98     def expand(self):    99    100         """   101         Expand text nodes by parsing them as region text, if the region is   102         understandable to the standard parser.   103         """   104    105         if self.is_transparent():   106             nodes = self.nodes   107             self.nodes = []   108    109             for node in nodes:   110                 if isinstance(node, Text):   111                     parse_region_text(node.s, self)   112                 else:   113                     self.append(node)   114    115     def have_start(self, s):   116         return self.is_transparent() and s.startswith("{")   117    118     def have_end(self, s):   119         return self.level and s.startswith("}") and self.level == len(s)   120    121     def is_transparent(self):   122         return not self.level or self.type in self.transparent_region_types   123    124     def __repr__(self):   125         return "Region(%r, %r, %r)" % (self.nodes, self.level, self.type)   126    127     def to_string(self, out):   128         out.start_region(self.level, self.type)   129         for node in self.nodes:   130             node.to_string(out)   131         out.end_region(self.level, self.type)   132    133 class Block(Container):   134    135     "A block in the page."   136    137     def __init__(self, nodes, final=True):   138         Container.__init__(self, nodes)   139         self.final = final   140    141     def __repr__(self):   142         return "Block(%r)" % self.nodes   143    144     def to_string(self, out):   145         out.start_block(self.final)   146         for node in self.nodes:   147             node.to_string(out)   148         out.end_block(self.final)   149    150 class Text:   151    152     "A text node."   153    154     def __init__(self, s):   155         self.s = s   156    157     def merge(self, text):   158         self.s += text.s   159    160     def __repr__(self):   161         return "Text(%r)" % self.s   162    163     def to_string(self, out):   164         out.text(self.s)   165    166    167    168 # Serialisation.   169    170 class Serialiser:   171    172     "General serialisation support."   173    174     def __init__(self, out):   175         self.out = out   176    177 class MoinSerialiser(Serialiser):   178    179     "Serialisation of the page."   180    181     def start_region(self, level, type):   182         out = self.out   183         if level:   184             out("{" * level)        # marker   185         if type and level:   186             out("#!%s\n" % type)    # header   187    188     def end_region(self, level, type):   189         out = self.out   190         if level:   191             out("}" * level)        # marker   192    193     def start_block(self, final):   194         pass   195    196     def end_block(self, final):   197         if not final:   198             self.out("\n")   199    200     def text(self, s):   201         self.out(s)   202    203 class HTMLSerialiser(Serialiser):   204    205     "Serialisation of the page."   206    207     def start_region(self, level, type):   208         l = []   209         out = l.append   210         if level:   211             out("level-%d" % level)                 # marker   212    213         # NOTE: Encode type details for CSS.   214    215         if type:   216             out("type-%s" % escape(type, True))     # header   217    218         self.out("<span class='%s'>" % " ".join(l))   219    220     def end_region(self, level, type):   221         self.out("</span>")   222    223     def start_block(self, final):   224         self.out("<p>")   225    226     def end_block(self, final):   227         self.out("</p>")   228    229     def text(self, s):   230         self.out(escape(s))   231    232    233    234 # Parser functions.   235    236 def parse_page(s):   237    238     """   239     Parse page text 's'. Pages consist of regions delimited by markers.   240     """   241    242     # Define tokens for interpretation by the parser.   243    244     items = iter(patterns["markers"].split(s))   245    246     # Define a region for the page and parse it.   247    248     region = Region([])   249     parse_region(items, region)   250     return region   251    252 def parse_region(items, region):   253    254     "Parse the data provided by 'items' to populate 'region'."   255    256     nodes = region.nodes   257     first = True   258    259     # Process exposed text and sections.   260    261     try:   262         try:   263             while True:   264    265                 # Parse section headers.   266    267                 if first:   268                     match_text = parse_region_header(items.next(), region)   269                     first = False   270                 else:   271                     match_text = items.next()   272    273                 # Start a section if an appropriate marker is given.   274    275                 if region.have_start(match_text):   276    277                     # Define the section and parse it.   278    279                     _region = Region([], len(match_text))   280                     region.append(_region)   281                     parse_region(items, _region)   282    283                 # Interpret the given marker, closing the current section if the   284                 # given marker is the corresponding end marker for the current   285                 # section.   286    287                 elif region.have_end(match_text):   288                     return   289    290                 # Otherwise, parse text in the region.   291    292                 else:   293                     region.append(Text(match_text))   294    295         # End of input.   296    297         except StopIteration:   298             pass   299    300     finally:   301         region.normalise()   302    303         # Parse region contents, if possible.   304    305         region.expand()   306    307 def parse_region_header(s, region):   308    309     """   310     Parse the text 's', extracting any region header and setting it for the   311     given 'region'. Return the remaining text.   312     """   313    314     items = iter(patterns["header"].split(s))   315     pre_header = items.next()   316    317     if not pre_header:   318         region.type = items.next()   319         return items.next()   320     else:   321         return pre_header   322    323 def parse_region_text(s, region):   324    325     "Parse the text 's' as part of 'region'."   326    327     items = iter(patterns["region text"].split(s))   328     block = Block([])   329     region.append(block)   330    331     try:   332         while True:   333             match_text = items.next()   334    335             if not match_text.strip():   336                 region.append(block)   337                 block.final = False   338                 block = Block([])   339             else:   340                 block.append(Text(match_text))   341    342     except StopIteration:   343         pass   344    345    346    347 # Top-level functions.   348    349 parse = parse_page   350    351 def serialise(doc, serialiser=MoinSerialiser):   352     l = []   353     doc.to_string(serialiser(l.append))   354     return "".join(l)   355    356 # vim: tabstop=4 expandtab shiftwidth=4