#!/usr/bin/env python

"""
Moin wiki format parser.

Copyright (C) 2012, 2013, 2015, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import re

# cgi.escape was removed in Python 3.8; html.escape is its replacement. Either
# one is only used here for the "&", "<" and ">" replacements.

try:
    from html import escape as _escape_markup
except ImportError:
    from cgi import escape as _escape_markup

# Regular expressions.

syntax = {
    # Page regions:
    "markers"       : (r"^\s*([{]{3,}|[}]{3,})", re.MULTILINE | re.DOTALL), # {{{... or }}}...

    # Region contents:
    "header"        : (r"\A#!(.*?)$", re.MULTILINE),                        # #! char-excl-nl
    "region text"   : (r"(^\s*$)", re.MULTILINE),                           # blank line
    }

# Define patterns for the regular expressions.

patterns = {}
for name, (value, flags) in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | flags)



# Utility functions.

def escape(s, quote=False):

    """
    Return 's' with "&", "<" and ">" replaced by HTML entities. If 'quote' is
    true, double and single quote characters are replaced as well, making the
    result usable inside an attribute value delimited by either kind of quote.
    """

    s = _escape_markup(s, False)
    if quote:
        s = s.replace('"', "&quot;").replace("'", "&#x27;")
    return s

def _split_non_empty(pattern, s):

    """
    Split 's' using 'pattern' in the manner of pattern.split(s), including
    the captured groups in the result, but ignoring empty matches.

    From Python 3.7, the split method also splits on empty matches, whereas
    earlier versions ignored them. Ignoring them explicitly gives the same
    result on all versions.
    """

    parts = []
    last = 0

    for match in pattern.finditer(s):
        if match.end() == match.start():
            continue
        parts.append(s[last:match.start()])
        parts.extend(match.groups())
        last = match.end()

    parts.append(s[last:])
    return parts



# Document nodes.

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of 'nodes'."

        self.nodes = nodes

    def append(self, node):

        "Add 'node' to the end of the list of nodes."

        self.nodes.append(node)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, type=None):

        """
        Initialise the region with the given 'nodes', the 'level' (the number
        of brace characters in the region's markers, zero for the page itself)
        and the 'type' given by any region header.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.type = type

    def have_end(self, s):

        """
        Return a true value if the marker 's' closes this region: the region
        must have a non-zero level and 's' must be an end marker of the same
        length as the level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r)" % (self.nodes, self.level, self.type)

    def to_string(self, out):

        "Serialise the region and its nodes using the serialiser 'out'."

        out.start_region(self.level, self.type)
        for node in self.nodes:
            node.to_string(out)
        out.end_region(self.level, self.type)

class Block(Container):

    "A block in the page."

    def __init__(self, nodes, final=True):

        """
        Initialise the block with the given 'nodes'. The 'final' flag is
        cleared by the parser when a blank separator follows the block.
        """

        Container.__init__(self, nodes)
        self.final = final

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def to_string(self, out):

        "Serialise the block and its nodes using the serialiser 'out'."

        out.start_block(self.final)
        for node in self.nodes:
            node.to_string(out)
        out.end_block(self.final)

class Text:

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def __repr__(self):
        return "Text(%r)" % self.s

    def to_string(self, out):

        "Serialise the text using the serialiser 'out'."

        out.text(self.s)



# Serialisation.

class Serialiser:

    "General serialisation support."

    def __init__(self, out):

        "Initialise the serialiser with 'out', a callable accepting strings."

        self.out = out

class MoinSerialiser(Serialiser):

    "Serialisation of the page."

    def start_region(self, level, type):
        out = self.out
        if level:
            out("{" * level)    # marker
        if type:
            out("#!%s" % type)  # header

    def end_region(self, level, type):
        out = self.out
        if level:
            out("}" * level)    # marker

    def start_block(self, final):
        pass

    def end_block(self, final):

        # A newline stands in for the separator that followed the block.

        if not final:
            self.out("\n")

    def text(self, s):
        self.out(s)

class HTMLSerialiser(Serialiser):

    "Serialisation of the page."

    def start_region(self, level, type):
        l = []
        out = l.append
        if level:
            out("level-%d" % level) # marker

        # NOTE: Encode type details for CSS.
        # Both kinds of quote are escaped because the attribute value below is
        # delimited by single quotes.

        if type:
            out("type-%s" % escape(type, True)) # header

        self.out("<span class='%s'>" % " ".join(l))

    def end_region(self, level, type):
        self.out("</span>")

    def start_block(self, final):
        self.out("<p>")

    def end_block(self, final):
        self.out("</p>")

    def text(self, s):
        self.out(escape(s))



# Parser functions.

def parse_page(s):

    """
    Parse page text 's'. Pages consist of regions delimited by markers.
    Return the region representing the page.
    """

    region = Region([])

    # Splitting on a pattern with one group yields alternating items: text,
    # marker, text, marker, ..., always starting and ending with text.

    items = iter(patterns["markers"].split(s))
    parse_region(items, region)
    return region

def parse_region(items, region):

    """
    Parse the data provided by 'items' to populate 'region'.

    The 'items' iterator must provide alternating text and marker strings,
    starting with text, as produced in parse_page. Markers are identified by
    their position in this sequence, not by their content, so that text which
    happens to start with a brace is never mistaken for a marker.
    """

    first = True

    for text in items:

        # Parse any section header in the first text of the region.

        if first:
            text = parse_region_header(text, region)
            first = False

        parse_region_text(text, region)

        # Obtain the marker following the text, if any.

        marker = next(items, None)
        if marker is None:
            break

        # Start a section if an appropriate marker is given. The nested call
        # consumes items up to and including the section's end marker, leaving
        # text as the next item.

        if marker.startswith("{"):
            _region = Region([], len(marker))
            region.append(_region)
            parse_region(items, _region)

        # Interpret the given marker, closing the current section if the
        # given marker is the corresponding end marker for the current
        # section.

        elif region.have_end(marker):
            return

        # Otherwise, treat the unmatched end marker as text in the region.

        else:
            parse_region_text(marker, region)

def parse_region_header(s, region):

    """
    Parse the text 's', extracting any region header and setting it for the
    given 'region'. Return the remaining text.

    Text without a header, including the empty string, is returned unchanged.
    """

    match = patterns["header"].match(s)

    if match:
        region.type = match.group(1)
        return s[match.end():]
    else:
        return s

def parse_region_text(s, region):

    """
    Parse the text 's' as part of 'region', adding a block for each portion of
    text delimited by whitespace-only separators. Each block followed by a
    separator is marked as not final. Empty text adds nothing to the region.
    """

    if not s:
        return

    block = Block([])
    region.append(block)

    for match_text in _split_non_empty(patterns["region text"], s):

        # End the current block and start a new one, adding the new block to
        # the region.

        if not match_text.strip():
            block.final = False
            block = Block([])
            region.append(block)
        else:
            block.append(Text(match_text))

# Top-level parsing function.

parse = parse_page

# Top-level serialising functions.

def serialise(doc, serialiser=MoinSerialiser):

    """
    Return a string serialising the document 'doc' using an instance of the
    given 'serialiser' class.
    """

    l = []
    doc.to_string(serialiser(l.append))
    return "".join(l)

# vim: tabstop=4 expandtab shiftwidth=4