#!/usr/bin/env python

"""
Moin wiki format converter.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat import errors, make_parser, make_serialiser, Metadata, parse, \
                       serialise
from os.path import split
import sys

# Long messages.

message_all_with_filenames = """\
Using --all overrides any indicated pagenames. Either --all or the filenames
should be omitted."""

message_explicit_pagenames = """\
Explicit pagenames (indicated using --pagename) are only to be specified when
providing filenames without an input directory (indicated using --input-dir).

To indicate pagenames within an input directory, omit any --pagename flags."""



# Options management.

def getmapping(mappings):

    """
    Build a dictionary from 'mappings', a flat sequence in which each key is
    immediately followed by its value. A trailing key with no value after it
    is discarded, and a later occurrence of a key replaces an earlier one.
    """

    result = {}

    # The key awaiting its value, or None when the next item is itself a key.

    pending = None

    for item in mappings:
        if pending is not None:
            result[pending] = item
            pending = None
        else:
            pending = item

    return result

def getvalue(values, default=None):

    """
    Return the leading item of 'values', falling back to 'default' when
    'values' has no items or when the leading item tests as false.
    """

    if values:
        first = values[0]
        if first:
            return first

    return default



# Main program.
74 75 def main(): 76 77 "Interpret program options and perform the conversion." 78 79 dirname, progname = split(sys.argv[0]) 80 args = sys.argv[1:] 81 82 if "--help" in args: 83 show_help(progname) 84 sys.exit(0) 85 86 # Option values. 87 88 document_indexes = [] 89 filenames = [] 90 formats = [] 91 input_dir_types = [] 92 input_dirs = [] 93 input_encodings = [] 94 input_page_seps = [] 95 mappings = [] 96 output_dirs = [] 97 output_encodings = [] 98 pagenames = [] 99 root_pagenames = [] 100 theme_names = [] 101 102 # Obtain filenames by default. 103 104 l = filenames 105 106 # Flags. 107 108 all = False 109 fragment = False 110 macros = False 111 tree = False 112 113 for arg in args: 114 115 # Detect tree output. 116 117 if arg == "--tree": 118 tree = True 119 120 # Detect macro evaluation. 121 122 elif arg == "--macros": 123 macros = True 124 125 # Detect all documents. 126 127 elif arg == "--all": 128 all = True 129 130 # Switch to document index. 131 132 elif arg == "--document-index": 133 l = document_indexes 134 continue 135 136 # Detect fragment output (if serialising). 137 138 elif arg == "--fragment": 139 fragment = True 140 141 # Switch to collecting formats. 142 143 elif arg == "--format": 144 l = formats 145 continue 146 147 # Switch to collecting input locations. 148 149 elif arg == "--input-dir": 150 l = input_dirs 151 continue 152 153 # Switch to collecting input context types. 154 155 elif arg == "--input-dir-type": 156 l = input_dir_types 157 continue 158 159 # Switch to collecting input encodings. 160 161 elif arg == "--input-encoding": 162 l = input_encodings 163 continue 164 165 # Switch to collecting input page hierarchy separators. 166 167 elif arg == "--input-page-sep": 168 l = input_page_seps 169 continue 170 171 # Switch to collecting mappings. 172 173 elif arg == "--mapping": 174 l = mappings 175 continue 176 177 # Switch to collecting output locations. 
178 179 elif arg == "--output-dir": 180 l = output_dirs 181 continue 182 183 # Switch to collecting output encodings. 184 185 elif arg == "--output-encoding": 186 l = output_encodings 187 continue 188 189 # Switch to collecting page names. 190 191 elif arg == "--pagename": 192 l = pagenames 193 continue 194 195 # Switch to collecting root page names. 196 197 elif arg == "--root": 198 l = root_pagenames 199 continue 200 201 # Switch to collecting theme names. 202 203 elif arg == "--theme": 204 l = theme_names 205 continue 206 207 # Collect options and arguments. 208 209 else: 210 l.append(arg) 211 212 # Collect multiple mappings. 213 214 if l is mappings: 215 continue 216 217 # Collect filenames normally. 218 219 l = filenames 220 221 format = formats and formats[0] or "html" 222 input_dir = getvalue(input_dirs) 223 output_dir = getvalue(output_dirs) 224 225 # Define metadata. 226 227 metadata = Metadata({ 228 "document_index" : getvalue(document_indexes), 229 "input_context" : input_dir and \ 230 getvalue(input_dir_types, "directory") or \ 231 "standalone", 232 "input_encoding" : getvalue(input_encodings), 233 "input_filename" : input_dir, 234 "input_separator" : getvalue(input_page_seps), 235 "link_format" : format, 236 "mapping" : getmapping(mappings), 237 "output_context" : output_dir and "directory" or "standalone", 238 "output_encoding" : getvalue(output_encodings), 239 "output_format" : format, 240 "output_filename" : output_dir, 241 "root_pagename" : getvalue(root_pagenames, "FrontPage"), 242 "theme_name" : not fragment and \ 243 "%s.%s" % (getvalue(theme_names, "default"), format) or None, 244 }) 245 246 # Define the input context and theme. 247 248 input = metadata.get_input() 249 theme = metadata.get_theme() 250 251 # Treat filenames as pagenames if an input directory is indicated and if no 252 # pagenames are explicitly specified. 
253 254 if input_dir: 255 if pagenames: 256 print >>sys.stderr, message_explicit_pagenames 257 sys.exit(1) 258 259 if all: 260 if filenames: 261 print >>sys.stderr, message_all_with_filenames 262 sys.exit(1) 263 else: 264 filenames = input.all() 265 266 pagenames = filenames 267 filenames = [] 268 269 # Open each file or page, parse the content, serialise the document. 270 271 for pagename, filename in map(None, pagenames, filenames): 272 273 # Define a pagename if missing. 274 275 pagename = pagename or split(filename)[-1] 276 metadata.set("pagename", pagename) 277 278 # Read either from a filename or using a pagename. 279 280 if filename: 281 pagetext = input.readfile(filename) 282 else: 283 pagetext = input.readpage(pagename) 284 285 # Parse the page content. 286 287 p = make_parser(metadata) 288 d = parse(pagetext, p) 289 290 if macros: 291 p.evaluate_macros() 292 293 p.update_metadata(metadata) 294 295 # Show a document tree for debugging purposes, if requested. 296 297 if tree: 298 print d.prettyprint() 299 continue 300 301 # Otherwise, serialise the document. 302 303 # Obtain a serialiser using the configuration. 304 305 serialiser = make_serialiser(metadata) 306 outtext = serialise(d, serialiser) 307 308 # With a theme, apply it to the text. 309 310 if theme: 311 outtext = theme.apply(outtext) 312 313 # If reading from a file, show the result. Otherwise, write to the 314 # output context. 315 316 output = metadata.get_output() 317 318 if not output.can_write(): 319 print outtext 320 else: 321 output.writepage(outtext, pagename) 322 print >>sys.stderr, pagename 323 324 # Install any theme resources. 325 326 if theme: 327 theme.install_resources() 328 329 def show_help(progname): 330 331 "Show the help text." 332 333 print >>sys.stderr, help_text % progname 334 335 help_text = """\ 336 Usage: %s [ <options> ] ( --all | <filename>... 
) 337 338 Input file options: 339 340 --all Detect all document files in the specified input directory 341 342 Input options: 343 344 --input-dir Indicate an input directory containing document files 345 --input-dir-type Indicate the type of input directory involved 346 (default: directory) 347 --input-encoding Indicate the character encoding used in document files 348 --input-page-sep Indicate the separator used in filenames to encode 349 hierarchical relationships (subpages and descendant pages) 350 --pagename Indicate the page name corresponding to an indicated 351 filename, with each successive instance of this option 352 corresponding to each successive filename instance 353 354 Output options: 355 356 --document-index Provide a "DocumentIndex" filename to be used in links in 357 HTML format output, useful for local file browsing instead 358 of Web-published content 359 --format Indicate the format to be used for serialised documents 360 (default: html) 361 --fragment Indicates that an output fragment, not an entire document, 362 is to be generated, skipping any theming activities 363 --output-dir Indicate an output directory to contain serialised document 364 files 365 --output-encoding Indicate the character encoding used in serialised document 366 files 367 --theme Indicate a theme for serialised documents, typically 368 requiring an output directory to be useful 369 --tree Produce a document tree representation on standard output 370 instead of generating output files 371 372 Configuration options: 373 374 --macros Perform macro evaluation/expansion before serialising 375 documents 376 --mapping Indicate a name and corresponding URL to be used to 377 translate interwiki links 378 --root Indicate the root page name to be used 379 (default: FrontPage) 380 """ 381 382 if __name__ == "__main__": 383 try: 384 main() 385 except errors.ProcessingError, exc: 386 print str(exc) 387 388 # vim: tabstop=4 expandtab shiftwidth=4