#!/usr/bin/env python

"""
Moin wiki format converter.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat import copy_attachments, errors, make_parser, make_serialiser, \
                       Metadata, parse, serialise
from os.path import split
import sys

# Long messages.

message_all_with_filenames = """\
Using --all overrides any indicated pagenames. Either --all or the filenames
should be omitted."""

message_explicit_pagenames = """\
Explicit pagenames (indicated using --pagename) are only to be specified when
providing filenames without an input directory (indicated using --input-dir).

To indicate pagenames within an input directory, omit any --pagename flags."""



# Options management.

def getmapping(mappings):

    """
    Build a dictionary from 'mappings', a flat collection in which each key is
    immediately followed by its value. A trailing key lacking a value is
    discarded, and None is never accepted as a key.
    """

    pairs = {}
    remaining = iter(mappings)

    for key in remaining:

        # None marks the absence of a pending key, so skip it.

        if key is None:
            continue

        # Consume the item following the key as its value.

        try:
            pairs[key] = next(remaining)
        except StopIteration:
            break

    return pairs

def getvalue(values, default=None):

    """
    Return the leading item of 'values', falling back to 'default' when
    'values' is empty or when its leading item tests as false.
    """

    if values:
        first = values[0]
        if first:
            return first

    return default



# Main program.
def main():

    """
    Interpret program options and perform the conversion.

    The program arguments are read from sys.argv. Each indicated file or page
    is parsed and then either shown as a document tree, shown as serialised
    text on standard output, or written to the output context. The program
    exits with status 1 when explicit pagenames are combined with an input
    directory or when --all is combined with filenames.
    """

    progname = split(sys.argv[0])[1]
    args = sys.argv[1:]

    if "--help" in args:
        show_help(progname)
        sys.exit(0)

    flags, values, filenames = _parse_options(args)

    # Only the first value given for a single-valued option is used.

    output_format = getvalue(values["--format"], "html")
    input_dir = getvalue(values["--input-dir"])
    output_dir = getvalue(values["--output-dir"])
    pagenames = values["--pagename"]

    # An input directory selects its indicated type, otherwise files are
    # treated as standalone documents.

    if input_dir:
        input_context = getvalue(values["--input-dir-type"], "directory")
    else:
        input_context = "standalone"

    if output_dir:
        output_context = "directory"
    else:
        output_context = "standalone"

    # Fragments are not themed.

    if flags["--fragment"]:
        theme_name = None
    else:
        theme_name = "%s.%s" % (getvalue(values["--theme"], "default"),
                                output_format)

    # Define metadata.

    metadata = Metadata({
        "attachments"       : getvalue(values["--attachments-dir"], "attachments"),
        "bundle"            : flags["--bundle"],
        "document_index"    : getvalue(values["--document-index"]),
        "input_context"     : input_context,
        "input_encoding"    : getvalue(values["--input-encoding"]),
        "input_filename"    : input_dir,
        "input_separator"   : getvalue(values["--input-page-sep"]),
        "link_format"       : output_format,
        "mapping"           : getmapping(values["--mapping"]),
        "output_context"    : output_context,
        "output_encoding"   : getvalue(values["--output-encoding"]),
        "output_format"     : output_format,
        "output_filename"   : output_dir,
        "root_pagename"     : getvalue(values["--root"], "FrontPage"),
        "theme_name"        : theme_name,
        })

    # Define the input context and theme.

    source = metadata.get_input()
    theme = metadata.get_theme()

    # Treat filenames as pagenames if an input directory is indicated and if no
    # pagenames are explicitly specified.

    if input_dir:
        if pagenames:
            _report(message_explicit_pagenames)
            sys.exit(1)

        if flags["--all"]:
            if filenames:
                _report(message_all_with_filenames)
                sys.exit(1)
            else:
                filenames = source.all()

        pagenames = filenames
        filenames = []

    # Open each file or page, parse the content, serialise the document.

    for pagename, filename in _pair_names(pagenames, filenames):

        # Define a pagename if missing.

        pagename = pagename or split(filename)[-1]
        metadata.set("pagename", pagename)

        # Read either from a filename or using a pagename.

        if filename:
            pagetext = source.readfile(filename)
        else:
            pagetext = source.readpage(pagename)

        # Parse the page content.

        p = make_parser(metadata)
        d = parse(pagetext, p)

        if flags["--macros"]:
            p.evaluate_macros()

        p.update_metadata(metadata)

        # Show a document tree for debugging purposes, if requested.
        # NOTE: print with a single parenthesised argument produces the same
        # output as a Python 2 statement and as a Python 3 function call.

        if flags["--tree"]:
            print(d.prettyprint())
            continue

        # Otherwise, serialise the document.

        # Obtain a serialiser using the configuration.

        serialiser = make_serialiser(metadata)
        outtext = serialise(d, serialiser)

        # With a theme, apply it to the text.

        if theme:
            outtext = theme.apply(outtext)

        # If the output context cannot be written to, show the result.
        # Otherwise, write to the output context, report the page name and
        # copy the attachments.

        output = metadata.get_output()

        if not output.can_write():
            print(outtext)
        else:
            output.writepage(outtext, pagename)
            _report(pagename)

            copy_attachments(p, source, output, all=True)

    # Install any theme resources.

    if theme:
        theme.install_resources()

def _parse_options(args):

    """
    Interpret the program arguments in 'args', returning a tuple of the form
    (flags, values, filenames).

    Here, 'flags' maps each option taking no argument to a boolean indicating
    whether it was given, 'values' maps each option taking an argument to the
    list of arguments collected for it, and 'filenames' is the list of all
    other arguments.
    """

    # Flags with no following arguments.

    flags = dict.fromkeys([
        "--all",        # detect all documents
        "--bundle",     # bundle resources
        "--fragment",   # fragment output (if serialising)
        "--macros",     # macro evaluation
        "--tree",       # tree output
        ], False)

    # Options with following arguments.

    values = dict([(name, []) for name in [
        "--attachments-dir",    # attachments directory
        "--document-index",     # document index
        "--format",             # formats
        "--input-dir",          # input locations
        "--input-dir-type",     # input context types
        "--input-encoding",     # input encodings
        "--input-page-sep",     # input page hierarchy separators
        "--mapping",            # mappings
        "--output-dir",         # output locations
        "--output-encoding",    # output encodings
        "--pagename",           # page names
        "--root",               # root page names
        "--theme",              # theme names
        ]])

    filenames = []
    mappings = values["--mapping"]

    # Obtain filenames by default.

    target = filenames

    for arg in args:

        # Record flags.

        if arg in flags:
            flags[arg] = True

        # Switch to collecting values for the indicated option.

        elif arg in values:
            target = values[arg]
            continue

        # Collect options and arguments.

        else:
            target.append(arg)

            # Collect multiple mappings: each mapping is a name followed by a
            # URL, so collection continues until another option appears.

            if target is mappings:
                continue

        # Collect filenames normally.

        target = filenames

    return flags, values, filenames

def _pair_names(pagenames, filenames):

    """
    Pair each of the 'pagenames' with the filename at the same position in
    'filenames', padding the shorter collection with None so that no name is
    dropped. The given collections are not modified.
    """

    pagenames = list(pagenames)
    filenames = list(filenames)

    shortfall = len(pagenames) - len(filenames)

    if shortfall > 0:
        filenames += [None] * shortfall
    else:
        pagenames += [None] * -shortfall

    return zip(pagenames, filenames)

def _report(message):

    "Write 'message' followed by a newline to standard error."

    sys.stderr.write("%s\n" % (message,))

def show_help(progname):

    "Show the help text on standard error, using 'progname' in the usage line."

    _report(help_text % progname)

help_text = """\
Usage: %s [ <options> ] ( --all | <filename>... )

Input file options:

--all               Detect all document files in the specified input directory

Input options:

--input-dir         Indicate an input directory containing document files
--input-dir-type    Indicate the type of input directory involved
                    (default: directory)
--input-encoding    Indicate the character encoding used in document files
--input-page-sep    Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--pagename          Indicate the page name corresponding to an indicated
                    filename, with each successive instance of this option
                    corresponding to each successive filename instance

Output options:

--bundle            Bundle resources such as stylesheets within every document,
                    useful for publishing documents that need to be copied or
                    distributed individually.
--document-index    Provide a "DocumentIndex" filename to be used in links in
                    HTML format output, useful for local file browsing instead
                    of Web-published content
--format            Indicate the format to be used for serialised documents
                    (default: html)
--fragment          Indicates that an output fragment, not an entire document,
                    is to be generated, skipping any theming activities
--output-dir        Indicate an output directory to contain serialised document
                    files
--output-encoding   Indicate the character encoding used in serialised document
                    files
--theme             Indicate a theme for serialised documents, typically
                    requiring an output directory to be useful
--tree              Produce a document tree representation on standard output
                    instead of generating output files

Configuration options:

--attachments-dir   Indicate the attachments directory
                    (default: attachments)
--macros            Perform macro evaluation/expansion before serialising
                    documents
--mapping           Indicate a name and corresponding URL to be used to
                    translate interwiki links
--root              Indicate the root page name to be used
                    (default: FrontPage)
"""

if __name__ == "__main__":
    try:
        main()

    # Report processing errors on standard error, keeping them out of any
    # document text written to standard output, and signal failure to the
    # caller through the exit status.

    except errors.ProcessingError as exc:
        _report(str(exc))
        sys.exit(1)

# vim: tabstop=4 expandtab shiftwidth=4