#!/usr/bin/env python

"""
Moin wiki format converter.

Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __future__ import print_function

from moinformat import copy_attachments, errors, make_parser, make_serialiser, \
                       Metadata, parse, serialise
from os.path import split
import sys

# Pairing of two sequences padded with None, replacing the Python 2-only
# map(None, a, b) idiom.

try:
    from itertools import izip_longest as zip_longest   # Python 2
except ImportError:
    from itertools import zip_longest                   # Python 3

# Long messages.

message_all_with_filenames = """\
Using --all overrides any indicated pagenames. Either --all or the filenames
should be omitted."""

message_explicit_pagenames = """\
Explicit pagenames (indicated using --pagename) are only to be specified when
providing filenames without an input directory (indicated using --input-dir).

To indicate pagenames within an input directory, omit any --pagename flags."""

message_tree_format_usage = """\
The --tree option cannot be used together with the --format or --output-format
options since the --tree option indicates use of the "pretty" format."""



# Options management.

def getmapping(mappings):

    """
    Return the given 'mappings' - a collection of key-then-value items - as a
    dictionary.

    Items are consumed in pairs. A trailing key without a following value is
    ignored, and a later pair overrides an earlier pair having the same key.
    """

    items = list(mappings)

    # Even positions provide keys, odd positions provide values; zip discards
    # any unpaired final key.

    return dict(zip(items[0::2], items[1::2]))

def getvalue(values, default=None):

    """
    Return the first value from 'values' or 'default' if 'values' is empty or
    the first value tests as false.
    """

    if values and values[0]:
        return values[0]

    return default

def _fail(message):

    "Write 'message' to standard error and exit with a failure status."

    print(message, file=sys.stderr)
    sys.exit(1)



# Main program.

def main():

    """
    Interpret program options and perform the conversion.

    Program arguments are taken from sys.argv. Each named page or file is
    read, parsed and serialised, with the result either being written to
    standard output or to the output context, depending on whether the output
    context can be written to.

    The program exits with a failure status if incompatible options are given.
    """

    progname = split(sys.argv[0])[1]
    args = sys.argv[1:]

    if "--help" in args:
        show_help(progname)
        sys.exit(0)

    # Option values.

    attachments_dirs = []
    document_indexes = []
    filenames = []
    input_formats = []
    input_dir_types = []
    input_dirs = []
    input_encodings = []
    input_page_seps = []
    mappings = []
    output_dirs = []
    output_encodings = []
    output_formats = []
    output_page_seps = []
    pagenames = []
    root_pagenames = []
    theme_names = []

    # Options with following arguments, each associated with the collection
    # receiving those arguments.

    value_options = {
        "--attachments-dir" : attachments_dirs,
        "--document-index"  : document_indexes,
        "--format"          : output_formats,
        "--output-format"   : output_formats,
        "--input-dir"       : input_dirs,
        "--input-dir-type"  : input_dir_types,
        "--input-encoding"  : input_encodings,
        "--input-format"    : input_formats,
        "--input-page-sep"  : input_page_seps,
        "--mapping"         : mappings,
        "--output-dir"      : output_dirs,
        "--output-encoding" : output_encodings,
        "--output-page-sep" : output_page_seps,
        "--pagename"        : pagenames,
        "--root"            : root_pagenames,
        "--theme"           : theme_names,
        }

    # Flags with no following arguments.

    flags = {
        "--all"       : False,  # detect all documents
        "--bundle"    : False,  # resource bundling
        "--common"    : False,  # common attachments directory
        "--fragment"  : False,  # fragment output (if serialising)
        "--macros"    : False,  # macro evaluation
        "--no-inline" : False,  # suppression of inline objects
        "--tree"      : False,  # tree output
        }

    # Obtain filenames by default.

    target = filenames

    for arg in args:

        # Flags with no following arguments.

        if arg in flags:

            # Tree output implies the "pretty" format and so conflicts with
            # any explicitly indicated format.

            if arg == "--tree" and output_formats:
                _fail(message_tree_format_usage)

            flags[arg] = True

        # Options with following arguments: switch to the appropriate
        # collection for the next argument.

        elif arg in value_options:
            if arg in ("--format", "--output-format") and flags["--tree"]:
                _fail(message_tree_format_usage)

            target = value_options[arg]
            continue

        # Collect options and arguments.

        else:
            target.append(arg)

        # Collect multiple mappings: remain with the mappings collection until
        # another option with arguments is encountered.

        if target is mappings:
            continue

        # Collect filenames normally.

        target = filenames

    all_pages = flags["--all"]
    fragment = flags["--fragment"]
    macros = flags["--macros"]
    tree = flags["--tree"]

    input_format = getvalue(input_formats, "moin")
    output_format = "pretty" if tree else getvalue(output_formats, "html")
    input_dir = getvalue(input_dirs)
    output_dir = getvalue(output_dirs)

    # Themes are not applied to fragments.

    if fragment:
        theme_name = None
    else:
        theme_name = "%s.%s" % (getvalue(theme_names, "default"), output_format)

    # Define metadata.

    metadata = Metadata({
        "attachments"       : getvalue(attachments_dirs, "attachments"),
        "bundle"            : flags["--bundle"],
        "common_attachments": flags["--common"],
        "document_index"    : getvalue(document_indexes),
        "fragment"          : fragment,
        "input_context"     : getvalue(input_dir_types, "directory")
                              if input_dir else "standalone",
        "input_encoding"    : getvalue(input_encodings),
        "input_filename"    : input_dir,
        "input_format"      : input_format,
        "input_separator"   : getvalue(input_page_seps),
        "link_format"       : output_format,
        "mapping"           : getmapping(mappings),
        "no_inline"         : flags["--no-inline"],
        "output_context"    : "directory" if output_dir else "standalone",
        "output_encoding"   : getvalue(output_encodings),
        "output_format"     : output_format,
        "output_filename"   : output_dir,
        "output_separator"  : getvalue(output_page_seps),
        "root_pagename"     : getvalue(root_pagenames, "FrontPage"),
        "theme_name"        : theme_name,
        })

    # Define the input context and theme.

    input_context = metadata.get_input()
    theme = metadata.get_theme()

    # Treat filenames as pagenames if an input directory is indicated and if no
    # pagenames are explicitly specified.

    if input_dir:
        if pagenames:
            _fail(message_explicit_pagenames)

        if all_pages:
            if filenames:
                _fail(message_all_with_filenames)

            filenames = input_context.all()

        pagenames = filenames
        filenames = []

    # Open each file or page, parse the content, serialise the document.
    # Where one collection is shorter than the other, None is supplied for the
    # missing pagename or filename.

    for pagename, filename in zip_longest(pagenames, filenames):

        # Define a pagename if missing.

        pagename = pagename or split(filename)[-1]
        metadata.set("pagename", pagename)

        # Read either from a filename or using a pagename.

        if filename:
            pagetext = input_context.readfile(filename)
        else:
            pagetext = input_context.readpage(pagename)

        # Parse the page content.

        p = make_parser(metadata)
        d = parse(pagetext, p)

        if macros:
            p.evaluate_macros()

        p.update_metadata(metadata)

        # Obtain a serialiser using the configuration.

        serialiser = make_serialiser(metadata)
        outtext = serialise(d, serialiser)

        # Show a document tree for debugging purposes, if requested.

        if tree:
            print(outtext)
            continue

        # With a theme, apply it to the text.

        if theme:
            outtext = theme.apply(outtext)

        # Show the result if the output context cannot be written to.
        # Otherwise, write to the output context, reporting the page name.

        output = metadata.get_output()

        if not output.can_write():
            print(outtext)
        else:
            output.writepage(outtext, pagename)
            print(pagename, file=sys.stderr)

            copy_attachments(p, input_context, output, all=True)

    # Install any theme resources.

    if theme:
        theme.install_resources()

def show_help(progname):

    "Show the help text on standard error, naming the program as 'progname'."

    print(help_text % progname, file=sys.stderr)

help_text = """\
Usage: %s [ <options> ] ( --all | <filename>... )

Input file options:

--all               Detect all document files in the specified input directory

Input options:

--common            Obtain attachments from a common directory for all pages,
                    rather than each page having its own subdirectory of a
                    top-level attachments directory
--input-dir         Indicate an input directory containing document files
--input-dir-type    Indicate the type of input directory involved
                    (default: directory)
--input-encoding    Indicate the character encoding used in document files
--input-format      Indicate the format of the parsed documents
                    (default: moin)
--input-page-sep    Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--pagename          Indicate the page name corresponding to an indicated
                    filename, with each successive instance of this option
                    corresponding to each successive filename instance

Output options:

--bundle            Bundle resources such as stylesheets within every document,
                    useful for publishing documents that need to be copied or
                    distributed individually
--document-index    Provide a "DocumentIndex" filename to be used in links in
                    HTML format output, useful for local file browsing instead
                    of Web-published content
--format            Indicate the format to be used for serialised documents;
                    equivalent to --output-format
                    (default: html)
--fragment          Indicates that an output fragment, not an entire document,
                    is to be generated, skipping any theming activities
--no-inline         Suppress inline objects in serialised documents, linking to
                    separate objects instead
--output-dir        Indicate an output directory to contain serialised document
                    files
--output-encoding   Indicate the character encoding used in serialised document
                    files
--output-format     Indicate the format to be used for serialised documents;
                    equivalent to --format
                    (default: html)
--output-page-sep   Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--theme             Indicate a theme for serialised documents, typically
                    requiring an output directory to be useful
--tree              Produce a document tree representation on standard output
                    instead of generating output files

Configuration options:

--macros            Perform macro evaluation/expansion before serialising
                    documents
--mapping           Indicate a name and corresponding URL to be used to
                    translate interwiki links
--root              Indicate the root page name to be used
                    (default: FrontPage)
"""

if __name__ == "__main__":
    try:
        main()

    # Report processing failures on standard error and signal failure to the
    # caller, keeping standard output free of anything but converted content.

    except errors.ProcessingError as exc:
        print(str(exc), file=sys.stderr)
        sys.exit(1)

# vim: tabstop=4 expandtab shiftwidth=4