paul@84 | 1 | #!/usr/bin/env python |
paul@84 | 2 | |
paul@174 | 3 | """ |
paul@174 | 4 | Moin wiki format converter. |
paul@174 | 5 | |
paul@338 | 6 | Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk> |
paul@174 | 7 | |
paul@174 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@174 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@174 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@174 | 11 | version. |
paul@174 | 12 | |
paul@174 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@174 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@174 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@174 | 16 | details. |
paul@174 | 17 | |
paul@174 | 18 | You should have received a copy of the GNU General Public License along with |
paul@174 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@174 | 20 | """ |
paul@174 | 21 | |
paul@223 | 22 | from moinformat import copy_attachments, errors, make_parser, make_serialiser, \ |
paul@223 | 23 | Metadata, parse, serialise |
paul@84 | 24 | from os.path import split |
paul@84 | 25 | import sys |
paul@84 | 26 | |
# Long messages, each written to standard error by main() immediately before it
# exits with status 1.

# Shown when an input directory is given and --all is combined with filenames.

message_all_with_filenames = """\
Using --all overrides any indicated pagenames. Either --all or the filenames
should be omitted."""

# Shown when an input directory is given together with --pagename options.

message_explicit_pagenames = """\
Explicit pagenames (indicated using --pagename) are only to be specified when
providing filenames without an input directory (indicated using --input-dir).

To indicate pagenames within an input directory, omit any --pagename flags."""

# Shown when --tree is combined with --format or --output-format, regardless of
# the order in which the options appear.

message_tree_format_usage = """\
The --tree option cannot be used together with the --format or --output-format
options since the --tree option indicates use of the "pretty" format."""
paul@334 | 42 | |
paul@174 | 43 | |
paul@174 | 44 | |
paul@174 | 45 | # Options management. |
paul@174 | 46 | |
def getmapping(mappings):

    """
    Return the given 'mappings' - a collection of key-then-value items - as a
    dictionary.

    Items are consumed in pairs: the first item of each pair is the key and the
    second is its value. Where a key occurs more than once, the last value
    given for it is retained. A final key with no following value is ignored.
    """

    items = iter(mappings)

    # Supplying the same iterator twice makes zip draw a key and then its value
    # on each step, so no sentinel is needed to remember a pending key (the
    # previous None sentinel mishandled a key that was itself None).

    return dict(zip(items, items))
paul@133 | 65 | |
def getvalue(values, default=None):

    """
    Select the leading item of 'values', falling back to 'default' when there
    are no items at all or when the leading item itself tests as false.
    """

    # Nothing was collected.

    if not values:
        return default

    # A false leading item (such as an empty string) also yields the default.

    first = values[0]

    if not first:
        return default

    return first
paul@133 | 74 | |
paul@174 | 75 | |
paul@174 | 76 | |
paul@174 | 77 | # Main program. |
paul@174 | 78 | |
def main():

    """
    Interpret program options and perform the conversion.

    Arguments are taken from sys.argv. If "--help" appears anywhere, the help
    text is shown and the program exits with status 0. Flags are recorded as
    booleans, whereas each option taking a value switches the list receiving
    the next argument. Arguments not consumed by an option are collected as
    filenames.

    Conflicting options (--tree with --format/--output-format, --pagename with
    --input-dir, --all with filenames) cause a message to be written to
    standard error and the program to exit with status 1.

    Each page is then read, parsed and serialised, with the result either
    printed or written to the output context.
    """

    dirname, progname = split(sys.argv[0])
    args = sys.argv[1:]

    if "--help" in args:
        show_help(progname)
        sys.exit(0)

    # Option values. Each list collects the arguments following its option and
    # only the first collected value is eventually used, except for mappings
    # (consumed as key-then-value items) and pagenames (one per filename).

    attachments_dir = []
    document_indexes = []
    filenames = []
    input_formats = []
    input_dir_types = []
    input_dirs = []
    input_encodings = []
    input_page_seps = []
    mappings = []
    output_dirs = []
    output_encodings = []
    output_formats = []
    output_page_seps = []
    pagenames = []
    root_pagenames = []
    theme_names = []

    # Obtain filenames by default. Here, 'l' always refers to the list that
    # will receive the next argument not recognised as an option.

    l = filenames

    # Flags. Note that 'all' shadows the built-in function of the same name
    # within this function.

    all = False
    bundle = False
    common = False
    fragment = False
    macros = False
    no_inline = False
    tree = False

    for arg in args:

        # Flags with no following arguments.

        # Detect all documents.

        if arg == "--all":
            all = True

        # Detect resource bundling.

        elif arg == "--bundle":
            bundle = True

        # Detect use of a common attachments directory.

        elif arg == "--common":
            common = True

        # Detect fragment output (if serialising).

        elif arg == "--fragment":
            fragment = True

        # Detect macro evaluation.

        elif arg == "--macros":
            macros = True

        # Detect suppression of inline objects.

        elif arg == "--no-inline":
            no_inline = True

        # Detect tree output, rejecting it if an output format has already
        # been collected.

        elif arg == "--tree":
            if output_formats:
                print >>sys.stderr, message_tree_format_usage
                sys.exit(1)
            tree = True

        # Options with following arguments.

        # Switch to collecting attachments directory names.

        elif arg == "--attachments-dir":
            l = attachments_dir
            continue

        # Switch to collecting document index filenames.

        elif arg == "--document-index":
            l = document_indexes
            continue

        # Switch to collecting output formats, rejecting the option if tree
        # output has already been requested.

        elif arg in ("--format", "--output-format"):
            if tree:
                print >>sys.stderr, message_tree_format_usage
                sys.exit(1)
            l = output_formats
            continue

        # Switch to collecting input locations.

        elif arg == "--input-dir":
            l = input_dirs
            continue

        # Switch to collecting input context types.

        elif arg == "--input-dir-type":
            l = input_dir_types
            continue

        # Switch to collecting input encodings.

        elif arg == "--input-encoding":
            l = input_encodings
            continue

        # Switch to collecting input formats.

        elif arg == "--input-format":
            l = input_formats
            continue

        # Switch to collecting input page hierarchy separators.

        elif arg == "--input-page-sep":
            l = input_page_seps
            continue

        # Switch to collecting mappings.

        elif arg == "--mapping":
            l = mappings
            continue

        # Switch to collecting output locations.

        elif arg == "--output-dir":
            l = output_dirs
            continue

        # Switch to collecting output encodings.

        elif arg == "--output-encoding":
            l = output_encodings
            continue

        # Switch to collecting output page hierarchy separators.

        elif arg == "--output-page-sep":
            l = output_page_seps
            continue

        # Switch to collecting page names.

        elif arg == "--pagename":
            l = pagenames
            continue

        # Switch to collecting root page names.

        elif arg == "--root":
            l = root_pagenames
            continue

        # Switch to collecting theme names.

        elif arg == "--theme":
            l = theme_names
            continue

        # Collect options and arguments.

        else:
            l.append(arg)

        # NOTE(review): the nesting of the two statements below could not be
        # established from the flattened listing; they are assumed to belong
        # to the loop body, not to the "else" clause above - confirm.

        # Collect multiple mappings: once --mapping has been seen, arguments
        # continue to be collected as mapping items until another option
        # taking a value is encountered.

        if l is mappings:
            continue

        # Collect filenames normally.

        l = filenames

    # Choose the effective formats: "moin" input and "html" output by default,
    # with --tree forcing the "pretty" output format.

    input_format = input_formats and input_formats[0] or "moin"
    output_format = tree and "pretty" or output_formats and output_formats[0] or "html"
    input_dir = getvalue(input_dirs)
    output_dir = getvalue(output_dirs)

    # Define metadata.

    metadata = Metadata({
        "attachments"       : getvalue(attachments_dir, "attachments"),
        "bundle"            : bundle,
        "common_attachments": common,
        "document_index"    : getvalue(document_indexes),
        "input_context"     : input_dir and \
                              getvalue(input_dir_types, "directory") or \
                              "standalone",
        "input_encoding"    : getvalue(input_encodings),
        "input_filename"    : input_dir,
        "input_format"      : input_format,
        "input_separator"   : getvalue(input_page_seps),
        "link_format"       : output_format,
        "mapping"           : getmapping(mappings),
        "no_inline"         : no_inline,
        "output_context"    : output_dir and "directory" or "standalone",
        "output_encoding"   : getvalue(output_encodings),
        "output_format"     : output_format,
        "output_filename"   : output_dir,
        "output_separator"  : getvalue(output_page_seps),
        "root_pagename"     : getvalue(root_pagenames, "FrontPage"),
        "theme_name"        : not fragment and \
                              "%s.%s" % (getvalue(theme_names, "default"), output_format) or None,
        })

    # Define the input context and theme. Note that 'input' shadows the
    # built-in function of the same name within this function.

    input = metadata.get_input()
    theme = metadata.get_theme()

    # Treat filenames as pagenames if an input directory is indicated and if no
    # pagenames are explicitly specified.

    if input_dir:
        if pagenames:
            print >>sys.stderr, message_explicit_pagenames
            sys.exit(1)

        if all:
            if filenames:
                print >>sys.stderr, message_all_with_filenames
                sys.exit(1)
            else:
                filenames = input.all()

        pagenames = filenames
        filenames = []

    # Open each file or page, parse the content, serialise the document.
    # In Python 2, map with None as the function pairs the items of both lists,
    # supplying None for the missing items of the shorter list.

    for pagename, filename in map(None, pagenames, filenames):

        # Define a pagename if missing, using the final component of the
        # filename.

        pagename = pagename or split(filename)[-1]
        metadata.set("pagename", pagename)

        # Read either from a filename or using a pagename.

        if filename:
            pagetext = input.readfile(filename)
        else:
            pagetext = input.readpage(pagename)

        # Parse the page content.

        p = make_parser(metadata)
        d = parse(pagetext, p)

        if macros:
            p.evaluate_macros()

        p.update_metadata(metadata)

        # Obtain a serialiser using the configuration.

        serialiser = make_serialiser(metadata)
        outtext = serialise(d, serialiser)

        # Show a document tree for debugging purposes, if requested, skipping
        # theming, output writing and attachment copying for this page.

        if tree:
            print outtext
            continue

        # With a theme, apply it to the text.

        if theme:
            outtext = theme.apply(outtext)

        # If reading from a file, show the result. Otherwise, write to the
        # output context.

        output = metadata.get_output()

        if not output.can_write():
            print outtext
        else:
            output.writepage(outtext, pagename)
            print >>sys.stderr, pagename

        # NOTE(review): the nesting of this call could not be established from
        # the flattened listing; it is assumed to run for every page, not only
        # for written pages - confirm. The literal True is passed as 'all',
        # not the value of the --all flag.

        copy_attachments(p, input, output, all=True)

    # Install any theme resources.

    if theme:
        theme.install_resources()
paul@161 | 388 | |
def show_help(progname):

    """
    Write the usage summary to standard error, presenting 'progname' as the
    name of the program in the usage line.
    """

    usage = help_text % progname
    sys.stderr.write(usage + "\n")
paul@186 | 394 | |
# Help text template shown by show_help. The single %s placeholder receives the
# program name. Option names occupy a column that is twenty characters wide,
# with descriptions and their continuation lines aligned after it. Every option
# recognised by main should be described here.

help_text = """\
Usage: %s [ <options> ] ( --all | <filename>... )

Input file options:

--all               Detect all document files in the specified input directory

Input options:

--common            Obtain attachments from a common directory for all pages,
                    rather than each page having its own subdirectory of a
                    top-level attachments directory
--input-dir         Indicate an input directory containing document files
--input-dir-type    Indicate the type of input directory involved
                    (default: directory)
--input-encoding    Indicate the character encoding used in document files
--input-format      Indicate the format of the parsed documents
                    (default: moin)
--input-page-sep    Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--pagename          Indicate the page name corresponding to an indicated
                    filename, with each successive instance of this option
                    corresponding to each successive filename instance

Output options:

--bundle            Bundle resources such as stylesheets within every document,
                    useful for publishing documents that need to be copied or
                    distributed individually
--document-index    Provide a "DocumentIndex" filename to be used in links in
                    HTML format output, useful for local file browsing instead
                    of Web-published content
--format            Indicate the format to be used for serialised documents;
                    equivalent to --output-format
                    (default: html)
--fragment          Indicates that an output fragment, not an entire document,
                    is to be generated, skipping any theming activities
--no-inline         Suppress inline objects in serialised documents, linking to
                    separate objects instead
--output-dir        Indicate an output directory to contain serialised document
                    files
--output-encoding   Indicate the character encoding used in serialised document
                    files
--output-format     Indicate the format to be used for serialised documents;
                    equivalent to --format
                    (default: html)
--output-page-sep   Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--theme             Indicate a theme for serialised documents, typically
                    requiring an output directory to be useful
--tree              Produce a document tree representation on standard output
                    instead of generating output files

Configuration options:

--attachments-dir   Indicate the attachments directory name to be used
                    (default: attachments)
--macros            Perform macro evaluation/expansion before serialising
                    documents
--mapping           Indicate a name and corresponding URL to be used to
                    translate interwiki links
--root              Indicate the root page name to be used
                    (default: FrontPage)
"""
paul@186 | 457 | |
paul@84 | 458 | if __name__ == "__main__": |
paul@209 | 459 | try: |
paul@209 | 460 | main() |
paul@209 | 461 | except errors.ProcessingError, exc: |
paul@209 | 462 | print str(exc) |
paul@84 | 463 | |
paul@84 | 464 | # vim: tabstop=4 expandtab shiftwidth=4 |