#!/usr/bin/env python

"""
Moin wiki format converter.

Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat import make_parser, make_serialiser, Metadata, parse, serialise
from os.path import split
import sys

# Long messages.

message_all_with_filenames = """\
Using --all overrides any indicated pagenames. Either --all or the filenames
should be omitted."""

message_explicit_pagenames = """\
Explicit pagenames (indicated using --pagename) are only to be specified when
providing filenames without an input directory (indicated using --input-dir).

To indicate pagenames within an input directory, omit any --pagename flags."""



# Options management.

def getmapping(mappings):

    """
    Build a dictionary from 'mappings', a flat collection in which each key is
    immediately followed by its value. A trailing key without a value is
    discarded, and a later occurrence of a key replaces an earlier one.
    """

    result = {}

    # The key awaiting its value, or None when the next item is itself a key.

    pending = None

    for item in mappings:
        if pending is not None:
            result[pending] = item
            pending = None
        else:
            pending = item

    return result

def getvalue(values, default=None):

    """
    Return the first item of 'values', falling back to 'default' when 'values'
    is empty or when that first item tests as false.
    """

    if values:
        first = values[0]
        if first:
            return first

    return default



# Main program.
# Options that merely set a flag, mapped to the name of the flag involved.

_flag_options = {
    "--all" : "all",
    "--fragment" : "fragment",
    "--macros" : "macros",
    "--tree" : "tree",
    }

# Options that introduce values, mapped to the name of the collection that
# receives the arguments following the option.

_value_options = {
    "--document-index" : "document_indexes",
    "--format" : "formats",
    "--input-dir" : "input_dirs",
    "--input-dir-type" : "input_dir_types",
    "--input-encoding" : "input_encodings",
    "--input-page-sep" : "input_page_seps",
    "--mapping" : "mappings",
    "--output-dir" : "output_dirs",
    "--output-encoding" : "output_encodings",
    "--pagename" : "pagenames",
    "--root" : "root_pagenames",
    "--theme" : "theme_names",
    }

def _parse_options(args):

    """
    Interpret the command line 'args', returning a tuple of the form
    (flags, values).

    The 'flags' dictionary maps each flag name from '_flag_options' to a
    boolean indicating whether the flag was given. The 'values' dictionary
    maps each collection name from '_value_options', plus "filenames", to the
    list of arguments collected for it.
    """

    flags = dict.fromkeys(_flag_options.values(), False)
    values = dict((name, []) for name in _value_options.values())
    values["filenames"] = []

    # Obtain filenames by default.

    current = "filenames"

    for arg in args:

        # Switch to the collection indicated by a value option.

        if arg in _value_options:
            current = _value_options[arg]
            continue

        # Record flags; collect anything else as a value.

        if arg in _flag_options:
            flags[_flag_options[arg]] = True
        else:
            values[current].append(arg)

        # Mappings accept any number of values; every other option accepts a
        # single value before filenames are collected again.

        if current != "mappings":
            current = "filenames"

    return flags, values

def _writeln(stream, text):

    "Write 'text' followed by a newline to 'stream'."

    stream.write("%s\n" % (text,))

def main():

    """
    Interpret program options and perform the conversion.

    The arguments in sys.argv are interpreted, metadata describing the
    conversion is defined, and each indicated file or page is read, parsed
    and then either shown as a document tree (--tree) or serialised. The
    serialised text is written to the output context if it can be written
    to, or to standard output otherwise.

    The program exits with status 0 after showing help (--help) and with
    status 1 if --pagename, or --all together with filenames, is combined
    with --input-dir.
    """

    # Pair up pagenames and filenames, padding the shorter collection with
    # None, using whichever function this Python version provides.

    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest

    progname = split(sys.argv[0])[1]
    args = sys.argv[1:]

    if "--help" in args:
        show_help(progname)
        sys.exit(0)

    flags, values = _parse_options(args)

    filenames = values["filenames"]
    pagenames = values["pagenames"]

    output_format = getvalue(values["formats"], "html")
    input_dir = getvalue(values["input_dirs"])
    output_dir = getvalue(values["output_dirs"])

    # The input directory type is only of interest with an input directory.

    if input_dir:
        input_context = getvalue(values["input_dir_types"], "directory")
    else:
        input_context = "standalone"

    # Fragments are not themed.

    if flags["fragment"]:
        theme_name = None
    else:
        theme_name = "%s.%s" % (getvalue(values["theme_names"], "default"),
                                output_format)

    # Define metadata.

    metadata = Metadata({
        "document_index" : getvalue(values["document_indexes"]),
        "input_context" : input_context,
        "input_encoding" : getvalue(values["input_encodings"]),
        "input_filename" : input_dir,
        "input_separator" : getvalue(values["input_page_seps"]),
        "link_format" : output_format,
        "mapping" : getmapping(values["mappings"]),
        "output_context" : "directory" if output_dir else "standalone",
        "output_encoding" : getvalue(values["output_encodings"]),
        "output_format" : output_format,
        "output_filename" : output_dir,
        "root_pagename" : getvalue(values["root_pagenames"], "FrontPage"),
        "theme_name" : theme_name,
        })

    # Define the input context and theme.

    source = metadata.get_input()
    theme = metadata.get_theme()

    # Treat filenames as pagenames if an input directory is indicated and if no
    # pagenames are explicitly specified.

    if input_dir:
        if pagenames:
            _writeln(sys.stderr, message_explicit_pagenames)
            sys.exit(1)

        if flags["all"]:
            if filenames:
                _writeln(sys.stderr, message_all_with_filenames)
                sys.exit(1)

            filenames = source.all()

        pagenames = filenames
        filenames = []

    # Open each file or page, parse the content, serialise the document.

    for pagename, filename in zip_longest(pagenames, filenames):

        # Define a pagename if missing.

        pagename = pagename or split(filename)[-1]
        metadata.set("pagename", pagename)

        # Read either from a filename or using a pagename.

        if filename:
            pagetext = source.readfile(filename)
        else:
            pagetext = source.readpage(pagename)

        # Parse the page content.

        parser = make_parser(metadata)
        document = parse(pagetext, parser)

        if flags["macros"]:
            parser.evaluate_macros()

        # Show a document tree for debugging purposes, if requested.

        if flags["tree"]:
            _writeln(sys.stdout, document.prettyprint())
            continue

        # Otherwise, serialise the document.

        # Obtain a serialiser using the configuration.

        serialiser = make_serialiser(metadata)
        outtext = serialise(document, serialiser)

        # With a theme, apply it to the text.

        if theme:
            outtext = theme.apply(outtext)

        # If the output context cannot be written to, show the result.
        # Otherwise, write to the output context and report the page name.

        output = metadata.get_output()

        if not output.can_write():
            _writeln(sys.stdout, outtext)
        else:
            output.writepage(outtext, pagename)
            _writeln(sys.stderr, pagename)

    # Install any theme resources.

    if theme:
        theme.install_resources()

def show_help(progname):

    """
    Show the help text on standard error, with 'progname' given as the name of
    the program in the usage summary.
    """

    _writeln(sys.stderr, help_text % progname)

help_text = """\
Usage: %s [ <options> ] ( --all | <filename>... )

Input options:

--all               Detect all document files in the specified input directory
--input-dir         Indicate an input directory containing document files
--input-dir-type    Indicate the type of input directory involved
                    (default: directory)
--input-encoding    Indicate the character encoding used in document files
--input-page-sep    Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--pagename          Indicate the page name corresponding to an indicated
                    filename, with each successive instance of this option
                    corresponding to each successive filename instance

Output options:

--document-index    Provide a "DocumentIndex" filename to be used in links in
                    HTML format output, useful for local file browsing instead
                    of Web-published content
--format            Indicate the format to be used for serialised documents
                    (default: html)
--fragment          Indicates that an output fragment, not an entire document,
                    is to be generated, skipping any theming activities
--output-dir        Indicate an output directory to contain serialised document
                    files
--output-encoding   Indicate the character encoding used in serialised document
                    files
--theme             Indicate a theme for serialised documents, typically
                    requiring an output directory to be useful
--tree              Produce a document tree representation on standard output
                    instead of generating output files

Configuration options:

--macros            Perform macro evaluation/expansion before serialising
                    documents
--mapping           Indicate a name and corresponding URL to be used to
                    translate interwiki links
--root              Indicate the root page name to be used
                    (default: FrontPage)
"""

if __name__ == "__main__":
    main()

# vim: tabstop=4 expandtab shiftwidth=4