MoinLight

moinconvert

225:e5fa4499d41d
2019-04-13 Paul Boddie Merged changes from the default branch. single-regexp-searching
     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format converter.     5      6 Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat import copy_attachments, errors, make_parser, make_serialiser, \    23                        Metadata, parse, serialise    24 from os.path import split    25 import sys    26     27 # Long messages.    28     29 message_all_with_filenames = """\    30 Using --all overrides any indicated pagenames. Either --all or the filenames    31 should be omitted."""    32     33 message_explicit_pagenames = """\    34 Explicit pagenames (indicated using --pagename) are only to be specified when    35 providing filenames without an input directory (indicated using --input-dir).    36     37 To indicate pagenames within an input directory, omit any --pagename flags."""    38     39     40     41 # Options management.    42     43 def getmapping(mappings):    44     45     """    46     Return the given 'mappings' - a collection of key-then-value items - as a    47     dictionary.    48     """    49     50     mapping = {}    51     key = None    52     53     for arg in mappings:    54         if key is None:    55             key = arg    56         else:    57             mapping[key] = arg    58             key = None    59     60     return mapping    61     62 def getvalue(values, default=None):    63     64     """    65     Return the first value from 'values' or 'default' if 'values' is empty or    66     the first value tests as false.    67     """    68     69     return values and values[0] or default    70     71     72     73 # Main program.    74     75 def main():    76     77     "Interpret program options and perform the conversion."    78     79     dirname, progname = split(sys.argv[0])    80     args = sys.argv[1:]    81     82     if "--help" in args:    83         show_help(progname)    84         sys.exit(0)    85     86     # Option values.    87     88     attachments_dir = []    89     document_indexes = []    90     filenames = []    91     formats = []    92     input_dir_types = []    93     input_dirs = []    94     input_encodings = []    95     input_page_seps = []    96     mappings = []    97     output_dirs = []    98     output_encodings = []    99     pagenames = []   100     root_pagenames = []   101     theme_names = []   102    103     # Obtain filenames by default.   104    105     l = filenames   106    107     # Flags.   108    109     all = False   110     fragment = False   111     macros = False   112     tree = False   113    114     for arg in args:   115    116         # Flags with no following arguments.   117    118         # Detect all documents.   119    120         if arg == "--all":   121             all = True   122    123         # Detect fragment output (if serialising).   124    125         elif arg == "--fragment":   126             fragment = True   127    128         # Detect macro evaluation.   129    130         elif arg == "--macros":   131             macros = True   132    133         # Detect tree output.   134    135         elif arg == "--tree":   136             tree = True   137    138         # Options with following arguments.   139    140         # Switch to document index.   141    142         elif arg == "--attachments-dir":   143             l = attachments_dir   144             continue   145    146         # Switch to document index.   147    148         elif arg == "--document-index":   149             l = document_indexes   150             continue   151    152         # Switch to collecting formats.   153    154         elif arg == "--format":   155             l = formats   156             continue   157    158         # Switch to collecting input locations.   159    160         elif arg == "--input-dir":   161             l = input_dirs   162             continue   163    164         # Switch to collecting input context types.   165    166         elif arg == "--input-dir-type":   167             l = input_dir_types   168             continue   169    170         # Switch to collecting input encodings.   171    172         elif arg == "--input-encoding":   173             l = input_encodings   174             continue   175    176         # Switch to collecting input page hierarchy separators.   177    178         elif arg == "--input-page-sep":   179             l = input_page_seps   180             continue   181    182         # Switch to collecting mappings.   183    184         elif arg == "--mapping":   185             l = mappings   186             continue   187    188         # Switch to collecting output locations.   189    190         elif arg == "--output-dir":   191             l = output_dirs   192             continue   193    194         # Switch to collecting output encodings.   195    196         elif arg == "--output-encoding":   197             l = output_encodings   198             continue   199    200         # Switch to collecting page names.   201    202         elif arg == "--pagename":   203             l = pagenames   204             continue   205    206         # Switch to collecting root page names.   207    208         elif arg == "--root":   209             l = root_pagenames   210             continue   211    212         # Switch to collecting theme names.   213    214         elif arg == "--theme":   215             l = theme_names   216             continue   217    218         # Collect options and arguments.   219    220         else:   221             l.append(arg)   222    223             # Collect multiple mappings.   224    225             if l is mappings:   226                 continue   227    228         # Collect filenames normally.   229    230         l = filenames   231    232     format = formats and formats[0] or "html"   233     input_dir = getvalue(input_dirs)   234     output_dir = getvalue(output_dirs)   235    236     # Define metadata.   237    238     metadata = Metadata({   239         "attachments"       : getvalue(attachments_dir, "attachments"),   240         "document_index"    : getvalue(document_indexes),   241         "input_context"     : input_dir and \   242                               getvalue(input_dir_types, "directory") or \   243                               "standalone",   244         "input_encoding"    : getvalue(input_encodings),   245         "input_filename"    : input_dir,   246         "input_separator"   : getvalue(input_page_seps),   247         "link_format"       : format,   248         "mapping"           : getmapping(mappings),   249         "output_context"    : output_dir and "directory" or "standalone",   250         "output_encoding"   : getvalue(output_encodings),   251         "output_format"     : format,   252         "output_filename"   : output_dir,   253         "root_pagename"     : getvalue(root_pagenames, "FrontPage"),   254         "theme_name"        : not fragment and \   255                               "%s.%s" % (getvalue(theme_names, "default"), format) or None,   256         })   257    258     # Define the input context and theme.   259    260     input = metadata.get_input()   261     theme = metadata.get_theme()   262    263     # Treat filenames as pagenames if an input directory is indicated and if no   264     # pagenames are explicitly specified.   265    266     if input_dir:   267         if pagenames:   268             print >>sys.stderr, message_explicit_pagenames   269             sys.exit(1)   270    271         if all:   272             if filenames:   273                 print >>sys.stderr, message_all_with_filenames   274                 sys.exit(1)   275             else:   276                 filenames = input.all()   277    278         pagenames = filenames   279         filenames = []   280    281     # Open each file or page, parse the content, serialise the document.   282    283     for pagename, filename in map(None, pagenames, filenames):   284    285         # Define a pagename if missing.   286    287         pagename = pagename or split(filename)[-1]   288         metadata.set("pagename", pagename)   289    290         # Read either from a filename or using a pagename.   291    292         if filename:   293             pagetext = input.readfile(filename)   294         else:   295             pagetext = input.readpage(pagename)   296    297         # Parse the page content.   298    299         p = make_parser(metadata)   300         d = parse(pagetext, p)   301    302         if macros:   303             p.evaluate_macros()   304    305         p.update_metadata(metadata)   306    307         # Show a document tree for debugging purposes, if requested.   308    309         if tree:   310             print d.prettyprint()   311             continue   312    313         # Otherwise, serialise the document.   314    315         # Obtain a serialiser using the configuration.   316    317         serialiser = make_serialiser(metadata)   318         outtext = serialise(d, serialiser)   319    320         # With a theme, apply it to the text.   321    322         if theme:   323             outtext = theme.apply(outtext)   324    325         # If reading from a file, show the result. Otherwise, write to the   326         # output context.   327    328         output = metadata.get_output()   329    330         if not output.can_write():   331             print outtext   332         else:   333             output.writepage(outtext, pagename)   334             print >>sys.stderr, pagename   335    336             copy_attachments(p, input, output)   337    338     # Install any theme resources.   339    340     if theme:   341         theme.install_resources()   342    343 def show_help(progname):   344    345     "Show the help text."   346    347     print >>sys.stderr, help_text % progname   348    349 help_text = """\   350 Usage: %s [ <options> ] ( --all | <filename>... )   351    352 Input file options:   353    354 --all               Detect all document files in the specified input directory   355    356 Input options:   357    358 --input-dir         Indicate an input directory containing document files   359 --input-dir-type    Indicate the type of input directory involved   360                     (default: directory)   361 --input-encoding    Indicate the character encoding used in document files   362 --input-page-sep    Indicate the separator used in filenames to encode   363                     hierarchical relationships (subpages and descendant pages)   364 --pagename          Indicate the page name corresponding to an indicated   365                     filename, with each successive instance of this option   366                     corresponding to each successive filename instance   367    368 Output options:   369    370 --document-index    Provide a "DocumentIndex" filename to be used in links in   371                     HTML format output, useful for local file browsing instead   372                     of Web-published content   373 --format            Indicate the format to be used for serialised documents   374                     (default: html)   375 --fragment          Indicates that an output fragment, not an entire document,   376                     is to be generated, skipping any theming activities   377 --output-dir        Indicate an output directory to contain serialised document   378                     files   379 --output-encoding   Indicate the character encoding used in serialised document   380                     files   381 --theme             Indicate a theme for serialised documents, typically   382                     requiring an output directory to be useful   383 --tree              Produce a document tree representation on standard output   384                     instead of generating output files   385    386 Configuration options:   387    388 --macros            Perform macro evaluation/expansion before serialising   389                     documents   390 --mapping           Indicate a name and corresponding URL to be used to   391                     translate interwiki links   392 --root              Indicate the root page name to be used   393                     (default: FrontPage)   394 """   395    396 if __name__ == "__main__":   397     try:   398         main()   399     except errors.ProcessingError, exc:   400         print str(exc)   401    402 # vim: tabstop=4 expandtab shiftwidth=4