2.1 --- a/convert.py Sat Nov 02 01:19:46 2013 +0100
2.2 +++ b/convert.py Sat Nov 02 01:24:31 2013 +0100
2.3 @@ -30,9 +30,13 @@
2.4 import xmlread
2.5 import wikiparser, xmlparser
2.6 import sys
2.7 +import time, calendar
2.8
2.9 from common import get_page_title
2.10
2.11 +def date_to_seconds(s):
2.12 +    return calendar.timegm(time.strptime(s.partition(".")[0], "%Y-%m-%d %H:%M:%S"))
2.13 +
2.14 class ConfluenceHandler:
2.15
2.16 "Handle content from a Confluence Wiki dump."
2.17 @@ -122,12 +126,13 @@
2.18 # See sort_manifest for access to this data.
2.19
2.20 append(join(pages_dir, pageid, "manifest"),
2.21 - "%s|AddRevision|_|%s|%s|%s|%s\n" % ( # blank added for consistency with AddAttachment
2.22 + "%s|AddRevision|_|%s|%s|%s|%s|%d\n" % ( # blank added for consistency with AddAttachment
2.23 content["version"],
2.24 versionfile,
2.25 title, # comment titles will incorporate the comment's position
2.26 content["lastModifierName"],
2.27 - content["versionComment"]
2.28 + content["versionComment"],
2.29 + date_to_seconds(content["lastModificationDate"])
2.30 ))
2.31
2.32 # Add information to parent pages for child page lists.
2.33 @@ -187,7 +192,7 @@
2.34 attachid = identifier
2.35
2.36 append(join(pages_dir, pageid, "attachments"),
2.37 - "%s|AddAttachment|%s|%s|%s|%s|%s\n" % (
2.38 + "%s|AddAttachment|%s|%s|%s|%s|%s|%d\n" % (
2.39 version,
2.40 # Have to "taint" archive filenames, although Moin will
2.41 # probably handle package script filename tainting.
2.42 @@ -195,7 +200,8 @@
2.43 wikiutil.taintfilename(content["fileName"]),
2.44 "", # pagename is substituted later
2.45 content["lastModifierName"],
2.46 - content["comment"]
2.47 + content["comment"],
2.48 + date_to_seconds(content["lastModificationDate"])
2.49 ))
2.50
2.51 self.content = {}
2.52 @@ -331,7 +337,7 @@
2.53
2.54 lines = [x.split("|") for x in f.readlines()]
2.55 for line in lines:
2.56 - version, _action, _archive_filename, filename, title, username, comment = line
2.57 + version, _action, _archive_filename, filename, title, username, comment, mtime = line
2.58 if title:
2.59 mapping.append((split(filename)[-1], title))
2.60
2.61 @@ -363,7 +369,7 @@
2.62 result = []
2.63
2.64 for line in lines:
2.65 - version, _action, _archive_filename, filename, old_title, username, comment = line
2.66 + version, _action, _archive_filename, filename, old_title, username, comment, mtime = line
2.67
2.68 # Replace title information with the information already present.
2.69
2.70 @@ -374,7 +380,7 @@
2.71
2.72 # The version is omitted now that the manifest is ordered.
2.73
2.74 - line = _action, _archive_filename, filename, new_title, username, comment
2.75 + line = _action, _archive_filename, filename, new_title, username, comment, mtime
2.76 result.append(line)
2.77
2.78 return result
2.79 @@ -399,14 +405,14 @@
2.80
2.81 return "".join(result)
2.82
2.83 -def sort_manifest(pages_dir, pageid, output=None, output_mapping=None, no_translate=False):
2.84 +def sort_manifest(pages_dir, pageid, output_mapping=None, no_translate=False):
2.85
2.86 """
2.87 Using the given 'pageid', locate the manifest for the page and any page
2.88 title information written to a "pagetitle" file.
2.89
2.90 - Then sort the manifest according to revision so that it will be added to
2.91 - MoinMoin in the correct order.
2.92 + Then sort the manifest according to revision so that historical operations
2.93 + such as page renaming can be detected.
2.94
2.95 If a "pagetitle" file exists, the title column in the manifest will be
2.96 augmented with the contents of that file. This is typically done for
2.97 @@ -415,10 +421,6 @@
2.98 If a "children" file exists, the pages in that file will be added as a list
2.99 to the end of each revision's content.
2.100
2.101 - If 'output' is given, the manifest details will be appended to the file
2.102 - having that filename instead of being rewritten to the original manifest
2.103 - file.
2.104 -
2.105 If 'output_mapping' is given, a mapping from version identifiers to page
2.106 titles will be appended to the file having that filename.
2.107 """
2.108 @@ -453,13 +455,13 @@
2.109 final_result = []
2.110
2.111 for details in result:
2.112 - _action, _archive_filename, filename, new_title, username, comment = details
2.113 + _action, _archive_filename, filename, new_title, username, comment, mtime = details
2.114
2.115 # Detect renamed pages and add a redirect revision.
2.116
2.117 if last_title and last_title != new_title and _action == "AddRevision":
2.118 renaming_versionfile = filename + ".rename"
2.119 - final_result.append((_action, "_", renaming_versionfile, last_title, username, "Page renamed to %s" % new_title))
2.120 + final_result.append((_action, "_", renaming_versionfile, last_title, username, "Page renamed to %s" % new_title, mtime))
2.121 write(renaming_versionfile, "#REDIRECT %s" % new_title)
2.122
2.123 last_title = new_title
2.124 @@ -513,14 +515,23 @@
2.125 if exists(attachments):
2.126 final_result += _sort_manifest(attachments, title)
2.127
2.128 + return final_result
2.129 +
2.130 +def sort_final_manifest(entries, output):
2.131 +
2.132 +    """
2.133 +    Sort the manifest 'entries' in place by last modified time and serialise them.
2.134 +    The manifest details will be appended to the file named by 'output'.
2.135 +    """
2.136 +
2.137 +    # The final entry in each element is the mtime; equal mtimes keep their order (stable sort).
2.138 +
2.139 +    entries.sort(key=lambda entry: int(entry[-1]))
2.140 +
2.141      # Serialise the manifest.
2.142
2.143 -    s = serialise_manifest(final_result)
2.144 -
2.145 -    if output is None:
2.146 -        write(manifest, s)
2.147 -    else:
2.148 -        append(output, s)
2.149 +    s = serialise_manifest(entries)
2.150 +    append(output, s)
2.151
2.152 def get_space_and_name(page_name):
2.153 try:
2.154 @@ -631,8 +642,12 @@
2.155 output_manifest = join(space, "MOIN_PACKAGE")
2.156 append(output_manifest, "MoinMoinPackage|1\n")
2.157
2.158 + entries = []
2.159 +
2.160 for pageid in listdir(pages_dir):
2.161 - sort_manifest(pages_dir, pageid, output_manifest, output_mapping, no_translate)
2.162 + entries += sort_manifest(pages_dir, pageid, output_mapping, no_translate)
2.163 +
2.164 + sort_final_manifest(entries, output_manifest)
2.165
2.166 # Write the page package.
2.167
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/merge.py Sat Nov 02 01:24:31 2013 +0100
3.3 @@ -0,0 +1,155 @@
3.4 +#!/usr/bin/env python
3.5 +
3.6 +"""
3.7 +Merge page packages.
3.8 +
3.9 +Copyright (C) 2013 Paul Boddie <paul@boddie.org.uk>
3.10 +
3.11 +This software is free software; you can redistribute it and/or
3.12 +modify it under the terms of the GNU General Public License as
3.13 +published by the Free Software Foundation; either version 2 of
3.14 +the License, or (at your option) any later version.
3.15 +
3.16 +This software is distributed in the hope that it will be useful,
3.17 +but WITHOUT ANY WARRANTY; without even the implied warranty of
3.18 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3.19 +GNU General Public License for more details.
3.20 +
3.21 +You should have received a copy of the GNU General Public
3.22 +License along with this library; see the file LICENCE.txt
3.23 +If not, write to the Free Software Foundation, Inc.,
3.24 +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
3.25 +"""
3.26 +
3.27 +from os import chdir, getcwd, makedirs, mkdir, walk
3.28 +from os.path import exists, extsep, join, normpath, relpath, split, splitext
3.29 +from shutil import copy
3.30 +from zipfile import ZipFile
3.31 +from cStringIO import StringIO
3.32 +import sys
3.33 +
3.34 +def get_filenames(package):
3.35 +    "Return the paths of all files found beneath 'package', relative to it."
3.36 +    found = []
3.37 +    for dirpath, _dirnames, leafnames in walk(package):
3.38 +        reldir = relpath(dirpath, package)
3.39 +        found.extend([join(reldir, leafname) for leafname in leafnames])
3.40 +    return found
3.41 +
3.42 +# Main program.
3.43 +
3.44 +if __name__ == "__main__":
3.45 +    packages = sys.argv[2:]
3.46 +
3.47 +    if not packages:
3.48 +        print >>sys.stderr, """
3.49 +Please specify an output basename followed by a list of page packages.
3.50 +For example:
3.51 +
3.52 +%(progname)s OUT COM.zip DEV.zip DOC.zip SEC.zip
3.53 +
3.54 +As a result of running this program, a page package will be created at the
3.55 +specified basename along with an archive of the form OUT.zip given a basename of
3.56 +OUT.
3.57 +""" % {"progname" : split(sys.argv[0])[-1]}
3.58 +
3.59 +        sys.exit(1)
3.60 +
3.61 +    outdir = sys.argv[1]
3.62 +    outleafname = split(outdir)[-1]
3.63 +
3.64 +    if exists(outdir):
3.65 +        print >>sys.stderr, "Directory %s exists already. Please choose another or remove its contents." % outdir
3.66 +        sys.exit(1)
3.67 +
3.68 +    package_zip = outdir + extsep + "zip"
3.69 +
3.70 +    if exists(package_zip):
3.71 +        print >>sys.stderr, "Page package exists. Please remove or rename it:", package_zip
3.72 +        sys.exit(1)
3.73 +
3.74 +    # Make the output directory for the merged packages.
3.75 +
3.76 +    mkdir(outdir)
3.77 +
3.78 +    # Collect entries from all packages for sorting.
3.79 +
3.80 +    entries = []
3.81 +
3.82 +    for package in packages:
3.83 +        is_zipfile = splitext(package)[-1] == extsep + "zip"
3.84 +        leafname = split(package)[-1]
3.85 +
3.86 +        if is_zipfile:
3.87 +            # Pass the filename so that ZipFile owns the file and zf.close() releases it.
3.88 +            zf = ZipFile(package)
3.89 +            ff = StringIO(zf.read("MOIN_PACKAGE"))
3.90 +        else:
3.91 +            ff = open(join(package, "MOIN_PACKAGE"))
3.92 +
3.93 +        try:
3.94 +            # Skip the first line and get the manifest entries, ignoring blank lines.
3.95 +            # NOTE: We could use the MoinMoin.package API here.
3.96 +
3.97 +            ff.readline()
3.98 +            entries += [x.rstrip("\n").split("|") for x in ff.readlines() if x.strip()]
3.99 +
3.100 +            # Copy files from the package into the output directory.
3.101 +
3.102 +            if is_zipfile:
3.103 +                filenames = zf.namelist()
3.104 +            else:
3.105 +                filenames = get_filenames(package)
3.106 +
3.107 +            for filename in filenames:
3.108 +                if split(filename)[-1] == "MOIN_PACKAGE":
3.109 +                    continue
3.110 +
3.111 +                # Extract files, tidying up any filesystem pathnames.
3.112 +
3.113 +                if is_zipfile:
3.114 +                    zf.extract(filename, outdir)
3.115 +                else:
3.116 +                    target = normpath(join(outdir, leafname, filename))
3.117 +                    target_dir = split(target)[0]
3.118 +                    if not exists(target_dir):
3.119 +                        makedirs(target_dir)
3.120 +                    copy(join(package, filename), target)
3.121 +
3.122 +        finally:
3.123 +            ff.close()
3.124 +            if is_zipfile:
3.125 +                zf.close()
3.126 +
3.127 +    # The final entry in each element is the mtime; equal mtimes keep their order (stable sort).
3.128 +
3.129 +    entries.sort(key=lambda entry: int(entry[-1]))
3.130 +
3.131 +    # Write the combined manifest to the output directory.
3.132 +
3.133 +    output_manifest = join(outdir, "MOIN_PACKAGE")
3.134 +
3.135 +    f = open(output_manifest, "w")
3.136 +    write = f.write
3.137 +    try:
3.138 +        write("MoinMoinPackage|1\n")
3.139 +        for entry in entries:
3.140 +
3.141 +            # Reference the adjusted location of each file.
3.142 +
3.143 +            entry[1] = join(outleafname, entry[1])
3.144 +            write("|".join(entry) + "\n")
3.145 +    finally:
3.146 +        f.close()
3.147 +
3.148 +    # Write the page package: read each file via outdir, but store it under the output leafname.
3.149 +
3.150 +    page_package = ZipFile(package_zip, "w")
3.151 +
3.152 +    try:
3.153 +        for filename in get_filenames(outdir):
3.154 +            page_package.write(join(outdir, filename), join(outleafname, filename))
3.155 +    finally:
3.156 +        page_package.close()
3.157 +
3.158 +# vim: tabstop=4 expandtab shiftwidth=4