# HG changeset patch # User Paul Boddie # Date 1383351871 -3600 # Node ID c3d772d8cbad526c865a04e8a2198fcc6dc722aa # Parent b8232447118ae973533bee57c595761962d63ce4 Added revision and attachment timestamping, sorting edits by such time details. Added a merge script to combine page packages for a single coherent import. diff -r b8232447118a -r c3d772d8cbad README.txt --- a/README.txt Sat Nov 02 01:19:46 2013 +0100 +++ b/README.txt Sat Nov 02 01:24:31 2013 +0100 @@ -38,9 +38,10 @@ MoinMoin Prerequisites ---------------------- -The page package installer does not preserve user information when installing -page revisions. This can be modified by applying a patch to MoinMoin as -follows while at the top level of the MoinMoin source distribution: +The page package installer does not preserve user information or the last +modified time when installing page revisions. This can be modified by applying +a patch to MoinMoin as follows while at the top level of the MoinMoin source +distribution: patch -p1 $CCDIR/patches/patch-moin-1.9-MoinMoin-packages.diff @@ -120,6 +121,19 @@ This requires a suitable moinsetup.cfg file in the working directory. +Importing Many Workspaces +------------------------- + +Where more than one namespace is to be imported, the page packages should be +merged so that the resulting history information is ordered correctly. + +To merge packages, use a command of the following form: + +python merge.py OUT COM.zip DEV.zip DOC.zip SEC.zip + +A directory called OUT and a page package called OUT.zip will be produced. The +latter can then be imported into MoinMoin as described above. + Mappings from Identifiers to Pages ---------------------------------- diff -r b8232447118a -r c3d772d8cbad convert.py --- a/convert.py Sat Nov 02 01:19:46 2013 +0100 +++ b/convert.py Sat Nov 02 01:24:31 2013 +0100 @@ -30,9 +30,13 @@ import xmlread import wikiparser, xmlparser import sys +import time, calendar from common import get_page_title +def date_to_seconds(s): + return calendar.timegm(time.strptime(s.split(".", 1)[0], "%Y-%m-%d %H:%M:%S")) + class ConfluenceHandler: "Handle content from a Confluence Wiki dump." @@ -122,12 +126,13 @@ # See sort_manifest for access to this data. append(join(pages_dir, pageid, "manifest"), - "%s|AddRevision|_|%s|%s|%s|%s\n" % ( # blank added for consistency with AddAttachment + "%s|AddRevision|_|%s|%s|%s|%s|%d\n" % ( # blank added for consistency with AddAttachment content["version"], versionfile, title, # comment titles will incorporate the comment's position content["lastModifierName"], - content["versionComment"] + content["versionComment"], + date_to_seconds(content["lastModificationDate"]) )) # Add information to parent pages for child page lists. @@ -187,7 +192,7 @@ attachid = identifier append(join(pages_dir, pageid, "attachments"), - "%s|AddAttachment|%s|%s|%s|%s|%s\n" % ( + "%s|AddAttachment|%s|%s|%s|%s|%s|%d\n" % ( version, # Have to "taint" archive filenames, although Moin will # probably handle package script filename tainting. @@ -195,7 +200,8 @@ wikiutil.taintfilename(content["fileName"]), "", # pagename is substituted later content["lastModifierName"], - content["comment"] + content["comment"], + date_to_seconds(content["lastModificationDate"]) )) self.content = {} @@ -331,7 +337,7 @@ lines = [x.split("|") for x in f.readlines()] for line in lines: - version, _action, _archive_filename, filename, title, username, comment = line + version, _action, _archive_filename, filename, title, username, comment, mtime = line if title: mapping.append((split(filename)[-1], title)) @@ -363,7 +369,7 @@ result = [] for line in lines: - version, _action, _archive_filename, filename, old_title, username, comment = line + version, _action, _archive_filename, filename, old_title, username, comment, mtime = line # Replace title information with the information already present. @@ -374,7 +380,7 @@ # The version is omitted now that the manifest is ordered. - line = _action, _archive_filename, filename, new_title, username, comment + line = _action, _archive_filename, filename, new_title, username, comment, mtime result.append(line) return result @@ -399,14 +405,14 @@ return "".join(result) -def sort_manifest(pages_dir, pageid, output=None, output_mapping=None, no_translate=False): +def sort_manifest(pages_dir, pageid, output_mapping=None, no_translate=False): """ Using the given 'pageid', locate the manifest for the page and any page title information written to a "pagetitle" file. - Then sort the manifest according to revision so that it will be added to - MoinMoin in the correct order. + Then sort the manifest according to revision so that historical operations + such as page renaming can be detected. If a "pagetitle" file exists, the title column in the manifest will be augmented with the contents of that file. This is typically done for @@ -415,10 +421,6 @@ If a "children" file exists, the pages in that file will be added as a list to the end of each revision's content. - If 'output' is given, the manifest details will be appended to the file - having that filename instead of being rewritten to the original manifest - file. - If 'output_mapping' is given, a mapping from version identifiers to page titles will be appended to the file having that filename. """ @@ -453,13 +455,13 @@ final_result = [] for details in result: - _action, _archive_filename, filename, new_title, username, comment = details + _action, _archive_filename, filename, new_title, username, comment, mtime = details # Detect renamed pages and add a redirect revision. if last_title and last_title != new_title and _action == "AddRevision": renaming_versionfile = filename + ".rename" - final_result.append((_action, "_", renaming_versionfile, last_title, username, "Page renamed to %s" % new_title)) + final_result.append((_action, "_", renaming_versionfile, last_title, username, "Page renamed to %s" % new_title, mtime)) write(renaming_versionfile, "#REDIRECT %s" % new_title) last_title = new_title @@ -513,14 +515,23 @@ if exists(attachments): final_result += _sort_manifest(attachments, title) + return final_result + +def sort_final_manifest(entries, output): + + """ + Sort the manifest 'entries' by last modified time and serialise it. + The manifest details will be appended to the file named by 'output'. + """ + + # The final entry in each element is the mtime. + + entries.sort(cmp=lambda x, y: cmp(int(x[-1]), int(y[-1]))) + # Serialise the manifest. - s = serialise_manifest(final_result) - - if output is None: - write(manifest, s) - else: - append(output, s) + s = serialise_manifest(entries) + append(output, s) def get_space_and_name(page_name): try: @@ -631,8 +642,12 @@ output_manifest = join(space, "MOIN_PACKAGE") append(output_manifest, "MoinMoinPackage|1\n") + entries = [] + for pageid in listdir(pages_dir): - sort_manifest(pages_dir, pageid, output_manifest, output_mapping, no_translate) + entries += sort_manifest(pages_dir, pageid, output_mapping, no_translate) + + sort_final_manifest(entries, output_manifest) # Write the page package. diff -r b8232447118a -r c3d772d8cbad merge.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge.py Sat Nov 02 01:24:31 2013 +0100 @@ -0,0 +1,155 @@ +#!/usr/bin/env python + +""" +Merge page packages. + +Copyright (C) 2013 Paul Boddie + +This software is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of +the License, or (at your option) any later version. + +This software is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public +License along with this library; see the file LICENCE.txt +If not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +""" + +from os import chdir, getcwd, makedirs, mkdir, walk +from os.path import exists, extsep, join, normpath, relpath, split, splitext +from shutil import copy +from zipfile import ZipFile +from cStringIO import StringIO +import sys + +def get_filenames(package): + results = [] + for path, dirnames, filenames in walk(package): + path = relpath(path, package) + for filename in filenames: + results.append(join(path, filename)) + return results + +# Main program. + +if __name__ == "__main__": + packages = sys.argv[2:] + + if not packages: + print >>sys.stderr, """ +Please specify an output basename followed by a list of page packages. +For example: + +%(progname)s OUT COM.zip DEV.zip DOC.zip SEC.zip + +As a result of running this program, a page package will be created at the +specified basename along with an archive of the form OUT.zip given a basename of +OUT. +""" % {"progname" : split(sys.argv[0])[-1]} + + sys.exit(1) + + outdir = sys.argv[1] + outleafname = split(outdir)[-1] + + if exists(outdir): + print >>sys.stderr, "Directory %s exists already. Please choose another or remove its contents." % outdir + sys.exit(1) + + package_zip = outdir + extsep + "zip" + + if exists(package_zip): + print >>sys.stderr, "Page package exists. Please remove or rename it:", package_zip + sys.exit(1) + + # Make the output directory for the merged packages. + + mkdir(outdir) + + # Collect entries from all packages for sorting. + + entries = [] + + for package in packages: + is_zipfile = splitext(package)[-1] == extsep + "zip" + leafname = split(package)[-1] + + if is_zipfile: + f = open(package, "rb") + zf = ZipFile(f) + ff = StringIO(zf.read("MOIN_PACKAGE")) + else: + ff = open(join(package, "MOIN_PACKAGE")) + + try: + # Skip the first line and get the manifest entries. + # NOTE: We could use the MoinMoin.package API here. + + ff.readline() + entries += [x.rstrip("\n").split("|") for x in ff.readlines()] + + # Copy files from the package into the output directory. + + if is_zipfile: + filenames = zf.namelist() + else: + filenames = get_filenames(package) + + for filename in filenames: + if split(filename)[-1] == "MOIN_PACKAGE": + continue + + # Extract files, tidying up any filesystem pathnames. + + if is_zipfile: + zf.extract(filename, outdir) + else: + target = normpath(join(outdir, leafname, filename)) + target_dir = split(target)[0] + if not exists(target_dir): + makedirs(target_dir) + copy(join(package, filename), target) + + finally: + ff.close() + if is_zipfile: + zf.close() + + # The final entry in each element is the mtime. + + entries.sort(cmp=lambda x, y: cmp(int(x[-1]), int(y[-1]))) + + # Write the combined manifest to the output directory. + + output_manifest = join(outdir, "MOIN_PACKAGE") + + f = open(output_manifest, "w") + write = f.write + try: + write("MoinMoinPackage|1\n") + for entry in entries: + + # Reference the adjusted location of each file. + + entry[1] = join(outleafname, entry[1]) + write("|".join(entry) + "\n") + finally: + f.close() + + # Write the page package. + + page_package = ZipFile(package_zip, "w") + + try: + for filename in get_filenames(outdir): + page_package.write(join(outleafname, filename)) + finally: + page_package.close() + +# vim: tabstop=4 expandtab shiftwidth=4