1.1 --- a/convert.py Sun Apr 22 02:19:08 2012 +0200
1.2 +++ b/convert.py Sun Apr 22 19:16:47 2012 +0200
1.3 @@ -33,10 +33,10 @@
1.4
1.5 "Handle content from a Confluence Wiki dump."
1.6
1.7 - def __init__(self, directory):
1.8 + def __init__(self, space):
1.9 self.content = {}
1.10 self.elements = []
1.11 - self.directory = directory
1.12 + self.space = space
1.13
1.14 def handle_object(self, name, elements, attributes, all_text, text):
1.15
1.16 @@ -46,8 +46,8 @@
1.17 identifier = text.strip()
1.18 content = self.content
1.19
1.20 - pages_dir = join(self.directory, "pages")
1.21 - versions_dir = join(self.directory, "versions")
1.22 + pages_dir = join(self.space, "pages")
1.23 + versions_dir = join(self.space, "versions")
1.24
1.25 # Handle particular types.
1.26
1.27 @@ -73,10 +73,14 @@
1.28
1.29 mkdirs(join(pages_dir, pageid))
1.30
1.31 + title = content["title"]
1.32 + if title:
1.33 + title = "%s/%s" % (self.space, title)
1.34 +
1.35 append(join(pages_dir, pageid, "manifest"), "%s|AddRevision|%s|%s|%s|%s\n" % (
1.36 content["version"],
1.37 versionfile,
1.38 - content["title"] or content["version"], # comment titles will incorporate the version
1.39 + title or content["version"], # comment titles will incorporate the version
1.40 content["lastModifierName"],
1.41 content["versionComment"]))
1.42
1.43 @@ -89,7 +93,7 @@
1.44
1.45 for _comment, commentid in content["comments"]:
1.46 mkdirs(join(pages_dir, commentid))
1.47 - append(join(pages_dir, commentid, "pagetitle"), content["title"])
1.48 + append(join(pages_dir, commentid, "pagetitle"), title)
1.49
1.50 # Some metadata is not particularly relevant. For example,
1.51 # ancestors, children, parent are navigation-related.
1.52 @@ -100,7 +104,10 @@
1.53 # Handle revisions.
1.54
1.55 elif objecttype == "BodyContent":
1.56 - translate(join(versions_dir, content["content"]), content["body"])
1.57 + body = content["body"]
1.58 + if not body:
1.59 + body = "## Empty page."
1.60 + translate(join(versions_dir, content["content"]), body)
1.61
1.62 self.content = {}
1.63
1.64 @@ -226,18 +233,24 @@
1.65 filename = sys.argv[1]
1.66 is_zipfile = splitext(filename)[-1] == extsep + "zip"
1.67
1.68 - directory = sys.argv[2]
1.69 + space = sys.argv[2]
1.70
1.71 - if exists(directory):
1.72 - print >>sys.stderr, "Directory exists. Please choose another or remove its contents."
1.73 + if exists(space):
1.74 + print >>sys.stderr, "Directory exists for space %s. Please choose another or remove its contents." % space
1.75 sys.exit(1)
1.76
1.77 - mkdir(directory)
1.78 - mkdirs(join(directory, "pages"))
1.79 - mkdirs(join(directory, "versions"))
1.80 + package_zip = space + extsep + "zip"
1.81 +
1.82 + if exists(package_zip):
1.83 + print >>sys.stderr, "Page package exists. Please remove or rename it:", package_zip
1.84 + sys.exit(1)
1.85 +
1.86 + mkdir(space)
1.87 + mkdirs(join(space, "pages"))
1.88 + mkdirs(join(space, "versions"))
1.89
1.90 p = xmlread.ConfigurableParser()
1.91 - handler = ConfluenceHandler(directory)
1.92 + handler = ConfluenceHandler(space)
1.93
1.94 p["object"] = handler.handle_object
1.95 p["property"] = handler.handle_property
1.96 @@ -265,9 +278,9 @@
1.97 # Tidy up the import manifests, sorting each of them by revision and
1.98 # finalising them.
1.99
1.100 - pages_dir = join(directory, "pages")
1.101 + pages_dir = join(space, "pages")
1.102
1.103 - output_manifest = join(directory, "MOIN_PACKAGE")
1.104 + output_manifest = join(space, "MOIN_PACKAGE")
1.105 append(output_manifest, "MoinMoinPackage|1\n")
1.106
1.107 for pageid in listdir(pages_dir):
1.108 @@ -277,12 +290,12 @@
1.109
1.110 # Write the page package.
1.111
1.112 - page_package = ZipFile(directory + extsep + "zip", "w")
1.113 + page_package = ZipFile(package_zip, "w")
1.114
1.115 try:
1.116 # Include the page revisions.
1.117
1.118 - versions_dir = join(directory, "versions")
1.119 + versions_dir = join(space, "versions")
1.120
1.121 for versionid in listdir(versions_dir):
1.122 page_package.write(join(versions_dir, versionid))