2.1 --- a/convert.py Sun Feb 24 23:42:06 2013 +0100
2.2 +++ b/convert.py Tue Feb 26 01:07:26 2013 +0100
2.3 @@ -21,10 +21,11 @@
2.4 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
2.5 """
2.6
2.7 -from os import listdir, mkdir, makedirs
2.8 -from os.path import exists, extsep, join, splitext
2.9 +from os import chdir, getcwd, listdir, mkdir, makedirs, walk
2.10 +from os.path import exists, extsep, join, split, splitext
2.11 from zipfile import ZipFile
2.12 from cStringIO import StringIO
2.13 +from MoinMoin import wikiutil
2.14 import codecs
2.15 import xmlread
2.16 import wikiparser, xmlparser
2.17 @@ -44,7 +45,24 @@
2.18
2.19 def handle_object(self, name, elements, attributes, all_text, text):
2.20
2.21 - "Handle objects according to type."
2.22 + """
2.23 + Handle objects according to type. Objects appear as follows:
2.24 +
2.25 + <object class="Page" package="...">
2.26 + <id name="id">...</id>
2.27 + ...
2.28 + </object>
2.29 +
2.30 + Within objects, one finds things like properties and collections, which
2.31 + are handled by their own methods but which are stored in the content
2.32 + dictionary associated with the current object.
2.33 +
2.34 + By the time this method is called, the contents of the object will have
2.35 + been gathered and the properties and collections populated in the
2.36 + content dictionary. Any identifier will have been assigned to the
2.37 + textual content of the object element and will be available in the
2.38 + 'text' parameter.
2.39 + """
2.40
2.41 objecttype = attributes[-1]["class"]
2.42
2.43 @@ -98,7 +116,7 @@
2.44 # See sort_manifest for access to this data.
2.45
2.46 append(join(pages_dir, pageid, "manifest"),
2.47 - "%s|AddRevision|%s|%s|%s|%s\n" % (
2.48 + "%s|AddRevision|_|%s|%s|%s|%s\n" % ( # blank added for consistency with AddAttachment
2.49 content["version"],
2.50 versionfile,
2.51 title, # comment titles will incorporate the comment's position
2.52 @@ -151,6 +169,29 @@
2.53 print >>sys.stderr, body
2.54 raise
2.55
2.56 + # Handle attachments.
2.57 +
2.58 + elif objecttype == "Attachment":
2.59 + pageid = content["content"]
2.60 + version = content["attachmentVersion"]
2.61 +
2.62 + if content.has_key("originalVersion"):
2.63 + attachid = content["originalVersion"]
2.64 + else:
2.65 + attachid = identifier
2.66 +
2.67 + append(join(pages_dir, pageid, "attachments"),
2.68 + "%s|AddAttachment|%s|%s|%s|%s|%s\n" % (
2.69 + version,
2.70 + # Have to "taint" archive filenames, although Moin will
2.71 + # probably handle package script filename tainting.
2.72 + wikiutil.taintfilename(join("attachments", pageid, attachid, version)),
2.73 + wikiutil.taintfilename(content["fileName"]),
2.74 + "", # pagename is substituted later
2.75 + content["lastModifierName"],
2.76 + content["comment"]
2.77 + ))
2.78 +
2.79 self.content = {}
2.80
2.81 def handle_property(self, name, elements, attributes, all_text, text):
2.82 @@ -270,6 +311,65 @@
2.83
2.84 write(join(pages_dir, commentid, "pagetitle"), "%s/%04d" % (title, position))
2.85
2.86 +def _sort_manifest(manifest, title):
2.87 +
2.88 + """
2.89 + Open the given 'manifest' and sort it according to revision so that it will
2.90 + be added to MoinMoin in the correct order.
2.91 +
2.92 + If a 'title' is provided, the title column in the manifest will be augmented
2.93 + with that information. This is typically done for comments and is necessary
2.94 + for attachments.
2.95 +
2.96 + A list of manifest entries is returned.
2.97 + """
2.98 +
2.99 + f = codecs.open(manifest, "r", encoding="utf-8")
2.100 + try:
2.101 + lines = [x.split("|") for x in f.readlines()]
2.102 + lines.sort(cmp=lambda x, y: cmp(int(x[0]), int(y[0])))
2.103 +
2.104 + # Reconstruct the lines, optionally changing the titles.
2.105 +
2.106 + result = []
2.107 +
2.108 + for line in lines:
2.109 + version, _action, _archive_filename, filename, old_title, username, comment = line
2.110 +
2.111 + # Replace title information with the information already present.
2.112 +
2.113 + if title is not None:
2.114 + new_title = title
2.115 + else:
2.116 + new_title = old_title
2.117 +
2.118 + # The version is omitted now that the manifest is ordered.
2.119 +
2.120 + line = _action, _archive_filename, filename, new_title, username, comment
2.121 + result.append(line)
2.122 +
2.123 + return result
2.124 +
2.125 + finally:
2.126 + f.close()
2.127 +
2.128 +def serialise_manifest(manifest):
2.129 +
2.130 + """
2.131 + Process the 'manifest' consisting of entries, removing superfluous columns.
2.132 + """
2.133 +
2.134 + result = []
2.135 +
2.136 + for columns in manifest:
2.137 + action = columns[0]
2.138 + if action == "AddRevision":
2.139 + columns = list(columns)
2.140 + del columns[1]
2.141 + result.append("|".join(columns))
2.142 +
2.143 + return "".join(result)
2.144 +
2.145 def sort_manifest(pages_dir, pageid, output=None, no_translate=False):
2.146
2.147 """
2.148 @@ -292,6 +392,7 @@
2.149 """
2.150
2.151 manifest = join(pages_dir, pageid, "manifest")
2.152 + attachments = join(pages_dir, pageid, "attachments")
2.153 pagetitle = join(pages_dir, pageid, "pagetitle")
2.154 children = join(pages_dir, pageid, "children")
2.155 comments = join(pages_dir, pageid, "comments")
2.156 @@ -301,51 +402,37 @@
2.157 else:
2.158 title = None
2.159
2.160 - f = codecs.open(manifest, "r", encoding="utf-8")
2.161 - try:
2.162 - lines = [x.split("|") for x in f.readlines()]
2.163 - lines.sort(cmp=lambda x, y: cmp(int(x[0]), int(y[0])))
2.164 + # Sort the revision manifest.
2.165 +
2.166 + result = _sort_manifest(manifest, title)
2.167
2.168 - # Reconstruct the lines, optionally changing the titles.
2.169 + for _action, _archive_filename, filename, new_title, username, comment in result:
2.170
2.171 - result = []
2.172 + # Add child page information to the content.
2.173
2.174 - for line in lines:
2.175 - version, _addrevision, filename, old_title, username, comment = line
2.176 -
2.177 - # Replace title information with the information already present.
2.178 + if exists(children) and not no_translate:
2.179 + child_pages = []
2.180 + child_page_names = [x for x in read(children).split("\n") if x]
2.181 + child_page_names.sort()
2.182
2.183 - if title is not None:
2.184 - new_title = title
2.185 - else:
2.186 - new_title = old_title
2.187 -
2.188 - # The version is omitted now that the manifest is ordered.
2.189 + for child_page_name in child_page_names:
2.190 + child_pages.append(" * [[%s]]" % child_page_name)
2.191
2.192 - line = _addrevision, filename, new_title, username, comment
2.193 - result.append("|".join(line))
2.194 -
2.195 - # Add child page information to the content.
2.196 + append(filename, child_page_section % "\n".join(child_pages))
2.197
2.198 - if exists(children) and not no_translate:
2.199 - child_pages = []
2.200 - child_page_names = [x for x in read(children).split("\n") if x]
2.201 - child_page_names.sort()
2.202 + # Add comments to the content.
2.203 +
2.204 + if exists(comments) and title and not no_translate:
2.205 + append(filename, comment_section % title)
2.206
2.207 - for child_page_name in child_page_names:
2.208 - child_pages.append(" * [[%s]]" % child_page_name)
2.209 -
2.210 - append(filename, child_page_section % "\n".join(child_pages))
2.211 -
2.212 - # Add comments to the content.
2.213 + # Add the attachments to the manifest.
2.214
2.215 - if exists(comments) and title and not no_translate:
2.216 - append(filename, comment_section % title)
2.217 + if exists(attachments):
2.218 + result += _sort_manifest(attachments, title)
2.219
2.220 - finally:
2.221 - f.close()
2.222 + # Serialise the manifest.
2.223
2.224 - s = "".join(result)
2.225 + s = serialise_manifest(result)
2.226
2.227 if output is None:
2.228 write(manifest, s)
2.229 @@ -375,8 +462,13 @@
2.230 filename = sys.argv[1]
2.231 is_zipfile = splitext(filename)[-1] == extsep + "zip"
2.232 space = sys.argv[2]
2.233 + if len(sys.argv) > 3:
2.234 + attachments = sys.argv[3]
2.235 + else:
2.236 + attachments = None
2.237 except IndexError:
2.238 - print >>sys.stderr, "Please specify an XML file containing Wiki data and a workspace name."
2.239 + print >>sys.stderr, "Please specify an XML file containing Wiki data, a workspace name,"
2.240 + print >>sys.stderr, "and an optional attachments directory location."
2.241 print >>sys.stderr, "For example: com_entities.xml COM"
2.242 sys.exit(1)
2.243
2.244 @@ -421,40 +513,59 @@
2.245
2.246 try:
2.247 p.parse(ff)
2.248 +
2.249 + # Tidy up the import manifests, sorting each of them by revision and
2.250 + # finalising them.
2.251 +
2.252 + pages_dir = join(space, "pages")
2.253 +
2.254 + for pageid in listdir(pages_dir):
2.255 + sort_comments(pages_dir, pageid)
2.256 +
2.257 + output_manifest = join(space, "MOIN_PACKAGE")
2.258 + append(output_manifest, "MoinMoinPackage|1\n")
2.259 +
2.260 + for pageid in listdir(pages_dir):
2.261 + sort_manifest(pages_dir, pageid, output_manifest, no_translate)
2.262 +
2.263 + # Write the page package.
2.264 +
2.265 + page_package = ZipFile(package_zip, "w")
2.266 +
2.267 + try:
2.268 + # Include the page revisions.
2.269 +
2.270 + versions_dir = join(space, "versions")
2.271 +
2.272 + for versionid in listdir(versions_dir):
2.273 + page_package.write(join(versions_dir, versionid))
2.274 +
2.275 + # Include the attachments.
2.276 +
2.277 + if attachments:
2.278 + cwd = getcwd()
2.279 + chdir(split(attachments)[0])
2.280 + try:
2.281 + for path, dirnames, filenames in walk(split(attachments)[1]):
2.282 + for filename in filenames:
2.283 + # Have to "taint" archive filenames.
2.284 + page_package.write(join(path, filename), wikiutil.taintfilename(join(path, filename)))
2.285 + finally:
2.286 + chdir(cwd)
2.287 + elif is_zipfile:
2.288 + for filename in zf.namelist():
2.289 + if filename.startswith("attachments"):
2.290 + # Have to "taint" archive filenames.
2.291 + page_package.writestr(wikiutil.taintfilename(filename), zf.read(filename))
2.292 +
2.293 + # Include only the top-level manifest.
2.294 +
2.295 + page_package.write(output_manifest, "MOIN_PACKAGE")
2.296 +
2.297 + finally:
2.298 + page_package.close()
2.299 +
2.300 finally:
2.301 f.close()
2.302
2.303 - # Tidy up the import manifests, sorting each of them by revision and
2.304 - # finalising them.
2.305 -
2.306 - pages_dir = join(space, "pages")
2.307 -
2.308 - for pageid in listdir(pages_dir):
2.309 - sort_comments(pages_dir, pageid)
2.310 -
2.311 - output_manifest = join(space, "MOIN_PACKAGE")
2.312 - append(output_manifest, "MoinMoinPackage|1\n")
2.313 -
2.314 - for pageid in listdir(pages_dir):
2.315 - sort_manifest(pages_dir, pageid, output_manifest, no_translate)
2.316 -
2.317 - # Write the page package.
2.318 -
2.319 - page_package = ZipFile(package_zip, "w")
2.320 -
2.321 - try:
2.322 - # Include the page revisions.
2.323 -
2.324 - versions_dir = join(space, "versions")
2.325 -
2.326 - for versionid in listdir(versions_dir):
2.327 - page_package.write(join(versions_dir, versionid))
2.328 -
2.329 - # Include only the top-level manifest.
2.330 -
2.331 - page_package.write(output_manifest, "MOIN_PACKAGE")
2.332 -
2.333 - finally:
2.334 - page_package.close()
2.335 -
2.336 # vim: tabstop=4 expandtab shiftwidth=4