1.1 --- a/convert.py Sat Jun 15 20:54:00 2013 +0200
1.2 +++ b/convert.py Sat Jun 29 23:38:40 2013 +0200
1.3 @@ -314,6 +314,28 @@
1.4
1.5 write(join(pages_dir, commentid, "pagetitle"), "%s/%04d" % (title, position))
1.6
def _manifest_to_mapping(manifest, output_mapping):

    """
    Open the given 'manifest' and append a mapping from version identifiers to
    page names/titles to the file with the given 'output_mapping' filename.

    Each non-blank manifest line must provide seven "|"-separated fields:
    version, action, archive filename, filename, title, username and comment.
    The last path component of the filename field (as returned by 'split',
    presumably os.path.split) is written as the identifier, followed by a tab
    and the title. Entries with an empty title are omitted.

    A ValueError is raised for a non-blank line with fewer than seven fields.
    """

    mapping = []

    f = codecs.open(manifest, "r", encoding="utf-8")
    try:
        for line in f:

            # Tolerate blank lines instead of failing when unpacking them.

            if not line.strip():
                continue

            # Split at most six times so that any "|" characters in the
            # trailing comment field do not produce surplus fields.

            fields = line.rstrip("\r\n").split("|", 6)
            _version, _action, _archive_filename, filename, title, _username, _comment = fields

            if title:
                mapping.append((split(filename)[-1], title))
    finally:
        f.close()

    # Always call append, even with no entries, exactly as before.

    append(output_mapping, "".join(["\t".join(entry) + "\n" for entry in mapping]))
1.28 +
1.29 def _sort_manifest(manifest, title):
1.30
1.31 """
1.32 @@ -373,7 +395,7 @@
1.33
1.34 return "".join(result)
1.35
1.36 -def sort_manifest(pages_dir, pageid, output=None, no_translate=False):
1.37 +def sort_manifest(pages_dir, pageid, output=None, output_mapping=None, no_translate=False):
1.38
1.39 """
1.40 Using the given 'pageid', locate the manifest for the page and any page
1.41 @@ -392,6 +414,9 @@
1.42 If 'output' is given, the manifest details will be appended to the file
1.43 having that filename instead of being rewritten to the original manifest
1.44 file.
1.45 +
1.46 + If 'output_mapping' is given, a mapping from version identifiers to page
1.47 + titles will be appended to the file having that filename.
1.48 """
1.49
1.50 manifest = join(pages_dir, pageid, "manifest")
1.51 @@ -410,6 +435,13 @@
1.52
1.53 result = _sort_manifest(manifest, title)
1.54
1.55 + # Output a mapping of identifiers to page names.
1.56 +
1.57 + if output_mapping:
1.58 + _manifest_to_mapping(manifest, output_mapping)
1.59 +
1.60 + # Modify the content to include child pages and comments.
1.61 +
1.62 for _action, _archive_filename, filename, new_title, username, comment in result:
1.63
1.64 # Add child page information to the content.
1.65 @@ -549,11 +581,13 @@
1.66 for pageid in listdir(pages_dir):
1.67 sort_comments(pages_dir, pageid)
1.68
1.69 + output_mapping = join(space, "MAPPING")
1.70 +
1.71 output_manifest = join(space, "MOIN_PACKAGE")
1.72 append(output_manifest, "MoinMoinPackage|1\n")
1.73
1.74 for pageid in listdir(pages_dir):
1.75 - sort_manifest(pages_dir, pageid, output_manifest, no_translate)
1.76 + sort_manifest(pages_dir, pageid, output_manifest, output_mapping, no_translate)
1.77
1.78 # Write the page package.
1.79
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/mappings.sh Sat Jun 29 23:38:40 2013 +0200
2.3 @@ -0,0 +1,15 @@
#!/bin/sh
#
# Combine tab-separated mapping files (page identifier, page name) given as
# arguments into common mapping files written to the current directory.
#
# Usage: mappings.sh MAPPING_FILE...
#
# Outputs:
#   mapping-id-to-page.txt    page identifier -> page name
#   mapping-tiny-to-id.txt    tiny URL identifier -> page identifier
#   mapping-tiny-to-page.txt  tiny URL identifier -> page name
#
# tiny.py is invoked relative to the current directory.

ID_MAPPING="mapping-id-to-page.txt"
TINY_ID_MAPPING="mapping-tiny-to-id.txt"
TINY_MAPPING="mapping-tiny-to-page.txt"
TAB=$(printf '\t')

# Combine the space mappings into a common mapping from page identifiers to
# page names. "$@" keeps each filename intact even if it contains whitespace
# or glob characters.
sort -n -u "$@" > "$ID_MAPPING"

# Produce a common mapping from tiny URL identifiers to page identifiers.
cut -f 1 "$ID_MAPPING" | uniq | python tiny.py - > "$TINY_ID_MAPPING"

# Produce a common mapping from tiny URL identifiers to page names.
# NOTE: join is given numerically sorted input here; this relies on both files
# listing the identifiers in the same order, which holds because the tiny
# mapping is derived line by line from the identifier mapping above.
join -t "$TAB" -1 2 -2 1 "$TINY_ID_MAPPING" "$ID_MAPPING" | cut -f 2,3 | LC_ALL=C sort > "$TINY_MAPPING"
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/tiny.py Sat Jun 29 23:38:40 2013 +0200
3.3 @@ -0,0 +1,28 @@
#!/usr/bin/env python

# Convert between numeric page identifiers and tiny URL identifiers.
#
# Usage: tiny.py <value> [-r | --reverse]
#        tiny.py - [-r | --reverse]
#
# With "-", values are read one per line from standard input and each result
# is written as "<converted><TAB><original>". With -r or --reverse, tiny URL
# identifiers are converted back to numeric identifiers.
#
# See: https://answers.atlassian.com/questions/87971/what-is-the-algorithm-used-to-create-the-tiny-links

from base64 import b64decode, b64encode
from struct import pack, unpack
import sys

def tiny_url(s):

    """
    Return the tiny URL identifier for the numeric identifier 's', which may
    be an integer or its decimal string form.

    The value is packed as a little-endian unsigned 32-bit integer, trailing
    zero bytes are dropped, and the result is base64-encoded without padding,
    using "_" in place of "+" and "-" in place of "/". A struct.error is raised
    by 'pack' if the value does not fit in 32 unsigned bits.
    """

    packed = pack("<I", int(s)).rstrip(b"\x00")
    encoded = b64encode(packed).decode("ascii")
    return encoded.rstrip("=").replace("+", "_").replace("/", "-")

def identifier(s):

    """
    Return the numeric identifier encoded by the tiny URL identifier 's'.

    This reverses 'tiny_url': the base64 alphabet and padding are restored,
    the data is decoded, and the bytes are zero-extended to four bytes before
    being unpacked as a little-endian unsigned 32-bit integer.
    """

    encoded = s.replace("-", "/").replace("_", "+")

    # Pad to a multiple of four characters. A fixed target length of six would
    # add no padding to six-character identifiers (values of 2**24 and above),
    # which then fail to decode.

    data = b64decode(encoded + "=" * (-len(encoded) % 4))
    return unpack("<I", data + b"\x00" * (4 - len(data)))[0]

def main(args):

    """
    Run the command line interface with the given 'args' (the program
    arguments without the program name), returning an exit status.
    """

    if not args:
        sys.stderr.write("Usage: tiny.py (<value> | -) [-r | --reverse]\n")
        return 1

    arg = args[0]
    reverse = len(args) > 1 and args[1] in ("-r", "--reverse")
    convert = identifier if reverse else tiny_url

    if arg == "-":
        for line in sys.stdin:
            line = line.strip()
            if line:
                sys.stdout.write("%s\t%s\n" % (convert(line), line))
    else:
        sys.stdout.write("%s\n" % convert(arg))

    return 0

# Only act as a script when executed directly, so that the conversion
# functions can be imported without side effects.

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

# vim: tabstop=4 expandtab shiftwidth=4