# HG changeset patch
# User Paul Boddie
# Date 1372541920 -7200
# Node ID 30ace9041d8efe6006f464bab9903b6f3dedf073
# Parent 18ed5c4f294a93bccb1b7302d9ca25d0d12928cd
Added generation of identifier and tiny URL mappings.

diff -r 18ed5c4f294a -r 30ace9041d8e convert.py
--- a/convert.py	Sat Jun 15 20:54:00 2013 +0200
+++ b/convert.py	Sat Jun 29 23:38:40 2013 +0200
@@ -314,6 +314,28 @@
 
         write(join(pages_dir, commentid, "pagetitle"), "%s/%04d" % (title, position))
 
+def _manifest_to_mapping(manifest, output_mapping):
+
+    """
+    Open the given 'manifest' and write a mapping from version identifiers to
+    page names/titles to the file with the given 'output_mapping' filename.
+    """
+
+    f = codecs.open(manifest, "r", encoding="utf-8")
+    try:
+        mapping = []
+
+        lines = [x.split("|") for x in f.readlines()]
+        for line in lines:
+            version, _action, _archive_filename, filename, title, username, comment = line
+            if title:
+                mapping.append((split(filename)[-1], title))
+
+        append(output_mapping, "".join([("\t".join(x) + "\n") for x in mapping]))
+
+    finally:
+        f.close()
+
 def _sort_manifest(manifest, title):
 
     """
@@ -373,7 +395,7 @@
 
     return "".join(result)
 
-def sort_manifest(pages_dir, pageid, output=None, no_translate=False):
+def sort_manifest(pages_dir, pageid, output=None, output_mapping=None, no_translate=False):
 
     """
     Using the given 'pageid', locate the manifest for the page and any page
@@ -392,6 +414,9 @@
     If 'output' is given, the manifest details will be appended to the file
     having that filename instead of being rewritten to the original manifest
     file.
+
+    If 'output_mapping' is given, a mapping from version identifiers to page
+    titles will be appended to the file having that filename.
     """
 
     manifest = join(pages_dir, pageid, "manifest")
@@ -410,6 +435,13 @@
 
     result = _sort_manifest(manifest, title)
 
+    # Output a mapping of identifiers to page names.
+
+    if output_mapping:
+        _manifest_to_mapping(manifest, output_mapping)
+
+    # Modify the content to include child pages and comments.
+
     for _action, _archive_filename, filename, new_title, username, comment in result:
 
         # Add child page information to the content.
@@ -549,11 +581,13 @@
     for pageid in listdir(pages_dir):
         sort_comments(pages_dir, pageid)
 
+    output_mapping = join(space, "MAPPING")
+
     output_manifest = join(space, "MOIN_PACKAGE")
     append(output_manifest, "MoinMoinPackage|1\n")
 
     for pageid in listdir(pages_dir):
-        sort_manifest(pages_dir, pageid, output_manifest, no_translate)
+        sort_manifest(pages_dir, pageid, output_manifest, output_mapping, no_translate)
 
     # Write the page package.
 
diff -r 18ed5c4f294a -r 30ace9041d8e mappings.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mappings.sh	Sat Jun 29 23:38:40 2013 +0200
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+MAPPINGS=$*
+ID_MAPPING="mapping-id-to-page.txt"
+TINY_ID_MAPPING="mapping-tiny-to-id.txt"
+TINY_MAPPING="mapping-tiny-to-page.txt"
+TAB=`printf '\t'`
+
+# Combine the space mappings into a common mapping from page identifiers to
+# page names.
+sort -n -u $MAPPINGS > "$ID_MAPPING"
+
+# Produce a common mapping from tiny URL identifiers to page names.
+cut -f 1 "$ID_MAPPING" | uniq | python tiny.py - > "$TINY_ID_MAPPING"
+join -t "$TAB" -1 2 -2 1 "$TINY_ID_MAPPING" "$ID_MAPPING" | cut -f 2,3 | LC_ALL=C sort > "$TINY_MAPPING"
diff -r 18ed5c4f294a -r 30ace9041d8e tiny.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tiny.py	Sat Jun 29 23:38:40 2013 +0200
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+# See: https://answers.atlassian.com/questions/87971/what-is-the-algorithm-used-to-create-the-tiny-links
+
+from base64 import b64decode, b64encode
+from struct import pack, unpack
+import sys
+
+def tiny_url(s):
+    return b64encode(pack(" 2 and sys.argv[2] in ("-r", "--reverse")
+fn = reverse and identifier or tiny_url
+
+if arg == "-":
+    for line in sys.stdin.readlines():
+        line = line.strip()
+        if line:
+            print "%s\t%s" % (fn(line), line)
+else:
+    print fn(arg)
+
+# vim: tabstop=4 expandtab shiftwidth=4