# HG changeset patch
# User Paul Boddie
# Date 1633471829 -7200
# Node ID e9dbcdac88503ee71f25d722e6c951c2e2c89f50
# Parent  9522ce7060da7a5d58f2c10ecd65b5c196978032
Added tentative Moin directory storage support.

diff -r 9522ce7060da -r e9dbcdac8850 docs/wiki/Input
--- a/docs/wiki/Input Wed Oct 06 00:04:30 2021 +0200
+++ b/docs/wiki/Input Wed Oct 06 00:10:29 2021 +0200
@@ -17,6 +17,19 @@
 name, and with any configured `input_separator` being used in filenames to
 construct hierarchical page relationships.
 
+== Moin Directory ==
+
+A directory containing wiki page files stored in a MoinMoin arrangement.
+
+ [[Metadata#input_context|input_context]]:: `moindirectory`
+
+Each directory within the root directory contains data for a different wiki
+page, with the directory name providing the page name (in an encoded form).
+Within the directory for each page, a `current` file provides the current
+revision number, and a `revisions` directory contains one or more numbered
+files each providing the content of the appropriate revision. Currently, only
+the current revision is accessed.
+
 == Standalone ==
 
 A standalone wiki page file.
diff -r 9522ce7060da -r e9dbcdac8850 moinformat/input/moindirectory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/input/moindirectory.py Wed Oct 06 00:10:29 2021 +0200
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+
+"""
+MoinMoin directory input context.
+
+Copyright (C) 2018, 2019, 2021 Paul Boddie
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.input.directory import DirectoryInput
+from moinformat.utils.directory import Directory
+from os.path import exists, join
+import re
+
+class MoinDirectoryInput(DirectoryInput):
+
+    "A MoinMoin directory input context."
+
+    name = "moindirectory"
+
+    # Pagename and filename translation.
+
+    unsafe_pagename_characters = re.compile(r"([^A-Za-z0-9_]+)")
+    encoded_pagename_groups = re.compile(r"(\([A-Fa-f0-9]+\))")
+
+    def __init__(self, metadata):
+
+        "Initialise the context with the given 'metadata'."
+
+        if not metadata.has_key("input_filename"):
+            raise ValueError(metadata)
+
+        DirectoryInput.__init__(self, metadata)
+
+        # Do not search recursively in nested directories for pages.
+        # This overrides the common directory input behaviour.
+
+        self.nested = False
+
+    def _get_attachments_dir(self, pagename):
+
+        "Return the attachments directory for 'pagename'."
+
+        return self.dir.get_filename(join(self.to_filename(pagename), "attachments"))
+
+    def _get_current_revision(self, filename):
+
+        """
+        Return the current revision recorded for the page directory 'filename',
+        or None if the "current" file cannot be opened.
+        """
+
+        filename = self.dir.get_filename(join(filename, "current"))
+
+        try:
+            current = open(filename)
+        except IOError:
+            return None
+
+        try:
+            return current.read().strip()
+        finally:
+            current.close()
+
+    def all(self):
+
+        "Return all pages in the context."
+
+        # Ignore dotfiles. Do not search recursively, and obtain directory names
+        # instead of filenames to identify pages.
+
+        pages = []
+
+        for filename in self.dir.select_files("[!.]*", self.nested,
+                                              directories=True):
+
+            revision = self._get_current_revision(filename)
+
+            if revision and exists(self.dir.get_filename(join(filename, "revisions", revision))):
+                pages.append(self.to_pagename(filename))
+
+        return pages
+
+    def all_attachments(self):
+
+        "Return all attachment filenames in the context."
+
+        return self.dir.select_files(join("*", "attachments", "*"), True)
+
+    def get_attachments(self, pagename):
+
+        """
+        Return all attachment filenames for the given 'pagename'. Each filename
+        is relative to the appropriate attachment directory.
+        """
+
+        return Directory(self._get_attachments_dir(pagename)).select_files("*")
+
+    # Page characteristics.
+
+    def subpage_filenames(self, pagename):
+
+        "Return the subpage filenames of 'pagename'."
+
+        pattern = self.to_filename("%s%s" % (pagename, self.level_sep))
+        return self.dir.select_files("%s*" % pattern, self.nested, directories=True)
+
+    # Page access methods.
+
+    def readfile(self, filename, encoding=None):
+
+        """
+        Return the contents of the file having the given 'filename' and optional
+        'encoding'. An IOError is raised if no current revision is recorded.
+        """
+
+        # Moin employs a file to indicate the current revision and a directory
+        # containing the revisions.
+
+        revision = self._get_current_revision(filename)
+
+        # Without a readable current revision there is no content to return,
+        # and a None value must not reach the path join operation.
+
+        if not revision:
+            raise IOError("No current revision for %r" % filename)
+
+        return self.readpath(self.dir.get_filename(join(filename, "revisions",
+                                                        revision)), encoding)
+
+    # Convenience methods.
+
+    def get_attachment_filename(self, pagename, filename):
+
+        """
+        Return the full path of an attachment file for the given 'pagename'
+        having the given 'filename'.
+        """
+
+        if not pagename:
+            return None
+
+        return self.dir.get_filename(join(self.to_filename(pagename),
+                                          "attachments",
+                                          filename))
+
+    # NOTE: Translation methods should encode filenames appropriately.
+
+    def to_filename(self, pagename):
+
+        "Return the filename corresponding to 'pagename'."
+
+        # Transform "special" characters into the Moin "(xx)" representation.
+        # Groups of characters are enclosed within each instance.
+
+        # NOTE: Each character is assumed to have an ordinal below 256, giving
+        # NOTE: exactly two digits per character as expected by to_pagename.
+
+        encoded = []
+        append = encoded.append
+        safe = True
+
+        for group in self.unsafe_pagename_characters.split(pagename):
+            if safe:
+                append(group)
+            else:
+                append("(")
+                for ch in group:
+                    append("%02x" % ord(ch))
+                append(")")
+
+            safe = not safe
+
+        return DirectoryInput.to_filename(self, "".join(encoded))
+
+    def to_pagename(self, filename):
+
+        "Return the pagename corresponding to 'filename'."
+
+        # Transform "special" characters from the Moin "(xx)" representation.
+
+        decoded = []
+        append = decoded.append
+        safe = True
+
+        for group in self.encoded_pagename_groups.split(filename):
+            if safe:
+                append(group)
+            else:
+                digits = group[1:-1]
+
+                # Each character occupies two hexadecimal digits, and a group
+                # may encode several characters, so decode the digits pairwise.
+                # Text that is not a valid encoding is retained as given.
+
+                if len(digits) % 2:
+                    append(group)
+                else:
+                    for i in range(0, len(digits), 2):
+                        append(chr(int(digits[i:i+2], 16)))
+
+            safe = not safe
+
+        return DirectoryInput.to_pagename(self, "".join(decoded))
+
+input = MoinDirectoryInput
+
+# vim: tabstop=4 expandtab shiftwidth=4