1.1 --- a/docs/wiki/Input Wed Oct 06 00:04:30 2021 +0200
1.2 +++ b/docs/wiki/Input Wed Oct 06 00:10:29 2021 +0200
1.3 @@ -17,6 +17,19 @@
1.4 name, and with any configured `input_separator` being used in filenames to
1.5 construct hierarchical page relationships.
1.6
1.7 +== Moin Directory ==
1.8 +
1.9 +A directory containing wiki page files stored in a MoinMoin arrangement.
1.10 +
1.11 + [[Metadata#input_context|input_context]]:: `moindirectory`
1.12 +
1.13 +Each directory within the root directory contains data for a different wiki
1.14 +page, with the directory name providing the page name (in an encoded form).
1.15 +Within the directory for each page, a `current` file provides the current
1.16 +revision number, and a `revisions` directory contains one or more numbered
1.17 +files each providing the content of the appropriate revision. Currently, only
1.18 +the current revision is accessed.
1.19 +
1.20 == Standalone ==
1.21
1.22 A standalone wiki page file.
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/moinformat/input/moindirectory.py Wed Oct 06 00:10:29 2021 +0200
2.3 @@ -0,0 +1,197 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +MoinMoin directory input context.
2.8 +
2.9 +Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
2.10 +
2.11 +This program is free software; you can redistribute it and/or modify it under
2.12 +the terms of the GNU General Public License as published by the Free Software
2.13 +Foundation; either version 3 of the License, or (at your option) any later
2.14 +version.
2.15 +
2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
2.19 +details.
2.20 +
2.21 +You should have received a copy of the GNU General Public License along with
2.22 +this program. If not, see <http://www.gnu.org/licenses/>.
2.23 +"""
2.24 +
2.25 +from moinformat.input.directory import DirectoryInput
2.26 +from moinformat.utils.directory import Directory
2.27 +from os.path import exists, join
2.28 +import re
2.29 +
class MoinDirectoryInput(DirectoryInput):

    """
    A MoinMoin directory input context.

    Each page is held in its own directory, named using the encoded form of the
    page name. Within a page directory, a 'current' file provides the current
    revision identifier, a 'revisions' directory provides the revision files,
    and an 'attachments' directory is consulted for attachments.
    """

    name = "moindirectory"

    # Pagename and filename translation.

    # Runs of characters needing encoding when a pagename becomes a filename.
    # The capturing group makes split() return these runs between safe runs.

    unsafe_pagename_characters = re.compile(r"([^A-Za-z0-9_]+)")

    # Parenthesised runs of hexadecimal digits found in encoded filenames.
    # Again, the capturing group makes split() return the matched groups.

    encoded_pagename_groups = re.compile(r"(\([A-Fa-f0-9]+\))")

    def __init__(self, metadata):

        """
        Initialise the context with the given 'metadata'. A ValueError is raised
        if the metadata does not provide an "input_filename" setting.
        """

        # NOTE: has_key is retained since the metadata object might not support
        # NOTE: the "in" operator; its definition is not visible here.

        if not metadata.has_key("input_filename"):
            raise ValueError(metadata)

        DirectoryInput.__init__(self, metadata)

        # Do not search recursively in nested directories for pages.
        # This overrides the common directory input behaviour.

        self.nested = False

    def _get_attachments_dir(self, pagename):

        "Return the attachments directory for 'pagename'."

        return self.dir.get_filename(join(self.to_filename(pagename), "attachments"))

    def _get_current_revision(self, filename):

        """
        Return the contents of the 'current' file found in the page directory
        having the given 'filename', with surrounding whitespace removed, or
        None if the file cannot be opened.
        """

        path = self.dir.get_filename(join(filename, "current"))

        # Only a failure to open the file is tolerated. Errors occurring when
        # reading the file are propagated to the caller.

        try:
            current = open(path)
        except IOError:
            return None

        with current:
            return current.read().strip()

    def all(self):

        "Return all pages in the context."

        # Ignore dotfiles. Do not search recursively, and obtain directory names
        # instead of filenames to identify pages.

        pages = []

        for filename in self.dir.select_files("[!.]*", self.nested,
                                              directories=True):

            revision = self._get_current_revision(filename)

            # Only directories indicating a current revision whose file actually
            # exists are regarded as pages.

            if revision and exists(self.dir.get_filename(
                join(filename, "revisions", revision))):

                pages.append(self.to_pagename(filename))

        return pages

    def all_attachments(self):

        "Return all attachment filenames in the context."

        # Match anything inside the attachments directory of any page directory.

        return self.dir.select_files(join("*", "attachments", "*"), True)

    def get_attachments(self, pagename):

        """
        Return all attachment filenames for the given 'pagename'. Each filename
        is relative to the appropriate attachment directory.
        """

        return Directory(self._get_attachments_dir(pagename)).select_files("*")

    # Page characteristics.

    def subpage_filenames(self, pagename):

        "Return the subpage filenames of 'pagename'."

        # Subpages are sought as page directories whose names start with the
        # encoded form of the pagename followed by the level separator.
        # NOTE: level_sep is presumably provided by the base class.

        pattern = self.to_filename("%s%s" % (pagename, self.level_sep))
        return self.dir.select_files("%s*" % pattern, self.nested, directories=True)

    # Page access methods.

    def readfile(self, filename, encoding=None):

        """
        Return the contents of the file having the given 'filename' and optional
        'encoding'. An IOError is raised if the page directory does not indicate
        a current revision.
        """

        # Moin employs a file to indicate the current revision and a directory
        # containing the revisions.

        revision = self._get_current_revision(filename)

        # Without a revision, no meaningful path can be constructed. Report this
        # as a file access problem instead of failing within the path joining.

        if not revision:
            raise IOError("No current revision for %r" % filename)

        return self.readpath(self.dir.get_filename(join(filename, "revisions",
                                                        revision)), encoding)

    # Convenience methods.

    def get_attachment_filename(self, pagename, filename):

        """
        Return the full path of an attachment file for the given 'pagename'
        having the given 'filename', or None if 'pagename' is empty or None.
        """

        if not pagename:
            return None

        return self.dir.get_filename(join(self.to_filename(pagename),
                                          "attachments",
                                          filename))

    # NOTE: Translation methods should encode filenames appropriately.

    def to_filename(self, pagename):

        "Return the filename corresponding to 'pagename'."

        # Transform "special" characters into the Moin "(xx)" representation.
        # Groups of characters are enclosed within each instance.

        encoded = []
        append = encoded.append

        # Splitting yields alternating safe and unsafe runs, starting with a
        # (possibly empty) safe run.

        safe = True

        for group in self.unsafe_pagename_characters.split(pagename):
            if safe:
                append(group)
            else:
                append("(")
                for ch in group:
                    append("%02x" % ord(ch))
                append(")")

            safe = not safe

        return DirectoryInput.to_filename(self, "".join(encoded))

    def to_pagename(self, filename):

        "Return the pagename corresponding to 'filename'."

        # Transform "special" characters from the Moin "(xx)" representation.

        decoded = []
        append = decoded.append

        # Splitting yields alternating plain and encoded runs, starting with a
        # (possibly empty) plain run.

        safe = True

        for group in self.encoded_pagename_groups.split(filename):
            if safe:
                append(group)
            else:
                # Remove the parentheses and decode two digits at a time, since
                # a group may encode several characters, as produced by the
                # to_filename method.

                digits = group[1:-1]

                for i in range(0, len(digits), 2):
                    append(chr(int(digits[i:i+2], 16)))

            safe = not safe

        return DirectoryInput.to_pagename(self, "".join(decoded))
2.197 +
# Module-level alias for the input context class defined in this module.
# NOTE(review): presumably the name by which the class is looked up when input
# NOTE(review): contexts are loaded; confirm against the loading code.

input = MoinDirectoryInput
2.199 +
2.200 +# vim: tabstop=4 expandtab shiftwidth=4