MoinLight

Changeset

303:e9dbcdac8850
2021-10-06 Paul Boddie [raw | files | shortlog | changelog | graph] Added tentative Moin directory storage support.
docs/wiki/Input (file) moinformat/input/moindirectory.py (file)
     1.1 --- a/docs/wiki/Input	Wed Oct 06 00:04:30 2021 +0200
     1.2 +++ b/docs/wiki/Input	Wed Oct 06 00:10:29 2021 +0200
     1.3 @@ -17,6 +17,19 @@
     1.4  name, and with any configured `input_separator` being used in filenames to
     1.5  construct hierarchical page relationships.
     1.6  
     1.7 +== Moin Directory ==
     1.8 +
     1.9 +A directory containing wiki page files stored in a MoinMoin arrangement.
    1.10 +
    1.11 + [[Metadata#input_context|input_context]]:: `moindirectory`
    1.12 +
    1.13 +Each directory within the root directory contains data for a different wiki
    1.14 +page, with the directory name providing the page name (in an encoded form).
    1.15 +Within the directory for each page, a `current` file provides the current
    1.16 +revision number, and a `revisions` directory contains one or more numbered
    1.17 +files each providing the content of the appropriate revision. Currently, only
    1.18 +the current revision is accessed.
    1.19 +
    1.20  == Standalone ==
    1.21  
    1.22  A standalone wiki page file.
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/moinformat/input/moindirectory.py	Wed Oct 06 00:10:29 2021 +0200
     2.3 @@ -0,0 +1,197 @@
     2.4 +#!/usr/bin/env python
     2.5 +
     2.6 +"""
     2.7 +MoinMoin directory input context.
     2.8 +
     2.9 +Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
    2.10 +
    2.11 +This program is free software; you can redistribute it and/or modify it under
    2.12 +the terms of the GNU General Public License as published by the Free Software
    2.13 +Foundation; either version 3 of the License, or (at your option) any later
    2.14 +version.
    2.15 +
    2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
    2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    2.18 +FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
    2.19 +details.
    2.20 +
    2.21 +You should have received a copy of the GNU General Public License along with
    2.22 +this program.  If not, see <http://www.gnu.org/licenses/>.
    2.23 +"""
    2.24 +
    2.25 +from moinformat.input.directory import DirectoryInput
    2.26 +from moinformat.utils.directory import Directory
    2.27 +from os.path import exists, join
    2.28 +import re
    2.29 +
    2.30 +class MoinDirectoryInput(DirectoryInput):
    2.31 +
    2.32 +    "A MoinMoin directory input context."
    2.33 +
    2.34 +    name = "moindirectory"
    2.35 +
    2.36 +    # Pagename and filename translation.
    2.37 +
    2.38 +    unsafe_pagename_characters = re.compile(r"([^A-Za-z0-9_]+)")
    2.39 +    encoded_pagename_groups = re.compile(r"(\([A-Fa-f0-9]+\))")
    2.40 +
    2.41 +    def __init__(self, metadata):
    2.42 +
    2.43 +        "Initialise the context with the given 'metadata'."
    2.44 +
    2.45 +        if not metadata.has_key("input_filename"):
    2.46 +            raise ValueError, metadata
    2.47 +
    2.48 +        DirectoryInput.__init__(self, metadata)
    2.49 +
    2.50 +        # Do not search recursively in nested directories for pages.
    2.51 +        # This overrides the common directory input behaviour.
    2.52 +
    2.53 +        self.nested = False
    2.54 +
    2.55 +    def _get_attachments_dir(self, pagename):
    2.56 +
    2.57 +        "Return the attachments directory for 'pagename'."
    2.58 +
    2.59 +        return self.dir.get_filename(join(self.to_filename(pagename), "attachments"))
    2.60 +
    2.61 +    def _get_current_revision(self, filename):
    2.62 +
    2.63 +        filename = self.dir.get_filename(join(filename, "current"))
    2.64 +
    2.65 +        try:
    2.66 +            current = open(filename)
    2.67 +        except IOError:
    2.68 +            return None
    2.69 +
    2.70 +        try:
    2.71 +            return current.read().strip()
    2.72 +        finally:
    2.73 +            current.close()
    2.74 +
    2.75 +    def all(self):
    2.76 +
    2.77 +        "Return all pages in the context."
    2.78 +
    2.79 +        # Ignore dotfiles. Do not search recursively, and obtain directory names
    2.80 +        # instead of filenames to identify pages.
    2.81 +
    2.82 +        pages = []
    2.83 +
    2.84 +        for filename in self.dir.select_files("[!.]*", self.nested,
    2.85 +                                              directories=True):
    2.86 +
    2.87 +            revision = self._get_current_revision(filename)
    2.88 +
    2.89 +            if revision and exists(self.dir.get_filename(join(filename, "revisions", revision))):
    2.90 +                pages.append(self.to_pagename(filename))
    2.91 +
    2.92 +        return pages
    2.93 +
    2.94 +    def all_attachments(self):
    2.95 +
    2.96 +        "Return all attachment filenames in the context."
    2.97 +
    2.98 +        return self.dir.select_files(join("*", "attachments", "*"), True)
    2.99 +
   2.100 +    def get_attachments(self, pagename):
   2.101 +
   2.102 +        """
   2.103 +        Return all attachment filenames for the given 'pagename'. Each filename
   2.104 +        is relative to the appropriate attachment directory.
   2.105 +        """
   2.106 +
   2.107 +        return Directory(self._get_attachments_dir(pagename)).select_files("*")
   2.108 +
   2.109 +    # Page characteristics.
   2.110 +
   2.111 +    def subpage_filenames(self, pagename):
   2.112 +
   2.113 +        "Return the subpage filenames of 'pagename'."
   2.114 +
   2.115 +        pattern = self.to_filename("%s%s" % (pagename, self.level_sep))
   2.116 +        return self.dir.select_files("%s*" % pattern, self.nested, directories=True)
   2.117 +
   2.118 +    # Page access methods.
   2.119 +
   2.120 +    def readfile(self, filename, encoding=None):
   2.121 +
   2.122 +        """
   2.123 +        Return the contents of the file having the given 'filename' and optional
   2.124 +        'encoding'.
   2.125 +        """
   2.126 +
   2.127 +        # Moin employs a file to indicate the current revision and a directory
   2.128 +        # containing the revisions.
   2.129 +
   2.130 +        revision = self._get_current_revision(filename)
   2.131 +
   2.132 +        return self.readpath(self.dir.get_filename(join(filename, "revisions",
   2.133 +                                  revision)), encoding)
   2.134 +
   2.135 +    # Convenience methods.
   2.136 +
   2.137 +    def get_attachment_filename(self, pagename, filename):
   2.138 +
   2.139 +        """
   2.140 +        Return the full path of an attachment file for the given 'pagename'
   2.141 +        having the given 'filename'.
   2.142 +        """
   2.143 +
   2.144 +        if not pagename:
   2.145 +            return None
   2.146 +
   2.147 +        return self.dir.get_filename(join(self.to_filename(pagename),
   2.148 +                                          "attachments",
   2.149 +                                          filename))
   2.150 +
   2.151 +    # NOTE: Translation methods should encode filenames appropriately.
   2.152 +
   2.153 +    def to_filename(self, pagename):
   2.154 +
   2.155 +        "Return the filename corresponding to 'pagename'."
   2.156 +
   2.157 +        # Transform "special" characters into the Moin "(xx)" representation.
   2.158 +        # Each run of special characters is enclosed in a single "(...)" group.
   2.159 +
   2.160 +        encoded = []
   2.161 +        append = encoded.append
   2.162 +        safe = True
   2.163 +
   2.164 +        for group in self.unsafe_pagename_characters.split(pagename):
   2.165 +            if safe:
   2.166 +                append(group)
   2.167 +            else:
   2.168 +                append("(")
   2.169 +                for ch in group:
   2.170 +                    append("%02x" % ord(ch))
   2.171 +                append(")")
   2.172 +
   2.173 +            safe = not safe
   2.174 +
   2.175 +        return DirectoryInput.to_filename(self, "".join(encoded))
   2.176 +
   2.177 +    def to_pagename(self, filename):
   2.178 +
   2.179 +        "Return the pagename corresponding to 'filename'."
   2.180 +
   2.181 +        # Transform "special" characters from the Moin "(xx)" representation.
   2.182 +
   2.183 +        decoded = []
   2.184 +        append = decoded.append
   2.185 +        safe = True
   2.186 +
   2.187 +        for group in self.encoded_pagename_groups.split(filename):
   2.188 +            if safe:
   2.189 +                append(group)
   2.190 +            else:
   2.191 +                num = group[1:-1]
   2.192 +                append(chr(int(num, 16)))
   2.193 +
   2.194 +            safe = not safe
   2.195 +
   2.196 +        return DirectoryInput.to_pagename(self, "".join(decoded))
   2.197 +
   2.198 +input = MoinDirectoryInput
   2.199 +
   2.200 +# vim: tabstop=4 expandtab shiftwidth=4