1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/moinformat/translators/moin/html.py Thu Aug 17 23:34:10 2023 +0200
1.3 @@ -0,0 +1,99 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +HTML-to-Moin translator.
1.8 +
1.9 +Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
1.10 +
1.11 +This program is free software; you can redistribute it and/or modify it under
1.12 +the terms of the GNU General Public License as published by the Free Software
1.13 +Foundation; either version 3 of the License, or (at your option) any later
1.14 +version.
1.15 +
1.16 +This program is distributed in the hope that it will be useful, but WITHOUT
1.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
1.19 +details.
1.20 +
1.21 +You should have received a copy of the GNU General Public License along with
1.22 +this program. If not, see <http://www.gnu.org/licenses/>.
1.23 +"""
1.24 +
1.25 +from moinformat.translators.common import Translator
1.26 +from moinformat.tree.moin import Block, Heading, Region, Text
1.27 +
def int_or_default(s, default):

    """
    Return 's' converted to an integer, or 'default' if 's' is empty (or
    otherwise false) or cannot be parsed as an integer.
    """

    if s:
        try:
            return int(s)
        except ValueError:
            # Not a valid integer literal: fall through to the default.
            pass

    return default
1.35 +
class HTMLToMoinTranslator(Translator):

    "Translation of HTML document nodes to Moin document nodes."

    input_formats = ["html"]
    formats = ["moin"]

    def _get_attribute(self, element, name):

        """
        Return the value of the first attribute of 'element' having the given
        'name', or None if no such attribute is present. Where the attribute
        has no value node, that empty value is returned as is.
        """

        for attribute in element.attributes:
            if attribute.name == name:
                return attribute.value and attribute.value.value
        return None

    def _get_class_values(self, element):

        """
        Return a dictionary built from the "class" attribute of 'element'.
        Each whitespace-separated token of the form "region-<name>-<value>"
        contributes an entry mapping <name> to <value>; the value may itself
        contain hyphens. Other tokens, including "region-" tokens lacking a
        value part, are ignored.
        """

        d = {}
        class_value = self._get_attribute(element, "class")
        if not class_value:
            return d

        for token in class_value.split():
            if not token.startswith("region-"):
                continue

            # Limit the split so that hyphens in the value are preserved.

            parts = token.split("-", 2)

            # Skip tokens such as "region-type" that have no value part
            # instead of failing when unpacking them.

            if len(parts) == 3:
                _region, name, value = parts
                d[name] = value

        return d

    def element(self, element):

        """
        Translate the HTML 'element' to a Moin node, returning None for
        elements without a name and for elements that are not handled.

        Elements named "h" followed by digits become headings, "p" elements
        become blocks, and "span" elements become regions when their class
        values define a "type", or blocks otherwise.
        """

        name = element.name

        if not name:
            return None

        # Heading elements: the digits after "h" provide the heading level.

        if name[0] == "h" and name[1:].isdigit():
            return Heading(self.container(element), int(name[1:]),
                           start_pad=" ", end_pad=" ", end_extra="\n",
                           identifier=self._get_attribute(element, "id"))

        if name == "p":
            return Block(self.container(element))

        if name == "span":
            d = self._get_class_values(element)

            # Use the "in" operator, which is available in both Python 2 and
            # Python 3, unlike dict.has_key.

            if "type" in d:
                return Region(self.container(element),
                              int_or_default(d.get("level"), 0),
                              int_or_default(d.get("indent"), 0),
                              d.get("type"),
                              extra="\n")

            return Block(self.container(element))

        return None

    def fragment(self, fragment):

        "Translate 'fragment' by translating it as a container."

        return self.container(fragment)

    def text(self, text):

        "Return a Moin text node holding the value of the HTML 'text' node."

        return Text(text.value)

    # Some nodes are not directly translated.

    def node(self, node):

        "Return None, indicating that 'node' has no translation."

        return None

    attribute = node
    attribute_value = node
    comment = node
    directive = node
    inclusion = node
1.99 +
# Module-level name bound to the translator class defined in this module.
# NOTE(review): presumably this is the name by which the translator is
# discovered by other code - confirm against the code that loads translators.

translator = HTMLToMoinTranslator
1.101 +
1.102 +# vim: tabstop=4 expandtab shiftwidth=4