paul@347 | 1 | #!/usr/bin/env python |
paul@347 | 2 | |
paul@347 | 3 | """ |
paul@347 | 4 | HTML-to-Moin translator. |
paul@347 | 5 | |
paul@347 | 6 | Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk> |
paul@347 | 7 | |
paul@347 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@347 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@347 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@347 | 11 | version. |
paul@347 | 12 | |
paul@347 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@347 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@347 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@347 | 16 | details. |
paul@347 | 17 | |
paul@347 | 18 | You should have received a copy of the GNU General Public License along with |
paul@347 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@347 | 20 | """ |
paul@347 | 21 | |
paul@347 | 22 | from moinformat.translators.common import Translator |
paul@347 | 23 | from moinformat.tree.moin import Block, Heading, Region, Text |
paul@347 | 24 | |
def int_or_default(s, default):

    """
    Return 's' converted to an integer, or 'default' if 's' is false (for
    example None or the empty string) or cannot be converted by int().
    """

    if not s:
        return default
    try:
        return int(s)
    # TypeError covers values that int() cannot accept at all (such as lists),
    # so that the fallback applies to any unusable value and not only to
    # strings that fail to parse.
    except (TypeError, ValueError):
        return default
paul@347 | 32 | |
class HTMLToMoinTranslator(Translator):

    "Translation of HTML document nodes to Moin document nodes."

    input_formats = ["html"]
    formats = ["moin"]

    def _get_attribute(self, element, name):

        """
        Return the value of the attribute called 'name' found in the attributes
        of 'element', or None if no attribute has that name. Where the matching
        attribute has a false value object, that false object is returned
        instead of a value.
        """

        for attribute in element.attributes:
            if attribute.name == name:
                return attribute.value and attribute.value.value
        return None

    def _get_class_values(self, element):

        """
        Return a dictionary built from the class attribute of 'element'. Each
        whitespace-separated token of the form "region-<name>-<value>" yields
        an entry mapping <name> to <value>, with <value> retaining any further
        hyphens. Other tokens are ignored, and an empty dictionary is returned
        if there is no class attribute value.
        """

        class_value = self._get_attribute(element, "class")
        if not class_value:
            return {}

        d = {}
        for token in class_value.split():
            if not token.startswith("region-"):
                continue

            # Tokens such as "region-type" or "region-" provide no value and
            # cannot be unpacked into three parts: ignore them instead of
            # failing with a ValueError.

            parts = token.split("-", 2)
            if len(parts) == 3:
                d[parts[1]] = parts[2]
        return d

    def element(self, element):

        """
        Translate the HTML 'element' to a Moin node: "h<digits>" elements
        become Heading nodes, "p" elements become Block nodes, and "span"
        elements become Region nodes if their class attribute provides a
        region type, or Block nodes otherwise. None is returned for unnamed
        elements and for all other element names.
        """

        name = element.name

        if not name:
            return None

        # Headings: the digits following "h" provide the heading level.

        elif name[0] == "h" and name[1:].isdigit():
            return Heading(self.container(element), int(name[1:]),
                           start_pad=" ", end_pad=" ", end_extra="\n",
                           identifier=self._get_attribute(element, "id"))

        elif name == "p":
            return Block(self.container(element))

        elif name == "span":
            d = self._get_class_values(element)

            # Use the membership operator since dict.has_key is not available
            # in Python 3.

            if "type" in d:
                return Region(self.container(element),
                              int_or_default(d.get("level"), 0),
                              int_or_default(d.get("indent"), 0),
                              d.get("type"),
                              extra="\n")
            else:
                return Block(self.container(element))

        else:
            return None

    def fragment(self, fragment):

        "Translate 'fragment' by translating it as a container."

        return self.container(fragment)

    def text(self, text):

        "Translate the 'text' node to a Moin Text node having the same value."

        return Text(text.value)

    # Some nodes are not directly translated.

    def node(self, node):

        "Return None, indicating that 'node' has no translation."

        return None

    # The following node types share the untranslated behaviour.

    attribute = node
    attribute_value = node
    comment = node
    directive = node
    inclusion = node
paul@347 | 96 | |
# Module-level name bound to the translator class defined in this file;
# presumably the name by which loading code locates the class - TODO confirm.
translator = HTMLToMoinTranslator
paul@347 | 98 | |
paul@347 | 99 | # vim: tabstop=4 expandtab shiftwidth=4 |