Lichen

Annotated lib/posix/iconv.py

902:dc210430ce4c
2019-05-27 Paul Boddie Updated copyright statement years.
paul@380 1
#!/usr/bin/env python
paul@380 2
paul@380 3
"""
paul@380 4
POSIX character set conversion functions.
paul@380 5
paul@787 6
Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@380 7
paul@380 8
This program is free software; you can redistribute it and/or modify it under
paul@380 9
the terms of the GNU General Public License as published by the Free Software
paul@380 10
Foundation; either version 3 of the License, or (at your option) any later
paul@380 11
version.
paul@380 12
paul@380 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@380 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@380 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@380 16
details.
paul@380 17
paul@380 18
You should have received a copy of the GNU General Public License along with
paul@380 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@380 20
"""
paul@380 21
paul@787 22
from __builtins__.types import check_string
paul@386 23
from native import iconv, iconv_close, iconv_open, iconv_reset
paul@386 24
paul@453 25
class IncompleteSequenceError(OSError):

    """
    An error signalling that a multibyte sequence in the input is incomplete.
    """

    pass
paul@386 30
paul@453 31
class InvalidSequenceError(OSError):

    "An error indicating an invalid multibyte sequence."

    pass
paul@380 36
paul@380 37
class ConverterError(Exception):

    """
    An error signalling a failure that involves a character set converter.
    """

    pass
paul@380 42
paul@380 43
class Converter:
paul@380 44
paul@380 45
    "A character set converter."
paul@380 46
paul@380 47
    def __init__(self, from_encoding, to_encoding):
paul@380 48
paul@380 49
        "Initialise conversion between 'from_encoding' and 'to_encoding'."
paul@380 50
paul@380 51
        check_string(from_encoding)
paul@380 52
        check_string(to_encoding)
paul@380 53
        self.__data__ = iconv_open(to_encoding, from_encoding)
paul@386 54
        self.reset()
paul@386 55
paul@386 56
    def reset(self):
paul@386 57
paul@386 58
        "Reset the state of the converter."
paul@386 59
paul@386 60
        self.state = ["", 0, 0]
paul@386 61
        self.result = []
paul@386 62
        iconv_reset(self.__data__)
paul@380 63
paul@380 64
    def close(self):
paul@380 65
paul@380 66
        "Close this converter."
paul@380 67
paul@380 68
        iconv_close(self.__data__)
paul@380 69
        self.__data__ = None
paul@380 70
paul@386 71
    def feed(self, s):
paul@380 72
paul@410 73
        "Feed 's' to the converter, converting its byte representation."
paul@380 74
paul@380 75
        if self.__data__ is None:
paul@380 76
            raise ConverterError
paul@380 77
paul@380 78
        check_string(s)
paul@380 79
paul@386 80
        _s, start, remaining = self.state
paul@386 81
paul@386 82
        if _s:
paul@403 83
            self.state = [_s + s, start, remaining + s.bytelength()]
paul@386 84
        else:
paul@403 85
            self.state = [s, 0, s.bytelength()]
paul@380 86
paul@380 87
        while True:
paul@380 88
paul@380 89
            # Obtain converted text and update the state.
paul@380 90
paul@386 91
            try:
paul@386 92
                out = iconv(self.__data__, self.state)
paul@386 93
paul@386 94
            # Incomplete input does not cause an exception.
paul@386 95
paul@453 96
            except IncompleteSequenceError, exc:
paul@453 97
                self.result.append(exc.arg)
paul@453 98
                return
paul@453 99
paul@453 100
            # Invalid input causes a Unicode exception.
paul@453 101
paul@453 102
            except InvalidSequenceError, exc:
paul@453 103
                raise UnicodeDecodeError(exc.arg)
paul@386 104
paul@386 105
            # Add any returned text to the result.
paul@386 106
paul@386 107
            self.result.append(out)
paul@380 108
paul@380 109
            # Test for the end of the conversion.
paul@380 110
paul@386 111
            _s, start, remaining = self.state
paul@386 112
paul@380 113
            if not remaining:
paul@386 114
                return
paul@386 115
paul@386 116
    def __str__(self):
paul@386 117
paul@386 118
        "Return the value of the converted string."
paul@386 119
paul@386 120
        return "".join(self.result)
paul@380 121
paul@380 122
# vim: tabstop=4 expandtab shiftwidth=4