1 #!/usr/bin/env python 2 3 """ 4 POSIX character set conversion functions. 5 6 Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from __builtins__.types import check_int, check_string 23 from native import iconv, iconv_close, iconv_open, iconv_reset 24 25 class IncompleteSequenceError(OSError): 26 27 "An error indicating an incomplete multibyte sequence." 28 29 pass 30 31 class InvalidSequenceError(OSError): 32 33 "An error indicating an incomplete multibyte sequence." 34 35 pass 36 37 class ConverterError(Exception): 38 39 "An error indicating a failure involving a character set converter." 40 41 pass 42 43 class Converter: 44 45 "A character set converter." 46 47 def __init__(self, from_encoding, to_encoding): 48 49 "Initialise conversion between 'from_encoding' and 'to_encoding'." 50 51 check_string(from_encoding) 52 check_string(to_encoding) 53 self.__data__ = iconv_open(to_encoding, from_encoding) 54 self.reset() 55 56 def reset(self): 57 58 "Reset the state of the converter." 59 60 self.state = ["", 0, 0] 61 self.result = [] 62 iconv_reset(self.__data__) 63 64 def close(self): 65 66 "Close this converter." 67 68 iconv_close(self.__data__) 69 self.__data__ = None 70 71 def feed(self, s): 72 73 "Feed 's' to the converter, converting its byte representation." 74 75 if self.__data__ is None: 76 raise ConverterError 77 78 check_string(s) 79 80 _s, start, remaining = self.state 81 82 if _s: 83 self.state = [_s + s, start, remaining + s.bytelength()] 84 else: 85 self.state = [s, 0, s.bytelength()] 86 87 while True: 88 89 # Obtain converted text and update the state. 90 91 try: 92 out = iconv(self.__data__, self.state) 93 94 # Incomplete input does not cause an exception. 95 96 except IncompleteSequenceError, exc: 97 self.result.append(exc.arg) 98 return 99 100 # Invalid input causes a Unicode exception. 101 102 except InvalidSequenceError, exc: 103 raise UnicodeDecodeError(exc.arg) 104 105 # Add any returned text to the result. 106 107 self.result.append(out) 108 109 # Test for the end of the conversion. 110 111 _s, start, remaining = self.state 112 113 if not remaining: 114 return 115 116 def __str__(self): 117 118 "Return the value of the converted string." 119 120 return "".join(self.result) 121 122 # vim: tabstop=4 expandtab shiftwidth=4