Lichen

lib/posix/iconv.py

843:d305986d05c8
2018-07-05 Paul Boddie Employed sets for attributes and providers referenced by accesses. This causes various attributes to be identified definitively in the access plans and instruction sequences.
     1 #!/usr/bin/env python     2      3 """     4 POSIX character set conversion functions.     5      6 Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.types import check_string    23 from native import iconv, iconv_close, iconv_open, iconv_reset    24     25 class IncompleteSequenceError(OSError):    26     27     "An error indicating an incomplete multibyte sequence."    28     29     pass    30     31 class InvalidSequenceError(OSError):    32     33     "An error indicating an incomplete multibyte sequence."    34     35     pass    36     37 class ConverterError(Exception):    38     39     "An error indicating a failure involving a character set converter."    40     41     pass    42     43 class Converter:    44     45     "A character set converter."    46     47     def __init__(self, from_encoding, to_encoding):    48     49         "Initialise conversion between 'from_encoding' and 'to_encoding'."    50     51         check_string(from_encoding)    52         check_string(to_encoding)    53         self.__data__ = iconv_open(to_encoding, from_encoding)    54         self.reset()    55     56     def reset(self):    57     58         "Reset the state of the converter."    59     60         self.state = ["", 0, 0]    61         self.result = []    62         iconv_reset(self.__data__)    63     64     def close(self):    65     66         "Close this converter."    67     68         iconv_close(self.__data__)    69         self.__data__ = None    70     71     def feed(self, s):    72     73         "Feed 's' to the converter, converting its byte representation."    74     75         if self.__data__ is None:    76             raise ConverterError    77     78         check_string(s)    79     80         _s, start, remaining = self.state    81     82         if _s:    83             self.state = [_s + s, start, remaining + s.bytelength()]    84         else:    85             self.state = [s, 0, s.bytelength()]    86     87         while True:    88     89             # Obtain converted text and update the state.    90     91             try:    92                 out = iconv(self.__data__, self.state)    93     94             # Incomplete input does not cause an exception.    95     96             except IncompleteSequenceError, exc:    97                 self.result.append(exc.arg)    98                 return    99    100             # Invalid input causes a Unicode exception.   101    102             except InvalidSequenceError, exc:   103                 raise UnicodeDecodeError(exc.arg)   104    105             # Add any returned text to the result.   106    107             self.result.append(out)   108    109             # Test for the end of the conversion.   110    111             _s, start, remaining = self.state   112    113             if not remaining:   114                 return   115    116     def __str__(self):   117    118         "Return the value of the converted string."   119    120         return "".join(self.result)   121    122 # vim: tabstop=4 expandtab shiftwidth=4