Lichen

tests/iconv.py

934:2989aab1b4f7
7 months ago Paul Boddie Renamed the utf8string class to unicode, eliminating the unicode function. This means that the simple case of merely returning an object if it is already a Unicode object no longer occurs when using the unicode callable, but such behaviour might be better supported with more general customised instantiation functionality.
     1 # -*- coding: ISO-8859-1 -*-     2      3 from posix.iconv import Converter     4      5 only_utf8 = Converter("UTF-8", "UTF-8")     6 to_utf8 = Converter("ISO-8859-1", "UTF-8")     7 to_utf16 = Converter("ISO-8859-1", "UTF-16")     8 from_utf8 = Converter("UTF-8", "ISO-8859-1")     9 from_utf16 = Converter("UTF-16", "ISO-8859-1")    10     11 try:    12     iso = b"???"    13     print iso                           # ???    14     to_utf8.feed(iso)    15     utf8 = str(to_utf8)    16     print utf8                          # ??????    17     from_utf8.feed(utf8)    18     print str(from_utf8)                # ???    19     to_utf16.feed(iso)    20     utf16 = str(to_utf16)    21     print utf16                         # ...    22     from_utf16.feed(utf16)    23     print str(from_utf16)               # ???    24     25     # Convert UTF-8 to UTF-8.    26     27     only_utf8.feed(utf8)    28     utf8_2 = str(only_utf8)    29     print utf8_2                        # ??????    30     31     # Convert part of a UTF-16 sequence, then convert the remainder, then obtain    32     # the result.    33     34     first = utf16[:3]    35     second = utf16[3:]    36     37     from_utf16.reset()    38     print "first:", first               # ...    39     from_utf16.feed(first)              # should have handled an incomplete input    40     print "second:", second             # ...    41     from_utf16.feed(second)             # should have handled the complete input    42     print str(from_utf16)               # ???    43     44     # Convert part of a UTF-8 sequence, then the remainder, then get the result.    45     46     first = utf8[:3]    47     second = utf8[3:]    48     49     from_utf8.reset()    50     print "first:", first               # ???    51     from_utf8.feed(first)               # should have handled an incomplete input    52     print "second:", second             # ???    53     from_utf8.feed(second)              # should have handled the complete input    54     print str(from_utf8)                # ???    55     56     # Attempt to convert ISO-8859-1 characters as if they were UTF-8.    57     58     from_utf8.reset()    59     60     try:    61         from_utf8.feed(iso)             # should raise an exception    62     except UnicodeDecodeError, exc:    63         print "Not UTF-8 input:", exc.value    64     except OSError, exc:    65         print "OSError:", exc.value    66     67     print str(from_utf8)                #    68     69     # Attempt to convert ISO-8859-1 characters following some UTF-8 ones.    70     71     to_utf8.reset()    72     to_utf8.feed("???")    73     utf8_2 = str(to_utf8)    74     75     from_utf8.reset()    76     77     try:    78         from_utf8.feed(utf8_2 + iso)    # should raise an exception    79     except UnicodeDecodeError, exc:    80         print "Not UTF-8 input:", exc.value    81     except OSError, exc:    82         print "OSError:", exc.value    83     84     print str(from_utf8)                #    85     86 finally:    87     to_utf8.close()    88     to_utf16.close()    89     from_utf8.close()    90     from_utf16.close()    91     92 try:    93     Converter("horses", "giraffes")    94 except OSError, exc:    95     print 'Converter("horses", "giraffes"): not valid encodings; error is', exc.value