paul@380 | 1 | # -*- coding: ISO-8859-1 -*- |
paul@380 | 2 | |
paul@410 | 3 | from posix.iconv import Converter |
paul@380 | 4 | |
# Encoders: ISO-8859-1 input, Unicode encoding output.
to_utf16 = Converter("ISO-8859-1", "UTF-16")
to_utf8 = Converter("ISO-8859-1", "UTF-8")

# Decoders: Unicode encoding input, ISO-8859-1 output.
from_utf16 = Converter("UTF-16", "ISO-8859-1")
from_utf8 = Converter("UTF-8", "ISO-8859-1")

# Same encoding on both sides: UTF-8 in, UTF-8 out.
only_utf8 = Converter("UTF-8", "UTF-8")
paul@380 | 10 | |
paul@380 | 11 | try: |
paul@408 | 12 | iso = b"???" |
paul@386 | 13 | print iso # ??? |
paul@386 | 14 | to_utf8.feed(iso) |
paul@386 | 15 | utf8 = str(to_utf8) |
paul@386 | 16 | print utf8 # ?????? |
paul@386 | 17 | from_utf8.feed(utf8) |
paul@386 | 18 | print str(from_utf8) # ??? |
paul@386 | 19 | to_utf16.feed(iso) |
paul@386 | 20 | utf16 = str(to_utf16) |
paul@386 | 21 | print utf16 # ... |
paul@386 | 22 | from_utf16.feed(utf16) |
paul@386 | 23 | print str(from_utf16) # ??? |
paul@386 | 24 | |
paul@410 | 25 | # Convert UTF-8 to UTF-8. |
paul@410 | 26 | |
paul@410 | 27 | only_utf8.feed(utf8) |
paul@410 | 28 | utf8_2 = str(only_utf8) |
paul@410 | 29 | print utf8_2 # ?????? |
paul@410 | 30 | |
paul@386 | 31 | # Convert part of a UTF-16 sequence, then convert the remainder, then obtain |
paul@386 | 32 | # the result. |
paul@386 | 33 | |
paul@386 | 34 | first = utf16[:3] |
paul@386 | 35 | second = utf16[3:] |
paul@386 | 36 | |
paul@386 | 37 | from_utf16.reset() |
paul@386 | 38 | print "first:", first # ... |
paul@386 | 39 | from_utf16.feed(first) # should have handled an incomplete input |
paul@386 | 40 | print "second:", second # ... |
paul@386 | 41 | from_utf16.feed(second) # should have handled the complete input |
paul@386 | 42 | print str(from_utf16) # ??? |
paul@386 | 43 | |
paul@386 | 44 | # Convert part of a UTF-8 sequence, then the remainder, then get the result. |
paul@386 | 45 | |
paul@386 | 46 | first = utf8[:3] |
paul@386 | 47 | second = utf8[3:] |
paul@386 | 48 | |
paul@386 | 49 | from_utf8.reset() |
paul@386 | 50 | print "first:", first # ??? |
paul@386 | 51 | from_utf8.feed(first) # should have handled an incomplete input |
paul@386 | 52 | print "second:", second # ??? |
paul@386 | 53 | from_utf8.feed(second) # should have handled the complete input |
paul@386 | 54 | print str(from_utf8) # ??? |
paul@386 | 55 | |
paul@387 | 56 | # Attempt to convert ISO-8859-1 characters as if they were UTF-8. |
paul@387 | 57 | |
paul@387 | 58 | from_utf8.reset() |
paul@387 | 59 | |
paul@387 | 60 | try: |
paul@387 | 61 | from_utf8.feed(iso) # should raise an exception |
paul@410 | 62 | except UnicodeDecodeError, exc: |
paul@410 | 63 | print "Not UTF-8 input:", exc.value |
paul@387 | 64 | except OSError, exc: |
paul@410 | 65 | print "OSError:", exc.value |
paul@387 | 66 | |
paul@387 | 67 | print str(from_utf8) # |
paul@387 | 68 | |
paul@387 | 69 | # Attempt to convert ISO-8859-1 characters following some UTF-8 ones. |
paul@387 | 70 | |
paul@387 | 71 | to_utf8.reset() |
paul@387 | 72 | to_utf8.feed("???") |
paul@387 | 73 | utf8_2 = str(to_utf8) |
paul@387 | 74 | |
paul@387 | 75 | from_utf8.reset() |
paul@387 | 76 | |
paul@387 | 77 | try: |
paul@387 | 78 | from_utf8.feed(utf8_2 + iso) # should raise an exception |
paul@410 | 79 | except UnicodeDecodeError, exc: |
paul@410 | 80 | print "Not UTF-8 input:", exc.value |
paul@387 | 81 | except OSError, exc: |
paul@410 | 82 | print "OSError:", exc.value |
paul@387 | 83 | |
paul@387 | 84 | print str(from_utf8) # |
paul@387 | 85 | |
paul@380 | 86 | finally: |
paul@380 | 87 | to_utf8.close() |
paul@380 | 88 | to_utf16.close() |
paul@380 | 89 | from_utf8.close() |
paul@380 | 90 | from_utf16.close() |
paul@380 | 91 | |
paul@380 | 92 | try: |
paul@380 | 93 | Converter("horses", "giraffes") |
paul@380 | 94 | except OSError, exc: |
paul@380 | 95 | print 'Converter("horses", "giraffes"): not valid encodings; error is', exc.value |