paul@380 | 1 | # -*- coding: ISO-8859-1 -*- |
paul@380 | 2 | |
paul@387 | 3 | from posix.iconv import Converter, EILSEQ |
paul@380 | 4 | |
paul@380 | 5 | to_utf8 = Converter("ISO-8859-1", "UTF-8") |
paul@380 | 6 | to_utf16 = Converter("ISO-8859-1", "UTF-16") |
paul@380 | 7 | from_utf8 = Converter("UTF-8", "ISO-8859-1") |
paul@380 | 8 | from_utf16 = Converter("UTF-16", "ISO-8859-1") |
paul@380 | 9 | |
paul@380 | 10 | try: |
paul@408 | 11 | iso = b"???" |
paul@386 | 12 | print iso # ??? |
paul@386 | 13 | to_utf8.feed(iso) |
paul@386 | 14 | utf8 = str(to_utf8) |
paul@386 | 15 | print utf8 # ?????? |
paul@386 | 16 | from_utf8.feed(utf8) |
paul@386 | 17 | print str(from_utf8) # ??? |
paul@386 | 18 | to_utf16.feed(iso) |
paul@386 | 19 | utf16 = str(to_utf16) |
paul@386 | 20 | print utf16 # ... |
paul@386 | 21 | from_utf16.feed(utf16) |
paul@386 | 22 | print str(from_utf16) # ??? |
paul@386 | 23 | |
paul@386 | 24 | # Convert part of a UTF-16 sequence, then convert the remainder, then obtain |
paul@386 | 25 | # the result. |
paul@386 | 26 | |
paul@386 | 27 | first = utf16[:3] |
paul@386 | 28 | second = utf16[3:] |
paul@386 | 29 | |
paul@386 | 30 | from_utf16.reset() |
paul@386 | 31 | print "first:", first # ... |
paul@386 | 32 | from_utf16.feed(first) # should have handled an incomplete input |
paul@386 | 33 | print "second:", second # ... |
paul@386 | 34 | from_utf16.feed(second) # should have handled the complete input |
paul@386 | 35 | print str(from_utf16) # ??? |
paul@386 | 36 | |
paul@386 | 37 | # Convert part of a UTF-8 sequence, then the remainder, then get the result. |
paul@386 | 38 | |
paul@386 | 39 | first = utf8[:3] |
paul@386 | 40 | second = utf8[3:] |
paul@386 | 41 | |
paul@386 | 42 | from_utf8.reset() |
paul@386 | 43 | print "first:", first # ??? |
paul@386 | 44 | from_utf8.feed(first) # should have handled an incomplete input |
paul@386 | 45 | print "second:", second # ??? |
paul@386 | 46 | from_utf8.feed(second) # should have handled the complete input |
paul@386 | 47 | print str(from_utf8) # ??? |
paul@386 | 48 | |
paul@387 | 49 | # Attempt to convert ISO-8859-1 characters as if they were UTF-8. |
paul@387 | 50 | |
paul@387 | 51 | from_utf8.reset() |
paul@387 | 52 | |
paul@387 | 53 | try: |
paul@387 | 54 | from_utf8.feed(iso) # should raise an exception |
paul@387 | 55 | except OSError, exc: |
paul@387 | 56 | if exc.value == EILSEQ: |
paul@387 | 57 | print "Not UTF-8 input:", exc.arg |
paul@387 | 58 | else: |
paul@387 | 59 | print "OSError:", exc.value |
paul@387 | 60 | |
paul@387 | 61 | print str(from_utf8) # |
paul@387 | 62 | |
paul@387 | 63 | # Attempt to convert ISO-8859-1 characters following some UTF-8 ones. |
paul@387 | 64 | |
paul@387 | 65 | to_utf8.reset() |
paul@387 | 66 | to_utf8.feed("???") |
paul@387 | 67 | utf8_2 = str(to_utf8) |
paul@387 | 68 | |
paul@387 | 69 | from_utf8.reset() |
paul@387 | 70 | |
paul@387 | 71 | try: |
paul@387 | 72 | from_utf8.feed(utf8_2 + iso) # should raise an exception |
paul@387 | 73 | except OSError, exc: |
paul@387 | 74 | if exc.value == EILSEQ: |
paul@387 | 75 | print "Not UTF-8 input:", exc.arg |
paul@387 | 76 | else: |
paul@387 | 77 | print "OSError:", exc.value |
paul@387 | 78 | |
paul@387 | 79 | print str(from_utf8) # |
paul@387 | 80 | |
paul@380 | 81 | finally: |
paul@380 | 82 | to_utf8.close() |
paul@380 | 83 | to_utf16.close() |
paul@380 | 84 | from_utf8.close() |
paul@380 | 85 | from_utf16.close() |
paul@380 | 86 | |
paul@380 | 87 | try: |
paul@380 | 88 | Converter("horses", "giraffes") |
paul@380 | 89 | except OSError, exc: |
paul@380 | 90 | print 'Converter("horses", "giraffes"): not valid encodings; error is', exc.value |