# HG changeset patch # User Paul Boddie # Date 1481814541 -3600 # Node ID 0c59d603a56928d0fd42455ab4dc2ad70beb61f7 # Parent 0c718cccf00fa9d34470bfa35546067763cf2bae Raise UnicodeDecodeError instead of propagating OSError with EILSEQ from iconv. diff -r 0c718cccf00f -r 0c59d603a569 lib/__builtins__/exception/unicode.py --- a/lib/__builtins__/exception/unicode.py Thu Dec 15 01:40:31 2016 +0100 +++ b/lib/__builtins__/exception/unicode.py Thu Dec 15 16:09:01 2016 +0100 @@ -3,7 +3,7 @@ """ Unicode exception objects. -Copyright (C) 2015 Paul Boddie +Copyright (C) 2015, 2016 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -19,7 +19,22 @@ this program. If not, see <http://www.gnu.org/licenses/>. """ -class UnicodeDecodeError(Exception): pass +class UnicodeDecodeError(Exception): + + """ + An exception indicating a failure to interpret a byte sequence according to + a character encoding. + """ + + def __init__(self, value): + + """ + Initialise an exception with a 'value' providing the illegal byte + sequence responsible for the error. + """ + + self.value = value + class UnicodeEncodeError(Exception): pass class UnicodeError(Exception): pass class UnicodeTranslateError(Exception): pass diff -r 0c718cccf00f -r 0c59d603a569 lib/__builtins__/unicode.py --- a/lib/__builtins__/unicode.py Thu Dec 15 01:40:31 2016 +0100 +++ b/lib/__builtins__/unicode.py Thu Dec 15 16:09:01 2016 +0100 @@ -172,7 +172,8 @@ s = s.__str__() - # Convert the string to UTF-8. + # Convert the string to UTF-8. Even if the stated encoding is UTF-8, it + # needs to be validated. to_utf8 = Converter(encoding, "UTF-8") diff -r 0c718cccf00f -r 0c59d603a569 lib/posix/iconv.py --- a/lib/posix/iconv.py Thu Dec 15 01:40:31 2016 +0100 +++ b/lib/posix/iconv.py Thu Dec 15 16:09:01 2016 +0100 @@ -63,7 +63,7 @@ def feed(self, s): - "Feed 's' to the converter." + "Feed 's' to the converter, converting its byte representation." 
if self.__data__ is None: raise ConverterError @@ -90,6 +90,8 @@ if exc.value == EINVAL: self.result.append(exc.arg) return + elif exc.value == EILSEQ: + raise UnicodeDecodeError(exc.arg) else: raise diff -r 0c718cccf00f -r 0c59d603a569 tests/iconv.py --- a/tests/iconv.py Thu Dec 15 01:40:31 2016 +0100 +++ b/tests/iconv.py Thu Dec 15 16:09:01 2016 +0100 @@ -1,7 +1,8 @@ # -*- coding: ISO-8859-1 -*- -from posix.iconv import Converter, EILSEQ +from posix.iconv import Converter +only_utf8 = Converter("UTF-8", "UTF-8") to_utf8 = Converter("ISO-8859-1", "UTF-8") to_utf16 = Converter("ISO-8859-1", "UTF-16") from_utf8 = Converter("UTF-8", "ISO-8859-1") @@ -21,6 +22,12 @@ from_utf16.feed(utf16) print str(from_utf16) # æøå + # Convert UTF-8 to UTF-8. + + only_utf8.feed(utf8) + utf8_2 = str(only_utf8) + print utf8_2 # æøå + # Convert part of a UTF-16 sequence, then convert the remainder, then obtain # the result. @@ -52,11 +59,10 @@ try: from_utf8.feed(iso) # should raise an exception + except UnicodeDecodeError, exc: + print "Not UTF-8 input:", exc.value except OSError, exc: - if exc.value == EILSEQ: - print "Not UTF-8 input:", exc.arg - else: - print "OSError:", exc.value + print "OSError:", exc.value print str(from_utf8) # @@ -70,11 +76,10 @@ try: from_utf8.feed(utf8_2 + iso) # should raise an exception + except UnicodeDecodeError, exc: + print "Not UTF-8 input:", exc.value except OSError, exc: - if exc.value == EILSEQ: - print "Not UTF-8 input:", exc.arg - else: - print "OSError:", exc.value + print "OSError:", exc.value print str(from_utf8) # diff -r 0c718cccf00f -r 0c59d603a569 tests/unicode.py --- a/tests/unicode.py Thu Dec 15 01:40:31 2016 +0100 +++ b/tests/unicode.py Thu Dec 15 16:09:01 2016 +0100 @@ -37,6 +37,13 @@ print u3.encoding # ISO-8859-1 print len(u3) # 3 +# Test invalid sequences. + +try: + u4 = unicode(s, "UTF-8") +except UnicodeDecodeError, exc: + print "Attempt to decode", s, "as UTF-8 failed." + # Combine bytes and text. # The text should be decoded.