# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1481814541 -3600
# Node ID 0c59d603a56928d0fd42455ab4dc2ad70beb61f7
# Parent  0c718cccf00fa9d34470bfa35546067763cf2bae
Raise UnicodeDecodeError instead of propagating OSError with EILSEQ from iconv.

diff -r 0c718cccf00f -r 0c59d603a569 lib/__builtins__/exception/unicode.py
--- a/lib/__builtins__/exception/unicode.py	Thu Dec 15 01:40:31 2016 +0100
+++ b/lib/__builtins__/exception/unicode.py	Thu Dec 15 16:09:01 2016 +0100
@@ -3,7 +3,7 @@
 """
 Unicode exception objects.
 
-Copyright (C) 2015 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2015, 2016 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -19,7 +19,22 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
-class UnicodeDecodeError(Exception): pass
+class UnicodeDecodeError(Exception):
+
+    """
+    An exception indicating a failure to interpret a byte sequence according to
+    a character encoding.
+    """
+
+    def __init__(self, value):
+
+        """
+        Initialise an exception with a 'value' providing the illegal byte
+        sequence responsible for the error.
+        """
+
+        self.value = value
+
 class UnicodeEncodeError(Exception): pass
 class UnicodeError(Exception): pass
 class UnicodeTranslateError(Exception): pass
diff -r 0c718cccf00f -r 0c59d603a569 lib/__builtins__/unicode.py
--- a/lib/__builtins__/unicode.py	Thu Dec 15 01:40:31 2016 +0100
+++ b/lib/__builtins__/unicode.py	Thu Dec 15 16:09:01 2016 +0100
@@ -172,7 +172,8 @@
 
     s = s.__str__()
 
-    # Convert the string to UTF-8.
+    # Convert the string to UTF-8. Even if the stated encoding is UTF-8, it
+    # needs to be validated.
 
     to_utf8 = Converter(encoding, "UTF-8")
 
diff -r 0c718cccf00f -r 0c59d603a569 lib/posix/iconv.py
--- a/lib/posix/iconv.py	Thu Dec 15 01:40:31 2016 +0100
+++ b/lib/posix/iconv.py	Thu Dec 15 16:09:01 2016 +0100
@@ -63,7 +63,7 @@
 
     def feed(self, s):
 
-        "Feed 's' to the converter."
+        "Feed 's' to the converter, converting its byte representation."
 
         if self.__data__ is None:
             raise ConverterError
@@ -90,6 +90,8 @@
                 if exc.value == EINVAL:
                     self.result.append(exc.arg)
                     return
+                elif exc.value == EILSEQ:
+                    raise UnicodeDecodeError(exc.arg)
                 else:
                     raise
 
diff -r 0c718cccf00f -r 0c59d603a569 tests/iconv.py
--- a/tests/iconv.py	Thu Dec 15 01:40:31 2016 +0100
+++ b/tests/iconv.py	Thu Dec 15 16:09:01 2016 +0100
@@ -1,7 +1,8 @@
 # -*- coding: ISO-8859-1 -*-
 
-from posix.iconv import Converter, EILSEQ
+from posix.iconv import Converter
 
+only_utf8 = Converter("UTF-8", "UTF-8")
 to_utf8 = Converter("ISO-8859-1", "UTF-8")
 to_utf16 = Converter("ISO-8859-1", "UTF-16")
 from_utf8 = Converter("UTF-8", "ISO-8859-1")
@@ -21,6 +22,12 @@
     from_utf16.feed(utf16)
     print str(from_utf16)               # æøå
 
+    # Convert UTF-8 to UTF-8.
+
+    only_utf8.feed(utf8)
+    utf8_2 = str(only_utf8)
+    print utf8_2                        # Ã¦Ã¸Ã¥
+
     # Convert part of a UTF-16 sequence, then convert the remainder, then obtain
     # the result.
 
@@ -52,11 +59,10 @@
 
     try:
         from_utf8.feed(iso)             # should raise an exception
+    except UnicodeDecodeError, exc:
+        print "Not UTF-8 input:", exc.value
     except OSError, exc:
-        if exc.value == EILSEQ:
-            print "Not UTF-8 input:", exc.arg
-        else:
-            print "OSError:", exc.value
+        print "OSError:", exc.value
 
     print str(from_utf8)                #
 
@@ -70,11 +76,10 @@
 
     try:
         from_utf8.feed(utf8_2 + iso)    # should raise an exception
+    except UnicodeDecodeError, exc:
+        print "Not UTF-8 input:", exc.value
     except OSError, exc:
-        if exc.value == EILSEQ:
-            print "Not UTF-8 input:", exc.arg
-        else:
-            print "OSError:", exc.value
+        print "OSError:", exc.value
 
     print str(from_utf8)                #
 
diff -r 0c718cccf00f -r 0c59d603a569 tests/unicode.py
--- a/tests/unicode.py	Thu Dec 15 01:40:31 2016 +0100
+++ b/tests/unicode.py	Thu Dec 15 16:09:01 2016 +0100
@@ -37,6 +37,13 @@
 print u3.encoding                   # ISO-8859-1
 print len(u3)                       # 3
 
+# Test invalid sequences.
+
+try:
+    u4 = unicode(s, "UTF-8")
+except UnicodeDecodeError, exc:
+    print "Attempt to decode", s, "as UTF-8 failed."
+
 # Combine bytes and text.
 # The text should be decoded.