# HG changeset patch # User Paul Boddie # Date 1481665708 -3600 # Node ID 87ba8cc4108297dc58f0595d20f212293ba1728c # Parent 0be168e888a9d4d66b363354585afd5e222182ba Make sure that Unicode objects result from text-only concatenation. diff -r 0be168e888a9 -r 87ba8cc41082 lib/__builtins__/unicode.py --- a/lib/__builtins__/unicode.py Tue Dec 13 22:46:52 2016 +0100 +++ b/lib/__builtins__/unicode.py Tue Dec 13 22:48:28 2016 +0100 @@ -77,6 +77,29 @@ else: return op(other.__data__, self.__data__) + def _convert(self, result, other): + + "Convert 'result' to a Unicode object if 'other' already is." + + if _isinstance(other, utf8string): + return utf8string(result, self.encoding) + else: + return result + + def __iadd__(self, other): + + "Return a string combining this string with 'other'." + + return self._convert(self._binary_op(str_add, other), other) + + __add__ = __iadd__ + + def __radd__(self, other): + + "Return a string combining 'other' with this string." + + return self._convert(self._binary_op_rev(str_add, other), other) + def encode(self, encoding=None): """ diff -r 0be168e888a9 -r 87ba8cc41082 tests/unicode.py --- a/tests/unicode.py Tue Dec 13 22:46:52 2016 +0100 +++ b/tests/unicode.py Tue Dec 13 22:48:28 2016 +0100 @@ -14,35 +14,56 @@ u = unicode("æøå", "ISO-8859-1") print u # æøå print u.encode("ISO-8859-1") # æøå +print u.encoding # ISO-8859-1 # Explicitly from Unicode literals. u2 = u"æøå" print u2 # æøå print u2.encode("ISO-8859-1") # æøå +print u2.encoding # ISO-8859-1 # Implicitly from string literals. #u3 = "æøå" #print u3 # æøå #print u3.encode("ISO-8859-1") # æøå +#print u3.encoding # ISO-8859-1 # Combine bytes and text. # The text should be decoded. su = s + u print su # ÆØÅæøå +print su.__class__ # __builtins__.str.string # Combine text and bytes. # The text should be decoded. us = u + s print us # æøåÆØÅ +print us.__class__ # __builtins__.str.string + +# Combine text and text.
+ uu2 = u + u2 +print uu2 # æøåæøå +print uu2.__class__ # __builtins__.unicode.utf8string +print uu2.encoding # ISO-8859-1 # Inspect and update the encoding of stdout. +# Note that su and us are byte strings and are not recoded. print sys.stdout.encoding # None + sys.stdout.encoding = "ISO-8859-1" +print sys.stdout.encoding # ISO-8859-1 print u # æøå print su # ÆØÅæøå print us # æøåÆØÅ + +sys.stdout.encoding = "UTF-8" +print sys.stdout.encoding # UTF-8 +print u # æøå +print su # ÆØÅæøå +print us # æøåÆØÅ