1.1 --- a/lib/__builtins__/str.py Tue Dec 13 17:58:26 2016 +0100
1.2 +++ b/lib/__builtins__/str.py Tue Dec 13 19:19:23 2016 +0100
1.3 @@ -74,12 +74,31 @@
1.4
1.5 def _binary_op(self, op, other):
1.6
1.7 - "Perform 'op' on this int and 'other' if appropriate."
1.8 + "Perform 'op' on this object and 'other' if appropriate."
1.9 +
1.10 + # Refuse to operate on specialisations of this class.
1.11 +
1.12 + if self.__class__ is not other.__class__:
1.13 + return NotImplemented
1.14 +
1.15 + # Otherwise, perform the operation on the operands' data.
1.16 +
1.17 + else:
1.18 + return op(self.__data__, other.__data__)
1.19
1.20 - if isinstance(other, basestring):
1.21 - return op(self.__data__, other.__data__)
1.22 + def _binary_op_rev(self, op, other):
1.23 +
1.24 + "Perform 'op' on 'other' and this object if appropriate."
1.25 +
1.26 + # Refuse to operate on specialisations of this class.
1.27 +
1.28 + if self.__class__ is not other.__class__:
1.29 + return NotImplemented
1.30 +
1.31 + # Otherwise, perform the operation on the operands' data.
1.32 +
1.33 else:
1.34 - return NotImplemented
1.35 + return op(other.__data__, self.__data__)
1.36
1.37 def __iadd__(self, other):
1.38
1.39 @@ -87,7 +106,13 @@
1.40
1.41 return self._binary_op(str_add, other)
1.42
1.43 - __add__ = __radd__ = __iadd__
1.44 + __add__ = __iadd__
1.45 +
1.46 + def __radd__(self, other):
1.47 +
1.48 + "Return a string combining this string with 'other'."
1.49 +
1.50 + return self._binary_op_rev(str_add, other)
1.51
1.52 def __mul__(self, other): pass
1.53 def __rmul__(self, other): pass
2.1 --- a/lib/__builtins__/unicode.py Tue Dec 13 17:58:26 2016 +0100
2.2 +++ b/lib/__builtins__/unicode.py Tue Dec 13 19:19:23 2016 +0100
2.3 @@ -21,14 +21,72 @@
2.4
2.5 from __builtins__.str import basestring
2.6 from posix.iconv import Converter
2.7 +from native import str_add, isinstance as _isinstance
2.8
2.9 class utf8string(basestring):
2.10
2.11 "A character string representation based on UTF-8."
2.12
2.13 - def encode(self, encoding):
2.14 + def __init__(self, other=None, encoding=None):
2.15 +
2.16 + """
2.17 + Initialise the string, perhaps from 'other', with any original
2.18 + 'encoding' indicated.
2.19 + """
2.20 +
2.21 + get_using(basestring.__init__, self)(other)
2.22 + self.encoding = encoding
2.23 +
2.24 + def _binary_op(self, op, other):
2.25 +
2.26 + "Perform 'op' on this object and 'other' if appropriate."
2.27 +
2.28 + # Reject non-strings.
2.29 +
2.30 + if not _isinstance(other, basestring):
2.31 + return NotImplemented
2.32 +
2.33 + # Combining text with bytes.
2.34 +
2.35 + elif not _isinstance(other, utf8string):
2.36 + s = self.encode()
2.37 + return op(s.__data__, other.__data__)
2.38 +
2.39 + # Otherwise, perform the operation on the operands' data.
2.40 +
2.41 + else:
2.42 + return op(self.__data__, other.__data__)
2.43
2.44 - "Encode the string to the given 'encoding'."
2.45 + def _binary_op_rev(self, op, other):
2.46 +
2.47 + "Perform 'op' on 'other' and this object if appropriate."
2.48 +
2.49 + # Reject non-strings.
2.50 +
2.51 + if not _isinstance(other, basestring):
2.52 + return NotImplemented
2.53 +
2.54 + # Combining text with bytes.
2.55 +
2.56 + elif not _isinstance(other, utf8string):
2.57 + s = self.encode()
2.58 + return op(other.__data__, s.__data__)
2.59 +
2.60 + # Otherwise, perform the operation on the operands' data.
2.61 +
2.62 + else:
2.63 + return op(other.__data__, self.__data__)
2.64 +
2.65 + def encode(self, encoding=None):
2.66 +
2.67 + """
2.68 + Encode the string to the given 'encoding' or any original encoding if
2.69 + omitted.
2.70 + """
2.71 +
2.72 + encoding = encoding or self.encoding
2.73 + if not encoding:
2.74 + return self
2.75
2.76 from_utf8 = Converter("UTF-8", encoding)
2.77
2.78 @@ -56,7 +114,7 @@
2.79
2.80 try:
2.81 to_utf8.feed(s)
2.82 - return utf8string(str(to_utf8))
2.83 + return utf8string(str(to_utf8), encoding)
2.84
2.85 finally:
2.86 to_utf8.close()
3.1 --- a/tests/unicode.py Tue Dec 13 17:58:26 2016 +0100
3.2 +++ b/tests/unicode.py Tue Dec 13 19:19:23 2016 +0100
3.3 @@ -4,14 +4,14 @@
3.4
3.5 # Print bytes.
3.6
3.7 -s = b"æøå"
3.8 -print s # æøå
3.9 +s = b"ÆØÅ"
3.10 +print s # ÆØÅ
3.11
3.12 # Obtain text and print it.
3.13
3.14 # Explicitly from bytes.
3.15
3.16 -u = unicode(s, "ISO-8859-1")
3.17 +u = unicode("æøå", "ISO-8859-1")
3.18 print u # æøå
3.19 print u.encode("ISO-8859-1") # æøå
3.20
3.21 @@ -27,8 +27,22 @@
3.22 #print u3 # æøå
3.23 #print u3.encode("ISO-8859-1") # æøå
3.24
3.25 +# Combine bytes and text.
3.26 +# The text should be encoded using its original encoding.
3.27 +
3.28 +su = s + u
3.29 +print su # ÆØÅæøå
3.30 +
3.31 +# Combine text and bytes.
3.32 +# The text should be encoded using its original encoding.
3.33 +
3.34 +us = u + s
3.35 +print us # æøåÆØÅ
3.36 +
3.37 # Inspect and update the encoding of stdout.
3.38
3.39 print sys.stdout.encoding # None
3.40 sys.stdout.encoding = "ISO-8859-1"
3.41 print u # æøå
3.42 +print su # ÆØÅæøå
3.43 +print us # æøåÆØÅ