1.1 --- a/lib/__builtins__/str.py Sat Feb 04 00:10:47 2017 +0100
1.2 +++ b/lib/__builtins__/str.py Sat Feb 04 00:12:06 2017 +0100
1.3 @@ -127,13 +127,7 @@
1.4 elif c == "\r":
1.5 b.append("\\r")
1.6 else:
1.7 - if n < 0:
1.8 - n += 256
1.9 - b.append("\\x")
1.10 - x = hex(n, "")
1.11 - if len(x) < 2:
1.12 - b.append("0")
1.13 - b.append(x)
1.14 + self._quote_value(b, n)
1.15
1.16 i += 1
1.17 last = i
1.18 @@ -144,6 +138,18 @@
1.19 b.append(quote)
1.20 return str(b)
1.21
1.22 + def _quote_value(self, b, n):
1.23 +
1.24 + "Append to 'b' the quoted form of 'n'."
1.25 +
1.26 + if n < 0:
1.27 + n += 256
1.28 + b.append("\\x")
1.29 + x = hex(n, "")
1.30 + if len(x) < 2:
1.31 + b.append("0")
1.32 + b.append(x)
1.33 +
1.34 def bytelength(self):
1.35
1.36 "Return the number of bytes in this string."
2.1 --- a/lib/__builtins__/unicode.py Sat Feb 04 00:10:47 2017 +0100
2.2 +++ b/lib/__builtins__/unicode.py Sat Feb 04 00:12:06 2017 +0100
2.3 @@ -89,6 +89,34 @@
2.4 else:
2.5 return result
2.6
2.7 + def _quote_value(self, b, n):
2.8 +
2.9 + "Append to 'b' the quoted form of 'n'."
2.10 +
2.11 + if n < 0:
2.12 + n += 256
2.13 +
2.14 + if n > 0xffff:
2.15 + b.append("\\U")
2.16 + digits = 8
2.17 + elif n > 0xff:
2.18 + b.append("\\u")
2.19 + digits = 4
2.20 + else:
2.21 + b.append("\\x")
2.22 + digits = 2
2.23 +
2.24 + x = hex(n, "")
2.25 + i = len(x)
2.26 +
2.27 + while i < digits:
2.28 + b.append("0")
2.29 + i += 1
2.30 +
2.31 + b.append(x)
2.32 +
2.33 + # Operator methods.
2.34 +
2.35 def __iadd__(self, other):
2.36
2.37 "Return a string combining this string with 'other'."
3.1 --- a/tests/unicode.py Sat Feb 04 00:10:47 2017 +0100
3.2 +++ b/tests/unicode.py Sat Feb 04 00:12:06 2017 +0100
3.3 @@ -1,22 +1,22 @@
3.4 -# -*- coding: ISO-8859-1 -*-
3.5 +# -*- coding: ISO-8859-15 -*-
3.6
3.7 import sys
3.8
3.9 # Print bytes.
3.10
3.11 s = b"ÆØÅ"
3.12 -print "ISO-8859-1 values:"
3.13 +print "ISO-8859-15 values:"
3.14 print s # ÆØÅ
3.15 print len(s) # 3
3.16
3.17 s2 = b"\xe6\xf8\xe5"
3.18 -print "ISO-8859-1 values:"
3.19 +print "ISO-8859-15 values:"
3.20 print s2 # æøå
3.21 print s2.__class__ # __builtins__.str.string
3.22 print len(s2) # 3
3.23
3.24 s3 = "\xe6\xf8\xe5"
3.25 -print "ISO-8859-1 values:"
3.26 +print "ISO-8859-15 values:"
3.27 print s3 # æøå
3.28 print s3.__class__ # __builtins__.str.string
3.29 print len(s3) # 3
3.30 @@ -28,13 +28,13 @@
3.31 print len(s4) # 18
3.32
3.33 s5 = b"\346\370\345"
3.34 -print "ISO-8859-1 values:"
3.35 +print "ISO-8859-15 values:"
3.36 print s5 # æøå
3.37 print s5.__class__ # __builtins__.str.string
3.38 print len(s5) # 3
3.39
3.40 s6 = "\346\370\345"
3.41 -print "ISO-8859-1 values:"
3.42 +print "ISO-8859-15 values:"
3.43 print s6 # æøå
3.44 print s6.__class__ # __builtins__.str.string
3.45 print len(s6) # 3
3.46 @@ -49,12 +49,12 @@
3.47
3.48 # Explicitly from bytes.
3.49
3.50 -u = unicode(b"æøå", "ISO-8859-1")
3.51 +u = unicode(b"æøå", "ISO-8859-15")
3.52 print "Unicode values:"
3.53 print u # æøå
3.54 print u.__class__ # __builtins__.unicode.utf8string
3.55 -print u.encode("ISO-8859-1") # æøå
3.56 -print u.encoding # ISO-8859-1
3.57 +print u.encode("ISO-8859-15") # æøå
3.58 +print u.encoding # ISO-8859-15
3.59 print len(u) # 3
3.60
3.61 # Explicitly from Unicode literals.
3.62 @@ -63,8 +63,8 @@
3.63 print "Unicode values:"
3.64 print u2 # æøå
3.65 print u2.__class__ # __builtins__.unicode.utf8string
3.66 -print u2.encode("ISO-8859-1") # æøå
3.67 -print u2.encoding # ISO-8859-1
3.68 +print u2.encode("ISO-8859-15") # æøå
3.69 +print u2.encoding # ISO-8859-15
3.70 print len(u2) # 3
3.71
3.72 # Implicitly from string literals.
3.73 @@ -73,18 +73,18 @@
3.74 print "Unicode values:"
3.75 print u3 # æøå
3.76 print u3.__class__ # __builtins__.unicode.utf8string
3.77 -print u3.encode("ISO-8859-1") # æøå
3.78 -print u3.encoding # ISO-8859-1
3.79 +print u3.encode("ISO-8859-15") # æøå
3.80 +print u3.encoding # ISO-8859-15
3.81 print len(u3) # 3
3.82
3.83 # Explicitly from implicitly-converted literal.
3.84
3.85 -u4 = unicode("æøå", "ISO-8859-1")
3.86 +u4 = unicode("æøå", "ISO-8859-15")
3.87 print "Unicode values:"
3.88 print u4 # æøå
3.89 print u4.__class__ # __builtins__.unicode.utf8string
3.90 -print u4.encode("ISO-8859-1") # æøå
3.91 -print u4.encoding # ISO-8859-1
3.92 +print u4.encode("ISO-8859-15") # æøå
3.93 +print u4.encoding # ISO-8859-15
3.94 print len(u4) # 3
3.95
3.96 # Test Unicode values.
3.97 @@ -130,7 +130,7 @@
3.98 # The text should be decoded.
3.99
3.100 su = s + u
3.101 -print "ISO-8859-1 values:"
3.102 +print "ISO-8859-15 values:"
3.103 print su # ÆØÅæøå
3.104 print su.__class__ # __builtins__.str.string
3.105 print len(su) # 6
3.106 @@ -139,7 +139,7 @@
3.107 # The text should be decoded.
3.108
3.109 us = u + s
3.110 -print "ISO-8859-1 values:"
3.111 +print "ISO-8859-15 values:"
3.112 print us # æøåÆØÅ
3.113 print us.__class__ # __builtins__.str.string
3.114 print len(us) # 6
3.115 @@ -150,7 +150,7 @@
3.116 print "Unicode values:"
3.117 print uu2 # æøåæøå
3.118 print uu2.__class__ # __builtins__.unicode.utf8string
3.119 -print uu2.encoding # ISO-8859-1
3.120 +print uu2.encoding # ISO-8859-15
3.121 print len(uu2) # 6
3.122
3.123 # Inspect and update the encoding of stdout.
3.124 @@ -159,9 +159,9 @@
3.125 print sys.stdout # <libc.io.sysstream instance>
3.126 print sys.stdout.encoding # None
3.127
3.128 -sys.stdout.encoding = "ISO-8859-1"
3.129 -print "ISO-8859-1 and Unicode values as ISO-8859-1:"
3.130 -print sys.stdout.encoding # ISO-8859-1
3.131 +sys.stdout.encoding = "ISO-8859-15"
3.132 +print "ISO-8859-15 and Unicode values as ISO-8859-15:"
3.133 +print sys.stdout.encoding # ISO-8859-15
3.134 print u # æøå
3.135 print su # ÆØÅæøå
3.136 print us # æøåÆØÅ
3.137 @@ -170,19 +170,19 @@
3.138 print "Unicode values as UTF-8:"
3.139 print sys.stdout.encoding # UTF-8
3.140 print u # æøå
3.141 -print "ISO-8859-1 values bypassing UTF-8 output encoding:"
3.142 +print "ISO-8859-15 values bypassing UTF-8 output encoding:"
3.143 print su # ÆØÅæøå
3.144 print us # æøåÆØÅ
3.145
3.146 # Reset the encoding.
3.147
3.148 -sys.stdout.encoding = "ISO-8859-1"
3.149 +sys.stdout.encoding = "ISO-8859-15"
3.150
3.151 # Test character access.
3.152
3.153 u0 = u[0]
3.154 print u0.__class__ # __builtins__.unicode.utf8string
3.155 -print u0.encoding # ISO-8859-1
3.156 +print u0.encoding # ISO-8859-15
3.157 print u0 # æ
3.158 print u[-1] # å
3.159 print len(u[0]) # 1
3.160 @@ -200,3 +200,9 @@
3.161 print ord(u) # should raise an exception
3.162 except ValueError, exc:
3.163 print "ord(u): value is not appropriate", repr(exc.value)
3.164 +
3.165 +euro = "¤"
3.166 +print euro # €
3.167 +print repr(euro) # "\u20ac"
3.168 +print ord(euro) # 8364
3.169 +print "\u20ac" # €