1.1 --- a/tests/unicode.py Sat Feb 04 00:10:47 2017 +0100
1.2 +++ b/tests/unicode.py Sat Feb 04 00:12:06 2017 +0100
1.3 @@ -1,22 +1,22 @@
1.4 -# -*- coding: ISO-8859-1 -*-
1.5 +# -*- coding: ISO-8859-15 -*-
1.6
1.7 import sys
1.8
1.9 # Print bytes.
1.10
1.11 s = b"ÆØÅ"
1.12 -print "ISO-8859-1 values:"
1.13 +print "ISO-8859-15 values:"
1.14 print s # ÆØÅ
1.15 print len(s) # 3
1.16
1.17 s2 = b"\xe6\xf8\xe5"
1.18 -print "ISO-8859-1 values:"
1.19 +print "ISO-8859-15 values:"
1.20 print s2 # æøå
1.21 print s2.__class__ # __builtins__.str.string
1.22 print len(s2) # 3
1.23
1.24 s3 = "\xe6\xf8\xe5"
1.25 -print "ISO-8859-1 values:"
1.26 +print "ISO-8859-15 values:"
1.27 print s3 # æøå
1.28 print s3.__class__ # __builtins__.str.string
1.29 print len(s3) # 3
1.30 @@ -28,13 +28,13 @@
1.31 print len(s4) # 18
1.32
1.33 s5 = b"\346\370\345"
1.34 -print "ISO-8859-1 values:"
1.35 +print "ISO-8859-15 values:"
1.36 print s5 # æøå
1.37 print s5.__class__ # __builtins__.str.string
1.38 print len(s5) # 3
1.39
1.40 s6 = "\346\370\345"
1.41 -print "ISO-8859-1 values:"
1.42 +print "ISO-8859-15 values:"
1.43 print s6 # æøå
1.44 print s6.__class__ # __builtins__.str.string
1.45 print len(s6) # 3
1.46 @@ -49,12 +49,12 @@
1.47
1.48 # Explicitly from bytes.
1.49
1.50 -u = unicode(b"æøå", "ISO-8859-1")
1.51 +u = unicode(b"æøå", "ISO-8859-15")
1.52 print "Unicode values:"
1.53 print u # æøå
1.54 print u.__class__ # __builtins__.unicode.utf8string
1.55 -print u.encode("ISO-8859-1") # æøå
1.56 -print u.encoding # ISO-8859-1
1.57 +print u.encode("ISO-8859-15") # æøå
1.58 +print u.encoding # ISO-8859-15
1.59 print len(u) # 3
1.60
1.61 # Explicitly from Unicode literals.
1.62 @@ -63,8 +63,8 @@
1.63 print "Unicode values:"
1.64 print u2 # æøå
1.65 print u2.__class__ # __builtins__.unicode.utf8string
1.66 -print u2.encode("ISO-8859-1") # æøå
1.67 -print u2.encoding # ISO-8859-1
1.68 +print u2.encode("ISO-8859-15") # æøå
1.69 +print u2.encoding # ISO-8859-15
1.70 print len(u2) # 3
1.71
1.72 # Implicitly from string literals.
1.73 @@ -73,18 +73,18 @@
1.74 print "Unicode values:"
1.75 print u3 # æøå
1.76 print u3.__class__ # __builtins__.unicode.utf8string
1.77 -print u3.encode("ISO-8859-1") # æøå
1.78 -print u3.encoding # ISO-8859-1
1.79 +print u3.encode("ISO-8859-15") # æøå
1.80 +print u3.encoding # ISO-8859-15
1.81 print len(u3) # 3
1.82
1.83 # Explicitly from implicitly-converted literal.
1.84
1.85 -u4 = unicode("æøå", "ISO-8859-1")
1.86 +u4 = unicode("æøå", "ISO-8859-15")
1.87 print "Unicode values:"
1.88 print u4 # æøå
1.89 print u4.__class__ # __builtins__.unicode.utf8string
1.90 -print u4.encode("ISO-8859-1") # æøå
1.91 -print u4.encoding # ISO-8859-1
1.92 +print u4.encode("ISO-8859-15") # æøå
1.93 +print u4.encoding # ISO-8859-15
1.94 print len(u4) # 3
1.95
1.96 # Test Unicode values.
1.97 @@ -130,7 +130,7 @@
1.98 # The text should be decoded.
1.99
1.100 su = s + u
1.101 -print "ISO-8859-1 values:"
1.102 +print "ISO-8859-15 values:"
1.103 print su # ÆØÅæøå
1.104 print su.__class__ # __builtins__.str.string
1.105 print len(su) # 6
1.106 @@ -139,7 +139,7 @@
1.107 # The text should be decoded.
1.108
1.109 us = u + s
1.110 -print "ISO-8859-1 values:"
1.111 +print "ISO-8859-15 values:"
1.112 print us # æøåÆØÅ
1.113 print us.__class__ # __builtins__.str.string
1.114 print len(us) # 6
1.115 @@ -150,7 +150,7 @@
1.116 print "Unicode values:"
1.117 print uu2 # æøåæøå
1.118 print uu2.__class__ # __builtins__.unicode.utf8string
1.119 -print uu2.encoding # ISO-8859-1
1.120 +print uu2.encoding # ISO-8859-15
1.121 print len(uu2) # 6
1.122
1.123 # Inspect and update the encoding of stdout.
1.124 @@ -159,9 +159,9 @@
1.125 print sys.stdout # <libc.io.sysstream instance>
1.126 print sys.stdout.encoding # None
1.127
1.128 -sys.stdout.encoding = "ISO-8859-1"
1.129 -print "ISO-8859-1 and Unicode values as ISO-8859-1:"
1.130 -print sys.stdout.encoding # ISO-8859-1
1.131 +sys.stdout.encoding = "ISO-8859-15"
1.132 +print "ISO-8859-15 and Unicode values as ISO-8859-15:"
1.133 +print sys.stdout.encoding # ISO-8859-15
1.134 print u # æøå
1.135 print su # ÆØÅæøå
1.136 print us # æøåÆØÅ
1.137 @@ -170,19 +170,19 @@
1.138 print "Unicode values as UTF-8:"
1.139 print sys.stdout.encoding # UTF-8
1.140 print u # æøå
1.141 -print "ISO-8859-1 values bypassing UTF-8 output encoding:"
1.142 +print "ISO-8859-15 values bypassing UTF-8 output encoding:"
1.143 print su # ÆØÅæøå
1.144 print us # æøåÆØÅ
1.145
1.146 # Reset the encoding.
1.147
1.148 -sys.stdout.encoding = "ISO-8859-1"
1.149 +sys.stdout.encoding = "ISO-8859-15"
1.150
1.151 # Test character access.
1.152
1.153 u0 = u[0]
1.154 print u0.__class__ # __builtins__.unicode.utf8string
1.155 -print u0.encoding # ISO-8859-1
1.156 +print u0.encoding # ISO-8859-15
1.157 print u0 # æ
1.158 print u[-1] # å
1.159 print len(u[0]) # 1
1.160 @@ -200,3 +200,9 @@
1.161 print ord(u) # should raise an exception
1.162 except ValueError, exc:
1.163 print "ord(u): value is not appropriate", repr(exc.value)
1.164 +
1.165 +euro = "¤"
1.166 +print euro # ¤ (the ISO-8859-15 euro sign, byte 0xA4, as it displays when read as ISO-8859-1)
1.167 +print repr(euro) # "\u20ac"
1.168 +print ord(euro) # 8364
1.169 +print "\u20ac" # ¤ (euro sign U+20AC; shown as ¤ when the ISO-8859-15 output is read as ISO-8859-1)