# -*- coding: ISO-8859-15 -*-

# Exercises byte string and text (unicode) literals, conversions between
# them, concatenation, selection of the output encoding, and character
# access. The trailing comment on each print statement records the output
# the test expects.
#
# NOTE(review): the non-ASCII characters in this file (in string literals and
# in the expected-output comments) appear as "?" in the reviewed copy,
# presumably lost in transcoding. The escape-based cases below (bytes E6 F8
# E5, ord(u[0]) == 230, U+20AC with ord 8364) suggest the originals are three
# ISO-8859-15 letters and the euro sign -- restore them from the canonical
# file, saved as ISO-8859-15, before running.
#
# NOTE(review): the ".encoding" attribute on text objects, assignment to
# sys.stdout.encoding, exc.value and class names such as __builtins__.str.str
# are not standard CPython 2 behaviour; presumably this script targets a
# different Python-like toolchain -- confirm before porting or modernising.

import sys

# Print bytes.

# Byte literal containing characters in the source encoding.
s = b"???"
print "ISO-8859-15 values:"
print s  # ???
print len(s)  # 3

# Adjacent literals joined across a line continuation; the result is
# expected to be a single six-byte string.
s1 = b"???" \
     "???"
print "ISO-8859-15 values:"
print s1  # ??????
print len(s1)  # 6

# Byte literal using hexadecimal escapes.
s2 = b"\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s2  # ???
print s2.__class__  # __builtins__.str.str
print len(s2)  # 3

# Plain literal using hexadecimal escapes; expected to be a byte string.
s3 = "\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s3  # ???
print s3.__class__  # __builtins__.str.str
print len(s3)  # 3

# Unicode escapes are not expected to be interpreted in a byte literal, so
# each six-character escape sequence is kept verbatim (18 characters).
s4 = b"\u00e6\u00f8\u00e5"
print "Untranslated values:"
print s4  # \u00e6\u00f8\u00e5
print s4.__class__  # __builtins__.str.str
print len(s4)  # 18

# Byte literal using octal escapes.
s5 = b"\346\370\345"
print "ISO-8859-15 values:"
print s5  # ???
print s5.__class__  # __builtins__.str.str
print len(s5)  # 3

# Plain literal using octal escapes; expected to be a byte string.
s6 = "\346\370\345"
print "ISO-8859-15 values:"
print s6  # ???
print s6.__class__  # __builtins__.str.str
print len(s6)  # 3

# Raw literal: the octal escapes are kept verbatim (12 characters).
s7 = r"\346\370\345"
print "Untranslated values:"
print s7  # \346\370\345
print s7.__class__  # __builtins__.unicode.unicode
print len(s7)  # 12

# Obtain text and print it.

# Explicitly from bytes.

u = unicode(b"???", "ISO-8859-15")
print "Unicode values:"
print u  # ???
print u.__class__  # __builtins__.unicode.unicode
print u.encode("ISO-8859-15")  # ???
print u.encoding  # ISO-8859-15
print len(u)  # 3

# Explicitly from Unicode literals.

u2 = u"???"
print "Unicode values:"
print u2  # ???
print u2.__class__  # __builtins__.unicode.unicode
print u2.encode("ISO-8859-15")  # ???
print u2.encoding  # ISO-8859-15
print len(u2)  # 3

# Implicitly from string literals.

u3 = "???"
print "Unicode values:"
print u3  # ???
print u3.__class__  # __builtins__.unicode.unicode
print u3.encode("ISO-8859-15")  # ???
print u3.encoding  # ISO-8859-15
print len(u3)  # 3

# Explicitly from implicitly-converted literal.

u4 = unicode("???", "ISO-8859-15")
print "Unicode values:"
print u4  # ???
print u4.__class__  # __builtins__.unicode.unicode
print u4.encode("ISO-8859-15")  # ???
print u4.encoding  # ISO-8859-15
print len(u4)  # 3

# Test Unicode values.

# Unicode escapes in a plain (non-byte) literal are expected to yield three
# characters of text.
u5 = "\u00e6\u00f8\u00e5"
print "Unicode values:"
print u5  # ???
print u5.__class__  # __builtins__.unicode.unicode
print len(u5)  # 3

# Test some untranslated values.

# The doubled backslashes produce literal backslashes, so the escape
# sequences are kept as text (18 characters).
u6 = "\\u00e6\\u00f8\\u00e5"
print "Untranslated values:"
print u6  # \u00e6\u00f8\u00e5
print u6.__class__  # __builtins__.unicode.unicode
print len(u6)  # 18

# Test Unicode values.

# Octal escapes in a Unicode literal.
u7 = u"\346\370\345"
print "Unicode values:"
print u7  # ???
print u7.__class__  # __builtins__.unicode.unicode
print len(u7)  # 3

# Test some untranslated values.

# Raw Unicode literal: the octal escapes are kept verbatim (12 characters).
u8 = ur"\346\370\345"
print "Untranslated values:"
print u8  # \346\370\345
print u8.__class__  # __builtins__.unicode.unicode
print len(u8)  # 12

# Test invalid sequences.

# The bytes in s are expected not to form valid UTF-8, so decoding should
# raise UnicodeDecodeError.
try:
    u9 = unicode(s, "UTF-8")
except UnicodeDecodeError, exc:
    print "Attempt to decode", s, "as UTF-8 failed."

# Mix Unicode and byte values.

# With a byte escape present the literal is expected to be a byte string,
# leaving the Unicode escapes uninterpreted (6 + 1 + 6 = 13 characters).
u10 = "\u00e6\xf8\u00e5"
print "ISO-8859-15 values:"
print u10  # \u00e6?\u00e5
print u10.__class__  # __builtins__.str.str
print len(u10)  # 13

# Combine bytes and text.
# The text should be decoded.

su = s + u
print "ISO-8859-15 values:"
print su  # ??????
print su.__class__  # __builtins__.str.str
print len(su)  # 6

# Combine text and bytes.
# The text should be decoded.

us = u + s
print "ISO-8859-15 values:"
print us  # ??????
print us.__class__  # __builtins__.str.str
print len(us)  # 6

# Combine text and text.

uu2 = u + u2
print "Unicode values:"
print uu2  # ??????
print uu2.__class__  # __builtins__.unicode.unicode
print uu2.encoding  # ISO-8859-15
print len(uu2)  # 6

# Inspect and update the encoding of stdout.
# Note that su and us are byte strings and are not recoded.

print sys.stdout  # <libc.io.sysstream instance>
print sys.stdout.encoding  # None

sys.stdout.encoding = "ISO-8859-15"
print "ISO-8859-15 and Unicode values as ISO-8859-15:"
print sys.stdout.encoding  # ISO-8859-15
print u  # ???
print su  # ??????
print us  # ??????

# With UTF-8 output the three characters of u are expected to occupy six
# bytes, while the byte strings su and us are written unchanged.
sys.stdout.encoding = "UTF-8"
print "Unicode values as UTF-8:"
print sys.stdout.encoding  # UTF-8
print u  # ??????
print "ISO-8859-15 values bypassing UTF-8 output encoding:"
print su  # ??????
print us  # ??????

# Reset the encoding.

sys.stdout.encoding = "ISO-8859-15"

# Test character access.

# Indexing and slicing text are expected to yield text, measured in
# characters.
u0 = u[0]
print u0.__class__  # __builtins__.unicode.unicode
print u0.encoding  # ISO-8859-15
print u0  # ?
print u[-1]  # ?
print len(u[0])  # 1
print len(u[-1])  # 1
print u[:2]  # ??
print len(u[:2])  # 2
print u[-1::-1]  # ???
print len(u[-1::-1])  # 3

# Test character values.

print ord(u[0])  # 230

# ord is expected to reject a string of more than one character.
try:
    print ord(u)  # should raise an exception
except ValueError, exc:
    print "ord(u): value is not appropriate", repr(exc.value)

# A character whose Unicode code point (8364, U+20AC) is outside the
# single-byte range, round-tripped through ord and unichr.
euro = "?"
print euro  # ?
print repr(euro)  # "\u20ac"
print ord(euro)  # 8364
print "\u20ac"  # ?
print unichr(ord(euro))  # ?
print unichr(ord(euro)) == euro  # True