paul@536 | 1 | # -*- coding: ISO-8859-15 -*- |
paul@392 | 2 | |
paul@392 | 3 | import sys |
paul@392 | 4 | |
paul@392 | 5 | # Print string literals given as bytes, escape sequences and raw text. |
paul@392 | 6 | |
paul@396 | 7 | s = b"???" |
paul@536 | 8 | print "ISO-8859-15 values:" |
paul@396 | 9 | print s # ??? |
paul@403 | 10 | print len(s) # 3 |
paul@392 | 11 | |
paul@537 | 12 | s1 = b"???" \ |
paul@537 | 13 | "???" |
paul@537 | 14 | print "ISO-8859-15 values:" |
paul@537 | 15 | print s1 # ?????? |
paul@537 | 16 | print len(s1) # 6 |
paul@537 | 17 | |
paul@512 | 18 | s2 = b"\xe6\xf8\xe5" |
paul@536 | 19 | print "ISO-8859-15 values:" |
paul@512 | 20 | print s2 # ??? |
paul@512 | 21 | print s2.__class__ # __builtins__.str.string |
paul@512 | 22 | print len(s2) # 3 |
paul@512 | 23 | |
paul@512 | 24 | s3 = "\xe6\xf8\xe5" |
paul@536 | 25 | print "ISO-8859-15 values:" |
paul@512 | 26 | print s3 # ??? |
paul@512 | 27 | print s3.__class__ # __builtins__.str.string |
paul@512 | 28 | print len(s3) # 3 |
paul@512 | 29 | |
paul@512 | 30 | s4 = b"\u00e6\u00f8\u00e5" |
paul@512 | 31 | print "Untranslated values:" |
paul@512 | 32 | print s4 # \u00e6\u00f8\u00e5 |
paul@512 | 33 | print s4.__class__ # __builtins__.str.string |
paul@512 | 34 | print len(s4) # 18 |
paul@512 | 35 | |
paul@512 | 36 | s5 = b"\346\370\345" |
paul@536 | 37 | print "ISO-8859-15 values:" |
paul@512 | 38 | print s5 # ??? |
paul@512 | 39 | print s5.__class__ # __builtins__.str.string |
paul@512 | 40 | print len(s5) # 3 |
paul@512 | 41 | |
paul@512 | 42 | s6 = "\346\370\345" |
paul@536 | 43 | print "ISO-8859-15 values:" |
paul@512 | 44 | print s6 # ??? |
paul@512 | 45 | print s6.__class__ # __builtins__.str.string |
paul@512 | 46 | print len(s6) # 3 |
paul@512 | 47 | |
paul@512 | 48 | s7 = r"\346\370\345" |
paul@512 | 49 | print "Untranslated values:" |
paul@512 | 50 | print s7 # \346\370\345 |
paul@512 | 51 | print s7.__class__ # __builtins__.unicode.utf8string |
paul@512 | 52 | print len(s7) # 12 |
paul@512 | 53 | |
paul@392 | 54 | # Obtain text and print it, along with its class, encoding and length. |
paul@392 | 55 | |
paul@394 | 56 | # Explicitly from bytes. |
paul@394 | 57 | |
paul@536 | 58 | u = unicode(b"???", "ISO-8859-15") |
paul@512 | 59 | print "Unicode values:" |
paul@406 | 60 | print u # ??? |
paul@403 | 61 | print u.__class__ # __builtins__.unicode.utf8string |
paul@536 | 62 | print u.encode("ISO-8859-15") # ??? |
paul@536 | 63 | print u.encoding # ISO-8859-15 |
paul@403 | 64 | print len(u) # 3 |
paul@392 | 65 | |
paul@394 | 66 | # Explicitly from Unicode literals. |
paul@394 | 67 | |
paul@394 | 68 | u2 = u"???" |
paul@512 | 69 | print "Unicode values:" |
paul@406 | 70 | print u2 # ??? |
paul@403 | 71 | print u2.__class__ # __builtins__.unicode.utf8string |
paul@536 | 72 | print u2.encode("ISO-8859-15") # ??? |
paul@536 | 73 | print u2.encoding # ISO-8859-15 |
paul@403 | 74 | print len(u2) # 3 |
paul@394 | 75 | |
paul@394 | 76 | # Implicitly from string literals. |
paul@394 | 77 | |
paul@405 | 78 | u3 = "???" |
paul@512 | 79 | print "Unicode values:" |
paul@406 | 80 | print u3 # ??? |
paul@405 | 81 | print u3.__class__ # __builtins__.unicode.utf8string |
paul@536 | 82 | print u3.encode("ISO-8859-15") # ??? |
paul@536 | 83 | print u3.encoding # ISO-8859-15 |
paul@405 | 84 | print len(u3) # 3 |
paul@394 | 85 | |
paul@512 | 86 | # Explicitly from implicitly-converted literal. |
paul@512 | 87 | |
paul@536 | 88 | u4 = unicode("???", "ISO-8859-15") |
paul@512 | 89 | print "Unicode values:" |
paul@512 | 90 | print u4 # ??? |
paul@512 | 91 | print u4.__class__ # __builtins__.unicode.utf8string |
paul@536 | 92 | print u4.encode("ISO-8859-15") # ??? |
paul@536 | 93 | print u4.encoding # ISO-8859-15 |
paul@512 | 94 | print len(u4) # 3 |
paul@512 | 95 | |
paul@512 | 96 | # Test Unicode escape sequences in a string literal. |
paul@512 | 97 | |
paul@512 | 98 | u5 = "\u00e6\u00f8\u00e5" |
paul@512 | 99 | print "Unicode values:" |
paul@512 | 100 | print u5 # ??? |
paul@512 | 101 | print u5.__class__ # __builtins__.unicode.utf8string |
paul@512 | 102 | print len(u5) # 3 |
paul@512 | 103 | |
paul@512 | 104 | # Test some untranslated values. |
paul@512 | 105 | |
paul@512 | 106 | u6 = "\\u00e6\\u00f8\\u00e5" |
paul@512 | 107 | print "Untranslated values:" |
paul@512 | 108 | print u6 # \u00e6\u00f8\u00e5 |
paul@512 | 109 | print u6.__class__ # __builtins__.unicode.utf8string |
paul@512 | 110 | print len(u6) # 18 |
paul@512 | 111 | |
paul@512 | 112 | # Test octal escape sequences in a Unicode literal. |
paul@512 | 113 | |
paul@512 | 114 | u7 = u"\346\370\345" |
paul@512 | 115 | print "Unicode values:" |
paul@512 | 116 | print u7 # ??? |
paul@512 | 117 | print u7.__class__ # __builtins__.unicode.utf8string |
paul@512 | 118 | print len(u7) # 3 |
paul@512 | 119 | |
paul@512 | 120 | # Test untranslated octal escapes in a raw Unicode literal. |
paul@512 | 121 | |
paul@512 | 122 | u8 = ur"\346\370\345" |
paul@512 | 123 | print "Untranslated values:" |
paul@512 | 124 | print u8 # \346\370\345 |
paul@512 | 125 | print u8.__class__ # __builtins__.unicode.utf8string |
paul@512 | 126 | print len(u8) # 12 |
paul@512 | 127 | |
paul@410 | 128 | # Test invalid sequences by attempting to decode the bytes in s as UTF-8. |
paul@410 | 129 | |
paul@410 | 130 | try: |
paul@512 | 131 | u9 = unicode(s, "UTF-8") |
paul@410 | 132 | except UnicodeDecodeError, exc: |
paul@410 | 133 | print "Attempt to decode", s, "as UTF-8 failed." |
paul@410 | 134 | |
paul@396 | 135 | # Combine bytes and text. |
paul@396 | 136 | # The result should be a byte string, not text. |
paul@396 | 137 | |
paul@396 | 138 | su = s + u |
paul@536 | 139 | print "ISO-8859-15 values:" |
paul@396 | 140 | print su # ?????? |
paul@398 | 141 | print su.__class__ # __builtins__.str.string |
paul@403 | 142 | print len(su) # 6 |
paul@396 | 143 | |
paul@396 | 144 | # Combine text and bytes. |
paul@396 | 145 | # The result should be a byte string, not text. |
paul@396 | 146 | |
paul@396 | 147 | us = u + s |
paul@536 | 148 | print "ISO-8859-15 values:" |
paul@396 | 149 | print us # ?????? |
paul@398 | 150 | print us.__class__ # __builtins__.str.string |
paul@403 | 151 | print len(us) # 6 |
paul@398 | 152 | |
paul@398 | 153 | # Combine text and text. |
paul@398 | 154 | |
paul@398 | 155 | uu2 = u + u2 |
paul@512 | 156 | print "Unicode values:" |
paul@406 | 157 | print uu2 # ?????? |
paul@398 | 158 | print uu2.__class__ # __builtins__.unicode.utf8string |
paul@536 | 159 | print uu2.encoding # ISO-8859-15 |
paul@403 | 160 | print len(uu2) # 6 |
paul@396 | 161 | |
paul@392 | 162 | # Inspect and update the encoding of stdout. |
paul@398 | 163 | # Note that su and us are byte strings and are not recoded. |
paul@392 | 164 | |
paul@532 | 165 | print sys.stdout # <libc.io.sysstream instance> |
paul@392 | 166 | print sys.stdout.encoding # None |
paul@398 | 167 | |
paul@536 | 168 | sys.stdout.encoding = "ISO-8859-15" |
paul@536 | 169 | print "ISO-8859-15 and Unicode values as ISO-8859-15:" |
paul@536 | 170 | print sys.stdout.encoding # ISO-8859-15 |
paul@392 | 171 | print u # ??? |
paul@396 | 172 | print su # ?????? |
paul@396 | 173 | print us # ?????? |
paul@398 | 174 | |
paul@398 | 175 | sys.stdout.encoding = "UTF-8" |
paul@512 | 176 | print "Unicode values as UTF-8:" |
paul@398 | 177 | print sys.stdout.encoding # UTF-8 |
paul@398 | 178 | print u # ?????? |
paul@536 | 179 | print "ISO-8859-15 values bypassing UTF-8 output encoding:" |
paul@398 | 180 | print su # ?????? |
paul@398 | 181 | print us # ?????? |
paul@431 | 182 | |
paul@431 | 183 | # Reset the stdout encoding to ISO-8859-15. |
paul@431 | 184 | |
paul@536 | 185 | sys.stdout.encoding = "ISO-8859-15" |
paul@431 | 186 | |
paul@431 | 187 | # Test character access using indexing and slicing. |
paul@431 | 188 | |
paul@431 | 189 | u0 = u[0] |
paul@431 | 190 | print u0.__class__ # __builtins__.unicode.utf8string |
paul@536 | 191 | print u0.encoding # ISO-8859-15 |
paul@431 | 192 | print u0 # ? |
paul@431 | 193 | print u[-1] # ? |
paul@431 | 194 | print len(u[0]) # 1 |
paul@431 | 195 | print len(u[-1]) # 1 |
paul@431 | 196 | print u[:2] # ?? |
paul@431 | 197 | print len(u[:2]) # 2 |
paul@431 | 198 | print u[-1::-1] # ??? |
paul@431 | 199 | print len(u[-1::-1]) # 3 |
paul@534 | 200 | |
paul@534 | 201 | # Test character values using ord and unichr. |
paul@534 | 202 | |
paul@534 | 203 | print ord(u[0]) # 230 |
paul@534 | 204 | |
paul@534 | 205 | try: |
paul@534 | 206 | print ord(u) # should raise an exception |
paul@534 | 207 | except ValueError, exc: |
paul@534 | 208 | print "ord(u): value is not appropriate", repr(exc.value) |
paul@536 | 209 | |
paul@536 | 210 | euro = "?" |
paul@536 | 211 | print euro # ? |
paul@536 | 212 | print repr(euro) # "\u20ac" |
paul@536 | 213 | print ord(euro) # 8364 |
paul@536 | 214 | print "\u20ac" # ? |
paul@607 | 215 | print unichr(ord(euro)) # ? |
paul@607 | 216 | print unichr(ord(euro)) == euro # True |