paul@392 | 1 | # -*- coding: ISO-8859-1 -*- |
paul@392 | 2 | |
paul@392 | 3 | import sys |
paul@392 | 4 | |
paul@392 | 5 | # Print bytes. |
paul@392 | 6 | |
paul@396 | 7 | s = b"æøå" |
paul@512 | 8 | print "ISO-8859-1 values:" |
paul@396 | 9 | print s # æøå |
paul@403 | 10 | print len(s) # 3 |
paul@392 | 11 | |
paul@512 | 12 | s2 = b"\xe6\xf8\xe5" |
paul@512 | 13 | print "ISO-8859-1 values:" |
paul@512 | 14 | print s2 # æøå |
paul@512 | 15 | print s2.__class__ # __builtins__.str.string |
paul@512 | 16 | print len(s2) # 3 |
paul@512 | 17 | |
paul@512 | 18 | s3 = "\xe6\xf8\xe5" |
paul@512 | 19 | print "ISO-8859-1 values:" |
paul@512 | 20 | print s3 # æøå |
paul@512 | 21 | print s3.__class__ # __builtins__.str.string |
paul@512 | 22 | print len(s3) # 3 |
paul@512 | 23 | |
paul@512 | 24 | s4 = b"\u00e6\u00f8\u00e5" |
paul@512 | 25 | print "Untranslated values:" |
paul@512 | 26 | print s4 # \u00e6\u00f8\u00e5 |
paul@512 | 27 | print s4.__class__ # __builtins__.str.string |
paul@512 | 28 | print len(s4) # 18 |
paul@512 | 29 | |
paul@512 | 30 | s5 = b"\346\370\345" |
paul@512 | 31 | print "ISO-8859-1 values:" |
paul@512 | 32 | print s5 # æøå |
paul@512 | 33 | print s5.__class__ # __builtins__.str.string |
paul@512 | 34 | print len(s5) # 3 |
paul@512 | 35 | |
paul@512 | 36 | s6 = "\346\370\345" |
paul@512 | 37 | print "ISO-8859-1 values:" |
paul@512 | 38 | print s6 # æøå |
paul@512 | 39 | print s6.__class__ # __builtins__.str.string |
paul@512 | 40 | print len(s6) # 3 |
paul@512 | 41 | |
paul@512 | 42 | s7 = r"\346\370\345" |
paul@512 | 43 | print "Untranslated values:" |
paul@512 | 44 | print s7 # \346\370\345 |
paul@512 | 45 | print s7.__class__ # __builtins__.unicode.utf8string |
paul@512 | 46 | print len(s7) # 12 |
paul@512 | 47 | |
paul@392 | 48 | # Obtain text and print it. |
paul@392 | 49 | |
paul@394 | 50 | # Explicitly from bytes. |
paul@394 | 51 | |
paul@512 | 52 | u = unicode(b"æøå", "ISO-8859-1") |
paul@512 | 53 | print "Unicode values:" |
paul@406 | 54 | print u # æøå |
paul@403 | 55 | print u.__class__ # __builtins__.unicode.utf8string |
paul@392 | 56 | print u.encode("ISO-8859-1") # æøå |
paul@398 | 57 | print u.encoding # ISO-8859-1 |
paul@403 | 58 | print len(u) # 3 |
paul@392 | 59 | |
paul@394 | 60 | # Explicitly from Unicode literals. |
paul@394 | 61 | |
paul@394 | 62 | u2 = u"æøå" |
paul@512 | 63 | print "Unicode values:" |
paul@406 | 64 | print u2 # æøå |
paul@403 | 65 | print u2.__class__ # __builtins__.unicode.utf8string |
paul@394 | 66 | print u2.encode("ISO-8859-1") # æøå |
paul@398 | 67 | print u2.encoding # ISO-8859-1 |
paul@403 | 68 | print len(u2) # 3 |
paul@394 | 69 | |
paul@394 | 70 | # Implicitly from string literals. |
paul@394 | 71 | |
paul@405 | 72 | u3 = "æøå" |
paul@512 | 73 | print "Unicode values:" |
paul@406 | 74 | print u3 # æøå |
paul@405 | 75 | print u3.__class__ # __builtins__.unicode.utf8string |
paul@405 | 76 | print u3.encode("ISO-8859-1") # æøå |
paul@405 | 77 | print u3.encoding # ISO-8859-1 |
paul@405 | 78 | print len(u3) # 3 |
paul@394 | 79 | |
paul@512 | 80 | # Explicitly from implicitly-converted literal. |
paul@512 | 81 | |
paul@512 | 82 | u4 = unicode("æøå", "ISO-8859-1") |
paul@512 | 83 | print "Unicode values:" |
paul@512 | 84 | print u4 # æøå |
paul@512 | 85 | print u4.__class__ # __builtins__.unicode.utf8string |
paul@512 | 86 | print u4.encode("ISO-8859-1") # æøå |
paul@512 | 87 | print u4.encoding # ISO-8859-1 |
paul@512 | 88 | print len(u4) # 3 |
paul@512 | 89 | |
paul@512 | 90 | # Test Unicode values. |
paul@512 | 91 | |
paul@512 | 92 | u5 = "\u00e6\u00f8\u00e5" |
paul@512 | 93 | print "Unicode values:" |
paul@512 | 94 | print u5 # æøå |
paul@512 | 95 | print u5.__class__ # __builtins__.unicode.utf8string |
paul@512 | 96 | print len(u5) # 3 |
paul@512 | 97 | |
paul@512 | 98 | # Test some untranslated values. |
paul@512 | 99 | |
paul@512 | 100 | u6 = "\\u00e6\\u00f8\\u00e5" |
paul@512 | 101 | print "Untranslated values:" |
paul@512 | 102 | print u6 # \u00e6\u00f8\u00e5 |
paul@512 | 103 | print u6.__class__ # __builtins__.unicode.utf8string |
paul@512 | 104 | print len(u6) # 18 |
paul@512 | 105 | |
paul@512 | 106 | # Test Unicode values. |
paul@512 | 107 | |
paul@512 | 108 | u7 = u"\346\370\345" |
paul@512 | 109 | print "Unicode values:" |
paul@512 | 110 | print u7 # æøå |
paul@512 | 111 | print u7.__class__ # __builtins__.unicode.utf8string |
paul@512 | 112 | print len(u7) # 3 |
paul@512 | 113 | |
paul@512 | 114 | # Test Unicode values. |
paul@512 | 115 | |
paul@512 | 116 | u8 = ur"\346\370\345" |
paul@512 | 117 | print "Untranslated values:" |
paul@512 | 118 | print u8 # \346\370\345 |
paul@512 | 119 | print u8.__class__ # __builtins__.unicode.utf8string |
paul@512 | 120 | print len(u8) # 12 |
paul@512 | 121 | |
paul@410 | 122 | # Test invalid sequences. |
paul@410 | 123 | |
paul@410 | 124 | try: |
paul@512 | 125 | u9 = unicode(s, "UTF-8") |
paul@410 | 126 | except UnicodeDecodeError, exc: |
paul@410 | 127 | print "Attempt to decode", s, "as UTF-8 failed." |
paul@410 | 128 | |
paul@396 | 129 | # Combine bytes and text. |
paul@396 | 130 | # The text should be decoded. |
paul@396 | 131 | |
paul@396 | 132 | su = s + u |
paul@512 | 133 | print "ISO-8859-1 values:" |
paul@396 | 134 | print su # æøåæøå |
paul@398 | 135 | print su.__class__ # __builtins__.str.string |
paul@403 | 136 | print len(su) # 6 |
paul@396 | 137 | |
paul@396 | 138 | # Combine text and bytes. |
paul@396 | 139 | # The text should be decoded. |
paul@396 | 140 | |
paul@396 | 141 | us = u + s |
paul@512 | 142 | print "ISO-8859-1 values:" |
paul@396 | 143 | print us # æøåæøå |
paul@398 | 144 | print us.__class__ # __builtins__.str.string |
paul@403 | 145 | print len(us) # 6 |
paul@398 | 146 | |
paul@398 | 147 | # Combine text and text. |
paul@398 | 148 | |
paul@398 | 149 | uu2 = u + u2 |
paul@512 | 150 | print "Unicode values:" |
paul@406 | 151 | print uu2 # æøåæøå |
paul@398 | 152 | print uu2.__class__ # __builtins__.unicode.utf8string |
paul@398 | 153 | print uu2.encoding # ISO-8859-1 |
paul@403 | 154 | print len(uu2) # 6 |
paul@396 | 155 | |
paul@392 | 156 | # Inspect and update the encoding of stdout. |
paul@398 | 157 | # Note that su and us are byte strings and are not recoded. |
paul@392 | 158 | |
paul@426 | 159 | print sys.stdout # <posix.io.sysstream instance> |
paul@392 | 160 | print sys.stdout.encoding # None |
paul@398 | 161 | |
paul@392 | 162 | sys.stdout.encoding = "ISO-8859-1" |
paul@512 | 163 | print "ISO-8859-1 and Unicode values as ISO-8859-1:" |
paul@398 | 164 | print sys.stdout.encoding # ISO-8859-1 |
paul@392 | 165 | print u # æøå |
paul@396 | 166 | print su # æøåæøå |
paul@396 | 167 | print us # æøåæøå |
paul@398 | 168 | |
paul@398 | 169 | sys.stdout.encoding = "UTF-8" |
paul@512 | 170 | print "Unicode values as UTF-8:" |
paul@398 | 171 | print sys.stdout.encoding # UTF-8 |
paul@398 | 172 | print u # ?????? |
paul@512 | 173 | print "ISO-8859-1 values bypassing UTF-8 output encoding:" |
paul@398 | 174 | print su # æøåæøå |
paul@398 | 175 | print us # æøåæøå |
paul@431 | 176 | |
paul@431 | 177 | # Reset the encoding. |
paul@431 | 178 | |
paul@431 | 179 | sys.stdout.encoding = "ISO-8859-1" |
paul@431 | 180 | |
paul@431 | 181 | # Test character access. |
paul@431 | 182 | |
paul@431 | 183 | u0 = u[0] |
paul@431 | 184 | print u0.__class__ # __builtins__.unicode.utf8string |
paul@431 | 185 | print u0.encoding # ISO-8859-1 |
paul@431 | 186 | print u0 # æ |
paul@431 | 187 | print u[-1] # å |
paul@431 | 188 | print len(u[0]) # 1 |
paul@431 | 189 | print len(u[-1]) # 1 |
paul@431 | 190 | print u[:2] # æø |
paul@431 | 191 | print len(u[:2]) # 2 |
paul@431 | 192 | print u[-1::-1] # åøæ |
paul@431 | 193 | print len(u[-1::-1]) # 3 |