1.1 --- a/common.py Tue Dec 13 00:59:23 2016 +0100
1.2 +++ b/common.py Tue Dec 13 17:57:33 2016 +0100
1.3 @@ -218,6 +218,14 @@
1.4
1.5 # Constant and literal recording.
1.6
1.7 + def get_constant_value(self, value):
1.8 +
1.9 + "Encode the 'value' if appropriate."
1.10 +
1.11 + if isinstance(value, unicode):
1.12 + value = value.encode("utf-8")
1.13 + return value
1.14 +
1.15 def get_constant_reference(self, ref, value):
1.16
1.17 "Return a constant reference for the given 'ref' type and 'value'."
1.18 @@ -923,13 +931,25 @@
1.19
1.20 # NOTE: This makes assumptions about the __builtins__ structure.
1.21
1.22 - return name == "string" and "str" or name == "NoneType" and "none" or name
1.23 + if name == "string":
1.24 + return "str"
1.25 + elif name == "utf8string":
1.26 + return "unicode"
1.27 + elif name == "NoneType":
1.28 + return "none"
1.29 + else:
1.30 + return name
1.31
1.32 def get_builtin_type(name):
1.33
1.34 "Return the type name provided by the given Python value 'name'."
1.35
1.36 - return name == "str" and "string" or name
1.37 + if name == "str":
1.38 + return "string"
1.39 + elif name == "unicode":
1.40 + return "utf8string"
1.41 + else:
1.42 + return name
1.43
1.44 # Useful data.
1.45
2.1 --- a/inspector.py Tue Dec 13 00:59:23 2016 +0100
2.2 +++ b/inspector.py Tue Dec 13 17:57:33 2016 +0100
2.3 @@ -1406,7 +1406,8 @@
2.4 # Constant values are independently recorded.
2.5
2.6 else:
2.7 - return self.get_constant_reference(ref, n.value)
2.8 + value = self.get_constant_value(n.value)
2.9 + return self.get_constant_reference(ref, value)
2.10
2.11 # Special names.
2.12
3.1 --- a/tests/unicode.py Tue Dec 13 00:59:23 2016 +0100
3.2 +++ b/tests/unicode.py Tue Dec 13 17:57:33 2016 +0100
3.3 @@ -4,15 +4,29 @@
3.4
3.5 # Print bytes.
3.6
3.7 -s = "æøå"
3.8 +s = b"æøå"
3.9 print s # æøå
3.10
3.11 # Obtain text and print it.
3.12
3.13 +# Explicitly from bytes.
3.14 +
3.15 u = unicode(s, "ISO-8859-1")
3.16 print u # æøå
3.17 print u.encode("ISO-8859-1") # æøå
3.18
3.19 +# Explicitly from Unicode literals.
3.20 +
3.21 +u2 = u"æøå"
3.22 +print u2 # æøå
3.23 +print u2.encode("ISO-8859-1") # æøå
3.24 +
3.25 +# Implicitly from string literals.
3.26 +
3.27 +#u3 = "æøå"
3.28 +#print u3 # æøå
3.29 +#print u3.encode("ISO-8859-1") # æøå
3.30 +
3.31 # Inspect and update the encoding of stdout.
3.32
3.33 print sys.stdout.encoding # None
4.1 --- a/translator.py Tue Dec 13 00:59:23 2016 +0100
4.2 +++ b/translator.py Tue Dec 13 17:57:33 2016 +0100
4.3 @@ -421,12 +421,13 @@
4.4 if name in ("dict", "list", "tuple"):
4.5 return self.process_literal_sequence_node(n, name, ref, TrLiteralSequenceRef)
4.6 else:
4.7 + value = self.get_constant_value(n.value)
4.8 path = self.get_namespace_path()
4.9 - local_number = self.importer.all_constants[path][n.value]
4.10 + local_number = self.importer.all_constants[path][value]
4.11 constant_name = "$c%d" % local_number
4.12 objpath = self.get_object_path(constant_name)
4.13 number = self.optimiser.constant_numbers[objpath]
4.14 - return TrConstantValueRef(constant_name, ref.instance_of(), n.value, number)
4.15 + return TrConstantValueRef(constant_name, ref.instance_of(), value, number)
4.16
4.17 # Namespace translation.
4.18