# HG changeset patch # User Paul Boddie # Date 1481648253 -3600 # Node ID b8782209d44595cf33317ea0e3ac60dc31d1fe16 # Parent a39afb95fb2610a1085979c6671f3ec4a8be234c Introduced initial support for Unicode constants and literals. diff -r a39afb95fb26 -r b8782209d445 common.py --- a/common.py Tue Dec 13 00:59:23 2016 +0100 +++ b/common.py Tue Dec 13 17:57:33 2016 +0100 @@ -218,6 +218,14 @@ # Constant and literal recording. + def get_constant_value(self, value): + + "Encode the 'value' if appropriate." + + if isinstance(value, unicode): + value = value.encode("utf-8") + return value + def get_constant_reference(self, ref, value): "Return a constant reference for the given 'ref' type and 'value'." @@ -923,13 +931,25 @@ # NOTE: This makes assumptions about the __builtins__ structure. - return name == "string" and "str" or name == "NoneType" and "none" or name + if name == "string": + return "str" + elif name == "utf8string": + return "unicode" + elif name == "NoneType": + return "none" + else: + return name def get_builtin_type(name): "Return the type name provided by the given Python value 'name'." - return name == "str" and "string" or name + if name == "str": + return "string" + elif name == "unicode": + return "utf8string" + else: + return name # Useful data. diff -r a39afb95fb26 -r b8782209d445 inspector.py --- a/inspector.py Tue Dec 13 00:59:23 2016 +0100 +++ b/inspector.py Tue Dec 13 17:57:33 2016 +0100 @@ -1406,7 +1406,8 @@ # Constant values are independently recorded. else: - return self.get_constant_reference(ref, n.value) + value = self.get_constant_value(n.value) + return self.get_constant_reference(ref, value) # Special names. diff -r a39afb95fb26 -r b8782209d445 tests/unicode.py --- a/tests/unicode.py Tue Dec 13 00:59:23 2016 +0100 +++ b/tests/unicode.py Tue Dec 13 17:57:33 2016 +0100 @@ -4,15 +4,29 @@ # Print bytes. -s = "æøå" +s = b"æøå" print s # æøå # Obtain text and print it. +# Explicitly from bytes. + u = unicode(s, "ISO-8859-1") print u # æøå print u.encode("ISO-8859-1") # æøå +# Explicitly from Unicode literals. + +u2 = u"æøå" +print u2 # æøå +print u2.encode("ISO-8859-1") # æøå + +# Implicitly from string literals. + +#u3 = "æøå" +#print u3 # æøå +#print u3.encode("ISO-8859-1") # æøå + # Inspect and update the encoding of stdout. print sys.stdout.encoding # None diff -r a39afb95fb26 -r b8782209d445 translator.py --- a/translator.py Tue Dec 13 00:59:23 2016 +0100 +++ b/translator.py Tue Dec 13 17:57:33 2016 +0100 @@ -421,12 +421,13 @@ if name in ("dict", "list", "tuple"): return self.process_literal_sequence_node(n, name, ref, TrLiteralSequenceRef) else: + value = self.get_constant_value(n.value) path = self.get_namespace_path() - local_number = self.importer.all_constants[path][n.value] + local_number = self.importer.all_constants[path][value] constant_name = "$c%d" % local_number objpath = self.get_object_path(constant_name) number = self.optimiser.constant_numbers[objpath] - return TrConstantValueRef(constant_name, ref.instance_of(), n.value, number) + return TrConstantValueRef(constant_name, ref.instance_of(), value, number) # Namespace translation.