# HG changeset patch # User Paul Boddie # Date 1481732527 -3600 # Node ID 06d7f3b0dcc60f20832cdc1437e0a678f9c95a17 # Parent acd579cd51896e44803db37a514c35c439deb432 Added encoding information to constants so that Unicode literals provide it. diff -r acd579cd5189 -r 06d7f3b0dcc6 common.py --- a/common.py Wed Dec 14 16:40:00 2016 +0100 +++ b/common.py Wed Dec 14 17:22:07 2016 +0100 @@ -172,7 +172,7 @@ # Constant reference naming. - def get_constant_name(self, value, value_type): + def get_constant_name(self, value, value_type, encoding=None): """ Add a new constant to the current namespace for 'value' with @@ -181,7 +181,7 @@ path = self.get_namespace_path() init_item(self.constants, path, dict) - return "$c%d" % add_counter_item(self.constants[path], (value, value_type)) + return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) # Literal reference naming. @@ -235,27 +235,33 @@ def get_constant_value(self, value, literal=None): - "Encode the 'value' if appropriate, returning a value and typename." + """ + Encode the 'value' if appropriate, returning a value, a typename and any + encoding. + """ if isinstance(value, unicode): - return value.encode("utf-8"), "unicode" + return value.encode("utf-8"), "unicode", self.encoding # Attempt to convert plain strings to text. elif isinstance(value, str) and self.encoding: if not literal.startswith("b"): try: - return unicode(value, self.encoding).encode("utf-8"), "unicode" + return unicode(value, self.encoding).encode("utf-8"), "unicode", self.encoding except UnicodeDecodeError: pass - return value, value.__class__.__name__ + return value, value.__class__.__name__, None - def get_constant_reference(self, ref, value): + def get_constant_reference(self, ref, value, encoding=None): - "Return a constant reference for the given 'ref' type and 'value'." + """ + Return a constant reference for the given 'ref' type and 'value', with + the optional 'encoding' applying to text values. + """ - constant_name = self.get_constant_name(value, ref.get_origin()) + constant_name = self.get_constant_name(value, ref.get_origin(), encoding) # Return a reference for the constant. @@ -264,25 +270,29 @@ # Record the value and type for the constant. - self._reserve_constant(objpath, name_ref.value, name_ref.get_origin()) + self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) return name_ref - def reserve_constant(self, objpath, value, origin): + def reserve_constant(self, objpath, value, origin, encoding=None): """ Reserve a constant within 'objpath' with the given 'value' and having a - type with the given 'origin'. + type with the given 'origin', with the optional 'encoding' applying to + text values. """ constant_name = self.get_constant_name(value, origin) objpath = self.get_object_path(constant_name) - self._reserve_constant(objpath, value, origin) + self._reserve_constant(objpath, value, origin, encoding) - def _reserve_constant(self, objpath, value, origin): + def _reserve_constant(self, objpath, value, origin, encoding): - "Store a constant for 'objpath' with the given 'value' and 'origin'." + """ + Store a constant for 'objpath' with the given 'value' and 'origin', with + the optional 'encoding' applying to text values. + """ - self.constant_values[objpath] = value, origin + self.constant_values[objpath] = value, origin, encoding def get_literal_reference(self, name, ref, items, cls): diff -r acd579cd5189 -r 06d7f3b0dcc6 generator.py --- a/generator.py Wed Dec 14 16:40:00 2016 +0100 +++ b/generator.py Wed Dec 14 17:22:07 2016 +0100 @@ -445,13 +445,13 @@ 'n' with the given 'constant'. """ - value, value_type = constant + value, value_type, encoding = constant const_path = encode_literal_constant(n) structure_name = encode_literal_reference(n) ref = Reference("", value_type) - self.make_constant(f_decls, f_defs, ref, const_path, structure_name, value) + self.make_constant(f_decls, f_defs, ref, const_path, structure_name, value, encoding) def make_predefined_constant(self, f_decls, f_defs, path, name): @@ -469,13 +469,16 @@ self.make_constant(f_decls, f_defs, ref, attr_path, structure_name) - def make_constant(self, f_decls, f_defs, ref, const_path, structure_name, data=None): + def make_constant(self, f_decls, f_defs, ref, const_path, structure_name, data=None, encoding=None): """ Write constant details to 'f_decls' (to declare a structure) and to 'f_defs' (to define the contents) for the constant described by 'ref' having the given 'path' and 'structure_name' (for the constant structure itself). + + The additional 'data' and 'encoding' are used to describe specific + values. """ # Obtain the attributes. @@ -501,7 +504,23 @@ # Define Unicode constant encoding details. if cls == self.unicode_type: - attrs["encoding"] = Reference("", self.none_type) + + # Reference the encoding's own constant value. + + if encoding: + n = self.optimiser.constants[(encoding, self.string_type, None)] + + # Employ a special alias that will be tested specifically in + # encode_member. + + encoding_ref = Reference("", self.string_type, "$c%d" % n) + + # Use None where no encoding was indicated. + + else: + encoding_ref = Reference("", self.none_type) + + attrs["encoding"] = encoding_ref # Define the structure details. An object is created for the constant, # but an attribute is provided, referring to the object, for access to @@ -904,7 +923,9 @@ else: value = path - local_number = self.importer.all_constants[path][(value, value_type)] + encoding = None + + local_number = self.importer.all_constants[path][(value, value_type, encoding)] constant_name = "$c%d" % local_number attr_path = "%s.%s" % (path, constant_name) constant_number = self.optimiser.constant_numbers[attr_path] @@ -918,6 +939,8 @@ structure.append("{0, &%s}" % encode_path(decode_type_attribute(attrname))) continue + # All other kinds of members. + structure.append(self.encode_member(origin, attrname, attr, kind)) def encode_member(self, path, name, ref, structure_type): @@ -935,11 +958,17 @@ if kind == "" and ref.is_constant_alias(): alias = ref.get_name() + # Use the alias directly if appropriate. + + if alias.startswith("$c"): + constant_value = encode_literal_constant(int(alias[2:])) + return "%s /* %s */" % (constant_value, name) + # Obtain a constant value directly assigned to the attribute. if self.optimiser.constant_numbers.has_key(alias): constant_number = self.optimiser.constant_numbers[alias] - constant_value = "__const%d" % constant_number + constant_value = encode_literal_constant(constant_number) return "%s /* %s */" % (constant_value, name) # Usage of predefined constants, currently only None supported. diff -r acd579cd5189 -r 06d7f3b0dcc6 inspector.py --- a/inspector.py Wed Dec 14 16:40:00 2016 +0100 +++ b/inspector.py Wed Dec 14 17:22:07 2016 +0100 @@ -80,6 +80,11 @@ self.set_name("__mname__", self.get_constant("string", self.name).reference()) self.set_name("__file__", self.get_constant("string", filename).reference()) + # Reserve a constant for the encoding. + + if self.encoding: + self.get_constant("string", self.encoding) + # Get module-level attribute usage details. self.stop_tracking_in_module() @@ -1405,10 +1410,10 @@ # Constant values are independently recorded. else: - value, typename = self.get_constant_value(n.value, n.literal) + value, typename, encoding = self.get_constant_value(n.value, n.literal) name = get_builtin_type(typename) ref = self.get_builtin_class(name) - return self.get_constant_reference(ref, value) + return self.get_constant_reference(ref, value, encoding) # Special names. diff -r acd579cd5189 -r 06d7f3b0dcc6 modules.py --- a/modules.py Wed Dec 14 16:40:00 2016 +0100 +++ b/modules.py Wed Dec 14 17:22:07 2016 +0100 @@ -611,7 +611,7 @@ last_path = None n = None while line: - path, value_type, value = self._get_fields(line, 3) + path, value_type, encoding, value = self._get_fields(line, 4) if path != last_path: n = 0 last_path = path @@ -619,15 +619,18 @@ n += 1 init_item(self.constants, path, dict) value = eval(value) - self.constants[path][(value, value_type)] = n + encoding = encoding != "{}" and encoding or None + self.constants[path][(value, value_type, encoding)] = n line = f.readline().rstrip() def _get_constant_values(self, f): f.readline() # "constant values:" line = f.readline().rstrip() while line: - name, value_type, value = self._get_fields(line, 3) - self.constant_values[name] = eval(value), value_type + name, value_type, encoding, value = self._get_fields(line, 4) + value = eval(value) + encoding = encoding != "{}" and encoding or None + self.constant_values[name] = value, value_type, encoding line = f.readline().rstrip() # Generic parsing methods. @@ -975,19 +978,19 @@ paths.sort() for path in paths: constants = [] - for (value, value_type), n in self.constants[path].items(): - constants.append((n, value_type, value)) + for (value, value_type, encoding), n in self.constants[path].items(): + constants.append((n, value_type, encoding, value)) constants.sort() - for n, value_type, value in constants: - print >>f, path, value_type, repr(value) + for n, value_type, encoding, value in constants: + print >>f, path, value_type, encoding or "{}", repr(value) print >>f print >>f, "constant values:" names = self.constant_values.keys() names.sort() for name in names: - value, value_type = self.constant_values[name] - print >>f, name, value_type, repr(value) + value, value_type, encoding = self.constant_values[name] + print >>f, name, value_type, encoding or "{}", repr(value) finally: f.close() diff -r acd579cd5189 -r 06d7f3b0dcc6 optimiser.py --- a/optimiser.py Wed Dec 14 16:40:00 2016 +0100 +++ b/optimiser.py Wed Dec 14 17:22:07 2016 +0100 @@ -275,11 +275,11 @@ f = open(join(self.output, "constants"), "w") try: constants = [] - for (value, value_type), n in self.constants.items(): - constants.append((n, value_type, value)) + for (value, value_type, encoding), n in self.constants.items(): + constants.append((n, value_type, encoding, value)) constants.sort() - for n, value_type, value in constants: - print >>f, value_type, repr(value) + for n, value_type, encoding, value in constants: + print >>f, value_type, encoding or "{}", repr(value) finally: f.close() @@ -648,7 +648,7 @@ for path, constants in self.importer.all_constants.items(): # Record constants and obtain a number for them. - # Each constant is actually (value, value_type). + # Each constant is actually (value, value_type, encoding). for constant, n in constants.items(): add_counter_item(self.constants, constant) diff -r acd579cd5189 -r 06d7f3b0dcc6 resolving.py --- a/resolving.py Wed Dec 14 16:40:00 2016 +0100 +++ b/resolving.py Wed Dec 14 17:22:07 2016 +0100 @@ -350,7 +350,7 @@ for path, constants in self.constants.items(): for constant, n in constants.items(): objpath = "%s.$c%d" % (path, n) - _constant, value_type = self.constant_values[objpath] + _constant, value_type, encoding = self.constant_values[objpath] self.initialised_names[objpath] = {0 : Reference("", value_type)} # Get the literals defined in each namespace. diff -r acd579cd5189 -r 06d7f3b0dcc6 tests/unicode.py --- a/tests/unicode.py Wed Dec 14 16:40:00 2016 +0100 +++ b/tests/unicode.py Wed Dec 14 17:22:07 2016 +0100 @@ -13,7 +13,7 @@ # Explicitly from bytes. u = unicode("æøå", "ISO-8859-1") -print u # æøå +print u # æøå print u.__class__ # __builtins__.unicode.utf8string print u.encode("ISO-8859-1") # æøå print u.encoding # ISO-8859-1 @@ -22,7 +22,7 @@ # Explicitly from Unicode literals. u2 = u"æøå" -print u2 # æøå +print u2 # æøå print u2.__class__ # __builtins__.unicode.utf8string print u2.encode("ISO-8859-1") # æøå print u2.encoding # ISO-8859-1 @@ -31,7 +31,7 @@ # Implicitly from string literals. u3 = "æøå" -print u3 # æøå +print u3 # æøå print u3.__class__ # __builtins__.unicode.utf8string print u3.encode("ISO-8859-1") # æøå print u3.encoding # ISO-8859-1 @@ -56,7 +56,7 @@ # Combine text and text. uu2 = u + u2 -print uu2 # æøå +print uu2 # æøåæøå print uu2.__class__ # __builtins__.unicode.utf8string print uu2.encoding # ISO-8859-1 print len(uu2) # 6 diff -r acd579cd5189 -r 06d7f3b0dcc6 translator.py --- a/translator.py Wed Dec 14 16:40:00 2016 +0100 +++ b/translator.py Wed Dec 14 17:22:07 2016 +0100 @@ -431,15 +431,22 @@ ref = self.get_builtin_class(name) return self.process_literal_sequence_node(n, name, ref, TrLiteralSequenceRef) else: - value, typename = self.get_constant_value(n.value, n.literal) + value, typename, encoding = self.get_constant_value(n.value, n.literal) name = get_builtin_type(typename) ref = self.get_builtin_class(name) value_type = ref.get_origin() path = self.get_namespace_path() - local_number = self.importer.all_constants[path][(value, value_type)] + + # Obtain the local numbering of the constant and thus the + # locally-qualified name. + + local_number = self.importer.all_constants[path][(value, value_type, encoding)] constant_name = "$c%d" % local_number objpath = self.get_object_path(constant_name) + + # Obtain the unique identifier for the constant. + number = self.optimiser.constant_numbers[objpath] return TrConstantValueRef(constant_name, ref.instance_of(), value, number)