1.1 --- a/common.py Wed Dec 14 16:40:00 2016 +0100
1.2 +++ b/common.py Wed Dec 14 17:22:07 2016 +0100
1.3 @@ -172,7 +172,7 @@
1.4
1.5 # Constant reference naming.
1.6
1.7 - def get_constant_name(self, value, value_type):
1.8 + def get_constant_name(self, value, value_type, encoding=None):
1.9
1.10 """
1.11 Add a new constant to the current namespace for 'value' with
1.12 @@ -181,7 +181,7 @@
1.13
1.14 path = self.get_namespace_path()
1.15 init_item(self.constants, path, dict)
1.16 - return "$c%d" % add_counter_item(self.constants[path], (value, value_type))
1.17 + return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))
1.18
1.19 # Literal reference naming.
1.20
1.21 @@ -235,27 +235,33 @@
1.22
1.23 def get_constant_value(self, value, literal=None):
1.24
1.25 - "Encode the 'value' if appropriate, returning a value and typename."
1.26 + """
1.27 + Encode the 'value' if appropriate, returning a value, a typename and any
1.28 + encoding.
1.29 + """
1.30
1.31 if isinstance(value, unicode):
1.32 - return value.encode("utf-8"), "unicode"
1.33 + return value.encode("utf-8"), "unicode", self.encoding
1.34
1.35 # Attempt to convert plain strings to text.
1.36
1.37 elif isinstance(value, str) and self.encoding:
1.38 if not literal.startswith("b"):
1.39 try:
1.40 - return unicode(value, self.encoding).encode("utf-8"), "unicode"
1.41 + return unicode(value, self.encoding).encode("utf-8"), "unicode", self.encoding
1.42 except UnicodeDecodeError:
1.43 pass
1.44
1.45 - return value, value.__class__.__name__
1.46 + return value, value.__class__.__name__, None
1.47
1.48 - def get_constant_reference(self, ref, value):
1.49 + def get_constant_reference(self, ref, value, encoding=None):
1.50
1.51 - "Return a constant reference for the given 'ref' type and 'value'."
1.52 + """
1.53 + Return a constant reference for the given 'ref' type and 'value', with
1.54 + the optional 'encoding' applying to text values.
1.55 + """
1.56
1.57 - constant_name = self.get_constant_name(value, ref.get_origin())
1.58 + constant_name = self.get_constant_name(value, ref.get_origin(), encoding)
1.59
1.60 # Return a reference for the constant.
1.61
1.62 @@ -264,25 +270,29 @@
1.63
1.64 # Record the value and type for the constant.
1.65
1.66 - self._reserve_constant(objpath, name_ref.value, name_ref.get_origin())
1.67 + self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)
1.68 return name_ref
1.69
1.70 - def reserve_constant(self, objpath, value, origin):
1.71 + def reserve_constant(self, objpath, value, origin, encoding=None):
1.72
1.73 """
1.74 Reserve a constant within 'objpath' with the given 'value' and having a
1.75 - type with the given 'origin'.
1.76 + type with the given 'origin', with the optional 'encoding' applying to
1.77 + text values.
1.78 """
1.79
1.80 constant_name = self.get_constant_name(value, origin)
1.81 objpath = self.get_object_path(constant_name)
1.82 - self._reserve_constant(objpath, value, origin)
1.83 + self._reserve_constant(objpath, value, origin, encoding)
1.84
1.85 - def _reserve_constant(self, objpath, value, origin):
1.86 + def _reserve_constant(self, objpath, value, origin, encoding):
1.87
1.88 - "Store a constant for 'objpath' with the given 'value' and 'origin'."
1.89 + """
1.90 + Store a constant for 'objpath' with the given 'value' and 'origin', with
1.91 + the given 'encoding' (or None) applying to text values.
1.92 + """
1.93
1.94 - self.constant_values[objpath] = value, origin
1.95 + self.constant_values[objpath] = value, origin, encoding
1.96
1.97 def get_literal_reference(self, name, ref, items, cls):
1.98
2.1 --- a/generator.py Wed Dec 14 16:40:00 2016 +0100
2.2 +++ b/generator.py Wed Dec 14 17:22:07 2016 +0100
2.3 @@ -445,13 +445,13 @@
2.4 'n' with the given 'constant'.
2.5 """
2.6
2.7 - value, value_type = constant
2.8 + value, value_type, encoding = constant
2.9
2.10 const_path = encode_literal_constant(n)
2.11 structure_name = encode_literal_reference(n)
2.12
2.13 ref = Reference("<instance>", value_type)
2.14 - self.make_constant(f_decls, f_defs, ref, const_path, structure_name, value)
2.15 + self.make_constant(f_decls, f_defs, ref, const_path, structure_name, value, encoding)
2.16
2.17 def make_predefined_constant(self, f_decls, f_defs, path, name):
2.18
2.19 @@ -469,13 +469,16 @@
2.20
2.21 self.make_constant(f_decls, f_defs, ref, attr_path, structure_name)
2.22
2.23 - def make_constant(self, f_decls, f_defs, ref, const_path, structure_name, data=None):
2.24 + def make_constant(self, f_decls, f_defs, ref, const_path, structure_name, data=None, encoding=None):
2.25
2.26 """
2.27 Write constant details to 'f_decls' (to declare a structure) and to
2.28 'f_defs' (to define the contents) for the constant described by 'ref'
2.29 having the given 'path' and 'structure_name' (for the constant structure
2.30 itself).
2.31 +
2.32 + The additional 'data' and 'encoding' are used to describe specific
2.33 + values.
2.34 """
2.35
2.36 # Obtain the attributes.
2.37 @@ -501,7 +504,23 @@
2.38 # Define Unicode constant encoding details.
2.39
2.40 if cls == self.unicode_type:
2.41 - attrs["encoding"] = Reference("<instance>", self.none_type)
2.42 +
2.43 + # Reference the encoding's own constant value.
2.44 +
2.45 + if encoding:
2.46 + n = self.optimiser.constants[(encoding, self.string_type, None)]
2.47 +
2.48 + # Employ a special alias that will be tested specifically in
2.49 + # encode_member.
2.50 +
2.51 + encoding_ref = Reference("<instance>", self.string_type, "$c%d" % n)
2.52 +
2.53 + # Use None where no encoding was indicated.
2.54 +
2.55 + else:
2.56 + encoding_ref = Reference("<instance>", self.none_type)
2.57 +
2.58 + attrs["encoding"] = encoding_ref
2.59
2.60 # Define the structure details. An object is created for the constant,
2.61 # but an attribute is provided, referring to the object, for access to
2.62 @@ -904,7 +923,9 @@
2.63 else:
2.64 value = path
2.65
2.66 - local_number = self.importer.all_constants[path][(value, value_type)]
2.67 + encoding = None
2.68 +
2.69 + local_number = self.importer.all_constants[path][(value, value_type, encoding)]
2.70 constant_name = "$c%d" % local_number
2.71 attr_path = "%s.%s" % (path, constant_name)
2.72 constant_number = self.optimiser.constant_numbers[attr_path]
2.73 @@ -918,6 +939,8 @@
2.74 structure.append("{0, &%s}" % encode_path(decode_type_attribute(attrname)))
2.75 continue
2.76
2.77 + # All other kinds of members.
2.78 +
2.79 structure.append(self.encode_member(origin, attrname, attr, kind))
2.80
2.81 def encode_member(self, path, name, ref, structure_type):
2.82 @@ -935,11 +958,17 @@
2.83 if kind == "<instance>" and ref.is_constant_alias():
2.84 alias = ref.get_name()
2.85
2.86 + # Use the alias directly if appropriate.
2.87 +
2.88 + if alias.startswith("$c"):
2.89 + constant_value = encode_literal_constant(int(alias[2:]))
2.90 + return "%s /* %s */" % (constant_value, name)
2.91 +
2.92 # Obtain a constant value directly assigned to the attribute.
2.93
2.94 if self.optimiser.constant_numbers.has_key(alias):
2.95 constant_number = self.optimiser.constant_numbers[alias]
2.96 - constant_value = "__const%d" % constant_number
2.97 + constant_value = encode_literal_constant(constant_number)
2.98 return "%s /* %s */" % (constant_value, name)
2.99
2.100 # Usage of predefined constants, currently only None supported.
3.1 --- a/inspector.py Wed Dec 14 16:40:00 2016 +0100
3.2 +++ b/inspector.py Wed Dec 14 17:22:07 2016 +0100
3.3 @@ -80,6 +80,11 @@
3.4 self.set_name("__mname__", self.get_constant("string", self.name).reference())
3.5 self.set_name("__file__", self.get_constant("string", filename).reference())
3.6
3.7 + # Reserve a constant for the encoding.
3.8 +
3.9 + if self.encoding:
3.10 + self.get_constant("string", self.encoding)
3.11 +
3.12 # Get module-level attribute usage details.
3.13
3.14 self.stop_tracking_in_module()
3.15 @@ -1405,10 +1410,10 @@
3.16 # Constant values are independently recorded.
3.17
3.18 else:
3.19 - value, typename = self.get_constant_value(n.value, n.literal)
3.20 + value, typename, encoding = self.get_constant_value(n.value, n.literal)
3.21 name = get_builtin_type(typename)
3.22 ref = self.get_builtin_class(name)
3.23 - return self.get_constant_reference(ref, value)
3.24 + return self.get_constant_reference(ref, value, encoding)
3.25
3.26 # Special names.
3.27
4.1 --- a/modules.py Wed Dec 14 16:40:00 2016 +0100
4.2 +++ b/modules.py Wed Dec 14 17:22:07 2016 +0100
4.3 @@ -611,7 +611,7 @@
4.4 last_path = None
4.5 n = None
4.6 while line:
4.7 - path, value_type, value = self._get_fields(line, 3)
4.8 + path, value_type, encoding, value = self._get_fields(line, 4)
4.9 if path != last_path:
4.10 n = 0
4.11 last_path = path
4.12 @@ -619,15 +619,18 @@
4.13 n += 1
4.14 init_item(self.constants, path, dict)
4.15 value = eval(value)
4.16 - self.constants[path][(value, value_type)] = n
4.17 + encoding = encoding != "{}" and encoding or None
4.18 + self.constants[path][(value, value_type, encoding)] = n
4.19 line = f.readline().rstrip()
4.20
4.21 def _get_constant_values(self, f):
4.22 f.readline() # "constant values:"
4.23 line = f.readline().rstrip()
4.24 while line:
4.25 - name, value_type, value = self._get_fields(line, 3)
4.26 - self.constant_values[name] = eval(value), value_type
4.27 + name, value_type, encoding, value = self._get_fields(line, 4)
4.28 + value = eval(value)
4.29 + encoding = encoding != "{}" and encoding or None
4.30 + self.constant_values[name] = value, value_type, encoding
4.31 line = f.readline().rstrip()
4.32
4.33 # Generic parsing methods.
4.34 @@ -975,19 +978,19 @@
4.35 paths.sort()
4.36 for path in paths:
4.37 constants = []
4.38 - for (value, value_type), n in self.constants[path].items():
4.39 - constants.append((n, value_type, value))
4.40 + for (value, value_type, encoding), n in self.constants[path].items():
4.41 + constants.append((n, value_type, encoding, value))
4.42 constants.sort()
4.43 - for n, value_type, value in constants:
4.44 - print >>f, path, value_type, repr(value)
4.45 + for n, value_type, encoding, value in constants:
4.46 + print >>f, path, value_type, encoding or "{}", repr(value)
4.47
4.48 print >>f
4.49 print >>f, "constant values:"
4.50 names = self.constant_values.keys()
4.51 names.sort()
4.52 for name in names:
4.53 - value, value_type = self.constant_values[name]
4.54 - print >>f, name, value_type, repr(value)
4.55 + value, value_type, encoding = self.constant_values[name]
4.56 + print >>f, name, value_type, encoding or "{}", repr(value)
4.57
4.58 finally:
4.59 f.close()
5.1 --- a/optimiser.py Wed Dec 14 16:40:00 2016 +0100
5.2 +++ b/optimiser.py Wed Dec 14 17:22:07 2016 +0100
5.3 @@ -275,11 +275,11 @@
5.4 f = open(join(self.output, "constants"), "w")
5.5 try:
5.6 constants = []
5.7 - for (value, value_type), n in self.constants.items():
5.8 - constants.append((n, value_type, value))
5.9 + for (value, value_type, encoding), n in self.constants.items():
5.10 + constants.append((n, value_type, encoding, value))
5.11 constants.sort()
5.12 - for n, value_type, value in constants:
5.13 - print >>f, value_type, repr(value)
5.14 + for n, value_type, encoding, value in constants:
5.15 + print >>f, value_type, encoding or "{}", repr(value)
5.16
5.17 finally:
5.18 f.close()
5.19 @@ -648,7 +648,7 @@
5.20 for path, constants in self.importer.all_constants.items():
5.21
5.22 # Record constants and obtain a number for them.
5.23 - # Each constant is actually (value, value_type).
5.24 + # Each constant is actually (value, value_type, encoding).
5.25
5.26 for constant, n in constants.items():
5.27 add_counter_item(self.constants, constant)
6.1 --- a/resolving.py Wed Dec 14 16:40:00 2016 +0100
6.2 +++ b/resolving.py Wed Dec 14 17:22:07 2016 +0100
6.3 @@ -350,7 +350,7 @@
6.4 for path, constants in self.constants.items():
6.5 for constant, n in constants.items():
6.6 objpath = "%s.$c%d" % (path, n)
6.7 - _constant, value_type = self.constant_values[objpath]
6.8 + _constant, value_type, encoding = self.constant_values[objpath]
6.9 self.initialised_names[objpath] = {0 : Reference("<instance>", value_type)}
6.10
6.11 # Get the literals defined in each namespace.
7.1 --- a/tests/unicode.py Wed Dec 14 16:40:00 2016 +0100
7.2 +++ b/tests/unicode.py Wed Dec 14 17:22:07 2016 +0100
7.3 @@ -13,7 +13,7 @@
7.4 # Explicitly from bytes.
7.5
7.6 u = unicode("æøå", "ISO-8859-1")
7.7 -print u # æøå
7.8 +print u # æøå
7.9 print u.__class__ # __builtins__.unicode.utf8string
7.10 print u.encode("ISO-8859-1") # æøå
7.11 print u.encoding # ISO-8859-1
7.12 @@ -22,7 +22,7 @@
7.13 # Explicitly from Unicode literals.
7.14
7.15 u2 = u"æøå"
7.16 -print u2 # æøå
7.17 +print u2 # æøå
7.18 print u2.__class__ # __builtins__.unicode.utf8string
7.19 print u2.encode("ISO-8859-1") # æøå
7.20 print u2.encoding # ISO-8859-1
7.21 @@ -31,7 +31,7 @@
7.22 # Implicitly from string literals.
7.23
7.24 u3 = "æøå"
7.25 -print u3 # æøå
7.26 +print u3 # æøå
7.27 print u3.__class__ # __builtins__.unicode.utf8string
7.28 print u3.encode("ISO-8859-1") # æøå
7.29 print u3.encoding # ISO-8859-1
7.30 @@ -56,7 +56,7 @@
7.31 # Combine text and text.
7.32
7.33 uu2 = u + u2
7.34 -print uu2 # æøå
7.35 +print uu2 # æøåæøå
7.36 print uu2.__class__ # __builtins__.unicode.utf8string
7.37 print uu2.encoding # ISO-8859-1
7.38 print len(uu2) # 6
8.1 --- a/translator.py Wed Dec 14 16:40:00 2016 +0100
8.2 +++ b/translator.py Wed Dec 14 17:22:07 2016 +0100
8.3 @@ -431,15 +431,22 @@
8.4 ref = self.get_builtin_class(name)
8.5 return self.process_literal_sequence_node(n, name, ref, TrLiteralSequenceRef)
8.6 else:
8.7 - value, typename = self.get_constant_value(n.value, n.literal)
8.8 + value, typename, encoding = self.get_constant_value(n.value, n.literal)
8.9 name = get_builtin_type(typename)
8.10 ref = self.get_builtin_class(name)
8.11 value_type = ref.get_origin()
8.12
8.13 path = self.get_namespace_path()
8.14 - local_number = self.importer.all_constants[path][(value, value_type)]
8.15 +
8.16 + # Obtain the local numbering of the constant and thus the
8.17 + # locally-qualified name.
8.18 +
8.19 + local_number = self.importer.all_constants[path][(value, value_type, encoding)]
8.20 constant_name = "$c%d" % local_number
8.21 objpath = self.get_object_path(constant_name)
8.22 +
8.23 + # Obtain the unique identifier for the constant.
8.24 +
8.25 number = self.optimiser.constant_numbers[objpath]
8.26 return TrConstantValueRef(constant_name, ref.instance_of(), value, number)
8.27