2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/optimiser.py Tue Oct 11 16:29:50 2016 +0200
2.3 @@ -0,0 +1,637 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +Optimise object layouts.
2.8 +
2.9 +Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>
2.10 +
2.11 +This program is free software; you can redistribute it and/or modify it under
2.12 +the terms of the GNU General Public License as published by the Free Software
2.13 +Foundation; either version 3 of the License, or (at your option) any later
2.14 +version.
2.15 +
2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
2.19 +details.
2.20 +
2.21 +You should have received a copy of the GNU General Public License along with
2.22 +this program. If not, see <http://www.gnu.org/licenses/>.
2.23 +"""
2.24 +
2.25 +from common import add_counter_item, get_attrname_from_location, init_item, \
2.26 + sorted_output
2.27 +from encoders import encode_access_location, get_kinds
2.28 +from os.path import exists, join
2.29 +from os import makedirs
2.30 +from referencing import Reference
2.31 +
2.32 +class Optimiser:
2.33 +
2.34 + "Optimise objects in a program."
2.35 +
2.36 + def __init__(self, importer, deducer, output):
2.37 +
2.38 + """
2.39 + Initialise an instance using the given 'importer' and 'deducer' that
2.40 + will perform the arrangement of attributes for program objects, writing
2.41 + the results to the given 'output' directory.
2.42 + """
2.43 +
2.44 + self.importer = importer
2.45 + self.deducer = deducer
2.46 + self.output = output
2.47 +
2.48 + # Locations/offsets of attributes in objects.
2.49 +
2.50 + self.locations = None
2.51 + self.attr_locations = None
2.52 + self.all_attrnames = None
2.53 +
2.54 + # Locations of parameters in parameter tables.
2.55 +
2.56 + self.arg_locations = None
2.57 + self.param_locations = None
2.58 + self.all_paramnames = None
2.59 +
2.60 + # Specific attribute access information.
2.61 +
2.62 + self.access_plans = {}
2.63 +
2.64 + # Object structure information.
2.65 +
2.66 + self.structures = {}
2.67 + self.attr_table = {}
2.68 +
2.69 + # Parameter list information.
2.70 +
2.71 + self.parameters = {}
2.72 + self.param_table = {}
2.73 +
2.74 + # Constant literal information.
2.75 +
2.76 + self.constants = []
2.77 + self.constant_numbers = {}
2.78 +
2.79 + # Optimiser activities.
2.80 +
2.81 + self.populate_objects()
2.82 + self.position_attributes()
2.83 + self.populate_parameters()
2.84 + self.position_parameters()
2.85 + self.populate_tables()
2.86 + self.populate_constants()
2.87 + self.refine_access_plans()
2.88 +
2.89 + def to_output(self):
2.90 +
2.91 + "Write the output files using optimisation information."
2.92 +
2.93 + if not exists(self.output):
2.94 + makedirs(self.output)
2.95 +
2.96 + self.write_objects()
2.97 +
2.98 + def write_objects(self):
2.99 +
2.100 + """
2.101 + Write object-related output.
2.102 +
2.103 + The locations are a list of positions indicating the attributes residing
2.104 + at each position in the different structures in a program.
2.105 +
2.106 + ----
2.107 +
2.108 + The parameter locations are a list of positions indicating the parameters
2.109 + residing at each position in the different parameter lists in a program.
2.110 +
2.111 + ----
2.112 +
2.113 + Each attribute plan provides attribute details in the following format:
2.114 +
2.115 + location " " name " " test " " test type " " base
2.116 + " " traversed attributes " " traversed attribute ambiguity
2.117 + " " attributes to traverse " " attribute ambiguity
2.118 + " " context " " access method " " static attribute
2.119 +
2.120 + Locations have the following format:
2.121 +
2.122 + qualified name of scope "." local name ":" name version
2.123 +
2.124 + ----
2.125 +
2.126 + The structures are presented as a table in the following format:
2.127 +
2.128 + qualified name " " attribute names
2.129 +
2.130 + The attribute names are separated by ", " characters and indicate the
2.131 + attribute provided at each position in the structure associated with the
2.132 + given type. Where no attribute is provided at a particular location
2.133 + within a structure, "-" is given.
2.134 +
2.135 + ----
2.136 +
2.137 + The parameters are presented as a table in the following format:
2.138 +
2.139 + qualified name " " parameter details
2.140 +
2.141 + The parameter details are separated by ", " characters and indicate
2.142 + the parameter name and list position for each parameter described at
2.143 + each location in the parameter table associated with the given
2.144 + function. Where no parameter details are provided at a particular
2.145 + location within a parameter table, "-" is given. The name and list
2.146 + position are separated by a colon (":").
2.147 +
2.148 + ----
2.149 +
2.150 + The attribute table is presented as a table in the following format:
2.151 +
2.152 + qualified name " " attribute identifiers
2.153 +
2.154 + Instead of attribute names, identifiers defined according to the order
2.155 + given in the "attrnames" file are employed to denote the attributes
2.156 + featured in each type's structure. Where no attribute is provided at a
2.157 + particular location within a structure, "-" is given.
2.158 +
2.159 + ----
2.160 +
2.161 + The parameter table is presented as a table in the following format:
2.162 +
2.163 + qualified name " " parameter details
2.164 +
2.165 + Instead of parameter names, identifiers defined according to the order
2.166 + given in the "paramnames" file are employed to denote the parameters
2.167 + featured in each function's parameter table. Where no parameter is
2.168 + provided at a particular location within a table, "-" is given.
2.169 +
2.170 + ----
2.171 +
2.172 + The ordered list of attribute names is given in the "attrnames" file.
2.173 +
2.174 + ----
2.175 +
2.176 + The ordered list of parameter names is given in the "paramnames" file.
2.177 +
2.178 + ----
2.179 +
2.180 + The ordered list of constant literals is given in the "constants" file.
2.181 + """
2.182 +
2.183 + f = open(join(self.output, "locations"), "w")
2.184 + try:
2.185 + for attrnames in self.locations:
2.186 + print >>f, sorted_output(attrnames)
2.187 +
2.188 + finally:
2.189 + f.close()
2.190 +
2.191 + f = open(join(self.output, "parameter_locations"), "w")
2.192 + try:
2.193 + for argnames in self.arg_locations:
2.194 + print >>f, sorted_output(argnames)
2.195 +
2.196 + finally:
2.197 + f.close()
2.198 +
2.199 + f = open(join(self.output, "attribute_plans"), "w")
2.200 + try:
2.201 + access_plans = self.access_plans.items()
2.202 + access_plans.sort()
2.203 +
2.204 + for location, (name, test, test_type, base,
2.205 + traversed, traversed_ambiguity,
2.206 + attrnames, attrnames_ambiguity, context, method, attr) in access_plans:
2.207 +
2.208 + print >>f, encode_access_location(location), \
2.209 + name, test, test_type or "{}", \
2.210 + base or "{}", \
2.211 + ".".join(traversed) or "{}", \
2.212 + ".".join(map(str, traversed_ambiguity)) or "{}", \
2.213 + ".".join(map(str, attrnames_ambiguity)) or "{}", \
2.214 + ".".join(attrnames) or "{}", \
2.215 + context, method, attr or "{}"
2.216 +
2.217 + finally:
2.218 + f.close()
2.219 +
2.220 + f = open(join(self.output, "structures"), "w")
2.221 + try:
2.222 + structures = self.structures.items()
2.223 + structures.sort()
2.224 +
2.225 + for name, attrnames in structures:
2.226 + print >>f, name, ", ".join([s or "-" for s in attrnames])
2.227 +
2.228 + finally:
2.229 + f.close()
2.230 +
2.231 + f = open(join(self.output, "parameters"), "w")
2.232 + try:
2.233 + parameters = self.parameters.items()
2.234 + parameters.sort()
2.235 +
2.236 + for name, argnames in parameters:
2.237 + print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames])
2.238 +
2.239 + finally:
2.240 + f.close()
2.241 +
2.242 + f = open(join(self.output, "attrtable"), "w")
2.243 + try:
2.244 + attr_table = self.attr_table.items()
2.245 + attr_table.sort()
2.246 +
2.247 + for name, attrcodes in attr_table:
2.248 + print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes])
2.249 +
2.250 + finally:
2.251 + f.close()
2.252 +
2.253 + f = open(join(self.output, "paramtable"), "w")
2.254 + try:
2.255 + param_table = self.param_table.items()
2.256 + param_table.sort()
2.257 +
2.258 + for name, paramcodes in param_table:
2.259 + print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes])
2.260 +
2.261 + finally:
2.262 + f.close()
2.263 +
2.264 + f = open(join(self.output, "attrnames"), "w")
2.265 + try:
2.266 + for name in self.all_attrnames:
2.267 + print >>f, name
2.268 +
2.269 + finally:
2.270 + f.close()
2.271 +
2.272 + f = open(join(self.output, "paramnames"), "w")
2.273 + try:
2.274 + for name in self.all_paramnames:
2.275 + print >>f, name
2.276 +
2.277 + finally:
2.278 + f.close()
2.279 +
2.280 + f = open(join(self.output, "constants"), "w")
2.281 + try:
2.282 + constants = [(n, value) for (value, n) in self.constants.items()]
2.283 + constants.sort()
2.284 + for n, value in constants:
2.285 + print >>f, repr(value)
2.286 +
2.287 + finally:
2.288 + f.close()
2.289 +
2.290 + def populate_objects(self):
2.291 +
2.292 + "Populate objects using attribute and usage information."
2.293 +
2.294 + all_attrs = {}
2.295 +
2.296 + # Partition attributes into separate sections so that class and instance
2.297 + # attributes are treated separately.
2.298 +
2.299 + for source, objtype in [
2.300 + (self.importer.all_class_attrs, "<class>"),
2.301 + (self.importer.all_instance_attrs, "<instance>"),
2.302 + (self.importer.all_module_attrs, "<module>")
2.303 + ]:
2.304 + for name, attrs in source.items():
2.305 + all_attrs[(objtype, name)] = attrs
2.306 +
2.307 + self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes)
2.308 +
2.309 + def populate_parameters(self):
2.310 +
2.311 + "Populate parameter tables using parameter information."
2.312 +
2.313 + self.arg_locations = get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes)
2.314 +
2.315 + def position_attributes(self):
2.316 +
2.317 + "Position specific attribute references."
2.318 +
2.319 + # Reverse the location mappings.
2.320 +
2.321 + attr_locations = self.attr_locations = {}
2.322 +
2.323 + for i, attrnames in enumerate(self.locations):
2.324 + for attrname in attrnames:
2.325 + attr_locations[attrname] = i
2.326 +
2.327 + # Record the structures.
2.328 +
2.329 + for source, objtype in [
2.330 + (self.importer.all_class_attrs, "<class>"),
2.331 + (self.importer.all_instance_attrs, "<instance>"),
2.332 + (self.importer.all_module_attrs, "<module>")
2.333 + ]:
2.334 +
2.335 + for name, attrnames in source.items():
2.336 + key = Reference(objtype, name)
2.337 + l = self.structures[key] = [None] * len(attrnames)
2.338 + for attrname in attrnames:
2.339 + position = attr_locations[attrname]
2.340 + if position >= len(l):
2.341 + l.extend([None] * (position - len(l) + 1))
2.342 + l[position] = attrname
2.343 +
2.344 + def refine_access_plans(self):
2.345 +
2.346 + "Augment deduced access plans with attribute position information."
2.347 +
2.348 + for access_location, access_plan in self.deducer.access_plans.items():
2.349 +
2.350 + # Obtain the access details.
2.351 +
2.352 + name, test, test_type, base, traversed, attrnames, \
2.353 + context, method, attr = access_plan
2.354 +
2.355 + traversed_ambiguity = self.get_ambiguity_for_attributes(traversed)
2.356 + attrnames_ambiguity = self.get_ambiguity_for_attributes(attrnames)
2.357 +
2.358 + self.access_plans[access_location] = \
2.359 + name, test, test_type, base, traversed, traversed_ambiguity, \
2.360 + attrnames, attrnames_ambiguity, context, method, attr
2.361 +
2.362 + def get_ambiguity_for_attributes(self, attrnames):
2.363 +
2.364 + """
2.365 + Return a list of attribute position alternatives corresponding to each
2.366 + of the given 'attrnames'.
2.367 + """
2.368 +
2.369 + ambiguity = []
2.370 +
2.371 + for attrname in attrnames:
2.372 + position = self.attr_locations[attrname]
2.373 + ambiguity.append(len(self.locations[position]))
2.374 +
2.375 + return ambiguity
2.376 +
2.377 + def position_parameters(self):
2.378 +
2.379 + "Position the parameters for each function's parameter table."
2.380 +
2.381 + # Reverse the location mappings.
2.382 +
2.383 + param_locations = self.param_locations = {}
2.384 +
2.385 + for i, argnames in enumerate(self.arg_locations):
2.386 + for argname in argnames:
2.387 + param_locations[argname] = i
2.388 +
2.389 + for name, argnames in self.importer.function_parameters.items():
2.390 + l = self.parameters[name] = [None] * len(argnames)
2.391 +
2.392 + # Store an entry for the name along with the name's position in the
2.393 + # parameter list.
2.394 +
2.395 + for pos, argname in enumerate(argnames):
2.396 + position = param_locations[argname]
2.397 + if position >= len(l):
2.398 + l.extend([None] * (position - len(l) + 1))
2.399 + l[position] = (argname, pos)
2.400 +
2.401 + def populate_tables(self):
2.402 +
2.403 + """
2.404 + Assign identifiers to attributes and encode structure information using
2.405 + these identifiers.
2.406 + """
2.407 +
2.408 + self.all_attrnames, d = self._get_name_mapping(self.attr_locations)
2.409 +
2.410 + # Record the numbers indicating the locations of the names.
2.411 +
2.412 + for key, attrnames in self.structures.items():
2.413 + l = self.attr_table[key] = []
2.414 + for attrname in attrnames:
2.415 + if attrname is None:
2.416 + l.append(None)
2.417 + else:
2.418 + l.append(d[attrname])
2.419 +
2.420 + self.all_paramnames, d = self._get_name_mapping(self.param_locations)
2.421 +
2.422 + # Record the numbers indicating the locations of the names.
2.423 +
2.424 + for key, values in self.parameters.items():
2.425 + l = self.param_table[key] = []
2.426 + for value in values:
2.427 + if value is None:
2.428 + l.append(None)
2.429 + else:
2.430 + name, pos = value
2.431 + l.append((d[name], pos))
2.432 +
2.433 + def _get_name_mapping(self, locations):
2.434 +
2.435 + """
2.436 + Get a sorted list of names from 'locations', then map them to
2.437 + identifying numbers. Return the list and the mapping.
2.438 + """
2.439 +
2.440 + all_names = locations.keys()
2.441 + all_names.sort()
2.442 + return all_names, dict([(name, i) for i, name in enumerate(all_names)])
2.443 +
2.444 + def populate_constants(self):
2.445 +
2.446 + """
2.447 + Obtain a collection of distinct constant literals, building a mapping
2.448 + from constant references to those in this collection.
2.449 + """
2.450 +
2.451 + # Obtain mappings from constant values to identifiers.
2.452 +
2.453 + self.constants = {}
2.454 +
2.455 + for path, constants in self.importer.all_constants.items():
2.456 + for constant, n in constants.items():
2.457 +
2.458 + # Record constants and obtain a number for them.
2.459 +
2.460 + add_counter_item(self.constants, constant)
2.461 +
2.462 + self.constant_numbers = {}
2.463 +
2.464 + for name, (value, value_type) in self.importer.all_constant_values.items():
2.465 + self.constant_numbers[name] = self.constants[value]
2.466 +
2.467 +def combine_rows(a, b):
2.468 + c = []
2.469 + for i, j in zip(a, b):
2.470 + if i is None or j is None:
2.471 + c.append(i or j)
2.472 + else:
2.473 + return None
2.474 + return c
2.475 +
2.476 +def get_attributes_and_sizes(d):
2.477 +
2.478 + """
2.479 + Return a matrix of attributes, a list of type names corresponding to columns
2.480 + in the matrix, and a list of ranked sizes each indicating...
2.481 +
2.482 + * a weighted size depending on the kind of object
2.483 + * the minimum size of an object employing an attribute
2.484 + * the number of free columns in the matrix for the attribute
2.485 + * the attribute name itself
2.486 + """
2.487 +
2.488 + attrs = {}
2.489 + sizes = {}
2.490 + objtypes = {}
2.491 +
2.492 + for name, attrnames in d.items():
2.493 + objtype, _name = name
2.494 +
2.495 + for attrname in attrnames:
2.496 +
2.497 + # Record each type supporting the attribute.
2.498 +
2.499 + init_item(attrs, attrname, set)
2.500 + attrs[attrname].add(name)
2.501 +
2.502 + # Maintain a record of the smallest object size supporting the given
2.503 + # attribute.
2.504 +
2.505 + if not sizes.has_key(attrname):
2.506 + sizes[attrname] = len(attrnames)
2.507 + else:
2.508 + sizes[attrname] = min(sizes[attrname], len(attrnames))
2.509 +
2.510 + # Record the object types/kinds supporting the attribute.
2.511 +
2.512 + init_item(objtypes, attrname, set)
2.513 + objtypes[attrname].add(objtype)
2.514 +
2.515 + # Obtain attribute details in order of size and occupancy.
2.516 +
2.517 + names = d.keys()
2.518 +
2.519 + rsizes = []
2.520 + for attrname, size in sizes.items():
2.521 + priority = "<instance>" in objtypes[attrname] and 0.5 or 1
2.522 + occupied = len(attrs[attrname])
2.523 + key = (priority * size, size, len(names) - occupied, attrname)
2.524 + rsizes.append(key)
2.525 +
2.526 + rsizes.sort()
2.527 +
2.528 + # Make a matrix of attributes.
2.529 +
2.530 + matrix = {}
2.531 + for attrname, types in attrs.items():
2.532 + row = []
2.533 + for name in names:
2.534 + if name in types:
2.535 + row.append(attrname)
2.536 + else:
2.537 + row.append(None)
2.538 + matrix[attrname] = row
2.539 +
2.540 + return matrix, names, rsizes
2.541 +
2.542 +def get_parameters_and_sizes(d):
2.543 +
2.544 + """
2.545 + Return a matrix of parameters, a list of functions corresponding to columns
2.546 + in the matrix, and a list of ranked sizes each indicating...
2.547 +
2.548 + * a weighted size depending on the kind of object
2.549 + * the minimum size of a parameter list employing a parameter
2.550 + * the number of free columns in the matrix for the parameter
2.551 + * the parameter name itself
2.552 +
2.553 + This is a slightly simpler version of the above 'get_attributes_and_sizes'
2.554 + function.
2.555 + """
2.556 +
2.557 + params = {}
2.558 + sizes = {}
2.559 +
2.560 + for name, argnames in d.items():
2.561 + for argname in argnames:
2.562 +
2.563 + # Record each function supporting the parameter.
2.564 +
2.565 + init_item(params, argname, set)
2.566 + params[argname].add(name)
2.567 +
2.568 + # Maintain a record of the smallest parameter list supporting the
2.569 + # given parameter.
2.570 +
2.571 + if not sizes.has_key(argname):
2.572 + sizes[argname] = len(argnames)
2.573 + else:
2.574 + sizes[argname] = min(sizes[argname], len(argnames))
2.575 +
2.576 + # Obtain attribute details in order of size and occupancy.
2.577 +
2.578 + names = d.keys()
2.579 +
2.580 + rsizes = []
2.581 + for argname, size in sizes.items():
2.582 + occupied = len(params[argname])
2.583 + key = (size, size, len(names) - occupied, argname)
2.584 + rsizes.append(key)
2.585 +
2.586 + rsizes.sort()
2.587 +
2.588 + # Make a matrix of parameters.
2.589 +
2.590 + matrix = {}
2.591 + for argname, types in params.items():
2.592 + row = []
2.593 + for name in names:
2.594 + if name in types:
2.595 + row.append(argname)
2.596 + else:
2.597 + row.append(None)
2.598 + matrix[argname] = row
2.599 +
2.600 + return matrix, names, rsizes
2.601 +
2.602 +def get_allocated_locations(d, fn):
2.603 +
2.604 + """
2.605 + Return a list where each element corresponds to a structure location and
2.606 + contains a set of attribute names that may be stored at that location, given
2.607 + a mapping 'd' whose keys are (object type, object name) tuples and whose
2.608 + values are collections of attributes.
2.609 + """
2.610 +
2.611 + matrix, names, rsizes = fn(d)
2.612 + allocated = []
2.613 +
2.614 + x = 0
2.615 + while x < len(rsizes):
2.616 + weight, size, free, attrname = rsizes[x]
2.617 + base = matrix[attrname]
2.618 + y = x + 1
2.619 + while y < len(rsizes):
2.620 + _weight, _size, _free, _attrname = rsizes[y]
2.621 + occupied = len(names) - _free
2.622 + if occupied > free:
2.623 + break
2.624 + new = combine_rows(base, matrix[_attrname])
2.625 + if new:
2.626 + del matrix[_attrname]
2.627 + del rsizes[y]
2.628 + base = new
2.629 + free -= occupied
2.630 + else:
2.631 + y += 1
2.632 + allocated.append(base)
2.633 + x += 1
2.634 +
2.635 + # Return the list of attribute names from each row of the allocated
2.636 + # attributes table.
2.637 +
2.638 + return [set([attrname for attrname in attrnames if attrname]) for attrnames in allocated]
2.639 +
2.640 +# vim: tabstop=4 expandtab shiftwidth=4