1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/optimiser.py Tue Oct 11 16:29:50 2016 +0200
1.3 @@ -0,0 +1,637 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Optimise object layouts.
1.8 +
1.9 +Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>
1.10 +
1.11 +This program is free software; you can redistribute it and/or modify it under
1.12 +the terms of the GNU General Public License as published by the Free Software
1.13 +Foundation; either version 3 of the License, or (at your option) any later
1.14 +version.
1.15 +
1.16 +This program is distributed in the hope that it will be useful, but WITHOUT
1.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
1.19 +details.
1.20 +
1.21 +You should have received a copy of the GNU General Public License along with
1.22 +this program. If not, see <http://www.gnu.org/licenses/>.
1.23 +"""
1.24 +
1.25 +from common import add_counter_item, get_attrname_from_location, init_item, \
1.26 + sorted_output
1.27 +from encoders import encode_access_location, get_kinds
1.28 +from os.path import exists, join
1.29 +from os import makedirs
1.30 +from referencing import Reference
1.31 +
1.32 +class Optimiser:
1.33 +
1.34 + "Optimise objects in a program."
1.35 +
1.36 + def __init__(self, importer, deducer, output):
1.37 +
1.38 + """
1.39 + Initialise an instance using the given 'importer' and 'deducer' that
1.40 + will perform the arrangement of attributes for program objects, writing
1.41 + the results to the given 'output' directory.
1.42 + """
1.43 +
1.44 + self.importer = importer
1.45 + self.deducer = deducer
1.46 + self.output = output
1.47 +
1.48 + # Locations/offsets of attributes in objects.
1.49 +
1.50 + self.locations = None
1.51 + self.attr_locations = None
1.52 + self.all_attrnames = None
1.53 +
1.54 + # Locations of parameters in parameter tables.
1.55 +
1.56 + self.arg_locations = None
1.57 + self.param_locations = None
1.58 + self.all_paramnames = None
1.59 +
1.60 + # Specific attribute access information.
1.61 +
1.62 + self.access_plans = {}
1.63 +
1.64 + # Object structure information.
1.65 +
1.66 + self.structures = {}
1.67 + self.attr_table = {}
1.68 +
1.69 + # Parameter list information.
1.70 +
1.71 + self.parameters = {}
1.72 + self.param_table = {}
1.73 +
1.74 + # Constant literal information.
1.75 +
1.76 + self.constants = []
1.77 + self.constant_numbers = {}
1.78 +
1.79 + # Optimiser activities.
1.80 +
1.81 + self.populate_objects()
1.82 + self.position_attributes()
1.83 + self.populate_parameters()
1.84 + self.position_parameters()
1.85 + self.populate_tables()
1.86 + self.populate_constants()
1.87 + self.refine_access_plans()
1.88 +
1.89 + def to_output(self):
1.90 +
1.91 + "Write the output files using optimisation information."
1.92 +
1.93 + if not exists(self.output):
1.94 + makedirs(self.output)
1.95 +
1.96 + self.write_objects()
1.97 +
1.98 + def write_objects(self):
1.99 +
1.100 + """
1.101 + Write object-related output.
1.102 +
1.103 + The locations are a list of positions indicating the attributes residing
1.104 + at each position in the different structures in a program.
1.105 +
1.106 + ----
1.107 +
1.108 + The parameter locations are a list of positions indicating the parameters
1.109 + residing at each position in the different parameter lists in a program.
1.110 +
1.111 + ----
1.112 +
1.113 + Each attribute plan provides attribute details in the following format:
1.114 +
1.115 + location " " name " " test " " test type " " base
1.116 + " " traversed attributes " " traversed attribute ambiguity
1.117 + " " attributes to traverse " " attribute ambiguity
1.118 + " " context " " access method " " static attribute
1.119 +
1.120 + Locations have the following format:
1.121 +
1.122 + qualified name of scope "." local name ":" name version
1.123 +
1.124 + ----
1.125 +
1.126 + The structures are presented as a table in the following format:
1.127 +
1.128 + qualified name " " attribute names
1.129 +
1.130 + The attribute names are separated by ", " characters and indicate the
1.131 + attribute provided at each position in the structure associated with the
1.132 + given type. Where no attribute is provided at a particular location
1.133 + within a structure, "-" is given.
1.134 +
1.135 + ----
1.136 +
1.137 + The parameters are presented as a table in the following format:
1.138 +
1.139 + qualified name " " parameter details
1.140 +
1.141 + The parameter details are separated by ", " characters and indicate
1.142 + the parameter name and list position for each parameter described at
1.143 + each location in the parameter table associated with the given
1.144 + function. Where no parameter details are provided at a particular
1.145 + location within a parameter table, "-" is given. The name and list
1.146 + position are separated by a colon (":").
1.147 +
1.148 + ----
1.149 +
1.150 + The attribute table is presented as a table in the following format:
1.151 +
1.152 + qualified name " " attribute identifiers
1.153 +
1.154 + Instead of attribute names, identifiers defined according to the order
1.155 + given in the "attrnames" file are employed to denote the attributes
1.156 + featured in each type's structure. Where no attribute is provided at a
1.157 + particular location within a structure, "-" is given.
1.158 +
1.159 + ----
1.160 +
1.161 + The parameter table is presented as a table in the following format:
1.162 +
1.163 + qualified name " " parameter details
1.164 +
1.165 + Instead of parameter names, identifiers defined according to the order
1.166 + given in the "paramnames" file are employed to denote the parameters
1.167 + featured in each function's parameter table. Where no parameter is
1.168 + provided at a particular location within a table, "-" is given.
1.169 +
1.170 + ----
1.171 +
1.172 + The ordered list of attribute names is given in the "attrnames" file.
1.173 +
1.174 + ----
1.175 +
1.176 + The ordered list of parameter names is given in the "paramnames" file.
1.177 +
1.178 + ----
1.179 +
1.180 + The ordered list of constant literals is given in the "constants" file.
1.181 + """
1.182 +
1.183 + f = open(join(self.output, "locations"), "w")
1.184 + try:
1.185 + for attrnames in self.locations:
1.186 + print >>f, sorted_output(attrnames)
1.187 +
1.188 + finally:
1.189 + f.close()
1.190 +
1.191 + f = open(join(self.output, "parameter_locations"), "w")
1.192 + try:
1.193 + for argnames in self.arg_locations:
1.194 + print >>f, sorted_output(argnames)
1.195 +
1.196 + finally:
1.197 + f.close()
1.198 +
1.199 + f = open(join(self.output, "attribute_plans"), "w")
1.200 + try:
1.201 + access_plans = self.access_plans.items()
1.202 + access_plans.sort()
1.203 +
1.204 + for location, (name, test, test_type, base,
1.205 + traversed, traversed_ambiguity,
1.206 + attrnames, attrnames_ambiguity, context, method, attr) in access_plans:
1.207 +
1.208 + print >>f, encode_access_location(location), \
1.209 + name, test, test_type or "{}", \
1.210 + base or "{}", \
1.211 + ".".join(traversed) or "{}", \
1.212 + ".".join(map(str, traversed_ambiguity)) or "{}", \
1.213 + ".".join(map(str, attrnames_ambiguity)) or "{}", \
1.214 + ".".join(attrnames) or "{}", \
1.215 + context, method, attr or "{}"
1.216 +
1.217 + finally:
1.218 + f.close()
1.219 +
1.220 + f = open(join(self.output, "structures"), "w")
1.221 + try:
1.222 + structures = self.structures.items()
1.223 + structures.sort()
1.224 +
1.225 + for name, attrnames in structures:
1.226 + print >>f, name, ", ".join([s or "-" for s in attrnames])
1.227 +
1.228 + finally:
1.229 + f.close()
1.230 +
1.231 + f = open(join(self.output, "parameters"), "w")
1.232 + try:
1.233 + parameters = self.parameters.items()
1.234 + parameters.sort()
1.235 +
1.236 + for name, argnames in parameters:
1.237 + print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames])
1.238 +
1.239 + finally:
1.240 + f.close()
1.241 +
1.242 + f = open(join(self.output, "attrtable"), "w")
1.243 + try:
1.244 + attr_table = self.attr_table.items()
1.245 + attr_table.sort()
1.246 +
1.247 + for name, attrcodes in attr_table:
1.248 + print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes])
1.249 +
1.250 + finally:
1.251 + f.close()
1.252 +
1.253 + f = open(join(self.output, "paramtable"), "w")
1.254 + try:
1.255 + param_table = self.param_table.items()
1.256 + param_table.sort()
1.257 +
1.258 + for name, paramcodes in param_table:
1.259 + print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes])
1.260 +
1.261 + finally:
1.262 + f.close()
1.263 +
1.264 + f = open(join(self.output, "attrnames"), "w")
1.265 + try:
1.266 + for name in self.all_attrnames:
1.267 + print >>f, name
1.268 +
1.269 + finally:
1.270 + f.close()
1.271 +
1.272 + f = open(join(self.output, "paramnames"), "w")
1.273 + try:
1.274 + for name in self.all_paramnames:
1.275 + print >>f, name
1.276 +
1.277 + finally:
1.278 + f.close()
1.279 +
1.280 + f = open(join(self.output, "constants"), "w")
1.281 + try:
1.282 + constants = [(n, value) for (value, n) in self.constants.items()]
1.283 + constants.sort()
1.284 + for n, value in constants:
1.285 + print >>f, repr(value)
1.286 +
1.287 + finally:
1.288 + f.close()
1.289 +
1.290 + def populate_objects(self):
1.291 +
1.292 + "Populate objects using attribute and usage information."
1.293 +
1.294 + all_attrs = {}
1.295 +
1.296 + # Partition attributes into separate sections so that class and instance
1.297 + # attributes are treated separately.
1.298 +
1.299 + for source, objtype in [
1.300 + (self.importer.all_class_attrs, "<class>"),
1.301 + (self.importer.all_instance_attrs, "<instance>"),
1.302 + (self.importer.all_module_attrs, "<module>")
1.303 + ]:
1.304 + for name, attrs in source.items():
1.305 + all_attrs[(objtype, name)] = attrs
1.306 +
1.307 + self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes)
1.308 +
1.309 + def populate_parameters(self):
1.310 +
1.311 + "Populate parameter tables using parameter information."
1.312 +
1.313 + self.arg_locations = get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes)
1.314 +
1.315 + def position_attributes(self):
1.316 +
1.317 + "Position specific attribute references."
1.318 +
1.319 + # Reverse the location mappings.
1.320 +
1.321 + attr_locations = self.attr_locations = {}
1.322 +
1.323 + for i, attrnames in enumerate(self.locations):
1.324 + for attrname in attrnames:
1.325 + attr_locations[attrname] = i
1.326 +
1.327 + # Record the structures.
1.328 +
1.329 + for source, objtype in [
1.330 + (self.importer.all_class_attrs, "<class>"),
1.331 + (self.importer.all_instance_attrs, "<instance>"),
1.332 + (self.importer.all_module_attrs, "<module>")
1.333 + ]:
1.334 +
1.335 + for name, attrnames in source.items():
1.336 + key = Reference(objtype, name)
1.337 + l = self.structures[key] = [None] * len(attrnames)
1.338 + for attrname in attrnames:
1.339 + position = attr_locations[attrname]
1.340 + if position >= len(l):
1.341 + l.extend([None] * (position - len(l) + 1))
1.342 + l[position] = attrname
1.343 +
1.344 + def refine_access_plans(self):
1.345 +
1.346 + "Augment deduced access plans with attribute position information."
1.347 +
1.348 + for access_location, access_plan in self.deducer.access_plans.items():
1.349 +
1.350 + # Obtain the access details.
1.351 +
1.352 + name, test, test_type, base, traversed, attrnames, \
1.353 + context, method, attr = access_plan
1.354 +
1.355 + traversed_ambiguity = self.get_ambiguity_for_attributes(traversed)
1.356 + attrnames_ambiguity = self.get_ambiguity_for_attributes(attrnames)
1.357 +
1.358 + self.access_plans[access_location] = \
1.359 + name, test, test_type, base, traversed, traversed_ambiguity, \
1.360 + attrnames, attrnames_ambiguity, context, method, attr
1.361 +
1.362 + def get_ambiguity_for_attributes(self, attrnames):
1.363 +
1.364 + """
1.365 + Return a list of attribute position alternatives corresponding to each
1.366 + of the given 'attrnames'.
1.367 + """
1.368 +
1.369 + ambiguity = []
1.370 +
1.371 + for attrname in attrnames:
1.372 + position = self.attr_locations[attrname]
1.373 + ambiguity.append(len(self.locations[position]))
1.374 +
1.375 + return ambiguity
1.376 +
1.377 + def position_parameters(self):
1.378 +
1.379 + "Position the parameters for each function's parameter table."
1.380 +
1.381 + # Reverse the location mappings.
1.382 +
1.383 + param_locations = self.param_locations = {}
1.384 +
1.385 + for i, argnames in enumerate(self.arg_locations):
1.386 + for argname in argnames:
1.387 + param_locations[argname] = i
1.388 +
1.389 + for name, argnames in self.importer.function_parameters.items():
1.390 + l = self.parameters[name] = [None] * len(argnames)
1.391 +
1.392 + # Store an entry for the name along with the name's position in the
1.393 + # parameter list.
1.394 +
1.395 + for pos, argname in enumerate(argnames):
1.396 + position = param_locations[argname]
1.397 + if position >= len(l):
1.398 + l.extend([None] * (position - len(l) + 1))
1.399 + l[position] = (argname, pos)
1.400 +
1.401 + def populate_tables(self):
1.402 +
1.403 + """
1.404 + Assign identifiers to attributes and encode structure information using
1.405 + these identifiers.
1.406 + """
1.407 +
1.408 + self.all_attrnames, d = self._get_name_mapping(self.attr_locations)
1.409 +
1.410 + # Record the numbers indicating the locations of the names.
1.411 +
1.412 + for key, attrnames in self.structures.items():
1.413 + l = self.attr_table[key] = []
1.414 + for attrname in attrnames:
1.415 + if attrname is None:
1.416 + l.append(None)
1.417 + else:
1.418 + l.append(d[attrname])
1.419 +
1.420 + self.all_paramnames, d = self._get_name_mapping(self.param_locations)
1.421 +
1.422 + # Record the numbers indicating the locations of the names.
1.423 +
1.424 + for key, values in self.parameters.items():
1.425 + l = self.param_table[key] = []
1.426 + for value in values:
1.427 + if value is None:
1.428 + l.append(None)
1.429 + else:
1.430 + name, pos = value
1.431 + l.append((d[name], pos))
1.432 +
1.433 + def _get_name_mapping(self, locations):
1.434 +
1.435 + """
1.436 + Get a sorted list of names from 'locations', then map them to
1.437 + identifying numbers. Return the list and the mapping.
1.438 + """
1.439 +
1.440 + all_names = locations.keys()
1.441 + all_names.sort()
1.442 + return all_names, dict([(name, i) for i, name in enumerate(all_names)])
1.443 +
1.444 + def populate_constants(self):
1.445 +
1.446 + """
1.447 + Obtain a collection of distinct constant literals, building a mapping
1.448 + from constant references to those in this collection.
1.449 + """
1.450 +
1.451 + # Obtain mappings from constant values to identifiers.
1.452 +
1.453 + self.constants = {}
1.454 +
1.455 + for path, constants in self.importer.all_constants.items():
1.456 + for constant, n in constants.items():
1.457 +
1.458 + # Record constants and obtain a number for them.
1.459 +
1.460 + add_counter_item(self.constants, constant)
1.461 +
1.462 + self.constant_numbers = {}
1.463 +
1.464 + for name, (value, value_type) in self.importer.all_constant_values.items():
1.465 + self.constant_numbers[name] = self.constants[value]
1.466 +
1.467 +def combine_rows(a, b):
1.468 + c = []
1.469 + for i, j in zip(a, b):
1.470 + if i is None or j is None:
1.471 + c.append(i or j)
1.472 + else:
1.473 + return None
1.474 + return c
1.475 +
1.476 +def get_attributes_and_sizes(d):
1.477 +
1.478 + """
1.479 + Return a matrix of attributes, a list of type names corresponding to columns
1.480 + in the matrix, and a list of ranked sizes each indicating...
1.481 +
1.482 + * a weighted size depending on the kind of object
1.483 + * the minimum size of an object employing an attribute
1.484 + * the number of free columns in the matrix for the attribute
1.485 + * the attribute name itself
1.486 + """
1.487 +
1.488 + attrs = {}
1.489 + sizes = {}
1.490 + objtypes = {}
1.491 +
1.492 + for name, attrnames in d.items():
1.493 + objtype, _name = name
1.494 +
1.495 + for attrname in attrnames:
1.496 +
1.497 + # Record each type supporting the attribute.
1.498 +
1.499 + init_item(attrs, attrname, set)
1.500 + attrs[attrname].add(name)
1.501 +
1.502 + # Maintain a record of the smallest object size supporting the given
1.503 + # attribute.
1.504 +
1.505 + if not sizes.has_key(attrname):
1.506 + sizes[attrname] = len(attrnames)
1.507 + else:
1.508 + sizes[attrname] = min(sizes[attrname], len(attrnames))
1.509 +
1.510 + # Record the object types/kinds supporting the attribute.
1.511 +
1.512 + init_item(objtypes, attrname, set)
1.513 + objtypes[attrname].add(objtype)
1.514 +
1.515 + # Obtain attribute details in order of size and occupancy.
1.516 +
1.517 + names = d.keys()
1.518 +
1.519 + rsizes = []
1.520 + for attrname, size in sizes.items():
1.521 + priority = "<instance>" in objtypes[attrname] and 0.5 or 1
1.522 + occupied = len(attrs[attrname])
1.523 + key = (priority * size, size, len(names) - occupied, attrname)
1.524 + rsizes.append(key)
1.525 +
1.526 + rsizes.sort()
1.527 +
1.528 + # Make a matrix of attributes.
1.529 +
1.530 + matrix = {}
1.531 + for attrname, types in attrs.items():
1.532 + row = []
1.533 + for name in names:
1.534 + if name in types:
1.535 + row.append(attrname)
1.536 + else:
1.537 + row.append(None)
1.538 + matrix[attrname] = row
1.539 +
1.540 + return matrix, names, rsizes
1.541 +
1.542 +def get_parameters_and_sizes(d):
1.543 +
1.544 + """
1.545 + Return a matrix of parameters, a list of functions corresponding to columns
1.546 + in the matrix, and a list of ranked sizes each indicating...
1.547 +
1.548 + * a weighted size depending on the kind of object
1.549 + * the minimum size of a parameter list employing a parameter
1.550 + * the number of free columns in the matrix for the parameter
1.551 + * the parameter name itself
1.552 +
1.553 + This is a slightly simpler version of the above 'get_attributes_and_sizes'
1.554 + function.
1.555 + """
1.556 +
1.557 + params = {}
1.558 + sizes = {}
1.559 +
1.560 + for name, argnames in d.items():
1.561 + for argname in argnames:
1.562 +
1.563 + # Record each function supporting the parameter.
1.564 +
1.565 + init_item(params, argname, set)
1.566 + params[argname].add(name)
1.567 +
1.568 + # Maintain a record of the smallest parameter list supporting the
1.569 + # given parameter.
1.570 +
1.571 + if not sizes.has_key(argname):
1.572 + sizes[argname] = len(argnames)
1.573 + else:
1.574 + sizes[argname] = min(sizes[argname], len(argnames))
1.575 +
1.576 + # Obtain attribute details in order of size and occupancy.
1.577 +
1.578 + names = d.keys()
1.579 +
1.580 + rsizes = []
1.581 + for argname, size in sizes.items():
1.582 + occupied = len(params[argname])
1.583 + key = (size, size, len(names) - occupied, argname)
1.584 + rsizes.append(key)
1.585 +
1.586 + rsizes.sort()
1.587 +
1.588 + # Make a matrix of parameters.
1.589 +
1.590 + matrix = {}
1.591 + for argname, types in params.items():
1.592 + row = []
1.593 + for name in names:
1.594 + if name in types:
1.595 + row.append(argname)
1.596 + else:
1.597 + row.append(None)
1.598 + matrix[argname] = row
1.599 +
1.600 + return matrix, names, rsizes
1.601 +
1.602 +def get_allocated_locations(d, fn):
1.603 +
1.604 + """
1.605 + Return a list where each element corresponds to a structure location and
1.606 + contains a set of attribute names that may be stored at that location, given
1.607 + a mapping 'd' whose keys are (object type, object name) tuples and whose
1.608 + values are collections of attributes.
1.609 + """
1.610 +
1.611 + matrix, names, rsizes = fn(d)
1.612 + allocated = []
1.613 +
1.614 + x = 0
1.615 + while x < len(rsizes):
1.616 + weight, size, free, attrname = rsizes[x]
1.617 + base = matrix[attrname]
1.618 + y = x + 1
1.619 + while y < len(rsizes):
1.620 + _weight, _size, _free, _attrname = rsizes[y]
1.621 + occupied = len(names) - _free
1.622 + if occupied > free:
1.623 + break
1.624 + new = combine_rows(base, matrix[_attrname])
1.625 + if new:
1.626 + del matrix[_attrname]
1.627 + del rsizes[y]
1.628 + base = new
1.629 + free -= occupied
1.630 + else:
1.631 + y += 1
1.632 + allocated.append(base)
1.633 + x += 1
1.634 +
1.635 + # Return the list of attribute names from each row of the allocated
1.636 + # attributes table.
1.637 +
1.638 + return [set([attrname for attrname in attrnames if attrname]) for attrnames in allocated]
1.639 +
1.640 +# vim: tabstop=4 expandtab shiftwidth=4