1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts. 5 6 Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_plans = {} 60 self.access_instructions = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.refine_access_plans() 86 self.initialise_access_instructions() 87 88 def to_output(self): 89 90 "Write the output files using optimisation information." 91 92 if not exists(self.output): 93 makedirs(self.output) 94 95 self.write_objects() 96 97 def write_objects(self): 98 99 """ 100 Write object-related output. 101 102 The locations are a list of positions indicating the attributes residing 103 at each position in the different structures in a program. 104 105 ---- 106 107 The parameter locations are a list of positions indicating the parameters 108 residing at each position in the different parameter lists in a program. 109 110 ---- 111 112 Each attribute plan provides attribute details in the following format: 113 114 location " " name " " test " " test type " " base 115 " " traversed attributes " " traversed attribute ambiguity 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 ---- 124 125 The structures are presented as a table in the following format: 126 127 qualified name " " attribute names 128 129 The attribute names are separated by ", " characters and indicate the 130 attribute provided at each position in the structure associated with the 131 given type. Where no attribute is provided at a particular location 132 within a structure, "-" is given. 133 134 ---- 135 136 The parameters are presented as a table in the following format: 137 138 qualified name " " parameter details 139 140 The parameter details are separated by ", " characters and indicate 141 the parameter name and list position for each parameter described at 142 each location in the parameter table associated with the given 143 function. Where no parameter details are provided at a particular 144 location within a parameter table, "-" is given. The name and list 145 position are separated by a colon (":"). 146 147 ---- 148 149 The attribute table is presented as a table in the following format: 150 151 qualified name " " attribute identifiers 152 153 Instead of attribute names, identifiers defined according to the order 154 given in the "attrnames" file are employed to denote the attributes 155 featured in each type's structure. Where no attribute is provided at a 156 particular location within a structure, "-" is given. 157 158 ---- 159 160 The parameter table is presented as a table in the following format: 161 162 qualified name " " parameter details 163 164 Instead of parameter names, identifiers defined according to the order 165 given in the "paramnames" file are employed to denote the parameters 166 featured in each function's parameter table. Where no parameter is 167 provided at a particular location within a table, "-" is given. 168 169 ---- 170 171 The ordered list of attribute names is given in the "attrnames" file. 172 173 ---- 174 175 The ordered list of parameter names is given in the "paramnames" file. 176 177 ---- 178 179 The ordered list of constant literals is given in the "constants" file. 180 """ 181 182 f = open(join(self.output, "locations"), "w") 183 try: 184 for attrnames in self.locations: 185 print >>f, sorted_output(attrnames) 186 187 finally: 188 f.close() 189 190 f = open(join(self.output, "parameter_locations"), "w") 191 try: 192 for argnames in self.arg_locations: 193 print >>f, sorted_output(argnames) 194 195 finally: 196 f.close() 197 198 f = open(join(self.output, "attribute_plans"), "w") 199 try: 200 access_plans = self.access_plans.items() 201 access_plans.sort() 202 203 for location, (name, test, test_type, base, 204 traversed, traversed_ambiguity, 205 attrnames, attrnames_ambiguity, context, 206 first_method, final_method, attr) in access_plans: 207 208 print >>f, encode_access_location(location), \ 209 name, test, test_type or "{}", \ 210 base or "{}", \ 211 ".".join(traversed) or "{}", \ 212 ".".join(map(str, traversed_ambiguity)) or "{}", \ 213 ".".join(map(str, attrnames_ambiguity)) or "{}", \ 214 ".".join(attrnames) or "{}", \ 215 context, first_method, final_method, attr or "{}" 216 217 finally: 218 f.close() 219 220 f = open(join(self.output, "instruction_plans"), "w") 221 try: 222 access_instructions = self.access_instructions.items() 223 access_instructions.sort() 224 225 for location, instructions in access_instructions: 226 print >>f, encode_access_location(location), "..." 227 for instruction in instructions: 228 print >>f, encode_instruction(instruction) 229 print >>f 230 231 finally: 232 f.close() 233 234 f = open(join(self.output, "structures"), "w") 235 try: 236 structures = self.structures.items() 237 structures.sort() 238 239 for name, attrnames in structures: 240 print >>f, name, ", ".join([s or "-" for s in attrnames]) 241 242 finally: 243 f.close() 244 245 f = open(join(self.output, "parameters"), "w") 246 try: 247 parameters = self.parameters.items() 248 parameters.sort() 249 250 for name, argnames in parameters: 251 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 252 253 finally: 254 f.close() 255 256 f = open(join(self.output, "attrtable"), "w") 257 try: 258 attr_table = self.attr_table.items() 259 attr_table.sort() 260 261 for name, attrcodes in attr_table: 262 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramtable"), "w") 268 try: 269 param_table = self.param_table.items() 270 param_table.sort() 271 272 for name, paramcodes in param_table: 273 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 274 275 finally: 276 f.close() 277 278 f = open(join(self.output, "attrnames"), "w") 279 try: 280 for name in self.all_attrnames: 281 print >>f, name 282 283 finally: 284 f.close() 285 286 f = open(join(self.output, "paramnames"), "w") 287 try: 288 for name in self.all_paramnames: 289 print >>f, name 290 291 finally: 292 f.close() 293 294 f = open(join(self.output, "constants"), "w") 295 try: 296 constants = [(n, value) for (value, n) in self.constants.items()] 297 constants.sort() 298 for n, value in constants: 299 print >>f, repr(value) 300 301 finally: 302 f.close() 303 304 def populate_objects(self): 305 306 "Populate objects using attribute and usage information." 307 308 all_attrs = {} 309 310 # Partition attributes into separate sections so that class and instance 311 # attributes are treated separately. 312 313 for source, objtype in [ 314 (self.importer.all_class_attrs, "<class>"), 315 (self.importer.all_instance_attrs, "<instance>"), 316 (self.importer.all_module_attrs, "<module>") 317 ]: 318 for name, attrs in source.items(): 319 all_attrs[(objtype, name)] = attrs 320 321 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 322 323 def populate_parameters(self): 324 325 "Populate parameter tables using parameter information." 326 327 self.arg_locations = get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 328 329 def position_attributes(self): 330 331 "Position specific attribute references." 332 333 # Reverse the location mappings. 334 335 attr_locations = self.attr_locations = {} 336 337 for i, attrnames in enumerate(self.locations): 338 for attrname in attrnames: 339 attr_locations[attrname] = i 340 341 # Record the structures. 342 343 for source, objtype in [ 344 (self.importer.all_class_attrs, "<class>"), 345 (self.importer.all_instance_attrs, "<instance>"), 346 (self.importer.all_module_attrs, "<module>") 347 ]: 348 349 for name, attrnames in source.items(): 350 key = Reference(objtype, name) 351 l = self.structures[key] = [None] * len(attrnames) 352 for attrname in attrnames: 353 position = attr_locations[attrname] 354 if position >= len(l): 355 l.extend([None] * (position - len(l) + 1)) 356 l[position] = attrname 357 358 def refine_access_plans(self): 359 360 "Augment deduced access plans with attribute position information." 361 362 for access_location, access_plan in self.deducer.access_plans.items(): 363 364 # Obtain the access details. 365 366 name, test, test_type, base, traversed, attrnames, \ 367 context, first_method, final_method, origin = access_plan 368 369 traversed_ambiguity = self.get_ambiguity_for_attributes(traversed) 370 attrnames_ambiguity = self.get_ambiguity_for_attributes(attrnames) 371 372 self.access_plans[access_location] = \ 373 name, test, test_type, base, traversed, traversed_ambiguity, \ 374 attrnames, attrnames_ambiguity, context, \ 375 first_method, final_method, origin 376 377 def initialise_access_instructions(self): 378 379 "Expand access plans into instruction sequences." 380 381 for access_location, access_plan in self.access_plans.items(): 382 383 # Obtain the access details. 384 385 name, test, test_type, base, traversed, traversed_ambiguity, \ 386 attrnames, attrnames_ambiguity, context, \ 387 first_method, final_method, origin = access_plan 388 389 instructions = [] 390 emit = instructions.append 391 392 if base: 393 original_accessor = base 394 else: 395 original_accessor = name 396 397 # Prepare for any first attribute access. 398 399 if traversed: 400 attrname = traversed[0] 401 del traversed[0] 402 elif attrnames: 403 attrname = attrnames[0] 404 del attrnames[0] 405 406 access_first_attribute = final_method == "access" or traversed or attrnames 407 408 if context == "final-accessor" or access_first_attribute: 409 emit(("set_accessor", original_accessor)) 410 411 # Set the context if already available. 412 413 if context == "original-accessor": 414 emit(("set_context", original_accessor)) 415 elif context == "base": 416 emit(("set_context", base)) 417 418 # Apply any test. 419 420 if test_type == "specific-type": 421 emit(("test_specific_type", accessor, test_type)) 422 elif test_type == "specific-instance": 423 emit(("test_specific_instance", accessor, test_type)) 424 elif test_type == "specific-object": 425 emit(("test_specific_object", accessor, test_type)) 426 elif test_type == "common-type": 427 emit(("test_common_type", accessor, test_type)) 428 elif test_type == "common-instance": 429 emit(("test_common_instance", accessor, test_type)) 430 elif test_type == "common-object": 431 emit(("test_common_object", accessor, test_type)) 432 433 # Perform the first or final access. 434 # The access only needs performing if the resulting accessor is used. 435 436 if access_first_attribute: 437 438 if first_method == "relative-class": 439 emit(("set_accessor", ("load_via_class", "accessor", "attrname"))) 440 elif first_method == "relative-object": 441 emit(("set_accessor", ("load_via_object", "accessor", "attrname"))) 442 elif first_method == "relative-object-class": 443 emit(("set_accessor", ("get_class_and_load", "accessor", "attrname"))) 444 elif first_method == "check-class": 445 emit(("set_accessor", ("check_and_load_via_class", "accessor", "attrname"))) 446 elif first_method == "check-object": 447 emit(("set_accessor", ("check_and_load_via_object", "accessor", "attrname"))) 448 elif first_method == "check-object-class": 449 emit(("set_accessor", ("get_class_check_and_load", "accessor", "attrname"))) 450 451 # Obtain an accessor. 452 453 remaining = len(traversed + attrnames) 454 455 if traversed: 456 for attrname in traversed: 457 458 # Set the context, if appropriate. 459 460 if remaining == 1 and context == "final-accessor": 461 emit(("set_context", "accessor")) 462 463 # Perform the access only if not achieved directly. 464 465 if remaining > 1 or final_method == "access": 466 emit(("set_accessor", ("load_unambiguous", "accessor", attrname))) 467 468 remaining -= 1 469 470 if attrnames: 471 for attrname, ambiguity in zip(attrnames, attrnames_ambiguity): 472 473 # Set the context, if appropriate. 474 475 if remaining == 1 and context == "final-accessor": 476 emit(("set_context", "accessor")) 477 478 # Perform the access only if not achieved directly. 479 480 if remaining > 1 or final_method == "access": 481 if ambiguity == 1: 482 emit(("set_accessor", ("load_unambiguous", "accessor", attrname))) 483 else: 484 emit(("set_accessor", ("load_ambiguous", "accessor", attrname))) 485 486 remaining -= 1 487 488 if final_method == "assign": 489 emit(("store_member", origin, "<expr>")) 490 elif final_method == "static": 491 emit(("load_static", origin)) 492 493 self.access_instructions[access_location] = instructions 494 495 def get_ambiguity_for_attributes(self, attrnames): 496 497 """ 498 Return a list of attribute position alternatives corresponding to each 499 of the given 'attrnames'. 500 """ 501 502 ambiguity = [] 503 504 for attrname in attrnames: 505 position = self.attr_locations[attrname] 506 ambiguity.append(len(self.locations[position])) 507 508 return ambiguity 509 510 def position_parameters(self): 511 512 "Position the parameters for each function's parameter table." 513 514 # Reverse the location mappings. 515 516 param_locations = self.param_locations = {} 517 518 for i, argnames in enumerate(self.arg_locations): 519 for argname in argnames: 520 param_locations[argname] = i 521 522 for name, argnames in self.importer.function_parameters.items(): 523 l = self.parameters[name] = [None] * len(argnames) 524 525 # Store an entry for the name along with the name's position in the 526 # parameter list. 527 528 for pos, argname in enumerate(argnames): 529 position = param_locations[argname] 530 if position >= len(l): 531 l.extend([None] * (position - len(l) + 1)) 532 l[position] = (argname, pos) 533 534 def populate_tables(self): 535 536 """ 537 Assign identifiers to attributes and encode structure information using 538 these identifiers. 539 """ 540 541 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 542 543 # Record the numbers indicating the locations of the names. 544 545 for key, attrnames in self.structures.items(): 546 l = self.attr_table[key] = [] 547 for attrname in attrnames: 548 if attrname is None: 549 l.append(None) 550 else: 551 l.append(d[attrname]) 552 553 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 554 555 # Record the numbers indicating the locations of the names. 556 557 for key, values in self.parameters.items(): 558 l = self.param_table[key] = [] 559 for value in values: 560 if value is None: 561 l.append(None) 562 else: 563 name, pos = value 564 l.append((d[name], pos)) 565 566 def _get_name_mapping(self, locations): 567 568 """ 569 Get a sorted list of names from 'locations', then map them to 570 identifying numbers. Return the list and the mapping. 571 """ 572 573 all_names = locations.keys() 574 all_names.sort() 575 return all_names, dict([(name, i) for i, name in enumerate(all_names)]) 576 577 def populate_constants(self): 578 579 """ 580 Obtain a collection of distinct constant literals, building a mapping 581 from constant references to those in this collection. 582 """ 583 584 # Obtain mappings from constant values to identifiers. 585 586 self.constants = {} 587 588 for path, constants in self.importer.all_constants.items(): 589 for constant, n in constants.items(): 590 591 # Record constants and obtain a number for them. 592 593 add_counter_item(self.constants, constant) 594 595 self.constant_numbers = {} 596 597 for name, (value, value_type) in self.importer.all_constant_values.items(): 598 self.constant_numbers[name] = self.constants[value] 599 600 def combine_rows(a, b): 601 c = [] 602 for i, j in zip(a, b): 603 if i is None or j is None: 604 c.append(i or j) 605 else: 606 return None 607 return c 608 609 def get_attributes_and_sizes(d): 610 611 """ 612 Return a matrix of attributes, a list of type names corresponding to columns 613 in the matrix, and a list of ranked sizes each indicating... 614 615 * a weighted size depending on the kind of object 616 * the minimum size of an object employing an attribute 617 * the number of free columns in the matrix for the attribute 618 * the attribute name itself 619 """ 620 621 attrs = {} 622 sizes = {} 623 objtypes = {} 624 625 for name, attrnames in d.items(): 626 objtype, _name = name 627 628 for attrname in attrnames: 629 630 # Record each type supporting the attribute. 631 632 init_item(attrs, attrname, set) 633 attrs[attrname].add(name) 634 635 # Maintain a record of the smallest object size supporting the given 636 # attribute. 637 638 if not sizes.has_key(attrname): 639 sizes[attrname] = len(attrnames) 640 else: 641 sizes[attrname] = min(sizes[attrname], len(attrnames)) 642 643 # Record the object types/kinds supporting the attribute. 644 645 init_item(objtypes, attrname, set) 646 objtypes[attrname].add(objtype) 647 648 # Obtain attribute details in order of size and occupancy. 649 650 names = d.keys() 651 652 rsizes = [] 653 for attrname, size in sizes.items(): 654 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 655 occupied = len(attrs[attrname]) 656 key = (priority * size, size, len(names) - occupied, attrname) 657 rsizes.append(key) 658 659 rsizes.sort() 660 661 # Make a matrix of attributes. 662 663 matrix = {} 664 for attrname, types in attrs.items(): 665 row = [] 666 for name in names: 667 if name in types: 668 row.append(attrname) 669 else: 670 row.append(None) 671 matrix[attrname] = row 672 673 return matrix, names, rsizes 674 675 def get_parameters_and_sizes(d): 676 677 """ 678 Return a matrix of parameters, a list of functions corresponding to columns 679 in the matrix, and a list of ranked sizes each indicating... 680 681 * a weighted size depending on the kind of object 682 * the minimum size of a parameter list employing a parameter 683 * the number of free columns in the matrix for the parameter 684 * the parameter name itself 685 686 This is a slightly simpler version of the above 'get_attributes_and_sizes' 687 function. 688 """ 689 690 params = {} 691 sizes = {} 692 693 for name, argnames in d.items(): 694 for argname in argnames: 695 696 # Record each function supporting the parameter. 697 698 init_item(params, argname, set) 699 params[argname].add(name) 700 701 # Maintain a record of the smallest parameter list supporting the 702 # given parameter. 703 704 if not sizes.has_key(argname): 705 sizes[argname] = len(argnames) 706 else: 707 sizes[argname] = min(sizes[argname], len(argnames)) 708 709 # Obtain attribute details in order of size and occupancy. 710 711 names = d.keys() 712 713 rsizes = [] 714 for argname, size in sizes.items(): 715 occupied = len(params[argname]) 716 key = (size, size, len(names) - occupied, argname) 717 rsizes.append(key) 718 719 rsizes.sort() 720 721 # Make a matrix of parameters. 722 723 matrix = {} 724 for argname, types in params.items(): 725 row = [] 726 for name in names: 727 if name in types: 728 row.append(argname) 729 else: 730 row.append(None) 731 matrix[argname] = row 732 733 return matrix, names, rsizes 734 735 def get_allocated_locations(d, fn): 736 737 """ 738 Return a list where each element corresponds to a structure location and 739 contains a set of attribute names that may be stored at that location, given 740 a mapping 'd' whose keys are (object type, object name) tuples and whose 741 values are collections of attributes. 742 """ 743 744 matrix, names, rsizes = fn(d) 745 allocated = [] 746 747 x = 0 748 while x < len(rsizes): 749 weight, size, free, attrname = rsizes[x] 750 base = matrix[attrname] 751 y = x + 1 752 while y < len(rsizes): 753 _weight, _size, _free, _attrname = rsizes[y] 754 occupied = len(names) - _free 755 if occupied > free: 756 break 757 new = combine_rows(base, matrix[_attrname]) 758 if new: 759 del matrix[_attrname] 760 del rsizes[y] 761 base = new 762 free -= occupied 763 else: 764 y += 1 765 allocated.append(base) 766 x += 1 767 768 # Return the list of attribute names from each row of the allocated 769 # attributes table. 770 771 return [set([attrname for attrname in attrnames if attrname]) for attrnames in allocated] 772 773 # vim: tabstop=4 expandtab shiftwidth=4