1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [(n, value) for (value, n) in self.constants.items()] 278 constants.sort() 279 for n, value in constants: 280 print >>f, repr(value) 281 282 finally: 283 f.close() 284 285 def populate_objects(self): 286 287 "Populate objects using attribute and usage information." 288 289 all_attrs = {} 290 291 # Partition attributes into separate sections so that class and instance 292 # attributes are treated separately. 293 294 for source, objtype in [ 295 (self.importer.all_class_attrs, "<class>"), 296 (self.importer.all_instance_attrs, "<instance>"), 297 (self.importer.all_module_attrs, "<module>") 298 ]: 299 for name, attrs in source.items(): 300 all_attrs[(objtype, name)] = attrs 301 302 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 303 304 def populate_parameters(self): 305 306 "Populate parameter tables using parameter information." 307 308 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 309 310 def position_attributes(self): 311 312 "Position specific attribute references." 313 314 # Reverse the location mappings. 315 316 attr_locations = self.attr_locations = {} 317 318 for i, attrnames in enumerate(self.locations): 319 for attrname in attrnames: 320 attr_locations[attrname] = i 321 322 # Record the structures. 323 324 for source, objtype in [ 325 (self.importer.all_class_attrs, "<class>"), 326 (self.importer.all_instance_attrs, "<instance>"), 327 (self.importer.all_module_attrs, "<module>") 328 ]: 329 330 for name, attrnames in source.items(): 331 key = Reference(objtype, name) 332 l = self.structures[key] = [None] * len(attrnames) 333 for attrname in attrnames: 334 position = attr_locations[attrname] 335 if position >= len(l): 336 l.extend([None] * (position - len(l) + 1)) 337 l[position] = attrname 338 339 def initialise_access_instructions(self): 340 341 "Expand access plans into instruction sequences." 342 343 for access_location, access_plan in self.deducer.access_plans.items(): 344 345 # Obtain the access details. 346 347 name, test, test_type, base, \ 348 traversed, traversal_modes, attrnames, \ 349 context, context_test, \ 350 first_method, final_method, \ 351 origin, accessor_kinds = access_plan 352 353 instructions = [] 354 emit = instructions.append 355 356 if base: 357 original_accessor = base 358 else: 359 original_accessor = "<expr>" # use a generic placeholder 360 361 # Prepare for any first attribute access. 362 363 if traversed: 364 attrname = traversed[0] 365 del traversed[0] 366 elif attrnames: 367 attrname = attrnames[0] 368 del attrnames[0] 369 370 # Perform the first access explicitly if at least one operation 371 # requires it. 372 373 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 374 375 # Determine whether the first access involves assignment. 376 377 assigning = not traversed and not attrnames and final_method == "assign" 378 379 # Set the context if already available. 380 381 if context == "base": 382 accessor = context_var = (base,) 383 elif context == "original-accessor": 384 385 # Prevent re-evaluation of any dynamic expression by storing it. 386 387 if original_accessor == "<expr>": 388 emit(("__set_accessor", original_accessor)) 389 accessor = context_var = ("<accessor>",) 390 else: 391 accessor = context_var = (original_accessor,) 392 393 # Assigning does not set the context. 394 395 elif context in ("final-accessor", "unset") and access_first_attribute: 396 397 # Prevent re-evaluation of any dynamic expression by storing it. 398 399 if original_accessor == "<expr>": 400 emit(("__set_accessor", original_accessor)) 401 accessor = ("<accessor>",) 402 else: 403 accessor = (original_accessor,) 404 405 # Apply any test. 406 407 if test == "specific-type": 408 accessor = ("__test_specific_type", accessor, test_type) 409 elif test == "specific-instance": 410 accessor = ("__test_specific_instance", accessor, test_type) 411 elif test == "specific-object": 412 accessor = ("__test_specific_object", accessor, test_type) 413 elif test == "common-type": 414 accessor = ("__test_common_type", accessor, test_type) 415 elif test == "common-instance": 416 accessor = ("__test_common_instance", accessor, test_type) 417 elif test == "common-object": 418 accessor = ("__test_common_object", accessor, test_type) 419 420 # Perform the first or final access. 421 # The access only needs performing if the resulting accessor is used. 422 423 remaining = len(traversed + attrnames) 424 425 if access_first_attribute: 426 427 if first_method == "relative-class": 428 if assigning: 429 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 430 else: 431 accessor = ("__load_via_class", accessor, attrname) 432 433 elif first_method == "relative-object": 434 if assigning: 435 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 436 else: 437 accessor = ("__load_via_object", accessor, attrname) 438 439 elif first_method == "relative-object-class": 440 if assigning: 441 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 442 else: 443 accessor = ("__get_class_and_load", accessor, attrname) 444 445 elif first_method == "check-class": 446 if assigning: 447 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 448 else: 449 accessor = ("__check_and_load_via_class", accessor, attrname) 450 451 elif first_method == "check-object": 452 if assigning: 453 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 454 else: 455 accessor = ("__check_and_load_via_object", accessor, attrname) 456 457 elif first_method == "check-object-class": 458 if assigning: 459 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 460 else: 461 accessor = ("__check_and_load_via_any", accessor, attrname) 462 463 # Traverse attributes using the accessor. 464 465 if traversed: 466 for attrname, traversal_mode in zip(traversed, traversal_modes): 467 assigning = remaining == 1 and final_method == "assign" 468 469 # Set the context, if appropriate. 470 471 if remaining == 1 and final_method != "assign" and context == "final-accessor": 472 emit(("__set_context", accessor)) 473 accessor = context_var = "<context>" 474 475 # Perform the access only if not achieved directly. 476 477 if remaining > 1 or final_method in ("access", "assign"): 478 479 if traversal_mode == "class": 480 if assigning: 481 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 482 else: 483 accessor = ("__load_via_class", accessor, attrname) 484 else: 485 if assigning: 486 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 487 else: 488 accessor = ("__load_via_object", accessor, attrname) 489 490 remaining -= 1 491 492 if attrnames: 493 for attrname in attrnames: 494 assigning = remaining == 1 and final_method == "assign" 495 496 # Set the context, if appropriate. 497 498 if remaining == 1 and final_method != "assign" and context == "final-accessor": 499 emit(("__set_context", accessor)) 500 accessor = context_var = "<context>" 501 502 # Perform the access only if not achieved directly. 503 504 if remaining > 1 or final_method in ("access", "assign"): 505 506 if assigning: 507 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 508 else: 509 accessor = ("__check_and_load_via_any", accessor, attrname) 510 511 remaining -= 1 512 513 # Define or emit the means of accessing the actual target. 514 515 if final_method == "static-assign": 516 parent, attrname = origin.rsplit(".", 1) 517 emit(("__store_via_object", parent, attrname, "<assexpr>")) 518 519 elif final_method in ("static", "static-invoke"): 520 parent, attrname = origin.rsplit(".", 1) 521 accessor = ("__load_static", parent, origin) 522 523 # Wrap accesses in context operations. 524 525 if context_test == "test": 526 emit(("__test_context", context_var, accessor)) 527 528 elif context_test == "replace": 529 530 # Static invocation targets have a context added but no other 531 # transformation performed. 532 533 if final_method == "static-invoke": 534 emit(("__update_context", context_var, accessor)) 535 536 # Other invocation targets gain a context and have the bound 537 # version of the callable activated. 538 539 else: 540 emit(("__replace_context", context_var, accessor)) 541 542 elif final_method not in ("assign", "static-assign"): 543 emit(accessor) 544 545 self.access_instructions[access_location] = instructions 546 self.accessor_kinds[access_location] = accessor_kinds 547 548 def get_ambiguity_for_attributes(self, attrnames): 549 550 """ 551 Return a list of attribute position alternatives corresponding to each 552 of the given 'attrnames'. 553 """ 554 555 ambiguity = [] 556 557 for attrname in attrnames: 558 position = self.attr_locations[attrname] 559 ambiguity.append(len(self.locations[position])) 560 561 return ambiguity 562 563 def position_parameters(self): 564 565 "Position the parameters for each function's parameter table." 566 567 # Reverse the location mappings. 568 569 param_locations = self.param_locations = {} 570 571 for i, argnames in enumerate(self.arg_locations): 572 573 # Position the arguments. 574 575 for argname in argnames: 576 param_locations[argname] = i 577 578 for name, argnames in self.importer.function_parameters.items(): 579 580 # Allocate an extra context parameter in the table. 581 582 l = self.parameters[name] = [None] + [None] * len(argnames) 583 584 # Store an entry for the name along with the name's position in the 585 # parameter list. 586 587 for pos, argname in enumerate(argnames): 588 589 # Position the argument in the table. 590 591 position = param_locations[argname] 592 if position >= len(l): 593 l.extend([None] * (position - len(l) + 1)) 594 595 # Indicate an argument list position starting from 1 (after the 596 # initial context argument). 597 598 l[position] = (argname, pos + 1) 599 600 def populate_tables(self): 601 602 """ 603 Assign identifiers to attributes and encode structure information using 604 these identifiers. 605 """ 606 607 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 608 609 # Record the numbers indicating the locations of the names. 610 611 for key, attrnames in self.structures.items(): 612 l = self.attr_table[key] = [] 613 for attrname in attrnames: 614 if attrname is None: 615 l.append(None) 616 else: 617 l.append(d[attrname]) 618 619 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 620 621 # Record the numbers indicating the locations of the names. 622 623 for key, values in self.parameters.items(): 624 l = self.param_table[key] = [] 625 for value in values: 626 if value is None: 627 l.append(None) 628 else: 629 name, pos = value 630 l.append((d[name], pos)) 631 632 def _get_name_mapping(self, locations): 633 634 """ 635 Get a sorted list of names from 'locations', then map them to 636 identifying numbers. Return the list and the mapping. 637 """ 638 639 all_names = locations.keys() 640 all_names.sort() 641 return all_names, dict([(name, i) for i, name in enumerate(all_names)]) 642 643 def populate_constants(self): 644 645 """ 646 Obtain a collection of distinct constant literals, building a mapping 647 from constant references to those in this collection. 648 """ 649 650 # Obtain mappings from constant values to identifiers. 651 652 self.constants = {} 653 654 for path, constants in self.importer.all_constants.items(): 655 for constant, n in constants.items(): 656 657 # Record constants and obtain a number for them. 658 659 add_counter_item(self.constants, constant) 660 661 self.constant_numbers = {} 662 663 for name, (value, value_type) in self.importer.all_constant_values.items(): 664 self.constant_numbers[name] = self.constants[value] 665 666 def combine_rows(a, b): 667 c = [] 668 for i, j in zip(a, b): 669 if i is None or j is None: 670 c.append(i or j) 671 else: 672 return None 673 return c 674 675 def get_attributes_and_sizes(d): 676 677 """ 678 Return a matrix of attributes, a list of type names corresponding to columns 679 in the matrix, and a list of ranked sizes each indicating... 680 681 * a weighted size depending on the kind of object 682 * the minimum size of an object employing an attribute 683 * the number of free columns in the matrix for the attribute 684 * the attribute name itself 685 """ 686 687 attrs = {} 688 sizes = {} 689 objtypes = {} 690 691 for name, attrnames in d.items(): 692 objtype, _name = name 693 694 for attrname in attrnames: 695 696 # Record each type supporting the attribute. 697 698 init_item(attrs, attrname, set) 699 attrs[attrname].add(name) 700 701 # Maintain a record of the smallest object size supporting the given 702 # attribute. 703 704 if not sizes.has_key(attrname): 705 sizes[attrname] = len(attrnames) 706 else: 707 sizes[attrname] = min(sizes[attrname], len(attrnames)) 708 709 # Record the object types/kinds supporting the attribute. 710 711 init_item(objtypes, attrname, set) 712 objtypes[attrname].add(objtype) 713 714 # Obtain attribute details in order of size and occupancy. 715 716 names = d.keys() 717 718 rsizes = [] 719 for attrname, size in sizes.items(): 720 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 721 occupied = len(attrs[attrname]) 722 key = (priority * size, size, len(names) - occupied, attrname) 723 rsizes.append(key) 724 725 rsizes.sort() 726 727 # Make a matrix of attributes. 728 729 matrix = {} 730 for attrname, types in attrs.items(): 731 row = [] 732 for name in names: 733 if name in types: 734 row.append(attrname) 735 else: 736 row.append(None) 737 matrix[attrname] = row 738 739 return matrix, names, rsizes 740 741 def get_parameters_and_sizes(d): 742 743 """ 744 Return a matrix of parameters, a list of functions corresponding to columns 745 in the matrix, and a list of ranked sizes each indicating... 746 747 * a weighted size depending on the kind of object 748 * the minimum size of a parameter list employing a parameter 749 * the number of free columns in the matrix for the parameter 750 * the parameter name itself 751 752 This is a slightly simpler version of the above 'get_attributes_and_sizes' 753 function. 754 """ 755 756 params = {} 757 sizes = {} 758 759 for name, argnames in d.items(): 760 for argname in argnames: 761 762 # Record each function supporting the parameter. 763 764 init_item(params, argname, set) 765 params[argname].add(name) 766 767 # Maintain a record of the smallest parameter list supporting the 768 # given parameter. 769 770 if not sizes.has_key(argname): 771 sizes[argname] = len(argnames) 772 else: 773 sizes[argname] = min(sizes[argname], len(argnames)) 774 775 # Obtain attribute details in order of size and occupancy. 776 777 names = d.keys() 778 779 rsizes = [] 780 for argname, size in sizes.items(): 781 occupied = len(params[argname]) 782 key = (size, size, len(names) - occupied, argname) 783 rsizes.append(key) 784 785 rsizes.sort() 786 787 # Make a matrix of parameters. 788 789 matrix = {} 790 for argname, types in params.items(): 791 row = [] 792 for name in names: 793 if name in types: 794 row.append(argname) 795 else: 796 row.append(None) 797 matrix[argname] = row 798 799 return matrix, names, rsizes 800 801 def get_allocated_locations(d, fn): 802 803 """ 804 Return a list where each element corresponds to a structure location and 805 contains a set of attribute names that may be stored at that location, given 806 a mapping 'd' whose keys are (object type, object name) tuples and whose 807 values are collections of attributes. 808 """ 809 810 matrix, names, rsizes = fn(d) 811 allocated = [] 812 813 x = 0 814 while x < len(rsizes): 815 weight, size, free, attrname = rsizes[x] 816 base = matrix[attrname] 817 y = x + 1 818 while y < len(rsizes): 819 _weight, _size, _free, _attrname = rsizes[y] 820 occupied = len(names) - _free 821 if occupied > free: 822 break 823 new = combine_rows(base, matrix[_attrname]) 824 if new: 825 del matrix[_attrname] 826 del rsizes[y] 827 base = new 828 free -= occupied 829 else: 830 y += 1 831 allocated.append(base) 832 x += 1 833 834 # Return the list of attribute names from each row of the allocated 835 # attributes table. 836 837 locations = [] 838 for attrnames in allocated: 839 l = set() 840 for attrname in attrnames: 841 if attrname: 842 l.add(attrname) 843 locations.append(l) 844 return locations 845 846 # vim: tabstop=4 expandtab shiftwidth=4