1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type, encoding), n in self.constants.items(): 279 constants.append((n, value_type, encoding, value)) 280 constants.sort() 281 for n, value_type, encoding, value in constants: 282 print >>f, value_type, encoding or "{}", repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objtype in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 for name, attrs in source.items(): 302 all_attrs[(objtype, name)] = attrs 303 304 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 305 306 def populate_parameters(self): 307 308 "Populate parameter tables using parameter information." 309 310 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 311 312 def position_attributes(self): 313 314 "Position specific attribute references." 315 316 # Reverse the location mappings. 317 318 attr_locations = self.attr_locations = {} 319 320 for i, attrnames in enumerate(self.locations): 321 for attrname in attrnames: 322 attr_locations[attrname] = i 323 324 # Record the structures. 325 326 for source, objtype in [ 327 (self.importer.all_class_attrs, "<class>"), 328 (self.importer.all_instance_attrs, "<instance>"), 329 (self.importer.all_module_attrs, "<module>") 330 ]: 331 332 for name, attrnames in source.items(): 333 key = Reference(objtype, name) 334 l = self.structures[key] = [None] * len(attrnames) 335 for attrname in attrnames: 336 position = attr_locations[attrname] 337 if position >= len(l): 338 l.extend([None] * (position - len(l) + 1)) 339 l[position] = attrname 340 341 def initialise_access_instructions(self): 342 343 "Expand access plans into instruction sequences." 344 345 for access_location, access_plan in self.deducer.access_plans.items(): 346 347 # Obtain the access details. 348 349 name, test, test_type, base, \ 350 traversed, traversal_modes, attrnames, \ 351 context, context_test, \ 352 first_method, final_method, \ 353 origin, accessor_kinds = access_plan 354 355 instructions = [] 356 emit = instructions.append 357 358 if base: 359 original_accessor = base 360 else: 361 original_accessor = "<expr>" # use a generic placeholder 362 363 # Prepare for any first attribute access. 364 365 if traversed: 366 attrname = traversed[0] 367 del traversed[0] 368 elif attrnames: 369 attrname = attrnames[0] 370 del attrnames[0] 371 372 # Perform the first access explicitly if at least one operation 373 # requires it. 374 375 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 376 377 # Determine whether the first access involves assignment. 378 379 assigning = not traversed and not attrnames and final_method == "assign" 380 set_accessor = assigning and "<set_target_accessor>" or "<set_accessor>" 381 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 382 383 # Set the context if already available. 384 385 if context == "base": 386 accessor = context_var = (base,) 387 elif context == "original-accessor": 388 389 # Prevent re-evaluation of any dynamic expression by storing it. 390 391 if original_accessor == "<expr>": 392 emit((set_accessor, original_accessor)) 393 accessor = context_var = (stored_accessor,) 394 else: 395 accessor = context_var = (original_accessor,) 396 397 # Assigning does not set the context. 398 399 elif context in ("final-accessor", "unset") and access_first_attribute: 400 401 # Prevent re-evaluation of any dynamic expression by storing it. 402 403 if original_accessor == "<expr>": 404 emit((set_accessor, original_accessor)) 405 accessor = (stored_accessor,) 406 else: 407 accessor = (original_accessor,) 408 409 # Apply any test. 410 411 if test[0] == "test": 412 accessor = ("__%s_%s_%s" % test, accessor, test_type) 413 414 # Perform the first or final access. 415 # The access only needs performing if the resulting accessor is used. 416 417 remaining = len(traversed + attrnames) 418 419 if access_first_attribute: 420 421 if first_method == "relative-class": 422 if assigning: 423 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 424 else: 425 accessor = ("__load_via_class", accessor, attrname) 426 427 elif first_method == "relative-object": 428 if assigning: 429 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 430 else: 431 accessor = ("__load_via_object", accessor, attrname) 432 433 elif first_method == "relative-object-class": 434 if assigning: 435 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 436 else: 437 accessor = ("__get_class_and_load", accessor, attrname) 438 439 elif first_method == "check-class": 440 if assigning: 441 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 442 else: 443 accessor = ("__check_and_load_via_class", accessor, attrname) 444 445 elif first_method == "check-object": 446 if assigning: 447 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 448 else: 449 accessor = ("__check_and_load_via_object", accessor, attrname) 450 451 elif first_method == "check-object-class": 452 if assigning: 453 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 454 else: 455 accessor = ("__check_and_load_via_any", accessor, attrname) 456 457 # Traverse attributes using the accessor. 458 459 if traversed: 460 for attrname, traversal_mode in zip(traversed, traversal_modes): 461 assigning = remaining == 1 and final_method == "assign" 462 463 # Set the context, if appropriate. 464 465 if remaining == 1 and final_method != "assign" and context == "final-accessor": 466 emit(("__set_context", accessor)) 467 accessor = context_var = "<context>" 468 469 # Perform the access only if not achieved directly. 470 471 if remaining > 1 or final_method in ("access", "assign"): 472 473 if traversal_mode == "class": 474 if assigning: 475 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 476 else: 477 accessor = ("__load_via_class", accessor, attrname) 478 else: 479 if assigning: 480 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 481 else: 482 accessor = ("__load_via_object", accessor, attrname) 483 484 remaining -= 1 485 486 if attrnames: 487 for attrname in attrnames: 488 assigning = remaining == 1 and final_method == "assign" 489 490 # Set the context, if appropriate. 491 492 if remaining == 1 and final_method != "assign" and context == "final-accessor": 493 emit(("__set_context", accessor)) 494 accessor = context_var = "<context>" 495 496 # Perform the access only if not achieved directly. 497 498 if remaining > 1 or final_method in ("access", "assign"): 499 500 if assigning: 501 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 502 else: 503 accessor = ("__check_and_load_via_any", accessor, attrname) 504 505 remaining -= 1 506 507 # Define or emit the means of accessing the actual target. 508 509 if final_method == "static-assign": 510 parent, attrname = origin.rsplit(".", 1) 511 emit(("__store_via_object", parent, attrname, "<assexpr>")) 512 513 elif final_method in ("static", "static-invoke"): 514 parent, attrname = origin.rsplit(".", 1) 515 accessor = ("__load_static", parent, origin) 516 517 # Wrap accesses in context operations. 518 519 if context_test == "test": 520 emit(("__test_context", context_var, accessor)) 521 522 elif context_test == "replace": 523 524 # Static invocation targets have a context added but no other 525 # transformation performed. 526 527 if final_method == "static-invoke": 528 emit(("__update_context", context_var, accessor)) 529 530 # Other invocation targets gain a context and have the bound 531 # version of the callable activated. 532 533 else: 534 emit(("__replace_context", context_var, accessor)) 535 536 elif final_method not in ("assign", "static-assign"): 537 emit(accessor) 538 539 self.access_instructions[access_location] = instructions 540 self.accessor_kinds[access_location] = accessor_kinds 541 542 def get_ambiguity_for_attributes(self, attrnames): 543 544 """ 545 Return a list of attribute position alternatives corresponding to each 546 of the given 'attrnames'. 547 """ 548 549 ambiguity = [] 550 551 for attrname in attrnames: 552 position = self.attr_locations[attrname] 553 ambiguity.append(len(self.locations[position])) 554 555 return ambiguity 556 557 def position_parameters(self): 558 559 "Position the parameters for each function's parameter table." 560 561 # Reverse the location mappings. 562 563 param_locations = self.param_locations = {} 564 565 for i, argnames in enumerate(self.arg_locations): 566 567 # Position the arguments. 568 569 for argname in argnames: 570 param_locations[argname] = i 571 572 for name, argnames in self.importer.function_parameters.items(): 573 574 # Allocate an extra context parameter in the table. 575 576 l = self.parameters[name] = [None] + [None] * len(argnames) 577 578 # Store an entry for the name along with the name's position in the 579 # parameter list. 580 581 for pos, argname in enumerate(argnames): 582 583 # Position the argument in the table. 584 585 position = param_locations[argname] 586 if position >= len(l): 587 l.extend([None] * (position - len(l) + 1)) 588 589 # Indicate an argument list position starting from 1 (after the 590 # initial context argument). 591 592 l[position] = (argname, pos + 1) 593 594 def populate_tables(self): 595 596 """ 597 Assign identifiers to attributes and encode structure information using 598 these identifiers. 599 """ 600 601 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 602 603 # Record the numbers indicating the locations of the names. 604 605 for key, attrnames in self.structures.items(): 606 l = self.attr_table[key] = [] 607 for attrname in attrnames: 608 if attrname is None: 609 l.append(None) 610 else: 611 l.append(d[attrname]) 612 613 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 614 615 # Record the numbers indicating the locations of the names. 616 617 for key, values in self.parameters.items(): 618 l = self.param_table[key] = [] 619 for value in values: 620 if value is None: 621 l.append(None) 622 else: 623 name, pos = value 624 l.append((d[name], pos)) 625 626 def _get_name_mapping(self, locations): 627 628 """ 629 Get a sorted list of names from 'locations', then map them to 630 identifying numbers. Return the list and the mapping. 631 """ 632 633 all_names = locations.keys() 634 all_names.sort() 635 d = {} 636 for i, name in enumerate(all_names): 637 d[name] = i 638 return all_names, d 639 640 def populate_constants(self): 641 642 """ 643 Obtain a collection of distinct constant literals, building a mapping 644 from constant references to those in this collection. 645 """ 646 647 # Obtain mappings from constant values to identifiers. 648 649 self.constants = {} 650 651 for path, constants in self.importer.all_constants.items(): 652 653 # Record constants and obtain a number for them. 654 # Each constant is actually (value, value_type, encoding). 655 656 for constant, n in constants.items(): 657 add_counter_item(self.constants, constant) 658 659 self.constant_numbers = {} 660 661 for name, constant in self.importer.all_constant_values.items(): 662 self.constant_numbers[name] = self.constants[constant] 663 664 def combine_rows(a, b): 665 c = [] 666 for i, j in zip(a, b): 667 if i is None or j is None: 668 c.append(i or j) 669 else: 670 return None 671 return c 672 673 def get_attributes_and_sizes(d): 674 675 """ 676 Return a matrix of attributes, a list of type names corresponding to columns 677 in the matrix, and a list of ranked sizes each indicating... 678 679 * a weighted size depending on the kind of object 680 * the minimum size of an object employing an attribute 681 * the number of free columns in the matrix for the attribute 682 * the attribute name itself 683 """ 684 685 attrs = {} 686 sizes = {} 687 objtypes = {} 688 689 for name, attrnames in d.items(): 690 objtype, _name = name 691 692 for attrname in attrnames: 693 694 # Record each type supporting the attribute. 695 696 init_item(attrs, attrname, set) 697 attrs[attrname].add(name) 698 699 # Maintain a record of the smallest object size supporting the given 700 # attribute. 701 702 if not sizes.has_key(attrname): 703 sizes[attrname] = len(attrnames) 704 else: 705 sizes[attrname] = min(sizes[attrname], len(attrnames)) 706 707 # Record the object types/kinds supporting the attribute. 708 709 init_item(objtypes, attrname, set) 710 objtypes[attrname].add(objtype) 711 712 # Obtain attribute details in order of size and occupancy. 713 714 names = d.keys() 715 716 rsizes = [] 717 for attrname, size in sizes.items(): 718 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 719 occupied = len(attrs[attrname]) 720 key = (priority * size, size, len(names) - occupied, attrname) 721 rsizes.append(key) 722 723 rsizes.sort() 724 725 # Make a matrix of attributes. 726 727 matrix = {} 728 for attrname, types in attrs.items(): 729 row = [] 730 for name in names: 731 if name in types: 732 row.append(attrname) 733 else: 734 row.append(None) 735 matrix[attrname] = row 736 737 return matrix, names, rsizes 738 739 def get_parameters_and_sizes(d): 740 741 """ 742 Return a matrix of parameters, a list of functions corresponding to columns 743 in the matrix, and a list of ranked sizes each indicating... 744 745 * a weighted size depending on the kind of object 746 * the minimum size of a parameter list employing a parameter 747 * the number of free columns in the matrix for the parameter 748 * the parameter name itself 749 750 This is a slightly simpler version of the above 'get_attributes_and_sizes' 751 function. 752 """ 753 754 params = {} 755 sizes = {} 756 757 for name, argnames in d.items(): 758 for argname in argnames: 759 760 # Record each function supporting the parameter. 761 762 init_item(params, argname, set) 763 params[argname].add(name) 764 765 # Maintain a record of the smallest parameter list supporting the 766 # given parameter. 767 768 if not sizes.has_key(argname): 769 sizes[argname] = len(argnames) 770 else: 771 sizes[argname] = min(sizes[argname], len(argnames)) 772 773 # Obtain attribute details in order of size and occupancy. 774 775 names = d.keys() 776 777 rsizes = [] 778 for argname, size in sizes.items(): 779 occupied = len(params[argname]) 780 key = (size, size, len(names) - occupied, argname) 781 rsizes.append(key) 782 783 rsizes.sort() 784 785 # Make a matrix of parameters. 786 787 matrix = {} 788 for argname, types in params.items(): 789 row = [] 790 for name in names: 791 if name in types: 792 row.append(argname) 793 else: 794 row.append(None) 795 matrix[argname] = row 796 797 return matrix, names, rsizes 798 799 def get_allocated_locations(d, fn): 800 801 """ 802 Return a list where each element corresponds to a structure location and 803 contains a set of attribute names that may be stored at that location, given 804 a mapping 'd' whose keys are (object type, object name) tuples and whose 805 values are collections of attributes. 806 """ 807 808 matrix, names, rsizes = fn(d) 809 allocated = [] 810 811 x = 0 812 while x < len(rsizes): 813 weight, size, free, attrname = rsizes[x] 814 base = matrix[attrname] 815 y = x + 1 816 while y < len(rsizes): 817 _weight, _size, _free, _attrname = rsizes[y] 818 occupied = len(names) - _free 819 if occupied > free: 820 break 821 new = combine_rows(base, matrix[_attrname]) 822 if new: 823 del matrix[_attrname] 824 del rsizes[y] 825 base = new 826 free -= occupied 827 else: 828 y += 1 829 allocated.append(base) 830 x += 1 831 832 # Return the list of attribute names from each row of the allocated 833 # attributes table. 834 835 locations = [] 836 for attrnames in allocated: 837 l = set() 838 for attrname in attrnames: 839 if attrname: 840 l.add(attrname) 841 locations.append(l) 842 return locations 843 844 # vim: tabstop=4 expandtab shiftwidth=4