1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type, encoding), n in self.constants.items(): 279 constants.append((n, value_type, encoding, value)) 280 constants.sort() 281 for n, value_type, encoding, value in constants: 282 print >>f, value_type, encoding or "{}", repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 self.all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objkind in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 302 for name, attrnames in source.items(): 303 304 # Remove temporary names from structures. 305 306 attrnames = filter(lambda x: not x.startswith("$t"), attrnames) 307 self.all_attrs[(objkind, name)] = attrnames 308 309 self.locations = get_allocated_locations(self.all_attrs, get_attributes_and_sizes) 310 311 def populate_parameters(self): 312 313 "Populate parameter tables using parameter information." 314 315 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 316 317 def position_attributes(self): 318 319 "Position specific attribute references." 320 321 # Reverse the location mappings. 322 323 attr_locations = self.attr_locations = {} 324 325 for i, attrnames in enumerate(self.locations): 326 for attrname in attrnames: 327 attr_locations[attrname] = i 328 329 # Record the structures. 330 331 for (objkind, name), attrnames in self.all_attrs.items(): 332 key = Reference(objkind, name) 333 l = self.structures[key] = [None] * len(attrnames) 334 for attrname in attrnames: 335 position = attr_locations[attrname] 336 if position >= len(l): 337 l.extend([None] * (position - len(l) + 1)) 338 l[position] = attrname 339 340 def initialise_access_instructions(self): 341 342 "Expand access plans into instruction sequences." 343 344 for access_location, access_plan in self.deducer.access_plans.items(): 345 346 # Obtain the access details. 347 348 name, test, test_type, base, \ 349 traversed, traversal_modes, attrnames, \ 350 context, context_test, \ 351 first_method, final_method, \ 352 origin, accessor_kinds = access_plan 353 354 instructions = [] 355 emit = instructions.append 356 357 if base: 358 original_accessor = base 359 else: 360 original_accessor = "<expr>" # use a generic placeholder 361 362 # Prepare for any first attribute access. 363 364 if traversed: 365 attrname = traversed[0] 366 del traversed[0] 367 elif attrnames: 368 attrname = attrnames[0] 369 del attrnames[0] 370 371 # Perform the first access explicitly if at least one operation 372 # requires it. 373 374 access_first_attribute = final_method in ("access", "access-invoke", "assign") or traversed or attrnames 375 376 # Determine whether the first access involves assignment. 377 378 assigning = not traversed and not attrnames and final_method == "assign" 379 set_accessor = assigning and "<set_target_accessor>" or "<set_accessor>" 380 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 381 382 # Set the context if already available. 383 384 if context == "base": 385 accessor = context_var = (base,) 386 elif context == "original-accessor": 387 388 # Prevent re-evaluation of any dynamic expression by storing it. 389 390 if original_accessor == "<expr>": 391 if final_method in ("access-invoke", "static-invoke"): 392 emit(("<set_context>", original_accessor)) 393 accessor = context_var = ("<context>",) 394 else: 395 emit((set_accessor, original_accessor)) 396 accessor = context_var = (stored_accessor,) 397 else: 398 accessor = context_var = (original_accessor,) 399 400 # Assigning does not set the context. 401 402 elif context in ("final-accessor", "unset") and access_first_attribute: 403 404 # Prevent re-evaluation of any dynamic expression by storing it. 405 406 if original_accessor == "<expr>": 407 emit((set_accessor, original_accessor)) 408 accessor = (stored_accessor,) 409 else: 410 accessor = (original_accessor,) 411 412 # Apply any test. 413 414 if test[0] == "test": 415 accessor = ("__%s_%s_%s" % test, accessor, test_type) 416 417 # Perform the first or final access. 418 # The access only needs performing if the resulting accessor is used. 419 420 remaining = len(traversed + attrnames) 421 422 if access_first_attribute: 423 424 if first_method == "relative-class": 425 if assigning: 426 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 427 else: 428 accessor = ("__load_via_class", accessor, attrname) 429 430 elif first_method == "relative-object": 431 if assigning: 432 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 433 else: 434 accessor = ("__load_via_object", accessor, attrname) 435 436 elif first_method == "relative-object-class": 437 if assigning: 438 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 439 else: 440 accessor = ("__get_class_and_load", accessor, attrname) 441 442 elif first_method == "check-class": 443 if assigning: 444 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 445 else: 446 accessor = ("__check_and_load_via_class", accessor, attrname) 447 448 elif first_method == "check-object": 449 if assigning: 450 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 451 else: 452 accessor = ("__check_and_load_via_object", accessor, attrname) 453 454 elif first_method == "check-object-class": 455 if assigning: 456 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 457 else: 458 accessor = ("__check_and_load_via_any", accessor, attrname) 459 460 # Traverse attributes using the accessor. 461 462 if traversed: 463 for attrname, traversal_mode in zip(traversed, traversal_modes): 464 assigning = remaining == 1 and final_method == "assign" 465 466 # Set the context, if appropriate. 467 468 if remaining == 1 and final_method != "assign" and context == "final-accessor": 469 if final_method in ("access-invoke", "static-invoke"): 470 emit(("<set_context>", accessor)) 471 accessor = context_var = "<context>" 472 else: 473 emit(("<set_private_context>", accessor)) 474 accessor = context_var = "<private_context>" 475 476 # Perform the access only if not achieved directly. 477 478 if remaining > 1 or final_method in ("access", "access-invoke", "assign"): 479 480 if traversal_mode == "class": 481 if assigning: 482 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 483 else: 484 accessor = ("__load_via_class", accessor, attrname) 485 else: 486 if assigning: 487 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 488 else: 489 accessor = ("__load_via_object", accessor, attrname) 490 491 remaining -= 1 492 493 if attrnames: 494 for attrname in attrnames: 495 assigning = remaining == 1 and final_method == "assign" 496 497 # Set the context, if appropriate. 498 499 if remaining == 1 and final_method != "assign" and context == "final-accessor": 500 if final_method in ("access-invoke", "static-invoke"): 501 emit(("<set_context>", accessor)) 502 accessor = context_var = "<context>" 503 else: 504 emit(("<set_private_context>", accessor)) 505 accessor = context_var = "<private_context>" 506 507 # Perform the access only if not achieved directly. 508 509 if remaining > 1 or final_method in ("access", "access-invoke", "assign"): 510 511 if assigning: 512 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 513 else: 514 accessor = ("__check_and_load_via_any", accessor, attrname) 515 516 remaining -= 1 517 518 # Define or emit the means of accessing the actual target. 519 520 # Assignments to known attributes. 521 522 if final_method == "static-assign": 523 parent, attrname = origin.rsplit(".", 1) 524 emit(("__store_via_object", parent, attrname, "<assexpr>")) 525 526 # Invoked attributes employ a separate context. 527 528 elif final_method in ("static", "static-invoke"): 529 accessor = ("__load_static_ignore", origin) 530 531 # Wrap accesses in context operations. 532 533 if context_test == "test": 534 if final_method == "static": 535 emit(("__load_static_test", context_var, origin)) 536 elif final_method == "static-invoke": 537 emit(("<test_context_static>", context_var, origin)) 538 elif final_method == "access-invoke": 539 emit(("<test_context>", context_var, accessor)) 540 else: 541 emit(("__test_context", context_var, accessor)) 542 543 elif context_test == "replace": 544 545 # Produce an object with updated context. 546 547 if final_method == "static": 548 emit(("__load_static_replace", context_var, origin)) 549 550 # Omit the context update operation where the target is static 551 # and the context is recorded separately. 552 553 elif final_method == "static-invoke": 554 pass 555 556 # Only update any context if no separate context is used. 557 558 elif final_method != "access-invoke": 559 emit(("__update_context", context_var, accessor)) 560 561 else: 562 emit(accessor) 563 564 # Omit the accessor for assignments and for invocations of static 565 # targets. 566 567 elif final_method not in ("assign", "static-assign", "static-invoke"): 568 emit(accessor) 569 570 self.access_instructions[access_location] = instructions 571 self.accessor_kinds[access_location] = accessor_kinds 572 573 def get_ambiguity_for_attributes(self, attrnames): 574 575 """ 576 Return a list of attribute position alternatives corresponding to each 577 of the given 'attrnames'. 578 """ 579 580 ambiguity = [] 581 582 for attrname in attrnames: 583 position = self.attr_locations[attrname] 584 ambiguity.append(len(self.locations[position])) 585 586 return ambiguity 587 588 def position_parameters(self): 589 590 "Position the parameters for each function's parameter table." 591 592 # Reverse the location mappings. 593 594 param_locations = self.param_locations = {} 595 596 for i, argnames in enumerate(self.arg_locations): 597 598 # Position the arguments. 599 600 for argname in argnames: 601 param_locations[argname] = i 602 603 for name, argnames in self.importer.function_parameters.items(): 604 605 # Allocate an extra context parameter in the table. 606 607 l = self.parameters[name] = [None] + [None] * len(argnames) 608 609 # Store an entry for the name along with the name's position in the 610 # parameter list. 611 612 for pos, argname in enumerate(argnames): 613 614 # Position the argument in the table. 615 616 position = param_locations[argname] 617 if position >= len(l): 618 l.extend([None] * (position - len(l) + 1)) 619 620 # Indicate an argument list position starting from 1 (after the 621 # initial context argument). 622 623 l[position] = (argname, pos + 1) 624 625 def populate_tables(self): 626 627 """ 628 Assign identifiers to attributes and encode structure information using 629 these identifiers. 630 """ 631 632 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 633 634 # Record the numbers indicating the locations of the names. 635 636 for key, attrnames in self.structures.items(): 637 l = self.attr_table[key] = [] 638 for attrname in attrnames: 639 if attrname is None: 640 l.append(None) 641 else: 642 l.append(d[attrname]) 643 644 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 645 646 # Record the numbers indicating the locations of the names. 647 648 for key, values in self.parameters.items(): 649 l = self.param_table[key] = [] 650 for value in values: 651 if value is None: 652 l.append(None) 653 else: 654 name, pos = value 655 l.append((d[name], pos)) 656 657 def _get_name_mapping(self, locations): 658 659 """ 660 Get a sorted list of names from 'locations', then map them to 661 identifying numbers. Return the list and the mapping. 662 """ 663 664 all_names = locations.keys() 665 all_names.sort() 666 d = {} 667 for i, name in enumerate(all_names): 668 d[name] = i 669 return all_names, d 670 671 def populate_constants(self): 672 673 """ 674 Obtain a collection of distinct constant literals, building a mapping 675 from constant references to those in this collection. 676 """ 677 678 # Obtain mappings from constant values to identifiers. 679 680 self.constants = {} 681 682 for path, constants in self.importer.all_constants.items(): 683 684 # Record constants and obtain a number for them. 685 # Each constant is actually (value, value_type, encoding). 686 687 for constant, n in constants.items(): 688 add_counter_item(self.constants, constant) 689 690 self.constant_numbers = {} 691 692 for name, constant in self.importer.all_constant_values.items(): 693 self.constant_numbers[name] = self.constants[constant] 694 695 def combine_rows(a, b): 696 c = [] 697 for i, j in zip(a, b): 698 if i is None or j is None: 699 c.append(i or j) 700 else: 701 return None 702 return c 703 704 def get_attributes_and_sizes(d): 705 706 """ 707 Return a matrix of attributes, a list of type names corresponding to columns 708 in the matrix, and a list of ranked sizes each indicating... 709 710 * a weighted size depending on the kind of object 711 * the minimum size of an object employing an attribute 712 * the number of free columns in the matrix for the attribute 713 * the attribute name itself 714 """ 715 716 attrs = {} 717 sizes = {} 718 objkinds = {} 719 720 for name, attrnames in d.items(): 721 objkind, _name = name 722 723 for attrname in attrnames: 724 725 # Record each type supporting the attribute. 726 727 init_item(attrs, attrname, set) 728 attrs[attrname].add(name) 729 730 # Maintain a record of the smallest object size supporting the given 731 # attribute. 732 733 if not sizes.has_key(attrname): 734 sizes[attrname] = len(attrnames) 735 else: 736 sizes[attrname] = min(sizes[attrname], len(attrnames)) 737 738 # Record the object types/kinds supporting the attribute. 739 740 init_item(objkinds, attrname, set) 741 objkinds[attrname].add(objkind) 742 743 # Obtain attribute details in order of size and occupancy. 744 745 names = d.keys() 746 747 rsizes = [] 748 for attrname, size in sizes.items(): 749 priority = "<instance>" in objkinds[attrname] and 0.5 or 1 750 occupied = len(attrs[attrname]) 751 key = (priority * size, size, len(names) - occupied, attrname) 752 rsizes.append(key) 753 754 rsizes.sort() 755 756 # Make a matrix of attributes. 757 758 matrix = {} 759 for attrname, types in attrs.items(): 760 row = [] 761 for name in names: 762 if name in types: 763 row.append(attrname) 764 else: 765 row.append(None) 766 matrix[attrname] = row 767 768 return matrix, names, rsizes 769 770 def get_parameters_and_sizes(d): 771 772 """ 773 Return a matrix of parameters, a list of functions corresponding to columns 774 in the matrix, and a list of ranked sizes each indicating... 775 776 * a weighted size depending on the kind of object 777 * the minimum size of a parameter list employing a parameter 778 * the number of free columns in the matrix for the parameter 779 * the parameter name itself 780 781 This is a slightly simpler version of the above 'get_attributes_and_sizes' 782 function. 783 """ 784 785 params = {} 786 sizes = {} 787 788 for name, argnames in d.items(): 789 for argname in argnames: 790 791 # Record each function supporting the parameter. 792 793 init_item(params, argname, set) 794 params[argname].add(name) 795 796 # Maintain a record of the smallest parameter list supporting the 797 # given parameter. 798 799 if not sizes.has_key(argname): 800 sizes[argname] = len(argnames) 801 else: 802 sizes[argname] = min(sizes[argname], len(argnames)) 803 804 # Obtain attribute details in order of size and occupancy. 805 806 names = d.keys() 807 808 rsizes = [] 809 for argname, size in sizes.items(): 810 occupied = len(params[argname]) 811 key = (size, size, len(names) - occupied, argname) 812 rsizes.append(key) 813 814 rsizes.sort() 815 816 # Make a matrix of parameters. 817 818 matrix = {} 819 for argname, types in params.items(): 820 row = [] 821 for name in names: 822 if name in types: 823 row.append(argname) 824 else: 825 row.append(None) 826 matrix[argname] = row 827 828 return matrix, names, rsizes 829 830 def get_allocated_locations(d, fn): 831 832 """ 833 Return a list where each element corresponds to a structure location and 834 contains a set of attribute names that may be stored at that location, given 835 a mapping 'd' whose keys are (object kind, object name) tuples and whose 836 values are collections of attributes. 837 """ 838 839 matrix, names, rsizes = fn(d) 840 allocated = [] 841 842 x = 0 843 while x < len(rsizes): 844 weight, size, free, attrname = rsizes[x] 845 base = matrix[attrname] 846 y = x + 1 847 while y < len(rsizes): 848 _weight, _size, _free, _attrname = rsizes[y] 849 occupied = len(names) - _free 850 if occupied > free: 851 break 852 new = combine_rows(base, matrix[_attrname]) 853 if new: 854 del matrix[_attrname] 855 del rsizes[y] 856 base = new 857 free -= occupied 858 else: 859 y += 1 860 allocated.append(base) 861 x += 1 862 863 # Return the list of attribute names from each row of the allocated 864 # attributes table. 865 866 locations = [] 867 for attrnames in allocated: 868 l = set() 869 for attrname in attrnames: 870 if attrname: 871 l.add(attrname) 872 locations.append(l) 873 return locations 874 875 # vim: tabstop=4 expandtab shiftwidth=4