1 #!/usr/bin/env python 2 3 """ 4 Inspect and obtain module structure. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from branching import BranchTracker 24 from common import * 25 from errors import InspectError 26 from modules import * 27 from os import listdir 28 from os.path import extsep, split, splitext 29 from referencing import Reference 30 import compiler 31 import sys 32 33 class AccessRef(Result): 34 35 """ 36 A reference to an attribute access that is generally only returned from a 37 processed access for possible initialiser resolution for assignments. 38 """ 39 40 def __init__(self, original_name, attrnames, number): 41 self.original_name = original_name 42 self.attrnames = attrnames 43 self.number = number 44 45 def reference(self): 46 return None 47 48 def __repr__(self): 49 return "AccessRef(%r, %r, %r)" % (self.original_name, self.attrnames, self.number) 50 51 class InvocationRef(Result): 52 53 "An invocation of a name reference." 54 55 def __init__(self, name_ref): 56 self.name_ref = name_ref 57 58 def __repr__(self): 59 return "InvocationRef(%r)" % self.name_ref 60 61 class InspectedModule(BasicModule, CacheWritingModule): 62 63 "A module inspector." 64 65 def __init__(self, name, importer): 66 BasicModule.__init__(self, name, importer) 67 self.in_class = False 68 self.in_conditional = False 69 self.global_attr_accesses = {} 70 71 # Usage tracking. 72 73 self.trackers = [] 74 self.attr_accessor_branches = {} 75 76 def __repr__(self): 77 return "InspectedModule(%r, %r)" % (self.name, self.importer) 78 79 def parse(self, filename): 80 81 "Parse the file having the given 'filename'." 82 83 self.parse_file(filename) 84 85 # Inspect the module. 86 87 self.start_tracking_in_module() 88 89 # Detect and record imports and globals declared in the module. 90 91 self.assign_general_local("__name__", self.get_constant("str", self.name)) 92 self.assign_general_local("__file__", self.get_constant("str", filename)) 93 self.process_structure(self.astnode) 94 95 # Set the class of the module after the definition has occurred. 96 97 ref = self.get_builtin("object") 98 self.set_name("__class__", ref) 99 100 # Get module-level attribute usage details. 101 102 self.stop_tracking_in_module() 103 104 # Check names used and resolve them. 105 106 self.register_submodules() 107 self.loaded = True 108 109 def register_submodules(self): 110 111 "For package modules add submodule details." 112 113 if splitext(split(self.filename)[1])[0] == "__init__": 114 for subname in listdir(split(self.filename)[0]): 115 name, ext = splitext(subname) 116 117 # Add modules whose names are not already defined as globals. 118 119 if ext == ("%spy" % extsep) and name != "__init__" and not self.get_global(name): 120 module_name = self.get_global_path(name) 121 top, submodule = self.get_module(module_name, True) 122 self.set_module(name, submodule, hidden=True) 123 124 def check_special(self): 125 126 "Check special names." 127 128 for name, value in self.special.items(): 129 if value.has_kind("<depends>"): 130 self.find_imported_name(name, self.name) 131 self.special[name] = self.get_object(value.get_origin()) 132 133 def check_names_used(self): 134 135 "Check the names used by each function." 136 137 for path in self.names_used.keys(): 138 self.check_names_used_for_path(path) 139 140 def check_names_used_for_path(self, path): 141 142 "Check the names used by the given 'path'." 143 144 names = self.names_used.get(path) 145 if not names: 146 return 147 148 in_function = self.function_locals.has_key(path) 149 150 for name in names: 151 if name in predefined_constants or in_function and name in self.function_locals[path]: 152 continue 153 154 # Resolve names that have been imported locally. 155 156 self.find_imported_name(name, path) 157 158 # Find local definitions. 159 160 key = "%s.%s" % (path, name) 161 ref = self.get_object(key) 162 if ref: 163 self.name_references[key] = ref.final() or key 164 self.resolve_accesses(path, name, ref) 165 continue 166 167 # Resolve names that have been imported globally. 168 169 self.find_imported_name(name, self.name) 170 171 # Find global or built-in definitions. 172 173 ref = self.get_global_or_builtin(name) 174 objpath = ref and (ref.final() or ref.get_name()) 175 if objpath: 176 self.name_references[key] = objpath 177 self.resolve_accesses(path, name, ref) 178 continue 179 180 print >>sys.stderr, "Name not recognised: %s in %s" % (name, path) 181 init_item(self.names_missing, path, set) 182 self.names_missing[path].add(name) 183 184 def resolve_members(self): 185 186 "Resolve any members referring to deferred references." 187 188 for name, ref in self.objects.items(): 189 if ref.has_kind("<depends>"): 190 ref = self.get_object(ref.get_origin()) 191 ref = ref.alias(name) 192 self.importer.objects[name] = self.objects[name] = ref 193 194 def resolve_accesses(self, path, name, ref): 195 196 """ 197 Resolve any unresolved accesses in the function at the given 'path' 198 for the given 'name' corresponding to the indicated 'ref'. Note that 199 this mechanism cannot resolve things like inherited methods because 200 they are not recorded as program objects in their inherited locations. 201 """ 202 203 attr_accesses = self.global_attr_accesses.get(path) 204 all_attrnames = attr_accesses and attr_accesses.get(name) 205 206 if not all_attrnames: 207 return 208 209 # Insist on constant accessors. 210 211 if not ref.has_kind(["<class>", "<module>"]): 212 return 213 214 found_attrnames = set() 215 216 for attrnames in all_attrnames: 217 218 # Start with the resolved name, adding attributes. 219 220 attrs = ref.get_path() 221 remaining = attrnames.split(".") 222 last_ref = ref 223 224 # Add each component, testing for a constant object. 225 226 while remaining: 227 attrname = remaining[0] 228 attrs.append(attrname) 229 del remaining[0] 230 231 # Find any constant object reference. 232 233 attr_ref = self.get_object(".".join(attrs)) 234 235 # Non-constant accessors terminate the traversal. 236 237 if not attr_ref.has_kind(["<class>", "<module>", "<function>"]): 238 239 # Provide the furthest constant accessor unless the final 240 # access can be resolved. 241 242 if remaining: 243 remaining.insert(0, attrs.pop()) 244 else: 245 last_ref = attr_ref 246 break 247 248 # A module may expose an attribute imported from a hidden 249 # module. 250 251 elif last_ref.has_kind("<module>"): 252 module, leaf_module = self.get_module(last_ref.get_origin()) 253 self.find_imported_name(attrname, module.name, module) 254 255 # Follow any reference to a constant object. 256 # Where the given name refers to an object in another location, 257 # switch to the other location in order to be able to test its 258 # attributes. 259 260 last_ref = attr_ref 261 attrs = attr_ref.get_path() 262 263 # Record a constant accessor only if an object was found 264 # that is different from the namespace involved. 265 266 if last_ref: 267 objpath = ".".join(attrs) 268 if objpath != path: 269 270 # Establish a constant access. 271 272 init_item(self.const_accesses, path, dict) 273 self.const_accesses[path][(name, attrnames)] = (objpath, last_ref, ".".join(remaining)) 274 275 if len(attrs) > 1: 276 found_attrnames.add(attrs[1]) 277 278 # Remove any usage records for the name. 279 280 if found_attrnames: 281 282 # NOTE: Should be only one name version. 283 284 versions = [] 285 for version in self.attr_usage[path][name]: 286 new_usage = set() 287 for usage in version: 288 if found_attrnames.intersection(usage): 289 new_usage.add(tuple(set(usage).difference(found_attrnames))) 290 else: 291 new_usage.add(usage) 292 versions.append(new_usage) 293 294 self.attr_usage[path][name] = versions 295 296 def resolve_initialisers(self): 297 298 "Resolve initialiser values for names." 299 300 # Get the initialisers in each namespace. 301 302 for path, name_initialisers in self.name_initialisers.items(): 303 const_accesses = self.const_accesses.get(path) 304 305 # Resolve values for each name in a scope. 306 307 for name, values in name_initialisers.items(): 308 if path == self.name: 309 assigned_path = name 310 else: 311 assigned_path = "%s.%s" % (path, name) 312 313 initialised_names = {} 314 aliased_names = {} 315 316 for i, name_ref in enumerate(values): 317 318 # Unwrap invocations. 319 320 if isinstance(name_ref, InvocationRef): 321 invocation = True 322 name_ref = name_ref.name_ref 323 else: 324 invocation = False 325 326 # Obtain a usable reference from names or constants. 327 328 if isinstance(name_ref, ResolvedNameRef): 329 if not name_ref.reference(): 330 continue 331 ref = name_ref.reference() 332 333 # Obtain a reference from instances. 334 335 elif isinstance(name_ref, InstanceRef): 336 if not name_ref.reference(): 337 continue 338 ref = name_ref.reference() 339 340 # Resolve accesses that employ constants. 341 342 elif isinstance(name_ref, AccessRef): 343 ref = None 344 345 if const_accesses: 346 resolved_access = const_accesses.get((name_ref.original_name, name_ref.attrnames)) 347 if resolved_access: 348 objpath, ref, remaining_attrnames = resolved_access 349 if remaining_attrnames: 350 ref = None 351 352 # Accesses that do not employ constants cannot be resolved, 353 # but they may be resolvable later. 354 355 if not ref: 356 if not invocation: 357 aliased_names[i] = name_ref.original_name, name_ref.attrnames, name_ref.number 358 continue 359 360 # Attempt to resolve a plain name reference. 361 362 elif isinstance(name_ref, LocalNameRef): 363 key = "%s.%s" % (path, name_ref.name) 364 origin = self.name_references.get(key) 365 366 # Accesses that do not refer to known static objects 367 # cannot be resolved, but they may be resolvable later. 368 369 if not origin: 370 if not invocation: 371 aliased_names[i] = name_ref.name, None, name_ref.number 372 continue 373 374 ref = self.get_object(origin) 375 if not ref: 376 continue 377 378 elif isinstance(name_ref, NameRef): 379 key = "%s.%s" % (path, name_ref.name) 380 origin = self.name_references.get(key) 381 if not origin: 382 continue 383 384 ref = self.get_object(origin) 385 if not ref: 386 continue 387 388 else: 389 continue 390 391 # Convert class invocations to instances. 392 393 if invocation: 394 ref = ref.has_kind("<class>") and ref.instance_of() or None 395 396 if ref: 397 initialised_names[i] = ref 398 399 if initialised_names: 400 self.initialised_names[assigned_path] = initialised_names 401 if aliased_names: 402 self.aliased_names[assigned_path] = aliased_names 403 404 def resolve_literals(self): 405 406 "Resolve constant value types." 407 408 # Get the constants defined in each namespace. 409 410 for path, constants in self.constants.items(): 411 for constant, n in constants.items(): 412 objpath = "%s.$c%d" % (path, n) 413 _constant, value_type = self.constant_values[objpath] 414 self.initialised_names[objpath] = {0 : Reference("<instance>", value_type)} 415 416 # Get the literals defined in each namespace. 417 418 for path, literals in self.literals.items(): 419 for n in range(0, literals): 420 objpath = "%s.$C%d" % (path, n) 421 value_type = self.literal_types[objpath] 422 self.initialised_names[objpath] = {0 : Reference("<instance>", value_type)} 423 424 def remove_redundant_accessors(self): 425 426 "Remove now-redundant modifier and accessor information." 427 428 for path, const_accesses in self.const_accesses.items(): 429 accesses = self.attr_accessors.get(path) 430 modifiers = self.attr_access_modifiers.get(path) 431 if not accesses: 432 continue 433 for access in const_accesses.keys(): 434 if accesses.has_key(access): 435 del accesses[access] 436 if modifiers and modifiers.has_key(access): 437 del modifiers[access] 438 439 def set_invocation_usage(self): 440 441 """ 442 Discard the current invocation storage figures, retaining the maximum 443 values. 444 """ 445 446 for path, (current, maximum) in self.function_targets.items(): 447 self.importer.function_targets[path] = self.function_targets[path] = maximum 448 449 for path, (current, maximum) in self.function_arguments.items(): 450 self.importer.function_arguments[path] = self.function_arguments[path] = maximum 451 452 # Module structure traversal. 453 454 def process_structure_node(self, n): 455 456 "Process the individual node 'n'." 457 458 # Module global detection. 459 460 if isinstance(n, compiler.ast.Global): 461 self.process_global_node(n) 462 463 # Module import declarations. 464 465 elif isinstance(n, compiler.ast.From): 466 self.process_from_node(n) 467 468 elif isinstance(n, compiler.ast.Import): 469 self.process_import_node(n) 470 471 # Nodes using operator module functions. 472 473 elif isinstance(n, compiler.ast.Operator): 474 return self.process_operator_node(n) 475 476 elif isinstance(n, compiler.ast.AugAssign): 477 self.process_augassign_node(n) 478 479 elif isinstance(n, compiler.ast.Compare): 480 return self.process_compare_node(n) 481 482 elif isinstance(n, compiler.ast.Slice): 483 return self.process_slice_node(n) 484 485 elif isinstance(n, compiler.ast.Sliceobj): 486 return self.process_sliceobj_node(n) 487 488 elif isinstance(n, compiler.ast.Subscript): 489 return self.process_subscript_node(n) 490 491 # Namespaces within modules. 492 493 elif isinstance(n, compiler.ast.Class): 494 self.process_class_node(n) 495 496 elif isinstance(n, compiler.ast.Function): 497 self.process_function_node(n, n.name) 498 499 elif isinstance(n, compiler.ast.Lambda): 500 return self.process_lambda_node(n) 501 502 # Assignments. 503 504 elif isinstance(n, compiler.ast.Assign): 505 506 # Handle each assignment node. 507 508 for node in n.nodes: 509 self.process_assignment_node(node, n.expr) 510 511 # Assignments within non-Assign nodes. 512 513 elif isinstance(n, compiler.ast.AssName): 514 self.process_assignment_node(n, None) 515 516 elif isinstance(n, compiler.ast.AssAttr): 517 self.process_attribute_access(n) 518 519 # Accesses. 520 521 elif isinstance(n, compiler.ast.Getattr): 522 return self.process_attribute_access(n) 523 524 # Name recording for later testing. 525 526 elif isinstance(n, compiler.ast.Name): 527 return self.process_name_node(n) 528 529 # Conditional statement tracking. 530 531 elif isinstance(n, compiler.ast.For): 532 self.process_for_node(n) 533 534 elif isinstance(n, compiler.ast.While): 535 self.process_while_node(n) 536 537 elif isinstance(n, compiler.ast.If): 538 self.process_if_node(n) 539 540 elif isinstance(n, (compiler.ast.And, compiler.ast.Or)): 541 return self.process_logical_node(n) 542 543 # Exception control-flow tracking. 544 545 elif isinstance(n, compiler.ast.TryExcept): 546 self.process_try_node(n) 547 548 elif isinstance(n, compiler.ast.TryFinally): 549 self.process_try_finally_node(n) 550 551 # Control-flow modification statements. 552 553 elif isinstance(n, compiler.ast.Break): 554 self.trackers[-1].suspend_broken_branch() 555 556 elif isinstance(n, compiler.ast.Continue): 557 self.trackers[-1].suspend_continuing_branch() 558 559 elif isinstance(n, compiler.ast.Raise): 560 self.process_structure(n) 561 self.trackers[-1].abandon_branch() 562 563 elif isinstance(n, compiler.ast.Return): 564 self.process_structure(n) 565 self.trackers[-1].abandon_returning_branch() 566 567 # Invocations. 568 569 elif isinstance(n, compiler.ast.CallFunc): 570 return self.process_invocation_node(n) 571 572 # Constant usage. 573 574 elif isinstance(n, compiler.ast.Const): 575 return self.get_literal_instance(n, n.value.__class__.__name__) 576 577 elif isinstance(n, compiler.ast.Dict): 578 return self.get_literal_instance(n, "dict") 579 580 elif isinstance(n, compiler.ast.List): 581 return self.get_literal_instance(n, "list") 582 583 elif isinstance(n, compiler.ast.Tuple): 584 return self.get_literal_instance(n, "tuple") 585 586 # Unsupported nodes. 587 588 elif isinstance(n, compiler.ast.GenExpr): 589 raise InspectError("Generator expressions are not supported.", self.get_namespace_path(), n) 590 591 elif isinstance(n, compiler.ast.IfExp): 592 raise InspectError("If-else expressions are not supported.", self.get_namespace_path(), n) 593 594 elif isinstance(n, compiler.ast.ListComp): 595 raise InspectError("List comprehensions are not supported.", self.get_namespace_path(), n) 596 597 # All other nodes are processed depth-first. 598 599 else: 600 self.process_structure(n) 601 602 # By default, no expression details are returned. 603 604 return None 605 606 # Specific node handling. 607 608 def process_assignment_node(self, n, expr): 609 610 "Process the individual node 'n' to be assigned the contents of 'expr'." 611 612 # Names and attributes are assigned the entire expression. 613 614 if isinstance(n, compiler.ast.AssName): 615 616 name_ref = expr and self.process_structure_node(expr) 617 618 # Name assignments populate either function namespaces or the 619 # general namespace hierarchy. 620 621 self.assign_general_local(n.name, name_ref) 622 623 # Record usage of the name. 624 625 self.record_name(n.name) 626 627 elif isinstance(n, compiler.ast.AssAttr): 628 if expr: self.process_structure_node(expr) 629 self.process_attribute_access(n) 630 631 # Lists and tuples are matched against the expression and their 632 # items assigned to expression items. 633 634 elif isinstance(n, (compiler.ast.AssList, compiler.ast.AssTuple)): 635 self.process_assignment_node_items(n, expr) 636 637 # Slices and subscripts are permitted within assignment nodes. 638 639 elif isinstance(n, compiler.ast.Slice): 640 self.process_slice_node(n, expr) 641 642 elif isinstance(n, compiler.ast.Subscript): 643 self.process_subscript_node(n, expr) 644 645 def process_attribute_access(self, n): 646 647 "Process the given attribute access node 'n'." 648 649 # Obtain any completed chain and return the reference to it. 650 651 name_ref = self.process_attribute_chain(n) 652 if self.have_access_expression(n): 653 return name_ref 654 655 # Where the start of the chain of attributes has been reached, determine 656 # the complete access. 657 658 # Given a non-access node, this chain can be handled in its entirety, 659 # either being name-based and thus an access rooted on a name, or being 660 # based on some other node and thus an anonymous access of some kind. 661 662 path = self.get_namespace_path() 663 664 # Start with the the full attribute chain. 665 666 remaining = self.attrs 667 attrnames = ".".join(remaining) 668 669 # If the accessor cannot be identified, or where attributes 670 # remain in an attribute chain, record the anonymous accesses. 671 672 if not isinstance(name_ref, NameRef): # includes ResolvedNameRef 673 674 assignment = isinstance(n, compiler.ast.AssAttr) 675 676 init_item(self.attr_accesses, path, set) 677 self.attr_accesses[path].add(attrnames) 678 679 self.record_access_details(None, attrnames, assignment) 680 del self.attrs[0] 681 return 682 683 # Name-based accesses will handle the first attribute in a 684 # chain. 685 686 else: 687 attrname = remaining[0] 688 689 # Attribute assignments are used to identify instance attributes. 690 691 if isinstance(n, compiler.ast.AssAttr) and \ 692 self.in_class and self.in_function and n.expr.name == "self": 693 694 self.set_instance_attr(attrname) 695 696 # Record attribute usage using any name local to this namespace, 697 # if assigned in the namespace, or using an external name 698 # (presently just globals within classes). 699 700 name = self.get_name_for_tracking(name_ref.name, name_ref.final()) 701 tracker = self.trackers[-1] 702 703 immediate_access = len(self.attrs) == 1 704 assignment = immediate_access and isinstance(n, compiler.ast.AssAttr) 705 706 del self.attrs[0] 707 708 # Record global-based chains for subsequent resolution. 709 710 is_global = self.in_function and not self.function_locals[path].has_key(name) or \ 711 not self.in_function 712 713 if is_global: 714 self.record_global_access_details(name, attrnames) 715 716 # Make sure the name is being tracked: global names will not 717 # already be initialised in a branch and must be added 718 # explicitly. 719 720 if not tracker.have_name(name): 721 tracker.assign_names([name]) 722 if self.in_function: 723 self.scope_globals[path].add(name) 724 725 # Record attribute usage in the tracker, and record the branch 726 # information for the access. 727 728 branches = tracker.use_attribute(name, attrname) 729 730 if not branches: 731 print >>sys.stderr, "In %s, name %s is accessed using %s before an assignment." % ( 732 path, name, attrname) 733 branches = tracker.use_attribute(name, attrname) 734 735 self.record_branches_for_access(branches, name, attrnames) 736 access_number = self.record_access_details(name, attrnames, assignment) 737 return AccessRef(name, attrnames, access_number) 738 739 def process_class_node(self, n): 740 741 "Process the given class node 'n'." 742 743 path = self.get_namespace_path() 744 745 # To avoid notions of class "versions" where the same definition 746 # might be parameterised with different state and be referenced 747 # elsewhere (as base classes, for example), classes in functions or 748 # conditions are forbidden. 749 750 if self.in_function or self.in_conditional: 751 print >>sys.stderr, "In %s, class %s in function or conditional statement ignored." % ( 752 path, n.name) 753 return 754 755 # Resolve base classes. 756 757 bases = [] 758 759 for base in n.bases: 760 base_class = self.get_class(base) 761 762 if not base_class: 763 print >>sys.stderr, "In %s, class %s has unidentifiable base classes." % ( 764 path, n.name) 765 return 766 else: 767 bases.append(base_class) 768 769 # Record bases for the class and retain the class name. 770 771 class_name = self.get_object_path(n.name) 772 773 if not bases and class_name != "__builtins__.core.object": 774 ref = self.get_object("__builtins__.object") 775 bases.append(ref) 776 777 self.importer.classes[class_name] = self.classes[class_name] = bases 778 self.importer.subclasses[class_name] = set() 779 self.scope_globals[class_name] = set() 780 781 # Set the definition before entering the namespace rather than 782 # afterwards because methods may reference it. In normal Python, 783 # a class is not accessible until the definition is complete, but 784 # methods can generally reference it since upon being called the 785 # class will already exist. 786 787 self.set_definition(n.name, "<class>") 788 789 in_class = self.in_class 790 self.in_class = class_name 791 self.set_instance_attr("__class__", Reference("<class>", class_name)) 792 self.enter_namespace(n.name) 793 self.set_name("__fn__") # special instantiator attribute 794 self.set_name("__args__") # special instantiator attribute 795 self.assign_general_local("__name__", self.get_constant("str", class_name)) 796 self.process_structure_node(n.code) 797 self.exit_namespace() 798 self.in_class = in_class 799 800 def process_from_node(self, n): 801 802 "Process the given node 'n', importing from another module." 803 804 path = self.get_namespace_path() 805 806 modname, names = self.get_module_name(n) 807 808 # Load the mentioned module. 809 810 top, module = self.get_module(modname, True) 811 self.set_module(None, module, hidden=True) 812 813 if not module: 814 print >>sys.stderr, "In %s, from statement importing from %s failed." % ( 815 path, modname) 816 817 # Attempt to obtain the referenced objects. 818 819 for name, alias in n.names: 820 821 # NOTE: Package submodules are not implicitly imported. 822 823 if name == "*": 824 if module: 825 826 # Warn about a circular import that probably doesn't find 827 # the names. 828 829 if not module.loaded: 830 print >>sys.stderr, "In %s, from statement performs circular import %s of %s." % ( 831 path, modname) 832 833 for name, value in module.get_globals().items(): 834 if name != "__name__": 835 value = ResolvedNameRef(name, value) 836 self.set_general_local(name, value) 837 self.set_imported_name(name, modname) 838 break 839 840 # Explicit names. 841 842 ref = self.import_name_from_module(name, modname, module, alias) 843 value = ResolvedNameRef(alias or name, ref) 844 self.set_general_local(alias or name, value) 845 self.set_imported_name(name, modname, alias) 846 847 def import_name_from_module(self, name, modname, module, alias=None): 848 849 """ 850 Import 'name' from the module having the given 'modname', with 'module' 851 having been obtained for the module name, using 'alias' for the imported 852 name in the current namespace. 853 """ 854 855 path = self.get_namespace_path() 856 857 if module and module.get_global(name): 858 value = module.get_global(name) 859 860 # Warn about an import that fails to provide a name, perhaps due 861 # to a circular import. 862 863 if not value: 864 print >>sys.stderr, "In %s, from statement cannot import %s from %s%s." % ( 865 path, name, modname, not module.loaded and "(circular import)") 866 867 return value 868 869 # Record the name as a dependency. 870 871 else: 872 return Reference("<depends>", "%s.%s" % (modname, name)) 873 874 def process_function_node(self, n, name): 875 876 """ 877 Process the given function or lambda node 'n' with the given 'name'. 878 """ 879 880 is_lambda = isinstance(n, compiler.ast.Lambda) 881 882 # Where a function is declared conditionally, use a separate name for 883 # the definition, and assign the definition to the stated name. 884 885 if (self.in_conditional or self.in_function) and not is_lambda: 886 original_name = name 887 name = self.get_lambda_name() 888 else: 889 original_name = None 890 891 # Initialise argument and local records. 892 893 function_name = self.get_object_path(name) 894 895 argnames = self.importer.function_parameters[function_name] = \ 896 self.function_parameters[function_name] = get_argnames(n.argnames) 897 locals = self.function_locals[function_name] = {} 898 899 for argname in argnames: 900 locals[argname] = Reference("<var>") 901 902 globals = self.scope_globals[function_name] = set() 903 904 # Process the defaults. 905 906 defaults = self.importer.function_defaults[function_name] = \ 907 self.function_defaults[function_name] = [] 908 909 for argname, default in compiler.ast.get_defaults(n): 910 if default: 911 912 # Obtain any reference for the default. 913 914 name_ref = self.process_structure_node(default) 915 defaults.append((argname, name_ref.is_name() and name_ref.reference() or Reference("<var>"))) 916 917 # Reset conditional tracking to focus on the function contents. 918 919 in_conditional = self.in_conditional 920 self.in_conditional = False 921 922 in_function = self.in_function 923 self.in_function = function_name 924 925 self.enter_namespace(name) 926 927 # Track attribute usage within the namespace. 928 929 path = self.get_namespace_path() 930 931 self.start_tracking(locals) 932 self.process_structure_node(n.code) 933 self.stop_tracking() 934 935 # Exit to the parent. 936 937 self.exit_namespace() 938 939 # Update flags. 940 941 self.in_function = in_function 942 self.in_conditional = in_conditional 943 944 # Define the function using the appropriate name. 945 946 self.set_definition(name, "<function>") 947 948 # Where a function is set conditionally, assign the name. 949 950 if original_name: 951 self.process_assignment_for_function(original_name, name) 952 953 def process_global_node(self, n): 954 955 """ 956 Process the given "global" node 'n'. 957 """ 958 959 path = self.get_namespace_path() 960 961 if path != self.name: 962 self.scope_globals[path].update(n.names) 963 964 def process_if_node(self, n): 965 966 """ 967 Process the given "if" node 'n'. 968 """ 969 970 tracker = self.trackers[-1] 971 tracker.new_branchpoint() 972 973 for test, body in n.tests: 974 self.process_structure_node(test) 975 976 tracker.new_branch() 977 978 in_conditional = self.in_conditional 979 self.in_conditional = True 980 self.process_structure_node(body) 981 self.in_conditional = in_conditional 982 983 tracker.shelve_branch() 984 985 # Maintain a branch for the else clause. 986 987 tracker.new_branch() 988 if n.else_: 989 self.process_structure_node(n.else_) 990 tracker.shelve_branch() 991 992 tracker.merge_branches() 993 994 def process_import_node(self, n): 995 996 "Process the given import node 'n'." 997 998 path = self.get_namespace_path() 999 1000 # Load the mentioned module. 1001 1002 for name, alias in n.names: 1003 module, leaf_module = self.get_module(name, alias) 1004 1005 if not module: 1006 print >>sys.stderr, "In %s, import statement importing from %s failed." % ( 1007 path, name) 1008 if module and not module.loaded: 1009 print >>sys.stderr, "In %s, import statement performs circular import of %s." % ( 1010 path, name) 1011 1012 self.set_module(alias or name.split(".")[0], module, leaf_module) 1013 1014 def process_invocation_node(self, n): 1015 1016 "Process the given invocation node 'n'." 1017 1018 path = self.get_namespace_path() 1019 1020 self.allocate_arguments(path, n.args) 1021 1022 try: 1023 # Process the expression, obtaining any identified reference. 1024 1025 name_ref = self.process_structure_node(n.node) 1026 1027 # Process the arguments. 1028 1029 for arg in n.args: 1030 self.process_structure_node(arg) 1031 1032 # Detect class invocations. 1033 1034 if isinstance(name_ref, ResolvedNameRef) and name_ref.has_kind("<class>"): 1035 return InstanceRef(name_ref.reference().instance_of()) 1036 1037 elif isinstance(name_ref, NameRef): 1038 return InvocationRef(name_ref) 1039 1040 return None 1041 1042 finally: 1043 self.deallocate_arguments(path, n.args) 1044 1045 def process_lambda_node(self, n): 1046 1047 "Process the given lambda node 'n'." 1048 1049 name = self.get_lambda_name() 1050 self.process_function_node(n, name) 1051 1052 origin = self.get_object_path(name) 1053 return ResolvedNameRef(name, Reference("<function>", origin)) 1054 1055 def process_logical_node(self, n): 1056 1057 "Process the given operator node 'n'." 1058 1059 self.process_operator_chain(n.nodes, self.process_structure_node) 1060 1061 def process_name_node(self, n): 1062 1063 "Process the given name node 'n'." 1064 1065 path = self.get_namespace_path() 1066 1067 # Special names. 1068 1069 if n.name.startswith("$"): 1070 value = self.get_special(n.name) 1071 if value: 1072 return value 1073 1074 # Special case for operator functions introduced through code 1075 # transformations. 1076 1077 if n.name.startswith("$op"): 1078 1079 # Obtain the location of the actual function defined in the operator 1080 # package. 1081 1082 op = n.name[len("$op"):] 1083 1084 # Access the operator module. 1085 1086 top, module = self.get_module("operator", True) 1087 self.set_module(None, module, hidden=True) 1088 1089 # Link the operation to the operator module definition in this 1090 # module. 1091 1092 self.set_imported_name(op, "operator", n.name, self.name) 1093 1094 # Attempt to get a reference. 1095 1096 ref = self.import_name_from_module(op, "operator", module) 1097 ref = self.get_object("operator.%s" % op) or ref 1098 1099 # Record the imported name and provide the resolved name reference. 1100 1101 value = ResolvedNameRef(n.name, ref) 1102 self.set_special(n.name, value) 1103 return value 1104 1105 # Record usage of the name. 1106 1107 self.record_name(n.name) 1108 1109 # Search for unknown names in non-function scopes immediately. 1110 # External names in functions are resolved later. 1111 1112 ref = self.find_name(n.name) 1113 if ref: 1114 return ResolvedNameRef(n.name, ref) 1115 1116 # Global name. 1117 1118 elif self.in_function and n.name in self.scope_globals[path]: 1119 return NameRef(n.name) 1120 1121 # Examine other names. 1122 1123 else: 1124 tracker = self.trackers[-1] 1125 1126 # Check local names. 1127 1128 branches = tracker.tracking_name(n.name) 1129 1130 # Local name. 1131 1132 if branches: 1133 self.record_branches_for_access(branches, n.name, None) 1134 access_number = self.record_access_details(n.name, None, False) 1135 return LocalNameRef(n.name, access_number) 1136 1137 # Possible global name. 1138 1139 else: 1140 return NameRef(n.name) 1141 1142 def process_operator_chain(self, nodes, fn): 1143 1144 """ 1145 Process the given chain of 'nodes', applying 'fn' to each node or item. 1146 Each node starts a new conditional region, effectively making a deeply- 1147 nested collection of if-like statements. 1148 """ 1149 1150 tracker = self.trackers[-1] 1151 1152 for item in nodes: 1153 tracker.new_branchpoint() 1154 tracker.new_branch() 1155 fn(item) 1156 1157 for item in nodes[:-1]: 1158 tracker.shelve_branch() 1159 tracker.new_branch() 1160 tracker.shelve_branch() 1161 tracker.merge_branches() 1162 1163 tracker.shelve_branch() 1164 tracker.merge_branches() 1165 1166 def process_try_node(self, n): 1167 1168 """ 1169 Process the given "try...except" node 'n'. 1170 """ 1171 1172 tracker = self.trackers[-1] 1173 tracker.new_branchpoint() 1174 1175 self.process_structure_node(n.body) 1176 1177 for name, var, handler in n.handlers: 1178 if name is not None: 1179 self.process_structure_node(name) 1180 1181 # Any abandoned branches from the body can now be resumed in a new 1182 # branch. 1183 1184 tracker.resume_abandoned_branches() 1185 1186 # Establish the local for the handler. 1187 1188 if var is not None: 1189 self.process_structure_node(var) 1190 if handler is not None: 1191 self.process_structure_node(handler) 1192 1193 tracker.shelve_branch() 1194 1195 # The else clause maintains the usage from the body but without the 1196 # abandoned branches since they would never lead to the else clause 1197 # being executed. 1198 1199 if n.else_: 1200 tracker.new_branch() 1201 self.process_structure_node(n.else_) 1202 tracker.shelve_branch() 1203 1204 # Without an else clause, a null branch propagates the successful 1205 # outcome. 1206 1207 else: 1208 tracker.new_branch() 1209 tracker.shelve_branch() 1210 1211 tracker.merge_branches() 1212 1213 def process_try_finally_node(self, n): 1214 1215 """ 1216 Process the given "try...finally" node 'n'. 1217 """ 1218 1219 tracker = self.trackers[-1] 1220 self.process_structure_node(n.body) 1221 1222 # Any abandoned branches from the body can now be resumed. 1223 1224 branches = tracker.resume_all_abandoned_branches() 1225 self.process_structure_node(n.final) 1226 1227 # At the end of the finally clause, abandoned branches are discarded. 1228 1229 tracker.restore_active_branches(branches) 1230 1231 def process_while_node(self, n): 1232 1233 "Process the given while node 'n'." 1234 1235 tracker = self.trackers[-1] 1236 tracker.new_branchpoint(loop_node=True) 1237 1238 # Evaluate any test or iterator outside the loop. 1239 1240 self.process_structure_node(n.test) 1241 1242 # Propagate attribute usage to branches. 1243 1244 tracker.new_branch(loop_node=True) 1245 1246 # Enter the loop. 1247 1248 in_conditional = self.in_conditional 1249 self.in_conditional = True 1250 self.process_structure_node(n.body) 1251 self.in_conditional = in_conditional 1252 1253 # Continuing branches are resumed before any test. 1254 1255 tracker.resume_continuing_branches() 1256 1257 # Evaluate any continuation test within the body. 1258 1259 self.process_structure_node(n.test) 1260 1261 tracker.shelve_branch(loop_node=True) 1262 1263 # Support the non-looping condition. 1264 1265 tracker.new_branch() 1266 tracker.shelve_branch() 1267 1268 tracker.merge_branches() 1269 1270 # Evaluate any else clause outside branches. 1271 1272 if n.else_: 1273 self.process_structure_node(n.else_) 1274 1275 # Connect broken branches to the code after any loop. 1276 1277 tracker.resume_broken_branches() 1278 1279 # Branch tracking methods. 1280 1281 def start_tracking(self, names): 1282 1283 """ 1284 Start tracking attribute usage for names in the current namespace, 1285 immediately registering the given 'names'. 1286 """ 1287 1288 path = self.get_namespace_path() 1289 parent = self.trackers[-1] 1290 tracker = BranchTracker() 1291 self.trackers.append(tracker) 1292 1293 # Record the given names established as new branches. 1294 1295 tracker.assign_names(names) 1296 1297 def assign_name(self, name, name_ref): 1298 1299 "Assign to 'name' the given 'name_ref' in the current namespace." 1300 1301 name = self.get_name_for_tracking(name) 1302 self.trackers[-1].assign_names([name], [name_ref]) 1303 1304 def stop_tracking(self): 1305 1306 """ 1307 Stop tracking attribute usage, recording computed usage for the current 1308 namespace. 1309 """ 1310 1311 path = self.get_namespace_path() 1312 tracker = self.trackers.pop() 1313 self.record_assignments_for_access(tracker) 1314 1315 self.attr_usage[path] = tracker.get_all_usage() 1316 self.name_initialisers[path] = tracker.get_all_values() 1317 1318 def start_tracking_in_module(self): 1319 1320 "Start tracking attribute usage in the module." 1321 1322 tracker = BranchTracker() 1323 self.trackers.append(tracker) 1324 1325 def stop_tracking_in_module(self): 1326 1327 "Stop tracking attribute usage in the module." 1328 1329 tracker = self.trackers[0] 1330 self.record_assignments_for_access(tracker) 1331 self.attr_usage[self.name] = tracker.get_all_usage() 1332 self.name_initialisers[self.name] = tracker.get_all_values() 1333 1334 def record_assignments_for_access(self, tracker): 1335 1336 """ 1337 For the current path, use the given 'tracker' to record assignment 1338 version information for attribute accesses. 1339 """ 1340 1341 path = self.get_path_for_access() 1342 1343 if not self.attr_accessor_branches.has_key(path): 1344 return 1345 1346 init_item(self.attr_accessors, path, dict) 1347 attr_accessors = self.attr_accessors[path] 1348 1349 # Obtain the branches applying during each access. 1350 1351 for access, all_branches in self.attr_accessor_branches[path].items(): 1352 name, attrnames = access 1353 init_item(attr_accessors, access, list) 1354 1355 # Obtain the assignments applying to each branch. 1356 1357 for branches in all_branches: 1358 positions = tracker.get_assignment_positions_for_branches(name, branches) 1359 1360 # Detect missing name information. 1361 1362 if None in positions: 1363 globals = self.global_attr_accesses.get(path) 1364 accesses = globals and globals.get(name) 1365 if not accesses: 1366 print >>sys.stderr, "In %s, %s may not be defined when used." % ( 1367 self.get_namespace_path(), name) 1368 positions.remove(None) 1369 1370 attr_accessors[access].append(positions) 1371 1372 def record_branches_for_access(self, branches, name, attrnames): 1373 1374 """ 1375 Record the given 'branches' for an access involving the given 'name' and 1376 'attrnames'. 1377 """ 1378 1379 access = name, attrnames 1380 path = self.get_path_for_access() 1381 1382 init_item(self.attr_accessor_branches, path, dict) 1383 attr_accessor_branches = self.attr_accessor_branches[path] 1384 1385 init_item(attr_accessor_branches, access, list) 1386 attr_accessor_branches[access].append(branches) 1387 1388 def record_access_details(self, name, attrnames, assignment): 1389 1390 """ 1391 For the given 'name' and 'attrnames', record an access indicating 1392 whether 'assignment' is occurring. 1393 1394 These details correspond to accesses otherwise recorded by the attribute 1395 accessor and attribute access dictionaries. 1396 """ 1397 1398 access = name, attrnames 1399 path = self.get_path_for_access() 1400 1401 init_item(self.attr_access_modifiers, path, dict) 1402 init_item(self.attr_access_modifiers[path], access, list) 1403 1404 access_number = len(self.attr_access_modifiers[path][access]) 1405 self.attr_access_modifiers[path][access].append(assignment) 1406 return access_number 1407 1408 def record_global_access_details(self, name, attrnames): 1409 1410 """ 1411 Record details of a global access via the given 'name' involving the 1412 indicated 'attrnames'. 1413 """ 1414 1415 path = self.get_namespace_path() 1416 1417 init_item(self.global_attr_accesses, path, dict) 1418 init_item(self.global_attr_accesses[path], name, set) 1419 self.global_attr_accesses[path][name].add(attrnames) 1420 1421 # Namespace modification. 1422 1423 def record_name(self, name): 1424 1425 "Record the use of 'name' in a namespace." 1426 1427 path = self.get_namespace_path() 1428 init_item(self.names_used, path, set) 1429 self.names_used[path].add(name) 1430 1431 def set_module(self, name, module, leaf_module=None, hidden=False): 1432 1433 """ 1434 Set a module in the current namespace using the given 'name' and 1435 corresponding 'module' object, with the 'leaf_module' being recorded 1436 if different. If 'hidden' is a true value, the modules are recorded as 1437 not necessarily being exposed by this module. This module is, however, 1438 recorded as accessing the given modules and is thus dependent on them. 1439 """ 1440 1441 if name: 1442 self.set_general_local(name, module and Reference("<module>", module.name) or None) 1443 if module: 1444 if hidden: 1445 self.imported_hidden.add(module) 1446 if leaf_module and leaf_module is not module: 1447 self.imported_hidden.add(leaf_module) 1448 else: 1449 self.imported.add(module) 1450 module.accessing_modules.add(self.name) 1451 if leaf_module and leaf_module is not module: 1452 self.imported.add(leaf_module) 1453 leaf_module.accessing_modules.add(self.name) 1454 1455 def set_definition(self, name, kind): 1456 1457 """ 1458 Set the definition having the given 'name' and 'kind'. 1459 1460 Definitions are set in the static namespace hierarchy, but they can also 1461 be recorded for function locals. 1462 """ 1463 1464 if self.is_global(name): 1465 print >>sys.stderr, "In %s, %s is defined as being global." % ( 1466 self.get_namespace_path(), name) 1467 1468 path = self.get_object_path(name) 1469 self.set_object(path, kind) 1470 1471 ref = self.get_object(path) 1472 if ref.get_kind() == "<var>": 1473 print >>sys.stderr, "In %s, %s is defined more than once." % ( 1474 self.get_namespace_path(), name) 1475 1476 if not self.is_global(name) and self.in_function: 1477 self.set_function_local(name, ref) 1478 1479 def set_function_local(self, name, ref=None): 1480 1481 "Set the local with the given 'name' and optional 'ref'." 1482 1483 locals = self.function_locals[self.get_namespace_path()] 1484 multiple = not ref or locals.has_key(name) and locals[name] != ref 1485 locals[name] = multiple and Reference("<var>") or ref 1486 1487 def assign_general_local(self, name, name_ref): 1488 1489 """ 1490 Set for 'name' the given 'name_ref', recording the name for attribute 1491 usage tracking. 1492 """ 1493 1494 self.set_general_local(name, name_ref) 1495 self.assign_name(name, name_ref) 1496 1497 def set_general_local(self, name, value=None): 1498 1499 """ 1500 Set the 'name' with optional 'value' in any kind of local namespace, 1501 where the 'value' should be a reference if specified. 1502 """ 1503 1504 init_value = self.get_initialising_value(value) 1505 1506 # Module global names. 1507 1508 if self.is_global(name): 1509 path = self.get_global_path(name) 1510 self.set_object(path, init_value) 1511 1512 # Function local names. 1513 1514 elif self.in_function: 1515 path = self.get_object_path(name) 1516 self.set_function_local(name, init_value) 1517 1518 # Other namespaces (classes). 1519 1520 else: 1521 path = self.get_object_path(name) 1522 self.set_name(name, init_value) 1523 1524 def set_name(self, name, ref=None): 1525 1526 "Attach the 'name' with optional 'ref' to the current namespace." 1527 1528 self.set_object(self.get_object_path(name), ref) 1529 1530 def set_instance_attr(self, name, ref=None): 1531 1532 """ 1533 Add an instance attribute of the given 'name' to the current class, 1534 using the optional 'ref'. 1535 """ 1536 1537 init_item(self.instance_attrs, self.in_class, set) 1538 self.instance_attrs[self.in_class].add(name) 1539 1540 if ref: 1541 init_item(self.instance_attr_constants, self.in_class, dict) 1542 self.instance_attr_constants[self.in_class][name] = ref 1543 1544 def get_initialising_value(self, value): 1545 1546 "Return a suitable initialiser reference for 'value'." 1547 1548 if isinstance(value, (NameRef, AccessRef, InstanceRef)): # includes LiteralSequenceRef, ResolvedNameRef 1549 return value.reference() 1550 1551 # In general, invocations do not produce known results. However, the 1552 # name initialisers are resolved once a module has been inspected. 1553 1554 elif isinstance(value, InvocationRef): 1555 return None 1556 1557 else: 1558 return value 1559 1560 # Static, program-relative naming. 1561 1562 def find_name(self, name): 1563 1564 """ 1565 Return the qualified name for the given 'name' used in the current 1566 non-function namespace. 1567 """ 1568 1569 path = self.get_namespace_path() 1570 ref = None 1571 1572 if not self.in_function and name not in predefined_constants: 1573 if self.in_class: 1574 ref = self.get_object(self.get_object_path(name)) 1575 if not ref: 1576 ref = self.get_global_or_builtin(name) 1577 1578 return ref 1579 1580 def get_class(self, node): 1581 1582 """ 1583 Use the given 'node' to obtain the identity of a class. Return a 1584 reference for the class. Unresolved dependencies are permitted and must 1585 be resolved later. 1586 """ 1587 1588 ref = self._get_class(node) 1589 return ref.has_kind(["<class>", "<depends>"]) and ref or None 1590 1591 def _get_class(self, node): 1592 1593 """ 1594 Use the given 'node' to find a class definition. Return a reference to 1595 the class. 1596 """ 1597 1598 if isinstance(node, compiler.ast.Getattr): 1599 1600 # Obtain the identity of the access target. 1601 1602 ref = self._get_class(node.expr) 1603 1604 # Where the target is a class or module, obtain the identity of the 1605 # attribute. 1606 1607 if ref.has_kind(["<function>", "<var>"]): 1608 return None 1609 else: 1610 attrname = "%s.%s" % (ref.get_origin(), node.attrname) 1611 return self.get_object(attrname) 1612 1613 # Names can be module-level or built-in. 1614 1615 elif isinstance(node, compiler.ast.Name): 1616 1617 # Record usage of the name and attempt to identify it. 1618 1619 self.record_name(node.name) 1620 return self.get_global_or_builtin(node.name) 1621 else: 1622 return None 1623 1624 def get_constant(self, name, value): 1625 1626 "Return a constant reference for the given type 'name' and 'value'." 1627 1628 ref = self.get_literal_builtin(name) 1629 return self.get_constant_reference(ref, value) 1630 1631 def get_literal_instance(self, n, name): 1632 1633 "For node 'n', return a reference to an instance of 'name'." 1634 1635 # Get a class reference. 1636 1637 ref = self.get_literal_builtin(name) 1638 1639 # Obtain the details of the literal itself. 1640 # An alias to the type is generated for sequences. 1641 1642 if name in ("dict", "list", "tuple"): 1643 self.set_special_literal(name, ref) 1644 return self.process_literal_sequence_node(n, name, ref, LiteralSequenceRef) 1645 1646 # Constant values are independently recorded. 1647 1648 else: 1649 return self.get_constant_reference(ref, n.value) 1650 1651 def get_literal_builtin(self, name): 1652 1653 "Return a reference for a built-in literal type of the given 'name'." 1654 1655 ref = self.get_builtin(name) 1656 true_origin = "__builtins__.%s.%s" % (name, name) 1657 exposed_origin = "__builtins__.%s" % name 1658 1659 # Obtain fully-imported built-in class references. 1660 1661 if ref and ref.has_kind("<class>"): 1662 pass 1663 1664 # Early-stage references need explicit references. 1665 1666 elif ref: 1667 ref = Reference("<class>", true_origin) 1668 1669 # Otherwise, the normal locations can be used. 1670 1671 else: 1672 ref = Reference("<class>", true_origin, exposed_origin) 1673 1674 return ref 1675 1676 # Functions and invocations. 1677 1678 def allocate_arguments(self, path, args): 1679 1680 """ 1681 Allocate temporary argument storage using current and maximum 1682 requirements for the given 'path' and 'args'. 1683 """ 1684 1685 init_item(self.function_targets, path, lambda: [0, 0]) 1686 t = self.function_targets[path] 1687 t[0] += 1 1688 t[1] = max(t[0], t[1]) 1689 1690 init_item(self.function_arguments, path, lambda: [0, 0]) 1691 t = self.function_arguments[path] 1692 t[0] += len(args) + 1 1693 t[1] = max(t[0], t[1]) 1694 1695 def deallocate_arguments(self, path, args): 1696 1697 "Deallocate temporary argument storage for the given 'path' and 'args'." 1698 1699 self.function_targets[path][0] -= 1 1700 self.function_arguments[path][0] -= len(args) + 1 1701 1702 # vim: tabstop=4 expandtab shiftwidth=4