1 #!/usr/bin/env python 2 3 """ 4 Import logic. 5 6 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from errors import ProgramError 24 from os.path import exists, extsep, getmtime, join 25 from os import listdir, makedirs, remove 26 from common import init_item, readfile, writefile 27 from modules import CachedModule 28 from referencing import Reference 29 import inspector 30 import sys 31 32 class Importer: 33 34 "An import machine, searching for and loading modules." 35 36 def __init__(self, path, cache=None, verbose=False): 37 38 """ 39 Initialise the importer with the given search 'path' - a list of 40 directories to search for Python modules. 41 42 The optional 'cache' should be the name of a directory used to store 43 cached module information. 44 45 The optional 'verbose' parameter causes output concerning the activities 46 of the object to be produced if set to a true value (not the default). 47 """ 48 49 self.path = path 50 self.cache = cache 51 self.verbose = verbose 52 53 # Module importing queue, required modules, removed modules and active 54 # modules in the final program. 55 56 self.to_import = set() 57 self.required = set(["__main__"]) 58 self.removed = {} 59 self.modules = {} 60 61 # Module relationships and invalidated cached modules. 62 63 self.accessing_modules = {} 64 self.invalidated = set() 65 66 # Basic program information. 67 68 self.objects = {} 69 self.classes = {} 70 self.function_parameters = {} 71 self.function_defaults = {} 72 self.function_locals = {} 73 self.function_targets = {} 74 self.function_arguments = {} 75 76 # Unresolved names. 77 78 self.missing = set() 79 80 # Derived information. 81 82 self.subclasses = {} 83 84 # Attributes of different object types. 85 86 self.all_class_attrs = {} 87 self.all_instance_attrs = {} 88 self.all_instance_attr_constants = {} 89 self.all_combined_attrs = {} 90 self.all_module_attrs = {} 91 self.all_shadowed_attrs = {} 92 93 # References to external names and aliases within program units. 94 95 self.all_name_references = {} 96 self.all_initialised_names = {} 97 self.all_aliased_names = {} 98 99 # General attribute accesses. 100 101 self.all_attr_accesses = {} 102 self.all_const_accesses = {} 103 self.all_attr_access_modifiers = {} 104 105 # Constant literals and values. 106 107 self.all_constants = {} 108 self.all_constant_values = {} 109 110 self.make_cache() 111 112 def make_cache(self): 113 if self.cache and not exists(self.cache): 114 makedirs(self.cache) 115 116 def check_cache(self, details): 117 118 """ 119 Check whether the cache applies for the given 'details', invalidating it 120 if it does not. 121 """ 122 123 recorded_details = self.get_cache_details() 124 125 if recorded_details != details: 126 self.remove_cache() 127 128 writefile(self.get_cache_details_filename(), details) 129 130 def get_cache_details_filename(self): 131 132 "Return the filename for the cache details." 133 134 return join(self.cache, "$details") 135 136 def get_cache_details(self): 137 138 "Return details of the cache." 139 140 details_filename = self.get_cache_details_filename() 141 142 if not exists(details_filename): 143 return None 144 else: 145 return readfile(details_filename) 146 147 def remove_cache(self): 148 149 "Remove the contents of the cache." 150 151 for filename in listdir(self.cache): 152 remove(join(self.cache, filename)) 153 154 def to_cache(self): 155 156 "Write modules to the cache." 157 158 if self.cache: 159 for module_name, module in self.modules.items(): 160 module.to_cache(join(self.cache, module_name)) 161 162 # Object retrieval and storage. 163 164 def get_object(self, name): 165 166 """ 167 Return a reference for the given 'name' or None if no such object 168 exists. 169 """ 170 171 return self.objects.get(name) 172 173 def set_object(self, name, value=None): 174 175 "Set the object with the given 'name' and the given 'value'." 176 177 if isinstance(value, Reference): 178 ref = value.alias(name) 179 else: 180 ref = Reference(value, name) 181 182 self.objects[name] = ref 183 184 # Identification of both stored object names and name references. 185 186 def identify(self, name): 187 188 "Identify 'name' using stored object and external name records." 189 190 return self.objects.get(name) or self.all_name_references.get(name) 191 192 # Indirect object retrieval. 193 194 def get_attributes(self, ref, attrname): 195 196 """ 197 Return attributes provided by 'ref' for 'attrname'. Class attributes 198 may be provided by instances. 199 """ 200 201 kind = ref.get_kind() 202 if kind == "<class>": 203 ref = self.get_class_attribute(ref.get_origin(), attrname) 204 return ref and set([ref]) or set() 205 elif kind == "<instance>": 206 return self.get_combined_attributes(ref.get_origin(), attrname) 207 elif kind == "<module>": 208 ref = self.get_module_attribute(ref.get_origin(), attrname) 209 return ref and set([ref]) or set() 210 else: 211 return set() 212 213 def get_class_attribute(self, object_type, attrname): 214 215 "Return from 'object_type' the details of class attribute 'attrname'." 216 217 attrs = self.all_class_attrs.get(object_type) 218 attr = attrs and attrs.get(attrname) 219 return attr and self.get_object(attr) 220 221 def get_instance_attributes(self, object_type, attrname): 222 223 """ 224 Return from 'object_type' the details of instance attribute 'attrname'. 225 """ 226 227 consts = self.all_instance_attr_constants.get(object_type) 228 attrs = set() 229 for attr in self.all_instance_attrs[object_type].get(attrname, []): 230 attrs.add(consts and consts.get(attrname) or Reference("<var>", attr)) 231 return attrs 232 233 def get_combined_attributes(self, object_type, attrname): 234 235 """ 236 Return from 'object_type' the details of class or instance attribute 237 'attrname'. 238 """ 239 240 ref = self.get_class_attribute(object_type, attrname) 241 refs = ref and set([ref]) or set() 242 refs.update(self.get_instance_attributes(object_type, attrname)) 243 return refs 244 245 def get_module_attribute(self, object_type, attrname): 246 247 "Return from 'object_type' the details of module attribute 'attrname'." 248 249 if attrname in self.all_module_attrs[object_type]: 250 return self.get_object("%s.%s" % (object_type, attrname)) 251 else: 252 return None 253 254 # Convenience methods for deducing which kind of object provided an 255 # attribute. 256 257 def get_attribute_provider(self, ref, attrname): 258 259 """ 260 Return the kind of provider of the attribute accessed via 'ref' using 261 'attrname'. 262 """ 263 264 kind = ref.get_kind() 265 266 if kind in ["<class>", "<module>"]: 267 return kind 268 else: 269 return self.get_instance_attribute_provider(ref.get_origin(), attrname) 270 271 def get_instance_attribute_provider(self, object_type, attrname): 272 273 """ 274 Return the kind of provider of the attribute accessed via an instance of 275 'object_type' using 'attrname'. 276 """ 277 278 if self.get_class_attribute(object_type, attrname): 279 return "<class>" 280 else: 281 return "<instance>" 282 283 # Module management. 284 285 def queue_module(self, name, accessor, required=False): 286 287 """ 288 Queue the module with the given 'name' for import from the given 289 'accessor' module. If 'required' is true (it is false by default), the 290 module will be required in the final program. 291 """ 292 293 if not self.modules.has_key(name): 294 self.to_import.add(name) 295 296 if required: 297 self.required.add(name) 298 299 init_item(self.accessing_modules, name, set) 300 self.accessing_modules[name].add(accessor.name) 301 302 def get_modules(self): 303 304 "Return all modules known to the importer." 305 306 return self.modules.values() 307 308 def get_module(self, name): 309 310 "Return the module with the given 'name'." 311 312 if not self.modules.has_key(name): 313 return None 314 315 return self.modules[name] 316 317 # Program operations. 318 319 def initialise(self, filename, reset=False): 320 321 """ 322 Initialise a program whose main module is 'filename', resetting the 323 cache if 'reset' is true. Return the main module. 324 """ 325 326 if reset: 327 self.remove_cache() 328 self.check_cache(filename) 329 330 # Load the program itself. 331 332 m = self.load_from_file(filename) 333 334 # Load any queued modules. 335 336 while self.to_import: 337 for name in list(self.to_import): # avoid mutation issue 338 self.load(name) 339 340 # Resolve dependencies between modules. 341 342 self.resolve() 343 344 # Record the type of all classes. 345 346 self.type_ref = self.get_object("__builtins__.type") 347 348 # Resolve dependencies within the program. 349 350 for module in self.modules.values(): 351 module.complete() 352 353 # Remove unneeded modules. 354 355 all_modules = self.modules.items() 356 357 for name, module in all_modules: 358 if name not in self.required: 359 module.unpropagate() 360 del self.modules[name] 361 self.removed[name] = module 362 363 # Collect redundant objects. 364 365 for module in self.removed.values(): 366 module.collect() 367 368 # Assert module objects where aliases have been removed. 369 370 for name in self.required: 371 if not self.objects.has_key(name): 372 self.objects[name] = Reference("<module>", name) 373 374 return m 375 376 def finalise(self): 377 378 """ 379 Finalise the inspected program, returning whether the program could be 380 finalised. 381 """ 382 383 self.finalise_classes() 384 self.to_cache() 385 386 if self.missing: 387 return False 388 389 self.set_class_types() 390 self.define_instantiators() 391 self.collect_constants() 392 393 return True 394 395 # Supporting operations. 396 397 def resolve(self): 398 399 "Resolve dependencies between modules." 400 401 self.waiting = {} 402 self.depends = {} 403 404 for module in self.modules.values(): 405 406 # Resolve all deferred references in each module. 407 408 original_deferred = [] 409 410 for ref in module.deferred: 411 412 # Retain original references for caching. 413 414 original_deferred.append(ref.copy()) 415 416 # Update references throughout the program. 417 418 found = self.find_dependency(ref) 419 if not found: 420 self.missing.add((module.name, ref.get_origin())) 421 422 # Record the resolved names and identify required modules. 423 424 else: 425 # Find the providing module of this reference. 426 # Where definitive details of the origin cannot be found, 427 # identify the provider using the deferred reference. 428 # NOTE: This may need to test for static origins. 429 430 provider = self.get_module_provider(found.unresolved() and ref or found) 431 ref.mutate(found) 432 433 # Record any external dependency. 434 435 if provider and provider != module.name: 436 437 # Record the provider dependency. 438 439 module.required.add(provider) 440 self.accessing_modules[provider].add(module.name) 441 442 # Postpone any inclusion of the provider until this 443 # module becomes required. 444 445 if module.name not in self.required: 446 init_item(self.waiting, module.name, set) 447 self.waiting[module.name].add(provider) 448 449 # Make this module required in the accessing module. 450 451 elif provider not in self.required: 452 self.required.add(provider) 453 if self.verbose: 454 print >>sys.stderr, "Requiring", provider, "for", ref 455 456 # Record a module ordering dependency. 457 458 if not found.static() or self.uses_dynamic_callable(found): 459 self.add_provider(module.name, provider) 460 461 module.deferred = original_deferred 462 463 # Check modules again to see if they are now required and should now 464 # cause the inclusion of other modules providing objects to the program. 465 466 for module_name in self.waiting.keys(): 467 self.require_providers(module_name) 468 469 def add_provider(self, module_name, provider): 470 471 "Add a dependency for 'module_name' of 'provider'." 472 473 init_item(self.depends, module_name, set) 474 self.depends[module_name].add(provider) 475 476 def require_providers(self, module_name): 477 478 """ 479 Test if 'module_name' is itself required and, if so, require modules 480 containing objects provided to the module. 481 """ 482 483 if module_name in self.required and self.waiting.has_key(module_name): 484 for provider in self.waiting[module_name]: 485 if provider not in self.required: 486 self.required.add(provider) 487 if self.verbose: 488 print >>sys.stderr, "Requiring", provider 489 self.require_providers(provider) 490 491 def uses_dynamic_callable(self, ref): 492 493 """ 494 Return whether 'ref' refers to a callable employing defaults that may 495 need initialising before the callable can be used. 496 """ 497 498 # Find the function or method associated with the reference. 499 500 if ref.has_kind("<function>"): 501 origin = ref.get_origin() 502 elif ref.has_kind("<class>"): 503 origin = "%s.__init__" % ref.get_origin() 504 else: 505 return False 506 507 # Find any defaults for the function or method. 508 509 defaults = self.function_defaults.get(origin) 510 if not defaults: 511 return False 512 513 # Identify non-constant defaults. 514 515 for name, ref in defaults: 516 if not ref.is_constant_alias(): 517 return True 518 519 return False 520 521 def order_modules(self): 522 523 "Produce a module initialisation ordering." 524 525 self.check_ordering() 526 527 module_names = self.modules.keys() 528 529 # Record the number of modules using or depending on each module. 530 531 usage = {} 532 533 for module_name in module_names: 534 usage[module_name] = 0 535 536 for module_name, depend_names in self.depends.items(): 537 if module_name in module_names: 538 for depend_name in depend_names: 539 if depend_name in module_names: 540 usage[depend_name] += 1 541 542 # Produce an ordering by obtaining exposed modules (required by modules 543 # already processed) and putting them at the start of the list. 544 545 ordered = [] 546 547 while usage: 548 for module_name, n in usage.items(): 549 if n == 0: 550 ordered.insert(0, module_name) 551 module_names = self.depends.get(module_name) 552 553 # Reduce usage of the referenced modules. 554 555 if module_names: 556 for name in module_names: 557 usage[name] -= 1 558 559 del usage[module_name] 560 561 ordered.remove("__main__") 562 ordered.append("__main__") 563 return ordered 564 565 def check_ordering(self): 566 567 "Check the ordering dependencies." 568 569 for module_name, modules in self.depends.items(): 570 for provider in modules: 571 if self.depends.has_key(provider) and module_name in self.depends[provider]: 572 raise ProgramError, "Modules %s and %s may not depend on each other for non-static objects." % (module_name, provider) 573 574 def find_dependency(self, ref): 575 576 "Find the ultimate dependency for 'ref'." 577 578 found = set() 579 while ref and ref.has_kind("<depends>") and not ref in found: 580 found.add(ref) 581 ref = self.identify(ref.get_origin()) 582 return ref 583 584 def get_module_provider(self, ref): 585 586 "Identify the provider of the given 'ref'." 587 588 for ancestor in ref.ancestors(): 589 if self.modules.has_key(ancestor): 590 return ancestor 591 return None 592 593 def finalise_classes(self): 594 595 "Finalise the class relationships and attributes." 596 597 self.derive_inherited_attrs() 598 self.derive_subclasses() 599 self.derive_shadowed_attrs() 600 601 def derive_inherited_attrs(self): 602 603 "Derive inherited attributes for classes throughout the program." 604 605 for name in self.classes.keys(): 606 self.propagate_attrs_for_class(name) 607 608 def propagate_attrs_for_class(self, name, visited=None): 609 610 "Propagate inherited attributes for class 'name'." 611 612 # Visit classes only once. 613 614 if self.all_combined_attrs.has_key(name): 615 return 616 617 visited = visited or [] 618 619 if name in visited: 620 raise ProgramError, "Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name) 621 622 visited.append(name) 623 624 class_attrs = {} 625 instance_attrs = {} 626 627 # Aggregate the attributes from base classes, recording the origins of 628 # applicable attributes. 629 630 for base in self.classes[name][::-1]: 631 632 # Get the identity of the class from the reference. 633 634 base = base.get_origin() 635 636 # Define the base class completely before continuing with this 637 # class. 638 639 self.propagate_attrs_for_class(base, visited) 640 class_attrs.update(self.all_class_attrs[base]) 641 642 # Instance attribute origins are combined if different. 643 644 for key, values in self.all_instance_attrs[base].items(): 645 init_item(instance_attrs, key, set) 646 instance_attrs[key].update(values) 647 648 # Class attributes override those defined earlier in the hierarchy. 649 650 class_attrs.update(self.all_class_attrs.get(name, {})) 651 652 # Instance attributes are merely added if not already defined. 653 654 for key in self.all_instance_attrs.get(name, []): 655 if not instance_attrs.has_key(key): 656 instance_attrs[key] = set(["%s.%s" % (name, key)]) 657 658 self.all_class_attrs[name] = class_attrs 659 self.all_instance_attrs[name] = instance_attrs 660 self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys()) 661 662 def derive_subclasses(self): 663 664 "Derive subclass details for classes." 665 666 for name, bases in self.classes.items(): 667 for base in bases: 668 669 # Get the identity of the class from the reference. 670 671 base = base.get_origin() 672 self.subclasses[base].add(name) 673 674 def derive_shadowed_attrs(self): 675 676 "Derive shadowed attributes for classes." 677 678 for name, attrs in self.all_instance_attrs.items(): 679 attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys()) 680 if attrs: 681 self.all_shadowed_attrs[name] = attrs 682 683 def set_class_types(self): 684 685 "Set the type of each class." 686 687 for attrs in self.all_class_attrs.values(): 688 attrs["__class__"] = self.type_ref.get_origin() 689 690 def define_instantiators(self): 691 692 """ 693 Consolidate parameter and default details, incorporating initialiser 694 details to define instantiator signatures. 695 """ 696 697 for cls, attrs in self.all_class_attrs.items(): 698 initialiser = attrs["__init__"] 699 self.function_parameters[cls] = self.function_parameters[initialiser] 700 self.function_defaults[cls] = self.function_defaults[initialiser] 701 702 def collect_constants(self): 703 704 "Get constants from all active modules." 705 706 for module in self.modules.values(): 707 self.all_constants.update(module.constants) 708 709 # Import methods. 710 711 def find_in_path(self, name): 712 713 """ 714 Find the given module 'name' in the search path, returning None where no 715 such module could be found, or a 2-tuple from the 'find' method 716 otherwise. 717 """ 718 719 for d in self.path: 720 m = self.find(d, name) 721 if m: return m 722 return None 723 724 def find(self, d, name): 725 726 """ 727 In the directory 'd', find the given module 'name', where 'name' can 728 either refer to a single file module or to a package. Return None if the 729 'name' cannot be associated with either a file or a package directory, 730 or a 2-tuple from '_find_package' or '_find_module' otherwise. 731 """ 732 733 m = self._find_package(d, name) 734 if m: return m 735 m = self._find_module(d, name) 736 if m: return m 737 return None 738 739 def _find_module(self, d, name): 740 741 """ 742 In the directory 'd', find the given module 'name', returning None where 743 no suitable file exists in the directory, or a 2-tuple consisting of 744 None (indicating that no package directory is involved) and a filename 745 indicating the location of the module. 746 """ 747 748 name_py = name + extsep + "py" 749 filename = self._find_file(d, name_py) 750 if filename: 751 return None, filename 752 return None 753 754 def _find_package(self, d, name): 755 756 """ 757 In the directory 'd', find the given package 'name', returning None 758 where no suitable package directory exists, or a 2-tuple consisting of 759 a directory (indicating the location of the package directory itself) 760 and a filename indicating the location of the __init__.py module which 761 declares the package's top-level contents. 762 """ 763 764 filename = self._find_file(d, name) 765 if filename: 766 init_py = "__init__" + extsep + "py" 767 init_py_filename = self._find_file(filename, init_py) 768 if init_py_filename: 769 return filename, init_py_filename 770 return None 771 772 def _find_file(self, d, filename): 773 774 """ 775 Return the filename obtained when searching the directory 'd' for the 776 given 'filename', or None if no actual file exists for the filename. 777 """ 778 779 filename = join(d, filename) 780 if exists(filename): 781 return filename 782 else: 783 return None 784 785 def load(self, name): 786 787 """ 788 Load the module or package with the given 'name'. Return an object 789 referencing the loaded module or package, or None if no such module or 790 package exists. 791 """ 792 793 # Loaded modules are returned immediately. 794 # Modules may be known but not yet loading (having been registered as 795 # submodules), loading, loaded, or completely unknown. 796 797 module = self.get_module(name) 798 799 if module: 800 return self.modules[name] 801 802 # Otherwise, modules are loaded. 803 804 # Split the name into path components, and try to find the uppermost in 805 # the search path. 806 807 path = name.split(".") 808 path_so_far = [] 809 module = None 810 811 for p in path: 812 813 # Get the module's filesystem details. 814 815 if not path_so_far: 816 m = self.find_in_path(p) 817 elif d: 818 m = self.find(d, p) 819 else: 820 m = None 821 822 path_so_far.append(p) 823 module_name = ".".join(path_so_far) 824 825 # Return None if the module could not be located. 826 827 if not m: 828 if self.verbose: 829 print >>sys.stderr, "Not found (%s)" % name 830 return None 831 832 # Get the directory and module filename. 833 834 d, filename = m 835 836 # Get the module itself. 837 838 return self.load_from_file(filename, module_name) 839 840 def load_from_file(self, filename, module_name=None): 841 842 "Load the module from the given 'filename'." 843 844 if module_name is None: 845 module_name = "__main__" 846 847 module = self.modules.get(module_name) 848 849 if not module: 850 851 # Try to load from cache. 852 853 module = self.load_from_cache(filename, module_name) 854 if module: 855 return module 856 857 # If no cache entry exists, load from file. 858 859 module = inspector.InspectedModule(module_name, self) 860 self.add_module(module_name, module) 861 self.update_cache_validity(module) 862 863 self._load(module, module_name, lambda m: m.parse, filename) 864 865 return module 866 867 def update_cache_validity(self, module): 868 869 "Make 'module' valid in the cache, but invalidate accessing modules." 870 871 accessing = self.accessing_modules.get(module.name) 872 if accessing: 873 self.invalidated.update(accessing) 874 if module.name in self.invalidated: 875 self.invalidated.remove(module.name) 876 877 def source_is_new(self, filename, module_name): 878 879 "Return whether 'filename' is newer than the cached 'module_name'." 880 881 if self.cache: 882 cache_filename = join(self.cache, module_name) 883 return not exists(cache_filename) or \ 884 getmtime(filename) > getmtime(cache_filename) or \ 885 module_name in self.invalidated 886 else: 887 return True 888 889 def load_from_cache(self, filename, module_name): 890 891 "Return a module residing in the cache." 892 893 module = self.modules.get(module_name) 894 895 if not module and not self.source_is_new(filename, module_name): 896 module = CachedModule(module_name, self) 897 self.add_module(module_name, module) 898 899 filename = join(self.cache, module_name) 900 self._load(module, module_name, lambda m: m.from_cache, filename) 901 902 return module 903 904 def _load(self, module, module_name, fn, filename): 905 906 """ 907 Load 'module' for the given 'module_name', and with 'fn' performing an 908 invocation on the module with the given 'filename'. 909 """ 910 911 # Load the module. 912 913 if self.verbose: 914 print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename 915 fn(module)(filename) 916 917 # Add the module object if not already defined. 918 919 if not self.objects.has_key(module_name): 920 self.objects[module_name] = Reference("<module>", module_name) 921 922 def add_module(self, module_name, module): 923 924 """ 925 Return the module with the given 'module_name', adding a new module 926 object if one does not already exist. 927 """ 928 929 self.modules[module_name] = module 930 if module_name in self.to_import: 931 self.to_import.remove(module_name) 932 933 # vim: tabstop=4 expandtab shiftwidth=4