#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_locals = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        """
        Create the cache directory if a cache has been configured and the
        directory does not already exist.
        """

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.

        Nothing is done if no cache directory has been configured.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must have
        been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. An existing
        reference is aliased under 'name'; any other value is wrapped in a new
        reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        """
        Identify 'name' using stored object and external name records. Where no
        stored object exists, or where it is a module, any external name record
        takes precedence.
        """

        ref = self.objects.get(name)
        if not ref or ref.has_kind("<module>"):
            ref = self.all_name_references.get(name) or ref
        return ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set of references,
        empty where nothing is provided.
        """

        kind = ref.get_kind()

        if kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
        else:
            attr = None

        if attr:
            return set([attr])
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        # Any constant recorded for the attribute is the same for every origin
        # and is therefore obtained only once.

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)

        if ref:
            refs = set([ref])
        else:
            refs = set()

        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Convenience methods for deducing which kind of object provided an
    # attribute.

    def get_attribute_provider(self, ref, attrname):

        """
        Return the kind of provider of the attribute accessed via 'ref' using
        'attrname'.
        """

        kind = ref.get_kind()

        if kind in ["<class>", "<module>"]:
            return kind
        else:
            return self.get_instance_attribute_provider(ref.get_origin(), attrname)

    def get_instance_attribute_provider(self, object_type, attrname):

        """
        Return the kind of provider of the attribute accessed via an instance of
        'object_type' using 'attrname'.
        """

        if self.get_class_attribute(object_type, attrname):
            return "<class>"
        else:
            return "<instance>"

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Loaded modules are dequeued when added, but a module that
                # cannot be found is never added: dequeue it explicitly so that
                # this loop terminates.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.core.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, working on a snapshot since the module
        # collection is modified.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Programs with missing (unresolved) names cannot be finalised.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)

                # Record unresolved names.

                if not found:
                    self.missing.add((module.name, ref.get_origin()))
                    continue

                # Record the resolved names and identify required modules.

                # Find the providing module of this reference.
                # Where definitive details of the origin cannot be found,
                # identify the provider using the deferred reference.
                # NOTE: This may need to test for static origins.

                provider = self.get_module_provider(found.unresolved() and ref or found)
                ref.mutate(found)

                if not provider:
                    continue

                module.required.add(provider)

                # Providers need not have been queued (the main module and
                # implicitly loaded parent packages are not), so an entry may
                # not yet exist.

                init_item(self.accessing_modules, provider, set)
                self.accessing_modules[provider].add(module.name)

                # Postpone any inclusion of the provider until this
                # module becomes required.

                if module.name not in self.required:
                    init_item(self.waiting, module.name, set)
                    self.waiting[module.name].add(provider)

                # Make this module required in the accessing module.

                elif provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider, "for", ref

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>" references
        until something else is found or a reference is encountered again.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors known as a module, or None if there is no such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed along the current inheritance
        chain. A ProgramError is raised if a class inherits from itself.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse order so that
        # earlier bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Create the record for the base class if nothing has done so
                # already.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names provided
        both as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]
            self.function_parameters[cls] = self.function_parameters[initialiser]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. Modules already known
        are returned without being loaded again.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is always considered new if no cache is configured, if no cached
        module exists, or if the cached module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name',
        loading it if it is not already known and the source 'filename' is not
        newer than the cached details. Return None if the cache cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any existing
        module of that name, and remove the name from the import queue.
        """

        self.modules[module_name] = module
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4