#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. If it is omitted, no caching is performed.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Providers whose inclusion is postponed until the accessing module
        # becomes required (rebuilt by each call to 'resolve').

        self.waiting = {}

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # The reference for "__builtins__.type" (obtained by 'initialise').

        self.type_ref = None

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def _report(self, *words):

        """
        Write the given 'words', separated by spaces and followed by a newline,
        to standard error if verbose output has been requested.
        """

        if self.verbose:
            sys.stderr.write("%s\n" % " ".join([str(word) for word in words]))

    # Cache management.

    def make_cache(self):

        "Create the cache directory if one is configured but does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.

        Nothing is done if no cache directory is configured.
        """

        # The cache is optional: without this test, the default configuration
        # would fail when building the details filename.

        if not self.cache:
            return

        if self.get_cache_details() != details:
            self.remove_cache()

        # The details file resides in the cache directory and is therefore
        # rewritten regardless of whether the cache was just emptied.

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This requires a cache
        directory to have been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache, if a cache directory is configured."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A Reference
        given as 'value' is aliased to 'name'; any other value is used to
        construct a new Reference for 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return a set of attributes provided by 'ref' for 'attrname'. Class
        attributes may be provided by instances. References of any kind other
        than class, instance or module yield an empty set.
        """

        kind = ref.get_kind()
        origin_attr = None

        if kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<class>":
            origin_attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<module>":
            origin_attr = self.get_module_attribute(ref.get_origin(), attrname)

        return origin_attr and set([origin_attr]) or set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname', or
        a false value if the class records no such attribute.
        """

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A constant recorded for the attribute is
        preferred over a general "<var>" reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname' as a set of references.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = ref and set([ref]) or set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname', or
        None if the module records no such attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules. Loading may queue further modules, so the
        # queue is copied for each pass and tested again afterwards.

        while self.to_import:
            for name in list(self.to_import):
                self.load(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a snapshot because entries
        # are deleted from the dictionary along the way.

        for name, module in list(self.modules.items()):
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Finalisation is refused if any names remain unresolved.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, recording unresolved references
        in the 'missing' collection and adding the modules providing resolved
        references to the required modules where appropriate.
        """

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)

                if not found:
                    self.missing.add((module.name, ref.get_origin()))
                    continue

                # Record the resolved names and identify required modules.

                ref.mutate(found)

                # Find the providing module of this reference.

                provider = self.get_module_provider(ref)
                if not provider:
                    continue

                module.required.add(provider)

                # Providers need not have been queued by another module and
                # may therefore lack an entry.

                init_item(self.accessing_modules, provider, set)
                self.accessing_modules[provider].add(module.name)

                # Postpone any inclusion of the provider until this module
                # becomes required.

                if module.name not in self.required:
                    init_item(self.waiting, module.name, set)
                    self.waiting[module.name].add(provider)

                # Make this module required in the accessing module.

                else:
                    self.required.add(provider)

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:

                # Only newly required providers are followed, preventing
                # endless recursion between mutually dependent modules.

                if provider not in self.required:
                    self.required.add(provider)
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>" references
        until a reference of another kind is found, nothing can be identified,
        or a reference is encountered for a second time.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref': the first of its ancestors
        naming a module known to the importer, or None if there is no such
        ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes already being processed and is used to detect
        classes inheriting from themselves, causing a ProgramError to be raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse order so that
        # attributes from bases listed earlier replace those listed later.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Create the subclass collection on demand since no entry is
                # guaranteed to exist for the base class.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names employed by both
        instance and class attributes of a class.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class by defining its "__class__" attribute using
        the origin of the type reference obtained by 'initialise'.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the
            # instantiator signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.

        Packages take precedence over single file modules.
        """

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        self._report("Loading", name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path_so_far = []
        module = None

        # The package directory of the previously loaded component, or None
        # where that component was a single file module.

        d = None

        for p in name.split("."):

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                self._report("Not found (%s)" % name)
                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', registering it using the
        given 'module_name' or as "__main__" if no name is given. A module
        already known by this name is returned without being loaded again, and
        a valid cached form of the module is preferred over the source file.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cache is configured, if there is no
        cached form of the module, or if the module has been invalidated.
        """

        if not self.cache:
            return True

        cache_filename = join(self.cache, module_name)
        return not exists(cache_filename) or \
            getmtime(filename) > getmtime(cache_filename) or \
            module_name in self.invalidated

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the source 'filename' and the
        given 'module_name', or an already known module of that name. Return
        None if the module is not known and the cached form cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'. Here, 'fn' accepts
        the module and returns a callable which is then given the filename.
        """

        # Load the module.

        self._report("Loading", filename)
        fn(module)(filename)
        self._report("Loaded", filename)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        already registered under that name, recording a module reference for
        the name, and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4