#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where no cache is given, all cache
        operations are skipped.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for loading, names of modules required in the
        # final program, and modules removed from the program by name.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}

        # Loaded modules by name, the names of modules accessing each module,
        # and the names of modules whose cached form is no longer valid.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and record 'details' as the current cache details.

        Nothing is done if no cache directory has been configured.
        """

        # Without a cache directory there is nothing to compare or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This is only meaningful where
        a cache directory has been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache, if a cache directory is configured."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. A set of references is returned, being
        empty where 'ref' is not a class, instance or module.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):

            # Prefer any recorded constant for the attribute over a general
            # variable reference.

            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        if name not in self.modules:
            return None

        return self.modules[name]

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Only successfully added modules are removed from the queue by
                # add_module, so a module that cannot be found must be dropped
                # here to prevent this loop from repeating forever.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules. A snapshot of the items is taken because the
        # module dictionary is modified during the iteration.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        # Providers waiting for each not-yet-required module to become required.

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    sys.stderr.write("Module %s references an unknown object: %s\n" % (module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    ref.mutate(found)

                    # Find the providing module of this reference.

                    provider = self.get_module_provider(ref)
                    if provider:

                        module.required.add(provider)

                        # A provider may have been loaded without ever having
                        # been queued, leaving it without an accessor record.

                        init_item(self.accessing_modules, provider, set)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        else:
                            self.required.add(provider)

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in list(self.waiting.keys()):
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:

                # Providers already required need not be visited again, which
                # also stops the recursion for mutually dependent modules.

                if provider not in self.required:
                    self.required.add(provider)
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>" references
        until some other kind of reference (or nothing) is found, or until a
        reference is encountered for a second time.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of the
        reference's ancestors that names a known module, or None if there is no
        such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in list(self.classes.keys()):
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The 'visited' list
        records the classes whose processing has been started and is used to
        detect classes inheriting from themselves, in which case ProgramError is
        raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that attributes
        # from earlier bases replace those from later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names defined as
        both instance and class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class using the type reference recorded by the
        'initialise' method.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the instantiator.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return self.modules[name]

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (name,))

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. Only the first component is
            # sought in the search path; later ones must reside in the package
            # directory of the preceding component.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % (name,))

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the given 'module_name'
        or "__main__" if no name is given. A module already known by that name
        is returned without being loaded again.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cached form exists, if the module
        has been invalidated, or if no cache is configured.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name', or
        None if the source 'filename' is newer than any cached form.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module: 'fn' selects the loading method from the module and
        # that method is then called with the filename.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (filename,))
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % (filename,))

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for it and removing it from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        if module_name in self.to_import:
            self.to_import.remove(module_name)

# vim: tabstop=4 expandtab shiftwidth=4