#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where it is omitted, no cache is consulted
        or written.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names queued for import, names required in the final program, and
        # modules removed for not being required.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}

        # Loaded modules, the names of modules accessing each module, and the
        # names of modules whose cached form is no longer valid.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is missing."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The 'details' are then recorded in the cache. Where no
        cache is configured, nothing is done.
        """

        # Without a cache directory there is nothing to compare or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This requires a cache
        directory to have been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Where no cache is configured, nothing
        is done.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache, if a cache is configured."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        """
        Identify 'name' using stored object and external name records, with
        stored objects taking precedence.
        """

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname' as a set of
        references. Class attributes may be provided by instances. An empty set
        is returned where 'ref' is of another kind or has no such attribute.
        """

        kind = ref.get_kind()

        if kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
        else:
            attr = None

        if attr:
            return set([attr])
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname'.
        Where no such attribute is recorded, a false value is returned.
        """

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A recorded constant for the attribute is used in
        preference to a variable reference.
        """

        # The constant, if any, does not depend on the attribute origin and is
        # therefore obtained once.

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname' as a set of references.
        """

        ref = self.get_class_attribute(object_type, attrname)

        if ref:
            refs = set([ref])
        else:
            refs = set()

        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname',
        or None if the module has no such attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        # Modules already present are not queued again.

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        # Record the accessor so that it can be invalidated in the cache.

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Names are only dequeued when a module is added, so a module
                # that could not be found must be dequeued here. Otherwise,
                # this loop would never terminate.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, working from a snapshot since entries are
        # deleted during the traversal.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, replacing "<depends>" references
        with those found for them and requiring the modules providing them.
        Unresolved names are reported on standard error.
        """

        for d in [self.objects, self.all_name_references]:
            resolved = {}

            for name, ref in d.items():
                if ref.has_kind("<depends>"):
                    found = self.find_dependency(ref)
                    if found:
                        resolved[name] = found
                    else:
                        print >>sys.stderr, "Name %s references an unknown object: %s" % (name, ref.get_origin())

            # Record the resolved names and identify required modules.

            for name, ref in resolved.items():
                d[name] = ref

                module_name = self.get_module_provider(ref)
                if module_name:
                    self.required.add(module_name)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref' by following "<depends>"
        references through the stored objects. References already visited are
        not followed again.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors that names a known module. Return None if there is none.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in list(self.classes):
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed along the current inheritance
        chain. A ProgramError is raised if 'name' is found to inherit from
        itself.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are taken in reverse order so that
        # earlier bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        # NOTE(review): This assumes that an entry already exists in
        # NOTE(review): 'subclasses' for every base class - confirm against the
        # NOTE(review): code populating that table.

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names provided
        both as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class, using the type reference recorded by the
        'initialise' method.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        # NOTE(review): This assumes that every class provides __init__,
        # NOTE(review): presumably through inheritance - confirm.

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        return self._find_package(d, name) or self._find_module(d, name)

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            print >>sys.stderr, "Loading", name

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. Only the first component is
            # sought in the search path. Later components must reside in the
            # package directory of the preceding component.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. A module already known
        by that name is returned without being loaded again. Otherwise, the
        cached form is used if valid, with the source being inspected if not.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)

        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cached form exists, if the module
        has been invalidated, or if no cache is configured.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the source 'filename' and the
        given 'module_name'. A module already known by that name is returned
        instead. None is returned where the cached form cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            # Load from the cached form instead of from the source file.

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'. Here, 'fn' accepts
        the module and returns a callable that is then given the filename.
        """

        # Load the module.

        if self.verbose:
            print >>sys.stderr, "Loading", filename
        fn(module)(filename)
        if self.verbose:
            print >>sys.stderr, "Loaded", filename

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for the name and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4