#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where no cache is given, all cache
        operations are skipped.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for loading, and names of modules that must
        # be retained in the final program.

        self.to_import = set()
        self.required = set(["__main__"])

        # Loaded modules by name, the names of the modules accessing each
        # queued module, and the names of modules whose cache entries must not
        # be used.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        # Program objects by name, and the base class references of each class.
        # The function details are consulted here only to define instantiators
        # (presumably being populated by the inspected modules).

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # The reference for the type of all classes, obtained during program
        # initialisation.

        self.type_ref = None

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured but does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and record the 'details' for subsequent checks. Nothing
        is done where no cache directory is configured.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This is only meaningful
        where a cache directory is configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no details have been recorded
        or if no cache directory is configured.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done where no cache
        directory is configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set, being empty
        where no attribute is found or where 'ref' has any other kind.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname',
        or a false value if the class provides no such attribute.
        """

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A recorded constant for the attribute is used
        in preference to a general variable reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname',
        or None if the module provides no such attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Only successfully added modules are removed from the queue
                # elsewhere, so a module that cannot be found must be removed
                # here to prevent this loop from running forever.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a copy since the module
        # collection is modified.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, replacing dependency references
        with the objects they ultimately refer to, and requiring the modules
        that provide those objects. Unresolvable names are reported on standard
        error.
        """

        resolved = {}

        for name, ref in self.objects.items():
            if ref.has_kind("<depends>"):
                found = self.find_dependency(ref)
                if found:
                    resolved[name] = found
                else:
                    sys.stderr.write("Name %s references an unknown object: %s\n" % (name, ref.get_origin()))

        # Record the resolved names and identify required modules.

        for name, ref in resolved.items():
            self.objects[name] = ref

            module_name = self.get_module_provider(ref)
            if module_name:
                self.required.add(module_name)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following chains of dependency
        references. Return None where a chain leads to an unknown name. Where
        a chain is circular, the reference at which the cycle was detected is
        returned.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors that is a known module. Return None if there is no such
        module.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed in order to detect circular
        inheritance, upon which a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that earlier
        # bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Ensure that a collection exists for the base class before
                # recording the subclass.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names provided
        both as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class. This relies on the type reference obtained
        when the program was initialised.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures. Each class acquires the
        parameters of its initialiser, excluding the first parameter, together
        with the initialiser's defaults.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]
            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. Only the first component is
            # sought in the search path: later ones must reside in the package
            # directory of their parent.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. An already loaded
        module is returned as it is; otherwise, a usable cache entry is
        preferred to inspecting the source file.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new where no cache is configured, where no
        cache entry exists, or where the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name'
        having the source 'filename', or the already loaded module of that
        name. Return None where the source is newer than any cache entry.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'. Here, 'fn' accepts
        the module and returns the callable to be given the filename.
        """

        # Load the module.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % filename)
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % filename)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        previously registered under that name, recording a module reference
        for the name, and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4