#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module bookkeeping: known modules by name, modules in the order in
        # which loading completed, modules currently being loaded, hidden
        # modules by name, conditions for revealing modules, and the names of
        # modules whose cache entries are no longer valid.

        self.modules = {}
        self.modules_ordered = []
        self.loading = set()
        self.hidden = {}
        self.revealing = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The 'details' are then recorded in the cache.

        Nothing is done if no cache directory has been configured.
        """

        # The cache is optional: without one there is nothing to validate and
        # nowhere to record the details.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must have
        been configured since it is used to construct the filename.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances.

        A set of references is returned, this being empty where 'ref' is of an
        unsupported kind or where no attribute is found.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        # The class attribute mapping yields a name that is then looked up
        # amongst the known objects.

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):

            # Prefer any recorded constant for the attribute, otherwise
            # providing a general variable reference.

            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = ref and set([ref]) or set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name, hidden=False):

        """
        Return the module with the given 'name', or None if it is not known.
        Unless 'hidden' is set to a true value, the module is revealed.
        """

        if name not in self.modules:
            return None

        # Obtain the module and attempt to reveal it.

        module = self.modules[name]
        if not hidden:
            self.reveal_module(module)
        return module

    def reveal_module(self, module):

        "Check if 'module' is hidden and reveal it."

        if module.name in self.hidden:
            del self.hidden[module.name]

            # Reveal referenced modules.

            module.reveal_referenced()

    def set_revealing(self, module, name, instigator):

        """
        Make the revealing of 'module' conditional on 'name' for the given
        'instigator' of the reveal operation.
        """

        # Create the collection of conditions on first use: the mapping starts
        # out empty and indexing it directly would raise KeyError.

        init_item(self.revealing, module.name, set)
        self.revealing[module.name].add((name, instigator))

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Resolve dependencies within the program.

        for module in self.modules_ordered:
            module.resolve()

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.remove_hidden()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered during the current traversal.

        ProgramError is raised where a class is found to inherit from itself.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # NOTE(review): this relies on an entry for each base class
                # already being present in the subclasses mapping, presumably
                # established by the modules defining the classes - confirm.

                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        "Derive shadowed attributes for classes."

        # Shadowed attributes are those names defined both as instance
        # attributes and as class attributes of the same class.

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def remove_hidden(self):

        "Remove all hidden modules."

        # First reveal any modules exposing names.

        for modname, names in self.revealing.items():
            module = self.modules[modname]

            # Obtain the imported names and determine whether they should cause
            # the module to be revealed.

            for (name, instigator) in names:
                if module is not instigator:

                    # Only if an object is provided by the module should the
                    # module be revealed. References to objects in other modules
                    # should not in themselves expose the module in which those
                    # references occur.

                    ref = module.get_global(name)
                    if ref and ref.provided_by_module(module.name):
                        self.reveal_module(module)
                        instigator.revealed.add(module)

        # Then remove all modules that are still hidden.

        for modname in self.hidden:
            module = self.modules[modname]
            module.unpropagate()
            del self.modules[modname]
            ref = self.objects.get(modname)
            if ref and ref.get_kind() == "<module>":
                del self.objects[modname]

    def set_class_types(self):

        "Set the type of each class."

        ref = self.get_object("__builtins__.type")
        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        # NOTE(review): every class is assumed to provide or inherit an
        # "__init__" attribute - confirm.

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first parameter of the initialiser is omitted from the
            # parameters of the instantiator.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name, return_leaf=False, hidden=False):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.

        Where 'return_leaf' is specified, the final module in the chain is
        returned. Where 'hidden' is specified, the module is marked as hidden.
        """

        if return_leaf:
            name_for_return = name
        else:
            name_for_return = name.split(".")[0]

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name, hidden)

        if module:
            return self.modules[name_for_return]

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        top = module = None

        for p in path:

            # Get the module's filesystem details. For components after the
            # first, 'd' is the package directory found for the preceding
            # component, being None where that component was a plain module.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            submodule = self.load_from_file(filename, module_name, hidden)

            if module is None:
                top = submodule

            module = submodule

        # Return either the deepest or the uppermost module.

        return return_leaf and module or top

    def load_from_file(self, filename, module_name=None, hidden=False):

        """
        Load the module from the given 'filename'. Where 'module_name' is
        omitted, the module is named "__main__". Where 'hidden' is specified,
        the module is marked as hidden when loaded.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name, hidden)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

        # Initiate loading if not already in progress.

        if not module.loaded and module not in self.loading:
            self._load(module, module_name, hidden, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        self.invalidated.update(module.accessing_modules)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also regarded as new where no cache is configured, where no
        cached file exists, or where the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name, hidden=False):

        """
        Return a module residing in the cache for the source 'filename' and
        the given 'module_name'. Where the source is newer than the cached
        information, any already-known module is returned without the cache
        being consulted, this being None if the module is not known.
        """

        module = self.modules.get(module_name)

        if not self.source_is_new(filename, module_name):

            if not module:
                module = inspector.CachedModule(module_name, self)
                self.add_module(module_name, module)

            if not module.loaded and module not in self.loading:
                filename = join(self.cache, module_name)
                self._load(module, module_name, hidden, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, hidden, fn, filename):

        """
        Load 'module' for the given 'module_name', with the module being hidden
        if 'hidden' is a true value, and with 'fn' performing an invocation on
        the module with the given 'filename'.

        Any exception raised by the invocation is propagated, with the module
        no longer being regarded as loading.
        """

        # Indicate that the module is hidden if requested.

        if hidden:
            self.hidden[module_name] = module

        # Indicate that loading is in progress and load the module, making sure
        # that a failure does not leave the module permanently marked as
        # loading.

        self.loading.add(module)
        try:
            if self.verbose:
                sys.stderr.write("Loading %s\n" % filename)
            fn(module)(filename)
            if self.verbose:
                sys.stderr.write("Loaded %s\n" % filename)
        finally:
            self.loading.remove(module)

        # Only successfully loaded modules are recorded in the load order.

        self.modules_ordered.append(module)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        previously registered with that name, and record a module reference for
        the name amongst the known objects.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)

# vim: tabstop=4 expandtab shiftwidth=4