#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for import but not yet loaded.

        self.to_import = set()

        # Loaded modules by name, the names of the modules accessing each
        # module, and the names of modules whose cache entries are stale.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is missing."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The 'details' are then recorded in the cache directory.

        Nothing is done if no cache directory has been configured.
        """

        # Without a cache there is nothing to validate or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must have
        been configured for this to be meaningful.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A Reference
        given as 'value' is aliased to 'name'; any other value is wrapped in a
        new Reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances.

        The result is always a set, being empty where 'ref' is of a kind other
        than class, instance or module, or where no attribute is found.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()

        # A recorded constant for the attribute takes precedence over a general
        # variable reference.

        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = ref and set([ref]) or set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, module):

        """
        Queue the module with the given 'name' for import from the given
        'module'.
        """

        if name not in self.modules:
            self.to_import.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(module.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # A module that cannot be found is never registered and would
                # otherwise remain queued, causing this loop to run forever.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, replacing each object reference
        of the "<depends>" kind with the reference it ultimately depends on.
        Unresolvable names are reported on standard error.
        """

        # Collect replacements separately so that the object mapping is not
        # modified while being traversed.

        resolved = {}

        for name, ref in self.objects.items():
            if ref.has_kind("<depends>"):
                found = self.find_dependency(ref)
                if found:
                    resolved[name] = found
                else:
                    sys.stderr.write("Name %s references an unknown object: %s\n" % (name, ref.get_origin()))

        for name, ref in resolved.items():
            self.objects[name] = ref

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following chains of "<depends>"
        references. Return None where the chain leads to an unknown object, or
        the reference at which a cycle is detected.
        """

        # References already followed are remembered to stop at cycles.

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in list(self.classes.keys()):
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far and is used to detect a
        class inheriting from itself, in which case ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that earlier
        # bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Make sure that a collection exists for the base class before
                # recording the subclass.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names defined both as
        instance attributes and as class attributes of the same class.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class by recording the origin of the
        "__builtins__.type" object as the "__class__" attribute.
        """

        # NOTE: The type object must already be known to the importer: no
        # NOTE: check is made for a missing reference here.

        ref = self.get_object("__builtins__.type")
        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        # NOTE: Every class is expected to provide an "__init__" attribute,
        # NOTE: with a KeyError being raised otherwise.

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the instantiator
            # signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (name,))

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. The first component is
            # sought in the search path; later components are only sought
            # inside the package directory of the preceding component.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % (name,))

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the optional
        'module_name' or "__main__" if no name is given. Return the module,
        which may be one already loaded or one obtained from the cache.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cache is configured, if no cache
        entry exists, or if the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name', or
        the module already loaded under that name. Return None where the source
        'filename' is considered newer than any cache entry.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module, with 'fn' selecting the method that is then called
        # with the filename.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (filename,))
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % (filename,))

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for it amongst the known objects and removing the name from
        the collection of modules awaiting import.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4