#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for import.

        self.to_import = set()

        # Loaded modules, the modules accessing each module, and the modules
        # whose cached details are no longer valid.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is missing."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and then record the 'details' in the cache.

        Nothing is done if no cache directory has been configured.
        """

        # The cache is optional: without one there is nothing to validate.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        # Removing the cache also removes the details file, so the details are
        # always written afterwards.

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        "Return the filename for the cache details."

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is a reference, an alias of it for 'name' is stored; otherwise,
        a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances.

        A set of references is returned, this being empty where 'ref' is not a
        class, instance or module, or where no attribute is found.
        """

        kind = ref.get_kind()

        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()

        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)

        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()

        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.

        A set is returned containing, for each recorded attribute, either the
        constant recorded for 'attrname' or a "<var>" reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)

        # The constant does not depend on the individual attribute origins.

        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, module):

        """
        Queue the module with the given 'name' for import from the given
        'module'.
        """

        if name not in self.modules:
            self.to_import.add(name)

        # Record the accessing module regardless of whether the accessed
        # module has already been loaded.

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(module.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name' or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Loaded modules are removed from the queue when added, but
                # modules that cannot be found would otherwise remain queued
                # forever, preventing this loop from terminating.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        # Collect replacements separately to avoid changing the objects
        # mapping while iterating over it.

        resolved = {}

        for name, ref in self.objects.items():
            if ref.has_kind("<depends>"):
                ref = self.find_dependency(ref)
                if ref:
                    resolved[name] = ref

        for name, ref in resolved.items():
            self.objects[name] = ref

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>" references
        until some other kind of reference, a missing object or a reference
        already visited is encountered.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far and is used to detect
        classes inheriting from themselves, in which case a ProgramError is
        raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse order so that
        # attributes from earlier bases replace those from later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Make sure that a collection exists for the base class before
                # recording the subclass.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names defined both as
        instance attributes and as class attributes of the same class.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        ref = self.get_object("__builtins__.type")
        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first parameter of the initialiser is omitted from the
            # instantiator signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details: the first component is
            # sought in the search path, later ones in the enclosing package.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. Return the module.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is always considered new where no cache is configured, where no
        cached file exists, or where the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache, or None if the module is not
        already known and its source 'filename' is newer than any cached form.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'. Here, 'fn' is
        called with the module and must return a callable accepting 'filename'.
        """

        # Load the module.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % filename)
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % filename)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for it amongst the known objects and removing the name from
        the collection of modules queued for import.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        if module_name in self.to_import:
            self.to_import.remove(module_name)

# vim: tabstop=4 expandtab shiftwidth=4