javaclass

Annotated javaclass/classhook.py

166:fed5a5ceb0e6
2005-02-13 Paul Boddie Introduced initial measures for handling circular imports/references.
paul@137 1
#!/usr/bin/env python
paul@137 2
paul@137 3
import ihooks # for the import machinery
paul@137 4
import os, glob # for getting suitably-named files
paul@137 5
from imp import PY_SOURCE, PKG_DIRECTORY, C_BUILTIN # import machinery magic
paul@137 6
import classfile, bytecode # Java class support
paul@137 7
import zipfile # for Java archive inspection
paul@166 8
import sys
paul@137 9
paul@137 10
# NOTE: Arbitrary constants pulled from thin air.
# They serve as the data-type (third) element of the suffix tuples returned by
# ClassHooks.get_suffixes and of the location tuples built by ClassLoader.
# JAVA_CLASS is defined here but not referenced in the code visible in this
# file.
paul@137 11
paul@137 12
JAVA_PACKAGE = 20041113
paul@137 13
JAVA_CLASS = 20041114
paul@137 14
JAVA_ARCHIVE = 20041115
paul@137 15
paul@137 16
class ClassHooks(ihooks.Hooks):
paul@137 17
paul@137 18
    "A filesystem hooks class providing information about supported files."
paul@137 19
paul@137 20
    def get_suffixes(self):

        """
        Return the recognised suffixes.

        The result is a list of (suffix, mode, data type) tuples: first the
        entry for Java packages (empty suffix), then the entry for ".jar"
        archives, followed by whatever ihooks.Hooks.get_suffixes reports.
        """

        java_suffixes = [
            ("", "", JAVA_PACKAGE),
            (os.extsep + "jar", "r", JAVA_ARCHIVE),
        ]
        return java_suffixes + ihooks.Hooks.get_suffixes(self)
paul@137 25
paul@137 26
    def path_isdir(self, x, archive=None):
paul@137 27
paul@137 28
        "Return whether 'x' is a directory in the given 'archive'."
paul@137 29
paul@137 30
        if archive is None:
paul@137 31
            return ihooks.Hooks.path_isdir(self, x)
paul@137 32
paul@137 33
        return self._get_dirname(x) in archive.namelist()
paul@137 34
paul@137 35
    def _get_dirname(self, x):
paul@137 36
paul@137 37
        """
paul@137 38
        Return the directory name for 'x'.
paul@137 39
        In zip files, the presence of "/" seems to indicate a directory.
paul@137 40
        """
paul@137 41
paul@137 42
        if x.endswith("/"):
paul@137 43
            return x
paul@137 44
        else:
paul@137 45
            return x + "/"
paul@137 46
paul@137 47
    def listdir(self, x, archive=None):
paul@137 48
paul@137 49
        "Return the contents of the directory 'x' in the given 'archive'."
paul@137 50
paul@137 51
        if archive is None:
paul@137 52
            return ihooks.Hooks.listdir(self, x)
paul@137 53
paul@137 54
        x = self._get_dirname(x)
paul@137 55
        l = []
paul@137 56
        for path in archive.namelist():
paul@137 57
paul@137 58
            # Find out if the path is within the given directory.
paul@137 59
paul@137 60
            if path != x and path.startswith(x):
paul@137 61
paul@137 62
                # Get the path below the given directory.
paul@137 63
paul@137 64
                subpath = path[len(x):]
paul@137 65
paul@137 66
                # Find out whether the path is an object in the current directory.
paul@137 67
paul@137 68
                if subpath.count("/") == 0 or subpath.count("/") == 1 and subpath.endswith("/"):
paul@137 69
                    l.append(subpath)
paul@137 70
paul@137 71
        return l
paul@137 72
paul@137 73
    def matching(self, dir, extension, archive=None):
paul@137 74
paul@137 75
        """
paul@137 76
        Return the matching files in the given directory 'dir' having the given
paul@137 77
        'extension' within the given 'archive'. Produce a list containing full
paul@137 78
        paths as opposed to simple filenames.
paul@137 79
        """
paul@137 80
paul@137 81
        if archive is None:
paul@137 82
            return glob.glob(self.path_join(dir, "*" + extension))
paul@137 83
paul@137 84
        dir = self._get_dirname(dir)
paul@137 85
        l = []
paul@137 86
        for path in self.listdir(dir, archive):
paul@137 87
            if path.endswith(extension):
paul@137 88
                l.append(self.path_join(dir, path))
paul@137 89
        return l
paul@137 90
paul@137 91
    def read(self, filename, archive=None):

        """
        Return the contents of the file with the given 'filename' in the given
        'archive'.

        With an 'archive', the member called 'filename' is read from it.
        Otherwise 'filename' is opened on the filesystem in binary mode; the
        file is closed again even if reading fails.
        """

        if archive is not None:
            return archive.read(filename)

        f = open(filename, "rb")
        try:
            return f.read()
        finally:
            f.close()
paul@137 104
paul@137 105
class ClassLoader(ihooks.ModuleLoader):
paul@137 106
paul@137 107
    "A class providing support for searching directories for supported files."
paul@137 108
paul@137 109
    def find_module(self, name, path=None):
paul@137 110
paul@137 111
        """
paul@137 112
        Find the module with the given 'name', using the given 'path' to locate
paul@137 113
        it. Note that ModuleLoader.find_module is almost sufficient, but does
paul@137 114
        not provide enough support for "package unions" where the root of a
paul@137 115
        package hierarchy may appear in several places.
paul@137 116
paul@137 117
        Return a list of locations (each being the "stuff" data structure used
paul@137 118
        by load_module); this replaces the single "stuff" value or None returned
paul@137 119
        by ModuleLoader.find_module.
paul@137 120
        """
paul@137 121
paul@137 122
        if path is None:
paul@137 123
            path = [None] + self.default_path()
paul@137 124
paul@137 125
        found_locations = []
paul@137 126
paul@137 127
        for dir in path:
paul@137 128
            stuff = self.find_module_in_dir(name, dir)
paul@137 129
            if stuff:
paul@137 130
                found_locations.append(stuff)
paul@137 131
paul@137 132
        return found_locations
paul@137 133
paul@137 134
    def find_module_in_dir(self, name, dir, allow_packages=1):

        """
        Find the module with the given 'name' in the given directory 'dir'.
        Since Java packages/modules are directories containing class files,
        return the required information tuple only when the path constructed
        from 'dir' and 'name' refers to a directory containing class files.

        The standard ihooks lookup is tried first and its result returned if
        it finds anything. Otherwise the result is one of:

          * (None, ".", ("", "", JAVA_PACKAGE)) for the special name
            "__this__" when 'dir' is None;
          * (archive, "<archive path>:<internal path>", <jar suffix info>)
            for a package found inside a .jar archive, where 'archive' is the
            open ZipFile;
          * (None, path, ("", "", JAVA_PACKAGE)) for a package found on the
            filesystem;
          * None when nothing suitable exists.

        An archive opened during the search is closed again unless it is
        handed back to the caller as part of a successful result.
        """

        result = ihooks.ModuleLoader.find_module_in_dir(self, name, dir, allow_packages)
        if result is not None:
            return result

        # Provide a special name for the current directory.

        if name == "__this__":
            if dir is None:
                return (None, ".", ("", "", JAVA_PACKAGE))
            return None

        # Where no directory is given, return failure immediately.

        if dir is None:
            return None

        # Detect archives.

        archive, archive_path, path = self._get_archive_and_path(dir, name)

        found = 0
        try:
            found = self._find_module_at_path(path, archive)
        finally:

            # Do not leave the archive open when it is not part of the result.

            if archive is not None and not found:
                archive.close()

        if not found:
            return None

        if archive is not None:
            return (archive, archive_path + ":" + path, (os.extsep + "jar", "r", JAVA_ARCHIVE))
        return (None, path, ("", "", JAVA_PACKAGE))
paul@137 178
paul@137 179
    def _get_archive_and_path(self, dir, name):

        """
        Split the search location 'dir' into an optional archive and a path
        for the module 'name', returning (archive, archive_path, path).

        'dir' is divided at ":" characters and the first part becomes
        'archive_path'. If that part ends in ".jar", it is opened as a ZipFile
        and 'path' is 'name' joined onto the remaining parts (the location
        inside the archive). Otherwise 'archive' is None and 'path' is 'name'
        joined onto the whole of 'dir'.
        """

        components = dir.split(":")
        archive_path = components[0]

        # Anything not ending in .jar is an ordinary filesystem location.

        if not archive_path.endswith(os.extsep + "jar"):
            return None, archive_path, self.hooks.path_join(dir, name)

        # Archives may include an internal path after the primary part.

        archive = zipfile.ZipFile(archive_path, "r")
        internal_dir = ":".join(components[1:])
        return archive, archive_path, self.hooks.path_join(internal_dir, name)
paul@137 197
paul@137 198
    def _get_path_in_archive(self, path):
paul@137 199
        parts = path.split(":")
paul@137 200
        if len(parts) == 1:
paul@137 201
            return parts[0]
paul@137 202
        else:
paul@137 203
            return ":".join(parts[1:])
paul@137 204
paul@137 205
    def _find_module_at_path(self, path, archive):
paul@137 206
        if self.hooks.path_isdir(path, archive):
paul@137 207
            #print "Looking in", path, "using archive", archive
paul@137 208
paul@137 209
            # Look for classes in the directory.
paul@137 210
paul@137 211
            if len(self.hooks.matching(path, os.extsep + "class", archive)) != 0:
paul@137 212
                return 1
paul@137 213
paul@137 214
            # Otherwise permit importing where directories containing classes exist.
paul@137 215
paul@137 216
            #print "Filenames are", self.hooks.listdir(path, archive)
paul@137 217
            for filename in self.hooks.listdir(path, archive):
paul@137 218
                pathname = self.hooks.path_join(path, filename)
paul@137 219
                result = self._find_module_at_path(pathname, archive)
paul@137 220
                if result is not None:
paul@137 221
                    return result
paul@137 222
paul@137 223
        return 0
paul@137 224
paul@137 225
    def load_module(self, name, stuff):

        """
        Load the module with the given 'name', with a list of 'stuff' items,
        each of which describes the location of the module and is a tuple of the
        form (file, filename, (suffix, mode, data type)).

        Return a module object or raise an ImportError if a problem occurred in
        the import operation.

        Note that the 'stuff' parameter is a list and not a single item as in
        ModuleLoader.load_module. This should still work, however, since the
        find_module method produces such a list.

        Loading happens in two phases: _load_module first loads the module and
        whatever it references, recording the classes found per module; only
        then are the recorded classes initialised via _init_classes.
        """

        visited_names = []
        pending_classes = {}
        top_module = self._load_module(name, stuff, visited_names, pending_classes)

        # Initialise the loaded classes.

        for loaded_module, translators in pending_classes.items():
            self._init_classes(loaded_module, translators)

        return top_module
paul@166 250
paul@166 251
    def _filter_names(self, module_names, loaded_module_names):
paul@166 252
        for module_name in loaded_module_names:
paul@166 253
            try:
paul@166 254
                i = module_names.index(module_name)
paul@166 255
                del module_names[i]
paul@166 256
            except ValueError:
paul@166 257
                pass
paul@166 258
paul@166 259
    def _load_module(self, name, stuff, loaded_module_names, loaded_classes):

        """
        Load the module called 'name' from the locations listed in 'stuff' and
        return the module object.

        'loaded_module_names' is a list shared across one load operation; the
        given 'name' is appended to it straight away so that modules which
        refer back to this one are not loaded a second time.
        'loaded_classes' is a dictionary, also shared, which receives an entry
        mapping the new module to a dictionary of its class translators keyed
        by class name.

        If any location in 'stuff' is not a Java package or archive, loading
        is handed over to ihooks.ModuleLoader.load_module for that location
        and its result returned. Otherwise all class files found at every
        location are processed, and modules mentioned by the external names
        which processing reports are imported in turn via _import.
        """

        loaded_module_names.append(name)

        # Detect non-Java modules.

        for location in stuff:
            _archive, _filename, (_suffix, _mode, datatype) = location
            if datatype not in (JAVA_PACKAGE, JAVA_ARCHIVE):
                return ihooks.ModuleLoader.load_module(self, name, location)

        # Set up the module.
        # A union of all locations is placed in the module's path.

        module = self.hooks.add_module(name)
        module.__path__ = [location[1] for location in stuff]

        # Prepare a dictionary of globals.

        global_names = module.__dict__
        global_names["__builtins__"] = __builtins__

        # Just go into each package and find the class files.
        # NOTE: Should we not be using some saved loader remembered upon
        # NOTE: installation?

        translators = {}
        referenced_names = []
        class_suffix = os.extsep + "class"

        for archive, location_name, _info in stuff:

            # Get the real filename.

            directory = self._get_path_in_archive(location_name)

            # Load the class files.

            for class_filename in self.hooks.matching(directory, class_suffix, archive):
                data = self.hooks.read(class_filename, archive)
                class_file = classfile.ClassFile(data)
                translator = bytecode.ClassTranslator(class_file)
                translators[str(class_file.this_class.get_name())] = translator
                referenced_names.extend(translator.process(global_names))

        # Record the classes found under the current module.

        loaded_classes[module] = translators

        # Work out the modules used by external names, leaving out those
        # already loaded.

        external_module_names = self._get_external_module_names(referenced_names)
        self._filter_names(external_module_names, loaded_module_names)

        # Repeatedly load classes from referenced modules. The membership test
        # is repeated for each name because every import may extend
        # 'loaded_module_names'.

        for module_name in external_module_names:
            if module_name in loaded_module_names:
                continue

            # Emulate the __import__ function, loading the requested module
            # but returning the top-level module.

            self._import(module_name, global_names, loaded_module_names, loaded_classes)

        return module
paul@137 332
paul@166 333
    def _import(self, module_name, parent, loaded_module_names, loaded_classes):

        """
        Import the module with the dotted 'module_name' on behalf of a module
        whose namespace dictionary is 'parent', and return the module object
        for the final name component.

        If find_module locates nothing for 'module_name', the built-in
        __import__ is used instead and its result is stored in 'parent' under
        the first component of the name.

        Otherwise each prefix of the dotted name is handled in turn: a prefix
        already present in self.modules_dict() is reused, while any other is
        located and loaded with _load_module (passing on 'loaded_module_names'
        and 'loaded_classes') and stored in the enclosing namespace under its
        component name.
        """

        # Where no Java-based submodules can be found, look for
        # Python modules instead.

        new_stuff = self.find_module(module_name)
        if not new_stuff:
            new_module = __import__(module_name, parent)
            parent[module_name.split(".")[0]] = new_module
            return new_module

        path = []
        for module_name_part in module_name.split("."):
            path.append(module_name_part)
            path_str = ".".join(path)

            if path_str in self.modules_dict():

                # Add submodules to existing modules.

                new_module = self.modules_dict()[path_str]

            else:

                # Find submodules.

                new_stuff = self.find_module(path_str)
                new_module = self._load_module(path_str, new_stuff, loaded_module_names, loaded_classes)
                parent[module_name_part] = new_module

            # The next component belongs in this module's namespace.

            parent = new_module.__dict__

        return new_module
paul@166 372
paul@166 373
    def _get_external_module_names(self, names):
paul@166 374
        groups = self._get_names_grouped_by_module(names)
paul@166 375
        if groups.has_key(""):
paul@166 376
            del groups[""]
paul@166 377
        return groups.keys()
paul@166 378
paul@166 379
    def _get_names_grouped_by_module(self, names):
paul@166 380
        groups = {}
paul@166 381
        for name in names:
paul@166 382
            module_name, class_name = self._get_module_and_class_names(name)
paul@166 383
            if not groups.has_key(module_name):
paul@166 384
                groups[module_name] = []
paul@166 385
            groups[module_name].append(class_name)
paul@166 386
        return groups
paul@166 387
paul@166 388
    def _get_module_and_class_names(self, full_name):
paul@166 389
        full_name_parts = full_name.split(".")
paul@166 390
        class_name = full_name_parts[-1]
paul@166 391
        module_name = ".".join(full_name_parts[:-1])
paul@166 392
        return module_name, class_name
paul@166 393
paul@166 394
    def _init_classes(self, module, classes):

        """
        Create the real classes for 'module' from the translators held in the
        'classes' dictionary, then run the __clinit__ code of every created
        class which has one. The namespace of 'module' supplies the globals in
        both steps.
        """

        global_names = module.__dict__

        # First, create the classes.

        real_classes = [translator.get_class(global_names) for translator in classes.values()]

        # Finally, call __clinit__ methods for all relevant classes.
        # The code object of the method is evaluated directly with the module
        # namespace as its globals.

        for cls in real_classes:
            if not hasattr(cls, "__clinit__"):
                continue
            eval(cls.__clinit__.func_code, global_names)
paul@166 408
paul@137 409
# Module-level side effect: merely importing this module builds a
# ModuleImporter around a ClassLoader (itself using ClassHooks) and installs it.
ihooks.ModuleImporter(loader=ClassLoader(hooks=ClassHooks())).install()
paul@137 410
paul@137 411
# vim: tabstop=4 expandtab shiftwidth=4