javaclass

Changeset

0:7c67f86c4c10
2004-10-23 Paul Boddie raw files shortlog changelog graph Initial revision
classfile.py (file)
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/classfile.py	Sat Oct 23 21:31:59 2004 +0200
     1.3 @@ -0,0 +1,379 @@
     1.4 +#!/usr/bin/env python
     1.5 +
     1.6 +"""
     1.7 +Java class file decoder. Specification found at the following URL:
     1.8 +http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
     1.9 +"""
    1.10 +
    1.11 +import struct
    1.12 +
    1.13 +# Constant information.
    1.14 +
    1.15 +class ClassInfo:
    1.16 +    def init(self, data):
    1.17 +        self.name_index = struct.unpack(">H", data[0:2])[0]
    1.18 +        return data[2:]
    1.19 +
    1.20 +class RefInfo:
    1.21 +    def init(self, data):
    1.22 +        self.class_index = struct.unpack(">H", data[0:2])[0]
    1.23 +        self.name_and_type_index = struct.unpack(">H", data[2:4])[0]
    1.24 +        return data[4:]
    1.25 +
    1.26 +class FieldRefInfo(RefInfo):
    1.27 +    pass
    1.28 +
    1.29 +class MethodRefInfo(RefInfo):
    1.30 +    pass
    1.31 +
    1.32 +class InterfaceMethodRefInfo(RefInfo):
    1.33 +    pass
    1.34 +
    1.35 +class NameAndTypeInfo:
    1.36 +    def init(self, data):
    1.37 +        self.name_index = struct.unpack(">H", data[0:2])[0]
    1.38 +        self.descriptor_index = struct.unpack(">H", data[2:4])[0]
    1.39 +        return data[4:]
    1.40 +
    1.41 +class Utf8Info:
    1.42 +    def init(self, data):
    1.43 +        self.length = struct.unpack(">H", data[0:2])[0]
    1.44 +        self.bytes = data[2:2+self.length]
    1.45 +        return data[2+self.length:]
    1.46 +
    1.47 +    def __str__(self):
    1.48 +        return self.bytes
    1.49 +
    1.50 +    def __unicode__(self):
    1.51 +        return unicode(self.bytes, "utf-8")
    1.52 +
    1.53 +class StringInfo:
    1.54 +    def init(self, data):
    1.55 +        self.string_index = struct.unpack(">H", data[0:2])[0]
    1.56 +        return data[2:]
    1.57 +
    1.58 +class SmallNumInfo:
    1.59 +    def init(self, data):
    1.60 +        self.bytes = struct.unpack(">L", data[0:4])[0]
    1.61 +        return data[4:]
    1.62 +
    1.63 +class IntegerInfo(SmallNumInfo):
    1.64 +    pass
    1.65 +
    1.66 +class FloatInfo(SmallNumInfo):
    1.67 +    pass
    1.68 +
    1.69 +class LargeNumInfo:
    1.70 +    def init(self, data):
    1.71 +        self.high_bytes = struct.unpack(">L", data[0:4])[0]
    1.72 +        self.low_bytes = struct.unpack(">L", data[4:8])[0]
    1.73 +        return data[8:]
    1.74 +
    1.75 +class LongInfo(LargeNumInfo):
    1.76 +    pass
    1.77 +
    1.78 +class DoubleInfo(LargeNumInfo):
    1.79 +    pass
    1.80 +
    1.81 +# Other information.
    1.82 +
    1.83 +class ItemInfo:
    1.84 +    def init(self, data, class_file):
    1.85 +        self.class_file = class_file
    1.86 +        self.access_flags = struct.unpack(">H", data[0:2])[0]
    1.87 +        self.name_index = struct.unpack(">H", data[2:4])[0]
    1.88 +        self.descriptor_index = struct.unpack(">H", data[4:6])[0]
    1.89 +        self.attributes, data = self.class_file._get_attributes(data[6:])
    1.90 +        return data
    1.91 +
    1.92 +    # Symbol parsing.
    1.93 +
    1.94 +    def _get_method_descriptor(self, s):
    1.95 +        assert s[0] == "("
    1.96 +        params = []
    1.97 +        s = s[1:]
    1.98 +        while s[0] != ")":
    1.99 +            parameter_descriptor, s = self._get_parameter_descriptor(s)
   1.100 +            params.append(parameter_descriptor)
   1.101 +        if s[1] != "V":
   1.102 +            return_type, s = self._get_field_type(s[1:])
   1.103 +        else:
   1.104 +            return_type, s = None, s[1:]
   1.105 +        return params, return_type
   1.106 +
   1.107 +    def _get_parameter_descriptor(self, s):
   1.108 +        return self._get_field_type(s)
   1.109 +
   1.110 +    def _get_field_descriptor(self, s):
   1.111 +        return self._get_field_type(s)
   1.112 +
   1.113 +    def _get_component_type(self, s):
   1.114 +        return self._get_field_type(s)
   1.115 +
   1.116 +    def _get_field_type(self, s):
   1.117 +        base_type, s = self._get_base_type(s)
   1.118 +        object_type = None
   1.119 +        array_type = None
   1.120 +        if base_type == "L":
   1.121 +            object_type, s = self._get_object_type(s)
   1.122 +        elif base_type == "[":
   1.123 +            array_type, s = self._get_array_type(s)
   1.124 +        return (base_type, object_type, array_type), s
   1.125 +
   1.126 +    def _get_base_type(self, s):
   1.127 +        if len(s) > 0:
   1.128 +            return s[0], s[1:]
   1.129 +        else:
   1.130 +            return None, s
   1.131 +
   1.132 +    def _get_object_type(self, s):
   1.133 +        if len(s) > 0:
   1.134 +            s_end = s.find(";")
   1.135 +            assert s_end != -1
   1.136 +            return s[:s_end], s[s_end+1:]
   1.137 +        else:
   1.138 +            return None, s
   1.139 +
   1.140 +    def _get_array_type(self, s):
   1.141 +        if len(s) > 0:
   1.142 +            return self._get_component_type(s[1:])
   1.143 +        else:
   1.144 +            return None, s
   1.145 +
   1.146 +    # Processed details.
   1.147 +
   1.148 +    def get_name(self):
   1.149 +        return unicode(self.class_file.constants[self.name_index - 1])
   1.150 +
   1.151 +class FieldInfo(ItemInfo):
   1.152 +    def get_descriptor(self):
   1.153 +        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))
   1.154 +
   1.155 +class MethodInfo(ItemInfo):
   1.156 +    def get_descriptor(self):
   1.157 +        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))
   1.158 +
   1.159 +class AttributeInfo:
   1.160 +    def init(self, data, class_file):
   1.161 +        self.attribute_length = struct.unpack(">L", data[0:4])[0]
   1.162 +        self.info = data[4:4+self.attribute_length]
   1.163 +        return data[4+self.attribute_length:]
   1.164 +
   1.165 +# NOTE: Decode the different attribute formats.
   1.166 +
   1.167 +class SourceFileAttributeInfo(AttributeInfo):
   1.168 +    pass
   1.169 +
   1.170 +class ConstantValueAttributeInfo(AttributeInfo):
   1.171 +    def init(self, data, class_file):
   1.172 +        self.attribute_length = struct.unpack(">L", data[0:4])[0]
   1.173 +        self.constant_value_index = struct.unpack(">H", data[4:6])[0]
   1.174 +        assert 4+self.attribute_length == 6
   1.175 +        return data[4+self.attribute_length:]
   1.176 +
   1.177 +class CodeAttributeInfo(AttributeInfo):
   1.178 +    def init(self, data, class_file):
   1.179 +        self.class_file = class_file
   1.180 +        self.attribute_length = struct.unpack(">L", data[0:4])[0]
   1.181 +        self.max_stack = struct.unpack(">H", data[4:6])[0]
   1.182 +        self.max_locals = struct.unpack(">H", data[6:8])[0]
   1.183 +        self.code_length = struct.unpack(">L", data[8:12])[0]
   1.184 +        end_of_code = 12+self.code_length
   1.185 +        self.code = data[12:end_of_code]
   1.186 +        self.exception_table_length = struct.unpack(">H", data[end_of_code:end_of_code+2])[0]
   1.187 +        self.exception_table = []
   1.188 +        data = data[end_of_code + 2:]
   1.189 +        for i in range(0, self.exception_table_length):
   1.190 +            exception = ExceptionInfo()
   1.191 +            data = exception.init(data)
   1.192 +        self.attributes, data = self.class_file._get_attributes(data)
   1.193 +        return data
   1.194 +
   1.195 +class ExceptionsAttributeInfo(AttributeInfo):
   1.196 +    pass
   1.197 +
   1.198 +class InnerClassesAttributeInfo(AttributeInfo):
   1.199 +    pass
   1.200 +
   1.201 +class SyntheticAttributeInfo(AttributeInfo):
   1.202 +    pass
   1.203 +
   1.204 +class LineNumberAttributeInfo(AttributeInfo):
   1.205 +    pass
   1.206 +
   1.207 +class LocalVariableAttributeInfo(AttributeInfo):
   1.208 +    pass
   1.209 +
   1.210 +class DeprecatedAttributeInfo(AttributeInfo):
   1.211 +    pass
   1.212 +
   1.213 +class ExceptionInfo:
   1.214 +    def __init__(self):
   1.215 +        self.start_pc, self.end_pc, self.handler_pc, self.catch_type = None, None, None, None
   1.216 +
   1.217 +    def init(self, data):
   1.218 +        self.start_pc = struct.unpack(">H", data[0:2])[0]
   1.219 +        self.end_pc = struct.unpack(">H", data[2:4])[0]
   1.220 +        self.handler_pc = struct.unpack(">H", data[4:6])[0]
   1.221 +        self.catch_type = struct.unpack(">H", data[6:8])[0]
   1.222 +        return data[8:]
   1.223 +
   1.224 +class UnknownTag(Exception):
   1.225 +    pass
   1.226 +
   1.227 +class UnknownAttribute(Exception):
   1.228 +    pass
   1.229 +
   1.230 +# Abstractions for the main structures.
   1.231 +
   1.232 +class ClassFile:
   1.233 +
   1.234 +    "A class representing a Java class file."
   1.235 +
   1.236 +    def __init__(self, s):
   1.237 +
   1.238 +        """
   1.239 +        Process the given string 's', populating the object with the class
   1.240 +        file's details.
   1.241 +        """
   1.242 +
   1.243 +        self.constants, s = self._get_constants(s[8:])
   1.244 +        self.access_flags, s = self._get_access_flags(s)
   1.245 +        self.this_class, s = self._get_this_class(s)
   1.246 +        self.super_class, s = self._get_super_class(s)
   1.247 +        self.interfaces, s = self._get_interfaces(s)
   1.248 +        self.fields, s = self._get_fields(s)
   1.249 +        self.methods, s = self._get_methods(s)
   1.250 +        self.attributes, s = self._get_attributes(s)
   1.251 +
   1.252 +    def _decode_const(self, s):
   1.253 +        tag = struct.unpack(">B", s[0:1])[0]
   1.254 +        if tag == 1:
   1.255 +            const = Utf8Info()
   1.256 +        elif tag == 3:
   1.257 +            const = IntegerInfo()
   1.258 +        elif tag == 4:
   1.259 +            const = FloatInfo()
   1.260 +        elif tag == 5:
   1.261 +            const = LongInfo()
   1.262 +        elif tag == 6:
   1.263 +            const = DoubleInfo()
   1.264 +        elif tag == 7:
   1.265 +            const = ClassInfo()
   1.266 +        elif tag == 8:
   1.267 +            const = StringInfo()
   1.268 +        elif tag == 9:
   1.269 +            const = FieldRefInfo()
   1.270 +        elif tag == 10:
   1.271 +            const = MethodRefInfo()
   1.272 +        elif tag == 11:
   1.273 +            const = InterfaceMethodRefInfo()
   1.274 +        elif tag == 12:
   1.275 +            const = NameAndTypeInfo()
   1.276 +        else:
   1.277 +            raise UnknownTag, tag
   1.278 +        s = const.init(s[1:])
   1.279 +        return const, s
   1.280 +
   1.281 +    def _get_constants_from_table(self, count, s):
   1.282 +        l = []
   1.283 +        # Have to skip certain entries specially.
   1.284 +        i = 1
   1.285 +        while i < count:
   1.286 +            c, s = self._decode_const(s)
   1.287 +            l.append(c)
   1.288 +            # Add a blank entry after "large" entries.
   1.289 +            if isinstance(c, LargeNumInfo):
   1.290 +                l.append(None)
   1.291 +                i += 1
   1.292 +            i += 1
   1.293 +        return l, s
   1.294 +
   1.295 +    def _get_items_from_table(self, cls, number, s):
   1.296 +        l = []
   1.297 +        for i in range(0, number):
   1.298 +            f = cls()
   1.299 +            s = f.init(s, self)
   1.300 +            l.append(f)
   1.301 +        return l, s
   1.302 +
   1.303 +    def _get_methods_from_table(self, number, s):
   1.304 +        return self._get_items_from_table(MethodInfo, number, s)
   1.305 +
   1.306 +    def _get_fields_from_table(self, number, s):
   1.307 +        return self._get_items_from_table(FieldInfo, number, s)
   1.308 +
   1.309 +    def _get_attribute_from_table(self, s):
   1.310 +        attribute_name_index = struct.unpack(">H", s[0:2])[0]
   1.311 +        constant_name = self.constants[attribute_name_index - 1].bytes
   1.312 +        if constant_name == "SourceFile":
   1.313 +            attribute = SourceFileAttributeInfo()
   1.314 +        elif constant_name == "ConstantValue":
   1.315 +            attribute = ConstantValueAttributeInfo()
   1.316 +        elif constant_name == "Code":
   1.317 +            attribute = CodeAttributeInfo()
   1.318 +        elif constant_name == "Exceptions":
   1.319 +            attribute = ExceptionsAttributeInfo()
   1.320 +        elif constant_name == "InnerClasses":
   1.321 +            attribute = InnerClassesAttributeInfo()
   1.322 +        elif constant_name == "Synthetic":
   1.323 +            attribute = SyntheticAttributeInfo()
   1.324 +        elif constant_name == "LineNumberTable":
   1.325 +            attribute = LineNumberAttributeInfo()
   1.326 +        elif constant_name == "LocalVariableTable":
   1.327 +            attribute = LocalVariableAttributeInfo()
   1.328 +        elif constant_name == "Deprecated":
   1.329 +            attribute = DeprecatedAttributeInfo()
   1.330 +        else:
   1.331 +            raise UnknownAttribute, constant_name
   1.332 +        s = attribute.init(s[2:], self)
   1.333 +        return attribute, s
   1.334 +
   1.335 +    def _get_attributes_from_table(self, number, s):
   1.336 +        attributes = []
   1.337 +        for i in range(0, number):
   1.338 +            attribute, s = self._get_attribute_from_table(s)
   1.339 +            attributes.append(attribute)
   1.340 +        return attributes, s
   1.341 +
   1.342 +    def _get_constants(self, s):
   1.343 +        count = struct.unpack(">H", s[0:2])[0]
   1.344 +        return self._get_constants_from_table(count, s[2:])
   1.345 +
   1.346 +    def _get_access_flags(self, s):
   1.347 +        return struct.unpack(">H", s[0:2])[0], s[2:]
   1.348 +
   1.349 +    def _get_this_class(self, s):
   1.350 +        index = struct.unpack(">H", s[0:2])[0]
   1.351 +        return self.constants[index - 1], s[2:]
   1.352 +
   1.353 +    _get_super_class = _get_this_class
   1.354 +
   1.355 +    def _get_interfaces(self, s):
   1.356 +        interfaces = []
   1.357 +        number = struct.unpack(">H", s[0:2])[0]
   1.358 +        s = s[2:]
   1.359 +        for i in range(0, number):
   1.360 +            index = struct.unpack(">H", s[0:2])[0]
   1.361 +            interfaces.append(self.constants[index - 1])
   1.362 +            s = s[2:]
   1.363 +        return interfaces, s
   1.364 +
   1.365 +    def _get_fields(self, s):
   1.366 +        number = struct.unpack(">H", s[0:2])[0]
   1.367 +        return self._get_fields_from_table(number, s[2:])
   1.368 +
   1.369 +    def _get_attributes(self, s):
   1.370 +        number = struct.unpack(">H", s[0:2])[0]
   1.371 +        return self._get_attributes_from_table(number, s[2:])
   1.372 +
   1.373 +    def _get_methods(self, s):
   1.374 +        number = struct.unpack(">H", s[0:2])[0]
   1.375 +        return self._get_methods_from_table(number, s[2:])
   1.376 +
   1.377 +if __name__ == "__main__":
   1.378 +    import sys
   1.379 +    f = open(sys.argv[1])
   1.380 +    c = ClassFile(f.read())
   1.381 +
   1.382 +# vim: tabstop=4 expandtab shiftwidth=4