1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/classfile.py Sat Oct 23 21:31:59 2004 +0200
1.3 @@ -0,0 +1,379 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Java class file decoder. Specification found at the following URL:
1.8 +http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
1.9 +"""
1.10 +
1.11 +import struct
1.12 +
1.13 +# Constant information.
1.14 +
class ClassInfo:

    "A constant pool entry holding a single 'name_index' value."

    def init(self, data):

        """
        Read a big-endian unsigned 16-bit value from the first two bytes of
        'data' into 'name_index' and return the data which follows it.
        """

        (index,) = struct.unpack(">H", data[:2])
        self.name_index = index
        return data[2:]
1.19 +
class RefInfo:

    "A constant pool entry pairing a class index with a name-and-type index."

    def init(self, data):

        """
        Read two consecutive big-endian unsigned 16-bit values from 'data'
        into 'class_index' and 'name_and_type_index', returning the data
        found after those four bytes.
        """

        header, remainder = data[:4], data[4:]
        self.class_index = struct.unpack(">H", header[:2])[0]
        self.name_and_type_index = struct.unpack(">H", header[2:])[0]
        return remainder

class FieldRefInfo(RefInfo):

    "A RefInfo created for constant tag 9; adds no behaviour of its own."

class MethodRefInfo(RefInfo):

    "A RefInfo created for constant tag 10; adds no behaviour of its own."

class InterfaceMethodRefInfo(RefInfo):

    "A RefInfo created for constant tag 11; adds no behaviour of its own."
1.34 +
class NameAndTypeInfo:

    "A constant pool entry pairing a name index with a descriptor index."

    def init(self, data):

        """
        Read two consecutive big-endian unsigned 16-bit values from 'data'
        into 'name_index' and 'descriptor_index', returning the data found
        after those four bytes.
        """

        header, remainder = data[:4], data[4:]
        self.name_index = struct.unpack(">H", header[:2])[0]
        self.descriptor_index = struct.unpack(">H", header[2:])[0]
        return remainder
1.40 +
class Utf8Info:

    "A constant pool entry holding a length-prefixed run of bytes."

    def init(self, data):

        """
        Read a big-endian unsigned 16-bit 'length' from 'data', keep the
        following 'length' bytes as 'bytes' and return whatever comes after
        them.
        """

        (self.length,) = struct.unpack(">H", data[:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):

        "Return the stored bytes unchanged."

        return self.bytes

    def __unicode__(self):

        "Return the stored bytes decoded as UTF-8."

        return self.bytes.decode("utf-8")
1.52 +
class StringInfo:

    "A constant pool entry holding a single 'string_index' value."

    def init(self, data):

        """
        Read a big-endian unsigned 16-bit value from the first two bytes of
        'data' into 'string_index' and return the data which follows it.
        """

        (index,) = struct.unpack(">H", data[:2])
        self.string_index = index
        return data[2:]
1.57 +
class SmallNumInfo:

    "A constant pool entry holding one four-byte value."

    def init(self, data):

        """
        Read the first four bytes of 'data' as a big-endian unsigned 32-bit
        value, store it as 'bytes' and return the data which follows.

        The value is kept as the raw unsigned integer; no signed or
        floating point interpretation is applied here.
        """

        (raw,) = struct.unpack(">L", data[:4])
        self.bytes = raw
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A SmallNumInfo created for constant tag 3; adds no behaviour of its own."

class FloatInfo(SmallNumInfo):

    "A SmallNumInfo created for constant tag 4; adds no behaviour of its own."
1.68 +
class LargeNumInfo:

    "A constant pool entry holding one eight-byte value as two halves."

    def init(self, data):

        """
        Read two consecutive big-endian unsigned 32-bit values from 'data'
        into 'high_bytes' and 'low_bytes', returning the data found after
        those eight bytes.

        The halves are kept as raw unsigned integers; they are not combined
        into a single number here.
        """

        header, remainder = data[:8], data[8:]
        self.high_bytes = struct.unpack(">L", header[:4])[0]
        self.low_bytes = struct.unpack(">L", header[4:])[0]
        return remainder

class LongInfo(LargeNumInfo):

    "A LargeNumInfo created for constant tag 5; adds no behaviour of its own."

class DoubleInfo(LargeNumInfo):

    "A LargeNumInfo created for constant tag 6; adds no behaviour of its own."
1.80 +
1.81 +# Other information.
1.82 +
class ItemInfo:

    "Details shared by field and method entries, plus descriptor parsing."

    def init(self, data, class_file):

        """
        Decode the fixed part of an entry from 'data': three big-endian
        unsigned 16-bit values stored as 'access_flags', 'name_index' and
        'descriptor_index'. The rest of the data is passed to the
        '_get_attributes' method of 'class_file', whose result supplies
        'attributes' and the remaining data returned from this method.
        """

        self.class_file = class_file
        self.access_flags = struct.unpack(">H", data[0:2])[0]
        self.name_index = struct.unpack(">H", data[2:4])[0]
        self.descriptor_index = struct.unpack(">H", data[4:6])[0]
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    # Symbol parsing.
    #
    # Each parsing method below, apart from _get_method_descriptor, takes the
    # text still to be parsed and returns a (result, remaining text) pair.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must start with "(", and
        return (params, return_type). 'params' is a list of field type
        tuples, one per parameter; 'return_type' is a field type tuple, or
        None where the return type is "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")", so s[1] is the first character of the return type.
        if s[1] != "V":
            return_type = self._get_field_type(s[1:])[0]
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):

        "Parse one parameter; a parameter is just a field type."

        return self._get_field_type(s)

    def _get_field_descriptor(self, s):

        "Parse a field descriptor; it is just a field type."

        return self._get_field_type(s)

    def _get_component_type(self, s):

        "Parse the component type of an array; it is just a field type."

        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one field type from the start of 's', returning
        ((base_type, object_type, array_type), remaining text).

        'base_type' is the first character of 's' (None if 's' is empty).
        'object_type' is the name up to the ";" when the base type is "L",
        otherwise None. 'array_type' is the field type tuple of the
        component when the base type is "[", otherwise None.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        "Split off the first character of 's'; give None if 's' is empty."

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Given the text after an "L", return the name up to (not including)
        the first ";" together with the text after that ";". An empty 's'
        gives None for the name.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Given the text after a "[", parse the component type. An empty 's'
        gives None for the component type.
        """

        if len(s) > 0:
            # The "[" was already removed by _get_base_type, so 's' starts
            # at the component type and must be parsed from its first
            # character; skipping one more would lose or corrupt the type.
            return self._get_component_type(s)
        else:
            return None, s

    # Processed details.

    def get_name(self):

        """
        Return the unicode form of the constant selected by 'name_index'.
        Indexes are one-based, hence the adjustment when indexing the list.
        """

        return unicode(self.class_file.constants[self.name_index - 1])
1.150 +
class FieldInfo(ItemInfo):

    "An ItemInfo whose descriptor is parsed as a field descriptor."

    def get_descriptor(self):

        """
        Take the constant selected by the one-based 'descriptor_index',
        convert it to unicode and return the result of parsing that text
        with '_get_field_descriptor'.
        """

        constant = self.class_file.constants[self.descriptor_index - 1]
        text = unicode(constant)
        return self._get_field_descriptor(text)
1.154 +
class MethodInfo(ItemInfo):

    "An ItemInfo whose descriptor is parsed as a method descriptor."

    def get_descriptor(self):

        """
        Take the constant selected by the one-based 'descriptor_index',
        convert it to unicode and return the result of parsing that text
        with '_get_method_descriptor'.
        """

        constant = self.class_file.constants[self.descriptor_index - 1]
        text = unicode(constant)
        return self._get_method_descriptor(text)
1.158 +
class AttributeInfo:

    "A generic attribute whose content is kept as undecoded data."

    def init(self, data, class_file):

        """
        Read a big-endian unsigned 32-bit 'attribute_length' from 'data',
        keep the following 'attribute_length' bytes as 'info' and return
        whatever comes after them. The 'class_file' argument is not used.
        """

        (self.attribute_length,) = struct.unpack(">L", data[:4])
        end = 4 + self.attribute_length
        self.info = data[4:end]
        return data[end:]
1.164 +
1.165 +# NOTE: Decode the different attribute formats.
1.166 +
class SourceFileAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."
1.169 +
class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute consisting of a single constant index."

    def init(self, data, class_file):

        """
        Read a big-endian unsigned 32-bit 'attribute_length' followed by a
        big-endian unsigned 16-bit 'constant_value_index' from 'data', and
        return the data found after the attribute. An assertion requires
        the declared length to be 2. The 'class_file' argument is not used.
        """

        length = struct.unpack(">L", data[:4])[0]
        self.attribute_length = length
        self.constant_value_index = struct.unpack(">H", data[4:6])[0]
        consumed = 4 + length
        assert consumed == 6
        return data[consumed:]
1.176 +
class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and nested attributes."

    def init(self, data, class_file):

        """
        Decode the attribute from 'data' and return the data following it.

        The layout read here, with all numbers big-endian and unsigned, is:
        32-bit 'attribute_length', 16-bit 'max_stack', 16-bit 'max_locals',
        32-bit 'code_length', 'code_length' bytes kept as 'code', 16-bit
        'exception_table_length', that many ExceptionInfo entries collected
        in 'exception_table', and finally a table of attributes decoded by
        the '_get_attributes' method of 'class_file' into 'attributes'.

        The end of the attribute is found by decoding its contents;
        'attribute_length' is recorded but not used to locate it.
        """

        self.class_file = class_file
        self.attribute_length = struct.unpack(">L", data[0:4])[0]
        self.max_stack = struct.unpack(">H", data[4:6])[0]
        self.max_locals = struct.unpack(">H", data[6:8])[0]
        self.code_length = struct.unpack(">L", data[8:12])[0]
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = struct.unpack(">H", data[end_of_code:end_of_code+2])[0]
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            # Keep each decoded entry; otherwise the table would stay empty
            # regardless of 'exception_table_length'.
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data
1.194 +
class ExceptionsAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."

class InnerClassesAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."

class SyntheticAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."

class LineNumberAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."

class LocalVariableAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."

class DeprecatedAttributeInfo(AttributeInfo):

    "An AttributeInfo subclass adding no behaviour; content is not decoded."
1.212 +
class ExceptionInfo:

    "One entry of an exception table: four 16-bit values."

    def __init__(self):

        "Start with all four values unset (None)."

        self.start_pc = self.end_pc = self.handler_pc = self.catch_type = None

    def init(self, data):

        """
        Read four consecutive big-endian unsigned 16-bit values from 'data'
        into 'start_pc', 'end_pc', 'handler_pc' and 'catch_type', in that
        order, and return the data found after those eight bytes.
        """

        names = ("start_pc", "end_pc", "handler_pc", "catch_type")
        for position, name in enumerate(names):
            offset = 2 * position
            value = struct.unpack(">H", data[offset:offset + 2])[0]
            setattr(self, name, value)
        return data[8:]
1.223 +
class UnknownTag(Exception):

    "Raised with the tag value when a constant's tag is not recognised."
1.226 +
class UnknownAttribute(Exception):

    "Raised with the attribute name when that name is not recognised."
1.229 +
1.230 +# Abstractions for the main structures.
1.231 +
class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's' are skipped without being examined.
        The remaining sections are decoded in order, each step consuming
        its part of the data and handing the rest to the next step.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode one constant from the start of 's', returning the constant
        object and the remaining data. The first byte is the tag selecting
        the kind of constant; UnknownTag is raised, carrying the tag, if it
        is not one of the known values.
        """

        tag = struct.unpack(">B", s[0:1])[0]
        const_classes = {
            1 : Utf8Info,
            3 : IntegerInfo,
            4 : FloatInfo,
            5 : LongInfo,
            6 : DoubleInfo,
            7 : ClassInfo,
            8 : StringInfo,
            9 : FieldRefInfo,
            10 : MethodRefInfo,
            11 : InterfaceMethodRefInfo,
            12 : NameAndTypeInfo,
            }
        const_class = const_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()
        s = const.init(s[1:])
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the running index reaches 'count',
        returning the list of constants and the remaining data.

        The index starts at 1, so at most 'count' - 1 list positions are
        filled. A LargeNumInfo constant takes two positions: the constant
        itself followed by a None placeholder.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Create 'number' instances of 'cls', initialising each in turn from
        's' by calling its 'init' method with the data and this object.
        Return the list of instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):

        "Decode 'number' MethodInfo entries from 's'."

        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):

        "Decode 'number' FieldInfo entries from 's'."

        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute from the start of 's', returning the attribute
        object and the remaining data.

        The first two bytes give the one-based index of the constant whose
        'bytes' value names the attribute; that name selects the class used
        to decode the rest. UnknownAttribute is raised, carrying the name,
        if the name is not one of the known ones.
        """

        attribute_name_index = struct.unpack(">H", s[0:2])[0]
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_classes = {
            "SourceFile" : SourceFileAttributeInfo,
            "ConstantValue" : ConstantValueAttributeInfo,
            "Code" : CodeAttributeInfo,
            "Exceptions" : ExceptionsAttributeInfo,
            "InnerClasses" : InnerClassesAttributeInfo,
            "Synthetic" : SyntheticAttributeInfo,
            "LineNumberTable" : LineNumberAttributeInfo,
            "LocalVariableTable" : LocalVariableAttributeInfo,
            "Deprecated" : DeprecatedAttributeInfo,
            }
        attribute_class = attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        "Decode 'number' attributes from 's'."

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # Each method below reads a big-endian unsigned 16-bit value from the
    # start of 's' and returns a (result, remaining data) pair.

    def _get_constants(self, s):

        "Read the constant count, then decode the constants following it."

        count = struct.unpack(">H", s[0:2])[0]
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):

        "Read the access flags value."

        return struct.unpack(">H", s[0:2])[0], s[2:]

    def _get_this_class(self, s):

        "Read a one-based constant index and give the constant it selects."

        index = struct.unpack(">H", s[0:2])[0]
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Read a one-based constant index and give the constant it selects,
        or None where the index is zero.
        """

        index = struct.unpack(">H", s[0:2])[0]
        if index == 0:
            # Zero means there is no superclass; without this check the
            # lookup below would use position -1, the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Read the number of interfaces, then that many one-based constant
        indexes, giving the list of constants they select.
        """

        interfaces = []
        number = struct.unpack(">H", s[0:2])[0]
        s = s[2:]
        for i in range(0, number):
            index = struct.unpack(">H", s[0:2])[0]
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):

        "Read the number of fields, then decode the fields following it."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):

        "Read the number of attributes, then decode those following it."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):

        "Read the number of methods, then decode the methods following it."

        number = struct.unpack(">H", s[0:2])[0]
        return self._get_methods_from_table(number, s[2:])
1.376 +
if __name__ == "__main__":
    import sys
    # Class files are binary data, so read them in binary mode, and make
    # sure the file is closed even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()
1.381 +
1.382 +# vim: tabstop=4 expandtab shiftwidth=4