javaclass

classfile.py

60:6dda8e27def2
2004-11-19 Paul Boddie Added lookupswitch and tableswitch fixes. Added constant loading fixes. Added slightly improved import behaviour around superclasses.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful tables and constants.    34     35 descriptor_base_type_mapping = {    36     "B" : "int",    37     "C" : "str",    38     "D" : "float",    39     "F" : "float",    40     "I" : "int",    41     "J" : "int",    42     "L" : "object",    43     "S" : "int",    44     "Z" : "bool",    45     "[" : "list"    46     }    47     48 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    49 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    50     51 def has_flags(flags, desired):    52     desired_flags = reduce(lambda a, b: a | b, desired, 0)    53     return (flags & desired_flags) == desired_flags    54     55 # Useful mix-ins.    

class PythonMethodUtils:

    """
    Mix-in deriving Python-friendly names for methods. The class using it must
    provide 'get_name' and a 'get_descriptor' method whose result has the
    parameter types as its first element.
    """

    def get_python_name(self):

        """
        Return the method name followed by "$" and the "$"-joined names of the
        parameter types. The name "<init>" is replaced by "__init__"; for
        "<clinit>" the plain string "__clinit__" is returned without any
        parameter suffix.
        """

        name = str(self.get_name())
        if name == "<clinit>":
            return "__clinit__"
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter type names joined by '$'."
        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base type, object type, array type) tuple
        'descriptor_type'. Object types give the object type name, other base
        types give the base type character in angle brackets, and each array
        dimension appends "[]". The 's' parameter carries the suffix
        accumulated while descending through array types.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + "[]")
        else:
            return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in converting names to Python-style dotted names."

    def get_python_name(self):
        "Return the string form of the name with '/' replaced by '.'."
        return str(self.get_name()).replace("/", ".")

class NameUtils:

    "Mix-in for objects having 'name_index' and 'class_file' attributes."

    def get_name(self):

        """
        Return the constant referenced by 'name_index' (a one-based index into
        the constant list of the class file), or None if the index is zero.
        """

        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Mix-in for objects having 'name_and_type_index' and 'class_file'
    attributes. Each public method delegates to the referenced constant and
    returns None if the index is zero.
    """

    def _get_name_and_type(self):
        "Return the constant referenced by 'name_and_type_index', or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_name(self):
        "Return the name provided by the referenced constant, or None."
        entry = self._get_name_and_type()
        if entry is None:
            return None
        return entry.get_name()

    def get_field_descriptor(self):
        "Return the field descriptor of the referenced constant, or None."
        entry = self._get_name_and_type()
        if entry is None:
            return None
        return entry.get_field_descriptor()

    def get_method_descriptor(self):
        "Return the method descriptor of the referenced constant, or None."
        entry = self._get_name_and_type()
        if entry is None:
            return None
        return entry.get_method_descriptor()

class DescriptorUtils:

    "Symbol parsing."

    # Types are represented as (base type, object type, array type) tuples:
    # the base type is the leading descriptor character, the object type is
    # set only for "L" types, and the array type is the nested tuple of the
    # component type for "[" types.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple of the list of
        parameter types and the return type. The return type is None when the
        character following the closing parenthesis is "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        if s[1] != "V":
            return_type = self._get_field_type(s[1:])[0]
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse one parameter type; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse the component type of an array; see '_get_field_type'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse a single type from the start of 's', returning a tuple of the
        (base type, object type, array type) tuple and the unparsed remainder
        of 's'.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Split off the first character of 's'; give None if 's' is empty."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return the text of 's' up to the first ";" and the text following that
        character. If 's' is empty, None is returned with 's' unchanged.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Parse the component type in 's'; give None if 's' is empty."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Each 'init' method decodes the object from the start of 'data' and returns
# the data remaining after the bytes it has consumed.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant consisting of a 2-byte name index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant consisting of a class index and a name-and-type index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference whose descriptor is a field descriptor."

    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference whose descriptor is a method descriptor."

    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant consisting of a name index and a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def _get_descriptor_text(self):
        "Return the constant at 'descriptor_index' converted with 'unicode'."
        return unicode(self.class_file.constants[self.descriptor_index - 1])

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return self._get_field_descriptor(self._get_descriptor_text())

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return self._get_method_descriptor(self._get_descriptor_text())

class Utf8Info:

    "A constant holding a 2-byte length followed by that many bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE(review): class files are specified as using a modified form of
        # UTF-8; confirm that plain "utf-8" decoding is adequate here.
        return unicode(self.bytes, "utf-8")

class StringInfo:

    "A constant consisting of a 2-byte string index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "Base class for numeric constants kept as 4 undecoded bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed 4-byte integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a 4-byte floating point number."
        return f4(self.bytes)

class LargeNumInfo:

    """
    Base class for numeric constants occupying 8 bytes, kept as two unsigned
    32-bit integers: 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_bytes(self):

        """
        Return the 8 big-endian bytes of the constant. The halves are stored
        as integers, so they must be packed again before being decoded as a
        single value; adding them together would not reconstruct the bytes.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant decoded as a signed 8-byte integer."
        return struct.unpack(">q", self._get_bytes())[0]

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        "Return the constant decoded as an 8-byte floating point number."
        return struct.unpack(">d", self._get_bytes())[0]

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    """
    Common decoding for items consisting of access flags, a name index, a
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):

        """
        Decode the item from the start of 'data', using 'class_file' to decode
        the attributes, and return the data remaining after the item.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        "Return the result of parsing the referenced field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        "Return the result of parsing the referenced method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose content is kept undecoded in 'info'. In all the
    attribute classes, 'data' starts at the 4-byte attribute length and 'init'
    returns the data remaining after the attribute.
    """

    def init(self, data, class_file):
        # Keep the class file reference as all the other 'init' methods do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned: the caller continues decoding
        # from whatever this method gives back.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content is expected to be exactly the 2-byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the constant at 'constant_value_index'."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):

        """
        Decode the stack and local variable limits, the code (kept as
        undecoded bytes in 'code'), the exception table and the nested
        attributes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        "Decode a count followed by that many 2-byte constant indexes."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        "Decode a count followed by that many InnerClassInfo entries."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        "Decode a count followed by that many LineNumberInfo entries."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        "Decode a count followed by that many LocalVariableInfo entries."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

   384 class DeprecatedAttributeInfo(AttributeInfo):   385     pass   386    387 # Child classes of the attribute information classes.   388    389 class ExceptionInfo:   390     def init(self, data):   391         self.start_pc = u2(data[0:2])   392         self.end_pc = u2(data[2:4])   393         self.handler_pc = u2(data[4:6])   394         self.catch_type = u2(data[6:8])   395         return data[8:]   396    397 class InnerClassInfo(NameUtils):   398     def init(self, data, class_file):   399         self.class_file = class_file   400         self.inner_class_info_index = u2(data[0:2])   401         self.outer_class_info_index = u2(data[2:4])   402         # Permit the NameUtils mix-in.   403         self.name_index = self.inner_name_index = u2(data[4:6])   404         self.inner_class_access_flags = u2(data[6:8])   405         return data[8:]   406    407 class LineNumberInfo:   408     def init(self, data):   409         self.start_pc = u2(data[0:2])   410         self.line_number = u2(data[2:4])   411         return data[4:]   412    413 class LocalVariableInfo(NameUtils, PythonNameUtils):   414     def init(self, data, class_file):   415         self.class_file = class_file   416         self.start_pc = u2(data[0:2])   417         self.length = u2(data[2:4])   418         self.name_index = u2(data[4:6])   419         self.descriptor_index = u2(data[6:8])   420         self.index = u2(data[8:10])   421         return data[10:]   422    423     def get_descriptor(self):   424         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   425    426 # Exceptions.   427    428 class UnknownTag(Exception):   429     pass   430    431 class UnknownAttribute(Exception):   432     pass   433    434 # Abstractions for the main structures.   435    436 class ClassFile:   437    438     "A class representing a Java class file."   
439    440     def __init__(self, s):   441    442         """   443         Process the given string 's', populating the object with the class   444         file's details.   445         """   446    447         self.constants, s = self._get_constants(s[8:])   448         self.access_flags, s = self._get_access_flags(s)   449         self.this_class, s = self._get_this_class(s)   450         self.super_class, s = self._get_super_class(s)   451         self.interfaces, s = self._get_interfaces(s)   452         self.fields, s = self._get_fields(s)   453         self.methods, s = self._get_methods(s)   454         self.attributes, s = self._get_attributes(s)   455    456     def _decode_const(self, s):   457         tag = u1(s[0:1])   458         if tag == 1:   459             const = Utf8Info()   460         elif tag == 3:   461             const = IntegerInfo()   462         elif tag == 4:   463             const = FloatInfo()   464         elif tag == 5:   465             const = LongInfo()   466         elif tag == 6:   467             const = DoubleInfo()   468         elif tag == 7:   469             const = ClassInfo()   470         elif tag == 8:   471             const = StringInfo()   472         elif tag == 9:   473             const = FieldRefInfo()   474         elif tag == 10:   475             const = MethodRefInfo()   476         elif tag == 11:   477             const = InterfaceMethodRefInfo()   478         elif tag == 12:   479             const = NameAndTypeInfo()   480         else:   481             raise UnknownTag, tag   482    483         # Initialise the constant object.   484    485         s = const.init(s[1:], self)   486         return const, s   487    488     def _get_constants_from_table(self, count, s):   489         l = []   490         # Have to skip certain entries specially.   
491         i = 1   492         while i < count:   493             c, s = self._decode_const(s)   494             l.append(c)   495             # Add a blank entry after "large" entries.   496             if isinstance(c, LargeNumInfo):   497                 l.append(None)   498                 i += 1   499             i += 1   500         return l, s   501    502     def _get_items_from_table(self, cls, number, s):   503         l = []   504         for i in range(0, number):   505             f = cls()   506             s = f.init(s, self)   507             l.append(f)   508         return l, s   509    510     def _get_methods_from_table(self, number, s):   511         return self._get_items_from_table(MethodInfo, number, s)   512    513     def _get_fields_from_table(self, number, s):   514         return self._get_items_from_table(FieldInfo, number, s)   515    516     def _get_attribute_from_table(self, s):   517         attribute_name_index = u2(s[0:2])   518         constant_name = self.constants[attribute_name_index - 1].bytes   519         if constant_name == "SourceFile":   520             attribute = SourceFileAttributeInfo()   521         elif constant_name == "ConstantValue":   522             attribute = ConstantValueAttributeInfo()   523         elif constant_name == "Code":   524             attribute = CodeAttributeInfo()   525         elif constant_name == "Exceptions":   526             attribute = ExceptionsAttributeInfo()   527         elif constant_name == "InnerClasses":   528             attribute = InnerClassesAttributeInfo()   529         elif constant_name == "Synthetic":   530             attribute = SyntheticAttributeInfo()   531         elif constant_name == "LineNumberTable":   532             attribute = LineNumberAttributeInfo()   533         elif constant_name == "LocalVariableTable":   534             attribute = LocalVariableAttributeInfo()   535         elif constant_name == "Deprecated":   536             attribute = 
DeprecatedAttributeInfo()   537         else:   538             raise UnknownAttribute, constant_name   539         s = attribute.init(s[2:], self)   540         return attribute, s   541    542     def _get_attributes_from_table(self, number, s):   543         attributes = []   544         for i in range(0, number):   545             attribute, s = self._get_attribute_from_table(s)   546             attributes.append(attribute)   547         return attributes, s   548    549     def _get_constants(self, s):   550         count = u2(s[0:2])   551         return self._get_constants_from_table(count, s[2:])   552    553     def _get_access_flags(self, s):   554         return u2(s[0:2]), s[2:]   555    556     def _get_this_class(self, s):   557         index = u2(s[0:2])   558         return self.constants[index - 1], s[2:]   559    560     _get_super_class = _get_this_class   561    562     def _get_interfaces(self, s):   563         interfaces = []   564         number = u2(s[0:2])   565         s = s[2:]   566         for i in range(0, number):   567             index = u2(s[0:2])   568             interfaces.append(self.constants[index - 1])   569             s = s[2:]   570         return interfaces, s   571    572     def _get_fields(self, s):   573         number = u2(s[0:2])   574         return self._get_fields_from_table(number, s[2:])   575    576     def _get_attributes(self, s):   577         number = u2(s[0:2])   578         return self._get_attributes_from_table(number, s[2:])   579    580     def _get_methods(self, s):   581         number = u2(s[0:2])   582         return self._get_methods_from_table(number, s[2:])   583    584 if __name__ == "__main__":   585     import sys   586     f = open(sys.argv[1])   587     c = ClassFile(f.read())   588    589 # vim: tabstop=4 expandtab shiftwidth=4