javaclass

classfile.py

5:d18e689a422d
2004-10-29 Paul Boddie Added a class for the reading and conversion of Java bytecodes, moved the bytecode table into this class, added some initial conversion methods.
     #!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each decoder reads a big-endian value from the start of the given data and
# ignores anything following the bytes it needs.

def _decode(fmt, size, data):

    "Unpack the first 'size' bytes of 'data' using the struct format 'fmt'."

    (value,) = struct.unpack(fmt, data[:size])
    return value

def u1(data):
    "Return the unsigned 1 byte integer at the start of 'data'."
    return _decode(">B", 1, data)

def u2(data):
    "Return the unsigned 2 byte integer at the start of 'data'."
    return _decode(">H", 2, data)

def u4(data):
    "Return the unsigned 4 byte integer at the start of 'data'."
    return _decode(">L", 4, data)

def s4(data):
    "Return the signed 4 byte integer at the start of 'data'."
    return _decode(">l", 4, data)

def s8(data):
    "Return the signed 8 byte integer at the start of 'data'."
    return _decode(">q", 8, data)

def f4(data):
    "Return the 4 byte floating point number at the start of 'data'."
    return _decode(">f", 4, data)

def f8(data):
    "Return the 8 byte floating point number at the start of 'data'."
    return _decode(">d", 8, data)

# Useful mix-ins.

class NameUtils:

    """
    A mix-in for objects providing 'name_index' and 'class_file' attributes,
    permitting the name to be looked up in the class file's constants.
    """

    def get_name(self):

        """
        Return the constant found at position 'name_index' (numbered from one)
        as a Unicode string, or None where the index is zero.
        """

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        constant = self.class_file.constants[self.name_index - 1]
        return unicode(constant)

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
class ClassInfo(NameUtils):

    "A constant (tag 7) holding the constant pool index of a name."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo:

    "A base class for constants holding a class index and a name-and-type index."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo):
    "A reference constant with tag 9."

class MethodRefInfo(RefInfo):
    "A reference constant with tag 10."

class InterfaceMethodRefInfo(RefInfo):
    "A reference constant with tag 11."

class NameAndTypeInfo(NameUtils):

    "A constant (tag 12) holding a name index and a descriptor index."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

class Utf8Info:

    "A constant (tag 1) holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE(review): the bytes are decoded as plain UTF-8; confirm whether
        # NOTE(review): the class file format's variant encoding matters here.
        return unicode(self.bytes, "utf-8")

class StringInfo:

    "A constant (tag 8) holding the constant pool index of its text."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "A base class for numeric constants stored in 4 bytes."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "A constant (tag 3) holding a signed 4 byte integer."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A constant (tag 4) holding a 4 byte floating point number."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    """
    A base class for numeric constants stored in 8 bytes. The value is kept as
    two unsigned 32-bit integers: 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        "Read this constant from 'data', returning the remaining data."
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_bytes(self):

        """
        Return the 8 bytes of the value, rebuilt from the two halves. Adding
        the halves together (as integers) would not yield decodable data.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "A constant (tag 5) holding a signed 8 byte integer."

    def get_value(self):
        return s8(self._get_bytes())

class DoubleInfo(LargeNumInfo):

    "A constant (tag 6) holding an 8 byte floating point number."

    def get_value(self):
        return f8(self._get_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class DescriptorUtils:

    """
    A mix-in providing the parsing of descriptor strings. Types are returned
    as (base_type, object_type, array_type) tuples: 'object_type' is only set
    for the base type "L", and 'array_type' (itself such a tuple) is only set
    for the base type "[".
    """

    # Symbol parsing.

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple containing a list
        of parameter types and the return type, the latter being None where
        the descriptor specifies "V".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Step over the closing bracket to reach the return type.
        s = s[1:]
        if s[0] != "V":
            return_type, s = self._get_field_type(s)
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        return self._get_field_type(s)

    def _get_component_type(self, s):
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse a single type from the start of 's', returning a tuple
        containing the type and the remainder of the string.
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return the first character of 's' (or None) and the rest of 's'."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):
        "Return the text before the first ';' in 's' and the text after it."
        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Return the component type at the start of 's' and the rest of 's'."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

class ItemInfo(NameUtils, DescriptorUtils):

    "A base class for the entries of the field and method tables."

    def init(self, data, class_file):
        "Read this item from 'data', returning the remaining data."
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):

    "An entry in the field table."

    def get_descriptor(self):
        "Return the parsed field type and any unparsed descriptor text."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):

    "An entry in the method table."

    def get_descriptor(self):
        "Return the parameter types and return type of the method."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute retaining its undecoded contents in 'info'. In all
    attribute classes, the 'data' given to 'init' starts at the attribute
    length, the attribute name index having been consumed by the caller.
    """

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        # Retain the class file as the more specific attribute classes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):

    "The SourceFile attribute."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The caller continues decoding from the data returned here.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "The ConstantValue attribute."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the constant referenced by this attribute."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "The Code attribute, containing an exception table and more attributes."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The Exceptions attribute, holding a table of constant pool indexes."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "The InnerClasses attribute, holding a list of InnerClassInfo objects."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    "The Synthetic attribute, decoded generically."

class LineNumberAttributeInfo(AttributeInfo):

    "The LineNumberTable attribute, holding a list of LineNumberInfo objects."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "The LocalVariableTable attribute, holding LocalVariableInfo objects."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the remaining data."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # Local variables need the class file to look up their names.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    "The Deprecated attribute, decoded generically."

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry in the exception table of a Code attribute."

    def init(self, data):
        "Read this entry from 'data', returning the remaining data."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an InnerClasses attribute."

    def init(self, data, class_file):
        "Read this entry from 'data', returning the remaining data."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a LineNumberTable attribute."

    def init(self, data):
        "Read this entry from 'data', returning the remaining data."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils):

    "An entry in the table of a LocalVariableTable attribute."

    def init(self, data, class_file):
        "Read this entry from 'data', returning the remaining data."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed variable type and any unparsed descriptor text."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    "Raised with the offending tag where a constant cannot be decoded."

class UnknownAttribute(Exception):
    "Raised with the offending name where an attribute is not recognised."

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # The classes used to decode constants, indexed by tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # The classes used to decode attributes, indexed by attribute name.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first 8 bytes of the string are not examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple containing
        the constant and the remaining data. An UnknownTag exception is raised
        where the tag of the constant is not recognised.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until position 'count' is reached, positions
        being numbered from one. A tuple containing the list of constants and
        the remaining data is returned.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode the given 'number' of items from 's' using instances of 'cls',
        returning a tuple containing the list of items and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing the attribute class
        according to the name found in the constants, and returning a tuple
        containing the attribute and the remaining data. An UnknownAttribute
        exception is raised where the name is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes from 's', returning them with the rest."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant for the superclass (or None where the index is
        zero) along with the remaining data.
        """

        index = u2(s[0:2])
        # A zero index must not be allowed to select the last constant.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Return the constants for the interfaces and the remaining data."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary: avoid any translation of their contents.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4