javaclass

classfile.py

6:a714f8355910
2004-11-07 Paul Boddie Added label-based jumping (rather than permitting only simple jumping forward to a single point). Added "lazy" dictionary support for the instruction mapping (from Java to Python bytecodes) along with a class which provides the "lazy" value for each case where the value is not known initially but is provided later. Added more bytecode translations and some Python bytecode sequence methods. Fixed the interpretation of various signed values.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class NameUtils:    36     def get_name(self):    37         if self.name_index != 0:    38             return unicode(self.class_file.constants[self.name_index - 1])    39         else:    40             # Some name indexes are zero to indicate special conditions.    41             return None    42     43 # Constant information.    44 # Objects of these classes are not directly aware of the class they reside in.    45     46 class ClassInfo(NameUtils):    47     def init(self, data, class_file):    48         self.class_file = class_file    49         self.name_index = u2(data[0:2])    50         return data[2:]    51     52 class RefInfo:    53     def init(self, data, class_file):    54         self.class_file = class_file    55         self.class_index = u2(data[0:2])    56         self.name_and_type_index = u2(data[2:4])    57         return data[4:]    58     59 class FieldRefInfo(RefInfo):    60     pass    61     62 class MethodRefInfo(RefInfo):    63     pass    64     65 class InterfaceMethodRefInfo(RefInfo):    66     pass    67     68 class NameAndTypeInfo(NameUtils):    69     def init(self, data, class_file):    70         self.class_file = class_file    71         self.name_index = u2(data[0:2])    72         self.descriptor_index = u2(data[2:4])    73         return data[4:]    74     75 class Utf8Info:    76     def init(self, data, class_file):    77         self.class_file = class_file    78         self.length = u2(data[0:2])    79         self.bytes = data[2:2+self.length]    80         return data[2+self.length:]    81     82     def __str__(self):    83         return self.bytes    84     85     def __unicode__(self):    86         return unicode(self.bytes, "utf-8")    87     88 class StringInfo:    89     def init(self, data, class_file):    90         self.class_file = class_file    91         self.string_index = u2(data[0:2])    92         return data[2:]    93     94 class SmallNumInfo:    95     def init(self, data, class_file):    96         self.class_file = class_file    97         self.bytes = data[0:4]    98         return data[4:]    99    100 class IntegerInfo(SmallNumInfo):   101     def get_value(self):   102         return s4(self.bytes)   103    104 class FloatInfo(SmallNumInfo):   105     def get_value(self):   106         return f4(self.bytes)   107    108 class LargeNumInfo:   109     def init(self, data, class_file):   110         self.class_file = class_file   111         self.high_bytes = u4(data[0:4])   112         self.low_bytes = u4(data[4:8])   113         return data[8:]   114    115 class LongInfo(LargeNumInfo):   116     def get_value(self):   117         return s8(self.high_bytes + self.low_bytes)   118    119 class DoubleInfo(LargeNumInfo):   120     def get_value(self):   121         return f8(self.high_bytes + self.low_bytes)   122    123 # Other information.   124 # Objects of these classes are generally aware of the class they reside in.   125    126 class ItemInfo(NameUtils):   127     def init(self, data, class_file):   128         self.class_file = class_file   129         self.access_flags = u2(data[0:2])   130         self.name_index = u2(data[2:4])   131         self.descriptor_index = u2(data[4:6])   132         self.attributes, data = self.class_file._get_attributes(data[6:])   133         return data   134    135     # Symbol parsing.   136    137     def _get_method_descriptor(self, s):   138         assert s[0] == "("   139         params = []   140         s = s[1:]   141         while s[0] != ")":   142             parameter_descriptor, s = self._get_parameter_descriptor(s)   143             params.append(parameter_descriptor)   144         if s[1] != "V":   145             return_type, s = self._get_field_type(s[1:])   146         else:   147             return_type, s = None, s[1:]   148         return params, return_type   149    150     def _get_parameter_descriptor(self, s):   151         return self._get_field_type(s)   152    153     def _get_field_descriptor(self, s):   154         return self._get_field_type(s)   155    156     def _get_component_type(self, s):   157         return self._get_field_type(s)   158    159     def _get_field_type(self, s):   160         base_type, s = self._get_base_type(s)   161         object_type = None   162         array_type = None   163         if base_type == "L":   164             object_type, s = self._get_object_type(s)   165         elif base_type == "[":   166             array_type, s = self._get_array_type(s)   167         return (base_type, object_type, array_type), s   168    169     def _get_base_type(self, s):   170         if len(s) > 0:   171             return s[0], s[1:]   172         else:   173             return None, s   174    175     def _get_object_type(self, s):   176         if len(s) > 0:   177             s_end = s.find(";")   178             assert s_end != -1   179             return s[:s_end], s[s_end+1:]   180         else:   181             return None, s   182    183     def _get_array_type(self, s):   184         if len(s) > 0:   185             return self._get_component_type(s)   186         else:   187             return None, s   188    189 class FieldInfo(ItemInfo):   190     def get_descriptor(self):   191         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   192    193 class MethodInfo(ItemInfo):   194     def get_descriptor(self):   195         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   196    197 class AttributeInfo:   198     def init(self, data, class_file):   199         self.attribute_length = u4(data[0:4])   200         self.info = data[4:4+self.attribute_length]   201         return data[4+self.attribute_length:]   202    203 # NOTE: Decode the different attribute formats.   204    205 class SourceFileAttributeInfo(AttributeInfo, NameUtils):   206     def init(self, data, class_file):   207         self.class_file = class_file   208         self.attribute_length = u4(data[0:4])   209         # Permit the NameUtils mix-in.   210         self.name_index = self.sourcefile_index = u2(data[4:6])   211    212 class ConstantValueAttributeInfo(AttributeInfo):   213     def init(self, data, class_file):   214         self.class_file = class_file   215         self.attribute_length = u4(data[0:4])   216         self.constant_value_index = u2(data[4:6])   217         assert 4+self.attribute_length == 6   218         return data[4+self.attribute_length:]   219    220     def get_value(self):   221         return self.class_file.constants[self.constant_value_index - 1].get_value()   222    223 class CodeAttributeInfo(AttributeInfo):   224     def init(self, data, class_file):   225         self.class_file = class_file   226         self.attribute_length = u4(data[0:4])   227         self.max_stack = u2(data[4:6])   228         self.max_locals = u2(data[6:8])   229         self.code_length = u4(data[8:12])   230         end_of_code = 12+self.code_length   231         self.code = data[12:end_of_code]   232         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   233         self.exception_table = []   234         data = data[end_of_code + 2:]   235         for i in range(0, self.exception_table_length):   236             exception = ExceptionInfo()   237             data = exception.init(data)   238             self.exception_table.append(exception)   239         self.attributes, data = self.class_file._get_attributes(data)   240         return data   241    242 class ExceptionsAttributeInfo(AttributeInfo):   243     def init(self, data, class_file):   244         self.class_file = class_file   245         self.attribute_length = u4(data[0:4])   246         self.number_of_exceptions = u2(data[4:6])   247         self.exception_index_table = []   248         index = 6   249         for i in range(0, self.number_of_exceptions):   250             self.exception_index_table.append(u2(data[index:index+2]))   251             index += 2   252         return data[index:]   253    254     def get_exception(self, i):   255         exception_index = self.exception_index_table[i]   256         return self.class_file.constants[exception_index - 1]   257    258 class InnerClassesAttributeInfo(AttributeInfo):   259     def init(self, data, class_file):   260         self.class_file = class_file   261         self.attribute_length = u4(data[0:4])   262         self.number_of_classes = u2(data[4:6])   263         self.classes = []   264         data = data[6:]   265         for i in range(0, self.number_of_classes):   266             inner_class = InnerClassInfo()   267             data = inner_class.init(data, self.class_file)   268             self.classes.append(inner_class)   269         return data   270    271 class SyntheticAttributeInfo(AttributeInfo):   272     pass   273    274 class LineNumberAttributeInfo(AttributeInfo):   275     def init(self, data, class_file):   276         self.class_file = class_file   277         self.attribute_length = u4(data[0:4])   278         self.line_number_table_length = u2(data[4:6])   279         self.line_number_table = []   280         data = data[6:]   281         for i in range(0, self.line_number_table_length):   282             line_number = LineNumberInfo()   283             data = line_number.init(data)   284             self.line_number_table.append(line_number)   285         return data   286    287 class LocalVariableAttributeInfo(AttributeInfo):   288     def init(self, data, class_file):   289         self.class_file = class_file   290         self.attribute_length = u4(data[0:4])   291         self.local_variable_table_length = u2(data[4:6])   292         self.local_variable_table = []   293         data = data[6:]   294         for i in range(0, self.local_variable_table_length):   295             local_variable = LocalVariableInfo()   296             data = local_variable.init(data)   297             self.local_variable_table.append(local_variable)   298         return data   299    300 class DeprecatedAttributeInfo(AttributeInfo):   301     pass   302    303 # Child classes of the attribute information classes.   304    305 class ExceptionInfo:   306     def init(self, data):   307         self.start_pc = u2(data[0:2])   308         self.end_pc = u2(data[2:4])   309         self.handler_pc = u2(data[4:6])   310         self.catch_type = u2(data[6:8])   311         return data[8:]   312    313 class InnerClassInfo(NameUtils):   314     def init(self, data, class_file):   315         self.class_file = class_file   316         self.inner_class_info_index = u2(data[0:2])   317         self.outer_class_info_index = u2(data[2:4])   318         # Permit the NameUtils mix-in.   319         self.name_index = self.inner_name_index = u2(data[4:6])   320         self.inner_class_access_flags = u2(data[6:8])   321         return data[8:]   322    323 class LineNumberInfo:   324     def init(self, data):   325         self.start_pc = u2(data[0:2])   326         self.line_number = u2(data[2:4])   327         return data[4:]   328    329 class LocalVariableInfo(NameUtils):   330     def init(self, data, class_file):   331         self.class_file = class_file   332         self.start_pc = u2(data[0:2])   333         self.length = u2(data[2:4])   334         self.name_index = u2(data[4:6])   335         self.descriptor_index = u2(data[6:8])   336         self.index = u2(data[8:10])   337         return data[10:]   338    339     def get_descriptor(self):   340         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   341    342 # Exceptions.   343    344 class UnknownTag(Exception):   345     pass   346    347 class UnknownAttribute(Exception):   348     pass   349    350 # Abstractions for the main structures.   351    352 class ClassFile:   353    354     "A class representing a Java class file."   355    356     def __init__(self, s):   357    358         """   359         Process the given string 's', populating the object with the class   360         file's details.   361         """   362    363         self.constants, s = self._get_constants(s[8:])   364         self.access_flags, s = self._get_access_flags(s)   365         self.this_class, s = self._get_this_class(s)   366         self.super_class, s = self._get_super_class(s)   367         self.interfaces, s = self._get_interfaces(s)   368         self.fields, s = self._get_fields(s)   369         self.methods, s = self._get_methods(s)   370         self.attributes, s = self._get_attributes(s)   371    372     def _decode_const(self, s):   373         tag = u1(s[0:1])   374         if tag == 1:   375             const = Utf8Info()   376         elif tag == 3:   377             const = IntegerInfo()   378         elif tag == 4:   379             const = FloatInfo()   380         elif tag == 5:   381             const = LongInfo()   382         elif tag == 6:   383             const = DoubleInfo()   384         elif tag == 7:   385             const = ClassInfo()   386         elif tag == 8:   387             const = StringInfo()   388         elif tag == 9:   389             const = FieldRefInfo()   390         elif tag == 10:   391             const = MethodRefInfo()   392         elif tag == 11:   393             const = InterfaceMethodRefInfo()   394         elif tag == 12:   395             const = NameAndTypeInfo()   396         else:   397             raise UnknownTag, tag   398    399         # Initialise the constant object.   400    401         s = const.init(s[1:], self)   402         return const, s   403    404     def _get_constants_from_table(self, count, s):   405         l = []   406         # Have to skip certain entries specially.   407         i = 1   408         while i < count:   409             c, s = self._decode_const(s)   410             l.append(c)   411             # Add a blank entry after "large" entries.   412             if isinstance(c, LargeNumInfo):   413                 l.append(None)   414                 i += 1   415             i += 1   416         return l, s   417    418     def _get_items_from_table(self, cls, number, s):   419         l = []   420         for i in range(0, number):   421             f = cls()   422             s = f.init(s, self)   423             l.append(f)   424         return l, s   425    426     def _get_methods_from_table(self, number, s):   427         return self._get_items_from_table(MethodInfo, number, s)   428    429     def _get_fields_from_table(self, number, s):   430         return self._get_items_from_table(FieldInfo, number, s)   431    432     def _get_attribute_from_table(self, s):   433         attribute_name_index = u2(s[0:2])   434         constant_name = self.constants[attribute_name_index - 1].bytes   435         if constant_name == "SourceFile":   436             attribute = SourceFileAttributeInfo()   437         elif constant_name == "ConstantValue":   438             attribute = ConstantValueAttributeInfo()   439         elif constant_name == "Code":   440             attribute = CodeAttributeInfo()   441         elif constant_name == "Exceptions":   442             attribute = ExceptionsAttributeInfo()   443         elif constant_name == "InnerClasses":   444             attribute = InnerClassesAttributeInfo()   445         elif constant_name == "Synthetic":   446             attribute = SyntheticAttributeInfo()   447         elif constant_name == "LineNumberTable":   448             attribute = LineNumberAttributeInfo()   449         elif constant_name == "LocalVariableTable":   450             attribute = LocalVariableAttributeInfo()   451         elif constant_name == "Deprecated":   452             attribute = DeprecatedAttributeInfo()   453         else:   454             raise UnknownAttribute, constant_name   455         s = attribute.init(s[2:], self)   456         return attribute, s   457    458     def _get_attributes_from_table(self, number, s):   459         attributes = []   460         for i in range(0, number):   461             attribute, s = self._get_attribute_from_table(s)   462             attributes.append(attribute)   463         return attributes, s   464    465     def _get_constants(self, s):   466         count = u2(s[0:2])   467         return self._get_constants_from_table(count, s[2:])   468    469     def _get_access_flags(self, s):   470         return u2(s[0:2]), s[2:]   471    472     def _get_this_class(self, s):   473         index = u2(s[0:2])   474         return self.constants[index - 1], s[2:]   475    476     _get_super_class = _get_this_class   477    478     def _get_interfaces(self, s):   479         interfaces = []   480         number = u2(s[0:2])   481         s = s[2:]   482         for i in range(0, number):   483             index = u2(s[0:2])   484             interfaces.append(self.constants[index - 1])   485             s = s[2:]   486         return interfaces, s   487    488     def _get_fields(self, s):   489         number = u2(s[0:2])   490         return self._get_fields_from_table(number, s[2:])   491    492     def _get_attributes(self, s):   493         number = u2(s[0:2])   494         return self._get_attributes_from_table(number, s[2:])   495    496     def _get_methods(self, s):   497         number = u2(s[0:2])   498         return self._get_methods_from_table(number, s[2:])   499    500 if __name__ == "__main__":   501     import sys   502     f = open(sys.argv[1])   503     c = ClassFile(f.read())   504    505 # vim: tabstop=4 expandtab shiftwidth=4