javaclass

classfile.py

24:7121c7af1076
2004-11-10 Paul Boddie Added exception type checking.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class PythonNameUtils:    36     def get_python_name(self):    37         name = self.get_name()    38         if str(name) == "<init>":    39             return "__init__"    40         else:    41             return name    42     43 class NameUtils(PythonNameUtils):    44     def get_name(self):    45         if self.name_index != 0:    46             return self.class_file.constants[self.name_index - 1]    47         else:    48             # Some name indexes are zero to indicate special conditions.    49             return None    50     51 class NameAndTypeUtils(PythonNameUtils):    52     def get_name(self):    53         if self.name_and_type_index != 0:    54             return self.class_file.constants[self.name_and_type_index - 1].get_name()    55         else:    56             # Some name indexes are zero to indicate special conditions.    
57             return None    58     59     def get_field_descriptor(self):    60         if self.name_and_type_index != 0:    61             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    62         else:    63             # Some name indexes are zero to indicate special conditions.    64             return None    65     66     def get_method_descriptor(self):    67         if self.name_and_type_index != 0:    68             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    69         else:    70             # Some name indexes are zero to indicate special conditions.    71             return None    72     73 class DescriptorUtils:    74     75     "Symbol parsing."    76     77     def _get_method_descriptor(self, s):    78         assert s[0] == "("    79         params = []    80         s = s[1:]    81         while s[0] != ")":    82             parameter_descriptor, s = self._get_parameter_descriptor(s)    83             params.append(parameter_descriptor)    84         if s[1] != "V":    85             return_type, s = self._get_field_type(s[1:])    86         else:    87             return_type, s = None, s[1:]    88         return params, return_type    89     90     def _get_parameter_descriptor(self, s):    91         return self._get_field_type(s)    92     93     def _get_field_descriptor(self, s):    94         return self._get_field_type(s)    95     96     def _get_component_type(self, s):    97         return self._get_field_type(s)    98     99     def _get_field_type(self, s):   100         base_type, s = self._get_base_type(s)   101         object_type = None   102         array_type = None   103         if base_type == "L":   104             object_type, s = self._get_object_type(s)   105         elif base_type == "[":   106             array_type, s = self._get_array_type(s)   107         return (base_type, object_type, array_type), s   108    109     def 
_get_base_type(self, s):   110         if len(s) > 0:   111             return s[0], s[1:]   112         else:   113             return None, s   114    115     def _get_object_type(self, s):   116         if len(s) > 0:   117             s_end = s.find(";")   118             assert s_end != -1   119             return s[:s_end], s[s_end+1:]   120         else:   121             return None, s   122    123     def _get_array_type(self, s):   124         if len(s) > 0:   125             return self._get_component_type(s)   126         else:   127             return None, s   128    129 # Constant information.   130 # Objects of these classes are not directly aware of the class they reside in.   131    132 class ClassInfo(NameUtils):   133     def init(self, data, class_file):   134         self.class_file = class_file   135         self.name_index = u2(data[0:2])   136         return data[2:]   137    138 class RefInfo(NameAndTypeUtils):   139     def init(self, data, class_file):   140         self.class_file = class_file   141         self.class_index = u2(data[0:2])   142         self.name_and_type_index = u2(data[2:4])   143         return data[4:]   144    145 class FieldRefInfo(RefInfo):   146     def get_descriptor(self):   147         return RefInfo.get_field_descriptor(self)   148    149 class MethodRefInfo(RefInfo):   150     def get_descriptor(self):   151         return RefInfo.get_method_descriptor(self)   152    153 class InterfaceMethodRefInfo(RefInfo):   154     def get_descriptor(self):   155         return RefInfo.get_method_descriptor(self)   156    157 class NameAndTypeInfo(NameUtils, DescriptorUtils):   158     def init(self, data, class_file):   159         self.class_file = class_file   160         self.name_index = u2(data[0:2])   161         self.descriptor_index = u2(data[2:4])   162         return data[4:]   163    164     def get_field_descriptor(self):   165         return 
self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   166    167     def get_method_descriptor(self):   168         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   169    170 class Utf8Info:   171     def init(self, data, class_file):   172         self.class_file = class_file   173         self.length = u2(data[0:2])   174         self.bytes = data[2:2+self.length]   175         return data[2+self.length:]   176    177     def __str__(self):   178         return self.bytes   179    180     def __unicode__(self):   181         return unicode(self.bytes, "utf-8")   182    183 class StringInfo:   184     def init(self, data, class_file):   185         self.class_file = class_file   186         self.string_index = u2(data[0:2])   187         return data[2:]   188    189 class SmallNumInfo:   190     def init(self, data, class_file):   191         self.class_file = class_file   192         self.bytes = data[0:4]   193         return data[4:]   194    195 class IntegerInfo(SmallNumInfo):   196     def get_value(self):   197         return s4(self.bytes)   198    199 class FloatInfo(SmallNumInfo):   200     def get_value(self):   201         return f4(self.bytes)   202    203 class LargeNumInfo:   204     def init(self, data, class_file):   205         self.class_file = class_file   206         self.high_bytes = u4(data[0:4])   207         self.low_bytes = u4(data[4:8])   208         return data[8:]   209    210 class LongInfo(LargeNumInfo):   211     def get_value(self):   212         return s8(self.high_bytes + self.low_bytes)   213    214 class DoubleInfo(LargeNumInfo):   215     def get_value(self):   216         return f8(self.high_bytes + self.low_bytes)   217    218 # Other information.   219 # Objects of these classes are generally aware of the class they reside in.   
class ItemInfo(NameUtils, DescriptorUtils):

    "Common decoding for the members (fields and methods) of a class."

    def init(self, data, class_file):

        """
        Decode the member from the start of 'data', using 'class_file' to
        decode its attributes, and return the data which follows the member.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):
    def get_descriptor(self):
        "Return the parsed field descriptor of this field."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):
    def get_descriptor(self):
        "Return the parsed method descriptor of this method."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Attribute information.
# Each 'init' method receives the data starting at the attribute's length
# field (the attribute name index has already been consumed by the caller) and
# returns the data which follows the attribute.

class AttributeInfo:

    "An attribute whose content is kept undecoded in 'info'."

    def init(self, data, class_file):
        # Record the class file as the other attribute classes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned: the caller continues decoding
        # from it.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content is expected to consist only of the two byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value provided by the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The code attribute carries its own nested attributes.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the exception table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo.init requires the class file in order to
            # resolve names and descriptors.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.
# Each 'init' method decodes one table entry from the start of 'data' and
# returns the data which follows it.

class ExceptionInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required here since get_descriptor uses
# _get_field_descriptor.

class LocalVariableInfo(NameUtils, DescriptorUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of this local variable."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', choosing its class according
        to the leading tag byte. Return a tuple (constant, remaining data).
        Raise UnknownTag for an unrecognised tag.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the entry counter reaches 'count'.
        Return a tuple (list of constants, remaining data).
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' instances of 'cls' from 's'. Return a tuple (list of
        instances, remaining data).
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', choosing its class according
        to the name given by the referenced constant. Return a tuple
        (attribute, remaining data). Raise UnknownAttribute for an
        unrecognised name.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name == "SourceFile":
            attribute = SourceFileAttributeInfo()
        elif constant_name == "ConstantValue":
            attribute = ConstantValueAttributeInfo()
        elif constant_name == "Code":
            attribute = CodeAttributeInfo()
        elif constant_name == "Exceptions":
            attribute = ExceptionsAttributeInfo()
        elif constant_name == "InnerClasses":
            attribute = InnerClassesAttributeInfo()
        elif constant_name == "Synthetic":
            attribute = SyntheticAttributeInfo()
        elif constant_name == "LineNumberTable":
            attribute = LineNumberAttributeInfo()
        elif constant_name == "LocalVariableTable":
            attribute = LocalVariableAttributeInfo()
        elif constant_name == "Deprecated":
            attribute = DeprecatedAttributeInfo()
        else:
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's'. Return a tuple (list of
        attributes, remaining data).
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    # Each of the following methods decodes one section from the start of 's'
    # and returns a tuple (decoded value, remaining data).

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):
        index = u2(s[0:2])
        # An index of zero refers to no constant at all: without this test,
        # the expression below would select the last constant in the list.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and release the file
    # even where decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4