javaclass

classfile.py

3:ba40e828e2eb
2004-10-28 Paul Boddie Fixed array descriptor handling.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 # Useful mix-ins.    22     23 class NameUtils:    24     def get_name(self):    25         if self.name_index != 0:    26             return unicode(self.class_file.constants[self.name_index - 1])    27         else:    28             # Some name indexes are zero to indicate special conditions.    29             return None    30     31 # Constant information.    32 # Objects of these classes are not directly aware of the class they reside in.    33     34 class ClassInfo(NameUtils):    35     def init(self, data, class_file):    36         self.class_file = class_file    37         self.name_index = u2(data[0:2])    38         return data[2:]    39     40 class RefInfo:    41     def init(self, data, class_file):    42         self.class_file = class_file    43         self.class_index = u2(data[0:2])    44         self.name_and_type_index = u2(data[2:4])    45         return data[4:]    46     47 class FieldRefInfo(RefInfo):    48     pass    49     50 class MethodRefInfo(RefInfo):    51     pass    52     53 class InterfaceMethodRefInfo(RefInfo):    54     pass    55     56 class NameAndTypeInfo(NameUtils):    57     def init(self, data, class_file):    58         self.class_file = class_file    59         self.name_index = u2(data[0:2])    60         self.descriptor_index = u2(data[2:4])    61         return data[4:]    62     63 class Utf8Info:    64     def init(self, data, class_file):    65         self.class_file = class_file    66         self.length = u2(data[0:2])    67         self.bytes = data[2:2+self.length]    68         return data[2+self.length:]    69     70     def __str__(self):    71         return self.bytes    72     73     def __unicode__(self):    74         return unicode(self.bytes, "utf-8")    75     76 class StringInfo:    77     def init(self, data, class_file):    78         self.class_file = class_file    79         self.string_index = u2(data[0:2])    80         return data[2:]    81     82 class SmallNumInfo:    83     def init(self, data, class_file):    84         self.class_file = class_file    85         self.bytes = u4(data[0:4])    86         return data[4:]    87     88 class IntegerInfo(SmallNumInfo):    89     pass    90     91 class FloatInfo(SmallNumInfo):    92     pass    93     94 class LargeNumInfo:    95     def init(self, data, class_file):    96         self.class_file = class_file    97         self.high_bytes = u4(data[0:4])    98         self.low_bytes = u4(data[4:8])    99         return data[8:]   100    101 class LongInfo(LargeNumInfo):   102     pass   103    104 class DoubleInfo(LargeNumInfo):   105     pass   106    107 # Other information.   108 # Objects of these classes are generally aware of the class they reside in.   109    110 class ItemInfo(NameUtils):   111     def init(self, data, class_file):   112         self.class_file = class_file   113         self.access_flags = u2(data[0:2])   114         self.name_index = u2(data[2:4])   115         self.descriptor_index = u2(data[4:6])   116         self.attributes, data = self.class_file._get_attributes(data[6:])   117         return data   118    119     # Symbol parsing.   120    121     def _get_method_descriptor(self, s):   122         assert s[0] == "("   123         params = []   124         s = s[1:]   125         while s[0] != ")":   126             parameter_descriptor, s = self._get_parameter_descriptor(s)   127             params.append(parameter_descriptor)   128         if s[1] != "V":   129             return_type, s = self._get_field_type(s[1:])   130         else:   131             return_type, s = None, s[1:]   132         return params, return_type   133    134     def _get_parameter_descriptor(self, s):   135         return self._get_field_type(s)   136    137     def _get_field_descriptor(self, s):   138         return self._get_field_type(s)   139    140     def _get_component_type(self, s):   141         return self._get_field_type(s)   142    143     def _get_field_type(self, s):   144         base_type, s = self._get_base_type(s)   145         object_type = None   146         array_type = None   147         if base_type == "L":   148             object_type, s = self._get_object_type(s)   149         elif base_type == "[":   150             array_type, s = self._get_array_type(s)   151         return (base_type, object_type, array_type), s   152    153     def _get_base_type(self, s):   154         if len(s) > 0:   155             return s[0], s[1:]   156         else:   157             return None, s   158    159     def _get_object_type(self, s):   160         if len(s) > 0:   161             s_end = s.find(";")   162             assert s_end != -1   163             return s[:s_end], s[s_end+1:]   164         else:   165             return None, s   166    167     def _get_array_type(self, s):   168         if len(s) > 0:   169             return self._get_component_type(s)   170         else:   171             return None, s   172    173 class FieldInfo(ItemInfo):   174     def get_descriptor(self):   175         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   176    177 class MethodInfo(ItemInfo):   178     def get_descriptor(self):   179         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   180    181 class AttributeInfo:   182     def init(self, data, class_file):   183         self.attribute_length = u4(data[0:4])   184         self.info = data[4:4+self.attribute_length]   185         return data[4+self.attribute_length:]   186    187 # NOTE: Decode the different attribute formats.   188    189 class SourceFileAttributeInfo(AttributeInfo, NameUtils):   190     def init(self, data, class_file):   191         self.class_file = class_file   192         self.attribute_length = u4(data[0:4])   193         # Permit the NameUtils mix-in.   194         self.name_index = self.sourcefile_index = u2(data[4:6])   195    196 class ConstantValueAttributeInfo(AttributeInfo):   197     def init(self, data, class_file):   198         self.attribute_length = u4(data[0:4])   199         self.constant_value_index = u2(data[4:6])   200         assert 4+self.attribute_length == 6   201         return data[4+self.attribute_length:]   202    203 class CodeAttributeInfo(AttributeInfo):   204     def init(self, data, class_file):   205         self.class_file = class_file   206         self.attribute_length = u4(data[0:4])   207         self.max_stack = u2(data[4:6])   208         self.max_locals = u2(data[6:8])   209         self.code_length = u4(data[8:12])   210         end_of_code = 12+self.code_length   211         self.code = data[12:end_of_code]   212         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   213         self.exception_table = []   214         data = data[end_of_code + 2:]   215         for i in range(0, self.exception_table_length):   216             exception = ExceptionInfo()   217             data = exception.init(data)   218             self.exception_table.append(exception)   219         self.attributes, data = self.class_file._get_attributes(data)   220         return data   221    222 class ExceptionsAttributeInfo(AttributeInfo):   223     def init(self, data, class_file):   224         self.class_file = class_file   225         self.attribute_length = u4(data[0:4])   226         self.number_of_exceptions = u2(data[4:6])   227         self.exception_index_table = []   228         index = 6   229         for i in range(0, self.number_of_exceptions):   230             self.exception_index_table.append(u2(data[index:index+2]))   231             index += 2   232         return data[index:]   233    234     def get_exception(self, i):   235         exception_index = self.exception_index_table[i]   236         return self.class_file.constants[exception_index - 1]   237    238 class InnerClassesAttributeInfo(AttributeInfo):   239     def init(self, data, class_file):   240         self.class_file = class_file   241         self.attribute_length = u4(data[0:4])   242         self.number_of_classes = u2(data[4:6])   243         self.classes = []   244         data = data[6:]   245         for i in range(0, self.number_of_classes):   246             inner_class = InnerClassInfo()   247             data = inner_class.init(data, self.class_file)   248             self.classes.append(inner_class)   249         return data   250    251 class SyntheticAttributeInfo(AttributeInfo):   252     pass   253    254 class LineNumberAttributeInfo(AttributeInfo):   255     def init(self, data, class_file):   256         self.class_file = class_file   257         self.attribute_length = u4(data[0:4])   258         self.line_number_table_length = u2(data[4:6])   259         self.line_number_table = []   260         data = data[6:]   261         for i in range(0, self.line_number_table_length):   262             line_number = LineNumberInfo()   263             data = line_number.init(data)   264             self.line_number_table.append(line_number)   265         return data   266    267 class LocalVariableAttributeInfo(AttributeInfo):   268     def init(self, data, class_file):   269         self.class_file = class_file   270         self.attribute_length = u4(data[0:4])   271         self.local_variable_table_length = u2(data[4:6])   272         self.local_variable_table = []   273         data = data[6:]   274         for i in range(0, self.local_variable_table_length):   275             local_variable = LocalVariableInfo()   276             data = local_variable.init(data)   277             self.local_variable_table.append(local_variable)   278         return data   279    280 class DeprecatedAttributeInfo(AttributeInfo):   281     pass   282    283 # Child classes of the attribute information classes.   284    285 class ExceptionInfo:   286     def init(self, data):   287         self.start_pc = u2(data[0:2])   288         self.end_pc = u2(data[2:4])   289         self.handler_pc = u2(data[4:6])   290         self.catch_type = u2(data[6:8])   291         return data[8:]   292    293 class InnerClassInfo(NameUtils):   294     def init(self, data, class_file):   295         self.class_file = class_file   296         self.inner_class_info_index = u2(data[0:2])   297         self.outer_class_info_index = u2(data[2:4])   298         # Permit the NameUtils mix-in.   299         self.name_index = self.inner_name_index = u2(data[4:6])   300         self.inner_class_access_flags = u2(data[6:8])   301         return data[8:]   302    303 class LineNumberInfo:   304     def init(self, data):   305         self.start_pc = u2(data[0:2])   306         self.line_number = u2(data[2:4])   307         return data[4:]   308    309 class LocalVariableInfo(NameUtils):   310     def init(self, data, class_file):   311         self.class_file = class_file   312         self.start_pc = u2(data[0:2])   313         self.length = u2(data[2:4])   314         self.name_index = u2(data[4:6])   315         self.descriptor_index = u2(data[6:8])   316         self.index = u2(data[8:10])   317         return data[10:]   318    319     def get_descriptor(self):   320         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   321    322 # Exceptions.   323    324 class UnknownTag(Exception):   325     pass   326    327 class UnknownAttribute(Exception):   328     pass   329    330 # Abstractions for the main structures.   331    332 class ClassFile:   333    334     "A class representing a Java class file."   335    336     def __init__(self, s):   337    338         """   339         Process the given string 's', populating the object with the class   340         file's details.   341         """   342    343         self.constants, s = self._get_constants(s[8:])   344         self.access_flags, s = self._get_access_flags(s)   345         self.this_class, s = self._get_this_class(s)   346         self.super_class, s = self._get_super_class(s)   347         self.interfaces, s = self._get_interfaces(s)   348         self.fields, s = self._get_fields(s)   349         self.methods, s = self._get_methods(s)   350         self.attributes, s = self._get_attributes(s)   351    352     def _decode_const(self, s):   353         tag = u1(s[0:1])   354         if tag == 1:   355             const = Utf8Info()   356         elif tag == 3:   357             const = IntegerInfo()   358         elif tag == 4:   359             const = FloatInfo()   360         elif tag == 5:   361             const = LongInfo()   362         elif tag == 6:   363             const = DoubleInfo()   364         elif tag == 7:   365             const = ClassInfo()   366         elif tag == 8:   367             const = StringInfo()   368         elif tag == 9:   369             const = FieldRefInfo()   370         elif tag == 10:   371             const = MethodRefInfo()   372         elif tag == 11:   373             const = InterfaceMethodRefInfo()   374         elif tag == 12:   375             const = NameAndTypeInfo()   376         else:   377             raise UnknownTag, tag   378    379         # Initialise the constant object.   380    381         s = const.init(s[1:], self)   382         return const, s   383    384     def _get_constants_from_table(self, count, s):   385         l = []   386         # Have to skip certain entries specially.   387         i = 1   388         while i < count:   389             c, s = self._decode_const(s)   390             l.append(c)   391             # Add a blank entry after "large" entries.   392             if isinstance(c, LargeNumInfo):   393                 l.append(None)   394                 i += 1   395             i += 1   396         return l, s   397    398     def _get_items_from_table(self, cls, number, s):   399         l = []   400         for i in range(0, number):   401             f = cls()   402             s = f.init(s, self)   403             l.append(f)   404         return l, s   405    406     def _get_methods_from_table(self, number, s):   407         return self._get_items_from_table(MethodInfo, number, s)   408    409     def _get_fields_from_table(self, number, s):   410         return self._get_items_from_table(FieldInfo, number, s)   411    412     def _get_attribute_from_table(self, s):   413         attribute_name_index = u2(s[0:2])   414         constant_name = self.constants[attribute_name_index - 1].bytes   415         if constant_name == "SourceFile":   416             attribute = SourceFileAttributeInfo()   417         elif constant_name == "ConstantValue":   418             attribute = ConstantValueAttributeInfo()   419         elif constant_name == "Code":   420             attribute = CodeAttributeInfo()   421         elif constant_name == "Exceptions":   422             attribute = ExceptionsAttributeInfo()   423         elif constant_name == "InnerClasses":   424             attribute = InnerClassesAttributeInfo()   425         elif constant_name == "Synthetic":   426             attribute = SyntheticAttributeInfo()   427         elif constant_name == "LineNumberTable":   428             attribute = LineNumberAttributeInfo()   429         elif constant_name == "LocalVariableTable":   430             attribute = LocalVariableAttributeInfo()   431         elif constant_name == "Deprecated":   432             attribute = DeprecatedAttributeInfo()   433         else:   434             raise UnknownAttribute, constant_name   435         s = attribute.init(s[2:], self)   436         return attribute, s   437    438     def _get_attributes_from_table(self, number, s):   439         attributes = []   440         for i in range(0, number):   441             attribute, s = self._get_attribute_from_table(s)   442             attributes.append(attribute)   443         return attributes, s   444    445     def _get_constants(self, s):   446         count = u2(s[0:2])   447         return self._get_constants_from_table(count, s[2:])   448    449     def _get_access_flags(self, s):   450         return u2(s[0:2]), s[2:]   451    452     def _get_this_class(self, s):   453         index = u2(s[0:2])   454         return self.constants[index - 1], s[2:]   455    456     _get_super_class = _get_this_class   457    458     def _get_interfaces(self, s):   459         interfaces = []   460         number = u2(s[0:2])   461         s = s[2:]   462         for i in range(0, number):   463             index = u2(s[0:2])   464             interfaces.append(self.constants[index - 1])   465             s = s[2:]   466         return interfaces, s   467    468     def _get_fields(self, s):   469         number = u2(s[0:2])   470         return self._get_fields_from_table(number, s[2:])   471    472     def _get_attributes(self, s):   473         number = u2(s[0:2])   474         return self._get_attributes_from_table(number, s[2:])   475    476     def _get_methods(self, s):   477         number = u2(s[0:2])   478         return self._get_methods_from_table(number, s[2:])   479    480 if __name__ == "__main__":   481     import sys   482     f = open(sys.argv[1])   483     c = ClassFile(f.read())   484    485 # vim: tabstop=4 expandtab shiftwidth=4