javaclass

classfile.py

2:928489775f5d
2004-10-25 Paul Boddie Fixed exception table population for CodeAttributeInfo. Added specific support in other AttributeInfo classes. Introduced the class file object into constant objects so that they may more directly support get_name and other convenience methods.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 # Useful mix-ins.    22     23 class NameUtils:    24     def get_name(self):    25         if self.name_index != 0:    26             return unicode(self.class_file.constants[self.name_index - 1])    27         else:    28             # Some name indexes are zero to indicate special conditions.    29             return None    30     31 # Constant information.    32 # Objects of these classes are not directly aware of the class they reside in.    33     34 class ClassInfo(NameUtils):    35     def init(self, data, class_file):    36         self.class_file = class_file    37         self.name_index = u2(data[0:2])    38         return data[2:]    39     40 class RefInfo:    41     def init(self, data, class_file):    42         self.class_file = class_file    43         self.class_index = u2(data[0:2])    44         self.name_and_type_index = u2(data[2:4])    45         return data[4:]    46     47 class FieldRefInfo(RefInfo):    48     pass    49     50 class MethodRefInfo(RefInfo):    51     pass    52     53 class InterfaceMethodRefInfo(RefInfo):    54     pass    55     56 class NameAndTypeInfo(NameUtils):    57     def init(self, data, class_file):    58         self.class_file = class_file    59         self.name_index = u2(data[0:2])    60         self.descriptor_index = u2(data[2:4])    61         return data[4:]    62     63 class Utf8Info:    64     def init(self, data, class_file):    65         self.class_file = class_file    66         self.length = u2(data[0:2])    67         self.bytes = data[2:2+self.length]    68         return data[2+self.length:]    69     70     def __str__(self):    71         return self.bytes    72     73     def __unicode__(self):    74         return unicode(self.bytes, "utf-8")    75     76 class StringInfo:    77     def init(self, data, class_file):    78         self.class_file = class_file    79         self.string_index = u2(data[0:2])    80         return data[2:]    81     82 class SmallNumInfo:    83     def init(self, data, class_file):    84         self.class_file = class_file    85         self.bytes = u4(data[0:4])    86         return data[4:]    87     88 class IntegerInfo(SmallNumInfo):    89     pass    90     91 class FloatInfo(SmallNumInfo):    92     pass    93     94 class LargeNumInfo:    95     def init(self, data, class_file):    96         self.class_file = class_file    97         self.high_bytes = u4(data[0:4])    98         self.low_bytes = u4(data[4:8])    99         return data[8:]   100    101 class LongInfo(LargeNumInfo):   102     pass   103    104 class DoubleInfo(LargeNumInfo):   105     pass   106    107 # Other information.   108 # Objects of these classes are generally aware of the class they reside in.   109    110 class ItemInfo(NameUtils):   111     def init(self, data, class_file):   112         self.class_file = class_file   113         self.access_flags = u2(data[0:2])   114         self.name_index = u2(data[2:4])   115         self.descriptor_index = u2(data[4:6])   116         self.attributes, data = self.class_file._get_attributes(data[6:])   117         return data   118    119     # Symbol parsing.   120    121     def _get_method_descriptor(self, s):   122         assert s[0] == "("   123         params = []   124         s = s[1:]   125         while s[0] != ")":   126             parameter_descriptor, s = self._get_parameter_descriptor(s)   127             params.append(parameter_descriptor)   128         if s[1] != "V":   129             return_type, s = self._get_field_type(s[1:])   130         else:   131             return_type, s = None, s[1:]   132         return params, return_type   133    134     def _get_parameter_descriptor(self, s):   135         return self._get_field_type(s)   136    137     def _get_field_descriptor(self, s):   138         return self._get_field_type(s)   139    140     def _get_component_type(self, s):   141         return self._get_field_type(s)   142    143     def _get_field_type(self, s):   144         base_type, s = self._get_base_type(s)   145         object_type = None   146         array_type = None   147         if base_type == "L":   148             object_type, s = self._get_object_type(s)   149         elif base_type == "[":   150             array_type, s = self._get_array_type(s)   151         return (base_type, object_type, array_type), s   152    153     def _get_base_type(self, s):   154         if len(s) > 0:   155             return s[0], s[1:]   156         else:   157             return None, s   158    159     def _get_object_type(self, s):   160         if len(s) > 0:   161             s_end = s.find(";")   162             assert s_end != -1   163             return s[:s_end], s[s_end+1:]   164         else:   165             return None, s   166    167     def _get_array_type(self, s):   168         if len(s) > 0:   169             return self._get_component_type(s[1:])   170         else:   171             return None, s   172    173 class FieldInfo(ItemInfo):   174     def get_descriptor(self):   175         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   176    177 class MethodInfo(ItemInfo):   178     def get_descriptor(self):   179         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   180    181 class AttributeInfo:   182     def init(self, data, class_file):   183         self.attribute_length = u4(data[0:4])   184         self.info = data[4:4+self.attribute_length]   185         return data[4+self.attribute_length:]   186    187 # NOTE: Decode the different attribute formats.   188    189 class SourceFileAttributeInfo(AttributeInfo, NameUtils):   190     def init(self, data, class_file):   191         self.class_file = class_file   192         self.attribute_length = u4(data[0:4])   193         # Permit the NameUtils mix-in.   194         self.name_index = self.sourcefile_index = u2(data[4:6])   195    196 class ConstantValueAttributeInfo(AttributeInfo):   197     def init(self, data, class_file):   198         self.attribute_length = u4(data[0:4])   199         self.constant_value_index = u2(data[4:6])   200         assert 4+self.attribute_length == 6   201         return data[4+self.attribute_length:]   202    203 class CodeAttributeInfo(AttributeInfo):   204     def init(self, data, class_file):   205         self.class_file = class_file   206         self.attribute_length = u4(data[0:4])   207         self.max_stack = u2(data[4:6])   208         self.max_locals = u2(data[6:8])   209         self.code_length = u4(data[8:12])   210         end_of_code = 12+self.code_length   211         self.code = data[12:end_of_code]   212         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   213         self.exception_table = []   214         data = data[end_of_code + 2:]   215         for i in range(0, self.exception_table_length):   216             exception = ExceptionInfo()   217             data = exception.init(data)   218             self.exception_table.append(exception)   219         self.attributes, data = self.class_file._get_attributes(data)   220         return data   221    222 class ExceptionsAttributeInfo(AttributeInfo):   223     def init(self, data, class_file):   224         self.class_file = class_file   225         self.attribute_length = u4(data[0:4])   226         self.number_of_exceptions = u2(data[4:6])   227         self.exception_index_table = []   228         index = 6   229         for i in range(0, self.number_of_exceptions):   230             self.exception_index_table.append(u2(data[index:index+2]))   231             index += 2   232         return data[index:]   233    234     def get_exception(self, i):   235         exception_index = self.exception_index_table[i]   236         return self.class_file.constants[exception_index - 1]   237    238 class InnerClassesAttributeInfo(AttributeInfo):   239     def init(self, data, class_file):   240         self.class_file = class_file   241         self.attribute_length = u4(data[0:4])   242         self.number_of_classes = u2(data[4:6])   243         self.classes = []   244         data = data[6:]   245         for i in range(0, self.number_of_classes):   246             inner_class = InnerClassInfo()   247             data = inner_class.init(data, self.class_file)   248             self.classes.append(inner_class)   249         return data   250    251 class SyntheticAttributeInfo(AttributeInfo):   252     pass   253    254 class LineNumberAttributeInfo(AttributeInfo):   255     def init(self, data, class_file):   256         self.class_file = class_file   257         self.attribute_length = u4(data[0:4])   258         self.line_number_table_length = u2(data[4:6])   259         self.line_number_table = []   260         data = data[6:]   261         for i in range(0, self.line_number_table_length):   262             line_number = LineNumberInfo()   263             data = line_number.init(data)   264             self.line_number_table.append(line_number)   265         return data   266    267 class LocalVariableAttributeInfo(AttributeInfo):   268     def init(self, data, class_file):   269         self.class_file = class_file   270         self.attribute_length = u4(data[0:4])   271         self.local_variable_table_length = u2(data[4:6])   272         self.local_variable_table = []   273         data = data[6:]   274         for i in range(0, self.local_variable_table_length):   275             local_variable = LocalVariableInfo()   276             data = local_variable.init(data)   277             self.local_variable_table.append(local_variable)   278         return data   279    280 class DeprecatedAttributeInfo(AttributeInfo):   281     pass   282    283 # Child classes of the attribute information classes.   284    285 class ExceptionInfo:   286     def init(self, data):   287         self.start_pc = u2(data[0:2])   288         self.end_pc = u2(data[2:4])   289         self.handler_pc = u2(data[4:6])   290         self.catch_type = u2(data[6:8])   291         return data[8:]   292    293 class InnerClassInfo(NameUtils):   294     def init(self, data, class_file):   295         self.class_file = class_file   296         self.inner_class_info_index = u2(data[0:2])   297         self.outer_class_info_index = u2(data[2:4])   298         # Permit the NameUtils mix-in.   299         self.name_index = self.inner_name_index = u2(data[4:6])   300         self.inner_class_access_flags = u2(data[6:8])   301         return data[8:]   302    303 class LineNumberInfo:   304     def init(self, data):   305         self.start_pc = u2(data[0:2])   306         self.line_number = u2(data[2:4])   307         return data[4:]   308    309 class LocalVariableInfo(NameUtils):   310     def init(self, data, class_file):   311         self.class_file = class_file   312         self.start_pc = u2(data[0:2])   313         self.length = u2(data[2:4])   314         self.name_index = u2(data[4:6])   315         self.descriptor_index = u2(data[6:8])   316         self.index = u2(data[8:10])   317         return data[10:]   318    319     def get_descriptor(self):   320         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   321    322 # Exceptions.   323    324 class UnknownTag(Exception):   325     pass   326    327 class UnknownAttribute(Exception):   328     pass   329    330 # Abstractions for the main structures.   331    332 class ClassFile:   333    334     "A class representing a Java class file."   335    336     def __init__(self, s):   337    338         """   339         Process the given string 's', populating the object with the class   340         file's details.   341         """   342    343         self.constants, s = self._get_constants(s[8:])   344         self.access_flags, s = self._get_access_flags(s)   345         self.this_class, s = self._get_this_class(s)   346         self.super_class, s = self._get_super_class(s)   347         self.interfaces, s = self._get_interfaces(s)   348         self.fields, s = self._get_fields(s)   349         self.methods, s = self._get_methods(s)   350         self.attributes, s = self._get_attributes(s)   351    352     def _decode_const(self, s):   353         tag = u1(s[0:1])   354         if tag == 1:   355             const = Utf8Info()   356         elif tag == 3:   357             const = IntegerInfo()   358         elif tag == 4:   359             const = FloatInfo()   360         elif tag == 5:   361             const = LongInfo()   362         elif tag == 6:   363             const = DoubleInfo()   364         elif tag == 7:   365             const = ClassInfo()   366         elif tag == 8:   367             const = StringInfo()   368         elif tag == 9:   369             const = FieldRefInfo()   370         elif tag == 10:   371             const = MethodRefInfo()   372         elif tag == 11:   373             const = InterfaceMethodRefInfo()   374         elif tag == 12:   375             const = NameAndTypeInfo()   376         else:   377             raise UnknownTag, tag   378    379         # Initialise the constant object.   380    381         s = const.init(s[1:], self)   382         return const, s   383    384     def _get_constants_from_table(self, count, s):   385         l = []   386         # Have to skip certain entries specially.   387         i = 1   388         while i < count:   389             c, s = self._decode_const(s)   390             l.append(c)   391             # Add a blank entry after "large" entries.   392             if isinstance(c, LargeNumInfo):   393                 l.append(None)   394                 i += 1   395             i += 1   396         return l, s   397    398     def _get_items_from_table(self, cls, number, s):   399         l = []   400         for i in range(0, number):   401             f = cls()   402             s = f.init(s, self)   403             l.append(f)   404         return l, s   405    406     def _get_methods_from_table(self, number, s):   407         return self._get_items_from_table(MethodInfo, number, s)   408    409     def _get_fields_from_table(self, number, s):   410         return self._get_items_from_table(FieldInfo, number, s)   411    412     def _get_attribute_from_table(self, s):   413         attribute_name_index = u2(s[0:2])   414         constant_name = self.constants[attribute_name_index - 1].bytes   415         if constant_name == "SourceFile":   416             attribute = SourceFileAttributeInfo()   417         elif constant_name == "ConstantValue":   418             attribute = ConstantValueAttributeInfo()   419         elif constant_name == "Code":   420             attribute = CodeAttributeInfo()   421         elif constant_name == "Exceptions":   422             attribute = ExceptionsAttributeInfo()   423         elif constant_name == "InnerClasses":   424             attribute = InnerClassesAttributeInfo()   425         elif constant_name == "Synthetic":   426             attribute = SyntheticAttributeInfo()   427         elif constant_name == "LineNumberTable":   428             attribute = LineNumberAttributeInfo()   429         elif constant_name == "LocalVariableTable":   430             attribute = LocalVariableAttributeInfo()   431         elif constant_name == "Deprecated":   432             attribute = DeprecatedAttributeInfo()   433         else:   434             raise UnknownAttribute, constant_name   435         s = attribute.init(s[2:], self)   436         return attribute, s   437    438     def _get_attributes_from_table(self, number, s):   439         attributes = []   440         for i in range(0, number):   441             attribute, s = self._get_attribute_from_table(s)   442             attributes.append(attribute)   443         return attributes, s   444    445     def _get_constants(self, s):   446         count = u2(s[0:2])   447         return self._get_constants_from_table(count, s[2:])   448    449     def _get_access_flags(self, s):   450         return u2(s[0:2]), s[2:]   451    452     def _get_this_class(self, s):   453         index = u2(s[0:2])   454         return self.constants[index - 1], s[2:]   455    456     _get_super_class = _get_this_class   457    458     def _get_interfaces(self, s):   459         interfaces = []   460         number = u2(s[0:2])   461         s = s[2:]   462         for i in range(0, number):   463             index = u2(s[0:2])   464             interfaces.append(self.constants[index - 1])   465             s = s[2:]   466         return interfaces, s   467    468     def _get_fields(self, s):   469         number = u2(s[0:2])   470         return self._get_fields_from_table(number, s[2:])   471    472     def _get_attributes(self, s):   473         number = u2(s[0:2])   474         return self._get_attributes_from_table(number, s[2:])   475    476     def _get_methods(self, s):   477         number = u2(s[0:2])   478         return self._get_methods_from_table(number, s[2:])   479    480 if __name__ == "__main__":   481     import sys   482     f = open(sys.argv[1])   483     c = ClassFile(f.read())   484    485 # vim: tabstop=4 expandtab shiftwidth=4