javaclass

classfile.py

0:7c67f86c4c10
2004-10-23 Paul Boddie Initial revision
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct     9     10 # Constant information.    11     12 class ClassInfo:    13     def init(self, data):    14         self.name_index = struct.unpack(">H", data[0:2])[0]    15         return data[2:]    16     17 class RefInfo:    18     def init(self, data):    19         self.class_index = struct.unpack(">H", data[0:2])[0]    20         self.name_and_type_index = struct.unpack(">H", data[2:4])[0]    21         return data[4:]    22     23 class FieldRefInfo(RefInfo):    24     pass    25     26 class MethodRefInfo(RefInfo):    27     pass    28     29 class InterfaceMethodRefInfo(RefInfo):    30     pass    31     32 class NameAndTypeInfo:    33     def init(self, data):    34         self.name_index = struct.unpack(">H", data[0:2])[0]    35         self.descriptor_index = struct.unpack(">H", data[2:4])[0]    36         return data[4:]    37     38 class Utf8Info:    39     def init(self, data):    40         self.length = struct.unpack(">H", data[0:2])[0]    41         self.bytes = data[2:2+self.length]    42         return data[2+self.length:]    43     44     def __str__(self):    45         return self.bytes    46     47     def __unicode__(self):    48         return unicode(self.bytes, "utf-8")    49     50 class StringInfo:    51     def init(self, data):    52         self.string_index = struct.unpack(">H", data[0:2])[0]    53         return data[2:]    54     55 class SmallNumInfo:    56     def init(self, data):    57         self.bytes = struct.unpack(">L", data[0:4])[0]    58         return data[4:]    59     60 class IntegerInfo(SmallNumInfo):    61     pass    62     63 class FloatInfo(SmallNumInfo):    64     pass    65     66 class LargeNumInfo:    67     def init(self, data):    68         self.high_bytes = struct.unpack(">L", data[0:4])[0]    69         self.low_bytes = struct.unpack(">L", data[4:8])[0]    70         return data[8:]    71     72 class LongInfo(LargeNumInfo):    73     pass    74     75 class DoubleInfo(LargeNumInfo):    76     pass    77     78 # Other information.    79     80 class ItemInfo:    81     def init(self, data, class_file):    82         self.class_file = class_file    83         self.access_flags = struct.unpack(">H", data[0:2])[0]    84         self.name_index = struct.unpack(">H", data[2:4])[0]    85         self.descriptor_index = struct.unpack(">H", data[4:6])[0]    86         self.attributes, data = self.class_file._get_attributes(data[6:])    87         return data    88     89     # Symbol parsing.    90     91     def _get_method_descriptor(self, s):    92         assert s[0] == "("    93         params = []    94         s = s[1:]    95         while s[0] != ")":    96             parameter_descriptor, s = self._get_parameter_descriptor(s)    97             params.append(parameter_descriptor)    98         if s[1] != "V":    99             return_type, s = self._get_field_type(s[1:])   100         else:   101             return_type, s = None, s[1:]   102         return params, return_type   103    104     def _get_parameter_descriptor(self, s):   105         return self._get_field_type(s)   106    107     def _get_field_descriptor(self, s):   108         return self._get_field_type(s)   109    110     def _get_component_type(self, s):   111         return self._get_field_type(s)   112    113     def _get_field_type(self, s):   114         base_type, s = self._get_base_type(s)   115         object_type = None   116         array_type = None   117         if base_type == "L":   118             object_type, s = self._get_object_type(s)   119         elif base_type == "[":   120             array_type, s = self._get_array_type(s)   121         return (base_type, object_type, array_type), s   122    123     def _get_base_type(self, s):   124         if len(s) > 0:   125             return s[0], s[1:]   126         else:   127             return None, s   128    129     def _get_object_type(self, s):   130         if len(s) > 0:   131             s_end = s.find(";")   132             assert s_end != -1   133             return s[:s_end], s[s_end+1:]   134         else:   135             return None, s   136    137     def _get_array_type(self, s):   138         if len(s) > 0:   139             return self._get_component_type(s[1:])   140         else:   141             return None, s   142    143     # Processed details.   144    145     def get_name(self):   146         return unicode(self.class_file.constants[self.name_index - 1])   147    148 class FieldInfo(ItemInfo):   149     def get_descriptor(self):   150         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   151    152 class MethodInfo(ItemInfo):   153     def get_descriptor(self):   154         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   155    156 class AttributeInfo:   157     def init(self, data, class_file):   158         self.attribute_length = struct.unpack(">L", data[0:4])[0]   159         self.info = data[4:4+self.attribute_length]   160         return data[4+self.attribute_length:]   161    162 # NOTE: Decode the different attribute formats.   163    164 class SourceFileAttributeInfo(AttributeInfo):   165     pass   166    167 class ConstantValueAttributeInfo(AttributeInfo):   168     def init(self, data, class_file):   169         self.attribute_length = struct.unpack(">L", data[0:4])[0]   170         self.constant_value_index = struct.unpack(">H", data[4:6])[0]   171         assert 4+self.attribute_length == 6   172         return data[4+self.attribute_length:]   173    174 class CodeAttributeInfo(AttributeInfo):   175     def init(self, data, class_file):   176         self.class_file = class_file   177         self.attribute_length = struct.unpack(">L", data[0:4])[0]   178         self.max_stack = struct.unpack(">H", data[4:6])[0]   179         self.max_locals = struct.unpack(">H", data[6:8])[0]   180         self.code_length = struct.unpack(">L", data[8:12])[0]   181         end_of_code = 12+self.code_length   182         self.code = data[12:end_of_code]   183         self.exception_table_length = struct.unpack(">H", data[end_of_code:end_of_code+2])[0]   184         self.exception_table = []   185         data = data[end_of_code + 2:]   186         for i in range(0, self.exception_table_length):   187             exception = ExceptionInfo()   188             data = exception.init(data)   189         self.attributes, data = self.class_file._get_attributes(data)   190         return data   191    192 class ExceptionsAttributeInfo(AttributeInfo):   193     pass   194    195 class InnerClassesAttributeInfo(AttributeInfo):   196     pass   197    198 class SyntheticAttributeInfo(AttributeInfo):   199     pass   200    201 class LineNumberAttributeInfo(AttributeInfo):   202     pass   203    204 class LocalVariableAttributeInfo(AttributeInfo):   205     pass   206    207 class DeprecatedAttributeInfo(AttributeInfo):   208     pass   209    210 class ExceptionInfo:   211     def __init__(self):   212         self.start_pc, self.end_pc, self.handler_pc, self.catch_type = None, None, None, None   213    214     def init(self, data):   215         self.start_pc = struct.unpack(">H", data[0:2])[0]   216         self.end_pc = struct.unpack(">H", data[2:4])[0]   217         self.handler_pc = struct.unpack(">H", data[4:6])[0]   218         self.catch_type = struct.unpack(">H", data[6:8])[0]   219         return data[8:]   220    221 class UnknownTag(Exception):   222     pass   223    224 class UnknownAttribute(Exception):   225     pass   226    227 # Abstractions for the main structures.   228    229 class ClassFile:   230    231     "A class representing a Java class file."   232    233     def __init__(self, s):   234    235         """   236         Process the given string 's', populating the object with the class   237         file's details.   238         """   239    240         self.constants, s = self._get_constants(s[8:])   241         self.access_flags, s = self._get_access_flags(s)   242         self.this_class, s = self._get_this_class(s)   243         self.super_class, s = self._get_super_class(s)   244         self.interfaces, s = self._get_interfaces(s)   245         self.fields, s = self._get_fields(s)   246         self.methods, s = self._get_methods(s)   247         self.attributes, s = self._get_attributes(s)   248    249     def _decode_const(self, s):   250         tag = struct.unpack(">B", s[0:1])[0]   251         if tag == 1:   252             const = Utf8Info()   253         elif tag == 3:   254             const = IntegerInfo()   255         elif tag == 4:   256             const = FloatInfo()   257         elif tag == 5:   258             const = LongInfo()   259         elif tag == 6:   260             const = DoubleInfo()   261         elif tag == 7:   262             const = ClassInfo()   263         elif tag == 8:   264             const = StringInfo()   265         elif tag == 9:   266             const = FieldRefInfo()   267         elif tag == 10:   268             const = MethodRefInfo()   269         elif tag == 11:   270             const = InterfaceMethodRefInfo()   271         elif tag == 12:   272             const = NameAndTypeInfo()   273         else:   274             raise UnknownTag, tag   275         s = const.init(s[1:])   276         return const, s   277    278     def _get_constants_from_table(self, count, s):   279         l = []   280         # Have to skip certain entries specially.   281         i = 1   282         while i < count:   283             c, s = self._decode_const(s)   284             l.append(c)   285             # Add a blank entry after "large" entries.   286             if isinstance(c, LargeNumInfo):   287                 l.append(None)   288                 i += 1   289             i += 1   290         return l, s   291    292     def _get_items_from_table(self, cls, number, s):   293         l = []   294         for i in range(0, number):   295             f = cls()   296             s = f.init(s, self)   297             l.append(f)   298         return l, s   299    300     def _get_methods_from_table(self, number, s):   301         return self._get_items_from_table(MethodInfo, number, s)   302    303     def _get_fields_from_table(self, number, s):   304         return self._get_items_from_table(FieldInfo, number, s)   305    306     def _get_attribute_from_table(self, s):   307         attribute_name_index = struct.unpack(">H", s[0:2])[0]   308         constant_name = self.constants[attribute_name_index - 1].bytes   309         if constant_name == "SourceFile":   310             attribute = SourceFileAttributeInfo()   311         elif constant_name == "ConstantValue":   312             attribute = ConstantValueAttributeInfo()   313         elif constant_name == "Code":   314             attribute = CodeAttributeInfo()   315         elif constant_name == "Exceptions":   316             attribute = ExceptionsAttributeInfo()   317         elif constant_name == "InnerClasses":   318             attribute = InnerClassesAttributeInfo()   319         elif constant_name == "Synthetic":   320             attribute = SyntheticAttributeInfo()   321         elif constant_name == "LineNumberTable":   322             attribute = LineNumberAttributeInfo()   323         elif constant_name == "LocalVariableTable":   324             attribute = LocalVariableAttributeInfo()   325         elif constant_name == "Deprecated":   326             attribute = DeprecatedAttributeInfo()   327         else:   328             raise UnknownAttribute, constant_name   329         s = attribute.init(s[2:], self)   330         return attribute, s   331    332     def _get_attributes_from_table(self, number, s):   333         attributes = []   334         for i in range(0, number):   335             attribute, s = self._get_attribute_from_table(s)   336             attributes.append(attribute)   337         return attributes, s   338    339     def _get_constants(self, s):   340         count = struct.unpack(">H", s[0:2])[0]   341         return self._get_constants_from_table(count, s[2:])   342    343     def _get_access_flags(self, s):   344         return struct.unpack(">H", s[0:2])[0], s[2:]   345    346     def _get_this_class(self, s):   347         index = struct.unpack(">H", s[0:2])[0]   348         return self.constants[index - 1], s[2:]   349    350     _get_super_class = _get_this_class   351    352     def _get_interfaces(self, s):   353         interfaces = []   354         number = struct.unpack(">H", s[0:2])[0]   355         s = s[2:]   356         for i in range(0, number):   357             index = struct.unpack(">H", s[0:2])[0]   358             interfaces.append(self.constants[index - 1])   359             s = s[2:]   360         return interfaces, s   361    362     def _get_fields(self, s):   363         number = struct.unpack(">H", s[0:2])[0]   364         return self._get_fields_from_table(number, s[2:])   365    366     def _get_attributes(self, s):   367         number = struct.unpack(">H", s[0:2])[0]   368         return self._get_attributes_from_table(number, s[2:])   369    370     def _get_methods(self, s):   371         number = struct.unpack(">H", s[0:2])[0]   372         return self._get_methods_from_table(number, s[2:])   373    374 if __name__ == "__main__":   375     import sys   376     f = open(sys.argv[1])   377     c = ClassFile(f.read())   378    379 # vim: tabstop=4 expandtab shiftwidth=4