javaclass

classfile.py

10:38c56a357c49
2004-11-09 Paul Boddie Removed the undesirable conversion to Unicode in NameUtils.get_name.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class NameUtils:    36     def get_name(self):    37         if self.name_index != 0:    38             return self.class_file.constants[self.name_index - 1]    39         else:    40             # Some name indexes are zero to indicate special conditions.    41             return None    42     43 class NameAndTypeUtils:    44     def get_name(self):    45         if self.name_and_type_index != 0:    46             return self.class_file.constants[self.name_and_type_index - 1].get_name()    47         else:    48             # Some name indexes are zero to indicate special conditions.    49             return None    50     51     def get_field_descriptor(self):    52         if self.name_and_type_index != 0:    53             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    54         else:    55             # Some name indexes are zero to indicate special conditions.    
56             return None    57     58     def get_method_descriptor(self):    59         if self.name_and_type_index != 0:    60             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    61         else:    62             # Some name indexes are zero to indicate special conditions.    63             return None    64     65 class DescriptorUtils:    66     67     "Symbol parsing."    68     69     def _get_method_descriptor(self, s):    70         assert s[0] == "("    71         params = []    72         s = s[1:]    73         while s[0] != ")":    74             parameter_descriptor, s = self._get_parameter_descriptor(s)    75             params.append(parameter_descriptor)    76         if s[1] != "V":    77             return_type, s = self._get_field_type(s[1:])    78         else:    79             return_type, s = None, s[1:]    80         return params, return_type    81     82     def _get_parameter_descriptor(self, s):    83         return self._get_field_type(s)    84     85     def _get_field_descriptor(self, s):    86         return self._get_field_type(s)    87     88     def _get_component_type(self, s):    89         return self._get_field_type(s)    90     91     def _get_field_type(self, s):    92         base_type, s = self._get_base_type(s)    93         object_type = None    94         array_type = None    95         if base_type == "L":    96             object_type, s = self._get_object_type(s)    97         elif base_type == "[":    98             array_type, s = self._get_array_type(s)    99         return (base_type, object_type, array_type), s   100    101     def _get_base_type(self, s):   102         if len(s) > 0:   103             return s[0], s[1:]   104         else:   105             return None, s   106    107     def _get_object_type(self, s):   108         if len(s) > 0:   109             s_end = s.find(";")   110             assert s_end != -1   111             return s[:s_end], 
s[s_end+1:]   112         else:   113             return None, s   114    115     def _get_array_type(self, s):   116         if len(s) > 0:   117             return self._get_component_type(s)   118         else:   119             return None, s   120    121 # Constant information.   122 # Objects of these classes are not directly aware of the class they reside in.   123    124 class ClassInfo(NameUtils):   125     def init(self, data, class_file):   126         self.class_file = class_file   127         self.name_index = u2(data[0:2])   128         return data[2:]   129    130 class RefInfo(NameAndTypeUtils):   131     def init(self, data, class_file):   132         self.class_file = class_file   133         self.class_index = u2(data[0:2])   134         self.name_and_type_index = u2(data[2:4])   135         return data[4:]   136    137 class FieldRefInfo(RefInfo):   138     def get_descriptor(self):   139         return RefInfo.get_field_descriptor(self)   140    141 class MethodRefInfo(RefInfo):   142     def get_descriptor(self):   143         return RefInfo.get_method_descriptor(self)   144    145 class InterfaceMethodRefInfo(RefInfo):   146     def get_descriptor(self):   147         return RefInfo.get_method_descriptor(self)   148    149 class NameAndTypeInfo(NameUtils, DescriptorUtils):   150     def init(self, data, class_file):   151         self.class_file = class_file   152         self.name_index = u2(data[0:2])   153         self.descriptor_index = u2(data[2:4])   154         return data[4:]   155    156     def get_field_descriptor(self):   157         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   158    159     def get_method_descriptor(self):   160         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   161    162 class Utf8Info:   163     def init(self, data, class_file):   164         self.class_file = class_file   165         self.length = 
u2(data[0:2])   166         self.bytes = data[2:2+self.length]   167         return data[2+self.length:]   168    169     def __str__(self):   170         return self.bytes   171    172     def __unicode__(self):   173         return unicode(self.bytes, "utf-8")   174    175 class StringInfo:   176     def init(self, data, class_file):   177         self.class_file = class_file   178         self.string_index = u2(data[0:2])   179         return data[2:]   180    181 class SmallNumInfo:   182     def init(self, data, class_file):   183         self.class_file = class_file   184         self.bytes = data[0:4]   185         return data[4:]   186    187 class IntegerInfo(SmallNumInfo):   188     def get_value(self):   189         return s4(self.bytes)   190    191 class FloatInfo(SmallNumInfo):   192     def get_value(self):   193         return f4(self.bytes)   194    195 class LargeNumInfo:   196     def init(self, data, class_file):   197         self.class_file = class_file   198         self.high_bytes = u4(data[0:4])   199         self.low_bytes = u4(data[4:8])   200         return data[8:]   201    202 class LongInfo(LargeNumInfo):   203     def get_value(self):   204         return s8(self.high_bytes + self.low_bytes)   205    206 class DoubleInfo(LargeNumInfo):   207     def get_value(self):   208         return f8(self.high_bytes + self.low_bytes)   209    210 # Other information.   211 # Objects of these classes are generally aware of the class they reside in.   

class ItemInfo(NameUtils, DescriptorUtils):

    "The structure shared by field and method entries."

    def init(self, data, class_file):

        """
        Decode the item from the start of 'data', including its attributes,
        and return the data following the item.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    def _get_descriptor_text(self):
        "Return the decoded text of the constant found at 'descriptor_index'."
        # Decoding the raw bytes directly avoids the 'unicode' built-in, which
        # is not available in Python 3.
        return self.class_file.constants[self.descriptor_index - 1].bytes.decode("utf-8")

class FieldInfo(ItemInfo):
    def get_descriptor(self):
        return self._get_field_descriptor(self._get_descriptor_text())

class MethodInfo(ItemInfo):
    def get_descriptor(self):
        return self._get_method_descriptor(self._get_descriptor_text())

# Attribute information.
# Each 'init' method receives the data following the attribute's name index
# and returns the data following the attribute.

class AttributeInfo:

    "An attribute whose contents are kept as undecoded data in 'info'."

    def init(self, data, class_file):
        # Recorded for consistency with the other attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned, as for every other attribute,
        # since the caller continues decoding from the returned value.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # Attributes nested within the code attribute follow the exceptions.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo needs the class file in order to resolve its
            # name and descriptor indexes.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils is required here since get_descriptor uses the descriptor
# parsing methods.

class LocalVariableInfo(NameUtils, DescriptorUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        text = self.class_file.constants[self.descriptor_index - 1].bytes.decode("utf-8")
        return self._get_field_descriptor(text)

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Classes used to decode constants, indexed by tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Classes used to decode attributes, indexed by attribute name.
    # The names are byte strings since they are compared with the raw bytes
    # of the constants.

    _attribute_classes = {
        b"SourceFile" : SourceFileAttributeInfo,
        b"ConstantValue" : ConstantValueAttributeInfo,
        b"Code" : CodeAttributeInfo,
        b"Exceptions" : ExceptionsAttributeInfo,
        b"InnerClasses" : InnerClassesAttributeInfo,
        b"Synthetic" : SyntheticAttributeInfo,
        b"LineNumberTable" : LineNumberAttributeInfo,
        b"LocalVariableTable" : LocalVariableAttributeInfo,
        b"Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes are skipped without being examined.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple containing
        the constant and the remaining data. Raise UnknownTag if the tag byte
        is not recognised.
        """

        tag = u1(s[0:1])
        cls = self._constant_classes.get(tag)
        if cls is None:
            raise UnknownTag(tag)
        const = cls()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the index reaches 'count', returning
        the list of constants and the remaining data.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' instances of 'cls' from 's'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning a tuple containing
        the attribute and the remaining data. Raise UnknownAttribute if the
        attribute's name is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        cls = self._attribute_classes.get(constant_name)
        if cls is None:
            raise UnknownAttribute(constant_name)
        attribute = cls()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):
        index = u2(s[0:2])
        # An index of zero would otherwise select the last constant, since
        # constants[-1] is a valid list index. NOTE(review): the specification
        # is understood to use zero where there is no superclass - confirm.
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data; the file is closed once it has been read.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())

# vim: tabstop=4 expandtab shiftwidth=4