javaclass

classfile.py

61:aec972e2534b
2004-11-19 Paul Boddie Fixed long and double constant handling. Added/fixed get_value methods to/for various constant types.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def s2(data):    19     return struct.unpack(">h", data[0:2])[0]    20     21 def u4(data):    22     return struct.unpack(">L", data[0:4])[0]    23     24 def s4(data):    25     return struct.unpack(">l", data[0:4])[0]    26     27 def s8(data):    28     return struct.unpack(">q", data[0:8])[0]    29     30 def f4(data):    31     return struct.unpack(">f", data[0:4])[0]    32     33 def f8(data):    34     return struct.unpack(">d", data[0:8])[0]    35     36 # Useful tables and constants.    37     38 descriptor_base_type_mapping = {    39     "B" : "int",    40     "C" : "str",    41     "D" : "float",    42     "F" : "float",    43     "I" : "int",    44     "J" : "int",    45     "L" : "object",    46     "S" : "int",    47     "Z" : "bool",    48     "[" : "list"    49     }    50     51 PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL,  SUPER,  SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \    52 0x0001, 0x0002,  0x0004,    0x0008, 0x0010, 0x0020, 0x0020,       0x0040,   0x0080,    0x0100, 0x0200,    0x0400,   0x0800    53     54 def has_flags(flags, desired):    55     desired_flags = reduce(lambda a, b: a | b, desired, 0)    56     return (flags & desired_flags) == desired_flags    57     58 # Useful mix-ins.    
class PythonMethodUtils:

    "Mix-in producing Python-style names for methods."

    def get_python_name(self):

        """
        Return a Python name for the method: "__clinit__" for "<clinit>",
        otherwise the method name (with "<init>" replaced by "__init__")
        followed by "$" and the parameter types joined by "$".
        """

        name = str(self.get_name())
        if name == "<clinit>":
            return "__clinit__"
        if name == "<init>":
            name = "__init__"
        return name + "$" + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):

        "Return the names of the parameter types joined by '$'."

        parameter_types = self.get_descriptor()[0]
        return "$".join([self._get_type_as_name(t) for t in parameter_types])

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the given (base type, object type, array type)
        tuple, with one "[]" appended to the suffix 's' per array dimension.
        """

        base_type, object_type, array_type = descriptor_type
        # Unwrap array dimensions, accumulating one "[]" for each.
        while base_type == "[":
            s = s + "[]"
            base_type, object_type, array_type = array_type
        if base_type == "L":
            return object_type + s
        return "<" + base_type + ">" + s

class PythonNameUtils:

    "Mix-in producing dotted Python-style names."

    def get_python_name(self):

        "Return the name as a string with '/' replaced by '.'."

        name = str(self.get_name())
        return name.replace("/", ".")

class NameUtils:

    "Mix-in resolving 'name_index' against the constants of the class file."

    def get_name(self):

        "Return the constant referenced by 'name_index', or None if it is zero."

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils:

    "Mix-in delegating to the constant referenced by 'name_and_type_index'."

    def get_name(self):

        "Return the name from the referenced constant, or None for index zero."

        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_name()

    def get_field_descriptor(self):

        "Return the field descriptor from the referenced constant, or None."

        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_field_descriptor()

    def get_method_descriptor(self):

        "Return the method descriptor from the referenced constant, or None."

        # Some name indexes are zero to indicate special conditions.
        if self.name_and_type_index == 0:
            return None
        entry = self.class_file.constants[self.name_and_type_index - 1]
        return entry.get_method_descriptor()

class DescriptorUtils:

    "Symbol parsing."

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple of the list of
        parameter types and the return type (None where it is "V").
        """

        assert s[0] == "("
        params = []
        remaining = s[1:]
        while remaining[0] != ")":
            parameter, remaining = self._get_parameter_descriptor(remaining)
            params.append(parameter)
        # Step over the closing bracket to reach the return type.
        remaining = remaining[1:]
        if remaining[0] == "V":
            return_type = None
        else:
            return_type, remaining = self._get_field_type(remaining)
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a parameter descriptor; see _get_field_type."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor; see _get_field_type."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type; see _get_field_type."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the start of 's', returning a tuple of
        (base type, object type, array type) together with the unparsed
        remainder of 's'.
        """

        object_type = None
        array_type = None
        base_type, rest = self._get_base_type(s)
        if base_type == "L":
            object_type, rest = self._get_object_type(rest)
        elif base_type == "[":
            array_type, rest = self._get_array_type(rest)
        return (base_type, object_type, array_type), rest

    def _get_base_type(self, s):

        "Split off the first character of 's'; yield None if 's' is empty."

        if len(s) == 0:
            return None, s
        return s[0], s[1:]

    def _get_object_type(self, s):

        "Split 's' at the first ';', returning the text before and after it."

        if len(s) == 0:
            return None, s
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]

    def _get_array_type(self, s):

        "Parse the component type of an array; yield None if 's' is empty."

        if len(s) == 0:
            return None, s
        return self._get_component_type(s)

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.

class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding a name index."

    def init(self, data, class_file):
        "Read the name index from 'data' and return the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):
        "Read both indexes from 'data' and return the remaining data."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference whose descriptor is a field descriptor."

    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference whose descriptor is a method descriptor."

    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, DescriptorUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):
        "Read both indexes from 'data' and return the remaining data."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return self._get_field_descriptor(unicode(descriptor))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return self._get_method_descriptor(unicode(descriptor))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        "Read the length and that many bytes; return the remaining data."
        self.class_file = class_file
        self.length = u2(data[0:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:

    "A constant referring by index to the constant providing its text."

    def init(self, data, class_file):
        "Read the string index from 'data' and return the remaining data."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:

    "A numeric constant stored in four bytes."

    def init(self, data, class_file):
        "Keep the first four bytes of 'data' and return the remaining data."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a 32-bit float."
        return f4(self.bytes)

class LargeNumInfo:

    "A numeric constant stored in eight bytes, kept as two four-byte halves."

    def init(self, data, class_file):
        "Keep the first eight bytes of 'data' and return the remaining data."
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

class LongInfo(LargeNumInfo):
    def get_value(self):
        "Return the stored bytes decoded as a signed 64-bit integer."
        return s8(self.high_bytes + self.low_bytes)

 271    272 class DoubleInfo(LargeNumInfo):   273     def get_value(self):   274         return f8(self.high_bytes + self.low_bytes)   275    276 # Other information.   277 # Objects of these classes are generally aware of the class they reside in.   278    279 class ItemInfo(NameUtils, DescriptorUtils):   280     def init(self, data, class_file):   281         self.class_file = class_file   282         self.access_flags = u2(data[0:2])   283         self.name_index = u2(data[2:4])   284         self.descriptor_index = u2(data[4:6])   285         self.attributes, data = self.class_file._get_attributes(data[6:])   286         return data   287    288 class FieldInfo(ItemInfo, PythonNameUtils):   289     def get_descriptor(self):   290         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   291    292 class MethodInfo(ItemInfo, PythonMethodUtils):   293     def get_descriptor(self):   294         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   295    296 class AttributeInfo:   297     def init(self, data, class_file):   298         self.attribute_length = u4(data[0:4])   299         self.info = data[4:4+self.attribute_length]   300         return data[4+self.attribute_length:]   301    302 # NOTE: Decode the different attribute formats.   303    304 class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):   305     def init(self, data, class_file):   306         self.class_file = class_file   307         self.attribute_length = u4(data[0:4])   308         # Permit the NameUtils mix-in.   
309         self.name_index = self.sourcefile_index = u2(data[4:6])   310         return data[6:]   311    312 class ConstantValueAttributeInfo(AttributeInfo):   313     def init(self, data, class_file):   314         self.class_file = class_file   315         self.attribute_length = u4(data[0:4])   316         self.constant_value_index = u2(data[4:6])   317         assert 4+self.attribute_length == 6   318         return data[4+self.attribute_length:]   319    320     def get_value(self):   321         return self.class_file.constants[self.constant_value_index - 1].get_value()   322    323 class CodeAttributeInfo(AttributeInfo):   324     def init(self, data, class_file):   325         self.class_file = class_file   326         self.attribute_length = u4(data[0:4])   327         self.max_stack = u2(data[4:6])   328         self.max_locals = u2(data[6:8])   329         self.code_length = u4(data[8:12])   330         end_of_code = 12+self.code_length   331         self.code = data[12:end_of_code]   332         self.exception_table_length = u2(data[end_of_code:end_of_code+2])   333         self.exception_table = []   334         data = data[end_of_code + 2:]   335         for i in range(0, self.exception_table_length):   336             exception = ExceptionInfo()   337             data = exception.init(data)   338             self.exception_table.append(exception)   339         self.attributes, data = self.class_file._get_attributes(data)   340         return data   341    342 class ExceptionsAttributeInfo(AttributeInfo):   343     def init(self, data, class_file):   344         self.class_file = class_file   345         self.attribute_length = u4(data[0:4])   346         self.number_of_exceptions = u2(data[4:6])   347         self.exception_index_table = []   348         index = 6   349         for i in range(0, self.number_of_exceptions):   350             self.exception_index_table.append(u2(data[index:index+2]))   351             index += 2   352         return 
data[index:]   353    354     def get_exception(self, i):   355         exception_index = self.exception_index_table[i]   356         return self.class_file.constants[exception_index - 1]   357    358 class InnerClassesAttributeInfo(AttributeInfo):   359     def init(self, data, class_file):   360         self.class_file = class_file   361         self.attribute_length = u4(data[0:4])   362         self.number_of_classes = u2(data[4:6])   363         self.classes = []   364         data = data[6:]   365         for i in range(0, self.number_of_classes):   366             inner_class = InnerClassInfo()   367             data = inner_class.init(data, self.class_file)   368             self.classes.append(inner_class)   369         return data   370    371 class SyntheticAttributeInfo(AttributeInfo):   372     pass   373    374 class LineNumberAttributeInfo(AttributeInfo):   375     def init(self, data, class_file):   376         self.class_file = class_file   377         self.attribute_length = u4(data[0:4])   378         self.line_number_table_length = u2(data[4:6])   379         self.line_number_table = []   380         data = data[6:]   381         for i in range(0, self.line_number_table_length):   382             line_number = LineNumberInfo()   383             data = line_number.init(data)   384             self.line_number_table.append(line_number)   385         return data   386    387 class LocalVariableAttributeInfo(AttributeInfo):   388     def init(self, data, class_file):   389         self.class_file = class_file   390         self.attribute_length = u4(data[0:4])   391         self.local_variable_table_length = u2(data[4:6])   392         self.local_variable_table = []   393         data = data[6:]   394         for i in range(0, self.local_variable_table_length):   395             local_variable = LocalVariableInfo()   396             data = local_variable.init(data, self.class_file)   397             self.local_variable_table.append(local_variable)   
398         return data   399    400 class DeprecatedAttributeInfo(AttributeInfo):   401     pass   402    403 # Child classes of the attribute information classes.   404    405 class ExceptionInfo:   406     def init(self, data):   407         self.start_pc = u2(data[0:2])   408         self.end_pc = u2(data[2:4])   409         self.handler_pc = u2(data[4:6])   410         self.catch_type = u2(data[6:8])   411         return data[8:]   412    413 class InnerClassInfo(NameUtils):   414     def init(self, data, class_file):   415         self.class_file = class_file   416         self.inner_class_info_index = u2(data[0:2])   417         self.outer_class_info_index = u2(data[2:4])   418         # Permit the NameUtils mix-in.   419         self.name_index = self.inner_name_index = u2(data[4:6])   420         self.inner_class_access_flags = u2(data[6:8])   421         return data[8:]   422    423 class LineNumberInfo:   424     def init(self, data):   425         self.start_pc = u2(data[0:2])   426         self.line_number = u2(data[2:4])   427         return data[4:]   428    429 class LocalVariableInfo(NameUtils, PythonNameUtils):   430     def init(self, data, class_file):   431         self.class_file = class_file   432         self.start_pc = u2(data[0:2])   433         self.length = u2(data[2:4])   434         self.name_index = u2(data[4:6])   435         self.descriptor_index = u2(data[6:8])   436         self.index = u2(data[8:10])   437         return data[10:]   438    439     def get_descriptor(self):   440         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   441    442 # Exceptions.   443    444 class UnknownTag(Exception):   445     pass   446    447 class UnknownAttribute(Exception):   448     pass   449    450 # Abstractions for the main structures.   451    452 class ClassFile:   453    454     "A class representing a Java class file."   
455    456     def __init__(self, s):   457    458         """   459         Process the given string 's', populating the object with the class   460         file's details.   461         """   462    463         self.constants, s = self._get_constants(s[8:])   464         self.access_flags, s = self._get_access_flags(s)   465         self.this_class, s = self._get_this_class(s)   466         self.super_class, s = self._get_super_class(s)   467         self.interfaces, s = self._get_interfaces(s)   468         self.fields, s = self._get_fields(s)   469         self.methods, s = self._get_methods(s)   470         self.attributes, s = self._get_attributes(s)   471    472     def _decode_const(self, s):   473         tag = u1(s[0:1])   474         if tag == 1:   475             const = Utf8Info()   476         elif tag == 3:   477             const = IntegerInfo()   478         elif tag == 4:   479             const = FloatInfo()   480         elif tag == 5:   481             const = LongInfo()   482         elif tag == 6:   483             const = DoubleInfo()   484         elif tag == 7:   485             const = ClassInfo()   486         elif tag == 8:   487             const = StringInfo()   488         elif tag == 9:   489             const = FieldRefInfo()   490         elif tag == 10:   491             const = MethodRefInfo()   492         elif tag == 11:   493             const = InterfaceMethodRefInfo()   494         elif tag == 12:   495             const = NameAndTypeInfo()   496         else:   497             raise UnknownTag, tag   498    499         # Initialise the constant object.   500    501         s = const.init(s[1:], self)   502         return const, s   503    504     def _get_constants_from_table(self, count, s):   505         l = []   506         # Have to skip certain entries specially.   
507         i = 1   508         while i < count:   509             c, s = self._decode_const(s)   510             l.append(c)   511             # Add a blank entry after "large" entries.   512             if isinstance(c, LargeNumInfo):   513                 l.append(None)   514                 i += 1   515             i += 1   516         return l, s   517    518     def _get_items_from_table(self, cls, number, s):   519         l = []   520         for i in range(0, number):   521             f = cls()   522             s = f.init(s, self)   523             l.append(f)   524         return l, s   525    526     def _get_methods_from_table(self, number, s):   527         return self._get_items_from_table(MethodInfo, number, s)   528    529     def _get_fields_from_table(self, number, s):   530         return self._get_items_from_table(FieldInfo, number, s)   531    532     def _get_attribute_from_table(self, s):   533         attribute_name_index = u2(s[0:2])   534         constant_name = self.constants[attribute_name_index - 1].bytes   535         if constant_name == "SourceFile":   536             attribute = SourceFileAttributeInfo()   537         elif constant_name == "ConstantValue":   538             attribute = ConstantValueAttributeInfo()   539         elif constant_name == "Code":   540             attribute = CodeAttributeInfo()   541         elif constant_name == "Exceptions":   542             attribute = ExceptionsAttributeInfo()   543         elif constant_name == "InnerClasses":   544             attribute = InnerClassesAttributeInfo()   545         elif constant_name == "Synthetic":   546             attribute = SyntheticAttributeInfo()   547         elif constant_name == "LineNumberTable":   548             attribute = LineNumberAttributeInfo()   549         elif constant_name == "LocalVariableTable":   550             attribute = LocalVariableAttributeInfo()   551         elif constant_name == "Deprecated":   552             attribute = 
DeprecatedAttributeInfo()   553         else:   554             raise UnknownAttribute, constant_name   555         s = attribute.init(s[2:], self)   556         return attribute, s   557    558     def _get_attributes_from_table(self, number, s):   559         attributes = []   560         for i in range(0, number):   561             attribute, s = self._get_attribute_from_table(s)   562             attributes.append(attribute)   563         return attributes, s   564    565     def _get_constants(self, s):   566         count = u2(s[0:2])   567         return self._get_constants_from_table(count, s[2:])   568    569     def _get_access_flags(self, s):   570         return u2(s[0:2]), s[2:]   571    572     def _get_this_class(self, s):   573         index = u2(s[0:2])   574         return self.constants[index - 1], s[2:]   575    576     _get_super_class = _get_this_class   577    578     def _get_interfaces(self, s):   579         interfaces = []   580         number = u2(s[0:2])   581         s = s[2:]   582         for i in range(0, number):   583             index = u2(s[0:2])   584             interfaces.append(self.constants[index - 1])   585             s = s[2:]   586         return interfaces, s   587    588     def _get_fields(self, s):   589         number = u2(s[0:2])   590         return self._get_fields_from_table(number, s[2:])   591    592     def _get_attributes(self, s):   593         number = u2(s[0:2])   594         return self._get_attributes_from_table(number, s[2:])   595    596     def _get_methods(self, s):   597         number = u2(s[0:2])   598         return self._get_methods_from_table(number, s[2:])   599    600 if __name__ == "__main__":   601     import sys   602     f = open(sys.argv[1])   603     c = ClassFile(f.read())   604    605 # vim: tabstop=4 expandtab shiftwidth=4