javaclass

classfile.py

11:4184dc282002
2004-11-09 Paul Boddie Added supposedly improved exception handling in order to use Python VM features. The problem is that the JVM athrow instruction is dynamic and best translates to the Python VM RAISE_VARARGS instruction. However, in order to support RAISE_VARARGS, the SETUP_EXCEPT, SETUP_FINALLY, POP_BLOCK and END_FINALLY instructions are also required, yet the JVM supports try...catch...finally (whereas the Python VM only supports try...except and try...finally), and anticipates finally handling using very low-level subroutine calling and arcane usage of local variables. Changed the result of the translate convenience function and the parameter specification of the translate and disassemble functions. Fixed the get_value method in LazySubValue and the LazyValue portion of the _write_value method in BytecodeWriter.
     1 #!/usr/bin/env python     2      3 """     4 Java class file decoder. Specification found at the following URL:     5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html     6 """     7      8 import struct # for general decoding of class files     9     10 # Utility functions.    11     12 def u1(data):    13     return struct.unpack(">B", data[0:1])[0]    14     15 def u2(data):    16     return struct.unpack(">H", data[0:2])[0]    17     18 def u4(data):    19     return struct.unpack(">L", data[0:4])[0]    20     21 def s4(data):    22     return struct.unpack(">l", data[0:4])[0]    23     24 def s8(data):    25     return struct.unpack(">q", data[0:8])[0]    26     27 def f4(data):    28     return struct.unpack(">f", data[0:4])[0]    29     30 def f8(data):    31     return struct.unpack(">d", data[0:8])[0]    32     33 # Useful mix-ins.    34     35 class NameUtils:    36     def get_name(self):    37         if self.name_index != 0:    38             return self.class_file.constants[self.name_index - 1]    39         else:    40             # Some name indexes are zero to indicate special conditions.    41             return None    42     43 class NameAndTypeUtils:    44     def get_name(self):    45         if self.name_and_type_index != 0:    46             return self.class_file.constants[self.name_and_type_index - 1].get_name()    47         else:    48             # Some name indexes are zero to indicate special conditions.    49             return None    50     51     def get_field_descriptor(self):    52         if self.name_and_type_index != 0:    53             return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()    54         else:    55             # Some name indexes are zero to indicate special conditions.    
56             return None    57     58     def get_method_descriptor(self):    59         if self.name_and_type_index != 0:    60             return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()    61         else:    62             # Some name indexes are zero to indicate special conditions.    63             return None    64     65 class DescriptorUtils:    66     67     "Symbol parsing."    68     69     def _get_method_descriptor(self, s):    70         assert s[0] == "("    71         params = []    72         s = s[1:]    73         while s[0] != ")":    74             parameter_descriptor, s = self._get_parameter_descriptor(s)    75             params.append(parameter_descriptor)    76         if s[1] != "V":    77             return_type, s = self._get_field_type(s[1:])    78         else:    79             return_type, s = None, s[1:]    80         return params, return_type    81     82     def _get_parameter_descriptor(self, s):    83         return self._get_field_type(s)    84     85     def _get_field_descriptor(self, s):    86         return self._get_field_type(s)    87     88     def _get_component_type(self, s):    89         return self._get_field_type(s)    90     91     def _get_field_type(self, s):    92         base_type, s = self._get_base_type(s)    93         object_type = None    94         array_type = None    95         if base_type == "L":    96             object_type, s = self._get_object_type(s)    97         elif base_type == "[":    98             array_type, s = self._get_array_type(s)    99         return (base_type, object_type, array_type), s   100    101     def _get_base_type(self, s):   102         if len(s) > 0:   103             return s[0], s[1:]   104         else:   105             return None, s   106    107     def _get_object_type(self, s):   108         if len(s) > 0:   109             s_end = s.find(";")   110             assert s_end != -1   111             return s[:s_end], 
s[s_end+1:]   112         else:   113             return None, s   114    115     def _get_array_type(self, s):   116         if len(s) > 0:   117             return self._get_component_type(s)   118         else:   119             return None, s   120    121 # Constant information.   122 # Objects of these classes are not directly aware of the class they reside in.   123    124 class ClassInfo(NameUtils):   125     def init(self, data, class_file):   126         self.class_file = class_file   127         self.name_index = u2(data[0:2])   128         return data[2:]   129    130 class RefInfo(NameAndTypeUtils):   131     def init(self, data, class_file):   132         self.class_file = class_file   133         self.class_index = u2(data[0:2])   134         self.name_and_type_index = u2(data[2:4])   135         return data[4:]   136    137 class FieldRefInfo(RefInfo):   138     def get_descriptor(self):   139         return RefInfo.get_field_descriptor(self)   140    141 class MethodRefInfo(RefInfo):   142     def get_descriptor(self):   143         return RefInfo.get_method_descriptor(self)   144    145 class InterfaceMethodRefInfo(RefInfo):   146     def get_descriptor(self):   147         return RefInfo.get_method_descriptor(self)   148    149 class NameAndTypeInfo(NameUtils, DescriptorUtils):   150     def init(self, data, class_file):   151         self.class_file = class_file   152         self.name_index = u2(data[0:2])   153         self.descriptor_index = u2(data[2:4])   154         return data[4:]   155    156     def get_field_descriptor(self):   157         return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   158    159     def get_method_descriptor(self):   160         return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))   161    162 class Utf8Info:   163     def init(self, data, class_file):   164         self.class_file = class_file   165         self.length = 
u2(data[0:2])   166         self.bytes = data[2:2+self.length]   167         return data[2+self.length:]   168    169     def __str__(self):   170         return self.bytes   171    172     def __unicode__(self):   173         return unicode(self.bytes, "utf-8")   174    175 class StringInfo:   176     def init(self, data, class_file):   177         self.class_file = class_file   178         self.string_index = u2(data[0:2])   179         return data[2:]   180    181 class SmallNumInfo:   182     def init(self, data, class_file):   183         self.class_file = class_file   184         self.bytes = data[0:4]   185         return data[4:]   186    187 class IntegerInfo(SmallNumInfo):   188     def get_value(self):   189         return s4(self.bytes)   190    191 class FloatInfo(SmallNumInfo):   192     def get_value(self):   193         return f4(self.bytes)   194    195 class LargeNumInfo:   196     def init(self, data, class_file):   197         self.class_file = class_file   198         self.high_bytes = u4(data[0:4])   199         self.low_bytes = u4(data[4:8])   200         return data[8:]   201    202 class LongInfo(LargeNumInfo):   203     def get_value(self):   204         return s8(self.high_bytes + self.low_bytes)   205    206 class DoubleInfo(LargeNumInfo):   207     def get_value(self):   208         return f8(self.high_bytes + self.low_bytes)   209    210 # Other information.   211 # Objects of these classes are generally aware of the class they reside in.   
class ItemInfo(NameUtils, DescriptorUtils):

    "Common decoding for the entries of the field and method tables."

    def init(self, data, class_file):

        """
        Decode the entry from the start of 'data', including its attributes,
        and return the data remaining after the entry.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):
    def get_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):
    def get_descriptor(self):
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Attribute information.
# Each init method receives the data following the attribute's name index (the
# name index itself is consumed by ClassFile._get_attribute_from_table) and
# returns the data remaining after the attribute.

class AttributeInfo:

    "A generic attribute whose contents are kept undecoded in 'info'."

    def init(self, data, class_file):
        # Recorded for consistency with the more specific attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned, as with every other attribute,
        # since the caller continues decoding from the returned value.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The attribute consists only of the two byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the result of get_value on the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The code attribute has its own nested attributes.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the exception index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo needs the class file to resolve its name and
            # descriptor constants.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.
# Each init method decodes one table entry from the start of 'data' and returns
# the data remaining after the entry.

class ExceptionInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:
    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

# DescriptorUtils provides the _get_field_descriptor method used below.

class LocalVariableInfo(NameUtils, DescriptorUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):

    "Raised with the offending tag for an unrecognised constant."

    pass

class UnknownAttribute(Exception):

    "Raised with the offending name for an unrecognised attribute."

    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant classes indexed by the tag preceding each constant.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Attribute classes indexed by the name of each attribute.

    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first eight bytes, preceding the constant count, are not decoded.
        # NOTE: According to the class file format these hold the magic number
        # NOTE: and the version numbers.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # Each of the following methods returns a tuple of (decoded value,
    # remaining data).

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', raising UnknownTag if the tag
        of the constant is not recognised.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):
        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' consecutive objects of the class 'cls' from 's'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', raising UnknownAttribute if
        the name of the attribute is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant for the super class, or None where the index is
        zero. Without the check, an index of zero would select the last
        constant in the list.
        """

        index = u2(s[0:2])
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: avoid newline translation and make sure
    # that the file is closed even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4