javaclass

javaclass/classfile.py

188:fa7d2529c54a
2011-09-18 David Drysdale Add support for Java 1.5 + 1.6 class files. Also add more test cases, fix some bugs, and check that serialization of loaded class files returns the same as the input.
#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html

Copyright (C) 2004, 2005, 2006, 2011 Paul Boddie <paul@boddie.org.uk>
Copyright (C) 2010 Braden Thomas <bradenthomas@me.com>
Copyright (C) 2011 David Drysdale <dmd@lurklurk.org>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import struct # for general decoding of class files

# Utility functions.
def u1(data):
    """Decode an unsigned 8-bit integer from the first byte of data."""
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    """Decode a big-endian unsigned 16-bit integer from the first 2 bytes."""
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    """Decode a big-endian signed 16-bit integer from the first 2 bytes."""
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    """Decode a big-endian unsigned 32-bit integer from the first 4 bytes."""
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    """Decode a big-endian signed 32-bit integer from the first 4 bytes."""
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    """Decode a big-endian signed 64-bit integer from the first 8 bytes."""
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    """Decode a big-endian 32-bit float from the first 4 bytes."""
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    """Decode a big-endian 64-bit float from the first 8 bytes."""
    return struct.unpack(">d", data[0:8])[0]

def su1(value):
    """Encode value as an unsigned 8-bit integer."""
    return struct.pack(">B", value)

def su2(value):
    """Encode value as a big-endian unsigned 16-bit integer."""
    return struct.pack(">H", value)

def ss2(value):
    """Encode value as a big-endian signed 16-bit integer."""
    return struct.pack(">h", value)

def su4(value):
    """Encode value as a big-endian unsigned 32-bit integer."""
    return struct.pack(">L", value)

def ss4(value):
    """Encode value as a big-endian signed 32-bit integer."""
    return struct.pack(">l", value)

def ss8(value):
    """Encode value as a big-endian signed 64-bit integer."""
    return struct.pack(">q", value)

def sf4(value):
    """Encode value as a big-endian 32-bit float."""
    return struct.pack(">f", value)

def sf8(value):
    """Encode value as a big-endian 64-bit float."""
    return struct.pack(">d", value)

# Useful tables and constants.

# Maps the leading character of a descriptor to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Maps the Python type names used above to a default value.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):
    """
    Return the default value registered for 'type_name' in
    type_names_to_default_values, or None if the name is not registered.

    A list default is returned as a fresh copy, so that a caller mutating the
    result cannot alter the shared table entry seen by later callers.
    """
    value = type_names_to_default_values.get(type_name)
    if isinstance(value, list):
        return list(value)
    return value

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share 0x0020.
PUBLIC       = 0x0001
PRIVATE      = 0x0002
PROTECTED    = 0x0004
STATIC       = 0x0008
FINAL        = 0x0010
SUPER        = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE     = 0x0040
TRANSIENT    = 0x0080
NATIVE       = 0x0100
INTERFACE    = 0x0200
ABSTRACT     = 0x0400
STRICT       = 0x0800

def has_flags(flags, desired):
    """
    Return whether every flag in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence is always satisfied.
    """
    # Explicit OR-fold: avoids relying on the builtin reduce.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:
    """
    Mix-in producing Python-style names for methods. Users must provide
    get_name() and get_descriptor().
    """
    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):
        """Return the method name, mapping <init> and <clinit> specially."""
        name = self.get_name()
        if str(name) == "<init>":
            return "__init__"
        elif str(name) == "<clinit>":
            return "__clinit__"
        else:
            return str(name)

    def get_python_name(self):
        """
        Return the unqualified name followed by the encoded parameter types.
        The name "__clinit__" is returned without any suffix.
        """
        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        """Encode each parameter type and join them with symbol_sep."""
        l = []
        # Element 0 of the method descriptor is the parameter list.
        for descriptor_type in self.get_descriptor()[0]:
            l.append(self._get_type_as_name(descriptor_type))
        return self.symbol_sep.join(l)

    def _get_type_as_name(self, descriptor_type, s=""):
        """
        Encode one (base_type, object_type, array_type) tuple as a name
        fragment; 's' accumulates one array_sep per array dimension.
        """
        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:
    """Mix-in producing dotted Python-style names; users provide get_name()."""

    def get_python_name(self):
        """
        Return the name with "/" replaced by "."; names starting with "[" are
        first parsed as field descriptors and reduced to their element type.
        """
        # NOTE: This may not be comprehensive.
        if not str(self.get_name()).startswith("["):
            return str(self.get_name()).replace("/", ".")
        else:
            return self._get_type_name(
                get_field_descriptor(
                    str(self.get_name())
                    )
                ).replace("/", ".")

    def _get_type_name(self, descriptor_type):
        """Return the innermost type name of a parsed descriptor tuple."""
        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:
    """Mix-in resolving self.name_index against self.class_file.constants."""

    def get_name(self):
        """Return the constant at name_index (1-based), or None if it is 0."""
        if self.name_index != 0:
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:
    """
    Mix-in resolving self.name_and_type_index (and self.class_index) against
    self.class_file.constants. Each accessor returns None for a zero index.
    """

    def get_name(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.

def get_method_descriptor(s):
    """
    Parse the method descriptor string 's', which must start with "(".

    Return (params, return_type) where 'params' is a list of
    (base_type, object_type, array_type) tuples and 'return_type' is such a
    tuple, or None when the descriptor's return type is "V".
    """
    assert s[0] == "("
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # s[0] is now ")"; s[1] starts the return type.
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type, s = None, s[1:]
    return params, return_type

def get_field_descriptor(s):
    """Parse 's' and return its (base_type, object_type, array_type) tuple."""
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    return _get_field_type(s)

def _get_component_type(s):
    return _get_field_type(s)

def _get_field_type(s):
    """
    Consume one type from the front of 's'.
    Return ((base_type, object_type, array_type), remaining_text).
    """
    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    """Split off the first character of 's'; (None, s) when 's' is empty."""
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):
    """Split 's' at the first ";" into (class name, remaining text)."""
    if len(s) > 0:
        s_end = s.find(";")
        assert s_end != -1
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    """Parse the component type of an array; (None, s) when 's' is empty."""
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.
# Each init method decodes from the front of 'data' and returns the remainder;
# each serialize method returns the encoded form of the decoded fields.

class ClassInfo(NameUtils, PythonNameUtils):
    """Constant holding a 2-byte name index."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return su2(self.name_index)

class RefInfo(NameAndTypeUtils):
    """Constant holding a class index and a name-and-type index."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]
    def serialize(self):
        return su2(self.class_index)+su2(self.name_and_type_index)

class FieldRefInfo(RefInfo, PythonNameUtils):
    def get_descriptor(self):
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):
    def get_descriptor(self):
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):
    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):
    """Constant holding a name index and a descriptor index."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        return su2(self.name_index)+su2(self.descriptor_index)

    def get_field_descriptor(self):
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:
    """Constant holding a 2-byte length followed by that many raw bytes."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def serialize(self):
        return su2(self.length)+self.bytes

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        return str(self)

class StringInfo:
    """Constant holding the index of another constant providing its text."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        return su2(self.string_index)

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        return str(self)

class SmallNumInfo:
    """Base for 4-byte numeric constants; the raw bytes are kept as read."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]
    def serialize(self):
        return self.bytes

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:
    """Base for 8-byte numeric constants, kept as two 4-byte halves."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]
    def serialize(self):
        return self.high_bytes+self.low_bytes


class LongInfo(LargeNumInfo):
    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):
    """
    Common layout of fields and methods: access flags, name index, descriptor
    index, then attributes decoded by the owning class file object.
    """
    def init(self, data, class_file):
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data
    def serialize(self):
        od = su2(self.access_flags)+su2(self.name_index)+su2(self.descriptor_index)
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class FieldInfo(ItemInfo, PythonNameUtils):
    def get_descriptor(self):
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):
    def get_descriptor(self):
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:
    """
    Generic attribute: a 4-byte length followed by that many opaque bytes.
    Subclasses override init/serialize to decode specific layouts.
    """
    def init(self, data, class_file):
        # Keep the owning class file, as every subclass's init does.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]
    def serialize(self):
        return su4(self.attribute_length)+self.info

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]
    def serialize(self):
        return su4(self.attribute_length)+su2(self.name_index)

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The payload must be exactly the 2-byte constant index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        return self.class_file.constants[self.constant_value_index - 1].get_value()

    def serialize(self):
        return su4(self.attribute_length)+su2(self.constant_value_index)

class CodeAttributeInfo(AttributeInfo):
    """
    Code attribute: stack/locals limits, raw code bytes, an exception table
    and nested attributes.
    """
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data
    def serialize(self):
        od = su4(self.attribute_length)+su2(self.max_stack)+su2(self.max_locals)+su4(self.code_length)+self.code
        od += su2(self.exception_table_length)
        for e in self.exception_table:
            od += e.serialize()
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class ExceptionsAttributeInfo(AttributeInfo):
    """Attribute holding a counted table of 2-byte constant indexes."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.number_of_exceptions)
        for ei in self.exception_index_table:
            od += su2(ei)
        return od

class InnerClassesAttributeInfo(AttributeInfo):
    """Attribute holding a counted table of InnerClassInfo entries."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.number_of_classes)
        for c in self.classes:
            od += c.serialize()
        return od

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    """Attribute holding a counted table of LineNumberInfo entries."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.line_number_table_length)
        for ln in self.line_number_table:
            od += ln.serialize()
        return od

class LocalVariableAttributeInfo(AttributeInfo):
    """Attribute holding a counted table of LocalVariableInfo entries."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(self.local_variable_table_length)
        for lv in self.local_variable_table:
            od += lv.serialize()
        return od

class LocalVariableTypeAttributeInfo(AttributeInfo):
    """
    Attribute holding a counted table whose entries are decoded with
    LocalVariableInfo; the count is recomputed from the table on output.
    """
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        local_variable_type_table_length = u2(data[4:6])
        data = data[6:]
        self.local_variable_type_table = []
        for i in range(0, local_variable_type_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_type_table.append(local_variable)
        return data

    def serialize(self):
        od = su4(self.attribute_length)+su2(len(self.local_variable_type_table))
        od += "".join([lv.serialize() for lv in self.local_variable_type_table])
        return od

class DeprecatedAttributeInfo(AttributeInfo):
    pass

class VerificationTypeInfo(object):
    """
    Base verification type entry: a single tag byte. Instances are created
    with the tag found by lookahead; init then checks and consumes it.
    """
    def __init__(self, tag):
        self.tag = tag
    def init(self, data, class_file):
        self.class_file = class_file
        tag = u1(data[0:1])
        assert(tag == self.tag)
        return data[1:]
    def serialize(self):
        return su1(self.tag)
class TopVariableInfo(VerificationTypeInfo):
    TAG = 0
class IntegerVariableInfo(VerificationTypeInfo):
    TAG = 1
class FloatVariableInfo(VerificationTypeInfo):
    TAG = 2
class DoubleVariableInfo(VerificationTypeInfo):
    TAG = 3
class LongVariableInfo(VerificationTypeInfo):
    TAG = 4
class NullVariableInfo(VerificationTypeInfo):
    TAG = 5
class UninitializedThisVariableInfo(VerificationTypeInfo):
    TAG = 6
class ObjectVariableInfo(VerificationTypeInfo):
    """Tag byte followed by a 2-byte constant pool index."""
    TAG = 7
    def init(self, data, class_file):
        data = super(ObjectVariableInfo, self).init(data, class_file)
        self.cpool_index = u2(data)
        return data[2:]
    def serialize(self):
        return super(ObjectVariableInfo, self).serialize() + su2(self.cpool_index)
class UninitializedVariableInfo(VerificationTypeInfo):
    """Tag byte followed by a 2-byte offset."""
    TAG = 8
    def init(self, data, class_file):
        data = super(UninitializedVariableInfo, self).init(data, class_file)
        self.offset = u2(data)
        return data[2:]
    def serialize(self):
        return super(UninitializedVariableInfo, self).serialize() + su2(self.offset)

VARIABLE_INFO_CLASSES = (TopVariableInfo, IntegerVariableInfo, FloatVariableInfo, DoubleVariableInfo,
                         LongVariableInfo, NullVariableInfo, UninitializedThisVariableInfo,
                         ObjectVariableInfo, UninitializedVariableInfo)
VARIABLE_INFO_TAG_MAP = dict([(cls.TAG, cls) for cls in VARIABLE_INFO_CLASSES])

# Exception
class UnknownVariableInfo(Exception):
    """Raised for a verification type tag with no registered class."""
    def __init__(self, tag):
        Exception.__init__(self, tag)
        self.tag = tag
    def __str__(self):
        return repr(self.tag)

def create_verification_type_info(data):
    """
    Return a new, not yet initialised, verification type object chosen from
    the tag byte at the start of 'data'.

    Raises UnknownVariableInfo if the tag is not in VARIABLE_INFO_TAG_MAP.
    """
    # Does not consume data, just does lookahead
    tag = u1(data[0:1])
    if tag in VARIABLE_INFO_TAG_MAP:
        return VARIABLE_INFO_TAG_MAP[tag](tag)
    else:
        raise UnknownVariableInfo(tag)


class StackMapFrame(object):
    """
    Base stack map frame: a single frame type byte. Subclasses declare the
    inclusive TYPE_LOWER..TYPE_UPPER range of frame types they handle.
    """
    def __init__(self, frame_type):
        self.frame_type = frame_type
    def init(self, data, class_file):
        self.class_file = class_file
        frame_type = u1(data[0:1])
        assert(frame_type == self.frame_type)
        return data[1:]
    def serialize(self):
        return su1(self.frame_type)
class SameFrame(StackMapFrame):
    TYPE_LOWER = 0
    TYPE_UPPER = 63
class SameLocals1StackItemFrame(StackMapFrame):
    TYPE_LOWER = 64
    TYPE_UPPER = 127
    def init(self, data, class_file):
        data = super(SameLocals1StackItemFrame, self).init(data, class_file)
        # The offset is carried implicitly by the frame type itself.
        self.offset_delta = self.frame_type - 64
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)
    def serialize(self):
        return super(SameLocals1StackItemFrame, self).serialize()+self.stack[0].serialize()
class SameLocals1StackItemFrameExtended(StackMapFrame):
    TYPE_LOWER = 247
    TYPE_UPPER = 247
    def init(self, data, class_file):
        data = super(SameLocals1StackItemFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)
    def serialize(self):
        return super(SameLocals1StackItemFrameExtended, self).serialize()+su2(self.offset_delta)+self.stack[0].serialize()
class ChopFrame(StackMapFrame):
    TYPE_LOWER = 248
    TYPE_UPPER = 250
    def init(self, data, class_file):
        data = super(ChopFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return super(ChopFrame, self).serialize()+su2(self.offset_delta)
class SameFrameExtended(StackMapFrame):
    TYPE_LOWER = 251
    TYPE_UPPER = 251
    def init(self, data, class_file):
        data = super(SameFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return super(SameFrameExtended, self).serialize()+su2(self.offset_delta)
class AppendFrame(StackMapFrame):
    TYPE_LOWER = 252
    TYPE_UPPER = 254
    def init(self, data, class_file):
        data = super(AppendFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        # Frame types 252..254 carry 1..3 additional locals.
        num_locals = self.frame_type - 251
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        return data
    def serialize(self):
        od = super(AppendFrame, self).serialize()+su2(self.offset_delta)
        od += "".join([l.serialize() for l in self.locals])
        return od
class FullFrame(StackMapFrame):
    TYPE_LOWER = 255
    TYPE_UPPER = 255
    def init(self, data, class_file):
        data = super(FullFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        num_locals = u2(data[2:4])
        data = data[4:]
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        num_stack_items = u2(data[0:2])
        data = data[2:]
        self.stack = []
        for ii in range(num_stack_items):
            stack_item = create_verification_type_info(data)
            data = stack_item.init(data, class_file)
            self.stack.append(stack_item)
        return data
    def serialize(self):
        od = super(FullFrame, self).serialize()+su2(self.offset_delta)+su2(len(self.locals))
        od += "".join([l.serialize() for l in self.locals])
        od += su2(len(self.stack))
        od += "".join([s.serialize() for s in self.stack])
        return od

FRAME_CLASSES = (SameFrame, SameLocals1StackItemFrame, SameLocals1StackItemFrameExtended,
                 ChopFrame, SameFrameExtended, AppendFrame, FullFrame)

# Exception
class UnknownStackFrame(Exception):
    """Raised for a frame type that no class in FRAME_CLASSES covers."""
    def __init__(self, frame_type):
        Exception.__init__(self, frame_type)
        self.frame_type = frame_type
    def __str__(self):
        return repr(self.frame_type)

def create_stack_frame(data):
    """
    Return a new, not yet initialised, frame object chosen from the frame
    type byte at the start of 'data'.

    Raises UnknownStackFrame if no class in FRAME_CLASSES covers the type.
    """
    # Does not consume data, just does lookahead
    frame_type = u1(data[0:1])
    for cls in FRAME_CLASSES:
        if frame_type >= cls.TYPE_LOWER and frame_type <= cls.TYPE_UPPER:
            return cls(frame_type)
    raise UnknownStackFrame(frame_type)

class StackMapTableAttributeInfo(AttributeInfo):
    """Attribute holding a counted list of stack map frames."""
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_entries = u2(data[4:6])
        self.entries = []
        data = data[6:]
        for i in range(0, num_entries):
            frame = create_stack_frame(data)
            data = frame.init(data, class_file)
            self.entries.append(frame)
        return data
    def serialize(self):
        od = su4(self.attribute_length)+su2(len(self.entries))
        od += "".join([e.serialize() for e in self.entries])
        return od


class EnclosingMethodAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.class_index = u2(data[4:6])
        self.method_index = u2(data[6:8])
        return data[8:]
    def serialize(self):
        return su4(self.attribute_length)+su2(self.class_index)+su2(self.method_index)


class SignatureAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.signature_index = u2(data[4:6])
        return data[6:]
    def serialize(self):
        return su4(self.attribute_length)+su2(self.signature_index)


class SourceDebugExtensionAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.debug_extension = data[4:(4 + self.attribute_length)]
        return data[(4+ self.attribute_length):]
    def serialize(self):
        return su4(self.attribute_length)+self.debug_extension


class ElementValue(object):
    """
    Base annotation element value: a one-character tag. Instances are created
    with the tag found by lookahead; init then checks and consumes it.
    """
    def __init__(self, tag):
        self.tag = tag
    def init(self, data, class_file):
        self.class_file = class_file
        tag = chr(u1(data[0:1]))
        assert(tag == self.tag)
        return data[1:]
    def serialize(self):
        return su1(ord(self.tag))
class ConstValue(ElementValue):
    def init(self, data, class_file):
        data = super(ConstValue, self).init(data, class_file)
        self.const_value_index = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return super(ConstValue, self).serialize()+su2(self.const_value_index)
class EnumConstValue(ElementValue):
    def init(self, data, class_file):
        data = super(EnumConstValue, self).init(data, class_file)
        self.type_name_index = u2(data[0:2])
        self.const_name_index = u2(data[2:4])
        return data[4:]
    def serialize(self):
        return super(EnumConstValue, self).serialize()+su2(self.type_name_index)+su2(self.const_name_index)
class ClassInfoValue(ElementValue):
    def init(self, data, class_file):
        data = super(ClassInfoValue, self).init(data, class_file)
        self.class_info_index = u2(data[0:2])
        return data[2:]
    def serialize(self):
        return super(ClassInfoValue, self).serialize()+su2(self.class_info_index)
class AnnotationValue(ElementValue):
    def init(self, data, class_file):
        data = super(AnnotationValue, self).init(data, class_file)
        self.annotation_value = Annotation()
        return self.annotation_value.init(data, class_file)
    def serialize(self):
        return super(AnnotationValue, self).serialize()+self.annotation_value.serialize()
class ArrayValue(ElementValue):
    def init(self, data, class_file):
        data = super(ArrayValue, self).init(data, class_file)
        num_values = u2(data[0:2])
        data = data[2:]
        self.values = []
        for ii in range(num_values):
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.values.append(element_value)
        return data
    def serialize(self):
        od = super(ArrayValue, self).serialize()+su2(len(self.values))
        od += "".join([v.serialize() for v in self.values])
        return od
# Exception
class UnknownElementValue(Exception):
    """Raised for an element value tag that create_element_value rejects."""
    def __init__(self, tag):
        Exception.__init__(self, tag)
        self.tag = tag
    def __str__(self):
        return repr(self.tag)

def create_element_value(data):
    """
    Return a new, not yet initialised, element value object chosen from the
    tag character at the start of 'data' (lookahead only; nothing is consumed).

    Raises UnknownElementValue for an unrecognised tag.
    """
    tag = chr(u1(data[0:1]))
    if tag in ('B', 'C', 'D', 'F', 'I', 'J', 'S', 'Z', 's'):
        return ConstValue(tag)
    elif tag == 'e':
        return EnumConstValue(tag)
    elif tag == 'c':
        return ClassInfoValue(tag)
    elif tag == '@':
        return AnnotationValue(tag)
    elif tag == '[':
        return ArrayValue(tag)
    else:
        raise UnknownElementValue(tag)


class Annotation(object):
    def init(self, data, class_file):
        self.class_file = class_file
        self.type_index = u2(data[0:2])
        num_element_value_pairs = u2(data[2:4])
        data = data[4:]
        self.element_value_pairs = []
        for ii in xrange(num_element_value_pairs):
            element_name_index 
= u2(data[0:2])   875             data = data[2:]   876             element_value = create_element_value(data)   877             data = element_value.init(data, class_file)   878             self.element_value_pairs.append((element_name_index, element_value))   879         return data   880     def serialize(self):   881         od = su2(self.type_index)+su2(len(self.element_value_pairs))   882         od += "".join([su2(evp[0])+evp[1].serialize() for evp in self.element_value_pairs])   883         return od   884    885    886 class RuntimeAnnotationsAttributeInfo(AttributeInfo):   887     def init(self, data, class_file):   888         self.class_file = class_file   889         self.attribute_length = u4(data[0:4])   890         num_annotations = u2(data[4:6])   891         data = data[6:]   892         self.annotations = []   893         for ii in xrange(num_annotations):   894             annotation = Annotation()    895             data = annotation.init(data, class_file)   896             self.annotations.append(annotation)   897         return data   898     def serialize(self):   899         od = su4(self.attribute_length)+su2(len(self.annotations))   900         od += "".join([a.serialize() for a in self.annotations])   901         return od   902    903 class RuntimeVisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):   904     pass   905    906 class RuntimeInvisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):   907     pass   908    909 class RuntimeParameterAnnotationsAttributeInfo(AttributeInfo):   910     def init(self, data, class_file):   911         self.class_file = class_file   912         self.attribute_length = u4(data[0:4])   913         num_parameters = u1(data[4:5])   914         data = data[5:]   915         self.parameter_annotations = []   916         for ii in xrange(num_parameters):   917             num_annotations = u2(data[0:2])   918             data = data[2:]   919             annotations = []   920     
        for jj in xrange(num_annotations):   921                 annotation = Annotation()    922                 data = annotation.init(data, class_file)   923                 annotations.append(annotation)   924             self.parameter_annotations.append(annotations)   925         return data   926     def serialize(self):   927         od = su4(self.attribute_length)+su1(len(self.parameter_annotations))   928         for pa in self.parameter_annotations:   929             od += su2(len(pa))   930             od += "".join([a.serialize() for a in pa])   931         return od   932            933 class RuntimeVisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):   934     pass   935    936 class RuntimeInvisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):   937     pass   938    939 class AnnotationDefaultAttributeInfo(AttributeInfo):   940     def init(self, data, class_file):   941         self.class_file = class_file   942         self.attribute_length = u4(data[0:4])   943         data = data[4:]   944         self.default_value = create_element_value(data)   945         return self.default_value.init(data, class_file)   946     def serialize(self):   947         return su4(self.attribute_length)+self.default_value.serialize()   948    949    950 # Child classes of the attribute information classes.   

class ExceptionInfo:

    "A record of four 16-bit values: start_pc, end_pc, handler_pc, catch_type."

    def init(self, data):

        "Read the four values from 'data'; return the remaining data."

        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

    def serialize(self):

        "Return the four values in the order in which they were read."

        return su2(self.start_pc)+su2(self.end_pc)+su2(self.handler_pc)+su2(self.catch_type)

class InnerClassInfo(NameUtils):

    "A record of inner/outer class indexes, an inner name index and access flags."

    def init(self, data, class_file):

        "Read the four values from 'data'; return the remaining data."

        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

    def serialize(self):

        "Return the four values in the order in which they were read."

        return su2(self.inner_class_info_index)+su2(self.outer_class_info_index)+su2(self.name_index)+su2(self.inner_class_access_flags)

class LineNumberInfo:

    "A record pairing a start_pc value with a line number."

    def init(self, data):

        "Read both values from 'data'; return the remaining data."

        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

    def serialize(self):

        "Return both values in the order in which they were read."

        return su2(self.start_pc)+su2(self.line_number)

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A record of start_pc, length, name index, descriptor index and index."

    def init(self, data, class_file):

        "Read the five values from 'data'; return the remaining data."

        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):

        """
        Return the result of get_field_descriptor applied to the constant
        referenced by the descriptor index (constant indexes start at one).
        """

        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def serialize(self):

        "Return the five values in the order in which they were read."

        return su2(self.start_pc)+su2(self.length)+su2(self.name_index)+su2(self.descriptor_index)+su2(self.index)

# Exceptions.

class UnknownTag(Exception):
    def __init__(self, tag):
        self.tag = tag
    def __str__(self):
        return repr(self.tag)

class UnknownAttribute(Exception):
    def __init__(self, name):
        self.name = name

# A mapping from attribute names, as found in the constant table, to the
# classes used to decode and encode attributes with those names.

ATTR_NAMES_TO_CLASS = {"SourceFile": SourceFileAttributeInfo,
                       "ConstantValue": ConstantValueAttributeInfo,
                       "Code": CodeAttributeInfo,
                       "Exceptions": ExceptionsAttributeInfo,
                       "InnerClasses": InnerClassesAttributeInfo,
                       "Synthetic": SyntheticAttributeInfo,
                       "LineNumberTable": LineNumberAttributeInfo,
                       "LocalVariableTable": LocalVariableAttributeInfo,
                       "Deprecated": DeprecatedAttributeInfo,
                       # Java SE 1.6, class file >= 50.0, VMSpec v3 s4.7.4
                       "StackMapTable": StackMapTableAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.7
                       "EnclosingMethod": EnclosingMethodAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.9
                       "Signature": SignatureAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.11
                       "SourceDebugExtension": SourceDebugExtensionAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.14
                       "LocalVariableTypeTable": LocalVariableTypeAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.16
                       "RuntimeVisibleAnnotations": RuntimeVisibleAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.17
                       "RuntimeInvisibleAnnotations": RuntimeInvisibleAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.18
                       "RuntimeVisibleParameterAnnotations": RuntimeVisibleParameterAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.19
                       "RuntimeInvisibleParameterAnnotations": RuntimeInvisibleParameterAnnotationsAttributeInfo,
                       # Java SE 1.5, class file >= 49.0, VMSpec v3  s4.7.20
                       "AnnotationDefault": AnnotationDefaultAttributeInfo,}

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        Raises UnknownAttribute, carrying the value found, if 's' does not
        start with the magic number 0xCAFEBABE.
        """

        # Built lazily by _serialize_attributes.
        self.attribute_class_to_index = None
        magic = u4(s[0:])
        if magic != 0xCAFEBABE:
            raise UnknownAttribute(magic)
        self.minorv,self.majorv = u2(s[4:]),u2(s[6:])
        # Each _get_* method returns the decoded value and the remaining data.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def serialize(self):

        """
        Return the class file as a string, writing the sections in the same
        order in which the initialiser reads them.
        """

        od = su4(0xCAFEBABE)+su2(self.minorv)+su2(self.majorv)
        od += self._serialize_constants()
        od += self._serialize_access_flags()
        od += self._serialize_this_class()
        od += self._serialize_super_class()
        od += self._serialize_interfaces()
        od += self._serialize_fields()
        od += self._serialize_methods()
        od += self._serialize_attributes(self.attributes)
        return od

    def _encode_const(self, c):

        """
        Return the tag byte for constant 'c' followed by its serialized form.
        An empty string is returned for anything which is not a recognised
        constant, such as the None placeholders which follow large numeric
        entries in the constant table.
        """

        od = ''
        if isinstance(c, Utf8Info):
            od += su1(1)
        elif isinstance(c, IntegerInfo):
            od += su1(3)
        elif isinstance(c, FloatInfo):
            od += su1(4)
        elif isinstance(c, LongInfo):
            od += su1(5)
        elif isinstance(c, DoubleInfo):
            od += su1(6)
        elif isinstance(c, ClassInfo):
            od += su1(7)
        elif isinstance(c, StringInfo):
            od += su1(8)
        elif isinstance(c, FieldRefInfo):
            od += su1(9)
        elif isinstance(c, InterfaceMethodRefInfo):  # check subclass first
            od += su1(11)
        elif isinstance(c, MethodRefInfo):
            od += su1(10)
        elif isinstance(c, NameAndTypeInfo):
            od += su1(12)
        else:
            return od
        od += c.serialize()
        return od

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning the constant
        object and the remaining data. Raises UnknownTag for an unrecognised
        tag byte.
        """

        tag = u1(s[0:1])
        if tag == 1:
            const = Utf8Info()
        elif tag == 3:
            const = IntegerInfo()
        elif tag == 4:
            const = FloatInfo()
        elif tag == 5:
            const = LongInfo()
        elif tag == 6:
            const = DoubleInfo()
        elif tag == 7:
            const = ClassInfo()
        elif tag == 8:
            const = StringInfo()
        elif tag == 9:
            const = FieldRefInfo()
        elif tag == 10:
            const = MethodRefInfo()
        elif tag == 11:
            const = InterfaceMethodRefInfo()
        elif tag == 12:
            const = NameAndTypeInfo()
        else:
            raise UnknownTag(tag)

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until index 'count' is reached, returning
        the list of constants and the remaining data. Indexes start at one.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):

        """
        Create 'number' instances of 'cls', initialising each in turn from
        's'. Return the list of instances and the remaining data.
        """

        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode one attribute from 's', choosing its class from the name held
        by the constant which the leading name index refers to. Return the
        attribute and the remaining data. Raises UnknownAttribute for a name
        not present in ATTR_NAMES_TO_CLASS.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        if constant_name in ATTR_NAMES_TO_CLASS:
            attribute = ATTR_NAMES_TO_CLASS[constant_name]()
        else:
            raise UnknownAttribute(constant_name)
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        "Decode 'number' attributes; return them with the remaining data."

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _serialize_constants(self):
        # The stored count is one more than the number of table entries.
        return su2(len(self.constants)+1)+"".join([self._encode_const(c) for c in self.constants])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _serialize_access_flags(self):
        return su2(self.access_flags)

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _serialize_this_class(self):
        return su2(self.constants.index(self.this_class)+1)

    def _serialize_super_class(self):

        """
        Return the index of the superclass constant, or zero where there is
        no superclass (the counterpart of _get_super_class returning None
        for a zero index).
        """

        # Looking None up in the table would either fail or find one of the
        # None placeholders which follow large numeric entries.
        if self.super_class is None:
            return su2(0)
        return su2(self.constants.index(self.super_class)+1)

    def _get_super_class(self, s):
        index = u2(s[0:2])
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):

        "Read a count and that many constant indexes; return the constants and remaining data."

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _serialize_interfaces(self):
        return su2(len(self.interfaces))+"".join([su2(self.constants.index(interf)+1) for interf in self.interfaces])

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _serialize_fields(self):
        od = su2(len(self.fields))
        od += "".join([f.serialize() for f in self.fields])
        return od

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _serialize_attributes(self, attrs):

        """
        Return the count of 'attrs' followed, for each attribute, by the
        index of the constant naming it and its serialized form.

        Raises UnknownAttribute, carrying the attribute's class name, if no
        constant names the attribute's class, since writing the attribute
        without its name index would produce a malformed table.
        """

        od = su2(len(attrs))
        if len(attrs) == 0: return od
        if self.attribute_class_to_index is None:
            # Map each attribute class to the (one-based) index of the
            # constant holding its name; built once and then reused.
            self.attribute_class_to_index = {}
            index = 0
            for c in self.constants:
                index += 1
                if isinstance(c, Utf8Info) and str(c) in ATTR_NAMES_TO_CLASS:
                    self.attribute_class_to_index[ATTR_NAMES_TO_CLASS[str(c)]]=index
        for attribute in attrs:
            for (classtype,name_index) in self.attribute_class_to_index.iteritems():
                if isinstance(attribute, classtype):
                    od += su2(name_index)
                    break
            else:
                raise UnknownAttribute(attribute.__class__.__name__)
            od += attribute.serialize()
        return od

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

    def _serialize_methods(self):
        od = su2(len(self.methods))
        od += "".join([m.serialize() for m in self.methods])
        return od


if __name__ == "__main__":
    # Self-check: decode the named class file and verify that serializing it
    # reproduces the input exactly.
    import sys
    f = open(sys.argv[1], "rb")
    try:
        in_data = f.read()
    finally:
        # Close the file even if reading fails.
        f.close()
    c = ClassFile(in_data)
    out_data = c.serialize()
    assert(in_data == out_data)

# vim: tabstop=4 expandtab shiftwidth=4