javaclass

Changeset

4:668cb12070c2
2004-10-28 Paul Boddie raw files shortlog changelog graph Added proper support for integers, longs, floats and doubles, along with constant value retrieval. Added a bytecode processing module.
bytecode.py (file) classfile.py (file)
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/bytecode.py	Thu Oct 28 20:47:25 2004 +0200
     1.3 @@ -0,0 +1,240 @@
     1.4 +#!/usr/bin/env python
     1.5 +
     1.6 +"""
     1.7 +Java bytecode conversion. Specification found at the following URL:
     1.8 +http://java.sun.com/docs/books/vmspec/2nd-edition/html/Instructions2.doc.html
     1.9 +"""
    1.10 +
    1.11 +import dis # for access to Python bytecode values
    1.12 +
    1.13 +# Bytecode conversion.
    1.14 +
    1.15 +def get_instructions(code):
    1.16 +    global java_bytecodes
    1.17 +
    1.18 +    i = 0
    1.19 +    instructions = []
    1.20 +    while i < len(code):
    1.21 +        bytecode = ord(code[i])
    1.22 +        mnemonic, number_of_arguments, stack_change = java_bytecodes[bytecode]
    1.23 +
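    1.24 +        # NOTE: To be fixed: variable-length instructions (argument count None: tableswitch, lookupswitch, wide) are not decoded, so processing stops at the first one.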
    1.25 +        if number_of_arguments is None:
    1.26 +            print "Stop at", mnemonic
    1.27 +            return instructions
    1.28 +
    1.29 +        arguments = []
    1.30 +        for j in range(0, number_of_arguments):
    1.31 +            arguments.append(ord(code[i + 1 + j]))
    1.32 +
    1.33 +        i = i + 1 + number_of_arguments
    1.34 +        instructions.append((mnemonic, arguments))
    1.35 +
    1.36 +    return instructions
    1.37 +
    1.38 +java_bytecodes = {
    1.39 +    # code : (mnemonic, number of following bytes, change in stack); None marks a variable quantity
    1.40 +    0 : ("nop", 0, 0),
    1.41 +    1 : ("aconst_null", 0, 1),
    1.42 +    2 : ("iconst_m1", 0, 1),
    1.43 +    3 : ("iconst_0", 0, 1),
    1.44 +    4 : ("iconst_1", 0, 1),
    1.45 +    5 : ("iconst_2", 0, 1),
    1.46 +    6 : ("iconst_3", 0, 1),
    1.47 +    7 : ("iconst_4", 0, 1),
    1.48 +    8 : ("iconst_5", 0, 1),
    1.49 +    9 : ("lconst_0", 0, 1),
    1.50 +    10 : ("lconst_1", 0, 1),
    1.51 +    11 : ("fconst_0", 0, 1),
    1.52 +    12 : ("fconst_1", 0, 1),
    1.53 +    13 : ("fconst_2", 0, 1),
    1.54 +    14 : ("dconst_0", 0, 1),
    1.55 +    15 : ("dconst_1", 0, 1),
    1.56 +    16 : ("bipush", 1, 1),
    1.57 +    17 : ("sipush", 2, 1),
    1.58 +    18 : ("ldc", 1, 1),
    1.59 +    19 : ("ldc_w", 2, 1),
    1.60 +    20 : ("ldc2_w", 2, 1),
    1.61 +    21 : ("iload", 1, 1),
    1.62 +    22 : ("lload", 1, 1),
    1.63 +    23 : ("fload", 1, 1),
    1.64 +    24 : ("dload", 1, 1),
    1.65 +    25 : ("aload", 1, 1),
    1.66 +    26 : ("iload_0", 0, 1),
    1.67 +    27 : ("iload_1", 0, 1),
    1.68 +    28 : ("iload_2", 0, 1),
    1.69 +    29 : ("iload_3", 0, 1),
    1.70 +    30 : ("lload_0", 0, 1),
    1.71 +    31 : ("lload_1", 0, 1),
    1.72 +    32 : ("lload_2", 0, 1),
    1.73 +    33 : ("lload_3", 0, 1),
    1.74 +    34 : ("fload_0", 0, 1),
    1.75 +    35 : ("fload_1", 0, 1),
    1.76 +    36 : ("fload_2", 0, 1),
    1.77 +    37 : ("fload_3", 0, 1),
    1.78 +    38 : ("dload_0", 0, 1),
    1.79 +    39 : ("dload_1", 0, 1),
    1.80 +    40 : ("dload_2", 0, 1),
    1.81 +    41 : ("dload_3", 0, 1),
    1.82 +    42 : ("aload_0", 0, 1),
    1.83 +    43 : ("aload_1", 0, 1),
    1.84 +    44 : ("aload_2", 0, 1),
    1.85 +    45 : ("aload_3", 0, 1),
    1.86 +    46 : ("iaload", 0, -1),
    1.87 +    47 : ("laload", 0, -1),
    1.88 +    48 : ("faload", 0, -1),
    1.89 +    49 : ("daload", 0, -1),
    1.90 +    50 : ("aaload", 0, -1),
    1.91 +    51 : ("baload", 0, -1),
    1.92 +    52 : ("caload", 0, -1),
    1.93 +    53 : ("saload", 0, -1),
    1.94 +    54 : ("istore", 1, -1),
    1.95 +    55 : ("lstore", 1, -1),
    1.96 +    56 : ("fstore", 1, -1),
    1.97 +    57 : ("dstore", 1, -1),
    1.98 +    58 : ("astore", 1, -1),
    1.99 +    59 : ("istore_0", 0, -1),
   1.100 +    60 : ("istore_1", 0, -1),
   1.101 +    61 : ("istore_2", 0, -1),
   1.102 +    62 : ("istore_3", 0, -1),
   1.103 +    63 : ("lstore_0", 0, -1),
   1.104 +    64 : ("lstore_1", 0, -1),
   1.105 +    65 : ("lstore_2", 0, -1),
   1.106 +    66 : ("lstore_3", 0, -1),
   1.107 +    67 : ("fstore_0", 0, -1),
   1.108 +    68 : ("fstore_1", 0, -1),
   1.109 +    69 : ("fstore_2", 0, -1),
   1.110 +    70 : ("fstore_3", 0, -1),
   1.111 +    71 : ("dstore_0", 0, -1),
   1.112 +    72 : ("dstore_1", 0, -1),
   1.113 +    73 : ("dstore_2", 0, -1),
   1.114 +    74 : ("dstore_3", 0, -1),
   1.115 +    75 : ("astore_0", 0, -1),
   1.116 +    76 : ("astore_1", 0, -1),
   1.117 +    77 : ("astore_2", 0, -1),
   1.118 +    78 : ("astore_3", 0, -1),
   1.119 +    79 : ("iastore", 0, -3),
   1.120 +    80 : ("lastore", 0, -3),
   1.121 +    81 : ("fastore", 0, -3),
   1.122 +    82 : ("dastore", 0, -3),
   1.123 +    83 : ("aastore", 0, -3),
   1.124 +    84 : ("bastore", 0, -3),
   1.125 +    85 : ("castore", 0, -3),
   1.126 +    86 : ("sastore", 0, -3),
   1.127 +    87 : ("pop", 0, -1),
   1.128 +    88 : ("pop2", 0, None), # variable number of elements removed
   1.129 +    89 : ("dup", 0, 1),
   1.130 +    90 : ("dup_x1", 0, 1),
   1.131 +    91 : ("dup_x2", 0, 1),
   1.132 +    92 : ("dup2", 0, 2), # or 1 extra stack value
   1.133 +    93 : ("dup2_x1", 0, 2), # or 1 extra stack value
   1.134 +    94 : ("dup2_x2", 0, 2), # or 1 extra stack value
   1.135 +    95 : ("swap", 0, 0),
   1.136 +    96 : ("iadd", 0, -1),
   1.137 +    97 : ("ladd", 0, -1),
   1.138 +    98 : ("fadd", 0, -1),
   1.139 +    99 : ("dadd", 0, -1),
   1.140 +    100 : ("isub", 0, -1),
   1.141 +    101 : ("lsub", 0, -1),
   1.142 +    102 : ("fsub", 0, -1),
   1.143 +    103 : ("dsub", 0, -1),
   1.144 +    104 : ("imul", 0, -1),
   1.145 +    105 : ("lmul", 0, -1),
   1.146 +    106 : ("fmul", 0, -1),
   1.147 +    107 : ("dmul", 0, -1),
   1.148 +    108 : ("idiv", 0, -1),
   1.149 +    109 : ("ldiv", 0, -1),
   1.150 +    110 : ("fdiv", 0, -1),
   1.151 +    111 : ("ddiv", 0, -1),
   1.152 +    112 : ("irem", 0, -1),
   1.153 +    113 : ("lrem", 0, -1),
   1.154 +    114 : ("frem", 0, -1),
   1.155 +    115 : ("drem", 0, -1),
   1.156 +    116 : ("ineg", 0, 0),
   1.157 +    117 : ("lneg", 0, 0),
   1.158 +    118 : ("fneg", 0, 0),
   1.159 +    119 : ("dneg", 0, 0),
   1.160 +    120 : ("ishl", 0, -1),
   1.161 +    121 : ("lshl", 0, -1),
   1.162 +    122 : ("ishr", 0, -1),
   1.163 +    123 : ("lshr", 0, -1),
   1.164 +    124 : ("iushr", 0, -1),
   1.165 +    125 : ("lushr", 0, -1),
   1.166 +    126 : ("iand", 0, -1),
   1.167 +    127 : ("land", 0, -1),
   1.168 +    128 : ("ior", 0, -1),
   1.169 +    129 : ("lor", 0, -1),
   1.170 +    130 : ("ixor", 0, -1),
   1.171 +    131 : ("lxor", 0, -1),
   1.172 +    132 : ("iinc", 2, 0),
   1.173 +    133 : ("i2l", 0, 0),
   1.174 +    134 : ("i2f", 0, 0),
   1.175 +    135 : ("i2d", 0, 0),
   1.176 +    136 : ("l2i", 0, 0),
   1.177 +    137 : ("l2f", 0, 0),
   1.178 +    138 : ("l2d", 0, 0),
   1.179 +    139 : ("f2i", 0, 0),
   1.180 +    140 : ("f2l", 0, 0),
   1.181 +    141 : ("f2d", 0, 0),
   1.182 +    142 : ("d2i", 0, 0),
   1.183 +    143 : ("d2l", 0, 0),
   1.184 +    144 : ("d2f", 0, 0),
   1.185 +    145 : ("i2b", 0, 0),
   1.186 +    146 : ("i2c", 0, 0),
   1.187 +    147 : ("i2s", 0, 0),
   1.188 +    148 : ("lcmp", 0, -1),
   1.189 +    149 : ("fcmpl", 0, -1),
   1.190 +    150 : ("fcmpg", 0, -1),
   1.191 +    151 : ("dcmpl", 0, -1),
   1.192 +    152 : ("dcmpg", 0, -1),
   1.193 +    153 : ("ifeq", 2, -1),
   1.194 +    154 : ("ifne", 2, -1),
   1.195 +    155 : ("iflt", 2, -1),
   1.196 +    156 : ("ifge", 2, -1),
   1.197 +    157 : ("ifgt", 2, -1),
   1.198 +    158 : ("ifle", 2, -1),
   1.199 +    159 : ("if_icmpeq", 2, -2),
   1.200 +    160 : ("if_icmpne", 2, -2),
   1.201 +    161 : ("if_icmplt", 2, -2),
   1.202 +    162 : ("if_icmpge", 2, -2),
   1.203 +    163 : ("if_icmpgt", 2, -2),
   1.204 +    164 : ("if_icmple", 2, -2),
   1.205 +    165 : ("if_acmpeq", 2, -2),
   1.206 +    166 : ("if_acmpne", 2, -2),
   1.207 +    167 : ("goto", 2, 0),
   1.208 +    168 : ("jsr", 2, 1),
   1.209 +    169 : ("ret", 1, 0),
   1.210 +    170 : ("tableswitch", None, -1), # variable number of arguments
   1.211 +    171 : ("lookupswitch", None, -1), # variable number of arguments
   1.212 +    172 : ("ireturn", 0, -1),
   1.213 +    173 : ("lreturn", 0, -1),
   1.214 +    174 : ("freturn", 0, -1),
   1.215 +    175 : ("dreturn", 0, -1),
   1.216 +    176 : ("areturn", 0, -1),
   1.217 +    177 : ("return", 0, 0),
   1.218 +    178 : ("getstatic", 2, 1),
   1.219 +    179 : ("putstatic", 2, -1),
   1.220 +    180 : ("getfield", 2, 0),
   1.221 +    181 : ("putfield", 2, -2),
   1.222 +    182 : ("invokevirtual", 2, None), # variable number of elements removed
   1.223 +    183 : ("invokespecial", 2, None), # variable number of elements removed
   1.224 +    184 : ("invokestatic", 2, None), # variable number of elements removed
   1.225 +    185 : ("invokeinterface", 4, None), # variable number of elements removed
   1.226 +    187 : ("new", 2, 1),
   1.227 +    188 : ("newarray", 1, 0),
   1.228 +    189 : ("anewarray", 2, 0),
   1.229 +    190 : ("arraylength", 0, 0),
   1.230 +    191 : ("athrow", 0, 0),
   1.231 +    192 : ("checkcast", 2, 0),
   1.232 +    193 : ("instanceof", 2, 0),
   1.233 +    194 : ("monitorenter", 0, -1),
   1.234 +    195 : ("monitorexit", 0, -1),
   1.235 +    196 : ("wide", None, None), # 3 or 5 arguments, stack changes according to modified element
   1.236 +    197 : ("multianewarray", 3, None), # variable number of elements removed
   1.237 +    198 : ("ifnull", 2, -1),
   1.238 +    199 : ("ifnonnull", 2, -1),
   1.239 +    200 : ("goto_w", 4, 0),
   1.240 +    201 : ("jsr_w", 4, 1),
   1.241 +    }
   1.242 +
   1.243 +# vim: tabstop=4 expandtab shiftwidth=4
     2.1 --- a/classfile.py	Thu Oct 28 16:38:33 2004 +0200
     2.2 +++ b/classfile.py	Thu Oct 28 20:47:25 2004 +0200
     2.3 @@ -5,7 +5,7 @@
     2.4  http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
     2.5  """
     2.6  
     2.7 -import struct
     2.8 +import struct # for general decoding of class files
     2.9  
    2.10  # Utility functions.
    2.11  
    2.12 @@ -18,6 +18,18 @@
    2.13  def u4(data):
    2.14      return struct.unpack(">L", data[0:4])[0]
    2.15  
    2.16 +def s4(data):
    2.17 +    return struct.unpack(">l", data[0:4])[0]
    2.18 +
    2.19 +def s8(data):
    2.20 +    return struct.unpack(">q", data[0:8])[0]
    2.21 +
    2.22 +def f4(data):
    2.23 +    return struct.unpack(">f", data[0:4])[0]
    2.24 +
    2.25 +def f8(data):
    2.26 +    return struct.unpack(">d", data[0:8])[0]
    2.27 +
    2.28  # Useful mix-ins.
    2.29  
    2.30  class NameUtils:
    2.31 @@ -82,14 +94,16 @@
    2.32  class SmallNumInfo:
    2.33      def init(self, data, class_file):
    2.34          self.class_file = class_file
    2.35 -        self.bytes = u4(data[0:4])
    2.36 +        self.bytes = data[0:4]
    2.37          return data[4:]
    2.38  
    2.39  class IntegerInfo(SmallNumInfo):
    2.40 -    pass
    2.41 +    def get_value(self):
    2.42 +        return s4(self.bytes)
    2.43  
    2.44  class FloatInfo(SmallNumInfo):
    2.45 -    pass
    2.46 +    def get_value(self):
    2.47 +        return f4(self.bytes)
    2.48  
    2.49  class LargeNumInfo:
    2.50      def init(self, data, class_file):
    2.51 @@ -99,10 +113,12 @@
    2.52          return data[8:]
    2.53  
    2.54  class LongInfo(LargeNumInfo):
    2.55 -    pass
    2.56 +    def get_value(self):
    2.57 +        return s8(self.high_bytes + self.low_bytes)
    2.58  
    2.59  class DoubleInfo(LargeNumInfo):
    2.60 -    pass
    2.61 +    def get_value(self):
    2.62 +        return f8(self.high_bytes + self.low_bytes)
    2.63  
    2.64  # Other information.
    2.65  # Objects of these classes are generally aware of the class they reside in.
    2.66 @@ -195,11 +211,15 @@
    2.67  
    2.68  class ConstantValueAttributeInfo(AttributeInfo):
    2.69      def init(self, data, class_file):
    2.70 +        self.class_file = class_file
    2.71          self.attribute_length = u4(data[0:4])
    2.72          self.constant_value_index = u2(data[4:6])
    2.73          assert 4+self.attribute_length == 6
    2.74          return data[4+self.attribute_length:]
    2.75  
    2.76 +    def get_value(self):
    2.77 +        return self.class_file.constants[self.constant_value_index - 1].get_value()
    2.78 +
    2.79  class CodeAttributeInfo(AttributeInfo):
    2.80      def init(self, data, class_file):
    2.81          self.class_file = class_file