javaclass

Changeset

6:a714f8355910
2004-11-07 Paul Boddie raw files shortlog changelog graph Added label-based jumping (rather than permitting only simple jumping forward to a single point). Added "lazy" dictionary support for the instruction mapping (from Java to Python bytecodes) along with a class which provides the "lazy" value for each case where the value is not known initially but is provided later. Added more bytecode translations and some Python bytecode sequence methods. Fixed the interpretation of various signed values.
bytecode.py (file)
     1.1 --- a/bytecode.py	Fri Oct 29 18:48:35 2004 +0200
     1.2 +++ b/bytecode.py	Sun Nov 07 17:42:07 2004 +0100
     1.3 @@ -8,16 +8,33 @@
     1.4  """
     1.5  
     1.6  import dis # for access to Python bytecode values
     1.7 +from UserDict import UserDict
     1.8  
     1.9  # Bytecode production classes.
    1.10  
    1.11  class BytecodeWriter:
    1.12      def __init__(self):
    1.13          self.loops = []
    1.14 -        self.jumps = []
    1.15 +        self.jumps = {}
    1.16          self.output = []
    1.17          self.position = 0
    1.18  
    1.19 +        # Mapping from values to indexes.
    1.20 +        self.constants = {}
    1.21 +
    1.22 +        # Mapping from names to indexes.
    1.23 +        # NOTE: This may be acquired from elsewhere.
    1.24 +        self.globals = {}
    1.25 +
    1.26 +    def get_output(self):
    1.27 +        output = []
    1.28 +        for element in self.output:
    1.29 +            if isinstance(element, LazyValue):
    1.30 +                output.append(chr(element.value))
    1.31 +            else:
    1.32 +                output.append(chr(element))
    1.33 +        return "".join(output)
    1.34 +
    1.35      # Special methods.
    1.36  
    1.37      def end_loop(self):
    1.38 @@ -26,16 +43,40 @@
    1.39          self.output[current_loop_start + 1] = self.position
    1.40          self.pop_block()
    1.41  
    1.42 -    def jump_to_next(self, status):
    1.43 -        self.jumps.push(self.position)
    1.44 -        if status:
    1.45 +    def jump_to_label(self, status, name):
    1.46 +        # Record the instruction using the jump.
    1.47 +        if not self.jumps.has_key(name):
    1.48 +            self.jumps[name] = []
    1.49 +        self.jumps[name].append(self.position)
    1.50 +        if status is None:
    1.51 +            self.jump_forward()
    1.52 +        elif status:
    1.53              self.jump_if_true()
    1.54          else:
    1.55              self.jump_if_false()
    1.56  
    1.57 -    def start_next(self):
    1.58 -        current_jump_start = self.jumps.pop()
    1.59 -        self.output[current_jump_start + 1] = self.position
    1.60 +    def start_label(self, name):
    1.61 +        # Fill in all jump instructions.
    1.62 +        for jump_instruction in self.jumps[name]:
    1.63 +            self.output[jump_instruction + 1] = self.position
    1.64 +
    1.65 +    # Complicated methods.
    1.66 +
    1.67 +    def load_const(self, value):
    1.68 +        self.output.append(opmap["LOAD_CONST"])
    1.69 +        if not self.constants.has_key(value):
    1.70 +            self.constants[value] = len(self.constants.keys())
    1.71 +        self.output.append(self.constants[value])
    1.72 +
    1.73 +    def load_global(self, name):
    1.74 +        self.output.append(opmap["LOAD_GLOBAL"])
    1.75 +        if not self.globals.has_key(value):
    1.76 +            self.globals[name] = len(self.globals.keys())
    1.77 +        self.output.append(self.globals[name])
    1.78 +
    1.79 +    def load_fast(self, index):
    1.80 +        self.output.append(opmap["LOAD_FAST"])
    1.81 +        self.output.append(index)
    1.82  
    1.83      # Normal bytecode generators.
    1.84  
    1.85 @@ -55,12 +96,57 @@
    1.86          self.output.append(offset) # May be filled in later
    1.87          self.position += 2
    1.88  
    1.89 +    def jump_forward(self, offset=None):
    1.90 +        self.output.append(opmap["JUMP_FORWARD"])
    1.91 +        self.output.append(offset) # May be filled in later
    1.92 +        self.position += 2
    1.93 +
    1.94 +# Utility classes and functions.
    1.95 +
    1.96 +class LazyDict(UserDict):
    1.97 +    def __getitem__(self, key):
    1.98 +        if not self.data.has_key(key):
    1.99 +            self.data[key] = LazyValue()
   1.100 +        return self.data[key]
   1.101 +    def __setitem__(self, key, value):
   1.102 +        if self.data.has_key(key):
   1.103 +            existing_value = self.data[key]
   1.104 +            if isinstance(existing_value, LazyValue):
   1.105 +                existing_value.value = value
   1.106 +                return
   1.107 +        self.data[key] = value
   1.108 +
   1.109 +class LazyValue:
   1.110 +    def __init__(self, value=None):
   1.111 +        self.value = value
   1.112 +
   1.113 +def signed(value, limit):
   1.114 +
   1.115 +    """
   1.116 +    Return the signed integer from the unsigned 'value', where 'limit' (a value
   1.117 +    one greater than the highest possible positive integer) is used to determine
   1.118 +    whether a negative or positive result is produced.
   1.119 +    """
   1.120 +
   1.121 +    d, r = divmod(value, limit)
   1.122 +    if d == 1:
   1.123 +        mask = limit * 2 - 1
   1.124 +        return -1 - (value ^ mask)
   1.125 +    else:
   1.126 +        return value
   1.127 +
   1.128 +def signed2(value):
   1.129 +    return signed(value, 0x8000)
   1.130 +
   1.131 +def signed4(value):
   1.132 +    return signed(value, 0x80000000)
   1.133 +
   1.134  # Bytecode conversion.
   1.135  
   1.136  class BytecodeReader:
   1.137      def __init__(self, class_file):
   1.138          self.class_file = class_file
   1.139 -        self.position_mapping = {}
   1.140 +        self.position_mapping = LazyDict()
   1.141  
   1.142      def process(self, code, program):
   1.143          self.java_position = 0
   1.144 @@ -120,7 +206,8 @@
   1.145          # NOTE: Not using the index to type the list/array.
   1.146          index = arguments[0] << 8 + arguments[1]
   1.147  
   1.148 -        program.build_list()
   1.149 +        program.build_list()        # Stack: count, list
   1.150 +        program.rot_two()           # Stack: list, count
   1.151          program.setup_loop()
   1.152          program.load_global("range")
   1.153          program.load_const(0)       # Stack: list, count, range, 0
   1.154 @@ -315,12 +402,12 @@
   1.155      getstatic = getfield # Ignoring Java restrictions
   1.156  
   1.157      def goto(self, arguments, program):
   1.158 -        offset = arguments[0] << 8 + arguments[1]
   1.159 +        offset = signed2(arguments[0] << 8 + arguments[1])
   1.160          java_absolute = self.java_position + offset
   1.161          program.jump_absolute(self.position_mapping[java_absolute])
   1.162  
   1.163      def goto_w(self, arguments, program):
   1.164 -        offset = arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]
   1.165 +        offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3])
   1.166          java_absolute = self.java_position + offset
   1.167          program.jump_absolute(self.position_mapping[java_absolute])
   1.168  
   1.169 @@ -378,12 +465,12 @@
   1.170      idiv = fdiv
   1.171  
   1.172      def _if_xcmpx(self, arguments, program, op):
   1.173 -        offset = arguments[0] << 8 + arguments[1]
   1.174 +        offset = signed2(arguments[0] << 8 + arguments[1])
   1.175          java_absolute = self.java_position + offset
   1.176          program.compare_op(op)
   1.177 -        program.jump_to_next(0) # skip if false
   1.178 +        program.jump_to_label(0, "next") # skip if false
   1.179          program.goto(offset)
   1.180 -        program.start_next()
   1.181 +        program.start_label("next")
   1.182  
   1.183      def if_acmpeq(self, arguments, program):
   1.184          # NOTE: No type checking performed.
   1.185 @@ -558,14 +645,156 @@
   1.186      isub = fsub
   1.187      iushr = ishr # Ignoring distinctions between arithmetic and logical shifts
   1.188  
   1.189 -    def ishr(self, arguments, program):
   1.190 +    def ixor(self, arguments, program):
   1.191          # NOTE: No type checking performed.
   1.192          program.binary_xor()
   1.193  
   1.194      def jsr(self, arguments, program):
   1.195 -        offset = arguments[0] << 8 + arguments[1]
   1.196 +        offset = signed2(arguments[0] << 8 + arguments[1])
   1.197 +        java_absolute = self.java_position + offset
   1.198 +        # Store the address of the next instruction.
   1.199 +        program.load_const(self.position_mapping[self.java_position + 3])
   1.200 +        program.jump_absolute(self.position_mapping[java_absolute])
   1.201 +
   1.202 +    def jsr_w(self, arguments, program):
   1.203 +        offset = signed4(arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3])
   1.204          java_absolute = self.java_position + offset
   1.205 +        # Store the address of the next instruction.
   1.206 +        program.load_const(self.position_mapping[self.java_position + 5])
   1.207 +        program.jump_absolute(self.position_mapping[java_absolute])
   1.208 +
   1.209 +    l2d = i2d
   1.210 +    l2f = i2f
   1.211 +
   1.212 +    def l2i(self, arguments, program):
   1.213 +        pass # Preserving Java semantics
   1.214 +
   1.215 +    ladd = iadd
   1.216 +    laload = iaload
   1.217 +    land = iand
   1.218 +    lastore = iastore
   1.219 +
   1.220 +    def lcmp(self, arguments, program):
   1.221 +        # NOTE: No type checking performed.
   1.222 +        program.dup_topx(2)                 # Stack: value1, value2, value1, value2
   1.223 +        program.compare_op(">")             # Stack: value1, value2, result
   1.224 +        program.jump_to_label(0, "equals")
   1.225 +        # True - produce result and branch.
   1.226 +        program.pop_top()                   # Stack: value1, value2
   1.227 +        program.pop_top()                   # Stack: value1
   1.228 +        program.pop_top()                   # Stack:
   1.229 +        program.load_const(1)               # Stack: 1
   1.230 +        program.jump_to_label(None, "next")
   1.231 +        # False - test equality.
   1.232 +        program.start_label("equals")
   1.233 +        program.pop_top()                   # Stack: value1, value2
   1.234 +        program.dup_topx(2)                 # Stack: value1, value2, value1, value2
   1.235 +        program.compare_op("==")            # Stack: value1, value2, result
   1.236 +        program.jump_to_label(0, "less")
   1.237 +        # True - produce result and branch.
   1.238 +        program.pop_top()                   # Stack: value1, value2
   1.239 +        program.pop_top()                   # Stack: value1
   1.240 +        program.pop_top()                   # Stack:
   1.241 +        program.load_const(0)               # Stack: 0
   1.242 +        program.jump_to_label(None, "next")
   1.243 +        # False - produce result.
   1.244 +        program.start_label("less")
   1.245 +        program.pop_top()                   # Stack: value1, value2
   1.246 +        program.pop_top()                   # Stack: value1
   1.247 +        program.pop_top()                   # Stack:
   1.248 +        program.load_const(-1)              # Stack: -1
   1.249 +        program.start_label("next")
   1.250 +
   1.251 +    lconst_0 = iconst_0
   1.252 +    lconst_1 = iconst_1
   1.253 +
   1.254 +    def ldc(self, arguments, program):
   1.255 +        program.load_const(self.class_file.constants[arguments[0] - 1])
   1.256 +
   1.257 +    def ldc_w(self, arguments, program):
   1.258 +        program.load_const(self.class_file.constants[arguments[0] << 8 + arguments[1] - 1])
   1.259 +
   1.260 +    ldc2_w = ldc_w
   1.261 +    ldiv = idiv
   1.262 +    lload = iload
   1.263 +    lload_0 = iload_0
   1.264 +    lload_1 = iload_1
   1.265 +    lload_2 = iload_2
   1.266 +    lload_3 = iload_3
   1.267 +    lmul = imul
   1.268 +    lneg = ineg
   1.269 +
   1.270 +    def lookupswitch(self, arguments, program):
   1.271 +        # Find the offset to the next 4 byte boundary in the code.
   1.272 +        d, r = divmod(self.java_position, 4)
   1.273 +        to_boundary = (4 - r) % 4
   1.274 +        # Get the pertinent arguments.
   1.275 +        arguments = arguments[to_boundary:]
   1.276 +        default = arguments[0] << 24 + arguments[1] << 16 + arguments[2] << 8 + arguments[3]
   1.277 +        npairs = arguments[4] << 24 + arguments[5] << 16 + arguments[6] << 8 + arguments[7]
   1.278 +        # Process the pairs.
   1.279 +        # NOTE: This is not the most optimal implementation.
   1.280 +        pair_index = 8
   1.281 +        for pair in range(0, npairs):
   1.282 +            match = (arguments[pair_index] << 24 + arguments[pair_index + 1] << 16 +
   1.283 +                arguments[pair_index + 2] << 8 + arguments[pair_index + 3])
   1.284 +            offset = signed4(arguments[pair_index + 4] << 24 + arguments[pair_index + 5] << 16 +
   1.285 +                arguments[pair_index + 6] << 8 + arguments[pair_index + 7])
   1.286 +            # Calculate the branch target.
   1.287 +            java_absolute = self.java_position + offset
   1.288 +            # Generate branching code.
   1.289 +            program.dup_top()                                           # Stack: key, key
   1.290 +            program.load_const(match)                                   # Stack: key, key, match
   1.291 +            program.compare_op("==")                                    # Stack: key, result
   1.292 +            program.jump_to_label(0, "end" + str(pair))
   1.293 +            program.pop_top()                                           # Stack: key
   1.294 +            program.pop_top()                                           # Stack:
   1.295 +            program.jump_absolute(self.position_mapping[java_absolute])
   1.296 +            # Generate the label for the end of the branching code.
   1.297 +            program.start_label("end" + str(pair))
   1.298 +            program.pop_top()                                           # Stack: key
   1.299 +            # Update the index.
   1.300 +            pair_index += 8
   1.301 +        # Generate the default.
   1.302 +        java_absolute = self.java_position + default
   1.303 +        program.jump_absolute(self.position_mapping[java_absolute])
   1.304 +
   1.305 +    lor = ior
   1.306 +    lrem = irem
   1.307 +    lreturn = ireturn
   1.308 +    lshl = ishl
   1.309 +    lshr = ishr
   1.310 +    lstore = istore
   1.311 +    lstore_0 = istore_0
   1.312 +    lstore_1 = istore_1
   1.313 +    lstore_2 = istore_2
   1.314 +    lstore_3 = istore_3
   1.315 +    lsub = isub
   1.316 +    lushr = iushr
   1.317 +    lxor = ixor
   1.318 +
   1.319 +    def monitorenter(self, arguments, program):
   1.320          # NOTE: To be implemented.
   1.321 +        pass
   1.322 +
   1.323 +    def monitorexit(self, arguments, program):
   1.324 +        # NOTE: To be implemented.
   1.325 +        pass
   1.326 +
   1.327 +    def multianewarray(self, arguments, program):
   1.328 +        program.build_list()        # Stack: count1, count2, ..., countN, list
   1.329 +        program.rot_two()           # Stack: count1, count2, ..., list, countN
   1.330 +        program.setup_loop()
   1.331 +        program.load_global("range")
   1.332 +        program.load_const(0)       # Stack: list, count, range, 0
   1.333 +        program.rot_three()         # Stack: list, 0, count, range
   1.334 +        program.rot_three()         # Stack: list, range, 0, count
   1.335 +        program.call_function(2)    # Stack: list, range_list
   1.336 +        program.get_iter()          # Stack: list, iter
   1.337 +        program.for_iter()          # Stack: list, iter, value
   1.338 +        for i in range(0, arguments[2]):
   1.339 +                                                # Stack: 
   1.340 +            self.anewarray(arguments, program)  # Stack: list, iter
   1.341  
   1.342      def wide(self, code, program):
   1.343          # NOTE: To be implemented.