paulb@1 | 1 | #!/usr/bin/env python |
paulb@1 | 2 | |
paulb@1 | 3 | """ |
paulb@1 | 4 | A simple parallel processing API for Python, inspired somewhat by the thread |
paulb@1 | 5 | module, slightly less by pypar, and slightly less still by pypvm. |
paulb@1 | 6 | |
paulb@3 | 7 | Thread-style Processing |
paulb@3 | 8 | ----------------------- |
paulb@3 | 9 | |
paulb@1 | 10 | To create new processes to run a function or any callable object, specify the |
paulb@1 | 11 | "callable" and any arguments as follows: |
paulb@1 | 12 | |
paulb@1 | 13 | channel = start(fn, arg1, arg2, named1=value1, named2=value2) |
paulb@1 | 14 | |
paulb@1 | 15 | This returns a channel which can then be used to communicate with the created |
paulb@1 | 16 | process. Meanwhile, in the created process, the given callable will be invoked |
paulb@1 | 17 | with another channel as its first argument followed by the specified arguments: |
paulb@1 | 18 | |
paulb@1 | 19 | def fn(channel, arg1, arg2, named1, named2): |
paulb@3 | 20 | # Read from and write to the channel. |
paulb@1 | 21 | # Return value is ignored. |
paulb@1 | 22 | ... |
paulb@1 | 23 | |
paulb@3 | 24 | Fork-style Processing |
paulb@3 | 25 | --------------------- |
paulb@3 | 26 | |
paulb@1 | 27 | To create new processes in a similar way to that employed when using os.fork |
paulb@1 | 28 | (i.e. the fork system call on various operating systems), use the following |
paulb@1 | 29 | method: |
paulb@1 | 30 | |
paulb@1 | 31 | channel = create() |
paulb@1 | 32 | if channel.pid == 0: |
paulb@1 | 33 | # This code is run by the created process. |
paulb@3 | 34 | # Read from and write to the channel to communicate with the |
paulb@1 | 35 | # creating/calling process. |
paulb@1 | 36 | # An explicit exit of the process may be desirable to prevent the process |
paulb@1 | 37 | # from running code which is intended for the creating/calling process. |
paulb@1 | 38 | ... |
paulb@1 | 39 | else: |
paulb@1 | 40 | # This code is run by the creating/calling process. |
paulb@3 | 41 | # Read from and write to the channel to communicate with the created |
paulb@1 | 42 | # process. |
paulb@1 | 43 | ... |
paulb@3 | 44 | |
paulb@3 | 45 | Message Exchanges |
paulb@3 | 46 | ----------------- |
paulb@3 | 47 | |
paulb@3 | 48 | When creating many processes, each providing results for the consumption of the |
paulb@3 | 49 | main process, the collection of those results in an efficient fashion can be |
paulb@3 | 50 | problematic: if some processes take longer than others, and if we decide to read |
paulb@3 | 51 | from those processes when they are not ready instead of other processes which |
paulb@3 | 52 | are ready, the whole activity will take much longer than necessary. |
paulb@3 | 53 | |
paulb@3 | 54 | One solution to the problem of knowing when to read from channels is to create |
paulb@3 | 55 | an Exchange object, initialising it with a list of channels through which data |
paulb@3 | 56 | is expected to arrive: |
paulb@3 | 57 | |
paulb@3 | 58 | exchange = Exchange(channels) |
paulb@3 | 59 | |
paulb@3 | 60 | We may then check the exchange to see whether any data is ready to be received; |
paulb@3 | 61 | for example: |
paulb@3 | 62 | |
paulb@3 | 63 | for channel in exchange.ready(): |
paulb@3 | 64 | # Read from and write to the channel. |
paulb@3 | 65 | ... |
paulb@3 | 66 | |
paulb@3 | 67 | If we do not wish to wait indefinitely for a list of channels, we can set a |
paulb@3 | 68 | timeout value as an argument to the ready method (as a floating point number |
paulb@3 | 69 | specifying the timeout in seconds, where 0 means a non-blocking poll as stated |
paulb@3 | 70 | in the select module's select function documentation). |
paulb@1 | 71 | """ |
paulb@1 | 72 | |
paulb@1 | 73 | import os |
paulb@1 | 74 | import sys |
paulb@5 | 75 | import select |
paulb@3 | 76 | from signal import signal, SIGCHLD |
paulb@1 | 77 | |
paulb@1 | 78 | try: |
paulb@1 | 79 | import cPickle as pickle |
paulb@1 | 80 | except ImportError: |
paulb@1 | 81 | import pickle |
paulb@1 | 82 | |
class Channel:

    """
    A communications channel between two processes.

    Objects are exchanged as pickles: they are written to 'write_pipe' and read
    from 'read_pipe', both of which are file-like objects supplied by the
    creator of the channel.
    """

    def __init__(self, pid, read_pipe, write_pipe):

        """
        Initialise the channel with a process identifier 'pid', a 'read_pipe'
        from which messages will be received, and a 'write_pipe' into which
        messages will be sent.
        """

        self.pid = pid
        self.read_pipe = read_pipe
        self.write_pipe = write_pipe

    def __del__(self):

        """
        Wait for the process identified by this channel's 'pid' when the channel
        is discarded. Nothing is done when 'pid' is 0.
        """

        # NOTE: Hack until the signals vs. pipes behaviour is fixed.

        # Use getattr so that a channel whose initialisation failed part-way
        # does not raise a second error during finalisation.

        pid = getattr(self, "pid", 0)
        if pid != 0:
            try:
                # Wait for this channel's own process: os.wait() would collect
                # whichever child happens to terminate first, which may belong
                # to another channel.
                os.waitpid(pid, 0)
            except OSError:
                # The process has already been waited for, or it is not a
                # child of this process.
                pass

    def send(self, obj):

        "Send the given object 'obj' through the channel."

        pickle.dump(obj, self.write_pipe)

        # Flush so that the pickle is not left sitting in the write buffer.
        self.write_pipe.flush()

    def receive(self):

        """
        Receive an object through the channel, returning the object. If the
        received object is an Exception instance, raise it instead of returning
        it.
        """

        # NOTE: Unpickling is only safe when the other end of the channel is
        # NOTE: trusted.

        obj = pickle.load(self.read_pipe)
        if isinstance(obj, Exception):
            raise obj
        return obj
paulb@1 | 125 | |
class Exchange:

    """
    A collection of channels which can be asked which of its members currently
    have something waiting to be read.
    """

    def __init__(self, channels):

        "Initialise the exchange with the given 'channels'."

        # Map each channel's read pipe to the channel, since select reports
        # readiness in terms of the pipes it was given.

        self.readables = dict((ch.read_pipe, ch) for ch in channels)

    def ready(self, timeout=None):

        """
        Return a list of the channels which can be read from, waiting for at
        most 'timeout' seconds for at least one of them to become readable. If
        'timeout' is omitted or None, wait until a channel is readable. An
        empty list is returned if the underlying select call fails.
        """

        # A timeout of None makes select block, just as omitting it would.

        try:
            selected = select.select(self.readables.keys(), [], [], timeout)
        except select.error:
            return []

        # Only the readable set is of interest: nothing was registered for
        # writing or for exceptional conditions.

        return [self.readables[pipe] for pipe in selected[0]]
paulb@3 | 160 | |
def create():

    """
    Create a new process, returning a communications channel to both the
    creating process and the created process.

    In the created process the returned channel has a 'pid' of 0; in the
    creating process the channel's 'pid' is the process identifier of the
    created process. An OSError raised by os.fork is propagated after the pipes
    have been closed.
    """

    parent_read_fd, child_write_fd = os.pipe()
    child_read_fd, parent_write_fd = os.pipe()

    try:
        pid = os.fork()
    except OSError:
        # No process was created, so none of the descriptors will be used.
        for fd in (parent_read_fd, child_write_fd, child_read_fd, parent_write_fd):
            os.close(fd)
        raise

    if pid == 0:
        read_fd, write_fd = child_read_fd, child_write_fd
        unused_fds = (parent_read_fd, parent_write_fd)
    else:
        read_fd, write_fd = parent_read_fd, parent_write_fd
        unused_fds = (child_read_fd, child_write_fd)

    # Close the ends belonging to the other process. Apart from leaking
    # descriptors, keeping a copy of the other side's write end open would
    # prevent a reader from ever seeing end-of-file when that side exits.

    for fd in unused_fds:
        os.close(fd)

    # Pickles are byte streams, so the pipes are opened in binary mode.

    return Channel(pid, os.fdopen(read_fd, "rb"), os.fdopen(write_fd, "wb"))
paulb@1 | 176 | |
def start(callable, *args, **kwargs):

    """
    Create a new process which shall start running in the given 'callable'.
    Return a communications channel to the creating process, and supply such a
    channel to the created process as the 'channel' parameter in the given
    'callable'. Additional arguments to the 'callable' can be given as
    additional arguments to this function.

    The return value of the 'callable' is ignored. If the 'callable' raises an
    exception, the exception value is sent through the channel; where that
    value cannot be pickled, a RuntimeError describing it is sent instead. The
    created process always exits once the 'callable' has finished and never
    returns from this function.
    """

    channel = create()
    if channel.pid != 0:
        return channel

    # Only the created process gets this far.

    try:
        try:
            callable(channel, *args, **kwargs)
        except:
            # Deliberately broad: whatever stopped the callable is reported
            # through the channel rather than being lost with the process.
            exc_type, exc_value = sys.exc_info()[:2]
            try:
                pickle.dumps(exc_value)
            except Exception:
                # An unpicklable exception would make the send fail, leaving
                # the creating process with no indication of the failure.
                exc_value = RuntimeError("%s: %s" % (
                    getattr(exc_type, "__name__", exc_type), exc_value))
            channel.send(exc_value)
    finally:
        # Terminate here so that the created process does not continue into
        # code intended for the creating process.
        sys.exit(0)
paulb@1 | 199 | |
paulb@3 | 200 | # A handler which waits for terminated child processes (currently disabled). |
paulb@3 | 201 | |
paulb@5 | 202 | #def handler(number, frame): |
paulb@5 | 203 | # os.wait() |
paulb@3 | 204 | |
paulb@5 | 205 | #signal(SIGCHLD, handler) |
paulb@3 | 206 | |
paulb@1 | 207 | # vim: tabstop=4 expandtab shiftwidth=4 |