# HG changeset patch # User paulb # Date 1193520877 0 # Node ID 4b8a1d2220b151ab5596c17cb68a6fdc90add6c6 # Parent f1327f4917d39bc193ed99b9d342ab13c37a0640 [project @ 2007-10-27 21:34:37 by paulb] Changed the multipart field handling to write uploaded files to disk as temporary files for access through FileContent objects, and to avoid reading the entire request into memory. Made the Stream wrapper class more efficient for read operations, adding the possibility of reading a limited number of bytes. diff -r f1327f4917d3 -r 4b8a1d2220b1 WebStack/JavaServlet.py --- a/WebStack/JavaServlet.py Sat Oct 27 21:31:02 2007 +0000 +++ b/WebStack/JavaServlet.py Sat Oct 27 21:34:37 2007 +0000 @@ -3,7 +3,7 @@ """ Java Servlet classes. -Copyright (C) 2004, 2005, 2006 Paul Boddie +Copyright (C) 2004, 2005, 2006, 2007 Paul Boddie This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -34,14 +34,15 @@ import javax.mail import java.util import java.net +import java.io class Stream: """ - Wrapper around java.io.ServletInputStream. + Wrapper around java.io.InputStream. """ - bufsize = 100 + bufsize = 1024 def __init__(self, stream): @@ -49,31 +50,40 @@ self.stream = stream - def read(self): + def read(self, limit=None): "Read the entire message, returning it as a string." + bufsize = limit or self.bufsize characters = StringIO() + a = jarray.zeros(bufsize, 'b') while 1: - c = self.stream.read() - if c == -1: + nread = self.stream.read(a, 0, bufsize) + if nread != -1: + self._copy(a, characters, nread) + if nread != bufsize or nread == limit: return characters.getvalue() - else: - characters.write(chr(c)) - def readline(self): + def readline(self, bufsize=None): "Read a line from the stream, returning it as a string." + bufsize = bufsize or self.bufsize characters = StringIO() - a = jarray.zeros(self.bufsize, 'b') + a = jarray.zeros(bufsize, 'b') while 1: - nread = self.stream.readLine(a, 0, self.bufsize) + nread = self.stream.readLine(a, 0, bufsize) if nread != -1: self._copy(a, characters, nread) - if nread != self.bufsize: + if nread != bufsize: return characters.getvalue() + def reset(self): + + "Attempt to reset the stream." + + self.stream.reset() + def _unsigned(self, i): if i < 0: return chr(256 + i) @@ -116,6 +126,10 @@ self.message_fields = None + # Resource management. + + self.tempfiles = [] + def commit(self): """ @@ -124,6 +138,8 @@ """ self.get_response_stream().close() + for tempfile in self.tempfiles: + tempfile.delete() # Server-related methods. @@ -597,7 +613,7 @@ session = javax.mail.Session.getDefaultInstance(java.util.Properties()) - # Fake a multipart message. + # Fake the headers. str_buffer = java.io.StringWriter() fp = self.get_request_stream() @@ -607,62 +623,80 @@ str_buffer.write(fp.read()) str_buffer.close() - # Re-read that message. + # Concatenate the headers with the rest of the stream. + + header_stream = java.io.StringBufferInputStream(str_buffer.toString()) + input_stream = self.request.getInputStream() + message = javax.mail.internet.MimeMessage(session, java.io.SequenceInputStream(header_stream, input_stream)) - input_stream = java.io.StringBufferInputStream(str_buffer.toString()) - message = javax.mail.internet.MimeMessage(session, input_stream) - content = message.getContent() - return self._get_fields_from_multipart(content, encoding) + # Collect the fields by traversing the message. - def _get_fields_from_multipart(self, content, encoding): + fields = {} + self._get_fields_from_multipart(fields, message.getContent(), encoding) + return fields + + def _get_fields_from_multipart(self, fields, content, encoding): "Get fields from multipart 'content'." - fields = {} for i in range(0, content.getCount()): part = content.getBodyPart(i) - subcontent = part.getContent() + self._get_field_from_multipart(fields, part, encoding) - # Convert input stream content. + def _get_field_from_multipart(self, fields, part, encoding): - if isinstance(subcontent, java.io.InputStream): - subcontent = Stream(subcontent) + "Get a field from the given 'part'." - # Record string content. + if not part.getContentType().startswith("multipart"): + + # Should get: form-data; name="x" - if isinstance(subcontent, (type(""), Stream)): + disposition = self.parse_header_value(HeaderValue, part.getHeader("Content-Disposition")[0]) - # Should get: form-data; name="x" + # Store and optionally convert the field. - disposition = self.parse_header_value(HeaderValue, part.getHeader("Content-Disposition")[0]) + if disposition.name is not None: + field_name = disposition.name[1:-1] - # Store and optionally convert the field. + # Test whether the part should be written to a temporary file. - if disposition.name is not None: - field_name = disposition.name[1:-1] + if part.getHeader("Content-Type") is not None: # Using properly decoded header values. - if part.getHeader("Content-Type") is not None: - headers = {} - for header in part.getAllHeaders(): - headers[header.getName()] = self.parse_header_value(HeaderValue, header.getValue()) - field_value = FileContent(subcontent, headers) - else: - field_value = self.decode_path(subcontent, encoding) + headers = {} + for header in part.getAllHeaders(): + headers[header.getName()] = self.parse_header_value(HeaderValue, header.getValue()) + + # Write to a temporary file and then open that file. - # Store the entry in the fields dictionary. + tempfile = java.io.File.createTempFile(str(id(self)), field_name) + temp_stream = java.io.FileOutputStream(tempfile) + try: + part.writeTo(temp_stream) + finally: + self.tempfiles.append(tempfile) + + # The file must be treated like a message. - if not fields.has_key(field_name): - fields[field_name] = [] - fields[field_name].append(field_value) + temp_part = javax.mail.internet.MimeBodyPart(java.io.FileInputStream(tempfile)) + field_value = FileContent(Stream(temp_part.getRawInputStream()), headers) + + else: + subcontent = part.getContent() + field_value = self.decode_path(subcontent, encoding) + + # Store the entry in the fields dictionary. - # Otherwise, descend deeper into the multipart hierarchy. + if not fields.has_key(field_name): + fields[field_name] = [] + fields[field_name].append(field_value) - else: - fields.update(self._get_fields_from_multipart(subcontent, encoding)) + # Otherwise, descend deeper into the multipart hierarchy. - return fields + else: + subcontent = part.getContent() + fields.update(self._get_fields_from_multipart(subcontent, encoding)) class Session: