#!/usr/bin/env python

"""
Request helper classes.

Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

# The text (Unicode) string type: 'unicode' where it exists, 'str' otherwise.

try:
    _text_type = unicode
except NameError:
    _text_type = str

def _has_key(mapping, key):

    """
    Return whether the given 'mapping' contains 'key', using the has_key
    method where the mapping provides one (as the code originally did) and the
    'in' operator otherwise.
    """

    if hasattr(mapping, "has_key"):
        return mapping.has_key(key)
    return key in mapping

class MessageBodyStream:

    """
    A naive stream class, providing a non-blocking stream for transactions when
    reading the message body. According to the HTTP standard, the following
    things decide how long the message is:

      * Use of the Content-Length header field (see 4.4 Message Length).
      * Use of the Transfer-Coding header field (see 3.6 Transfer Codings),
        particularly when the "chunked" coding is used.

    NOTE: For now, we don't support the Transfer-Coding business.
    """

    def __init__(self, stream, headers):

        """
        Initialise the object with the given underlying 'stream'. The supplied
        'headers' in a dictionary-style object are used to examine the nature of
        the request: the "Content-Length" value (zero if absent or empty)
        bounds how much may be read. A non-numeric value raises ValueError.
        """

        self.stream = stream
        self.headers = headers

        # A negative length would be handed to the stream as "read everything".

        self.length = max(0, int(headers.get("Content-Length") or 0))

    def read(self, limit=None):

        """
        Reads up to 'limit' characters from the message body, or all remaining
        data if 'limit' is omitted, None or negative. No more than the
        remaining declared length of the body is ever requested from the
        underlying stream.
        """

        if limit is None or limit < 0:
            limit = self.length
        else:
            limit = min(limit, self.length)
        data = self.stream.read(limit)
        self.length = self.length - len(data)
        return data

    def readline(self, n=None):

        """
        Reads a single line of data from the message body, including the
        terminating newline if one is found. If 'n' is given and is not
        negative, at most 'n' characters are read.
        """

        if n is not None and n < 0:
            n = None

        data = []
        while self.length > 0 and (n is None or len(data) < n):
            c = self.read(1)

            # The stream ended before the declared length was reached: stop
            # instead of looping forever on empty reads.

            if not c:
                break

            data.append(c)
            if c == "\n":
                break
        return "".join(data)

    def readlines(self):

        """
        Reads all remaining data from the message body, splitting it into lines
        and returning the data as a list of lines. Each line retains its
        newline; a final unterminated line is included only if it is not empty.
        """

        lines = self.read().split("\n")

        # The last element is whatever follows the final newline (possibly
        # nothing at all).

        last = lines.pop()
        lines = [line + "\n" for line in lines]
        if last:
            lines.append(last)
        return lines

    def close(self):

        "Closes the stream."

        self.stream.close()

class HeaderDict:

    "A dictionary for headers whose keys are coerced to lower-case strings."

    def __init__(self, headers=None):

        """
        Initialise the dictionary, copying any items from the optional
        'headers' mapping.
        """

        self.headers = {}
        if headers is not None:
            self.update(headers)

    # Lower-case-string-coercing methods.

    def __getitem__(self, key):
        return self.headers[str(key).lower()]

    def __setitem__(self, key, value):
        self.headers[str(key).lower()] = value

    def get(self, key, default=None):
        return self.headers.get(str(key).lower(), default)

    def has_key(self, key):
        return str(key).lower() in self.headers

    # Forwarding methods.

    def keys(self):
        return self.headers.keys()

    def values(self):
        return self.headers.values()

    def items(self):
        return self.headers.items()

    # Derived from the above.

    def __contains__(self, key):
        return self.has_key(key)

    def update(self, other):
        for k, v in other.items():
            self[k] = v

    def __repr__(self):
        return "HeaderDict(%r)" % self.headers

class HeaderValue:

    "A container for header information."

    def __init__(self, principal_value, **attributes):

        """
        Initialise the container with the given 'principal_value' and optional
        keyword attributes representing the key=value pairs which accompany the
        'principal_value'.
        """

        self.principal_value = principal_value
        self.attributes = attributes

    def __getattr__(self, name):

        """
        Provide the value of the key=value attribute having the given 'name',
        raising AttributeError if there is no such attribute.
        """

        # This method only runs when normal lookup fails, so the attributes
        # are fetched from the instance dictionary: using self.attributes here
        # would recurse endlessly on an instance which has not been initialised
        # (as happens during copying).

        attributes = self.__dict__.get("attributes", {})
        if name in attributes:
            return attributes[name]
        raise AttributeError(name)

    def __repr__(self):
        return "HeaderValue(%r)" % str(self)

    def __str__(self):

        """
        Format the header value object, producing a string suitable for the
        response header field. A UnicodeError is raised if the result would
        contain non-ASCII characters.
        """

        l = []
        if self.principal_value:
            l.append(self.principal_value)
        for name, value in self.attributes.items():
            l.append("; ")
            l.append("%s=%s" % (name, value))
        text = "".join(l)

        # Make sure that only ASCII is used.

        encoded = text.encode("US-ASCII")

        # Where plain strings are byte strings, the encoded form is the plain
        # string to return; otherwise, the checked text itself must be returned
        # since __str__ has to produce a plain string.

        if isinstance(encoded, str):
            return encoded
        return text

class ContentType(HeaderValue):

    "A container for content type information."

    def __init__(self, media_type, charset=None, **attributes):

        """
        Initialise the container with the given 'media_type', an optional
        'charset', and optional keyword attributes representing the key=value
        pairs which qualify content types.
        """

        if charset is not None:
            attributes["charset"] = charset
        HeaderValue.__init__(self, media_type, **attributes)

    def __getattr__(self, name):

        """
        Provide "media_type" (the principal value), "charset" (None if not
        specified) or any other key=value attribute having the given 'name',
        raising AttributeError if there is no such attribute.
        """

        if name == "media_type":
            return self.principal_value

        # See HeaderValue.__getattr__ for the reason for using __dict__.

        attributes = self.__dict__.get("attributes", {})
        if name == "charset":
            return attributes.get("charset")
        elif name in attributes:
            return attributes[name]
        else:
            raise AttributeError(name)

class Cookie:

    """
    A simple cookie class for frameworks which do not return cookies in
    structured form. Instances of this class contain the following attributes:

      * name  - the name associated with the cookie
      * value - the value retained by the cookie
    """

    def __init__(self, name, value):
        self.name = name
        self.value = value

class FileTooLargeError(Exception):

    "An exception indicating that an uploaded file was too large."

    pass

class FileContent:

    """
    A simple class representing uploaded file content. This is useful in holding
    metadata as well as being an indicator of such content in environments such
    as Jython where it is not trivial to differentiate between plain strings and
    Unicode in a fashion also applicable to CPython.

    Instances of this class contain the following attributes:

      * stream   - a stream object through which the content of an uploaded file
                   may be accessed
      * content  - a plain string containing the contents of the uploaded file
      * filename - a plain string containing the supplied filename of the
                   uploaded file
      * headers  - a dictionary containing the headers associated with the
                   uploaded file
      * limit    - a limit, if previously specified, on the size of uploaded
                   content
    """

    def __init__(self, stream, headers=None, limit=None):

        """
        Initialise the object with a 'stream' through which the file can be
        read, along with optional 'headers' describing the content. An optional
        'limit' can be specified to state the maximum number of bytes that may
        be read before the content is considered too large.
        """

        self.stream = stream
        self.headers = headers or HeaderDict()
        self.limit = limit
        self.cache = None

    def __getattr__(self, name):

        """
        Provides a property value when 'name' is specified as "content" or as
        "filename". Accessing "content" may raise FileTooLargeError; "filename"
        is None where no filename is available. Any other 'name' raises
        AttributeError.
        """

        if name == "content":

            if self.cache is not None:
                return self.cache

            # A stream which can be reset is read afresh on each access; other
            # streams can only be read once and so the result is retained.

            if self.reset():
                return self._read()
            else:
                self.cache = self._read()
                return self.cache

        elif name == "filename":
            try:
                filename = self.headers["Content-Disposition"].filename
            except (KeyError, AttributeError):
                return None

            # Remove surrounding quotes, but only where they are present, so
            # that unquoted filenames do not lose their first and last
            # characters.

            if len(filename) >= 2 and filename[:1] == '"' and filename[-1:] == '"':
                return filename[1:-1]
            return filename

        else:
            raise AttributeError(name)

    def _read(self):

        """
        Read from the stream up to any limit, raising FileTooLargeError if the
        limit is exceeded.
        """

        if self.limit is None:
            return self.stream.read()

        s = self.stream.read(self.limit)

        # Anything left after reading 'limit' bytes means the content is too big.

        if self.stream.read(1):
            raise FileTooLargeError()
        return s

    def reset(self):

        "Reset the stream providing the data, returning whether this succeeded."

        # Python file objects.

        if hasattr(self.stream, "seek"):
            self.stream.seek(0)
            return 1

        # Java input streams.

        elif hasattr(self.stream, "reset"):
            self.stream.reset()
            return 1

        # Other streams.

        else:
            return 0

    def __str__(self):
        return self.content

def parse_header_value(header_class, header_value_str):

    """
    Create an object of the given 'header_class' by determining the details
    of the given 'header_value_str' - a string containing the value of a
    particular header. Where 'header_value_str' is None, the object is created
    with None as its principal value.
    """

    if header_value_str is None:
        return header_class(None)

    l = header_value_str.split(";")
    attributes = {}

    # Find the attributes.

    principal_value, attributes_str = l[0].strip(), l[1:]

    for attribute_str in attributes_str:

        # Split on the first "=" only: values may themselves contain "=".

        t = attribute_str.split("=", 1)
        if len(t) > 1:
            name, value = t[0].strip(), t[1].strip()
            attributes[name] = value

    return header_class(principal_value, **attributes)

def parse_headers(headers):

    """
    Parse the given 'headers' dictionary (containing names mapped to values),
    returning a HeaderDict mapping names to HeaderValue objects.
    """

    new_headers = HeaderDict()
    for name, value in headers.items():
        new_headers[name] = parse_header_value(HeaderValue, value)
    return new_headers

def get_storage_items(storage_body):

    """
    Return the items (2-tuples of the form key, values) from the 'storage_body'.
    This is used in conjunction with FieldStorage objects.
    """

    return [(key, storage_body[key]) for key in storage_body.keys()]

def get_body_fields(field_items, encoding):

    """
    Returns a dictionary mapping field names to lists of field values for all
    entries in the given 'field_items' (2-tuples of the form key, values) using
    the given 'encoding'.
    This is used in conjunction with FieldStorage objects.
    """

    fields = {}

    for field_name, field_values in field_items:
        field_name = decode_value(field_name, encoding)

        # A single value is treated as a list of one value.

        if not isinstance(field_values, list):
            field_values = [field_values]

        fields[field_name] = [
            get_body_field_or_file(field_value, encoding)
            for field_value in field_values
            ]

    return fields

def get_body_field_or_file(field_value, encoding):

    """
    Returns the appropriate value for the given 'field_value' either for a
    normal form field (thus employing the given 'encoding' and returning a
    decoded string) or for a file upload field (returning a FileContent
    object).
    """

    if hasattr(field_value, "headers") and _has_key(field_value.headers, "content-type"):
        headers = parse_headers(field_value.headers)

        # Detect stray FileUpload objects (eg. with Zope).

        if hasattr(field_value, "read"):
            return FileContent(field_value, headers)
        else:
            return FileContent(field_value.file, headers)
    else:
        return get_body_field(field_value, encoding)

def get_body_field(field_str, encoding):

    """
    Returns the appropriate value for the given 'field_str' string using the
    given 'encoding'.
    """

    # Detect stray FieldStorage objects (eg. with Webware).

    if hasattr(field_str, "value"):
        return get_body_field(field_str.value, encoding)
    else:
        return decode_value(field_str, encoding)

def decode_value(s, encoding):

    """
    Return the string 's' as Unicode text, decoding it using the given
    'encoding' where possible and falling back to ISO-8859-1 where 'encoding'
    is None, is not known, or does not apply to the data. Text which is already
    Unicode is returned unchanged.
    """

    # Decoding text which is already Unicode is an error, so pass it through.

    if isinstance(s, _text_type):
        return s

    if encoding is not None:
        try:
            return _text_type(s, encoding)

        # UnicodeError: the data is not valid in the encoding.
        # LookupError: the encoding itself is not known.

        except (UnicodeError, LookupError):
            pass

    # NOTE: Hacks to permit graceful failure.

    return _text_type(s, "iso-8859-1")

def get_fields_from_query_string(query_string, decoder):

    """
    Returns a dictionary mapping field names to lists of values for the data
    encoded in the given 'query_string'. Use the given 'decoder' function or
    method to process the URL-encoded values. Fields given without a value are
    mapped to the empty string.
    """

    fields = {}

    for pair in query_string.split("&"):

        # Split on the first "=" only so that values containing "=" are kept.

        t = pair.split("=", 1)
        name = decoder(t[0])

        if len(t) == 2:
            value = decoder(t[1])
        else:
            value = ""

        # NOTE: Remove empty names.

        if name:
            if name not in fields:
                fields[name] = []
            fields[name].append(value)

    return fields

def filter_fields(all_fields, fields_from_path):

    """
    Taking items from the 'all_fields' dictionary, produce a new dictionary
    which does not contain items from the 'fields_from_path' dictionary.
    Fields left without any values are omitted. Return a new dictionary.
    """

    fields = {}
    for field_name, field_values in all_fields.items():

        # Find the path values for this field (for filtering below).

        if _has_key(fields_from_path, field_name):
            field_from_path_values = fields_from_path[field_name]
            if not isinstance(field_from_path_values, list):
                field_from_path_values = [field_from_path_values]
        else:
            field_from_path_values = []

        # Filter path values.

        remaining = [
            field_value for field_value in field_values
            if field_value not in field_from_path_values
            ]

        # Remove filtered fields.

        if remaining:
            fields[field_name] = remaining

    return fields

# vim: tabstop=4 expandtab shiftwidth=4