#!/usr/bin/env python

"""
Generic Web framework interfaces.

Copyright (C) 2004, 2005, 2006 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

--------

The WebStack architecture consists of the following layers:

  * Framework - The underlying Web framework implementation.
  * Adapter   - Code operating under the particular framework which creates
                WebStack abstractions and issues them to the application.
  * Resources - Units of functionality operating within the hosted Web
                application.

Resources can act as both content producers within an application and as request
dispatchers to other resources; in the latter role, they may be referred to as
directors.
"""

import urllib
from WebStack.Helpers.Request import Cookie, parse_header_value, ContentType, HeaderValue

class EndOfResponse(Exception):

    "An exception which signals the end of a response."

    pass

class Transaction:

    """
    A generic transaction interface containing framework-specific methods to be
    overridden.

    Methods which raise NotImplementedError must be provided by the
    framework-specific subclasses; the remaining methods are shared helpers
    which build upon those framework-specific methods.
    """

    # The default charset ties output together with body field interpretation.

    default_charset = "iso-8859-1"

    # The default path info is provided here, although the manipulated virtual
    # path info is an instance attribute set through instances of subclasses of
    # this class.

    path_info = None

    # The default user is provided here, although the manipulated user is an
    # instance attribute set through instances of subclasses of this class.

    user = None

    def commit(self):

        """
        A special method, synchronising the transaction with framework-specific
        objects.
        """

        pass

    def rollback(self):

        """
        A special method, partially synchronising the transaction with
        framework-specific objects, but discarding previously emitted content
        that is to be replaced by an error message.
        """

        pass

    # Utility methods.

    def parse_header_value(self, header_class, header_value_str):

        """
        Create an object of the given 'header_class' by determining the details
        of the given 'header_value_str' - a string containing the value of a
        particular header.
        """

        # Now uses the WebStack.Helpers.Request function of the same name.

        return parse_header_value(header_class, header_value_str)

    def parse_content_type(self, content_type_field):

        """
        Parse the given 'content_type_field' - a value comparable to that found
        in an HTTP request header for "Content-Type".

        Returns the result of parsing the field using the ContentType class.
        """

        return self.parse_header_value(ContentType, content_type_field)

    def format_header_value(self, value):

        """
        Format the given header 'value'. Typically, this just ensures the usage
        of US-ASCII.
        """

        return value.encode("US-ASCII")

    def encode_cookie_value(self, value):

        """
        Encode the given cookie 'value'. This ensures the usage of US-ASCII
        through the encoding of Unicode objects as URL-encoded UTF-8 text.
        """

        return urllib.quote(value.encode("UTF-8")).encode("US-ASCII")

    def decode_cookie_value(self, value):

        """
        Decode the given cookie 'value', reversing the URL-encoding and
        interpreting the result as UTF-8 text.

        Returns the decoded value as a Unicode object.
        """

        return unicode(urllib.unquote(value), "UTF-8")

    def process_cookies(self, cookie_dict, using_strings=0):

        """
        Process the given 'cookie_dict', returning a dictionary mapping cookie names
        to cookie objects where the names and values have been decoded from the form
        used in the cookies retrieved from the request.

        The optional 'using_strings', if set to 1, treats the 'cookie_dict' as a
        mapping of cookie names to values; otherwise, each item in 'cookie_dict'
        is expected to provide its value through a 'value' attribute.
        """

        cookies = {}
        for name in cookie_dict.keys():
            if using_strings:
                value = cookie_dict[name]
            else:
                cookie = cookie_dict[name]
                value = cookie.value

            # Both the name and the value are stored in encoded form.

            cookie_name = self.decode_cookie_value(name)
            cookie_value = self.decode_cookie_value(value)
            cookies[cookie_name] = Cookie(cookie_name, cookie_value)
        return cookies

    def parse_content_preferences(self, accept_preference):

        """
        Returns the preferences as requested by the user agent. The preferences are
        returned as a list of codes in the same order as they appeared in the
        appropriate environment variable. In other words, the explicit weighting
        criteria are ignored.

        As the 'accept_preference' parameter, values for language and charset
        preferences are appropriate. Where 'accept_preference' is None, an empty
        list is returned.

        Empty codes (arising from an empty value or from stray commas) are not
        included in the result.
        """

        if accept_preference is None:
            return []

        accept_prefs = []
        for accept_def in accept_preference.split(","):

            # Only the code before any ";" is of interest: anything following
            # it (such as weighting criteria) is discarded.

            code = accept_def.split(";")[0].strip()
            if code:
                accept_prefs.append(code)
        return accept_prefs

    def convert_to_list(self, value):

        """
        Returns a single element list containing 'value' if it is not itself a list, a
        tuple, or None. If 'value' is a list then it is itself returned; if 'value' is a
        tuple then a new list containing the same elements is returned; if 'value' is None
        then an empty list is returned.

        Instances of list and tuple subclasses are treated as lists and tuples
        respectively.
        """

        if isinstance(value, list):
            return value
        elif isinstance(value, tuple):
            return list(value)
        elif value is None:
            return []
        else:
            return [value]

    # Public utility methods.

    def decode_path(self, path, encoding=None):

        """
        From the given 'path', use the optional 'encoding' (if specified) to decode the
        information and convert it to Unicode. Upon failure for a specified 'encoding'
        or where 'encoding' is not specified, use the default character encoding to
        perform the conversion.

        Returns the 'path' as a Unicode value without "URL encoded" character values.
        """

        unquoted_path = urllib.unquote(path)
        if encoding is not None:
            try:
                return unicode(unquoted_path, encoding)
            except UnicodeError:

                # Deliberately fall back to the default charset below.

                pass
        return unicode(unquoted_path, self.default_charset)

    def encode_path(self, path, encoding=None):

        """
        Encode the given 'path', using the optional 'encoding' (if specified) or the
        default encoding where 'encoding' is not specified, and produce a suitable "URL
        encoded" string.
        """

        if encoding is None:
            encoding = self.default_charset
        return urllib.quote(path.encode(encoding))

    # Server-related methods.

    def get_server_name(self):

        "Returns the server name."

        raise NotImplementedError("get_server_name")

    def get_server_port(self):

        "Returns the server port as a string."

        raise NotImplementedError("get_server_port")

    # Request-related methods.

    def get_request_stream(self):

        """
        Returns the request stream for the transaction.
        """

        raise NotImplementedError("get_request_stream")

    def get_request_method(self):

        """
        Returns the request method.
        """

        raise NotImplementedError("get_request_method")

    def get_headers(self):

        """
        Returns all request headers as a dictionary-like object mapping header
        names to values.
        """

        raise NotImplementedError("get_headers")

    def get_header_values(self, key):

        """
        Returns a list of all request header values associated with the given
        'key'. Note that according to RFC 2616, 'key' is treated as a
        case-insensitive string.
        """

        raise NotImplementedError("get_header_values")

    def get_content_type(self):

        """
        Returns the content type specified on the request, along with the
        charset employed.
        """

        raise NotImplementedError("get_content_type")

    def get_content_charsets(self):

        """
        Returns the character set preferences.
        """

        raise NotImplementedError("get_content_charsets")

    def get_content_languages(self):

        """
        Returns extracted language information from the transaction.
        """

        raise NotImplementedError("get_content_languages")

    def get_path(self, encoding=None):

        """
        Returns the entire path from the request as a Unicode object. Any "URL
        encoded" character values in the part of the path before the query
        string will be decoded and presented as genuine characters; the query
        string will remain "URL encoded", however.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path")

    def get_path_without_query(self, encoding=None):

        """
        Returns the entire path from the request minus the query string as a
        Unicode object containing genuine characters (as opposed to "URL
        encoded" character values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path_without_query")

    def get_path_info(self, encoding=None):

        """
        Returns the "path info" (the part of the URL after the resource name
        handling the current request) from the request as a Unicode object
        containing genuine characters (as opposed to "URL encoded" character
        values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path_info")

    def get_path_without_info(self, encoding=None):

        """
        Returns the entire path from the request minus the query string and the
        "path info" as a Unicode object containing genuine characters (as
        opposed to "URL encoded" character values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.

        Where no path info is available, the entire path (minus the query
        string) is returned.
        """

        entire_path = self.get_path_without_query(encoding)
        path_info = self.get_path_info(encoding)

        # An empty path info must be handled separately: slicing with [:-0]
        # would yield an empty string instead of the entire path.

        if not path_info:
            return entire_path
        return entire_path[:-len(path_info)]

    def get_query_string(self):

        """
        Returns the query string from the path in the request.
        """

        raise NotImplementedError("get_query_string")

    # Higher level request-related methods.

    def get_fields_from_path(self, encoding=None):

        """
        Extracts fields (or request parameters) from the path specified in the
        transaction. The underlying framework may refuse to supply fields from
        the path if handling a POST transaction. The optional 'encoding'
        parameter specifies the character encoding of the query string for cases
        where the default encoding is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name).
        """

        raise NotImplementedError("get_fields_from_path")

    def get_fields_from_body(self, encoding=None):

        """
        Extracts fields (or request parameters) from the message body in the
        transaction. The optional 'encoding' parameter specifies the character
        encoding of the message body for cases where no such information is
        available, but where the default encoding is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name). Each value is
        either a Unicode object (representing a simple form field, for example)
        or a WebStack.Helpers.Request.FileContent object (representing a file
        upload form field).
        """

        raise NotImplementedError("get_fields_from_body")

    def get_fields(self, encoding=None):

        """
        Extracts fields (or request parameters) from both the path specified in
        the transaction as well as the message body. The optional 'encoding'
        parameter specifies the character encoding of the message body for cases
        where no such information is available, but where the default encoding
        is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name). Each value is
        either a Unicode object (representing a simple form field, for example)
        or a WebStack.Helpers.Request.FileContent object (representing a file
        upload form field).

        Where a given field name is used in both the path and message body to
        specify values, the values from both sources will be combined into a
        single list associated with that field name.
        """

        raise NotImplementedError("get_fields")

    def get_user(self):

        """
        Extracts user information from the transaction.

        Returns a username as a string or None if no user is defined.
        """

        raise NotImplementedError("get_user")

    def get_cookies(self):

        """
        Obtains cookie information from the request.

        Returns a dictionary mapping cookie names to cookie objects.
        """

        raise NotImplementedError("get_cookies")

    def get_cookie(self, cookie_name):

        """
        Obtains cookie information from the request.

        Returns a cookie object for the given 'cookie_name' or None if no such
        cookie exists.
        """

        raise NotImplementedError("get_cookie")

    # Response-related methods.

    def get_response_stream(self):

        """
        Returns the response stream for the transaction.
        """

        raise NotImplementedError("get_response_stream")

    def get_response_stream_encoding(self):

        """
        Returns the response stream encoding.
        """

        raise NotImplementedError("get_response_stream_encoding")

    def get_response_code(self):

        """
        Get the response code associated with the transaction. If no response
        code is defined, None is returned.
        """

        raise NotImplementedError("get_response_code")

    def set_response_code(self, response_code):

        """
        Set the 'response_code' using a numeric constant defined in the HTTP
        specification.
        """

        raise NotImplementedError("set_response_code")

    def set_header_value(self, header, value):

        """
        Set the HTTP 'header' with the given 'value'.
        """

        raise NotImplementedError("set_header_value")

    def set_content_type(self, content_type):

        """
        Sets the 'content_type' for the response.
        """

        raise NotImplementedError("set_content_type")

    # Higher level response-related methods.

    def set_cookie(self, cookie):

        """
        Stores the given 'cookie' object in the response.
        """

        raise NotImplementedError("set_cookie")

    def set_cookie_value(self, name, value, path=None, expires=None):

        """
        Stores a cookie with the given 'name' and 'value' in the response.

        The optional 'path' is a string which specifies the scope of the cookie,
        and the optional 'expires' parameter is a value compatible with the
        time.time function, and indicates the expiry date/time of the cookie.
        """

        raise NotImplementedError("set_cookie_value")

    def delete_cookie(self, cookie_name):

        """
        Adds to the response a request that the cookie with the given
        'cookie_name' be deleted/discarded by the client.
        """

        raise NotImplementedError("delete_cookie")

    # Session-related methods.

    def get_session(self, create=1):

        """
        Gets a session corresponding to an identifier supplied in the
        transaction.

        If no session has yet been established according to information
        provided in the transaction then the optional 'create' parameter
        determines whether a new session will be established.

        Where no session has been established and where 'create' is set to 0
        then None is returned. In all other cases, a session object is created
        (where appropriate) and returned.
        """

        raise NotImplementedError("get_session")

    def expire_session(self):

        """
        Expires any session established according to information provided in the
        transaction.
        """

        raise NotImplementedError("expire_session")

    # Application-specific methods.

    def set_user(self, username):

        """
        An application-specific method which sets the user information with
        'username' in the transaction. This affects subsequent calls to
        'get_user'.
        """

        self.user = username

    def set_virtual_path_info(self, path_info):

        """
        An application-specific method which sets the 'path_info' in the
        transaction. This affects subsequent calls to 'get_virtual_path_info'.

        Note that the virtual path info should either be an empty string, or it
        should begin with "/" and then (optionally) include other details.
        Virtual path info strings which omit the leading "/" - ie. containing
        things like "xxx" or even "xxx/yyy" - do not really make sense and may
        not be handled correctly by various WebStack components.
        """

        self.path_info = path_info

    def get_virtual_path_info(self, encoding=None):

        """
        An application-specific method which either returns path info set in the
        'set_virtual_path_info' method, or the normal path info found in the
        request.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values. Note that the 'encoding' is only employed when the
        normal path info is returned.
        """

        if self.path_info is not None:
            return self.path_info
        else:
            return self.get_path_info(encoding)

    def get_processed_virtual_path_info(self, encoding=None):

        """
        An application-specific method which returns the virtual path info that
        is considered "processed"; that is, the part of the path info which is
        not included in the virtual path info.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.

        Where the virtual path info is identical to the path info, an empty
        string is returned.

        Where the virtual path info is a substring of the path info, the path
        info preceding that substring is returned.

        Where the virtual path info is either an empty string or not a substring
        of the path info, the entire path info is returned.

        Generally, one should expect the following relationship between the path
        info, virtual path info and processed virtual path info:

        path info == processed virtual path info + virtual path info
        """

        real_path_info = self.get_path_info(encoding)
        virtual_path_info = self.get_virtual_path_info(encoding)

        if virtual_path_info == "":
            return real_path_info

        # The last occurrence of the virtual path info is sought.

        i = real_path_info.rfind(virtual_path_info)
        if i == -1:
            return real_path_info
        else:
            return real_path_info[:i]

    def get_attributes(self):

        """
        An application-specific method which obtains a dictionary mapping names
        to attribute values that can be used to store arbitrary information.

        Since the dictionary of attributes is retained by the transaction during
        its lifetime, such a dictionary can be used to store information that an
        application wishes to communicate amongst its components and resources
        without having to pass objects other than the transaction between them.

        The returned dictionary can be modified using normal dictionary-like
        methods. If no attributes existed previously, a new dictionary is
        created and associated with the transaction.
        """

        if not hasattr(self, "_attributes"):
            self._attributes = {}
        return self._attributes

    # Utility methods.

    def traverse_path(self, encoding=None):

        """
        Traverse the path, updating the virtual path info and thus the processed
        virtual path info accordingly. Return the traversed virtual path info
        fragment.

        The virtual path info is expected to begin with "/": where it contains
        no "/" at all (being an empty string, for example), IndexError is
        raised, and the virtual path info will already have been reset to "/".
        """

        # A leading "/" produces an empty first element, making the second
        # element the fragment being traversed.

        vp = self.get_virtual_path_info(encoding).split("/")
        self.set_virtual_path_info("/" + "/".join(vp[2:]))
        return vp[1]

    def update_path(self, path, relative_path):

        """
        Transform the given 'path' using the specified 'relative_path'. For
        example, a simple identifier replaces the last component from 'path':

          trans.update_path("/parent/node", "other") -> "/parent/other"

        If the last component is empty, the effect is similar to an append
        operation:

          trans.update_path("/parent/node/", "other") -> "/parent/node/other"

        Where 'relative_path' is empty, the result is 'path' with the last
        component erased (but still present):

          trans.update_path("/parent/node", "") -> "/parent/"

          trans.update_path("/parent/node/", "") -> "/parent/node/"

        Where 'relative_path' contains ".", the component is regarded as being
        empty:

          trans.update_path("/parent/node", "other/./more") -> "/parent/other/more"

          trans.update_path("/parent/node/", "other/./more") -> "/parent/node/other/more"

        However, at the start of 'relative_path', "." can remove one component:

          trans.update_path("/parent/node", ".") -> "/parent"

          trans.update_path("/parent/node/", ".") -> "/parent/node"

        Adding "/" immediately afterwards restores any removed "/":

          trans.update_path("/parent/node/", "./") -> "/parent/node/"

          trans.update_path("/parent/node", "./") -> "/parent/"

        Following components add to the effect of "./":

          trans.update_path("/parent/node", "./other/more") -> "/parent/other/more"

          trans.update_path("/parent/node/", "./other/more") -> "/parent/node/other/more"

        Where 'relative_path' contains "..", two components are removed from the
        resulting path:

          trans.update_path("/parent/node/", "..") -> "/parent"

          trans.update_path("/parent/node/", "../other") -> "/parent/other"

          trans.update_path("/parent/node", "..") -> "/"

          trans.update_path("/parent/node", "../other") -> "/other"

        Where fewer components exist than are to be removed, the path is reset:

          trans.update_path("/parent/node", "../..") -> "/"

        Subsequent components are applied to the reset path:

          trans.update_path("/parent/node", "../../other") -> "/other"

          trans.update_path("/parent/node/", "../../other") -> "/other"

        Where 'relative_path' begins with "/", the 'path' is reset to "/" and
        the components of the 'relative_path' are then applied to that new path:

          trans.update_path("/parent/node", "/other") -> "/other"

        Where 'relative_path' ends with "/", the final "/" is added to the
        result:

          trans.update_path("/parent/node", "other/") -> "/parent/other/"
        """

        rparts = relative_path.split("/")

        if relative_path.startswith("/"):

            # Reset the path, discarding the empty component before the "/".

            parts = [""]
            del rparts[0]
        elif relative_path == "":

            # Erase (but retain) the last component; nothing remains to apply.

            parts = path.split("/")
            parts[-1] = ""
            del rparts[0]
        else:

            # The last component is replaced by the relative path components.

            parts = path.split("/")
            del parts[-1]

        for rpart in rparts:
            if rpart == ".":
                continue
            elif rpart == "..":

                # Never remove the empty component representing the root.

                if len(parts) > 1:
                    parts = parts[:-1]
            else:
                parts.append(rpart)

        return "/" + "/".join(parts[1:])

    def redirect(self, path, code=302):

        """
        Send a redirect response to the client, providing the given 'path' as
        the suggested location of a resource. The optional 'code' (set to 302 by
        default) may be used to change the exact meaning of the response
        according to the HTTP specifications.

        Note that 'path' should be a plain string suitable for header output.
        Use the 'encode_path' method to convert Unicode objects into such
        strings.

        This method always raises EndOfResponse after setting the response code
        and the "Location" header.
        """

        self.set_response_code(code)
        self.set_header_value("Location", path)
        raise EndOfResponse

class Resource:

    "A generic resource interface."

    def respond(self, trans):

        """
        An application-specific method which performs activities on the basis of
        the transaction object 'trans'.
        """

        raise NotImplementedError("respond")

class Authenticator:

    "A generic authentication component."

    def authenticate(self, trans):

        """
        An application-specific method which authenticates the sender of the
        request described by the transaction object 'trans'. This method should
        consider 'trans' to be read-only and not attempt to change the state of
        the transaction.

        If the sender of the request is authenticated successfully, the result
        of this method evaluates to true; otherwise the result of this method
        evaluates to false.
        """

        raise NotImplementedError("authenticate")

    def get_auth_type(self):

        """
        An application-specific method which returns the authentication type to
        be used. An example value is 'Basic' which specifies HTTP basic
        authentication.
        """

        raise NotImplementedError("get_auth_type")

    def get_realm(self):

        """
        An application-specific method which returns the name of the realm for
        which authentication is taking place.
        """

        raise NotImplementedError("get_realm")

# vim: tabstop=4 expandtab shiftwidth=4