#!/usr/bin/env python

"""
Generic Web framework interfaces.

Copyright (C) 2004, 2005, 2006 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

--------

The WebStack architecture consists of the following layers:

  * Framework - The underlying Web framework implementation.
  * Adapter   - Code operating under the particular framework which creates
                WebStack abstractions and issues them to the application.
  * Resources - Units of functionality operating within the hosted Web
                application.

Resources can act as both content producers within an application and as request
dispatchers to other resources; in the latter role, they may be referred to as
directors.
"""

import urllib
from WebStack.Helpers.Request import Cookie, parse_header_value, ContentType, HeaderValue

class EndOfResponse(Exception):

    "An exception which signals the end of a response."

    pass

class Transaction:

    """
    A generic transaction interface containing framework-specific methods to be
    overridden.
    """

    # The default charset ties output together with body field interpretation.
    # It is also used to interpret URLs and paths.

    default_charset = "utf-8"

    # The safe default charset provides some interpretation of incoming data of
    # an unknown encoding. Generally, one should avoid making "last resort"
    # interpretations, however.

    safe_default_charset = "iso-8859-1"

    # The default path info is provided here, although the manipulated virtual
    # path info is an instance attribute set through instances of subclasses of
    # this class.

    path_info = None

    # The default user is provided here, although the manipulated user is an
    # instance attribute set through instances of subclasses of this class.

    user = None

    def commit(self):

        """
        A special method, synchronising the transaction with framework-specific
        objects.
        """

        pass

    def rollback(self):

        """
        A special method, partially synchronising the transaction with
        framework-specific objects, but discarding previously emitted content
        that is to be replaced by an error message.
        """

        pass

    # Utility methods.

    def parse_header_value(self, header_class, header_value_str):

        """
        Create an object of the given 'header_class' by determining the details
        of the given 'header_value_str' - a string containing the value of a
        particular header.
        """

        # Now uses the WebStack.Helpers.Request function of the same name.

        return parse_header_value(header_class, header_value_str)

    def parse_content_type(self, content_type_field):

        """
        Parse the given 'content_type_field' - a value found comparable to that
        found in an HTTP request header for "Content-Type".

        Returns the result of 'parse_header_value' using the ContentType class.
        """

        return self.parse_header_value(ContentType, content_type_field)

    def format_header_value(self, value):

        """
        Format the given header 'value'. Typically, this just ensures the usage
        of US-ASCII.
        """

        return value.encode("US-ASCII")

    def encode_cookie_value(self, value):

        """
        Encode the given cookie 'value'. This ensures the usage of US-ASCII
        through the encoding of Unicode objects as URL-encoded UTF-8 text.
        """

        return urllib.quote(value.encode("UTF-8")).encode("US-ASCII")

    def decode_cookie_value(self, value):

        """
        Decode the given cookie 'value', reversing the URL-encoding and
        interpreting the result as UTF-8 text. Returns a Unicode object.
        """

        return unicode(urllib.unquote(value), "UTF-8")

    def process_cookies(self, cookie_dict, using_strings=0):

        """
        Process the given 'cookie_dict', returning a dictionary mapping cookie names
        to cookie objects where the names and values have been decoded from the form
        used in the cookies retrieved from the request.

        The optional 'using_strings', if set to 1, treats the 'cookie_dict' as a
        mapping of cookie names to values.
        """

        cookies = {}
        for name in cookie_dict.keys():
            if using_strings:
                value = cookie_dict[name]
            else:
                # Otherwise each entry is an object exposing a 'value' attribute.
                cookie = cookie_dict[name]
                value = cookie.value
            cookie_name = self.decode_cookie_value(name)
            cookie_value = self.decode_cookie_value(value)
            cookies[cookie_name] = Cookie(cookie_name, cookie_value)
        return cookies

    def parse_content_preferences(self, accept_preference):

        """
        Returns the preferences as requested by the user agent. The preferences are
        returned as a list of codes in the same order as they appeared in the
        appropriate environment variable. In other words, the explicit weighting
        criteria are ignored.

        As the 'accept_preference' parameter, values for language and charset
        preferences are appropriate. Where 'accept_preference' is None, an empty
        list is returned.
        """

        if accept_preference is None:
            return []

        accept_prefs = []
        for accept_def in accept_preference.split(","):

            # Only the code preceding any ";" is retained. Since splitting a
            # string always yields at least one element, no length test is
            # needed before taking the first element.

            accept_prefs.append(accept_def.split(";")[0].strip())
        return accept_prefs

    def convert_to_list(self, value):

        """
        Returns a single element list containing 'value' if it is not itself a list, a
        tuple, or None. If 'value' is a list then it is itself returned; if 'value' is a
        tuple then a new list containing the same elements is returned; if 'value' is None
        then an empty list is returned.
        """

        if type(value) == type([]):
            return value
        elif type(value) == type(()):
            return list(value)
        elif value is None:
            return []
        else:
            return [value]

    # Public utility methods.

    def decode_path(self, path, encoding=None):

        """
        From the given 'path', use the optional 'encoding' (if specified) to decode the
        information and convert it to Unicode. Upon failure for a specified 'encoding'
        or where 'encoding' is not specified, use the default character encoding to
        perform the conversion.

        Returns the 'path' as a Unicode value without "URL encoded" character values.
        """

        unquoted_path = urllib.unquote(path)
        return self.decode_value(unquoted_path, encoding)

    def decode_value(self, value, encoding=None):

        """
        From the given 'value', use the optional 'encoding' (if specified) to decode the
        information and convert it to Unicode. Upon failure for a specified 'encoding'
        or where 'encoding' is not specified, use the default character encoding to
        perform the conversion. Should that also fail, the safe default character
        encoding is used.

        Returns the 'value' as a Unicode value.
        """

        if encoding is not None:
            try:
                return unicode(value, encoding)
            except UnicodeError:

                # Fall through to the default encodings.

                pass
        try:
            return unicode(value, self.default_charset)
        except UnicodeError:
            return unicode(value, self.safe_default_charset)

    def encode_path(self, path, encoding=None):

        """
        Encode the given 'path', using the optional 'encoding' (if specified) or the
        default encoding where 'encoding' is not specified, and produce a suitable "URL
        encoded" string.
        """

        return urllib.quote(self.encode_value(path, encoding))

    def encode_value(self, value, encoding=None):

        """
        Encode the given 'value', using the optional 'encoding' (if specified) or the
        default encoding where 'encoding' is not specified, producing a plain string.

        Where 'encoding' is specified, any encoding error is propagated. Where it
        is not specified, the safe default encoding is tried should the default
        encoding fail.
        """

        if encoding is not None:
            return value.encode(encoding)
        else:
            try:
                return value.encode(self.default_charset)
            except UnicodeError:
                return value.encode(self.safe_default_charset)

    # Server-related methods.

    def get_server_name(self):

        "Returns the server name."

        raise NotImplementedError("get_server_name")

    def get_server_port(self):

        "Returns the server port as a string."

        raise NotImplementedError("get_server_port")

    # Request-related methods.

    def get_request_stream(self):

        """
        Returns the request stream for the transaction.
        """

        raise NotImplementedError("get_request_stream")

    def get_request_method(self):

        """
        Returns the request method.
        """

        raise NotImplementedError("get_request_method")

    def get_headers(self):

        """
        Returns all request headers as a dictionary-like object mapping header
        names to values.
        """

        raise NotImplementedError("get_headers")

    def get_header_values(self, key):

        """
        Returns a list of all request header values associated with the given
        'key'. Note that according to RFC 2616, 'key' is treated as a
        case-insensitive string.
        """

        raise NotImplementedError("get_header_values")

    def get_content_type(self):

        """
        Returns the content type specified on the request, along with the
        charset employed.
        """

        raise NotImplementedError("get_content_type")

    def get_content_charsets(self):

        """
        Returns the character set preferences.
        """

        raise NotImplementedError("get_content_charsets")

    def get_content_languages(self):

        """
        Returns extracted language information from the transaction.
        """

        raise NotImplementedError("get_content_languages")

    def get_path(self, encoding=None):

        """
        Returns the entire path from the request as a Unicode object. Any "URL
        encoded" character values in the part of the path before the query
        string will be decoded and presented as genuine characters; the query
        string will remain "URL encoded", however.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path")

    def get_path_without_query(self, encoding=None):

        """
        Returns the entire path from the request minus the query string as a
        Unicode object containing genuine characters (as opposed to "URL
        encoded" character values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path_without_query")

    def get_path_info(self, encoding=None):

        """
        Returns the "path info" (the part of the URL after the resource name
        handling the current request) from the request as a Unicode object
        containing genuine characters (as opposed to "URL encoded" character
        values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        raise NotImplementedError("get_path_info")

    def get_path_without_info(self, encoding=None):

        """
        Returns the entire path from the request minus the query string and the
        "path info" as a Unicode object containing genuine characters (as
        opposed to "URL encoded" character values).

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.

        Where no path info is present, the entire path (minus the query string)
        is returned.
        """

        entire_path = self.get_path_without_query(encoding)
        path_info = self.get_path_info(encoding)

        # An empty path info must be handled separately: slicing with an end
        # index of -0 is the same as slicing with 0 and would produce an empty
        # string instead of the entire path.

        if not path_info:
            return entire_path

        return entire_path[:-len(path_info)]

    def get_query_string(self):

        """
        Returns the query string from the path in the request.
        """

        raise NotImplementedError("get_query_string")

    # Higher level request-related methods.

    def get_fields_from_path(self, encoding=None):

        """
        Extracts fields (or request parameters) from the path specified in the
        transaction. The underlying framework may refuse to supply fields from
        the path if handling a POST transaction. The optional 'encoding'
        parameter specifies the character encoding of the query string for cases
        where the default encoding is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name).
        """

        raise NotImplementedError("get_fields_from_path")

    def get_fields_from_body(self, encoding=None):

        """
        Extracts fields (or request parameters) from the message body in the
        transaction. The optional 'encoding' parameter specifies the character
        encoding of the message body for cases where no such information is
        available, but where the default encoding is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name). Each value is
        either a Unicode object (representing a simple form field, for example)
        or a WebStack.Helpers.Request.FileContent object (representing a file
        upload form field).
        """

        raise NotImplementedError("get_fields_from_body")

    def get_fields(self, encoding=None):

        """
        Extracts fields (or request parameters) from both the path specified in
        the transaction as well as the message body. The optional 'encoding'
        parameter specifies the character encoding of the message body for cases
        where no such information is available, but where the default encoding
        is to be overridden.

        Returns a dictionary mapping field names to lists of values (even if a
        single value is associated with any given field name). Each value is
        either a Unicode object (representing a simple form field, for example)
        or a WebStack.Helpers.Request.FileContent object (representing a file
        upload form field).

        Where a given field name is used in both the path and message body to
        specify values, the values from both sources will be combined into a
        single list associated with that field name.
        """

        raise NotImplementedError("get_fields")

    def get_user(self):

        """
        Extracts user information from the transaction.

        Returns a username as a string or None if no user is defined.
        """

        raise NotImplementedError("get_user")

    def get_cookies(self):

        """
        Obtains cookie information from the request.

        Returns a dictionary mapping cookie names to cookie objects.
        """

        raise NotImplementedError("get_cookies")

    def get_cookie(self, cookie_name):

        """
        Obtains cookie information from the request.

        Returns a cookie object for the given 'cookie_name' or None if no such
        cookie exists.
        """

        raise NotImplementedError("get_cookie")

    # Response-related methods.

    def get_response_stream(self):

        """
        Returns the response stream for the transaction.
        """

        raise NotImplementedError("get_response_stream")

    def get_response_stream_encoding(self):

        """
        Returns the response stream encoding.
        """

        raise NotImplementedError("get_response_stream_encoding")

    def get_response_code(self):

        """
        Get the response code associated with the transaction. If no response
        code is defined, None is returned.
        """

        raise NotImplementedError("get_response_code")

    def set_response_code(self, response_code):

        """
        Set the 'response_code' using a numeric constant defined in the HTTP
        specification.
        """

        raise NotImplementedError("set_response_code")

    def set_header_value(self, header, value):

        """
        Set the HTTP 'header' with the given 'value'.
        """

        raise NotImplementedError("set_header_value")

    def set_content_type(self, content_type):

        """
        Sets the 'content_type' for the response.
        """

        raise NotImplementedError("set_content_type")

    # Higher level response-related methods.

    def set_cookie(self, cookie):

        """
        Stores the given 'cookie' object in the response.
        """

        raise NotImplementedError("set_cookie")

    def set_cookie_value(self, name, value, path=None, expires=None):

        """
        Stores a cookie with the given 'name' and 'value' in the response.

        The optional 'path' is a string which specifies the scope of the cookie,
        and the optional 'expires' parameter is a value compatible with the
        time.time function, and indicates the expiry date/time of the cookie.
        """

        raise NotImplementedError("set_cookie_value")

    def delete_cookie(self, cookie_name):

        """
        Adds to the response a request that the cookie with the given
        'cookie_name' be deleted/discarded by the client.
        """

        raise NotImplementedError("delete_cookie")

    # Session-related methods.

    def get_session(self, create=1):

        """
        Gets a session corresponding to an identifier supplied in the
        transaction.

        If no session has yet been established according to information
        provided in the transaction then the optional 'create' parameter
        determines whether a new session will be established.

        Where no session has been established and where 'create' is set to 0
        then None is returned. In all other cases, a session object is created
        (where appropriate) and returned.
        """

        raise NotImplementedError("get_session")

    def expire_session(self):

        """
        Expires any session established according to information provided in the
        transaction.
        """

        raise NotImplementedError("expire_session")

    # Application-specific methods.

    def set_user(self, username):

        """
        An application-specific method which sets the user information with
        'username' in the transaction. This affects subsequent calls to
        'get_user'.
        """

        self.user = username

    def set_virtual_path_info(self, path_info):

        """
        An application-specific method which sets the 'path_info' in the
        transaction. This affects subsequent calls to 'get_virtual_path_info'.

        Note that the virtual path info should either be an empty string, or it
        should begin with "/" and then (optionally) include other details.
        Virtual path info strings which omit the leading "/" - ie. containing
        things like "xxx" or even "xxx/yyy" - do not really make sense and may
        not be handled correctly by various WebStack components.
        """

        self.path_info = path_info

    def get_virtual_path_info(self, encoding=None):

        """
        An application-specific method which either returns path info set in the
        'set_virtual_path_info' method, or the normal path info found in the
        request.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.
        """

        if self.path_info is not None:
            return self.path_info
        else:
            return self.get_path_info(encoding)

    def get_processed_virtual_path_info(self, encoding=None):

        """
        An application-specific method which returns the virtual path info that
        is considered "processed"; that is, the part of the path info which is
        not included in the virtual path info.

        If the optional 'encoding' is set, use that in preference to the default
        encoding to convert the path into a form not containing "URL encoded"
        character values.

        Where the virtual path info is identical to the path info, an empty
        string is returned.

        Where the virtual path info is a substring of the path info, the path
        info preceding that substring is returned.

        Where the virtual path info is either an empty string or not a substring
        of the path info, the entire path info is returned.

        Generally, one should expect the following relationship between the path
        info, virtual path info and processed virtual path info:

        path info == processed virtual path info + virtual path info
        """

        real_path_info = self.get_path_info(encoding)
        virtual_path_info = self.get_virtual_path_info(encoding)

        if virtual_path_info == "":
            return real_path_info

        # The last occurrence of the virtual path info is the one sought.

        i = real_path_info.rfind(virtual_path_info)
        if i == -1:
            return real_path_info
        else:
            return real_path_info[:i]

    def get_attributes(self):

        """
        An application-specific method which obtains a dictionary mapping names
        to attribute values that can be used to store arbitrary information.

        Since the dictionary of attributes is retained by the transaction during
        its lifetime, such a dictionary can be used to store information that an
        application wishes to communicate amongst its components and resources
        without having to pass objects other than the transaction between them.

        The returned dictionary can be modified using normal dictionary-like
        methods. If no attributes existed previously, a new dictionary is
        created and associated with the transaction.
        """

        if not hasattr(self, "_attributes"):
            self._attributes = {}
        return self._attributes

    # Utility methods.

    def traverse_path(self, encoding=None):

        """
        Traverse the path, updating the virtual path info and thus the processed
        virtual path info accordingly. Return the traversed virtual path info
        fragment.
        """

        # NOTE(review): an empty virtual path info splits into a single element,
        # NOTE(review): making the vp[1] access below raise IndexError - confirm
        # NOTE(review): whether callers rely on a leading "/" always being present.

        vp = self.get_virtual_path_info(encoding).split("/")
        self.set_virtual_path_info("/" + "/".join(vp[2:]))
        return vp[1]

    def update_path(self, path, relative_path):

        """
        Transform the given 'path' using the specified 'relative_path'. For
        example, a simple identifier replaces the last component from 'path':

        trans.update_path("/parent/node", "other") -> "/parent/other"

        If the last component is empty, the effect is similar to an append
        operation:

        trans.update_path("/parent/node/", "other") -> "/parent/node/other"

        Where 'relative_path' is empty, the result is 'path' with the last
        component erased (but still present):

        trans.update_path("/parent/node", "") -> "/parent/"

        trans.update_path("/parent/node/", "") -> "/parent/node/"

        Where 'relative_path' contains ".", the component is regarded as being
        empty:

        trans.update_path("/parent/node", "other/./more") -> "/parent/other/more"

        trans.update_path("/parent/node/", "other/./more") -> "/parent/node/other/more"

        However, at the start of 'relative_path', "." can remove one component:

        trans.update_path("/parent/node", ".") -> "/parent"

        trans.update_path("/parent/node/", ".") -> "/parent/node"

        Adding "/" immediately afterwards restores any removed "/":

        trans.update_path("/parent/node/", "./") -> "/parent/node/"

        trans.update_path("/parent/node", "./") -> "/parent/"

        Following components add to the effect of "./":

        trans.update_path("/parent/node", "./other/more") -> "/parent/other/more"

        trans.update_path("/parent/node/", "./other/more") -> "/parent/node/other/more"

        Where 'relative_path' contains "..", two components are removed from the
        resulting path:

        trans.update_path("/parent/node/", "..") -> "/parent"

        trans.update_path("/parent/node/", "../other") -> "/parent/other"

        trans.update_path("/parent/node", "..") -> "/"

        trans.update_path("/parent/node", "../other") -> "/other"

        Where fewer components exist than are to be removed, the path is reset:

        trans.update_path("/parent/node", "../..") -> "/"

        Subsequent components are applied to the reset path:

        trans.update_path("/parent/node", "../../other") -> "/other"

        trans.update_path("/parent/node/", "../../other") -> "/other"

        Where 'relative_path' begins with "/", the 'path' is reset to "/" and
        the components of the 'relative_path' are then applied to that new path:

        trans.update_path("/parent/node", "/other") -> "/other"

        Where 'relative_path' ends with "/", the final "/" is added to the
        result:

        trans.update_path("/parent/node", "other/") -> "/parent/other/"
        """

        rparts = relative_path.split("/")

        if relative_path.startswith("/"):

            # An absolute relative path resets the path; the empty component
            # before the leading "/" is discarded.

            parts = [""]
            del rparts[0]
        elif relative_path == "":

            # An empty relative path blanks the last component and contributes
            # no components of its own.

            parts = path.split("/")
            parts[-1] = ""
            del rparts[0]
        else:

            # Otherwise, the last component is replaced by what follows.

            parts = path.split("/")
            del parts[-1]

        for rpart in rparts:
            if rpart == ".":
                continue
            elif rpart == "..":

                # The initial empty component (the root) is never removed.

                if len(parts) > 1:
                    parts = parts[:-1]
            else:
                parts.append(rpart)

        return "/" + "/".join(parts[1:])

    def redirect(self, path, code=302):

        """
        Send a redirect response to the client, providing the given 'path' as
        the suggested location of a resource. The optional 'code' (set to 302 by
        default) may be used to change the exact meaning of the response
        according to the HTTP specifications.

        Note that 'path' should be a plain string suitable for header output.
        Use the 'encode_path' method to convert Unicode objects into such
        strings.

        This method always raises EndOfResponse after setting the response code
        and the "Location" header.
        """

        self.set_response_code(code)
        self.set_header_value("Location", path)
        raise EndOfResponse

class Resource:

    "A generic resource interface."

    def respond(self, trans):

        """
        An application-specific method which performs activities on the basis of
        the transaction object 'trans'.
        """

        raise NotImplementedError("respond")

class Authenticator:

    "A generic authentication component."

    def authenticate(self, trans):

        """
        An application-specific method which authenticates the sender of the
        request described by the transaction object 'trans'. This method should
        consider 'trans' to be read-only and not attempt to change the state of
        the transaction.

        If the sender of the request is authenticated successfully, the result
        of this method evaluates to true; otherwise the result of this method
        evaluates to false.
        """

        raise NotImplementedError("authenticate")

    def get_auth_type(self):

        """
        An application-specific method which returns the authentication type to
        be used. An example value is 'Basic' which specifies HTTP basic
        authentication.
        """

        raise NotImplementedError("get_auth_type")

    def get_realm(self):

        """
        An application-specific method which returns the name of the realm for
        which authentication is taking place.
        """

        raise NotImplementedError("get_realm")

# vim: tabstop=4 expandtab shiftwidth=4