1.1 --- a/WebStack/Generic.py Sat Feb 02 23:32:31 2008 +0000
1.2 +++ b/WebStack/Generic.py Sat Feb 02 23:33:13 2008 +0000
1.3 @@ -51,8 +51,15 @@
1.4 """
1.5
1.6 # The default charset ties output together with body field interpretation.
1.7 + # It is also used to interpret URLs and paths.
1.8
1.9 - default_charset = "iso-8859-1"
1.10 + default_charset = "utf-8"
1.11 +
1.12 + # The safe default charset provides some interpretation of incoming data of
1.13 + # an unknown encoding. Generally, one should avoid making "last resort"
1.14 + # interpretations, however.
1.15 +
1.16 + safe_default_charset = "iso-8859-1"
1.17
1.18 # The default path info is provided here, although the manipulated virtual
1.19 # path info is an instance attribute set through instances of subclasses of
1.20 @@ -211,12 +218,28 @@
1.21 """
1.22
1.23 unquoted_path = urllib.unquote(path)
1.24 + return self.decode_value(unquoted_path, encoding)
1.25 +
1.26 + def decode_value(self, value, encoding=None):
1.27 +
1.28 + """
1.29 + From the given 'value', use the optional 'encoding' (if specified) to decode the
1.30 + information and convert it to Unicode. Upon failure for a specified 'encoding'
1.31 + or where 'encoding' is not specified, use the default character encoding and,
1.32 + should that also fail, the safe default character encoding as a last resort.
1.33 +
1.34 + Returns the 'value' as a Unicode value.
1.35 + """
1.36 +
1.37 if encoding is not None:
1.38 try:
1.39 - return unicode(unquoted_path, encoding)
1.40 + return unicode(value, encoding)
1.41 except UnicodeError:
1.42 pass
1.43 - return unicode(unquoted_path, self.default_charset)
1.44 + try:
1.45 + return unicode(value, self.default_charset)
1.46 + except UnicodeError:
1.47 + return unicode(value, self.safe_default_charset)
1.48
1.49 def encode_path(self, path, encoding=None):
1.50
1.51 @@ -226,10 +249,22 @@
1.52 encoded" string.
1.53 """
1.54
1.55 + return urllib.quote(self.encode_value(path, encoding))
1.56 +
1.57 + def encode_value(self, value, encoding=None):
1.58 +
1.59 + """
1.60 + Encode 'value' as a plain string using 'encoding' if specified; otherwise
1.61 + use the default encoding, then the safe default encoding upon failure.
1.62 + """
1.63 +
1.64 if encoding is not None:
1.65 - return urllib.quote(path.encode(encoding))
1.66 + return value.encode(encoding)
1.67 else:
1.68 - return urllib.quote(path.encode(self.default_charset))
1.69 + try:
1.70 + return value.encode(self.default_charset)
1.71 + except UnicodeError:
1.72 + return value.encode(self.safe_default_charset)
1.73
1.74 # Server-related methods.
1.75