1.1 --- a/vContent.py Thu Jan 15 23:39:48 2009 +0100
1.2 +++ b/vContent.py Sat Mar 14 22:13:22 2009 +0100
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Parsing of vCard, vCalendar and iCalendar files.
1.6
1.7 -Copyright (C) 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2005, 2006, 2007, 2008, 2009 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU Lesser General Public License as published by the Free
1.12 @@ -97,7 +97,7 @@
1.13 if self.lines:
1.14 return self.lines.pop()
1.15 else:
1.16 - # NOTE: Sanity check for broken lines (\r instead of \r\n or \n).
1.17 + # Sanity check for broken lines (\r instead of \r\n or \n).
1.18 line = self.f.readline()
1.19 while line.endswith("\r") and not self.non_standard_newline:
1.20 line += self.f.readline()
1.21 @@ -110,7 +110,7 @@
1.22
1.23 """
1.24 Read an entire content line, itself potentially consisting of many
1.25 - physical lines of text.
1.26 + physical lines of text, returning a string.
1.27 """
1.28
1.29 # Skip blank lines.
1.30 @@ -174,6 +174,9 @@
1.31 current position up to the target found, along with the target string,
1.32 using a tuple of the form (string, target). If no target was found,
1.33 return the entire string together with a target of None.
1.34 +
1.35 + The 'targets' parameter must be a regular expression object or an object
1.36 + compatible with the API of such objects.
1.37 """
1.38
1.39 text = self.text
1.40 @@ -505,36 +508,48 @@
1.41
1.42 self.f.close()
1.43
1.44 - def write_content_line(self, name, parameters, value):
1.45 + def write(self, name, parameters, value):
1.46
1.47 """
1.48 - Write a content line for the given 'name', 'parameters' and 'value'
1.49 - information.
1.50 + Write a content line, serialising the given 'name', 'parameters' and
1.51 + 'value' information.
1.52 + """
1.53 +
1.54 + self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))
1.55 +
1.56 + # Internal methods.
1.57 +
1.58 + def write_content_line(self, name, encoded_parameters, encoded_value):
1.59 +
1.60 + """
1.61 + Write a content line for the given 'name', 'encoded_parameters' and
1.62 + 'encoded_value' information.
1.63 """
1.64
1.65 f = self.f
1.66
1.67 f.write(name)
1.68 - for parameter_name, parameter_value in parameters.items():
1.69 + for param_name, param_value in encoded_parameters.items():
1.70 f.write(";")
1.71 - f.write(parameter_name)
1.72 + f.write(param_name)
1.73 f.write("=")
1.74 - f.write(parameter_value)
1.75 + f.write(param_value)
1.76 f.write(":")
1.77 - f.write(self.encode(name, parameters, value))
1.78 + f.write(encoded_value)
1.79 f.end_line()
1.80
1.81 - def encode_content(self, value):
1.82 + def encode_quoted_parameter_value(self, value):
1.83
1.84 - "Encode the given 'value', quoting characters."
1.85 + "Encode the given 'value' by enclosing it in double quotes."
1.86
1.87 - return value.replace("\n", "\\n")
1.88 + return '"%s"' % value
1.89
1.90 - # Internal methods.
1.91 + def encode_value(self, name, parameters, value):
1.92
1.93 - def encode(self, name, parameters, value):
1.94 -
1.95 - "Encode using 'name' and 'parameters' the given 'value'."
1.96 + """
1.97 + Encode using 'name' and 'parameters' the given 'value' so that the
1.98 + resulting encoded form employs any specified character encodings.
1.99 + """
1.100
1.101 encoding = parameters.get("ENCODING")
1.102 charset = parameters.get("CHARSET")
1.103 @@ -546,26 +561,55 @@
1.104
1.105 return self.encode_content(value)
1.106
1.107 + # Overrideable methods.
1.108 +
1.109 + def encode_parameters(self, parameters):
1.110 +
1.111 + """
1.112 + Encode the given 'parameters' according to the vCalendar specification.
1.113 + """
1.114 +
1.115 + encoded_parameters = {}
1.116 +
1.117 + for param_name, param_value in parameters.items():
1.118 +
1.119 + # Basic format support merely involves quoting values which seem to
1.120 + # need it. Other more specific formats may define exactly which
1.121 + # parameters should be quoted.
1.122 +
1.123 + if ContentLine.SEPARATORS.search(param_value):
1.124 + param_value = self.encode_quoted_parameter_value(param_value)
1.125 +
1.126 + encoded_parameters[param_name] = param_value
1.127 +
1.128 + return encoded_parameters
1.129 +
1.130 + def encode_content(self, value):
1.131 +
1.132 + "Encode the given 'value', quoting characters."
1.133 +
1.134 + return value.replace("\n", "\\n")
1.135 +
1.136 # Utility functions.
1.137
1.138 def is_input_stream(stream_or_string):
1.139 return hasattr(stream_or_string, "read")
1.140
1.141 -def get_input_stream(stream_or_string):
1.142 +def get_input_stream(stream_or_string, encoding=None):
1.143 if is_input_stream(stream_or_string):
1.144 return stream_or_string
1.145 else:
1.146 - return codecs.open(stream_or_string, encoding=default_encoding)
1.147 + return codecs.open(stream_or_string, encoding=(encoding or default_encoding))
1.148
1.149 -def get_output_stream(stream_or_string):
1.150 +def get_output_stream(stream_or_string, encoding=None):
1.151 if hasattr(stream_or_string, "write"):
1.152 return stream_or_string
1.153 else:
1.154 - return codecs.open(stream_or_string, "w", encoding=default_encoding)
1.155 + return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))
1.156
1.157 # Public functions.
1.158
1.159 -def parse(stream_or_string, non_standard_newline=0, parser_cls=None):
1.160 +def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):
1.161
1.162 """
1.163 Parse the resource data found through the use of the 'stream_or_string',
1.164 @@ -573,6 +617,9 @@
1.165 used to open files or to wrap streams in order to provide Unicode data) or a
1.166 filename identifying a file to be parsed.
1.167
1.168 + The optional 'encoding' can be used to specify the character encoding used
1.169 + by the file to be parsed; it is ignored if a stream is given instead.
1.170 +
1.171 The optional 'non_standard_newline' can be set to a true value (unlike the
1.172 default) in order to attempt to process files with CR as the end of line
1.173 character.
1.174 @@ -581,7 +628,7 @@
1.175 is returned.
1.176 """
1.177
1.178 - stream = get_input_stream(stream_or_string)
1.179 + stream = get_input_stream(stream_or_string, encoding)
1.180 reader = Reader(stream, non_standard_newline)
1.181
1.182 # Parse using the reader.
1.183 @@ -596,7 +643,7 @@
1.184 if not is_input_stream(stream_or_string):
1.185 reader.close()
1.186
1.187 -def iterparse(stream_or_string, non_standard_newline=0, parser_cls=None):
1.188 +def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):
1.189
1.190 """
1.191 Parse the resource data found through the use of the 'stream_or_string',
1.192 @@ -604,6 +651,9 @@
1.193 used to open files or to wrap streams in order to provide Unicode data) or a
1.194 filename identifying a file to be parsed.
1.195
1.196 + The optional 'encoding' can be used to specify the character encoding used
1.197 + by the file to be parsed; it is ignored if a stream is given instead.
1.198 +
1.199 The optional 'non_standard_newline' can be set to a true value (unlike the
1.200 default) in order to attempt to process files with CR as the end of line
1.201 character.
1.202 @@ -612,13 +662,27 @@
1.203 events of the form (name, parameters, value).
1.204 """
1.205
1.206 - stream = get_input_stream(stream_or_string)
1.207 + stream = get_input_stream(stream_or_string, encoding)
1.208 reader = Reader(stream, non_standard_newline)
1.209 parser = (parser_cls or StreamParser)(reader)
1.210 return parser
1.211
1.212 -def iterwrite(stream_or_string, line_length=None, writer_cls=None):
1.213 - stream = get_output_stream(stream_or_string)
1.214 +def iterwrite(stream_or_string, encoding=None, line_length=None, writer_cls=None):
1.215 +
1.216 + """
1.217 + Return a writer which will send data to the resource found through the use
1.218 + of 'stream_or_string', which is either a stream accepting Unicode data (the
1.219 + codecs module can be used to open files or to wrap streams in order to
1.220 + accept Unicode data) or a filename identifying a file to be written.
1.221 +
1.222 + The optional 'encoding' can be used to specify the character encoding used
1.223 + by the file to be written; it is ignored if a stream is given instead.
1.224 +
1.225 + The optional 'line_length' can be used to specify how long lines should be
1.226 + in the resulting data.
1.227 + """
1.228 +
1.229 + stream = get_output_stream(stream_or_string, encoding)
1.230 _writer = Writer(stream, line_length)
1.231 writer = (writer_cls or StreamWriter)(_writer)
1.232 return writer