1.1 --- a/vContent.py Sun Nov 02 23:06:25 2008 +0100
1.2 +++ b/vContent.py Mon Nov 03 00:48:06 2008 +0100
1.3 @@ -41,11 +41,16 @@
1.4 # Encoding-related imports.
1.5
1.6 import base64, quopri
1.7 +import codecs
1.8
1.9 # Tokenisation help.
1.10
1.11 import re
1.12
1.13 +# Configuration.
1.14 +
1.15 +default_encoding = "utf-8"
1.16 +
1.17 # Reader and parser classes.
1.18
1.19 class Reader:
1.20 @@ -65,6 +70,12 @@
1.21 self.lines = []
1.22 self.line_number = 1 # about to read line 1
1.23
1.24 + def close(self):
1.25 +
1.26 + "Close the reader."
1.27 +
1.28 + self.f.close()
1.29 +
1.30 def pushback(self, line):
1.31
1.32 """
1.33 @@ -102,13 +113,23 @@
1.34 physical lines of text.
1.35 """
1.36
1.37 + # Skip blank lines.
1.38 +
1.39 line = self.readline()
1.40 + while line:
1.41 + line_stripped = line.rstrip("\r\n")
1.42 + if not line_stripped:
1.43 + line = self.readline()
1.44 + else:
1.45 + break
1.46 + else:
1.47 + return ""
1.48
1.49 # Strip all appropriate whitespace from the right end of each line.
1.50 # For subsequent lines, remove the first whitespace character.
1.51 # See section 4.1 of the iCalendar specification.
1.52
1.53 - lines = [line.rstrip("\r\n")]
1.54 + lines = [line_stripped]
1.55
1.56 line = self.readline()
1.57 while line.startswith(" ") or line.startswith("\t"):
1.58 @@ -212,6 +233,12 @@
1.59
1.60 self.f = f
1.61
1.62 + def close(self):
1.63 +
1.64 + "Close the reader."
1.65 +
1.66 + self.f.close()
1.67 +
1.68 def __iter__(self):
1.69
1.70 "Return self as the iterator."
1.71 @@ -424,6 +451,12 @@
1.72 self.line_length = line_length or self.default_line_length
1.73 self.char_offset = 0
1.74
1.75 + def close(self):
1.76 +
1.77 + "Close the writer."
1.78 +
1.79 + self.f.close()
1.80 +
1.81 def write(self, text):
1.82
1.83 "Write the 'text' to the file."
1.84 @@ -466,7 +499,13 @@
1.85
1.86 self.f = f
1.87
1.88 - def write(self, name, parameters, value):
1.89 + def close(self):
1.90 +
1.91 + "Close the writer."
1.92 +
1.93 + self.f.close()
1.94 +
1.95 + def write_content_line(self, name, parameters, value):
1.96
1.97 """
1.98 Write a content line for the given 'name', 'parameters' and 'value'
1.99 @@ -507,14 +546,32 @@
1.100
1.101 return self.encode_content(value)
1.102
1.103 +# Utility functions.
1.104 +
1.105 +def is_input_stream(stream_or_string):
1.106 + return hasattr(stream_or_string, "read")
1.107 +
1.108 +def get_input_stream(stream_or_string):
1.109 + if is_input_stream(stream_or_string):
1.110 + return stream_or_string
1.111 + else:
1.112 + return codecs.open(stream_or_string, encoding=default_encoding)
1.113 +
1.114 +def get_output_stream(stream_or_string):
1.115 + if hasattr(stream_or_string, "write"):
1.116 + return stream_or_string
1.117 + else:
1.118 + return codecs.open(stream_or_string, "w", encoding=default_encoding)
1.119 +
1.120 # Public functions.
1.121
1.122 -def parse(f, non_standard_newline=0, parser_cls=None):
1.123 +def parse(stream_or_string, non_standard_newline=0, parser_cls=None):
1.124
1.125 """
1.126 - Parse the resource data found through the use of the file object 'f', which
1.127 - should provide Unicode data. (The codecs module can be used to open files or
1.128 - to wrap streams in order to provide Unicode data.)
1.129 + Parse the resource data found through the use of the 'stream_or_string',
1.130 + which is either a stream providing Unicode data (the codecs module can be
1.131 + used to open files or to wrap streams in order to provide Unicode data) or a
1.132 + filename identifying a file to be parsed.
1.133
1.134 The optional 'non_standard_newline' can be set to a true value (unlike the
1.135 default) in order to attempt to process files with CR as the end of line
1.136 @@ -524,16 +581,28 @@
1.137 is returned.
1.138 """
1.139
1.140 - reader = Reader(f, non_standard_newline)
1.141 - parser = (parser_cls or Parser)()
1.142 - return parser.parse(reader)
1.143 + stream = get_input_stream(stream_or_string)
1.144 + reader = Reader(stream, non_standard_newline)
1.145 +
1.146 + # Parse using the reader.
1.147
1.148 -def iterparse(f, non_standard_newline=0, parser_cls=None):
1.149 + try:
1.150 + parser = (parser_cls or Parser)()
1.151 + return parser.parse(reader)
1.152 +
1.153 + # Close any opened streams.
1.154 +
1.155 + finally:
1.156 + if not is_input_stream(stream_or_string):
1.157 + reader.close()
1.158 +
1.159 +def iterparse(stream_or_string, non_standard_newline=0, parser_cls=None):
1.160
1.161 """
1.162 - Parse the resource data found through the use of the file object 'f', which
1.163 - should provide Unicode data. (The codecs module can be used to open files or
1.164 - to wrap streams in order to provide Unicode data.)
1.165 + Parse the resource data found through the use of the 'stream_or_string',
1.166 + which is either a stream providing Unicode data (the codecs module can be
1.167 + used to open files or to wrap streams in order to provide Unicode data) or a
1.168 + filename identifying a file to be parsed.
1.169
1.170 The optional 'non_standard_newline' can be set to a true value (unlike the
1.171 default) in order to attempt to process files with CR as the end of line
1.172 @@ -543,12 +612,14 @@
1.173 events of the form (name, parameters, value).
1.174 """
1.175
1.176 - reader = Reader(f, non_standard_newline)
1.177 + stream = get_input_stream(stream_or_string)
1.178 + reader = Reader(stream, non_standard_newline)
1.179 parser = (parser_cls or StreamParser)(reader)
1.180 - return iter(parser)
1.181 + return parser
1.182
1.183 -def iterwrite(f, line_length=None, writer_cls=None):
1.184 - _writer = Writer(f, line_length)
1.185 +def iterwrite(stream_or_string, line_length=None, writer_cls=None):
1.186 + stream = get_output_stream(stream_or_string)
1.187 + _writer = Writer(stream, line_length)
1.188 writer = (writer_cls or StreamWriter)(_writer)
1.189 return writer
1.190