#!/usr/bin/env python

"""
Parsing of vCalendar and iCalendar files.

Copyright (C) 2008, 2009, 2011, 2013 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.

--------

References:

RFC 5545: Internet Calendaring and Scheduling Core Object Specification
          (iCalendar)
          http://tools.ietf.org/html/rfc5545

RFC 2445: Internet Calendaring and Scheduling Core Object Specification
          (iCalendar)
          http://tools.ietf.org/html/rfc2445
"""

import vContent
import re

try:
    set
except NameError:
    from sets import Set as set

# Format details.

# Parameters whose values are enclosed in double quotes.
QUOTED_PARAMETERS = set([
    "ALTREP", "DELEGATED-FROM", "DELEGATED-TO", "DIR", "MEMBER", "SENT-BY"
    ])

# Parameters whose values are collections of separate values.
MULTIVALUED_PARAMETERS = set([
    "DELEGATED-FROM", "DELEGATED-TO", "MEMBER"
    ])

QUOTED_TYPES = set(["URI"])

# Matches a comma or semicolon which is not preceded by a backslash, capturing
# the separator so that re.split also yields the separators found.
unquoted_separator_regexp = re.compile(r"(?<!\\)([,;])")

# Parser classes.

class vCalendarStreamParser(vContent.StreamParser):

    "A stream parser specifically for vCalendar/iCalendar."

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, value).

        The parameters obtained from the underlying parser are passed through
        'decode_parameters' before being returned.
        """

        name, parameters, value = vContent.StreamParser.next(self)
        return name, self.decode_parameters(parameters), value

    def decode_content(self, value):

        """
        Decode the given 'value' (which may represent a collection of distinct
        values), replacing quoted separator characters.

        The first unquoted separator found in 'value' determines the result
        type: a list is returned for comma-separated values, a tuple for
        semicolon-separated values, and a single decoded value where no
        unquoted separator is present.
        """

        sep = None
        values = []

        # Splitting with a capturing group yields values at even positions and
        # the separators themselves at odd positions.

        for i, s in enumerate(unquoted_separator_regexp.split(value)):
            if i % 2 != 0:
                # Only the first separator decides the collection type.
                if not sep:
                    sep = s
                continue
            values.append(self.decode_content_value(s))

        if sep == ",":
            return values
        elif sep == ";":
            return tuple(values)
        else:
            return values[0]

    def decode_content_value(self, value):

        "Decode the given 'value', replacing quoted separator characters."

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamParser.decode_content(self, value)
        return value.replace(r"\,", ",").replace(r"\;", ";")

    # Internal methods.

    def decode_quoted_value(self, value):

        """
        Decode the given 'value', returning it without any enclosing double
        quotes.

        Values which are not enclosed in a pair of double quotes (including the
        empty string and a lone quote character) are returned unchanged.
        """

        # At least two characters are needed for an opening and closing quote;
        # this also avoids indexing into an empty string.

        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            return value[1:-1]
        else:
            return value

    def decode_parameters(self, parameters):

        """
        Decode the given 'parameters' according to the vCalendar specification.

        A new dictionary is returned in which the values of parameters named in
        QUOTED_PARAMETERS have their enclosing quotes removed, and the values of
        parameters named in MULTIVALUED_PARAMETERS are lists of strings.
        """

        decoded_parameters = {}

        for param_name, param_value in parameters.items():
            if param_name in QUOTED_PARAMETERS:
                # With the outermost quotes removed, the individual values
                # remain separated by the quote-comma-quote sequence.
                param_value = self.decode_quoted_value(param_value)
                separator = '","'
            else:
                separator = ","
            if param_name in MULTIVALUED_PARAMETERS:
                param_value = param_value.split(separator)
            decoded_parameters[param_name] = param_value

        return decoded_parameters

class vCalendarParser(vContent.Parser):

    "A parser specifically for vCalendar/iCalendar."

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of 'f' using the given 'parser_cls' or, if this is
        omitted or None, vCalendarStreamParser.
        """

        return vContent.Parser.parse(self, f, (parser_cls or vCalendarStreamParser))

# Writer classes.
class vCalendarStreamWriter(vContent.StreamWriter):

    "A stream writer specifically for vCalendar."

    # Overridden methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification.

        A new dictionary is returned in which the values of parameters named in
        MULTIVALUED_PARAMETERS (given as collections of strings) are joined
        into a single string, and the values of parameters named in
        QUOTED_PARAMETERS are passed through 'encode_quoted_parameter_value'.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():
            quoted = param_name in QUOTED_PARAMETERS

            # Join collections before quoting: the quoting operation is then
            # given a single string, and the result mirrors what the parser in
            # this module undoes by removing the outermost quotes and splitting
            # on the quote-comma-quote sequence.
            # NOTE(review): assumes encode_quoted_parameter_value (defined in
            # vContent) wraps a string in double quotes - confirm.

            if param_name in MULTIVALUED_PARAMETERS:
                if quoted:
                    separator = '","'
                else:
                    separator = ","
                param_value = separator.join(param_value)

            if quoted:
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, value):

        """
        Encode the given 'value' (which may be a list or tuple of separate
        values), quoting characters and separating collections of values.

        The elements of a list are separated by commas and the elements of a
        tuple by semicolons; any other value is encoded as a single value.
        """

        if isinstance(value, list):
            sep = ","
        elif isinstance(value, tuple):
            sep = ";"
        else:
            # Treat a plain value as a collection of one, needing no separator.
            value = [value]
            sep = ""

        return sep.join([self.encode_content_value(v) for v in value])

    def encode_content_value(self, value):

        "Encode the given 'value', quoting characters."

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamWriter.encode_content(self, value)
        return value.replace(";", r"\;").replace(",", r"\,")

# Public functions.

def parse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    As a result of parsing the resource, the root node of the imported resource
    is returned.
    """

    return vContent.parse(stream_or_string, encoding, non_standard_newline, vCalendarParser)

def iterparse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    An iterator is returned which provides event tuples describing parsing
    events of the form (name, parameters, value).
    """

    return vContent.iterparse(stream_or_string, encoding, non_standard_newline, vCalendarStreamParser)

def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.
    """

    return vContent.iterwrite(stream_or_string, write, encoding, line_length, vCalendarStreamWriter)

# vim: tabstop=4 expandtab shiftwidth=4