vContent

Annotated vCalendar.py

76:c675f7bdd52d
2017-11-24 Paul Boddie Extended the value decoding and encoding mechanisms to avoid splitting RRULE values using commas and quoting commas in encoded RRULE values.
paul@4 1
#!/usr/bin/env python
paul@4 2
paul@4 3
"""
paul@4 4
Parsing of vCalendar and iCalendar files.
paul@4 5
paul@76 6
Copyright (C) 2008, 2009, 2011, 2013, 2014, 2015,
paul@76 7
              2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@4 8
paul@4 9
This program is free software; you can redistribute it and/or modify it under
paul@14 10
the terms of the GNU General Public License as published by the Free Software
paul@14 11
Foundation; either version 3 of the License, or (at your option) any later
paul@14 12
version.
paul@4 13
paul@4 14
This program is distributed in the hope that it will be useful, but WITHOUT
paul@4 15
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@14 16
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@4 17
details.
paul@4 18
paul@14 19
You should have received a copy of the GNU General Public License along with
paul@14 20
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@4 21
paul@4 22
--------
paul@4 23
paul@4 24
References:
paul@4 25
paul@16 26
RFC 5545: Internet Calendaring and Scheduling Core Object Specification
paul@16 27
          (iCalendar)
paul@18 28
          http://tools.ietf.org/html/rfc5545
paul@16 29
paul@4 30
RFC 2445: Internet Calendaring and Scheduling Core Object Specification
paul@4 31
          (iCalendar)
paul@18 32
          http://tools.ietf.org/html/rfc2445
paul@4 33
"""
paul@4 34
paul@5 35
import vContent
paul@24 36
import re
paul@4 37
paul@4 38
# Compatibility shim: when no built-in 'set' name is available (as in Python
# versions before 2.4), fall back to the equivalent class from the 'sets'
# module so that the rest of this module can use 'set' unconditionally.

try:
    set
except NameError:
    from sets import Set as set
paul@4 42
paul@39 43
# Expose the vContent parsing exception under this module's own namespace.
ParseError = vContent.ParseError
paul@39 44
paul@7 45
# Format details.
paul@7 46
paul@38 47
# Names that are treated as sections: they are written between BEGIN and END
# lines and hold a list of nested content items as their value.

SECTION_TYPES = set([
    "VALARM", "VCALENDAR", "VEVENT", "VFREEBUSY", "VJOURNAL", "VTIMEZONE", "VTODO",
    "DAYLIGHT", "STANDARD"
    ])

# Parameters whose values are enclosed in double quotes.

QUOTED_PARAMETERS = set([
    "ALTREP", "DELEGATED-FROM", "DELEGATED-TO", "DIR", "MEMBER", "SENT-BY"
    ])

# Parameters whose values are lists of separate values.

MULTIVALUED_PARAMETERS = set([
    "DELEGATED-FROM", "DELEGATED-TO", "MEMBER"
    ])

# Properties whose values must not be split at commas: only semicolons separate
# the parts of such values, and only semicolons are quoted when encoding them.

NON_MULTIVALUED_PROPERTIES = set([
    "RRULE"
    ])

# Value types regarded as quoted.
# NOTE(review): not referenced by the code visible in this module - confirm
# whether other modules rely on it.

QUOTED_TYPES = set(["URI"])

# Separators not preceded by a backslash. The separator is captured so that
# splitting with these expressions yields values and separators alternately.

unquoted_separator_regexp = re.compile(r"(?<!\\)([,;])")
unquoted_semicolon_regexp = re.compile(r"(?<!\\)([;])")
paul@24 64
paul@7 65
# Parser classes.
paul@7 66
paul@5 67
class vCalendarStreamParser(vContent.StreamParser):

    "A stream parser specifically for vCalendar/iCalendar."

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, value), where the parameters obtained from the
        generic parser have been decoded using 'decode_parameters'.
        """

        name, parameters, value = vContent.StreamParser.next(self)
        return name, self.decode_parameters(parameters), value

    def decode_content(self, name, value):

        """
        Decode for property 'name' the given 'value' (which may represent a
        collection of distinct values), replacing quoted separator characters.

        The value is split at unquoted separators: commas and semicolons in
        general, but only semicolons for the properties listed in
        NON_MULTIVALUED_PROPERTIES. A list is returned if the first separator
        found is a comma, a tuple if it is a semicolon, and a single decoded
        value if the value contains no unquoted separator at all.
        """

        if name in NON_MULTIVALUED_PROPERTIES:
            split = unquoted_semicolon_regexp.split
        else:
            split = unquoted_separator_regexp.split

        # Splitting using a capturing group yields the values at even positions
        # and the separators found between them at odd positions.

        parts = split(value)
        values = [self.decode_content_value(name, s) for s in parts[::2]]

        # Only the first separator decides the kind of collection returned.

        sep = None
        if len(parts) > 1:
            sep = parts[1]

        if sep == ",":
            return values
        elif sep == ";":
            return tuple(values)
        else:
            return values[0]

    def decode_content_value(self, name, value):

        """
        Decode for property 'name' the given 'value', replacing quoted separator
        characters.

        The generic decoding provided by vContent.StreamParser is applied first,
        after which quoted commas and semicolons are replaced by the plain
        characters.
        """

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamParser.decode_content(self, name, value)
        return value.replace(r"\,", ",").replace(r"\;", ";")

    # Internal methods.

    def decode_quoted_value(self, value):

        """
        Decode the given 'value', returning it without its enclosing double
        quotes if it both starts and ends with one, or unchanged otherwise.
        """

        # Require two distinct characters for the quotes so that an empty value
        # does not cause an IndexError and a lone quote character is not taken
        # to be both the opening and the closing quote.

        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            return value[1:-1]
        else:
            return value

    def decode_parameters(self, parameters):

        """
        Decode the given 'parameters' according to the vCalendar specification.

        A new dictionary is returned in which the values of parameters listed in
        QUOTED_PARAMETERS have had their enclosing quotes removed and the values
        of parameters listed in MULTIVALUED_PARAMETERS have been split into
        lists. All other values are retained as they are.
        """

        decoded_parameters = {}

        for param_name, param_value in parameters.items():

            # With the outermost quotes removed, the individual values of a
            # quoted parameter remain separated by the inner quotes together
            # with the comma.

            if param_name in QUOTED_PARAMETERS:
                param_value = self.decode_quoted_value(param_value)
                separator = '","'
            else:
                separator = ","

            if param_name in MULTIVALUED_PARAMETERS:
                param_value = param_value.split(separator)

            decoded_parameters[param_name] = param_value

        return decoded_parameters
paul@5 152
paul@5 153
class vCalendarParser(vContent.Parser):

    "A parser specifically for vCalendar/iCalendar."

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of 'f' using the generic vContent.Parser mechanism,
        employing the given 'parser_cls' as stream parser or, if this is omitted
        or false, vCalendarStreamParser.
        """

        return vContent.Parser.parse(self, f, (parser_cls or vCalendarStreamParser))

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'.

        The component is a (name, parameters, value) tuple. For names found in
        SECTION_TYPES, a missing or false 'value' is replaced by a new empty
        list; for all other names, it is replaced by None.
        """

        if name in SECTION_TYPES:
            return (name, parameters, value or [])
        else:
            return (name, parameters, value or None)
paul@65 171
paul@7 172
# Writer classes.
paul@7 173
paul@7 174
class vCalendarStreamWriter(vContent.StreamWriter):

    "A stream writer specifically for vCalendar/iCalendar."

    # Overridden methods.

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.

        Where 'name' is found in SECTION_TYPES, a BEGIN line is written, each
        (name, parameters, value) item in 'value' is written in turn using this
        method, and an END line is written. The 'parameters' given for such a
        section are not themselves written. Any other content is written using
        the generic vContent.StreamWriter mechanism.
        """

        if name in SECTION_TYPES:
            self.write_content_line("BEGIN", {}, name)
            for n, p, v in value:
                self.write(n, p, v)
            self.write_content_line("END", {}, name)
        else:
            vContent.StreamWriter.write(self, name, parameters, value)

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification.

        A new dictionary is returned in which the values of parameters listed in
        MULTIVALUED_PARAMETERS have been joined into single strings and the
        values of parameters listed in QUOTED_PARAMETERS have been quoted. All
        other values are retained as they are.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():
            quoted = param_name in QUOTED_PARAMETERS

            # The values of a quoted parameter are joined using the inner quotes
            # together with the comma, leaving only the outermost quotes to be
            # added when the joined value is itself quoted.

            if param_name in MULTIVALUED_PARAMETERS:
                if quoted:
                    separator = '","'
                else:
                    separator = ","
                param_value = separator.join(param_value)

            # NOTE(review): encode_quoted_parameter_value is not defined in this
            # class and is presumably provided by vContent.StreamWriter.

            if quoted:
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, name, value):

        """
        Encode for property 'name' the given 'value' (which may be a list or
        tuple of separate values), quoting characters and separating collections
        of values.

        The encoded elements of a list are joined using commas, whereas those of
        a tuple are joined using semicolons. Any other kind of value is encoded
        as a single value.
        """

        if isinstance(value, list):
            sep = ","
        elif isinstance(value, tuple):
            sep = ";"
        else:
            value = [value]
            sep = ""

        return sep.join([self.encode_content_value(name, v) for v in value])

    def encode_content_value(self, name, value):

        """
        Encode for property 'name' the given 'value', quoting characters.

        The generic encoding provided by vContent.StreamWriter is applied first,
        after which separator characters are quoted: only semicolons for the
        properties listed in NON_MULTIVALUED_PROPERTIES, but both semicolons and
        commas for all other properties.
        """

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamWriter.encode_content(self, name, value)

        if name in NON_MULTIVALUED_PROPERTIES:
            quote = self.quote_semicolons
        else:
            quote = self.quote_separators

        return quote(value)

    def quote_separators(self, value):

        "Return 'value' with each semicolon and comma preceded by a backslash."

        return value.replace(";", r"\;").replace(",", r"\,")

    def quote_semicolons(self, value):

        "Return 'value' with each semicolon preceded by a backslash."

        return value.replace(";", r"\;")
paul@76 257
paul@4 258
# Public functions.
paul@4 259
paul@11 260
def parse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    As a result of parsing the resource, the root node of the imported resource
    is returned.

    The work is delegated to vContent.parse, with vCalendarParser supplied as
    the parser class.
    """

    return vContent.parse(stream_or_string, encoding, non_standard_newline, vCalendarParser)
paul@5 280
paul@11 281
def iterparse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    An iterator is returned which provides event tuples describing parsing
    events of the form (name, parameters, value).

    The work is delegated to vContent.iterparse, with vCalendarStreamParser
    supplied as the stream parser class.
    """

    return vContent.iterparse(stream_or_string, encoding, non_standard_newline, vCalendarStreamParser)
paul@11 301
paul@21 302
def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    All arguments are passed unchanged to vContent.iterwrite, with
    vCalendarStreamWriter supplied as the stream writer class.
    """

    return vContent.iterwrite(stream_or_string, write, encoding, line_length, vCalendarStreamWriter)
paul@8 321
paul@76 322
def to_dict(node):

    """
    Return the 'node' converted to a dictionary representation.

    The conversion is performed by vContent.to_dict, which is given
    SECTION_TYPES in addition to the node itself.
    """

    return vContent.to_dict(node, SECTION_TYPES)
paul@76 327
paul@40 328
# Expose the vContent 'to_node' function unchanged under this module's namespace.
to_node = vContent.to_node
paul@40 329
paul@4 330
# vim: tabstop=4 expandtab shiftwidth=4