vContent

Annotated vContent.py

16:ee0e3f1920e2
2011-07-21 Paul Boddie Changed the RFC URLs to point to an active site.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Parsing of vCard, vCalendar and iCalendar files.
paul@0 5
paul@11 6
Copyright (C) 2005, 2006, 2007, 2008, 2009 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@14 9
the terms of the GNU General Public License as published by the Free Software
paul@14 10
Foundation; either version 3 of the License, or (at your option) any later
paul@14 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@14 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@14 18
You should have received a copy of the GNU General Public License along with
paul@14 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
paul@0 21
--------
paul@0 22
paul@0 23
References:
paul@0 24
paul@16 25
RFC 5545: Internet Calendaring and Scheduling Core Object Specification
paul@16 26
          (iCalendar)
paul@16 27
          http://www.rfc-editor.org/rfc/rfc5545.txt
paul@16 28
paul@0 29
RFC 2445: Internet Calendaring and Scheduling Core Object Specification
paul@0 30
          (iCalendar)
paul@16 31
          http://www.rfc-editor.org/rfc/rfc2445.txt
paul@0 32
paul@0 33
RFC 2425: A MIME Content-Type for Directory Information
paul@16 34
          http://www.rfc-editor.org/rfc/rfc2425.txt
paul@0 35
paul@0 36
RFC 2426: vCard MIME Directory Profile
paul@16 37
          http://www.rfc-editor.org/rfc/rfc2426.txt
paul@0 38
"""
paul@0 39
paul@4 40
try:
paul@4 41
    set
paul@4 42
except NameError:
paul@4 43
    from sets import Set as set
paul@4 44
paul@0 45
# Encoding-related imports.
paul@0 46
paul@0 47
import base64, quopri
paul@9 48
import codecs
paul@0 49
paul@4 50
# Tokenisation help.
paul@4 51
paul@4 52
import re
paul@4 53
paul@9 54
# Configuration.
paul@9 55
paul@9 56
default_encoding = "utf-8"
paul@9 57
paul@7 58
# Reader and parser classes.
paul@0 59
paul@0 60
class Reader:

    "A simple class wrapping a file, providing simple pushback capabilities."

    def __init__(self, f, non_standard_newline=0):

        """
        Initialise the object with the file 'f'. If 'non_standard_newline' is
        set to a true value (unlike the default), lines ending with CR will be
        treated as complete lines.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline
        self.lines = []      # pushed-back lines; the last one is read first
        self.line_number = 1 # about to read line 1

    def close(self):

        "Close the reader by closing the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Push the given 'line' back so that the next line read is actually the
        given 'line' and not the next line from the underlying file. The line
        number is decremented to compensate for the earlier read.
        """

        self.lines.append(line)
        self.line_number -= 1

    def readline(self):

        """
        Return the next line: the most recently pushed-back line if any exist,
        otherwise a line read directly from the file. An empty result from the
        file (the end of the input) is passed through unchanged.

        Without 'non_standard_newline', a line ending in a bare CR is regarded
        as broken and is joined with the following text from the file. With
        'non_standard_newline', such a line is regarded as complete and is
        returned with LF appended.
        """

        self.line_number += 1

        if self.lines:
            return self.lines.pop()

        line = self.f.readline()

        if self.non_standard_newline:
            if line.endswith("\r"):
                return line + "\n"
            return line

        # Sanity check for broken lines (\r instead of \r\n or \n).
        # The end of the input must terminate the loop: nothing more can be
        # appended to a final line ending in CR, and continuing to read would
        # never make progress.

        while line.endswith("\r"):
            more = self.f.readline()
            if not more:
                break
            line += more

        return line

    def read_content_line(self):

        """
        Read an entire content line, itself potentially consisting of many
        physical lines of text, returning a string. Blank lines before the
        content line are skipped. An empty string is returned when no more
        content is available.
        """

        # Skip blank lines.

        line = self.readline()
        while line:
            line_stripped = line.rstrip("\r\n")
            if line_stripped:
                break
            line = self.readline()
        else:
            return ""

        # Strip all appropriate whitespace from the right end of each line.
        # For subsequent lines, remove the first whitespace character.
        # See section 4.1 of the iCalendar specification.

        lines = [line_stripped]

        line = self.readline()
        while line.startswith(" ") or line.startswith("\t"):
            lines.append(line[1:].rstrip("\r\n"))
            line = self.readline()

        # Since one line too many will have been read, push the line back into
        # the file. Nothing is pushed back at the end of the input.

        if line:
            self.pushback(line)

        return "".join(lines)

    def get_content_line(self):

        "Return a content line object for the current line."

        return ContentLine(self.read_content_line())
paul@8 156
paul@8 157
class ContentLine:

    "The text of one content line together with a moving search position."

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):

        "Initialise the content line with 'text', positioned at its start."

        self.text = text
        self.start = 0

    def get_remaining(self):

        "Return the text from the current position to the end of the line."

        return self.text[self.start:]

    def search(self, targets):

        """
        Look for the first of the 'targets' found at or after the current
        position and outside any double-quoted region, returning a tuple of the
        form (string, target) where the string is the text from the current
        position up to the target. The position is then moved past the target.

        If no target is found, the remaining text is returned together with a
        target of None, and the position is moved to the end of the text.

        The 'targets' parameter must be a regular expression object or an object
        compatible with the API of such objects, and it is expected to match the
        double quote character as well so that quoted regions can be detected.
        """

        text = self.text
        origin = self.start
        end = len(text)

        cursor = origin
        quoted = False

        while cursor < end:
            found = targets.search(text, cursor)

            # Nothing more to find: give up on the remaining text.

            if found is None:
                break

            # A double quote opens or closes a quoted region.

            if found.group() == '"':
                quoted = not quoted

            # Any other target counts only outside quoted regions.

            elif not quoted:
                self.start = found.end()
                return text[origin:found.start()], found.group()

            # Resume the search after the quote or the quoted target.

            cursor = found.end()

        self.start = end
        return text[origin:], None
paul@0 232
paul@0 233
class StreamParser:

    "A stream parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the parser for the given file 'f', which must provide a
        'line_number' attribute together with 'get_content_line' and 'close'
        methods.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised when no more
        content is available.
        """

        return self.parse_content_line()

    def __next__(self):

        """
        Return the next content item, delegating to the 'next' method so that
        the parser also satisfies the iterator protocol of Python 3.
        """

        return self.next()

    def decode_content(self, value):

        """
        Decode the given 'value', replacing quoted characters: carriage returns
        are removed and the escape sequences \\N and \\n become newlines.
        """

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        StopIteration is raised if the content line is empty. ValueError is
        raised, carrying the line number recorded before the content line was
        read, if the line provides no ':' introducing a value.
        """

        f = self.f
        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            # A parameter without '=' is recorded with a value of None.

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()
            else:
                parameter_value = None

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":
            raise ValueError(line_number)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'.

        Quoted characters are replaced first. A value whose ENCODING parameter
        is QUOTED-PRINTABLE is then decoded to text using the CHARSET parameter
        (or iso-8859-1 if absent), one whose ENCODING parameter is BASE64 is
        decoded to binary data, and any other value is returned as text.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(value)

        if encoding == "QUOTED-PRINTABLE":
            return quopri.decodestring(value).decode(charset or "iso-8859-1")
        elif encoding == "BASE64":

            # b64decode is available wherever decodestring was, and remains
            # available now that decodestring has been removed.

            return base64.b64decode(value)
        else:
            return value
paul@0 338
paul@2 339
class ParseError(Exception):

    "An error in the structure of the content being parsed."

class ParserBase:

    "An abstract parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self):

        "Initialise the parser."

        # The names of the components which have begun but not yet ended.

        self.names = []

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using an instance of 'parser_cls'
        (or StreamParser if omitted) created for 'f' to provide the items of
        the form (name, parameters, value).

        For each BEGIN item, the 'startComponent' method is called with the
        item's value and parameters; for each END item, 'endComponent' is
        called with the item's value; for all other items, 'handleProperty' is
        called with the name, parameters and value. These methods must be
        provided by subclasses.

        ParseError is raised if the value of an END item does not match that of
        the most recent unfinished BEGIN item.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":
                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError("Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)
paul@2 371
paul@2 372
class Parser(ParserBase):

    "A SAX-like parser for vCard/vCalendar/iCalendar-like formats."

    def __init__(self):

        "Initialise the parser with an empty stack of active components."

        ParserBase.__init__(self)
        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content. The new
        component is attached to any current component before itself becoming
        the current component.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack. If only one component exists on the stack, retain it
        for later inspection.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        "Attach the given 'component' to its parent, if a parent exists."

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'. Where 'value' is None, a new empty list is provided in its
        place to hold the children of the component.
        """

        # Only a missing value indicates a component: an empty property value
        # must be preserved and not confused with an empty list of children.

        if value is None:
            value = []

        return (name, parameters, value)

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', returning the first component
        recorded. IndexError is raised if no component was recorded.
        """

        ParserBase.parse(self, f, parser_cls)
        return self.components[0]
paul@0 445
paul@7 446
# Writer classes.
paul@7 447
paul@8 448
class Writer:

    "A simple class wrapping a file, providing simple output capabilities."

    default_line_length = 76

    def __init__(self, f, line_length=None):

        """
        Initialise the object with the file 'f'. If 'line_length' is set, the
        length of written lines will conform to the specified value instead of
        the default value.

        ValueError is raised if the resulting line length is less than 2, since
        a continuation line starts with a space and would then have no room for
        any text.
        """

        self.f = f
        self.line_length = line_length or self.default_line_length

        # Without this check, writing to a folded line would never make
        # progress.

        if self.line_length < 2:
            raise ValueError("line_length must be at least 2, not %r" % (line_length,))

        # The number of characters already written on the current line.

        self.char_offset = 0

    def close(self):

        "Close the writer."

        self.f.close()

    def write(self, text):

        """
        Write the 'text' to the file, folding the output by writing CRLF
        followed by a space whenever the current line is full.
        """

        f = self.f
        line_length = self.line_length

        pos = 0
        end = len(text)

        while pos < end:
            space = line_length - self.char_offset

            # Fill the current line and start a continuation line, whose
            # leading space counts as its first character.

            if end - pos > space:
                f.write(text[pos:pos + space])
                f.write("\r\n ")
                self.char_offset = 1
                pos += space

            # Otherwise, the rest of the text fits on the current line.

            else:
                f.write(text[pos:])
                self.char_offset += end - pos
                pos = end

    def end_line(self):

        "End the current content line if anything has been written to it."

        if self.char_offset > 0:
            self.char_offset = 0
            self.f.write("\r\n")
paul@8 503
paul@7 504
class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer for the given writer 'f', which must
        provide 'write', 'end_line' and 'close' methods.
        """

        self.f = f

    def close(self):

        "Close the writer."

        self.f.close()

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. A parameter whose value is None is written
        as a name alone, without '=' or any value.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)

            # Parameters parsed without '=' have None as their value.

            if param_value is not None:
                f.write("=")
                f.write(param_value)

        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        Where the ENCODING parameter is QUOTED-PRINTABLE, the text 'value' is
        encoded using the CHARSET parameter (or iso-8859-1 if absent) and then
        made quoted-printable. Where it is BASE64, the binary 'value' is made
        into base64 text. In all cases, the result is finally passed through
        the 'encode_content' method.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        # Both encoders produce ASCII-only byte strings which are converted to
        # text for the benefit of the underlying writer.

        if encoding == "QUOTED-PRINTABLE":
            value = quopri.encodestring(value.encode(charset or "iso-8859-1")).decode("ascii")
        elif encoding == "BASE64":

            # encodestring is absent from recent Python versions, where
            # encodebytes provides the same output.

            encode = getattr(base64, "encodebytes", None) or base64.encodestring
            value = encode(value).decode("ascii")

        return self.encode_content(value)

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary. Parameters having None as their value are
        retained unchanged.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted.

            if param_value is not None and ContentLine.SEPARATORS.search(param_value):
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, value):

        "Encode the given 'value', quoting newline characters."

        return value.replace("\n", "\\n")
paul@11 601
paul@9 602
# Utility functions.
paul@9 603
paul@9 604
def is_input_stream(stream_or_string):

    """
    Return whether 'stream_or_string' provides a 'read' attribute and is thus
    regarded as a stream and not as a filename.
    """

    readable = hasattr(stream_or_string, "read")
    return readable
paul@9 606
paul@11 607
def get_input_stream(stream_or_string, encoding=None):

    """
    Return a stream for reading from 'stream_or_string'. A stream is returned
    unchanged, whereas anything else is treated as a filename and opened using
    the codecs module with the given 'encoding' or, if that is omitted, the
    default encoding.
    """

    if not is_input_stream(stream_or_string):
        chosen_encoding = encoding or default_encoding
        return codecs.open(stream_or_string, encoding=chosen_encoding)

    return stream_or_string
paul@9 612
paul@11 613
def get_output_stream(stream_or_string, encoding=None):

    """
    Return a stream for writing to 'stream_or_string'. An object providing a
    'write' attribute is returned unchanged, whereas anything else is treated as
    a filename and opened for writing using the codecs module with the given
    'encoding' or, if that is omitted, the default encoding.
    """

    if not hasattr(stream_or_string, "write"):
        chosen_encoding = encoding or default_encoding
        return codecs.open(stream_or_string, "w", encoding=chosen_encoding)

    return stream_or_string
paul@9 618
paul@0 619
# Public functions.
paul@0 620
paul@11 621
def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource identified by 'stream_or_string' and return the root
    node of the imported resource.

    The 'stream_or_string' is either a stream providing Unicode data (the codecs
    module can be used to open files or to wrap streams in order to provide
    Unicode data) or a filename identifying a file to be parsed. A file opened
    here on behalf of the caller is closed before returning; a stream supplied
    by the caller is left open.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify a class to be instantiated
    in place of Parser.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)

    # Parse using the reader.

    try:
        chosen_cls = parser_cls or Parser
        return chosen_cls().parse(reader)

    # Close any streams opened by this function.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()
paul@9 654
paul@11 655
def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Return an iterator over the resource identified by 'stream_or_string',
    providing event tuples describing parsing events of the form
    (name, parameters, value).

    The 'stream_or_string' is either a stream providing Unicode data (the codecs
    module can be used to open files or to wrap streams in order to provide
    Unicode data) or a filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify a class to be instantiated
    with the reader in place of StreamParser.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)
    chosen_cls = parser_cls or StreamParser
    return chosen_cls(reader)
paul@5 678
paul@11 679
def iterwrite(stream_or_string, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will send data to the resource identified by
    'stream_or_string', which is either a stream accepting Unicode data (the
    codecs module can be used to open files or to wrap streams in order to
    accept Unicode data) or a filename identifying a file to be written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' can be used to specify a class to be instantiated
    with the underlying line writer in place of StreamWriter.
    """

    line_writer = Writer(get_output_stream(stream_or_string, encoding), line_length)
    chosen_cls = writer_cls or StreamWriter
    return chosen_cls(line_writer)
paul@8 698
paul@0 699
# vim: tabstop=4 expandtab shiftwidth=4