vContent

Annotated vContent.py

40:bfb1e0c93471
2014-09-24 Paul Boddie Moved representation conversion functions into vContent.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Parsing of vCard, vCalendar and iCalendar files.
paul@0 5
paul@39 6
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,
paul@39 7
              2014 Paul Boddie <paul@boddie.org.uk>
paul@0 8
paul@0 9
This program is free software; you can redistribute it and/or modify it under
paul@14 10
the terms of the GNU General Public License as published by the Free Software
paul@14 11
Foundation; either version 3 of the License, or (at your option) any later
paul@14 12
version.
paul@0 13
paul@0 14
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 15
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@14 16
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 17
details.
paul@0 18
paul@14 19
You should have received a copy of the GNU General Public License along with
paul@14 20
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 21
paul@0 22
--------
paul@0 23
paul@0 24
References:
paul@0 25
paul@16 26
RFC 5545: Internet Calendaring and Scheduling Core Object Specification
paul@16 27
          (iCalendar)
paul@18 28
          http://tools.ietf.org/html/rfc5545
paul@16 29
paul@0 30
RFC 2445: Internet Calendaring and Scheduling Core Object Specification
paul@0 31
          (iCalendar)
paul@18 32
          http://tools.ietf.org/html/rfc2445
paul@0 33
paul@0 34
RFC 2425: A MIME Content-Type for Directory Information
paul@18 35
          http://tools.ietf.org/html/rfc2425
paul@0 36
paul@0 37
RFC 2426: vCard MIME Directory Profile
paul@18 38
          http://tools.ietf.org/html/rfc2426
paul@0 39
"""
paul@0 40
paul@4 41
try:
paul@4 42
    set
paul@4 43
except NameError:
paul@4 44
    from sets import Set as set
paul@4 45
paul@0 46
# Encoding-related imports.
paul@0 47
paul@0 48
import base64, quopri
paul@9 49
import codecs
paul@0 50
paul@4 51
# Tokenisation help.
paul@4 52
paul@4 53
import re
paul@4 54
paul@9 55
# Configuration.
paul@9 56
paul@9 57
default_encoding = "utf-8"
paul@9 58
paul@39 59
class ParseError(Exception):

    """
    An exception signalling a general problem encountered when parsing
    content.
    """
paul@39 64
paul@7 65
# Reader and parser classes.
paul@0 66
paul@0 67
class Reader:

    """
    A wrapper around a file-like object which supplies lines of text and which
    permits lines already read to be pushed back for subsequent reading.
    """

    def __init__(self, f, non_standard_newline=0):

        """
        Initialise the reader with the file 'f'. Where 'non_standard_newline'
        is given as a true value (it is false by default), a line ending with
        CR alone is regarded as being a complete line.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline

        # Pushed-back lines: the most recently pushed line is read first.

        self.lines = []

        # The number of the line which is about to be read.

        self.line_number = 1

    def close(self):

        "Close the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Return the given 'line' to the reader so that it is provided by the
        next read operation in preference to anything from the underlying
        file. The line number is wound back accordingly.
        """

        self.line_number -= 1
        self.lines.append(line)

    def readline(self):

        """
        Return the most recently pushed-back line if one exists; otherwise,
        return a line obtained from the underlying file. An empty string is
        returned when the file is exhausted.
        """

        self.line_number += 1

        if self.lines:
            return self.lines.pop()

        line = self.f.readline()

        # With non-standard newlines, a trailing CR terminates the line and is
        # completed with LF for the benefit of callers.

        if self.non_standard_newline:
            if line.endswith("\r"):
                line += "\n"
            return line

        # Otherwise, a trailing CR indicates a broken line (\r instead of \r\n
        # or \n): keep adding text from the file until the line is properly
        # terminated or until the file provides nothing more.

        while line.endswith("\r"):
            more = self.f.readline()
            if not more:
                break
            line += more

        return line

    def read_content_line(self):

        """
        Return a complete content line as a string, joining together the
        physical lines of text which make it up. An empty string is returned
        when no more content is available.
        """

        # Find the first line which is not blank, giving up at the end of the
        # input.

        while True:
            line = self.readline()
            if not line:
                return ""
            first = line.rstrip("\r\n")
            if first:
                break

        # Continuation lines start with a space or tab: the first character is
        # dropped along with the line ending before the text is appended.
        # See section 4.1 of the iCalendar specification.

        parts = [first]

        while True:
            line = self.readline()
            if line[:1] not in (" ", "\t"):
                break
            parts.append(line[1:].rstrip("\r\n"))

        # The line which ended the content line belongs to the next content
        # line and must be read again later.

        if line:
            self.pushback(line)

        return "".join(parts)

    def get_content_line(self):

        "Return a ContentLine object wrapping the next complete content line."

        text = self.read_content_line()
        return ContentLine(text)
paul@8 166
paul@8 167
class ContentLine:

    """
    A content line whose text can be searched incrementally for separator
    characters, with the position of the search being remembered.
    """

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):

        "Initialise the content line with the given 'text'."

        self.text = text

        # The position from which the next search will start.

        self.start = 0

    def __repr__(self):
        return "ContentLine(%r)" % self.text

    def get_remaining(self):

        "Return the text which has not yet been consumed by searching."

        return self.text[self.start:]

    def search(self, targets):

        """
        Look for one of the 'targets' in the unconsumed text, returning a tuple
        of the form (string, target) where the string is the text between the
        current position and the target found. Where no target is found, the
        whole of the unconsumed text is returned with None as the target.

        Targets appearing between double quotes are ignored. The double quotes
        themselves remain part of any returned string.

        The 'targets' parameter must be a regular expression object or an
        object compatible with the API of such objects, and it must be able to
        match the double quote character if quoted regions are to be observed.
        """

        text = self.text
        begin = cursor = self.start
        end = len(text)
        quoted = 0

        while cursor < end:
            found = targets.search(text, cursor)

            # Nothing more can match in the remaining text.

            if found is None:
                break

            token = found.group()

            # Double quotes open and close regions in which targets are
            # ignored.

            if token == '"':
                quoted = not quoted

            # Outside such regions, the first target ends the search and the
            # position moves beyond it.

            elif not quoted:
                self.start = found.end()
                return text[begin:found.start()], token

            # Continue beyond a quote or an ignored target.

            cursor = found.end()

        # Without a target, all of the remaining text is consumed.

        self.start = end
        return text[begin:], None
paul@0 245
paul@0 246
class StreamParser:

    "A stream parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the parser for the given reader 'f', which must provide a
        'line_number' attribute along with 'get_content_line' and 'close'
        methods.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised when no more
        content is available.
        """

        return self.parse_content_line()

    def __next__(self):

        """
        Support the iterator protocol of more recent Python versions by
        delegating to the 'next' method, thus respecting any overriding of that
        method in subclasses.
        """

        return self.next()

    def decode_content(self, value):

        """
        Decode the given 'value', removing CR characters and replacing the
        quoted newline sequences (backslash followed by 'n' or 'N') with actual
        newline characters.
        """

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        The parameters are provided as a dictionary mapping parameter names to
        values, with None being the value for any parameter given without one.

        StopIteration is raised where the content line provides neither a name
        nor a separator. ValueError is raised, with the line number and content
        line as arguments, where the value separator (a colon) is missing.
        """

        f = self.f

        # Remember where the content line started for error reporting.

        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()
            else:
                parameter_value = None

            # Record the value against the name in the parameters dictionary.

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":
            raise ValueError(line_number, line)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'.

        Where the ENCODING parameter is QUOTED-PRINTABLE, text is returned,
        having been decoded using the CHARSET parameter or iso-8859-1 if no
        such parameter is given. Where it is BASE64, the decoded binary data is
        returned. Otherwise, the value is returned with only quoted characters
        having been replaced.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(value)

        if encoding == "QUOTED-PRINTABLE":

            # Decoding the resulting byte string works regardless of whether
            # the 'unicode' built-in is available.

            return quopri.decodestring(value).decode(charset or "iso-8859-1")

        elif encoding == "BASE64":

            # This is the operation underlying base64.decodestring, which is
            # itself no longer provided by recent Python versions.

            import binascii
            return binascii.a2b_base64(value)

        else:
            return value
paul@0 351
paul@2 352
class ParserBase:

    """
    An abstract parser for content in vCard/vCalendar/iCalendar-like formats.

    Subclasses must provide the 'startComponent', 'endComponent' and
    'handleProperty' methods invoked by the 'parse' method.
    """

    def __init__(self):

        "Initialise the parser."

        # The names of the components which have begun but not yet ended.

        self.names = []

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' (or StreamParser
        if omitted) to obtain (name, parameters, value) tuples from it.

        BEGIN and END items cause 'startComponent' and 'endComponent' to be
        invoked respectively; all other items are passed to 'handleProperty'.

        ParseError is raised where an END item does not name the most recently
        begun component. An END item appearing when no component has begun
        causes IndexError to be raised by the attempt to find that component.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":
                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError(
                        "Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)
paul@2 384
paul@2 385
class Parser(ParserBase):

    """
    A SAX-like parser for vCard/vCalendar/iCalendar-like formats which builds
    a tree of (name, parameters, value) tuples, where the value of a component
    is the list of its children.
    """

    def __init__(self):

        "Initialise the parser with an empty stack of active components."

        ParserBase.__init__(self)
        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack. If only one component exists on the stack, retain it
        for later inspection.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        """
        Attach the given 'component' to its parent, this being the component at
        the top of the active component stack. Without any active component,
        nothing is done.
        """

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'. Where 'value' is omitted (or None), a new empty list of
        children is provided in its place.
        """

        # Only an absent value indicates a component: an empty string is a
        # legitimate property value and must not be replaced by a list.

        if value is None:
            value = []

        return (name, parameters, value)

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', returning the first component on
        the component stack: the outermost component of the parsed content.
        """

        ParserBase.parse(self, f, parser_cls)
        return self.components[0]
paul@0 458
paul@7 459
# Writer classes.
paul@7 460
paul@8 461
class Writer:

    """
    A wrapper around a write operation which keeps track of the current output
    column in order to fold lines exceeding a given length.
    """

    default_line_length = 76

    def __init__(self, write, line_length=None):

        """
        Initialise the writer with the given 'write' operation: a callable
        accepting the text to be written. Where 'line_length' is given as a
        true value, written lines are limited to that length instead of the
        default length.
        """

        self._write = write
        self.line_length = line_length or self.default_line_length

        # The number of characters already written on the current output line.

        self.char_offset = 0

    def write(self, text):

        """
        Write the 'text', starting a continuation line (a line break followed
        by a single space) whenever the current line has no room for the text
        still to be written.
        """

        emit = self._write
        limit = self.line_length

        position = 0
        pending = len(text)

        while pending > 0:
            room = limit - self.char_offset

            # Everything left fits on the current line.

            if pending <= room:
                emit(text[position:])
                self.char_offset += pending
                break

            # Fill the current line and fold: the space introducing the
            # continuation line occupies its first column.

            emit(text[position:position + room])
            emit("\r\n ")
            self.char_offset = 1
            position += room
            pending -= room

    def end_line(self):

        """
        End the current content line, writing a line break only if something
        has been written since the last content line ended.
        """

        if self.char_offset > 0:
            self.char_offset = 0
            self._write("\r\n")
paul@8 510
paul@7 511
class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer with the given 'f' object, which must
        provide the 'write' and 'end_line' operations used to produce content
        lines.
        """

        self.f = f

    def append(self, record):

        """
        Write the given 'record': a sequence of the form (name, parameters,
        value).
        """

        self.write(*record)

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. Parameters having None as their value are
        written using only their names.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)

            # Parameters may be recorded without values when parsing and must
            # be written in the same form.

            if param_value is not None:
                f.write("=")
                f.write(param_value)
        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        Where the ENCODING parameter is QUOTED-PRINTABLE, the value is text
        which is encoded using the CHARSET parameter or iso-8859-1 if no such
        parameter is given. Where it is BASE64, the value is binary data.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        # Both encodings produce only ASCII characters, and so the results are
        # converted to text suitable for further processing and for writing.

        if encoding == "QUOTED-PRINTABLE":
            value = quopri.encodestring(value.encode(charset or "iso-8859-1")).decode("ascii")
        elif encoding == "BASE64":

            # Use the current name of the encoding function where available,
            # falling back to the name provided by older Python versions.

            encode = getattr(base64, "encodebytes", None) or base64.encodestring
            value = encode(value).decode("ascii")

        return self.encode_content(value)

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary. Parameters having None as their value are
        retained as they are.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted.

            if param_value is not None and ContentLine.SEPARATORS.search(param_value):
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, value):

        """
        Encode the given 'value', replacing newline characters with the quoted
        newline sequence (backslash followed by 'n').
        """

        return value.replace("\n", "\\n")
paul@11 605
paul@9 606
# Utility functions.
paul@9 607
paul@9 608
def is_input_stream(stream_or_string):

    """
    Return whether 'stream_or_string' appears to be a stream which can be read,
    this being indicated by the presence of a 'read' attribute.
    """

    readable = hasattr(stream_or_string, "read")
    return readable
paul@9 610
paul@11 611
def get_input_stream(stream_or_string, encoding=None):

    """
    Return a stream for reading from 'stream_or_string'. An existing input
    stream is returned unchanged; anything else is treated as a filename and
    opened using the given 'encoding' or, if omitted, the default encoding.
    """

    if not is_input_stream(stream_or_string):
        return codecs.open(stream_or_string, encoding=(encoding or default_encoding))

    return stream_or_string
paul@9 616
paul@11 617
def get_output_stream(stream_or_string, encoding=None):

    """
    Return a stream for writing to 'stream_or_string'. An object providing a
    'write' attribute is returned unchanged; anything else is treated as a
    filename and opened for writing using the given 'encoding' or, if omitted,
    the default encoding.
    """

    if not hasattr(stream_or_string, "write"):
        return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))

    return stream_or_string
paul@9 622
paul@40 623
def items_to_dict(items):

    """
    Return the given 'items', each of the form (name, attributes, value), as a
    dictionary mapping names to lists of tuples of the form (value,
    attributes), with one tuple for each item having a given name.

    Values which are lists are regarded as collections of items and are
    themselves converted to dictionaries.
    """

    d = {}
    for name, attr, value in items:
        if isinstance(value, list):
            value = items_to_dict(value)

        # Start a new list for names not previously seen. The has_key method is
        # avoided since it is not provided by recent Python versions.

        d.setdefault(name, []).append((value, attr))
    return d
paul@40 639
paul@40 640
def dict_to_items(d):

    """
    Return 'd' converted to a list of items suitable for serialisation using
    iterwrite.

    Each value in 'd' is either a list of (value, attributes) tuples or a
    single such tuple. Values within the tuples which are dictionaries are
    themselves converted to lists of items; all other values are retained as
    they are.
    """

    items = []
    for name, value in d.items():

        # Treat a single tuple as a list containing only that tuple.

        if isinstance(value, list):
            entries = value
        else:
            entries = [value]

        for v, a in entries:

            # Only dictionaries describe nested items: plain property values
            # cannot be converted and are merely passed on.

            if isinstance(v, dict):
                items.append((name, a, dict_to_items(v)))
            else:
                items.append((name, a, v))
    return items
paul@40 659
paul@0 660
# Public functions.
paul@0 661
paul@11 662
def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource data obtained using 'stream_or_string', this being
    either a stream providing Unicode data (the codecs module can be used to
    open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' indicates the character encoding employed by the
    file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (it is false
    by default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' indicates the class to be instantiated, without
    arguments, in order to obtain the parser, with Parser being used if it is
    omitted.

    The result of the parser's 'parse' method is returned: for Parser, this is
    the root node of the imported resource.
    """

    source = get_input_stream(stream_or_string, encoding)
    reader = Reader(source, non_standard_newline)

    try:
        parser_factory = parser_cls or Parser
        return parser_factory().parse(reader)

    # Only close streams opened here: those supplied by the caller are left
    # open.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()
paul@9 695
paul@11 696
def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Prepare to parse the resource data obtained using 'stream_or_string', this
    being either a stream providing Unicode data (the codecs module can be used
    to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' indicates the character encoding employed by the
    file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (it is false
    by default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' indicates the class to be instantiated with the
    reader in order to obtain the parser, with StreamParser being used if it is
    omitted.

    The parser is returned: for StreamParser, this is an iterator providing
    event tuples of the form (name, parameters, value). Nothing is closed
    here.
    """

    parser_factory = parser_cls or StreamParser
    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)
    return parser_factory(reader)
paul@5 719
paul@21 720
def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written. Where it is given as a true value, it takes precedence over any
    'write' operation.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' can be used to specify the class of the returned
    writer, with StreamWriter being used if it is omitted.

    IOError is raised if neither 'stream_or_string' nor 'write' is given.
    """

    if stream_or_string:
        stream = get_output_stream(stream_or_string, encoding)
        _writer = Writer(stream.write, line_length)
    elif write:
        _writer = Writer(write, line_length)
    else:
        raise IOError("No stream, filename or write operation specified.")

    return (writer_cls or StreamWriter)(_writer)
paul@8 747
paul@40 748
def to_dict(node):

    """
    Return the 'node', a tuple of the form (name, attributes, items), converted
    to a dictionary representation mapping the name to a tuple of the form
    (content, attributes). Where the items are provided as a list, the content
    is the dictionary representation of those items; otherwise, the content is
    the items value itself.
    """

    name, attr, items = node

    # An explicit test is used since an empty list of items yields an empty
    # dictionary, which would be discarded in favour of the list itself by an
    # expression of the form "test and converted or items".

    if isinstance(items, list):
        content = items_to_dict(items)
    else:
        content = items

    return {name : (content, attr)}
paul@40 754
paul@40 755
def to_node(d):

    """
    Return 'd' converted to an items-based representation, this being the first
    of the items produced from the dictionary.
    """

    items = dict_to_items(d)
    return items[0]
paul@40 760
paul@0 761
# vim: tabstop=4 expandtab shiftwidth=4