imip-agent

Annotated vContent.py

1418:337a05845a69
2017-12-10 Paul Boddie Ensure that periods in the managed state are copied and can therefore be edited independently from the original values. client-editing-simplification
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Parsing of vCard, vCalendar and iCalendar files.
paul@0 5
paul@7 6
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,
paul@1390 7
              2014, 2015, 2017 Paul Boddie <paul@boddie.org.uk>
paul@0 8
paul@0 9
This program is free software; you can redistribute it and/or modify it under
paul@0 10
the terms of the GNU General Public License as published by the Free Software
paul@0 11
Foundation; either version 3 of the License, or (at your option) any later
paul@0 12
version.
paul@0 13
paul@0 14
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 15
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 16
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 17
details.
paul@0 18
paul@0 19
You should have received a copy of the GNU General Public License along with
paul@0 20
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 21
paul@0 22
--------
paul@0 23
paul@0 24
References:
paul@0 25
paul@0 26
RFC 5545: Internet Calendaring and Scheduling Core Object Specification
paul@0 27
          (iCalendar)
paul@0 28
          http://tools.ietf.org/html/rfc5545
paul@0 29
paul@0 30
RFC 2445: Internet Calendaring and Scheduling Core Object Specification
paul@0 31
          (iCalendar)
paul@0 32
          http://tools.ietf.org/html/rfc2445
paul@0 33
paul@0 34
RFC 2425: A MIME Content-Type for Directory Information
paul@0 35
          http://tools.ietf.org/html/rfc2425
paul@0 36
paul@0 37
RFC 2426: vCard MIME Directory Profile
paul@0 38
          http://tools.ietf.org/html/rfc2426
paul@0 39
"""
paul@0 40
paul@0 41
try:
paul@0 42
    set
paul@0 43
except NameError:
paul@0 44
    from sets import Set as set
paul@0 45
paul@0 46
# Encoding-related imports.
paul@0 47
paul@0 48
import base64, quopri
paul@0 49
import codecs
paul@0 50
paul@0 51
# Tokenisation help.
paul@0 52
paul@0 53
import re
paul@0 54
paul@0 55
# Configuration.
paul@0 56
paul@0 57
# Character encoding used when opening or wrapping streams if the caller does
# not supply an explicit encoding.
default_encoding = "utf-8"
paul@0 58
paul@7 59
class ParseError(Exception):

    "An error raised when content cannot be parsed."
paul@7 64
paul@781 65
class WriteError(Exception):

    "An error raised when content cannot be written."
paul@781 70
paul@0 71
# Reader and parser classes.
paul@0 72
paul@0 73
class Reader:

    "A thin wrapper around a file object which adds line pushback support."

    def __init__(self, f, non_standard_newline=0):

        """
        Wrap the file 'f'. Where 'non_standard_newline' is true (it is false by
        default), a line ending in a lone CR is regarded as a complete line.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline
        self.lines = []
        self.line_number = 1 # about to read line 1

    def close(self):

        "Close the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Return 'line' to the reader so that the next call to 'readline' yields
        it instead of consulting the underlying file.
        """

        self.lines.append(line)
        self.line_number -= 1

    def readline(self):

        """
        Return the most recently pushed-back line if there is one, otherwise
        the next line from the underlying file.
        """

        self.line_number += 1

        # Pushed-back lines take priority, most recent first.

        if self.lines:
            return self.lines.pop()

        text = self.f.readline()

        # With CR accepted as a line ending, merely normalise the terminator.

        if self.non_standard_newline:
            if text.endswith("\r"):
                return text + "\n"
            return text

        # Otherwise, a trailing CR indicates a line broken before its LF, so
        # keep appending input until the line no longer ends with CR or the
        # file is exhausted.

        while text.endswith("\r"):
            more = self.f.readline()
            if not more:
                break
            text += more

        return text

    def read_content_line(self):

        """
        Return one logical content line as a string, joining any folded
        continuation lines. An empty string is returned at the end of input.
        """

        # Discard blank lines preceding the content.

        while 1:
            physical = self.readline()
            if not physical:
                return ""
            first = physical.rstrip("\r\n")
            if first:
                break

        # Continuation lines start with a space or tab: that first character
        # is dropped along with the line terminator.
        # See section 4.1 of the iCalendar specification.

        parts = [first]
        physical = self.readline()

        while physical[:1] in (" ", "\t"):
            parts.append(physical[1:].rstrip("\r\n"))
            physical = self.readline()

        # The line which ended the loop belongs to the next content line, so
        # hand it back unless it merely signalled the end of input.

        if physical:
            self.pushback(physical)

        return "".join(parts)

    def get_content_line(self):

        "Return the next logical content line wrapped in a ContentLine object."

        return ContentLine(self.read_content_line())
paul@0 172
paul@0 173
class ContentLine:

    "A logical content line supporting incremental searches for separators."

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):
        self.text = text
        self.start = 0

    def __repr__(self):
        return "ContentLine(%r)" % self.text

    def get_remaining(self):

        "Return the text which has not yet been consumed by searching."

        return self.text[self.start:]

    def search(self, targets):

        """
        Search from the current position for one of the 'targets', returning a
        tuple of the form (string, target) where the string is the text found
        before the target. Targets inside double-quoted regions are ignored.
        Where no target is found, the rest of the text is returned together
        with a target of None.

        The 'targets' parameter must be a regular expression object or an object
        compatible with the API of such objects.
        """

        text = self.text
        origin = self.start
        end = len(text)

        cursor = origin
        quoted = 0

        while cursor < end:
            found = targets.search(text, cursor)

            # No further candidates: give up.

            if found is None:
                break

            token = found.group()

            # Double quotes open and close regions in which separators are
            # not significant.

            if token == '"':
                quoted = not quoted

            # A separator outside any quoted region ends the search, with the
            # position advanced beyond the separator.

            elif not quoted:
                self.start = found.end()
                return text[origin:found.start()], token

            # Step over the quote or the quoted separator.

            cursor = found.end()

        # The text has been exhausted without finding a usable target.

        self.start = end
        return text[origin:], None
paul@0 251
paul@0 252
class StreamParser:

    "A stream parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the parser for the given reader 'f', which must provide the
        'get_content_line' and 'close' methods and a 'line_number' attribute.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised when the input has
        been exhausted.
        """

        return self.parse_content_line()

    def __next__(self):

        "Support the Python 3 iterator protocol by delegating to 'next'."

        return self.next()

    def decode_content(self, name, value):

        """
        Decode for property 'name' the given 'value', replacing quoted
        characters: CR characters are removed and the escaped newline forms
        are converted to actual newlines.
        """

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        StopIteration is raised where no content remains. ValueError is raised,
        with the line number and the content line as arguments, where the line
        has no value separator.
        """

        f = self.f
        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        # An empty line without any separator is what the reader provides at
        # the end of the input.

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()

            # Parameters given without a value are recorded with None.

            else:
                parameter_value = None

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":
            raise ValueError(line_number, line)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'. Quoted-printable
        values are returned as text decoded using the CHARSET parameter (or
        iso-8859-1 by default), base64 values are returned as the decoded byte
        string, and all other values are returned as unquoted text.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(name, value)

        if encoding == "QUOTED-PRINTABLE":

            # Decoding the byte string explicitly works on both Python 2 and 3,
            # unlike the unicode built-in.

            return quopri.decodestring(value).decode(charset or "iso-8859-1")

        elif encoding == "BASE64":

            # base64.decodestring wrapped this function but has been removed
            # from recent Python versions.

            import binascii
            return binascii.a2b_base64(value)

        else:
            return value
paul@0 360
paul@0 361
class ParserBase:

    """
    An abstract parser for content in vCard/vCalendar/iCalendar-like formats.

    Subclasses must provide the 'startComponent', 'endComponent' and
    'handleProperty' methods invoked by 'parse'.
    """

    def __init__(self):

        "Initialise the parser."

        # The names of the components which have been opened but not closed.

        self.names = []

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' (or StreamParser
        by default) to obtain (name, parameters, value) tuples from it.

        ParseError is raised where an END declaration does not correspond to
        the most recent unclosed BEGIN declaration.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":

                # An END without any open component is a content error, not a
                # programming error.

                if not self.names:
                    raise ParseError("END declaration (%r) without a matching BEGIN at line %d." % (
                        value, f.line_number))

                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError("Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)
paul@0 393
paul@0 394
class Parser(ParserBase):

    """
    A SAX-like parser for vCard/vCalendar/iCalendar-like formats, building a
    tree of (name, parameters, value) tuples where the value of a component is
    the list of its children.
    """

    def __init__(self):
        ParserBase.__init__(self)

        # The stack of components currently being populated.

        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack. If only one component exists on the stack, retain it
        for later inspection.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element. Since it remains on the stack, any
        # content following it will be attached to it.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        """
        Attach the given 'component' to its parent, this being the component at
        the top of the stack. Nothing is done if the stack is empty.
        """

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'. Any false 'value', including an empty string, is replaced by a
        new empty list.
        """

        return (name, parameters, value or [])

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', returning the root component.
        ParseError is raised where no component was found.
        """

        ParserBase.parse(self, f, parser_cls)
        try:
            return self.components[0]
        except IndexError:
            raise ParseError("No vContent component found in file.")
paul@0 470
paul@0 471
# Writer classes.
paul@0 472
paul@0 473
class Writer:

    "A simple class wrapping a file, providing simple output capabilities."

    default_line_length = 76

    def __init__(self, write, line_length=None):

        """
        Initialise the object with the given 'write' operation. If 'line_length'
        is set, the length of written lines will conform to the specified value
        instead of the default value.

        ValueError is raised for a line length of less than 2, since folded
        lines need room for the leading space and at least one character.
        """

        self._write = write
        self.line_length = line_length or self.default_line_length

        # A shorter length would leave no space on continuation lines and
        # prevent the write method from ever making progress.

        if self.line_length < 2:
            raise ValueError("line_length must be at least 2, not %r" % (self.line_length,))

        # The number of characters already written on the current line.

        self.char_offset = 0

    def write(self, text):

        """
        Write the 'text' to the file, folding it over several physical lines
        where it would otherwise exceed the line length.
        """

        write = self._write
        line_length = self.line_length

        position = 0
        remaining = len(text)

        while remaining:
            space = line_length - self.char_offset

            # Fill the current line and start a continuation line, whose
            # leading space occupies the first column.

            if remaining > space:
                write(text[position:position + space])
                write("\r\n ")
                self.char_offset = 1
                position += space
                remaining -= space

            # Write the rest of the text where it fits.

            else:
                write(text[position:])
                self.char_offset += remaining
                remaining = 0

    def end_line(self):

        "End the current content line unless nothing has been written to it."

        if self.char_offset > 0:
            self.char_offset = 0
            self._write("\r\n")
paul@0 522
paul@0 523
class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer with the given 'f' stream object, which
        must provide 'write' and 'end_line' methods.
        """

        self.f = f

    def append(self, record):

        "Write the given 'record', a (name, parameters, value) tuple."

        self.write(*record)

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. Parameters having a value of None are
        written using only their names.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)

            # Parameters without values are produced by the parser and must
            # be written without the value separator.

            if param_value is not None:
                f.write("=")
                f.write(param_value)
        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        WriteError is raised where the value has a type which cannot be
        encoded.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        try:
            if encoding == "QUOTED-PRINTABLE":
                value = quopri.encodestring(value.encode(charset or "iso-8859-1"))
            elif encoding == "BASE64":

                # encodestring has been removed from recent Python versions in
                # favour of encodebytes.

                encode = getattr(base64, "encodebytes", None) or base64.encodestring
                value = encode(value)

            # The encoders produce byte strings which, where these are distinct
            # from plain strings, must be converted to text for writing.

            if encoding in ("QUOTED-PRINTABLE", "BASE64") and not isinstance(value, str):
                value = value.decode("ascii")

            return self.encode_content(name, value)
        except TypeError:
            raise WriteError("Property %r value with parameters %r cannot be encoded: %r" % (name, parameters, value))

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a dictionary mapping names to encoded values. Values of None,
        indicating parameters without values, are preserved.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted.

            if param_value is not None and ContentLine.SEPARATORS.search(param_value):
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, name, value):

        """
        Encode for property 'name' the given 'value', quoting characters. Any
        false 'value' is written as an empty string.
        """

        return (value or "").replace("\n", "\\n")
paul@0 620
paul@0 621
# Utility functions.
paul@0 622
paul@0 623
def is_input_stream(stream_or_string):

    "Return whether 'stream_or_string' provides a 'read' attribute."

    readable = hasattr(stream_or_string, "read")
    return readable
paul@0 625
paul@0 626
def get_input_stream(stream_or_string, encoding=None):

    """
    Return a stream for 'stream_or_string'. A filename is opened using the
    given 'encoding' (or the default encoding), a codecs stream reader is
    returned unchanged, and any other stream is wrapped in a reader for the
    given 'encoding' (or the default encoding).
    """

    # Anything which is not a stream is treated as a filename.

    if not is_input_stream(stream_or_string):
        return codecs.open(stream_or_string, encoding=(encoding or default_encoding))

    # Streams already providing decoded data need no wrapping.

    if isinstance(stream_or_string, codecs.StreamReader):
        return stream_or_string

    reader_cls = codecs.getreader(encoding or default_encoding)
    return reader_cls(stream_or_string)
paul@0 634
paul@0 635
def get_output_stream(stream_or_string, encoding=None):

    """
    Return a stream for 'stream_or_string'. A filename is opened for writing
    using the given 'encoding' (or the default encoding), a codecs stream
    writer is returned unchanged, and any other stream is wrapped in a writer
    for the given 'encoding' (or the default encoding).
    """

    # Anything which cannot be written to is treated as a filename.

    if not hasattr(stream_or_string, "write"):
        return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))

    # Streams already accepting text for encoding need no wrapping.

    if isinstance(stream_or_string, codecs.StreamWriter):
        return stream_or_string

    writer_cls = codecs.getwriter(encoding or default_encoding)
    return writer_cls(stream_or_string)
paul@0 643
paul@107 644
def items_to_dict(items, sections=None):

    """
    Return the given 'items' as a dictionary mapping names to lists of tuples
    of the form (value, attributes), with one tuple for each item having a
    given name. Where 'sections' is provided, only items whose names occur in
    the given 'sections' collection will be treated as groups or sections of
    definitions and converted recursively; otherwise, all items having list
    values are converted.
    """

    d = {}
    for name, attr, value in items:

        # Obtain the entries for the name without relying on dict.has_key,
        # which is not available in Python 3.

        entries = d.setdefault(name, [])

        if isinstance(value, list) and (not sections or name in sections):
            entries.append((items_to_dict(value, sections), attr))
        else:
            entries.append((value, attr))
    return d
paul@26 662
paul@26 663
def dict_to_items(d):

    """
    Return 'd' converted to a list of items suitable for serialisation using
    iterwrite. Each value in 'd' may be a list of (value, attributes) tuples or
    a single such tuple, with dictionary values being converted recursively and
    all other values being retained as they are.
    """

    items = []
    for name, value in d.items():

        # Treat a single (value, attributes) tuple like a list containing
        # only that tuple so that plain values are handled consistently.

        if isinstance(value, list):
            entries = value
        else:
            entries = [value]

        for v, a in entries:
            if isinstance(v, dict):
                items.append((name, a, dict_to_items(v)))
            else:
                items.append((name, a, v))
    return items
paul@26 682
paul@0 683
# Public functions.
paul@0 684
paul@0 685
def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource identified by 'stream_or_string' and return the root
    node of the parsed content.

    The 'stream_or_string' is either a stream providing Unicode data (the
    codecs module can be used to open files or to wrap streams in order to
    provide Unicode data) or the name of a file to be parsed.

    The optional 'encoding' indicates the character encoding of the resource.

    The optional 'non_standard_newline' can be set to a true value (it is false
    by default) in order to attempt to process files using CR alone as the end
    of line character.

    The optional 'parser_cls' indicates the class to be instantiated instead of
    Parser in order to build the result.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)

    try:
        return (parser_cls or Parser)().parse(reader)

    # Only streams opened by this function are closed: streams supplied by the
    # caller are left open.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()
paul@0 718
paul@0 719
def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Return an iterator over the resource identified by 'stream_or_string',
    providing event tuples of the form (name, parameters, value).

    The 'stream_or_string' is either a stream providing Unicode data (the
    codecs module can be used to open files or to wrap streams in order to
    provide Unicode data) or the name of a file to be parsed.

    The optional 'encoding' indicates the character encoding of the resource.

    The optional 'non_standard_newline' can be set to a true value (it is false
    by default) in order to attempt to process files using CR alone as the end
    of line character.

    The optional 'parser_cls' indicates the class to be instantiated instead of
    StreamParser in order to provide the iterator.

    Nothing is closed by this function: the 'close' method of the returned
    object closes the underlying stream.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)
    stream_parser_cls = parser_cls or StreamParser
    return stream_parser_cls(reader)
paul@0 742
paul@0 743
def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' indicates the class to be instantiated instead of
    StreamWriter in order to provide the result.

    IOError is raised where neither 'stream_or_string' nor 'write' is given.
    """

    if stream_or_string:
        stream = get_output_stream(stream_or_string, encoding)
        _writer = Writer(stream.write, line_length)
    elif write:
        _writer = Writer(write, line_length)
    else:
        raise IOError("No stream, filename or write operation specified.")

    return (writer_cls or StreamWriter)(_writer)
paul@0 770
paul@107 771
def to_dict(node, sections=None):

    """
    Return the 'node', a (name, attributes, items) tuple, converted to a
    dictionary mapping the name to a tuple of the form (content, attributes).
    Where the items are a list, the content is the dictionary produced from
    them by items_to_dict using any given 'sections'; otherwise, the content is
    the items value itself.
    """

    name, attr, items = node

    # Test the type explicitly so that an empty list of items yields an empty
    # dictionary and not the list itself.

    if isinstance(items, list):
        content = items_to_dict(items, sections)
    else:
        content = items

    return {name : (content, attr)}
paul@26 777
paul@26 778
def to_node(d):

    """
    Return the first item of the items-based representation produced from the
    dictionary 'd'.
    """

    nodes = dict_to_items(d)
    return nodes[0]
paul@26 783
paul@0 784
# vim: tabstop=4 expandtab shiftwidth=4