#!/usr/bin/env python
# libxml2dom/__init__.py (text recovered from the repository's annotate view)

"""

paulb@75

4

DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under

paulb@293

9

the terms of the GNU Lesser General Public License as published by the Free

paulb@293

10

Software Foundation; either version 3 of the License, or (at your option) any

paulb@293

11

later version.

This program is distributed in the hope that it will be useful, but WITHOUT

paulb@293

14

ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

paulb@293

15

FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more

paulb@293

16

details.

You should have received a copy of the GNU Lesser General Public License along

paulb@293

19

with this program.  If not, see <http://www.gnu.org/licenses/>.

paulb@18

20

"""

# Version string of this libxml2dom release.
__version__ = "0.4.5"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import urllib # for parseURI in HTML mode
import xml.dom # for getElementById

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath: prefix -> namespace URI. Node.xpath
# copies these into a fresh dictionary before adding caller-supplied bindings.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):
        "Return a new DocumentType holding the given name and identifiers."
        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):
        """
        Create a new low-level document from the given 'namespaceURI',
        'localName' and 'doctype', returning it wrapped as a Document which
        uses this implementation.
        """
        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):
        "Wrap the low-level document 'node' in a Document using this implementation."
        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):
        """
        Return a wrapper for the low-level '_node', choosing the kind of
        wrapper from its node type. The 'context_node' supplies the node type
        constants and the owner document: document nodes yield the context's
        own owner document, attribute nodes yield an Attribute whose owner
        element is the 'context_node', and everything else yields a Node.
        """
        # Ask for the node type once rather than once per comparison.
        node_type = Node_nodeType(_node)
        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument
        elif node_type == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument, context_node)
        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):
        "As get_node, but return None when '_node' is None."
        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.

    All attribute access is delegated to the wrapped 'node'; the 'impl' is
    used when wrapping the low-level attribute nodes returned by 'values'.
    """

    def __init__(self, node, impl):
        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        "Return the attribute node called 'name' from the wrapped node."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node with namespace 'ns' and the given 'localName'."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        """
        Set the attribute 'node' on the wrapped node, returning whatever
        getNamedItem produced for the same name beforehand (or None if that
        lookup raised KeyError).
        """
        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):
        "Namespace-aware counterpart of setNamedItem."
        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):
        """
        Remove the attribute called 'name' from the wrapped node, returning
        whatever getNamedItem produced for it beforehand (or None if that
        lookup raised KeyError).
        """
        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):
        "Namespace-aware counterpart of removeNamedItem."
        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):
        # The key must agree with the name of the attribute being stored.
        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # Call form of raise: valid in both Python 2 and Python 3.
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented.
        pass

    def values(self):
        "Return a list of Attribute wrappers for the wrapped node's attributes."
        return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples, one per attribute."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    """
    An iterator over a NamedNodeMap.

    The map's items are captured once, at construction time; each step yields
    the attribute (the second element) of the next captured item.
    """

    def __init__(self, nodemap):
        self.nodemap = nodemap
        self.items = self.nodemap.items()

    def __iter__(self):
        # An iterator is its own iterable, as the iterator protocol requires.
        return self

    def next(self):
        "Return the next attribute, raising StopIteration when none remain."
        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

    # Python 3 spelling of the iterator step; 'next' is kept for Python 2.
    __next__ = next

class NodeList(list):

    "A wrapper around node lists, adding the DOM 'item' and 'length' members to a list."

    def item(self, index):
        "Return the element at 'index', using ordinary list indexing."
        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance holds the low-level node ('_node'), the implementation
    object ('impl') used to wrap any nodes reached from this one, and the
    document wrapper ('ownerDocument') passed on to those wrapped nodes.
    """

    # Node type constants, taken from the standard xml.dom.Node class.

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):
        """
        Wrap the low-level 'node'. If 'impl' is omitted (or false), the
        module-level default implementation is used.
        """
        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):
        "Return the wrapped low-level node."
        return self._node

    # Property accessors. Unless noted otherwise, each one delegates to the
    # like-named Node_* function, passing the wrapped low-level node.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):
        # An empty child list is replaced by [None] so that None is returned.
        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):
        # Node types listed in null_value_node_types never expose a value.
        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):
        """
        Return the public identifier found in this node's serialised form, or
        None if this is not a document type node or no identifier is found.
        """
        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):
        """
        Return the system identifier found in this node's serialised form, or
        None if this is not a document type node or no identifier is found.
        """
        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findSystemId(declaration)

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findSystemId(self, declaration):
        """
        Return the system identifier from the serialised 'declaration'. With a
        PUBLIC keyword the system identifier is the quoted value following the
        quoted public identifier; otherwise it is the quoted value following
        the SYSTEM keyword. None is returned if no such value is present.
        """
        i = declaration.find("PUBLIC")
        if i != -1:
            # Step over the quoted public identifier so that the search
            # starts after it, not at the public identifier itself.
            start = declaration.find('"', i)
            if start != -1:
                end = declaration.find('"', start + 1)
                if end != -1:
                    return self._findIdValue(declaration, end + 1)
        return self._findId(declaration, "SYSTEM")

    def _findId(self, declaration, identifier):
        """
        Return the double-quoted value following the first occurrence of the
        'identifier' keyword in 'declaration', or None if either is missing.
        """
        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):
        """
        Return the text between the first pair of double quotes found at or
        after position 'i' in 'declaration', or None if there is no such pair.
        """
        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access, delegating to the Node_* functions.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation. New nodes are wrapped using this node as the context.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):
        "Return a new Attribute with the given namespace 'ns' and 'name'."
        # The attribute is created on a temporary element.
        tmp = self.createElement("tmp")
        # The implementation is an argument of Attribute, not of the
        # low-level creation function (compare createAttribute).
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):
        "Return a new Attribute with the given 'name'."
        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):
        """
        Import 'node' and return the wrapped result. Objects offering
        'as_native_node' are imported via their low-level node; anything else
        is handed to Node_importNode_DOM.
        """
        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree manipulation.

    def insertBefore(self, tmp, oldNode):
        """
        Insert 'tmp' before the child 'oldNode', returning the wrapped result.
        A DOMException is raised if 'tmp' has a different owner document or
        if 'oldNode' is not a child of this node.
        """
        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):
        """
        Replace the child 'oldNode' with 'tmp', returning the wrapped result.
        A DOMException is raised if 'tmp' has a different owner document or
        if 'oldNode' is not a child of this node.
        """
        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):
        """
        Append 'tmp' to this node, returning the wrapped result. A
        DOMException is raised if 'tmp' has a different owner document.
        """
        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):
        "Remove the child 'tmp' from this node and return 'tmp'."
        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def getElementById(self, identifier):
        """
        Return the first descendant element whose xml:id attribute equals
        'identifier', or None if there is no such element.
        """
        # Apostrophes are replaced so that the identifier cannot terminate
        # the quoted string in the XPath expression.
        nodes = self.xpath(".//*[@xml:id='" + identifier.replace("'", "&apos;") + "']",
            namespaces={"xml" : xml.dom.XML_NAMESPACE})
        if nodes:
            return nodes[0]
        else:
            return None

    def getElementsByTagName(self, tagName):
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):
        "Merge each run of adjacent text node children into a single text node."
        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                # A non-text node ends the current run.
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):
        """
        Replace the given run of 'text_nodes' with one new text node holding
        their concatenated values: all but the last are removed, and the last
        is replaced by the new node.
        """
        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):
        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):
        """
        Evaluate the XPath expression 'expr' against this node using the
        optional 'variables' and 'namespaces'; the supplied namespaces are
        added to (and override) the default bindings. String results are
        passed through to_unicode, results having a length are returned as a
        NodeList of wrapped nodes, and anything else is returned unchanged.
        """
        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    """
    A class providing attribute access.

    In addition to the usual Node state, an attribute remembers the element
    it belongs to ('ownerElement'), which is also reported as its parent.
    """

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):
        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):
        return self.ownerElement

    # Overrides the Node property, which looks the parent up at the low level.
    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

"""

paulb@258

533

    An abstract class providing document-level housekeeping and distinct

paulb@258

534

    functionality.

paulb@258

535

"""

    def __init__(self, node, impl):

paulb@75

538

        self._node = node

paulb@256

539

        self.implementation = self.impl = impl

    def _documentElement(self):

paulb@256

542

        return self.xpath("*")[0]

    def _ownerDocument(self):

paulb@188

545

        return self

    def __del__(self):

paulb@84

548

        #print "Freeing document", self._node

paulb@75

549

        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)

paulb@75

552

    ownerDocument = property(_ownerDocument)

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):
        # The name is available as both 'name' and 'localName'.
        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

# Node types for which the nodeValue property is always None.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Create a document type object using the default implementation."
    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):
    "Create a document using the default implementation."
    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'impl' is omitted, the default implementation is used to
    wrap the parsed document.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Anything offering a 'read' method is treated as a stream whose entire
    # content is read and parsed; anything else is treated as a filename.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'impl' is omitted, the default implementation is used to
    wrap the parsed document.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding, unfinished=unfinished))

def parseString(s, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'impl' is omitted, the default implementation is used to
    wrap the parsed document.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding, unfinished=unfinished))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this purpose
    (such as urllib), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()

    If the optional 'impl' is omitted, the default implementation is used to
    wrap the parsed document.

    A document object is returned by this function.
    """

    if html:
        # HTML is fetched through urllib; the stream is always closed again.
        f = urllib.urlopen(uri)
        try:
            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished, impl=impl)
        finally:
            f.close()
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.

    If the optional 'impl' is omitted, the default implementation is used. An
    empty 'nodes' sequence yields an empty list.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # The document of the first node is adopted and used as the owner
    # document of every wrapped node.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    results = []
    for node in nodes:
        results.append(Node(node, impl, doc))
    return results

def getDOMImplementation():
    """
    Return the module's default DOM implementation object.
    """
    return default_impl

# Single instance of the implementation, used as the default wherever an
# 'impl' argument is omitted.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4

# libxml2dom: annotated libxml2dom/__init__.py