#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.4.7"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import urllib # for parseURI in HTML mode
import libxml2dom.errors

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType container holding the given details."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Create a new low-level document using the given 'namespaceURI',
        'localName' and 'doctype', returning it wrapped in a Document.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Wrap the given low-level document 'node' in a Document object."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a wrapper for the low-level '_node', using 'context_node' to
        obtain the owner document of the result.
        """

        # Return the existing document.

        if Node_nodeType(_node) == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Return other nodes.

        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but return None where the low-level '_node' is None."

        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):
        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        return self.node.getAttributeNodeNS(ns, localName)

    # The following methods return the attribute previously found under the
    # same name, or None where the lookup raises KeyError.

    def setNamedItem(self, node):
        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):
        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):
        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):
        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name', raising KeyError if 'name' is
        not the node name of the attribute.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently has no effect.
        pass

    def values(self):

        """
        Return a list of Attribute objects for the attributes of the wrapped
        node. The wrapped node is supplied as the owner element, as is done
        for attributes obtained from getAttributeNode.
        """

        return [Attribute(_node, self.impl, self.node.ownerDocument, self.node)
            for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):
        self.nodemap = nodemap

        # The items are captured when the iterator is created.

        self.items = self.nodemap.items()

    def __iter__(self):

        # Iterators are expected to return themselves when iterated over.

        return self

    def next(self):

        "Return the next attribute, raising StopIteration when exhausted."

        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

    # Support the newer spelling of the iterator protocol.

    __next__ = next

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):
        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper with the low-level 'node', the optional 'impl'
        (the default implementation being used if omitted) and the optional
        'ownerDocument'.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the wrapped low-level node."

        return self._node

    # Implementations of the standard DOM properties defined below.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # Where there are no children, None is produced.

        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types have no value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of this
        node, or None if this is not a document type node or if no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of this
        node, or None if this is not a document type node or if no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # In a PUBLIC declaration, the system identifier is the quoted value
        # following the public identifier, so the search must start after the
        # closing quote of the public identifier. (Searching from the start of
        # the declaration would find the public identifier again.)

        i = declaration.find("PUBLIC")
        if i != -1:
            public_end = self._findIdValueEnd(declaration, i)
            if public_end is not None:
                return self._findIdValue(declaration, public_end + 1)

        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the double-quoted value following the first occurrence of the
        'identifier' keyword in 'declaration', or None if either is absent.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the first double-quoted value in 'declaration' found at or after
        position 'i', or None if there is no complete quoted value.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    def _findIdValueEnd(self, declaration, i):

        """
        Return the position of the closing quote of the first double-quoted
        value in 'declaration' found at or after position 'i', or None if
        there is no complete quoted value.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return q2

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new attribute with the given 'ns' and 'name', created using a
        temporary element.
        """

        # The implementation is an argument of the Attribute wrapper, not of
        # the low-level creation function (as in createAttribute below).

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        "Return a new attribute with the given 'name', created using a temporary element."

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import the given 'node', which may be a node from this module (having
        an as_native_node method) or some other kind of DOM node, returning the
        wrapped result.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree modification.

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the child 'oldNode', raising WrongDocumentErr if
        'tmp' belongs to another document and NotFoundErr if 'oldNode' is not a
        child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the child 'oldNode' with 'tmp', raising WrongDocumentErr if
        'tmp' belongs to another document and NotFoundErr if 'oldNode' is not a
        child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append 'tmp' to the children of this node, raising WrongDocumentErr if
        'tmp' belongs to another document.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):

        "Remove the child 'tmp' from this node, returning 'tmp'."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def getElementById(self, identifier):
        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        if _node is None:
            return None
        else:
            return self.impl.get_node(_node, self)

    def getElementsByTagName(self, tagName):
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent text node children into a single text node."

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif text_nodes:
                self._normalize(text_nodes)
                text_nodes = []
        if text_nodes:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given adjacent 'text_nodes' with a single new text node
        containing their combined values.
        """

        # All but the last node are removed; the last node is then replaced by
        # the new text node.

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: These dictionaries are class attributes and are thus shared by all
    # NOTE: nodes.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    # Equality is decided by the low-level nodes; the hash value is derived
    # from the local name.

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):
        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings.

        The bindings in default_ns are always available, with any given
        'namespaces' taking precedence. Plain string results are converted
        using to_unicode, results having a length are returned as a NodeList of
        wrapped nodes, and other results are returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods.
    # These delegate to the module-level functions of the same names.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):
        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The owner element is presented as the parent of the attribute.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality. Configuration of the document is also supported.
    See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration
    """

    # Constants from
    # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL

    VAL_TRUE = 5
    VAL_FALSE = 6
    VAL_UNKNOWN = 7

    def __init__(self, node, impl):
        self._node = node
        self.implementation = self.impl = impl
        self.error_handler = libxml2dom.errors.DOMErrorHandler()

    # Standard DOM properties and their implementations.

    def _documentElement(self):

        # Return None instead of raising IndexError where the document has no
        # element child, as is done for firstChild and lastChild.

        elements = self.xpath("*")
        if elements:
            return elements[0]
        else:
            return None

    def _ownerDocument(self):
        return self

    def __del__(self):

        # Free the low-level document when this wrapper is discarded.

        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

    # DOM Level 3 Core DOMConfiguration methods.

    def setParameter(self, name, value):

        """
        No parameters can be set: NotSupportedErr is raised for "error-handler"
        and NotFoundErr is raised for any other 'name'.
        """

        if name == "error-handler":
            raise xml.dom.NotSupportedErr()
        raise xml.dom.NotFoundErr()

    def getParameter(self, name):

        """
        Return the error handler for the "error-handler" parameter, raising
        NotFoundErr for any other 'name'.
        """

        if name == "error-handler":
            return self.error_handler
        raise xml.dom.NotFoundErr()

    def canSetParameter(self, name, value):
        return 0

    def _parameterNames(self):
        return []

    # Extensions to the usual PyXML API.

    def validate(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        """

        if hasattr(doc, "as_native_node"):
            _schema = Document_schema(doc.as_native_node())
        else:
            _schema = Document_schemaFromString(doc.toString())

        # The schema is freed whether or not validation completes.

        try:
            self.error_handler.reset()
            return Document_validate(_schema, self._node, self.error_handler)
        finally:
            Schema_free(_schema)

    # DOM Level 3 Validation methods.

    def validateDocument(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument

        VAL_TRUE is returned where validate returns a true value; VAL_FALSE is
        returned otherwise.
        """

        if self.validate(doc):
            return self.VAL_TRUE
        else:
            return self.VAL_FALSE

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):
        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Objects providing a read method are treated as streams.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this purpose
    (such as urllib), open a stream and pass it to the parse function:

      f = urllib.urlopen(uri)
      try:
          doc = libxml2dom.parse(f, html)
      finally:
          f.close()

    A document object is returned by this function.
    """

    if html:

        # The stream is closed whether or not parsing succeeds.

        f = urllib.urlopen(uri)
        try:
            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
                validate=validate, remote=remote, impl=impl)
        finally:
            f.close()
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # The document of the first node is used as the owner document of all the
    # wrapped nodes.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    results = []
    for node in nodes:
        results.append(Node(node, impl, doc))
    return results

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4