#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.4.7"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import urllib # for parseURI in HTML mode
import libxml2dom.errors

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType holding the given name and identifiers."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Create a native document from 'namespaceURI', 'localName' and 'doctype'
        and return it wrapped in a Document bound to this implementation.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Wrap the given native document 'node' in a Document object."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a wrapper for the native '_node', using 'context_node' to supply
        the owner document (and the node type constants).
        """

        # Return the existing document.

        if Node_nodeType(_node) == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Return other nodes.

        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but return None when the native '_node' is None."

        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):

        """
        Initialise the map for the given wrapper 'node' (whose attributes are
        exposed) and the implementation 'impl' used to wrap attribute nodes.
        """

        self.node = node
        self.impl = impl

    def getNamedItem(self, name):

        "Return the attribute node with the given 'name' from the wrapped node."

        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):

        "Return the attribute node with the given 'ns' and 'localName'."

        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Set the attribute 'node' on the wrapped node, returning whatever was
        previously found under the same node name (None if that lookup raised
        KeyError).
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Namespace-aware form of setNamedItem: the previous value is looked up
        using the namespace URI and local name of 'node'.
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute with the given 'name' from the wrapped node,
        returning whatever was previously found under that name (None if the
        lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        "Namespace-aware form of removeNamedItem."

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        # The key must agree with the name of the node being stored.

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented.
        # NOTE: Deleting an item is currently a silent no-op.
        pass

    def values(self):

        "Return a list of Attribute wrappers, one for each native attribute."

        return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples for the attributes."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):

        "Snapshot the items of 'nodemap' for subsequent iteration."

        self.nodemap = nodemap
        self.items = self.nodemap.items()

    def __iter__(self):

        # Iterators are expected to be iterable themselves, returning self.

        return self

    def next(self):

        """
        Return the next attribute node (the value part of each item), raising
        StopIteration when the snapshot is exhausted.
        """

        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):

        "Return the node at the given 'index' (plain list indexing)."

        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper around the native 'node'. The optional 'impl'
        defaults to the module's default implementation; 'ownerDocument' is the
        Document wrapper to which this node belongs.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the underlying native node."

        return self._node

    # Property implementations: each delegates to the corresponding macrolib
    # function, wrapping any resulting native nodes using the implementation.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # Yields None when there are no child nodes.

        return (self.childNodes or [None])[0]

    def _lastChild(self):

        # Yields None when there are no child nodes.

        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types have no value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of a document
        type node, or None for other node types or where none is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of a document
        type node, or None for other node types or where none is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        if self._findId(declaration, "PUBLIC"):

            # In a PUBLIC declaration the system identifier is the quoted
            # literal following the public identifier, so the search must start
            # after the closing quote of the public literal. Both quotes are
            # known to exist because _findId returned a value above.

            start = declaration.find('"', declaration.find("PUBLIC"))
            end = declaration.find('"', start + 1)
            return self._findIdValue(declaration, end + 1)
        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following the keyword 'identifier'
        in 'declaration', or None if the keyword or a quoted value is absent.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the text between the first pair of double quotes found at or
        after index 'i' in 'declaration', or None if no such pair exists.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access, delegating to the corresponding macrolib functions.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):

        "Return an Attribute wrapper, owned by this node, for the attribute."

        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):

        "Return an Attribute wrapper, owned by this node, for the attribute."

        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute with the given 'ns' and 'name', created on a
        temporary element.
        """

        # The implementation belongs to the Attribute wrapper (as in
        # createAttribute below), not to the native creation function.

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        "Return a new Attribute with the given 'name', created on a temporary element."

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import 'node' and return the wrapped result. Objects providing
        as_native_node are imported natively; anything else is passed to the
        DOM-based import function.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree manipulation.

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before 'oldNode' and return the wrapped result. Raises
        xml.dom.WrongDocumentErr if 'tmp' has a different owner document and
        xml.dom.NotFoundErr if 'oldNode' is not a child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace 'oldNode' with 'tmp' and return the wrapped result. Raises
        xml.dom.WrongDocumentErr if 'tmp' has a different owner document and
        xml.dom.NotFoundErr if 'oldNode' is not a child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append 'tmp' to this node and return the wrapped result. Raises
        xml.dom.WrongDocumentErr if 'tmp' has a different owner document.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):

        "Remove 'tmp' from this node, returning 'tmp' itself."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def getElementById(self, identifier):

        """
        Return the wrapped element found for 'identifier' in the owner
        document, or None if the native lookup yields nothing.
        """

        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        if _node is None:
            return None
        else:
            return self.impl.get_node(_node, self)

    def getElementsByTagName(self, tagName):

        "Return the result of the XPath query './/tagName' on this node."

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):

        """
        Return the result of the XPath query './/ns:localName' on this node,
        with the 'ns' prefix bound to 'namespaceURI'.
        """

        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        """
        Merge each run of adjacent text nodes amongst the child nodes of this
        node into a single text node. Only direct children are examined.
        """

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given run of 'text_nodes' with one text node holding their
        concatenated values: all but the last are removed, and the last is
        replaced by the new node.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.

    entities = {}
    notations = {}

    def isSameNode(self, other):

        "Return whether this node equals 'other' (see __eq__)."

        return self == other

    def __hash__(self):

        # The hash is derived from the local name alone.

        return hash(self.localName)

    def __eq__(self, other):

        # Only Node wrappers compare equal, and then only when the native
        # comparison of the underlying nodes succeeds.

        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings.

        The supplied 'namespaces' are added to (and may override) the default
        bindings. Plain string results are converted using to_unicode, results
        having a length are returned as a NodeList of wrapped nodes, and any
        other result is returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):

        "Return the serialised form of this node (see the toString function)."

        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):

        "Write the serialised form of this node to 'stream' (see the toStream function)."

        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):

        "Write the serialised form of this node to the file 'f' (see the toFile function)."

        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute as for Node, additionally recording the
        optional 'ownerElement' to which the attribute belongs.
        """

        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The recorded owner element is reported as the parent.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality. Configuration of the document is also supported.
    See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration
    """

    # Constants from 
    # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL

    VAL_TRUE = 5
    VAL_FALSE = 6
    VAL_UNKNOWN = 7

    def __init__(self, node, impl):

        """
        Initialise the document around the native document 'node' using the
        implementation 'impl', and set up a fresh DOM error handler.
        """

        self._node = node
        self.implementation = self.impl = impl
        self.error_handler = libxml2dom.errors.DOMErrorHandler()

    # Standard DOM properties and their implementations.

    def _documentElement(self):

        # The first node selected by the XPath expression "*"; an IndexError
        # is raised if nothing is selected.

        return self.xpath("*")[0]

    def _ownerDocument(self):
        return self

    def __del__(self):

        # Free the native document when the wrapper is discarded.

        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

    # DOM Level 3 Core DOMConfiguration methods.

    def setParameter(self, name, value):

        """
        No parameter can be set: xml.dom.NotSupportedErr is raised for
        "error-handler" and xml.dom.NotFoundErr for any other 'name'.
        """

        if name == "error-handler":
            raise xml.dom.NotSupportedErr()
        raise xml.dom.NotFoundErr()

    def getParameter(self, name):

        """
        Return the error handler for the 'name' "error-handler"; raise
        xml.dom.NotFoundErr for any other name.
        """

        if name == "error-handler":
            return self.error_handler
        raise xml.dom.NotFoundErr()

    def canSetParameter(self, name, value):

        "Return a false value, since no parameter can be set."

        return 0

    def _parameterNames(self):
        return []

    # Extensions to the usual PyXML API.

    def validate(self, doc):

        """
        Validate the document against the given schema document, 'doc'.

        The namespace of the document element of 'doc' selects the kind of
        validation. The error handler is reset before validation, and the
        schema is freed afterwards regardless of the outcome.
        """

        validation_ns = doc.documentElement.namespaceURI

        if hasattr(doc, "as_native_node"):
            _schema = Document_schema(doc.as_native_node(), validation_ns)
        else:
            _schema = Document_schemaFromString(doc.toString(), validation_ns)
        try:
            self.error_handler.reset()
            return Document_validate(_schema, self._node, self.error_handler, validation_ns)
        finally:
            Schema_free(_schema, validation_ns)

    # DOM Level 3 Validation methods.

    def validateDocument(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument

        VAL_TRUE is returned if validation yields a true value, VAL_FALSE
        otherwise.
        """

        if self.validate(doc):
            return self.VAL_TRUE
        else:
            return self.VAL_FALSE

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):

        "Record the given name and public and system identifiers."

        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

# Node types for which the nodeValue property yields None.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Anything providing a read method is treated as a stream; anything else
    # is treated as a filename.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this purpose
    (such as urllib), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()

    A document object is returned by this function.
    """

    if html:
        f = urllib.urlopen(uri)
        try:
            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
                validate=validate, remote=remote, impl=impl)
        finally:
            f.close()
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # The document is obtained from the first node and shared by all results.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    results = []
    for node in nodes:
        results.append(Node(node, impl, doc))
    return results

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4