#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.4.7"

# NOTE: The star import supplies the Node_* functions, to_unicode, libxml2mod
# NOTE: and the xml.dom package used throughout this module.

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import urllib # for parseURI in HTML mode

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType object holding the given details."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Return a new Document wrapping the low-level document created for the
        given 'namespaceURI', 'localName' and 'doctype'.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Return a Document wrapping the given low-level document 'node'."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a high-level object for the low-level '_node', using the given
        'context_node' to provide the owner document (and the node type
        constants).
        """

        node_type = Node_nodeType(_node)

        # Return the existing document.

        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        elif node_type == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Return other nodes.

        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but returning None where '_node' is None."

        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):

        """
        Initialise the map for the given element 'node' (a high-level Node)
        and implementation 'impl'.
        """

        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        return self.node.getAttributeNodeNS(ns, localName)

    # The methods below return the previous attribute node (or None where
    # looking it up raises KeyError) after updating the element.

    def setNamedItem(self, node):
        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):
        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):
        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):
        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):

        "Return an iterator over the attribute nodes in this map."

        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' under the given 'name', raising KeyError if
        the name does not match the node's own name.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # Call syntax is valid in both Python 2 and Python 3.
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented.
        pass

    def values(self):

        """
        Return a list of Attribute objects for the attributes of the wrapped
        element. The element is supplied as the owner element so that the
        parentNode of each attribute is set, as it is for attributes obtained
        using getAttributeNode.
        """

        element = self.node
        return [
            Attribute(_node, self.impl, element.ownerDocument, element)
            for _node in Node_attributes(element.as_native_node()).values()
            ]

    def keys(self):

        # NOTE: Keys are (namespaceURI, localName) tuples, whereas __getitem__
        # NOTE: accepts a plain attribute name.

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):

        "Initialise the iterator with a snapshot of the items of 'nodemap'."

        self.nodemap = nodemap
        self.items = self.nodemap.items()

    def __iter__(self):

        "Return this object, as required of iterators."

        return self

    def next(self):

        """
        Return the next attribute node, consuming it from the remaining items,
        or raise StopIteration when no items remain.
        """

        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

    # Python 3 spelling of the iterator protocol.

    __next__ = next

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):
        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper around the low-level 'node', using the optional
        'impl' (or the default implementation if omitted or None) and the
        optional 'ownerDocument'.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the low-level node wrapped by this object."

        return self._node

    # Property accessors, mostly delegating to the low-level Node_* functions.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # An empty child list yields None.

        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types never have a value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        return self.impl.get_node_or_none(Node_doctype(self._node), self)

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of this
        node, or None if this is not a document type node or if no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of this
        node, or None if this is not a document type node or if no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # In a PUBLIC declaration, the system identifier is the quoted value
        # following the public identifier, so the search must start after the
        # closing quote of the public identifier.

        public_at = declaration.find("PUBLIC")
        if public_at != -1:
            opening = declaration.find('"', public_at)
            if opening != -1:
                closing = declaration.find('"', opening + 1)
                if closing != -1:
                    return self._findIdValue(declaration, closing + 1)

        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following the given 'identifier'
        keyword in the 'declaration' string, or None if the keyword or a
        complete quoted value is not found.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the first double-quoted value found in the 'declaration' string
        at or after position 'i', or None if there is no complete quoted value.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute having the given namespace 'ns' and 'name',
        created on a temporary element.
        """

        tmp = self.createElement("tmp")

        # The implementation belongs to the Attribute wrapper, not to the
        # low-level creation function (compare createAttribute).

        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        "Return a new Attribute having the given 'name', created on a temporary element."

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import the given 'node', which may be a node from this package or a
        node from another DOM implementation, returning the imported node.
        The 'deep' flag is passed on to the underlying import function.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree manipulation.

    def insertBefore(self, tmp, oldNode):

        """
        Insert the node 'tmp' before the existing child 'oldNode', returning
        the inserted node. If 'oldNode' is None, 'tmp' is appended to the
        children of this node, as the DOM specifies.

        A DOMException is raised with WRONG_DOCUMENT_ERR if 'tmp' belongs to
        another document, or with NOT_FOUND_ERR if 'oldNode' is not a child of
        this node.
        """

        if oldNode is None:
            return self.appendChild(tmp)
        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with the node 'tmp', returning
        the result of the underlying replacement as a high-level node.

        A DOMException is raised with WRONG_DOCUMENT_ERR if 'tmp' belongs to
        another document, or with NOT_FOUND_ERR if 'oldNode' is not a child of
        this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append the node 'tmp' to the children of this node, returning the
        appended node. A DOMException is raised with WRONG_DOCUMENT_ERR if
        'tmp' belongs to another document.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):

        "Remove the child 'tmp' from this node, returning 'tmp'."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def getElementById(self, identifier):

        """
        Return the element in the owner document having the given
        'identifier', or None if the underlying lookup finds nothing.
        """

        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        return self.impl.get_node_or_none(_node, self)

    def getElementsByTagName(self, tagName):

        # NOTE: The name is inserted into an XPath expression unescaped.

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        """
        Merge each run of adjacent text node children of this node into a
        single text node.
        """

        # The childNodes property produces a new list each time, so changing
        # the children during the iteration is safe.

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given non-empty list of adjacent 'text_nodes' with a single
        new text node holding their concatenated values: all but the last node
        are removed and the last node is replaced.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: As class attributes, these dictionaries are shared by all nodes.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):

        "Nodes are equal if the low-level comparison of their wrapped nodes says so."

        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings.

        The default namespace bindings are always available, although they may
        be overridden by the supplied 'namespaces'. String results are passed
        through to_unicode, results having a length are returned as a NodeList
        of high-level nodes, and other results are returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods.
    # NOTE: These use the module-level functions defined later in this file.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute as for Node, additionally recording the
        optional 'ownerElement'.
        """

        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The owner element stands in for the parent of an attribute.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality.
    """

    def __init__(self, node, impl):

        # NOTE: Node.__init__ is deliberately not used here since it assigns to
        # NOTE: ownerDocument, which is a read-only property in this class.

        self._node = node
        self.implementation = self.impl = impl

    def _documentElement(self):

        # NOTE: An IndexError is raised if the document has no element child.

        return self.xpath("*")[0]

    def _ownerDocument(self):
        return self

    def __del__(self):

        # Hand the wrapped document back to libxml2mod when this object is
        # collected.

        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):
        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

# Node types for which Node.nodeValue is always None.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    If the optional 'impl' parameter is given, that implementation is used to
    wrap the parsed document instead of the default implementation.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Anything with a read method is treated as a stream and read completely.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    If the optional 'impl' parameter is given, that implementation is used to
    wrap the parsed document instead of the default implementation.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    If the optional 'impl' parameter is given, that implementation is used to
    wrap the parsed document instead of the default implementation.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    If the optional 'impl' parameter is given, that implementation is used to
    wrap the parsed document instead of the default implementation.

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this purpose
    (such as urllib), open a stream and pass it to the parse function:

      f = urllib.urlopen(uri)
      try:
          doc = libxml2dom.parse(f, html)
      finally:
          f.close()

    A document object is returned by this function.
    """

    if html:
        f = urllib.urlopen(uri)
        try:
            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
                validate=validate, remote=remote, impl=impl)
        finally:
            f.close()
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # NOTE: All nodes are given the document obtained from the first node;
    # NOTE: presumably they are expected to belong to the same document.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    return [Node(node, impl, doc) for node in nodes]

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4