#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

__version__ = "0.3.3"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.

    The wrapped object is the element whose attributes are exposed; every DOM
    method here delegates to the corresponding attribute method of that
    element.
    """

    def __init__(self, node):

        "Initialise the map for the given element 'node'."

        self.node = node

    def getNamedItem(self, name):

        "Return the attribute node having the given 'name'."

        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):

        "Return the attribute node having the given 'ns' and 'localName'."

        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Set the given attribute 'node' on the element, returning the attribute
        node previously found under the same name. A KeyError raised by the
        lookup is treated as "no previous attribute" and yields None.
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Set the given attribute 'node' on the element, returning the attribute
        node previously found under the same namespace and local name. A
        KeyError raised by the lookup yields None.
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute having the given 'name' from the element,
        returning the attribute node found before removal (or None if the
        lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        """
        Remove the attribute having the given 'ns' and 'localName' from the
        element, returning the attribute node found before removal (or None if
        the lookup raised KeyError).
        """

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Dictionary emulation methods.

    def __getitem__(self, name):

        "Return the attribute node having the given 'name'."

        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name'. The key must equal the node's
        own nodeName; otherwise KeyError is raised and nothing is stored.
        """

        if name != node.nodeName:
            # Call syntax rather than "raise KeyError, name": the latter is
            # only accepted by Python 2.
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):

        """
        Remove the attribute having the given 'name' from the element.

        NOTE: What happens when no such attribute exists depends on the
        NOTE: underlying removeAttribute implementation, which is not defined
        NOTE: in this module.
        """

        self.removeNamedItem(name)

    def values(self):

        """
        Return a list of Attribute objects, one for each attribute of the
        element. Each attribute refers back to the element as its owner, just
        like the attributes returned by getNamedItem.
        """

        element = self.node
        native_attributes = Node_attributes(element.as_native_node())
        return [Attribute(_node, element.ownerDocument, element) for _node in native_attributes.values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        """
        Return a list of ((namespaceURI, localName), attribute) tuples, one per
        attribute.
        """

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):

        "Return a dictionary-like rendering of the items, one per line."

        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):

        "Return the node at the given 'index'."

        return self[index]

    def length(self):

        """
        Return the number of nodes in the list.

        NOTE: This is a method here, whereas the DOM defines 'length' as an
        NOTE: attribute; it is left as a method for compatibility with
        NOTE: existing callers.
        """

        return len(self)

# Node classes.
# String types for which an XPath result must not be treated as a node list.
# NOTE: type(u"") is the unicode type on Python 2 and str on Python 3.

_STRING_TYPES = (type(""), type(u""))

def _native(node):

    """
    Return the low-level node behind 'node' if it is one of this module's
    wrappers (anything providing as_native_node), or 'node' itself otherwise.
    """

    if hasattr(node, "as_native_node"):
        return node.as_native_node()
    return node

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance holds a low-level node ('_node') and a reference to the
    Document wrapper owning it ('ownerDocument'). Almost every operation is
    delegated to a Node_* function obtained from libxml2dom.macrolib.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, ownerDocument=None):

        """
        Initialise the wrapper around the low-level 'node', optionally
        recording the 'ownerDocument' wrapper to which it belongs.
        """

        self._node = node
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the low-level node wrapped by this object."

        return self._node

    def _wrap_or_none(self, _node):

        """
        Return a Node wrapping the low-level '_node' within this node's
        document, or None if '_node' is None. This prevents an absent sibling
        or doctype from being presented as a wrapper around nothing.
        """

        if _node is None:
            return None
        return Node(_node, self.ownerDocument)

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])

    def _attributes(self):
        return NamedNodeMap(self)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _nodeValue(self):
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):

        # NOTE: Assumes that a missing parent is reported as None by the
        # NOTE: macro library; get_node cannot classify such a value.

        parent = Node_parentNode(self._node)
        if parent is None:
            return None
        return get_node(parent, self)

    def _previousSibling(self):
        return self._wrap_or_none(Node_previousSibling(self._node))

    def _nextSibling(self):
        return self._wrap_or_none(Node_nextSibling(self._node))

    def _doctype(self):
        return self._wrap_or_none(Node_doctype(self._node))

    def _publicId(self):

        """
        Return the public identifier of a document type node, or None for
        other kinds of node or where no public identifier is declared.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        return self._findId(self.toString(), "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier of a document type node, or None for
        other kinds of node or where no system identifier is declared.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        return self._findSystemId(self.toString())

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findSystemId(self, declaration):

        """
        Return the system identifier found in the serialised doctype
        'declaration', or None if there is none.

        In a PUBLIC declaration the system identifier is the quoted literal
        following the public identifier; in a SYSTEM declaration it is the
        first quoted literal after the keyword.
        """

        keyword = declaration.find("PUBLIC")
        if keyword != -1:
            opening = declaration.find('"', keyword)
            if opening != -1:
                closing = declaration.find('"', opening + 1)
                if closing != -1:
                    # Resume the search beyond the public identifier so that
                    # it is not returned as the system identifier.
                    return self._findIdValue(declaration, closing + 1)
        return self._findId(declaration, "SYSTEM")

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following the keyword 'identifier'
        in 'declaration', or None if the keyword or value is missing.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the text between the first pair of double quotes found at or
        after position 'i' in 'declaration', or None if no such pair exists.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):

        "Return an Attribute, owned by this node, for 'ns' and 'localName'."

        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)

    def getAttributeNode(self, localName):

        "Return an Attribute, owned by this node, for the given 'localName'."

        return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    def createElementNS(self, ns, name):
        return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)

    def createElement(self, name):
        return Node(Node_createElement(self._node, name), self.ownerDocument)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute having the given 'ns' and 'name'. The attribute
        is created on a temporary element and belongs to this node's document.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.ownerDocument)

    def createAttribute(self, name):

        """
        Return a new Attribute having the given 'name'. The attribute is
        created on a temporary element and belongs to this node's document.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.ownerDocument)

    def createTextNode(self, value):
        return Node(Node_createTextNode(self._node, value), self.ownerDocument)

    def createComment(self, value):
        return Node(Node_createComment(self._node, value), self.ownerDocument)

    def importNode(self, node, deep):

        """
        Import the given 'node' into this node's document, returning the
        wrapped result. Wrappers from this module are imported via their
        low-level nodes; any other object is passed to Node_importNode_DOM.
        """

        if hasattr(node, "as_native_node"):
            imported = Node_importNode(self._node, node.as_native_node(), deep)
        else:
            imported = Node_importNode_DOM(self._node, node, deep)
        return Node(imported, self.ownerDocument)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the existing child 'oldNode', returning the wrapped
        result of the low-level operation. 'tmp' may be a wrapper or a
        low-level node; 'oldNode' must be a wrapper.
        """

        return Node(Node_insertBefore(self._node, _native(tmp), oldNode.as_native_node()), self.ownerDocument)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with 'tmp', returning the wrapped
        result of the low-level operation. 'tmp' may be a wrapper or a
        low-level node; 'oldNode' must be a wrapper.
        """

        return Node(Node_replaceChild(self._node, _native(tmp), oldNode.as_native_node()), self.ownerDocument)

    def appendChild(self, tmp):

        """
        Append 'tmp' (a wrapper or a low-level node) to this node's children,
        returning the wrapped result of the low-level operation.
        """

        return Node(Node_appendChild(self._node, _native(tmp)), self.ownerDocument)

    def removeChild(self, tmp):

        "Remove the child 'tmp' (a wrapper or a low-level node) from this node."

        Node_removeChild(self._node, _native(tmp))

    def getElementsByTagName(self, tagName):

        """
        Return a NodeList of the descendants of this node having the given
        'tagName'.
        """

        # The leading "." anchors the search at this node; a bare "//" would
        # search from the root of the document regardless of the context.

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):

        """
        Return a NodeList of the descendants of this node having the given
        'namespaceURI' and 'localName'.
        """

        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        """
        Merge each run of adjacent text nodes amongst this node's children into
        a single text node. Only direct children are examined.
        """

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif text_nodes:
                self._normalize(text_nodes)
                text_nodes = []
        if text_nodes:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the adjacent 'text_nodes' with one new text node holding their
        concatenated values: all but the last are removed and the last is
        replaced by the new node.
        """

        last = text_nodes[-1]
        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(last.nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), last)

    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: As class attributes, these dictionaries are shared by all nodes.

    entities = {}
    notations = {}

    #def isSameNode(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    #def __eq__(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context,
        using the optional 'variables' and 'namespaces' mappings.

        Results having a length are returned as a NodeList of wrapped nodes;
        strings and all other results are returned unchanged.
        """

        result = Node_xpath(self._node, expr, variables, namespaces)

        # Strings have a length too, but are values rather than node lists.

        if isinstance(result, _STRING_TYPES):
            return result
        if hasattr(result, "__len__"):
            return NodeList([get_node(_node, self) for _node in result])
        return result

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):

        "Return the serialised form of this node; see the toString function."

        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):

        "Write the serialised form of this node to 'stream'; see toStream."

        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):

        "Write the serialised form of this node to the file 'f'; see toFile."

        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute wrapper around the low-level 'node', recording
        the optional 'ownerDocument' and the 'ownerElement' bearing it.
        """

        Node.__init__(self, node, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The owning element is presented as the parent of an attribute.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class Document(Node):

    "A class providing document-level housekeeping."

    def __init__(self, node):

        # Node.__init__ is deliberately not called: it assigns ownerDocument,
        # which is a read-only property in this class.

        self._node = node

    def _ownerDocument(self):
        return self

    def _parentNode(self):
        return None

    def __del__(self):

        # The low-level document is freed when this wrapper is collected.

        libxml2mod.xmlFreeDoc(self._node)

    ownerDocument = property(_ownerDocument)
    parentNode = property(_parentNode)

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):

        """
        Initialise the container with the given 'localName' (also available as
        'name'), 'publicId' and 'systemId'.
        """

        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Factory functions.

def get_node(_node, context_node):

    """
    Return a wrapper for the low-level '_node', chosen according to its node
    type: a document node yields the owner document of 'context_node', an
    attribute node yields an Attribute with 'context_node' as owner element,
    and anything else yields a plain Node in the same document.
    """

    node_type = Node_nodeType(_node)
    if node_type == context_node.DOCUMENT_NODE:
        return context_node.ownerDocument
    if node_type == context_node.ATTRIBUTE_NODE:
        return Attribute(_node, context_node.ownerDocument, context_node)
    return Node(_node, context_node.ownerDocument)

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Return a DocumentType for the given 'localName', 'publicId', 'systemId'."

    return DocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new Document created from the given 'namespaceURI', 'localName'
    and 'doctype'.
    """

    return Document(Node_createDocument(namespaceURI, localName, doctype))

def parse(stream_or_string, html=0):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). If the optional 'html' parameter is set to a true
    value, the content to be parsed will be treated as being HTML rather than
    XML.

    A document object is returned by this function.
    """

    # Anything providing a read method is treated as a stream.

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read(), html)
    return parseFile(stream_or_string, html)

def parseFile(filename, html=0):

    """
    Parse the file having the given 'filename'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseFile(filename, html))

def parseString(s, html=0):

    """
    Parse the content of the given string 's'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseString(s, html))

def parseURI(uri, html=0):

    """
    Parse the content found at the given 'uri'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    The parseURI function does not currently work with HTML. Use parse with a
    stream object instead. For example:

    d = parse(urllib.urlopen("http://www.python.org"), html=1)

    A document object is returned by this function.
    """

    return Document(Node_parseURI(uri, html))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.

    All of the returned nodes share a single Document, obtained from the first
    of the given 'nodes'.
    """

    if not nodes:
        return []
    doc = Document(libxml2mod.doc(nodes[0]))
    return [Node(node, doc) for node in nodes]

# vim: tabstop=4 expandtab shiftwidth=4