#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.
"""

__version__ = "0.2.4"

import libxml2
from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import weakref

# NOTE(review): the names 'xml' (as in xml.dom.Node), 'libxml2mod' and the
# NOTE(review): Node_* helper functions used below are not imported by name in
# NOTE(review): this file; presumably they arrive through the star import from
# NOTE(review): libxml2dom.macrolib - confirm against that module.

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.

    The map holds a reference to the wrapped element ('node') and delegates
    every attribute operation to that element's own attribute methods.
    """

    def __init__(self, node):

        "Initialise the map for the given element 'node'."

        self.node = node

    def getNamedItem(self, name):

        "Return the attribute node having the given 'name'."

        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):

        "Return the attribute node for the namespace 'ns' and 'localName'."

        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Set the attribute 'node' on the wrapped element, returning whatever
        getNamedItem provided for the same name beforehand (or None if that
        lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Namespace-aware variant of setNamedItem: the previous item is looked up
        using the namespace URI and local name of the given 'node'.
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute having the given 'name' from the wrapped element,
        returning whatever getNamedItem provided for it beforehand (or None if
        that lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        """
        Namespace-aware variant of removeNamedItem using the namespace 'ns' and
        the given 'localName'.
        """

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Dictionary emulation methods.

    def __getitem__(self, name):

        "Return the attribute node having the given 'name'."

        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under the given 'name'. A KeyError is raised
        if 'name' is not the same as the node's own nodeName.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # The call form of raise is accepted by both Python 2 and Python 3.
            raise KeyError(name)

    def __delitem__(self, name):

        """
        Remove the attribute having the given 'name', keyed in the same way as
        __getitem__ and __setitem__.
        """

        # Previously this was an unimplemented no-op, so "del attrs[name]"
        # silently left the attribute in place.
        self.removeNamedItem(name)

    def values(self):

        "Return a list of Attribute objects for the wrapped element."

        # The wrapped element is passed as the owner element so that these
        # attributes report the same parentNode as those obtained through
        # getAttributeNode and getAttributeNodeNS.
        element = self.node
        native_attributes = Node_attributes(element.as_native_node())
        return [Attribute(_node, element.ownerDocument, element)
                for _node in native_attributes.values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):

        "Return the element at the given 'index'."

        return self[index]

    def length(self):

        "Return the number of elements in the list."

        return len(self)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance holds the low-level node ('_node') together with a reference
    to the Document wrapper owning it ('ownerDocument'). All operations are
    delegated to the Node_* helper functions.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, ownerDocument=None):

        """
        Initialise the wrapper with the low-level 'node' and the optional
        'ownerDocument' wrapper.
        """

        self._node = node
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the wrapped low-level node."

        return self._node

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])

    def _attributes(self):
        return NamedNodeMap(self)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _nodeValue(self):
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return get_node(Node_parentNode(self._node), self)

    def _previousSibling(self):

        # A missing sibling is reported as None rather than as a wrapper
        # around nothing, so that loops testing the property can terminate.
        # NOTE(review): assumes Node_previousSibling yields None when there is
        # NOTE(review): no previous sibling - confirm against macrolib.
        sibling = Node_previousSibling(self._node)
        if sibling is None:
            return None
        return Node(sibling, self.ownerDocument)

    def _nextSibling(self):

        # See _previousSibling for the reason behind the None check.
        sibling = Node_nextSibling(self._node)
        if sibling is None:
            return None
        return Node(sibling, self.ownerDocument)

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):

        "Return an Attribute, owned by this node, for 'ns' and 'localName'."

        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)

    def getAttributeNode(self, localName):

        "Return an Attribute, owned by this node, for the given 'localName'."

        return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    def createElementNS(self, ns, name):
        return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)

    def createElement(self, name):
        return Node(Node_createElement(self._node, name), self.ownerDocument)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute for the namespace 'ns' and the given 'name'. The
        low-level attribute is created on a temporary element.
        """

        tmp = self.createElement("tmp")
        # The owner document is passed on, as in the other create* methods.
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.ownerDocument)

    def createAttribute(self, name):

        """
        Return a new Attribute having the given 'name'. The low-level attribute
        is created on a temporary element.
        """

        tmp = self.createElement("tmp")
        # The owner document is passed on, as in the other create* methods.
        return Attribute(Node_createAttribute(tmp._node, name), self.ownerDocument)

    def createTextNode(self, value):
        return Node(Node_createTextNode(self._node, value), self.ownerDocument)

    def createComment(self, value):
        return Node(Node_createComment(self._node, value), self.ownerDocument)

    # In the methods below, objects providing as_native_node are unwrapped
    # before being handed to the low-level functions; anything else is passed
    # through unchanged.

    def importNode(self, node, deep):
        if hasattr(node, "as_native_node"):
            return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)
        else:
            return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)

    def insertBefore(self, tmp, oldNode):
        if hasattr(tmp, "as_native_node"):
            return Node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)

    def replaceChild(self, tmp, oldNode):
        if hasattr(tmp, "as_native_node"):
            return Node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)

    def appendChild(self, tmp):
        if hasattr(tmp, "as_native_node"):
            return Node(Node_appendChild(self._node, tmp.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_appendChild(self._node, tmp), self.ownerDocument)

    def removeChild(self, tmp):
        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)

    def getElementsByTagName(self, tagName):

        "Return the descendants of this node having the given 'tagName'."

        # A leading "//" is an absolute path and would search the entire
        # document regardless of the node used; ".//" starts at this node.
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):

        """
        Return the descendants of this node having the given 'localName' in the
        namespace identified by 'namespaceURI'.
        """

        # See getElementsByTagName for the reason behind the leading ".".
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent child text nodes into a single text node."

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given run of 'text_nodes' with one new text node holding
        their joined values: all but the last are removed and the last is
        replaced.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    # NOTE: normalize must be implemented specially for libxml2dom.

    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    #def isSameNode(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    #def __eq__(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context,
        using the optional 'variables' and 'namespaces'. A result providing
        __len__ is returned as a NodeList of wrapped nodes; any other result
        is returned unchanged.
        """

        result = Node_xpath(self._node, expr, variables, namespaces)
        if hasattr(result, "__len__"):
            return NodeList([get_node(_node, self) for _node in result])
        else:
            return result

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute with the low-level 'node', the optional
        'ownerDocument' and the optional 'ownerElement' holding the attribute.
        """

        Node.__init__(self, node, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):
        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class Document(Node):

    "A class providing document-level housekeeping."

    def __init__(self, node):

        "Initialise the document wrapper with the low-level document 'node'."

        # Node.__init__ is not used here: it assigns ownerDocument, which is a
        # read-only property in this class.
        self._node = node
        self.weakref_ownerDocument = weakref.ref(self)

    def _ownerDocument(self):
        return self.weakref_ownerDocument()

    def __del__(self):

        "Free the low-level document when this wrapper is discarded."

        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    ownerDocument = property(_ownerDocument)

# Factory functions.

def get_node(_node, context_node):

    """
    Return a wrapper for the low-level '_node' using the 'context_node' to
    supply the owner document. Document nodes yield the context node's owner
    document, attribute nodes yield an Attribute whose owner element is the
    context node, and everything else yields a plain Node.
    """

    node_type = Node_nodeType(_node)
    if node_type == context_node.DOCUMENT_NODE:
        return context_node.ownerDocument
    elif node_type == context_node.ATTRIBUTE_NODE:
        return Attribute(_node, context_node.ownerDocument, context_node)
    else:
        return Node(_node, context_node.ownerDocument)

# Utility functions.
def createDocumentType(localName, publicId, systemId):

    """
    Placeholder for document type creation: the arguments are not used and
    None is always returned.
    """

    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Create a new document from the given 'namespaceURI', 'localName' and
    'doctype', returning it wrapped as a document object.
    """

    native_document = Node_createDocument(namespaceURI, localName, doctype)
    return Document(native_document)

def parse(stream_or_string, html=0):

    """
    Parse the given 'stream_or_string'. An object providing a 'read' attribute
    is treated as a stream whose entire content is read and parsed; anything
    else is treated as the filename of a document. A true value for the
    optional 'html' parameter causes the content to be treated as HTML rather
    than XML.

    A document object is returned by this function.
    """

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, html)

    content = stream_or_string.read()
    return parseString(content, html)

def parseFile(filename, html=0):

    """
    Parse the file identified by 'filename'. A true value for the optional
    'html' parameter causes the content to be treated as HTML rather than XML.

    A document object is returned by this function.
    """

    native_document = Node_parseFile(filename, html)
    return Document(native_document)

def parseString(s, html=0):

    """
    Parse the document held in the string 's'. A true value for the optional
    'html' parameter causes the content to be treated as HTML rather than XML.

    A document object is returned by this function.
    """

    native_document = Node_parseString(s, html)
    return Document(native_document)

def parseURI(uri, html=0):

    """
    Parse the document located at the given 'uri'. A true value for the
    optional 'html' parameter causes the content to be treated as HTML rather
    than XML.

    Parsing HTML with parseURI does not currently work. Use parse with a
    stream object instead. For example:

    d = parse(urllib.urlopen("http://www.python.org"), html=1)

    A document object is returned by this function.
    """

    native_document = Node_parseURI(uri, html)
    return Document(native_document)

def toString(node, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, returning the result as a
    string. The optional 'encoding' overrides the default character encoding
    of the serialisation, and the optional 'prettyprint' indicates whether the
    output is prettyprinted (by default, it is not).
    """

    native_node = node.as_native_node()
    return Node_toString(native_node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, writing the result to the
    given 'stream'. The optional 'encoding' overrides the default character
    encoding of the serialisation, and the optional 'prettyprint' indicates
    whether the output is prettyprinted (by default, it is not).
    """

    native_node = node.as_native_node()
    Node_toStream(native_node, stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, writing the result to the
    file having the given 'filename'. The optional 'encoding' overrides the
    default character encoding of the serialisation, and the optional
    'prettyprint' indicates whether the output is prettyprinted (by default,
    it is not).
    """

    native_node = node.as_native_node()
    Node_toFile(native_node, filename, encoding, prettyprint)

def adoptNodes(nodes):

    """
    An experimental utility, not intended for casual use, which adopts the
    given low-level 'nodes' and returns a list of high-level equivalents. All
    of the returned nodes share a single document object obtained from the
    first of the given nodes.
    """

    if len(nodes) == 0:
        return []

    owner = Document(libxml2mod.doc(nodes[0]))
    return [Node(native_node, owner) for native_node in nodes]

# vim: tabstop=4 expandtab shiftwidth=4