paulb@18 | 1 | #!/usr/bin/env python |
paulb@18 | 2 | |
paulb@18 | 3 | """ |
paulb@75 | 4 | DOM wrapper around libxml2, specifically the libxml2mod Python extension module. |
paulb@188 | 5 | |
paulb@324 | 6 | Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk> |
paulb@188 | 7 | |
paulb@293 | 8 | This program is free software; you can redistribute it and/or modify it under |
paulb@293 | 9 | the terms of the GNU Lesser General Public License as published by the Free |
paulb@293 | 10 | Software Foundation; either version 3 of the License, or (at your option) any |
paulb@293 | 11 | later version. |
paulb@188 | 12 | |
paulb@293 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paulb@293 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paulb@293 | 15 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
paulb@293 | 16 | details. |
paulb@188 | 17 | |
paulb@293 | 18 | You should have received a copy of the GNU Lesser General Public License along |
paulb@293 | 19 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paulb@18 | 20 | """ |
paulb@18 | 21 | |
paulb@324 | 22 | __version__ = "0.4.6" |
paulb@27 | 23 | |
paulb@72 | 24 | from libxml2dom.macrolib import * |
paulb@72 | 25 | from libxml2dom.macrolib import \ |
paulb@72 | 26 | createDocument as Node_createDocument, \ |
paulb@72 | 27 | parseString as Node_parseString, parseURI as Node_parseURI, \ |
paulb@72 | 28 | parseFile as Node_parseFile, \ |
paulb@72 | 29 | toString as Node_toString, toStream as Node_toStream, \ |
paulb@72 | 30 | toFile as Node_toFile |
paulb@232 | 31 | import urllib # for parseURI in HTML mode |
paulb@262 | 32 | import xml.dom # for getElementById |
paulb@18 | 33 | |
# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Prefix-to-namespace bindings merged into the namespaces of every XPath
# evaluation performed through Node.xpath.

default_ns = dict(xml=XML_NAMESPACE)
paulb@301 | 43 | |
class Implementation(object):

    """
    An abstraction over the DOM implementation: it creates document types and
    documents, and wraps native nodes in the wrapper classes of this module.
    """

    def createDocumentType(self, localName, publicId, systemId):
        "Return a DocumentType holding the given name and identifiers."
        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):
        "Create a native document and return it wrapped as a Document."
        native_document = Node_createDocument(namespaceURI, localName, doctype)
        return Document(native_document, self)

    # Wrapping of documents.

    def adoptDocument(self, node):
        "Wrap the native document 'node' in a Document using this implementation."
        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a wrapper for the native '_node'. The 'context_node' is an
        existing wrapper which supplies the node type constants and the owner
        document for the result.
        """

        node_type = Node_nodeType(_node)

        # A document node is represented by the existing document wrapper.

        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # An attribute is wrapped with its native parent as the owner element.

        if node_type == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Everything else becomes a plain node wrapper.

        return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, except that a native '_node' of None yields None."

        if _node is not None:
            return self.get_node(_node, context_node)
        return None
paulb@240 | 85 | |
paulb@86 | 86 | # Attribute and node list wrappers. |
paulb@18 | 87 | |
class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods for the attributes of the wrapped node.

    NOTE: The dictionary-style lookup methods (__getitem__, __setitem__,
    NOTE: __delitem__) are keyed by attribute name, whereas keys() and items()
    NOTE: report (namespaceURI, localName) tuples.
    """

    def __init__(self, node, impl):

        """
        Initialise the map for the given 'node' (the wrapper whose attributes
        are exposed) using 'impl' to wrap native attribute nodes.
        """

        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        "Return the attribute node having the given 'name'."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node having the given 'ns' and 'localName'."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Set the attribute 'node' on the wrapped node, returning the attribute
        node previously found under the same name (None if the lookup raised
        KeyError).
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Namespace-aware variant of setNamedItem, using the namespace and local
        name of 'node' to find the previous attribute node.
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute having the given 'name', returning the attribute
        node found beforehand (None if the lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        "Namespace-aware variant of removeNamedItem."

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        # The key must agree with the name of the attribute being stored.

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):

        # Deleting an entry removes the attribute from the wrapped node rather
        # than silently doing nothing.

        self.removeNamedItem(name)

    def values(self):

        """
        Return a list of attribute wrappers, each having the wrapped node as
        its owner element (as with getAttributeNode on the node itself).
        """

        return [
            Attribute(_node, self.impl, self.node.ownerDocument, self.node)
            for _node in Node_attributes(self.node.as_native_node()).values()
            ]

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples, one per attribute."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)
paulb@198 | 176 | |
class NamedNodeMapIterator(object):

    """
    An iterator over a NamedNodeMap, producing the attribute node from each of
    the map's items in turn.

    Both the 'next' method and the '__next__' method are provided, along with
    '__iter__', so that instances can be used wherever an iterator is expected.
    """

    def __init__(self, nodemap):

        "Initialise the iterator for the given 'nodemap'."

        self.nodemap = nodemap

        # The items are obtained once, here; iteration then walks this list by
        # position instead of copying the remainder of the list at every step.

        self.items = self.nodemap.items()
        self._position = 0

    def __iter__(self):
        return self

    def next(self):

        "Return the next attribute node, raising StopIteration when exhausted."

        if self._position >= len(self.items):
            raise StopIteration

        # Each item is a (key, node) pair: only the node is returned.

        current = self.items[self._position][1]
        self._position += 1
        return current

    __next__ = next
paulb@276 | 192 | |
class NodeList(list):

    "A wrapper around node lists, adding the DOM 'item' method and 'length'."

    def item(self, index):

        """
        Return the node at the given 'index', or None if there is no node at
        that position (as required by the DOM) instead of raising IndexError.
        """

        try:
            return self[index]
        except IndexError:
            return None

    def _length(self):
        return len(self)

    length = property(_length)
paulb@198 | 204 | |
paulb@86 | 205 | # Node classes. |
paulb@86 | 206 | |
class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance holds a native node ('_node'), the implementation object used
    to wrap related native nodes ('impl') and the wrapper of the owning
    document ('ownerDocument'). The DOM attributes are exposed as properties
    which delegate to the Node_* helper functions.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper for the native 'node'. The optional 'impl' is
        the implementation used to wrap related nodes (the module's
        default_impl is used if it is omitted or false), and the optional
        'ownerDocument' is the wrapper of the document owning the node.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):
        "Return the wrapped native node."
        return self._node

    # Property accessors.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # An empty list of children is replaced by [None], giving None.

        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types have no value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of a document
        type node, or None for other kinds of node or if none is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of a document
        type node, or None for other kinds of node or if none is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # In a PUBLIC declaration the system identifier is the quoted value
        # following the public identifier, so the first quoted value after the
        # keyword must be skipped rather than returned.

        public_at = declaration.find("PUBLIC")
        if public_at != -1:
            opening = declaration.find('"', public_at)
            if opening != -1:
                closing = declaration.find('"', opening + 1)
                if closing != -1:
                    return self._findIdValue(declaration, closing + 1)

        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the double-quoted value following the first occurrence of the
        'identifier' keyword in 'declaration', or None if either is absent.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the first double-quoted value in 'declaration' at or after
        position 'i', or None if no complete quoted value is found.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new attribute wrapper for the given 'ns' and 'name'. The
        native attribute is created on a temporary element.
        """

        tmp = self.createElement("tmp")

        # The implementation belongs to the wrapper, not to the native
        # creation function (as in createAttribute).

        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        """
        Return a new attribute wrapper for the given 'name'. The native
        attribute is created on a temporary element.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import 'node', which may be a wrapper from this module or a node from
        another DOM implementation, returning the wrapped result.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree modification.

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the existing child 'oldNode', returning the wrapped
        result. If 'oldNode' is None, 'tmp' is appended to the children.

        A DOMException is raised with WRONG_DOCUMENT_ERR if 'tmp' belongs to a
        different document, or with NOT_FOUND_ERR if 'oldNode' is not a child
        of this node.
        """

        # The DOM defines a null reference child as a request to append.

        if oldNode is None:
            return self.appendChild(tmp)

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with 'tmp', returning the wrapped
        result. The same exceptions are raised as for insertBefore.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if oldNode.parentNode != self:
            raise xml.dom.DOMException(xml.dom.NOT_FOUND_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append 'tmp' to the children of this node, returning the wrapped
        result. A DOMException is raised with WRONG_DOCUMENT_ERR if 'tmp'
        belongs to a different document.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.DOMException(xml.dom.WRONG_DOCUMENT_ERR)
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):

        "Remove the child 'tmp' from this node, returning 'tmp'."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def _xpathStringLiteral(self, value):

        """
        Return an XPath expression evaluating to the string 'value'. XPath
        string literals have no escape mechanism, so a value containing both
        kinds of quote is assembled from separately quoted pieces using the
        concat function.
        """

        if "'" not in value:
            return "'" + value + "'"
        if '"' not in value:
            return '"' + value + '"'
        pieces = ["'" + piece + "'" for piece in value.split("'")]
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    def getElementById(self, identifier):

        """
        Return the first descendant element whose xml:id attribute has the
        given 'identifier' as its value, or None if there is no such element.
        """

        nodes = self.xpath(".//*[@xml:id=" + self._xpathStringLiteral(identifier) + "]",
            namespaces={"xml" : xml.dom.XML_NAMESPACE})
        if nodes:
            return nodes[0]
        else:
            return None

    def getElementsByTagName(self, tagName):
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent text node children into a single text node."

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given run of 'text_nodes' with one new text node holding
        their combined values: all but the last are removed, and the last is
        replaced by the new node.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: As class attributes, these dictionaries are shared by all nodes.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):
        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings. The bindings in default_ns are always available,
        although 'namespaces' may override them.

        A node set result is returned as a NodeList of wrapped nodes, and a
        plain string result is passed through to_unicode; any other result is
        returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)
paulb@75 | 541 | |
paulb@88 | 542 | # Attribute nodes. |
paulb@88 | 543 | |
class Attribute(Node):

    """
    A node class providing attribute access, where the parent of an attribute
    is the owner element supplied upon initialisation.
    """

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute for the native 'node' using 'impl', with the
        optional 'ownerDocument' and 'ownerElement' wrappers.
        """

        self.ownerElement = ownerElement
        Node.__init__(self, node, impl, ownerDocument)

    def _parentNode(self):

        # The owner element is reported instead of consulting the native node.

        return self.ownerElement

    parentNode = property(_parentNode)
paulb@88 | 556 | |
paulb@75 | 557 | # Document housekeeping mechanisms. |
paulb@75 | 558 | |
paulb@258 | 559 | class _Document: |
paulb@75 | 560 | |
paulb@258 | 561 | """ |
paulb@258 | 562 | An abstract class providing document-level housekeeping and distinct |
paulb@258 | 563 | functionality. |
paulb@258 | 564 | """ |
paulb@75 | 565 | |
paulb@218 | 566 | def __init__(self, node, impl): |
paulb@75 | 567 | self._node = node |
paulb@256 | 568 | self.implementation = self.impl = impl |
paulb@256 | 569 | |
paulb@256 | 570 | def _documentElement(self): |
paulb@256 | 571 | return self.xpath("*")[0] |
paulb@75 | 572 | |
paulb@75 | 573 | def _ownerDocument(self): |
paulb@188 | 574 | return self |
paulb@75 | 575 | |
paulb@75 | 576 | def __del__(self): |
paulb@84 | 577 | #print "Freeing document", self._node |
paulb@75 | 578 | libxml2mod.xmlFreeDoc(self._node) |
paulb@75 | 579 | |
paulb@256 | 580 | documentElement = property(_documentElement) |
paulb@75 | 581 | ownerDocument = property(_ownerDocument) |
paulb@154 | 582 | |
class Document(_Document, Node):

    """
    The general-purpose document class, combining the document housekeeping of
    _Document with the plain Node class. Specialised document classes should
    likewise inherit from _Document and from their own variation of Node.
    """
paulb@258 | 591 | |
class DocumentType(object):

    """
    A simple container for document type information: the name (also available
    as the local name) together with the public and system identifiers.
    """

    def __init__(self, localName, publicId, systemId):

        "Initialise the container with 'localName', 'publicId' and 'systemId'."

        self.localName = localName
        self.name = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: These attributes are always empty since nothing currently
        # NOTE: populates them.

        self.entities = {}
        self.notations = {}
paulb@32 | 606 | |
# Constants.

# The node types for which Node.nodeValue is always None.

null_value_node_types = [
    Node.DOCUMENT_NODE,
    Node.DOCUMENT_TYPE_NODE,
    Node.ELEMENT_NODE,
    Node.ENTITY_NODE,
    Node.ENTITY_REFERENCE_NODE,
    Node.NOTATION_NODE
    ]
paulb@223 | 613 | |
paulb@18 | 614 | # Utility functions. |
paulb@18 | 615 | |
def createDocumentType(localName, publicId, systemId):

    """
    Return a document type object for the given 'localName', 'publicId' and
    'systemId', as created by the default implementation.
    """

    implementation = default_impl
    return implementation.createDocumentType(localName, publicId, systemId)
paulb@18 | 618 | |
def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document for the given 'namespaceURI', 'localName' and
    'doctype', as created by the default implementation.
    """

    implementation = default_impl
    return implementation.createDocument(namespaceURI, localName, doctype)
paulb@18 | 621 | |
def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the given 'stream_or_string'. An object providing a 'read' method is
    treated as a stream whose entire content is read and parsed; anything else
    is treated as the filename of a document. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    The optional 'impl' is the implementation used to wrap the parsed document;
    the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    options = {
        "html" : html, "htmlencoding" : htmlencoding,
        "unfinished" : unfinished, "impl" : impl or default_impl
        }

    # Filenames are handed straight to parseFile.

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, **options)

    # Streams are read completely and their content parsed as a string.

    return parseString(stream_or_string.read(), **options)
paulb@32 | 649 | |
def parseFile(filename, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    The optional 'impl' is the implementation used to wrap the parsed document;
    the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    implementation = impl or default_impl
    native_document = Node_parseFile(filename, html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return implementation.adoptDocument(native_document)
paulb@18 | 670 | |
def parseString(s, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    The optional 'impl' is the implementation used to wrap the parsed document;
    the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    implementation = impl or default_impl
    native_document = Node_parseString(s, html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return implementation.adoptDocument(native_document)
paulb@18 | 691 | |
def parseURI(uri, html=0, htmlencoding=None, unfinished=0, impl=None):

    """
    Parse the content found at the given 'uri' and return a document object.
    The optional parameters described below should be given as keyword
    arguments.

    'html'         -- if true, the content is treated as HTML rather than XML.
    'htmlencoding' -- if specified, HTML parsing is performed with the
                      document encoding assumed to be that specified.
    'unfinished'   -- if true, unfinished documents are parsed, even though
                      such documents may be missing content such as closing
                      tags.
    'impl'         -- the implementation which adopts the parsed document;
                      the default implementation is used if this is omitted
                      (or otherwise false).

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this
    purpose (such as urllib), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()
    """

    options = dict(html=html, htmlencoding=htmlencoding, unfinished=unfinished)

    # XML: hand the URI straight to the native parser.

    if not html:
        adopt = (impl or default_impl).adoptDocument
        return adopt(Node_parseURI(uri, **options))

    # HTML: fetch the content with urllib and parse the resulting stream,
    # making sure that the stream is closed whatever happens.

    stream = urllib.urlopen(uri)
    try:
        return parse(stream, impl=impl, **options)
    finally:
        stream.close()
paulb@18 | 730 | |
def toString(node, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children and return the
    result as a string.

    'encoding'    -- if specified, overrides the default character encoding
                     used in the serialisation.
    'prettyprint' -- if true, the serialised form is prettyprinted; by default
                     it is not.
    """

    native = node.as_native_node()
    return Node_toString(native, encoding, prettyprint)
paulb@18 | 742 | |
def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children, writing the result
    to the given 'stream'.

    'encoding'    -- if specified, overrides the default character encoding
                     used in the serialisation.
    'prettyprint' -- if true, the serialised form is prettyprinted; by default
                     it is not.
    """

    native = node.as_native_node()
    Node_toStream(native, stream, encoding, prettyprint)
paulb@72 | 754 | |
def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children, writing the result
    to a file having the given 'filename'.

    'encoding'    -- if specified, overrides the default character encoding
                     used in the serialisation.
    'prettyprint' -- if true, the serialised form is prettyprinted; by default
                     it is not.
    """

    native = node.as_native_node()
    Node_toFile(native, filename, encoding, prettyprint)
paulb@18 | 766 | |
def adoptNodes(nodes, impl=None):

    """
    A special utility function which wraps each of the given low-level 'nodes'
    in a high-level node object, returning the wrappers as a list in the same
    order. An empty collection of nodes yields an empty list.

    The optional 'impl' is the implementation associated with the wrappers;
    the default implementation is used if it is omitted (or otherwise false).

    This is currently experimental and should not be casually used.
    """

    implementation = impl or default_impl

    if len(nodes) == 0:
        return []

    # The document is obtained from the first node only and is then shared by
    # every wrapper produced below.

    document = implementation.adoptDocument(libxml2mod.doc(nodes[0]))
    return [Node(native, implementation, document) for native in nodes]
paulb@81 | 784 | |
def getDOMImplementation():

    """
    Return the default DOM implementation: the module-level 'default_impl'
    object.
    """

    return default_impl
paulb@240 | 790 | |
# Single instance of the implementation, created once when the module is
# loaded. The functions in this module fall back to it when no 'impl' is
# supplied, and getDOMImplementation returns it.

default_impl = Implementation()
paulb@218 | 794 | |
paulb@18 | 795 | # vim: tabstop=4 expandtab shiftwidth=4 |