paulb@1 | 1 | #!/usr/bin/env python |
paulb@1 | 2 | |
paulb@1 | 3 | """ |
paulb@1 | 4 | DOM wrapper around libxml2. |
paulb@1 | 5 | """ |
paulb@1 | 6 | |
paulb@1 | 7 | import xml.dom |
paulb@3 | 8 | import libxml2 |
paulb@8 | 9 | import sys |
paulb@1 | 10 | |
paulb@3 | 11 | # NOTE: Consider a generator instead. |
paulb@3 | 12 | |
class NamedNodeMap(object):

    """
    A dictionary-like view of the attributes of an element.

    The map keeps no state of its own: every operation goes back to the
    element wrapper supplied at construction time, so the map always reflects
    the current attributes. Attributes are keyed by (namespaceURI, localName)
    tuples.
    """

    def __init__(self, node):

        "Initialise the map for the element wrapper 'node'."

        self.node = node

    def _find(self, ns, localName):

        """
        Return the attribute node having the given 'ns' (namespace URI) and
        'localName', or None if the element carries no such attribute.

        The attribute list is searched directly instead of delegating to the
        element's getAttributeNode methods: those methods are implemented in
        terms of this map, so delegating would recurse without end.
        """

        for attr in self.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getNamedItem(self, name):

        """
        Return the attribute node called 'name' which is in no namespace, or
        None if there is no such attribute.
        """

        return self._find(None, name)

    def getNamedItemNS(self, ns, localName):

        """
        Return the attribute node with the namespace URI 'ns' and the given
        'localName', or None if there is no such attribute.
        """

        return self._find(ns, localName)

    def setNamedItem(self, node):

        "Set the attribute 'node' on the element using its qualified name."

        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):

        "Set the attribute 'node' on the element using its namespace details."

        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute node for 'name', which is either a
        (namespaceURI, localName) tuple as produced by the keys method, or a
        plain name denoting an attribute in no namespace.

        Raise KeyError if there is no such attribute.
        """

        if isinstance(name, tuple):
            ns, localName = name
        else:
            ns, localName = None, name
        attr = self._find(ns, localName)
        if attr is None:
            raise KeyError(name)
        return attr

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' on the element. The given 'name' must equal
        the node's nodeName, otherwise KeyError is raised.
        """

        if name != node.nodeName:
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):

        "Do nothing: removal of attributes is not yet supported."

        # NOTE: To be implemented.
        pass

    def values(self):

        """
        Return a list of attribute nodes, one for each property of the
        underlying libxml2 element, each having the element as ownerElement.
        """

        attributes = []
        _attribute = self.node._node.properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        """
        Return a list of ((namespaceURI, localName), attribute node) tuples,
        one per attribute.
        """

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])
paulb@3 | 62 | |
def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' into a (prefix, localName) tuple.

    A name without a colon yields (None, name); a name with exactly one colon
    yields the text before and after it. A name with more than one colon
    yields (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Return a tuple here as well, so that every branch produces the
        # same type of result.
        return tuple(parts)
    else:
        # NOTE: Should raise an exception.
        return None, None
paulb@3 | 72 | |
class TemporaryNode(object):

    """
    A plain record describing a node which has not yet been created in the
    underlying libxml2 tree.

    It stores the namespace URI ('ns'), the qualified 'name', the DOM
    'nodeType', and the 'prefix' and 'localName' obtained by splitting the
    qualified name.
    """

    def __init__(self, ns, name, nodeType):

        "Record the given details and split 'name' into prefix and localName."

        self.ns, self.name, self.nodeType = ns, name, nodeType
        split_name = _get_prefix_and_localName(name)
        self.prefix, self.localName = split_name
paulb@1 | 79 | |
class Node(object):

    """
    DOM-style wrapper around a libxml2 node.

    The wrapped libxml2 object is held in '_node'. The DOM attributes
    (nodeName, childNodes, attributes and so on) are read-only properties
    computed from it on every access, so each access produces new wrapper
    objects rather than cached ones.
    """

    # libxml2 node type names mapped to the DOM node type constants.

    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the libxml2 'node'. The optional 'ownerElement' is the wrapper of
        the element carrying this node (used for attributes) and the optional
        'doctype' is the document type associated with a document.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    # Property implementations.

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        """
        Internal namespace information retrieval: return the libxml2
        namespace object of this node, or None if libxml2 reports a tree
        error when asked for it.
        """

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return ns.content
        else:
            return None

    def _nodeValue(self):
        return self._node.content

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return ns.name
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return self._node.name

    def _parentNode(self):

        """
        Return the wrapped parent of this node, or None for documents and for
        nodes which have no parent (instead of a wrapper around None).
        """

        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        _parent = self._node.parent
        if _parent is None:
            return None
        return Node(_parent)

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return self._getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        return self._getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):

        "Return the value of the given attribute, or '' if it is absent."

        return self._getAttributeNS(ns, localName) or ""

    def _getAttributeNS(self, ns, localName):
        return self._node.nsProp(localName, ns)

    def getAttribute(self, name):

        "Return the value of the attribute 'name', or '' if it is absent."

        return self._getAttribute(name) or ""

    def _getAttribute(self, name):
        return self._node.prop(name)

    def _findAttributeNode(self, ns, localName):

        """
        Return a wrapper for the attribute with the namespace URI 'ns' and
        the given 'localName', or None if this element has no such attribute.

        The libxml2 property list is walked directly. Going through the
        'attributes' map would recurse without end, because that map resolves
        item access by calling getAttributeNode on this object.
        """

        _attribute = self._node.properties
        while _attribute is not None:
            attribute = Node(_attribute, ownerElement=self)
            if attribute.namespaceURI == ns and attribute.localName == localName:
                return attribute
            _attribute = _attribute.next
        return None

    def getAttributeNodeNS(self, ns, localName):

        "Return the matching attribute node, or None if there is none."

        return self._findAttributeNode(ns, localName)

    def getAttributeNode(self, localName):

        """
        Return the attribute node called 'localName' which is in no
        namespace, or None if there is none.
        """

        # NOTE: Needs verifying.
        return self._findAttributeNode(None, localName)

    def _declareNs(self, ns, prefix):

        """
        Return a libxml2 namespace binding 'prefix' to the URI 'ns' for use
        on this element.

        A new declaration is attempted first. If libxml2 rejects it
        (presumably because the prefix is already declared on this element -
        to be confirmed against the libxml2 documentation), the declaration
        in scope for the prefix is reused, provided that it refers to the
        same URI; otherwise the original error is raised again.
        """

        try:
            return self._node.newNs(ns, prefix)
        except libxml2.treeError:
            _ns = self._node.searchNs(self._node.doc, prefix)
            if _ns.content != ns:
                raise
            return _ns

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' in the namespace 'ns' to
        the given 'value'.
        """

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._declareNs(ns, prefix), localName, value)
            return

        # Use the guarded lookup: an element without a namespace must not
        # cause a tree error here.

        _ns = self._getNs()
        if _ns is not None and ns == _ns.content:
            self._node.setNsProp(_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    # Node creation.

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element with the qualified 'name', carrying
        a declaration of the namespace 'ns'.
        """

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        _ns = _node.newNs(ns, prefix)
        _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):

        "Return a new, unattached element called 'name' in no namespace."

        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):

        """
        Return a placeholder describing an attribute with the qualified
        'name' in the namespace 'ns'.
        """

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):

        "Return a placeholder describing an attribute 'name' in no namespace."

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):
        return Node(libxml2.newText(value))

    def _add_node(self, tmp):

        """
        Create on this element the libxml2 node described by the placeholder
        'tmp' and return it. Only attribute placeholders are supported; None
        is returned for anything else.
        """

        if tmp.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            if tmp.ns is not None:
                _child = self._node.newNsProp(None, tmp.localName, None)
                _ns = _child.newNs(tmp.ns, tmp.prefix)
                _child.setNs(_ns)
            else:
                # newProp takes only a name and a value; unlike newNsProp
                # there is no namespace argument.
                _child = self._node.newProp(tmp.name, None)
        else:
            _child = None

        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node', made using the creation methods of this
        node's owner document. Elements are copied together with their
        attributes and, if 'deep' is true, with their descendants. Text and
        attribute nodes are also supported; ValueError is raised for any other
        node type.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    # Tree manipulation.

    def _detached(self, tmp):

        """
        Return the libxml2 node corresponding to 'tmp', unlinked from any
        tree position it currently occupies so that it can be placed
        elsewhere.
        """

        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return _child

    def insertBefore(self, tmp, oldNode):

        "Insert 'tmp' immediately before 'oldNode' and return the new node."

        return Node(oldNode._node.addPrevSibling(self._detached(tmp)))

    def replaceChild(self, tmp, oldNode):

        """
        Put 'tmp' in the place of 'oldNode' and return a wrapper around the
        result of the libxml2 replaceNode operation.
        """

        return Node(oldNode._node.replaceNode(self._detached(tmp)))

    def appendChild(self, tmp):

        """
        Add 'tmp' to this node and return a wrapper for the added node. Text
        and element nodes become children; anything else is handed to the
        _add_node method.
        """

        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)

    def isSameNode(self, other):

        """
        Return whether 'other' wraps a node having the same libxml2 node path
        as the node wrapped by this object.
        """

        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):

        """
        Compare wrappers using isSameNode. Objects which are not Node
        instances (such as None) never compare equal and no longer cause an
        AttributeError.
        """

        if not isinstance(other, Node):
            return NotImplemented
        return self.isSameNode(other)

    def __ne__(self, other):

        "Provide the inverse of __eq__ so that '!=' agrees with '=='."

        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
paulb@1 | 320 | |
paulb@8 | 321 | # Utility functions. |
paulb@8 | 322 | |
def createDocumentType(localName, publicId, systemId):

    """
    Placeholder for document type creation: the arguments are ignored and
    None is always returned.
    """

    return None
paulb@8 | 325 | |
def createDocument(namespaceURI, localName, doctype):

    """
    Return a new wrapped document associated with the given 'doctype'. If
    'localName' is not None, a root element of that name in the namespace
    'namespaceURI' is created and appended to the document.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is None:
        return document
    document.appendChild(document.createElementNS(namespaceURI, localName))
    return document
paulb@8 | 333 | |
def parse(stream_or_string):

    """
    Parse the document provided by 'stream_or_string' and return it wrapped.

    An object with a 'read' attribute is treated as a stream and is read but
    left open for the caller to close. Anything else is treated as the name
    of a file, which is opened here and closed again once it has been read,
    even if reading or parsing fails.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()
paulb@8 | 340 | |
def parseString(s):

    "Parse the document text 's' using libxml2 and return it wrapped."

    _document = libxml2.parseDoc(s)
    return Node(_document)
paulb@8 | 343 | |
def parseURI(uri):

    "Parse the document found at 'uri' using libxml2 and return it wrapped."

    _document = libxml2.parseURI(uri)
    return Node(_document)
paulb@8 | 346 | |
def toString(node):

    "Return the result of serialising the libxml2 node wrapped by 'node'."

    _node = node._node
    return _node.serialize()
paulb@8 | 349 | |
def toStream(node, stream=None):

    """
    Write the serialised form of 'node' to 'stream'. Standard output is used
    if no stream (or a stream testing as false) is given.
    """

    if not stream:
        stream = sys.stdout
    text = toString(node)
    stream.write(text)
paulb@8 | 353 | |
paulb@1 | 354 | # vim: tabstop=4 expandtab shiftwidth=4 |