paulb@1 | 1 | #!/usr/bin/env python |
paulb@1 | 2 | |
paulb@1 | 3 | """ |
paulb@1 | 4 | DOM wrapper around libxml2. |
paulb@1 | 5 | """ |
paulb@1 | 6 | |
paulb@1 | 7 | import xml.dom |
paulb@3 | 8 | import libxml2 |
paulb@8 | 9 | import sys |
paulb@1 | 10 | |
# NOTE: NamedNodeMap.values() builds a complete list of attributes on every
# NOTE: call; consider yielding them from a generator instead.
paulb@3 | 12 | |
class NamedNodeMap(object):

    """
    A dictionary-like view of the attributes of an element.

    The map stores nothing itself: every lookup walks the libxml2 property
    list of the wrapped element, so it always reflects the current state of
    the underlying node.

    Items can be addressed either by qualified name (a string compared against
    each attribute's nodeName) or by a (namespaceURI, localName) tuple, which
    is the form returned by keys().
    """

    def __init__(self, node):

        "Initialise the map for the given element 'node' (a Node wrapper)."

        self.node = node

    def getNamedItem(self, name):

        """
        Return the attribute whose nodeName equals 'name', or None if there is
        no such attribute.
        """

        # The search is done here rather than by delegating to the element,
        # because the element's own getAttributeNode is written in terms of
        # this map and delegating would recurse without end.

        for attr in self.values():
            if attr.nodeName == name:
                return attr
        return None

    def getNamedItemNS(self, ns, localName):

        """
        Return the attribute having the namespace URI 'ns' and the given
        'localName', or None if there is no such attribute.
        """

        for attr in self.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def setNamedItem(self, node):

        "Set the attribute 'node' on the element, using the node's name."

        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):

        "Set the attribute 'node' on the element, using its namespace details."

        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute identified by 'name', which is either a qualified
        name string or a (namespaceURI, localName) tuple. None is returned
        when nothing matches.
        """

        if isinstance(name, tuple):
            ns, localName = name
            return self.getNamedItemNS(ns, localName)
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' under the key 'name'. A KeyError is raised
        if 'name' differs from the nodeName of 'node'.
        """

        if name != node.nodeName:
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):

        "Remove the attribute identified by 'name'."

        # NOTE: To be implemented. Deletion is currently a silent no-op.
        pass

    def values(self):

        "Return a list of Node wrappers, one per attribute of the element."

        # NOTE: Consider a generator instead.

        attributes = []
        _attribute = self.node._node.properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])
paulb@3 | 62 | |
paulb@3 | 63 | def _get_prefix_and_localName(name): |
paulb@3 | 64 | t = name.split(":") |
paulb@3 | 65 | if len(t) == 1: |
paulb@3 | 66 | return None, name |
paulb@3 | 67 | elif len(t) == 2: |
paulb@3 | 68 | return t |
paulb@3 | 69 | else: |
paulb@3 | 70 | # NOTE: Should raise an exception. |
paulb@3 | 71 | return None, None |
paulb@3 | 72 | |
class TemporaryNode(object):

    """
    A description of a node which has not yet been created in a libxml2 tree.

    Instances record the namespace URI, the qualified name and the DOM node
    type, together with the prefix and local name derived from the qualified
    name.
    """

    def __init__(self, ns, name, nodeType):

        "Record the details of the node to be created later."

        prefix, localName = _get_prefix_and_localName(name)
        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix = prefix
        self.localName = localName
paulb@1 | 79 | |
class TemporaryText(object):

    """
    A holder for a text node object which has not yet been placed in a tree.

    The naming attributes which other temporary nodes provide are all present
    but set to None.
    """

    def __init__(self, _text):

        "Initialise the holder with the underlying text node '_text'."

        self._text = _text
        self.nodeType = xml.dom.Node.TEXT_NODE

        # Text nodes carry no namespace or naming information.
        self.ns = None
        self.name = None
        self.prefix = None
        self.localName = None
paulb@4 | 85 | |
class Node(object):

    """
    A DOM-style wrapper around a single libxml2 node.

    The wrapper keeps a reference to the underlying libxml2 object in '_node'
    and answers DOM property and method requests by querying that object each
    time, so no tree state is cached here. Wrappers are cheap and are created
    afresh whenever a related node (parent, child, owner document) is
    requested.
    """

    # Mapping from the libxml2 node type names to the DOM node type constants.

    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Initialise the wrapper around the libxml2 object 'node'. The optional
        'ownerElement' is the Node wrapping the element which carries this
        node (used for attributes), and the optional 'doctype' is stored
        unchanged for document nodes.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    # Property implementations.

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        "Return a list of wrappers for the children, in document order."

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return ns.content
        else:
            return None

    def _nodeValue(self):
        return self._node.content

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return ns.name
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return self._node.name

    def _parentNode(self):

        "Return a wrapper for the parent, or None where there is no parent."

        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None

        # Detached nodes have no parent: answer None rather than a wrapper
        # around nothing.

        _parent = self._node.parent
        if _parent is None:
            return None
        return Node(_parent)

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return self._getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        return self._getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):

        "Return the value of the identified attribute, or an empty string."

        return self._getAttributeNS(ns, localName) or ""

    def _getAttributeNS(self, ns, localName):
        return self._node.nsProp(localName, ns)

    def getAttribute(self, name):

        "Return the value of the named attribute, or an empty string."

        return self._getAttribute(name) or ""

    def _getAttribute(self, name):
        return self._node.prop(name)

    def getAttributeNodeNS(self, ns, localName):

        """
        Return the attribute node having the namespace URI 'ns' and the given
        'localName', or None if there is no such attribute.
        """

        # The attribute list is searched directly: indexing the attributes
        # map would lead back to this method and never terminate.

        for attr in self.attributes.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getAttributeNode(self, localName):

        """
        Return the attribute node which is in no namespace and which has the
        given 'localName', or None if there is no such attribute.
        """

        # NOTE: Needs verifying.
        return self.getAttributeNodeNS(None, localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' to 'value'.

        A prefixed name has a namespace declaration for 'ns' created on this
        element. An unprefixed name reuses the element's own namespace when
        'ns' matches it, and is otherwise set without any namespace.
        """

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # Use the guarded lookup: elements without a namespace cause the
        # plain ns() call to raise an exception.

        _ns = self._getNs()
        if _ns is not None and ns == _ns.content:
            self._node.setNsProp(_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    # Node creation.

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element with the qualified 'name'. Where 'ns'
        is not None, the element is placed in that namespace using the prefix
        found in 'name'.
        """

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)

        # Only declare a namespace when there is a URI to declare.

        if ns is not None:
            _node.setNs(_node.newNs(ns, prefix))
        return Node(_node)

    def createElement(self, name):

        "Return a new, unattached element called 'name' in no namespace."

        return Node(libxml2.newNode(name))

    def createAttributeNS(self, ns, name):

        "Return a temporary attribute for the namespace 'ns' and 'name'."

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):

        "Return a temporary attribute called 'name' in no namespace."

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):

        "Return a temporary text node holding 'value'."

        return TemporaryText(self._node.doc.newDocText(value))

    def _add_node(self, tmp):

        """
        Create on this element the attribute described by the temporary node
        'tmp', returning the new libxml2 object. None is returned for
        temporary nodes which are not attributes.
        """

        if tmp.nodeType != xml.dom.Node.ATTRIBUTE_NODE:
            return None

        if tmp.ns is not None:
            _child = self._node.newNsProp(None, tmp.localName, None)
            _ns = _child.newNs(tmp.ns, tmp.prefix)
            _child.setNs(_ns)
        else:
            # newProp takes only a name and a value.
            _child = self._node.newProp(tmp.name, None)

        return _child

    def _get_detached(self, tmp):

        """
        Return the libxml2 object for 'tmp' ready to be linked into a tree at
        a chosen position. A ValueError is raised for unsupported node types.
        """

        if tmp.nodeType == xml.dom.Node.TEXT_NODE:
            return tmp._text
        if tmp.nodeType == xml.dom.Node.ELEMENT_NODE:
            return tmp._node

        _child = self._add_node(tmp)
        if _child is None:
            raise ValueError(tmp.nodeType)

        # _add_node attaches the new object to this element, so it must be
        # unlinked before being placed elsewhere.

        _child.unlinkNode()
        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node' made using this node's owner document.
        Elements are copied with their attributes and, if 'deep' is true, with
        their descendants. Text and attribute nodes are copied as temporary
        nodes. A ValueError is raised for any other kind of node.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    # Tree manipulation.

    def insertBefore(self, tmp, oldNode):

        "Insert 'tmp' before 'oldNode', returning a wrapper for the result."

        _child = self._get_detached(tmp)
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):

        "Replace 'oldNode' with 'tmp', returning a wrapper for the result."

        _child = self._get_detached(tmp)
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):

        "Add 'tmp' as the last child, returning a wrapper for the result."

        if tmp.nodeType == xml.dom.Node.TEXT_NODE:
            _child = self._node.addChild(tmp._text)
        elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:
            _child = self._node.addChild(tmp._node)
        else:
            # NOTE: For unsupported node types _add_node provides None, and
            # NOTE: the returned wrapper then wraps nothing.
            _child = self._add_node(tmp)
        return Node(_child)

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)

    def isSameNode(self, other):

        "Return whether 'other' has the same node path as this node."

        # NOTE(review): only the paths are compared, so presumably nodes from
        # NOTE(review): different documents can be considered the same.

        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.isSameNode(other)

    def __ne__(self, other):

        # Python 2 does not derive inequality from equality.

        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
paulb@1 | 326 | |
paulb@8 | 327 | # Utility functions. |
paulb@8 | 328 | |
def createDocumentType(localName, publicId, systemId):

    """
    Return a document type for the given 'localName', 'publicId' and
    'systemId'. Document types are not supported: the arguments are ignored
    and the result is always None.
    """

    doctype = None
    return doctype
paulb@8 | 331 | |
def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document wrapper which records the given 'doctype'. Where
    'localName' is not None, a root element of that name is created in the
    namespace 'namespaceURI' and appended to the document.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = Node(libxml2.newDoc("1.0"), doctype=doctype)

    if localName is None:
        return document

    document.appendChild(document.createElementNS(namespaceURI, localName))
    return document
paulb@8 | 339 | |
def parse(stream_or_string):

    """
    Parse the document provided by 'stream_or_string' and return the wrapped
    result of parseString.

    An object with a 'read' attribute is treated as an open stream and is
    read but left open for the caller to close. Anything else is treated as a
    filename: the file is opened here and closed again once it has been read
    and parsed.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        # Only files opened here are closed here.
        stream.close()
paulb@8 | 346 | |
def parseString(s):

    "Parse the document text 's' with libxml2 and return a Node wrapping it."

    _document = libxml2.parseDoc(s)
    return Node(_document)
paulb@8 | 349 | |
def parseURI(uri):

    "Parse the document found at 'uri' with libxml2 and return a Node wrapping it."

    _document = libxml2.parseURI(uri)
    return Node(_document)
paulb@8 | 352 | |
def toString(node):

    """
    Return the result of calling serialize on the underlying object wrapped
    by 'node'.
    """

    _node = node._node
    return _node.serialize()
paulb@8 | 355 | |
def toStream(node, stream=None):

    """
    Write the serialised form of 'node', as produced by toString, to the
    given 'stream'. Where 'stream' is omitted or None, standard output is
    used.
    """

    # Test for None explicitly: a stream object which happens to evaluate as
    # false must still be written to, not silently replaced by standard output.

    if stream is None:
        stream = sys.stdout
    stream.write(toString(node))
paulb@8 | 359 | |
paulb@1 | 360 | # vim: tabstop=4 expandtab shiftwidth=4 |