#!/usr/bin/env python

"""
DOM wrapper around libxml2.

Provides a small DOM-like API (Node, NamedNodeMap) on top of the libxml2
Python bindings, together with module-level helpers for creating, parsing and
serialising documents.

This module targets Python 2: it relies on the 'unicode' built-in.
"""

import xml.dom
import libxml2
import sys

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object;
    any other value (including None) is returned unchanged.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a byte string; any other value
    (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A mapping-like view of the attributes of the Node given at construction.

    Named lookups and updates are delegated to that Node; keys(), values() and
    items() are computed on each call by walking the underlying libxml2
    property list. Keys are (namespaceURI, localName) tuples.
    """

    def __init__(self, node):
        # The wrapping Node (not the native libxml2 node) owning the attributes.
        self.node = node

    def getNamedItem(self, name):
        "Return the attribute node called 'name', or None if there is none."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node matching 'ns' and 'localName', or None."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        "Set an attribute on the owning node from the name and value of 'node'."
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        "Namespace-aware variant of setNamedItem."
        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute node for 'name', which may be either a plain name
        or a (namespaceURI, localName) tuple as produced by keys().

        Raises KeyError if no such attribute exists.
        """

        if isinstance(name, tuple):
            ns, localName = name
            attr = self.getNamedItemNS(ns, localName)
        else:
            attr = self.getNamedItem(name)
        if attr is None:
            raise KeyError(name)
        return attr

    def __setitem__(self, name, node):
        "Store 'node' under 'name'; KeyError is raised if 'name' is not node.nodeName."
        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Currently a silent no-op.
        pass

    def values(self):
        "Return a list of Node objects wrapping each attribute of the owning node."
        attributes = []
        # libxml2 keeps the attributes as a linked list hanging off 'properties'.
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples, one per attribute."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute node) pairs."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return (prefix, localName).

    A name without a colon yields (None, name). A name with more than one
    colon yields (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        prefix, localName = parts
        return prefix, localName
    # NOTE: Should raise an exception.
    return None, None

class TemporaryNode(object):

    """
    A placeholder for a node which has no libxml2 counterpart until it is
    attached to a parent (see Node._add_node). It records the namespace,
    qualified name and DOM node type, plus the derived prefix and localName.
    """

    def __init__(self, ns, name, nodeType):
        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix, self.localName = _get_prefix_and_localName(self.name)

class Node(object):

    """
    A DOM-style wrapper around a native libxml2 node.

    Most DOM attributes (nodeName, childNodes, parentNode and so on) are
    exposed as read-only properties computed from the native node on each
    access; string results are converted to Unicode.
    """

    # Maps the libxml2 node 'type' string to the corresponding DOM constant.
    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the native libxml2 'node'. The optional 'ownerElement' and
        'doctype' are stored as given and exposed as plain attributes.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):
        "Return the wrapped libxml2 node."
        return self._node

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        # Raises KeyError for libxml2 node types missing from _nodeTypes.
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        "Return a new list of Node objects wrapping each child in document order."

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        # libxml2 raises treeError rather than returning None when the node
        # has no namespace.
        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is None:
            return None
        return to_unicode(ns.content)

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is None:
            return None
        return to_unicode(ns.name)

    def _nodeName(self):
        "Return the qualified name: 'prefix:localName', or just the local name."
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        return self._localName()

    def _tagName(self):
        "Return the qualified name for element nodes and None for all others."
        if self._node.type == "element":
            return self._nodeName()
        return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        "Return the parent as a Node, or None for documents and detached nodes."
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        _parent = self._node.parent
        # A node which is not linked into a tree has no native parent; avoid
        # handing back a wrapper around None.
        if _parent is None:
            return None
        return Node(_parent)

    def _previousSibling(self):
        if self._node.prev is not None:
            return Node(self._node.prev)
        return None

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        return None

    def hasAttributeNS(self, ns, localName):
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):
        "Return the value of the attribute 'localName' in namespace 'ns' as Unicode."
        # Encode the arguments as the other methods do: libxml2 works in UTF-8.
        ns, localName = from_unicode(ns), from_unicode(localName)
        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):
        "Return the value of the attribute 'name' as Unicode."
        name = from_unicode(name)
        return to_unicode(self._node.prop(name))

    def getAttributeNodeNS(self, ns, localName):

        """
        Return the attribute node whose namespace URI is 'ns' and whose local
        name is 'localName', or None if this node has no such attribute.
        """

        # Attribute nodes report Unicode names, so compare like with like.
        ns, localName = to_unicode(ns), to_unicode(localName)
        # Scan the attributes directly: going through NamedNodeMap's lookup
        # methods would call straight back into this method.
        for attr in self.attributes.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getAttributeNode(self, localName):

        """
        Return the attribute node called 'localName' which has no namespace,
        or None if this node has no such attribute.
        """

        # NOTE: Needs verifying. DOM matches on the qualified name; here only
        # NOTE: attributes without a namespace are considered.
        return self.getAttributeNodeNS(None, localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' in namespace 'ns' to
        'value'.

        A prefixed name gets a new namespace declaration on this node. An
        unprefixed name reuses this node's own namespace when 'ns' matches its
        URI; otherwise the attribute is set without a namespace.
        """

        # NOTE: Need to convert from Unicode.
        ns, name, value = [from_unicode(s) for s in (ns, name, value)]

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # Use the guarded lookup: the native ns() raises treeError on nodes
        # without a namespace.
        _own_ns = self._getNs()
        if _own_ns is not None and ns == _own_ns.content:
            self._node.setNsProp(_own_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        "Set the attribute 'name' to 'value'."
        # NOTE: Need to convert from Unicode.
        name, value = from_unicode(name), from_unicode(value)

        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        "Set the attribute 'name' in namespace 'ns' to the value held by 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        "Set the attribute 'name' to the value held by 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    def createElementNS(self, ns, name):
        "Return a new, unattached element with qualified 'name' in namespace 'ns'."
        # NOTE: Need to convert from Unicode.
        ns, name = from_unicode(ns), from_unicode(name)

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        _ns = _node.newNs(ns, prefix)
        _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):
        "Return a new, unattached element called 'name' with no namespace."
        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):
        "Return a TemporaryNode describing an attribute 'name' in namespace 'ns'."
        # NOTE: Need to convert from Unicode.
        ns, name = from_unicode(ns), from_unicode(name)

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):
        "Return a TemporaryNode describing an attribute 'name' with no namespace."
        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):
        "Return a new, unattached text node containing 'value'."
        # NOTE: Need to convert from Unicode.
        value = from_unicode(value)

        return Node(libxml2.newText(value))

    def _add_node(self, tmp):

        """
        Create on this node the native counterpart of the TemporaryNode 'tmp'
        and return it. Only attribute nodes are supported; None is returned
        for any other node type.
        """

        if tmp.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            if tmp.ns is not None:
                # NOTE(review): the namespace is declared on the new attribute
                # NOTE(review): node itself; confirm that libxml2 permits this.
                _child = self._node.newNsProp(None, tmp.localName, None)
                _ns = _child.newNs(tmp.ns, tmp.prefix)
                _child.setNs(_ns)
            else:
                # newProp takes only (name, value); there is no namespace
                # argument as there is for newNsProp.
                _child = self._node.newProp(tmp.name, None)
        else:
            _child = None

        return _child

    def _native_child(self, tmp):
        "Return the native node for 'tmp', creating it via _add_node if necessary."
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            return tmp._node
        return self._add_node(tmp)

    def importNode(self, node, deep):

        """
        Return a copy of 'node' created through this node's owner document.

        Elements are copied with their attributes and, if 'deep' is true,
        their importable descendants. Text nodes are copied by value and
        attribute nodes become TemporaryNode objects.

        Raises ValueError (carrying the node type) for any other node type.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' immediately before 'oldNode' and return the inserted node.
        If 'oldNode' is None, 'tmp' is appended to this node instead.
        """

        if oldNode is None:
            return self.appendChild(tmp)
        _child = self._native_child(tmp)
        # Detach from any current position before linking elsewhere.
        _child.unlinkNode()
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):
        "Replace 'oldNode' with 'tmp', returning a Node for the result of the native replaceNode call."
        _child = self._native_child(tmp)
        # Detach from any current position before linking elsewhere.
        _child.unlinkNode()
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):
        "Append 'tmp' to this node and return a Node wrapping the appended native node."
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            # _add_node creates the native node already attached to this one.
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):
        "Unlink 'tmp' from the tree and return it."
        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):
        "Return whether 'other' has the same libxml2 node path as this node."
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        # Comparing with a non-Node (such as None) must not raise.
        if not isinstance(other, Node):
            return NotImplemented
        return self._node.nodePath() == other._node.nodePath()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep the two consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Not implemented: always returns None."
    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document Node. If 'localName' is not None, a root element
    with that name in 'namespaceURI' is created and appended. The 'doctype' is
    only recorded on the returned Node.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):

    """
    Parse a document from 'stream_or_string' and return it as a Node.

    An object with a 'read' attribute is read directly and left open; any
    other value is passed to open() as a filename, and that file is closed
    once it has been read.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()

def parseString(s):
    "Parse the document text 's' and return the document as a Node."
    return Node(libxml2.parseDoc(s))

def parseURI(uri):
    "Parse the document found at 'uri' and return it as a Node."
    return Node(libxml2.parseURI(uri))

def toString(node):
    "Return the serialised form of 'node'."
    return node.as_native_node().serialize()

def toStream(node, stream=None):
    "Write the serialised form of 'node' to 'stream' (sys.stdout if None)."
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node))

# vim: tabstop=4 expandtab shiftwidth=4