#!/usr/bin/env python

"""
DOM wrapper around libxml2.
"""

__version__ = "0.1.1"

import sys
import xml.dom

import libxml2

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):
    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode string;
    return any other value (including None) unchanged.
    """
    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s

def to_unicode(s):
    """
    Return 's' decoded from UTF-8 if it is a byte string; return any other
    value (including None) unchanged.
    """
    # 'bytes' is the plain 'str' type on Python 2.6 and later.
    if isinstance(s, bytes):
        return s.decode("utf-8")
    return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A dictionary-like view of the attributes of the element 'node'.

    Nothing is cached: every lookup walks the property list of the underlying
    libxml2 node, and every update is delegated to the owning element.
    """

    def __init__(self, node):
        # The wrapping Node (not the native libxml2 node) owning the attributes.
        self.node = node

    def getNamedItem(self, name):
        "Return the attribute node called 'name', or None if there is none."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node for 'ns' and 'localName', or None."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        "Set an attribute on the element using the name and value of 'node'."
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        "Set a namespaced attribute using the details and value of 'node'."
        # The qualified name is passed so that setAttributeNS can see the
        # prefix; passing only the local name would lose it.
        self.node.setAttributeNodeNS(node.namespaceURI, node.name, node)

    def __getitem__(self, name):
        """
        Return the attribute identified by 'name', or None if it is missing.

        'name' is either a plain attribute name or a (namespaceURI, localName)
        tuple of the kind produced by the 'keys' method.
        """
        if isinstance(name, tuple):
            ns, localName = name
            return self.getNamedItemNS(ns, localName)
        return self.getNamedItem(name)

    def __setitem__(self, name, node):
        "Store 'node'; raise KeyError if 'name' is not the node's nodeName."
        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently does nothing.
        pass

    def values(self):
        "Return a list of Node objects, one for each attribute."
        attributes = []
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):
    """
    Split the qualified 'name' at its colon and return (prefix, localName).

    The prefix is None for a name without a colon. A name with more than one
    colon yields (None, None).
    """
    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        return tuple(parts)
    # NOTE: Should raise an exception.
    return None, None

class TemporaryNode(object):

    """
    A placeholder for a node which has not yet been attached to an element.

    Instances are produced by Node.createAttribute and Node.createAttributeNS
    and turned into real attributes by Node._add_node.
    """

    def __init__(self, ns, name, nodeType):
        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix, self.localName = _get_prefix_and_localName(self.name)

class Node(object):

    """
    A DOM-style wrapper around a native libxml2 node.

    The DOM attributes (nodeName, childNodes, parentNode and so on) are
    properties computed from the native node on every access.
    """

    # Maps the libxml2 node type names onto the xml.dom node type constants.
    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "document_html" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):
        """
        Wrap the native libxml2 'node'. The optional 'ownerElement' and
        'doctype' are stored as given and exposed as attributes.
        """
        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):
        "Return the wrapped libxml2 node."
        return self._node

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.content)
        else:
            return None

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.name)
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        # Documents have no parent; neither do nodes which have been created
        # or unlinked but not attached anywhere.
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        _parent = self._node.parent
        if _parent is None:
            return None
        return Node(_parent)

    def _previousSibling(self):
        if self._node.prev is not None:
            return Node(self._node.prev)
        else:
            return None

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        else:
            return None

    def hasAttributeNS(self, ns, localName):
        "Return whether getAttributeNS finds a value for 'ns' and 'localName'."
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        "Return whether getAttribute finds a value for 'name'."
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):
        "Return the attribute value for 'ns' and 'localName' as Unicode, or None."
        # Arguments are converted from Unicode in the same way as in the
        # setter methods.
        ns, localName = map(from_unicode, [ns, localName])
        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):
        "Return the value of the attribute 'name' as Unicode, or None."
        return to_unicode(self._node.prop(from_unicode(name)))

    def getAttributeNodeNS(self, ns, localName):
        """
        Return the attribute node whose namespace URI is 'ns' and whose local
        name is 'localName', or None if the element has no such attribute.
        """
        # The attribute list is searched directly: looking the key up through
        # self.attributes would call straight back into this method.
        ns, localName = map(to_unicode, [ns, localName])
        for attr in self.attributes.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getAttributeNode(self, localName):
        """
        Return the attribute node which has no namespace and the given
        'localName', or None if the element has no such attribute.
        """
        # NOTE: Needs verifying.
        return self.getAttributeNodeNS(None, localName)

    def setAttributeNS(self, ns, name, value):
        """
        Set the attribute with the qualified 'name' to 'value'.

        A prefixed name causes a declaration for 'ns' to be added to this
        element. An unprefixed name reuses this element's own namespace when
        'ns' matches it, and is otherwise set without any namespace.
        """
        # NOTE: Need to convert from Unicode.
        ns, name, value = map(from_unicode, [ns, name, value])

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # The element need not have a namespace at all, so the lookup goes
        # through _getNs rather than calling ns() on the native node directly.
        _own_ns = self._getNs()
        if _own_ns is not None and ns == _own_ns.content:
            self._node.setNsProp(_own_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        "Set the attribute 'name' to 'value'."
        # NOTE: Need to convert from Unicode.
        name, value = map(from_unicode, [name, value])

        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        "Set the attribute 'name' in 'ns' to the value of 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        "Set the attribute 'name' to the value of 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    def createElementNS(self, ns, name):
        """
        Return a new, unattached element with the qualified 'name', carrying
        a declaration of the namespace 'ns'.
        """
        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        _ns = _node.newNs(ns, prefix)
        _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):
        "Return a new, unattached element called 'name'."
        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):
        "Return a TemporaryNode describing an attribute 'name' in 'ns'."
        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):
        "Return a TemporaryNode describing an attribute 'name' without namespace."
        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):
        "Return a new, unattached text node containing 'value'."
        # NOTE: Need to convert from Unicode.
        value = from_unicode(value)

        return Node(libxml2.newText(value))

    def _add_node(self, tmp):
        """
        Create on this element the native attribute described by the
        TemporaryNode 'tmp' and return it. None is returned for temporary
        nodes which are not attributes.
        """
        if tmp.nodeType != xml.dom.Node.ATTRIBUTE_NODE:
            return None

        if tmp.ns is not None:
            # As in setAttributeNS, the namespace is declared on the element
            # and the attribute is then created within it.
            _ns = self._node.newNs(tmp.ns, tmp.prefix)
            return self._node.newNsProp(_ns, tmp.localName, None)

        return self._node.newProp(tmp.name, None)

    def importNode(self, node, deep):
        """
        Return a copy of 'node' created through this node's owner document.

        Elements are copied together with their attributes and, if 'deep' is
        true, their descendants. Text nodes and attributes are also handled;
        any other node type raises ValueError with that type as its argument.
        """
        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    def _detached_native_node(self, tmp):
        "Return the native node for 'tmp', unlinked from any current position."
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return _child

    def insertBefore(self, tmp, oldNode):
        "Insert 'tmp' immediately before 'oldNode' and return the inserted node."
        _child = self._detached_native_node(tmp)
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):
        "Replace 'oldNode' with 'tmp', wrapping libxml2's result in a Node."
        _child = self._detached_native_node(tmp)
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):
        "Add 'tmp' to this node and return a Node wrapping the result."
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):
        "Unlink 'tmp' from the tree and return it."
        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):
        """
        Return whether 'other' is a Node whose native node has the same node
        path as this one.
        """
        if not isinstance(other, Node):
            return False
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        # Comparisons with anything which is not a Node (None, for instance)
        # are left to Python instead of failing on a missing '_node'.
        if not isinstance(other, Node):
            return NotImplemented
        return self._node.nodePath() == other._node.nodePath()

    def __ne__(self, other):
        # Python 2 does not derive '!=' from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Placeholder: document types are not supported, so None is returned."
    return None

def createDocument(namespaceURI, localName, doctype):
    """
    Return a new document Node. If 'localName' is not None, a root element of
    that name in 'namespaceURI' is created and appended to the document.
    """
    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):
    """
    Parse a document and return it as a Node. The argument is either an
    object with a 'read' method or the name of a file to be opened.
    """
    if hasattr(stream_or_string, "read"):
        # The caller owns the stream, so it is not closed here.
        return parseString(stream_or_string.read())

    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()

def parseString(s):
    "Parse the document text 's' and return the document as a Node."
    return Node(libxml2.parseDoc(s))

def parseURI(uri):
    "Parse the document found at 'uri' and return it as a Node."
    return Node(libxml2.parseURI(uri))

def toString(node, encoding=None):
    "Return the serialised form of 'node', using 'encoding' if it is given."
    if encoding is None:
        return node.as_native_node().serialize()
    else:
        return node.as_native_node().serialize(encoding)

def toStream(node, stream=None, encoding=None):
    "Write the serialised form of 'node' to 'stream' (standard output if None)."
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node, encoding))

# vim: tabstop=4 expandtab shiftwidth=4