#!/usr/bin/env python

"""
DOM wrapper around libxml2.
"""

__version__ = "0.1.2"

import xml.dom
import libxml2
import sys

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object. Any
    other value (including None) is returned unchanged.
    """

    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    else:
        return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 into a Unicode object if it is a plain
    string. Any other value (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A view of the attributes of an element, offering the DOM NamedNodeMap
    methods together with some dictionary-style conveniences.

    Nothing is cached: every lookup walks the attribute list of the wrapped
    element again.
    """

    def __init__(self, node):

        "Initialise the map for the element 'node' (a Node wrapper)."

        self.node = node

    def getNamedItem(self, name):

        """
        Return the first attribute whose local name equals 'name', or None if
        there is no such attribute. Namespaces are not considered.
        """

        for attr in self.values():
            if name == attr.localName:
                return attr
        # NOTE: Check exception possibilities here.
        return None

    def getNamedItemNS(self, ns, localName):

        """
        Return the first attribute having the given 'localName' and the
        namespace URI 'ns', or None if there is no such attribute.
        """

        for attr in self.values():
            if localName == attr.localName and ns == attr.namespaceURI:
                return attr
        # NOTE: Check exception possibilities here.
        return None

    def setNamedItem(self, node):

        "Set the attribute 'node' on the element, using the node's name."

        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):

        """
        Set the attribute 'node' on the element, using the node's namespace
        URI and local name.
        """

        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute found by getNamedItem for 'name'. None is
        returned for a missing attribute; no KeyError is raised.
        """

        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' provided that 'name' equals its nodeName;
        otherwise raise KeyError.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):

        "Not implemented yet: deleting an item currently does nothing."

        # NOTE: To be implemented.
        pass

    def values(self):

        """
        Return a list of Node wrappers, one for each attribute of the
        element, each having the element as its ownerElement.
        """

        attributes = []
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        """
        Return a list of ((namespaceURI, localName), attribute) tuples, one
        per attribute.
        """

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. The prefix is None where 'name' contains no colon. Where 'name'
    contains more than one colon, (None, None) is returned.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Return a tuple, like the other branches, rather than the list
        # produced by split.
        return tuple(parts)
    else:
        # NOTE: Should raise an exception.
        return None, None

class TemporaryNode(object):

    """
    A placeholder for a node which has been requested but which has no
    libxml2 node behind it yet. Node.createAttribute and
    Node.createAttributeNS return such objects.
    """

    def __init__(self, ns, name, nodeType):

        """
        Initialise the placeholder with the namespace URI 'ns' (or None), the
        qualified 'name' and the DOM 'nodeType' constant. The prefix and
        localName attributes are derived from 'name'.
        """

        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix, self.localName = _get_prefix_and_localName(self.name)

class Node(object):

    """
    A DOM-style wrapper around a single libxml2 node. The same class is used
    for documents, elements, attributes, text and comments; the nodeType
    property tells them apart.
    """

    # Mapping from the libxml2 node type names to the DOM node type constants.

    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "document_html" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    # Node types which always have a libxml2 node behind them and which can
    # therefore be linked into a tree directly. Comments are included because
    # createComment and importNode produce them.

    _linkableTypes = (
        xml.dom.Node.TEXT_NODE,
        xml.dom.Node.ELEMENT_NODE,
        xml.dom.Node.COMMENT_NODE
        )

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the libxml2 'node'. The optional 'ownerElement' is the Node
        wrapper of the element owning an attribute; the optional 'doctype' is
        stored as given.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):

        "Return the wrapped libxml2 node."

        return self._node

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        "Return a new list of Node wrappers for the children of this node."

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.content)
        else:
            return None

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.name)
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):

        """
        Return the parent of this node as a Node, or None for a document node
        and for a node having no parent.
        """

        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        # Do not wrap a missing parent in a Node, in line with the sibling
        # accessors below.
        # NOTE: Presumably this occurs for nodes not yet linked into a tree.
        _parent = self._node.parent
        if _parent is None:
            return None
        return Node(_parent)

    def _previousSibling(self):
        if self._node.prev is not None:
            return Node(self._node.prev)
        else:
            return None

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        else:
            return None

    def hasAttributeNS(self, ns, localName):

        "Return whether getAttributeNS finds a value for 'ns' and 'localName'."

        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):

        "Return whether getAttribute finds a value for 'name'."

        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):

        """
        Return the value of the attribute with the given 'localName' in the
        namespace 'ns', as obtained from libxml2 and converted to Unicode.
        """

        # Convert from Unicode, as the attribute setters do.
        ns, localName = map(from_unicode, [ns, localName])

        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):

        """
        Return the value of the attribute with the given 'name', as obtained
        from libxml2 and converted to Unicode.
        """

        # Convert from Unicode, as the attribute setters do.
        name = from_unicode(name)

        return to_unicode(self._node.prop(name))

    def getAttributeNodeNS(self, ns, localName):

        "Return the attribute node found by attributes.getNamedItemNS."

        return self.attributes.getNamedItemNS(ns, localName)

    def getAttributeNode(self, localName):

        "Return the attribute node found by attributes.getNamedItem."

        # NOTE: Needs verifying.
        return self.attributes.getNamedItem(localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' to 'value'.

        A prefixed name causes a new namespace declaration for 'ns' to be
        made on this element and used for the attribute. An unprefixed name
        uses the element's own namespace where 'ns' is that namespace, and no
        namespace otherwise.
        """

        # NOTE: Need to convert from Unicode.
        ns, name, value = map(from_unicode, [ns, name, value])

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # Use _getNs since asking libxml2 directly for the namespace raises
        # treeError on an element having no namespace.
        element_ns = self._getNs()
        if ns is not None and element_ns is not None and ns == element_ns.content:
            self._node.setNsProp(element_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            # NOTE: This also catches the case where None is the element's
            # NOTE: namespace and is also used for the attribute.
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):

        "Set the attribute with the given 'name' to 'value'."

        # NOTE: Need to convert from Unicode.
        name, value = map(from_unicode, [name, value])

        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):

        """
        Set an attribute in the namespace 'ns' with the given 'name', taking
        the value from the nodeValue of 'node'.
        """

        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):

        """
        Set an attribute with the given 'name', taking the value from the
        nodeValue of 'node'.
        """

        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    def createElementNS(self, ns, name):

        """
        Return a new element Node with the qualified 'name'. Where 'ns' is
        not None, a namespace declaration for it is made on the element and
        used as the element's namespace. The element is not attached to any
        parent by this method.
        """

        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        # NOTE: Does it make sense to set the namespace if it is empty?
        if ns is not None:
            _ns = _node.newNs(ns, prefix)
            _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):

        """
        Return a new element Node with the given 'name' and no namespace.
        The element is not attached to any parent by this method.
        """

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        # The name is used as given. This previously referred to an undefined
        # variable and so failed on every call.
        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):

        """
        Return a TemporaryNode describing an attribute with the qualified
        'name' in the namespace 'ns'.
        """

        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):

        """
        Return a TemporaryNode describing an attribute with the given 'name'
        and no namespace.
        """

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        # There is no namespace here. This previously referred to an
        # undefined variable and so failed on every call.
        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):

        "Return a new text Node containing 'value'."

        # NOTE: Need to convert from Unicode.
        value = from_unicode(value)

        return Node(libxml2.newText(value))

    def createComment(self, value):

        "Return a new comment Node containing 'value'."

        # NOTE: Need to convert from Unicode.
        value = from_unicode(value)

        return Node(libxml2.newComment(value))

    def _add_node(self, tmp):

        """
        Create a libxml2 attribute on this node for the attribute placeholder
        'tmp' and return it. None is returned for any other kind of node.
        """

        if tmp.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            if tmp.ns is not None:
                _child = self._node.newNsProp(None, tmp.localName, None)
                _ns = _child.newNs(tmp.ns, tmp.prefix)
                _child.setNs(_ns)
            else:
                _child = self._node.newProp(None, tmp.name, None)
        else:
            _child = None

        return _child

    def _native_child(self, tmp):

        """
        Return an unattached libxml2 node for 'tmp', for use by insertBefore
        and replaceChild. ValueError is raised where no such node can be
        provided.
        """

        if tmp.nodeType in self._linkableTypes:
            return tmp._node
        _child = self._add_node(tmp)
        if _child is None:
            raise ValueError(tmp.nodeType)
        # The node was created on this element, so detach it before it is
        # linked in elsewhere.
        _child.unlinkNode()
        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node', made using the creation methods of this
        node's owner document. Elements are copied with their attributes and,
        where 'deep' is true, with their descendants. Text, attribute and
        comment nodes are also supported. ValueError is raised for any other
        kind of node, including one met while copying descendants.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        elif node.nodeType == xml.dom.Node.COMMENT_NODE:
            return self.ownerDocument.createComment(node.data)

        raise ValueError(node.nodeType)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' immediately before 'oldNode' and return a Node for the
        result given by libxml2. ValueError is raised where 'tmp' is of a
        kind which cannot be inserted.
        """

        _child = self._native_child(tmp)
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):

        """
        Put 'tmp' in the place of 'oldNode' and return a Node for the result
        given by libxml2. ValueError is raised where 'tmp' is of a kind which
        cannot be inserted.
        """

        _child = self._native_child(tmp)
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):

        """
        Add 'tmp' to this node and return a Node for the result. Text,
        element and comment nodes are added as children; attribute
        placeholders are created as attributes of this node.
        """

        if tmp.nodeType in self._linkableTypes:
            _child = self._node.addChild(tmp._node)
        else:
            # NOTE: For unsupported kinds of node this yields a Node wrapping
            # NOTE: None, as before.
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):

        "Unlink 'tmp' from its tree and return it."

        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):

        """
        Return whether this node and 'other' have the same node path
        according to libxml2.
        """

        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):

        """
        Compare as isSameNode does. Objects which are not Node instances are
        left to Python's default comparison instead of causing an error.
        """

        if not isinstance(other, Node):
            return NotImplemented
        return self._node.nodePath() == other._node.nodePath()

    def __ne__(self, other):

        "Provide the opposite of __eq__ so that != agrees with ==."

        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context
        node and return the results as a list of Node wrappers. The optional
        'namespaces' maps prefixes to namespace URIs for use in 'expr'. The
        'variables' parameter is accepted but not used at present.
        """

        context = self.ownerDocument.as_native_node().xpathNewContext()
        context.setContextNode(self.as_native_node())
        # NOTE: Discover namespaces from the node.
        for prefix, ns in (namespaces or {}).items():
            context.xpathRegisterNs(prefix, ns)
        # NOTE: May need to tidy up the context.
        return [Node(_node) for _node in context.xpathEval(expr)]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Not implemented: None is always returned."

    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document Node. Where 'localName' is not None, a root element
    with that name in the namespace 'namespaceURI' is created and added. The
    'doctype' is stored on the returned Node.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):

    """
    Parse a document and return it as a Node. An object with a 'read'
    attribute is read completely and its contents parsed; anything else is
    passed to parseFile.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read())
    else:
        return parseFile(stream_or_string)

def parseFile(s):

    "Parse the file named by 's' and return the document as a Node."

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2.createFileParserCtxt(s)
    context.validate(0)
    context.ctxtUseOptions(0)
    context.parseDocument()
    return Node(context.doc())

def parseString(s):

    "Parse the document text in 's' and return the document as a Node."

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2.createMemoryParserCtxt(s, len(s))
    context.validate(0)
    context.ctxtUseOptions(0)
    context.parseDocument()
    return Node(context.doc())

def parseURI(uri):

    "Parse the document found at 'uri' and return it as a Node."

    return Node(libxml2.parseURI(uri))

def toString(node, encoding=None):

    """
    Return the serialised form of 'node', using 'encoding' where it is given.
    """

    if encoding is None:
        return node.as_native_node().serialize()
    else:
        return node.as_native_node().serialize(encoding)

def toStream(node, stream=None, encoding=None):

    """
    Write the serialised form of 'node' to 'stream', using 'encoding' where
    it is given. Standard output is used where 'stream' is None.
    """

    # Test for None specifically so that a stream object which happens to be
    # false in a boolean context is still written to.
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node, encoding))

# vim: tabstop=4 expandtab shiftwidth=4