#!/usr/bin/env python

"""
DOM wrapper around libxml2.

Provides a small PyXML-like DOM API (Node, NamedNodeMap and a handful of
module-level factory, parsing and serialisation functions) implemented on
top of the libxml2 Python bindings. The module targets Python 2 string
semantics: text is exchanged with libxml2 as UTF-8 byte strings and handed
to callers as Unicode objects.
"""

__version__ = "0.1.1"

import sys
import xml.dom

import libxml2

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object.
    Any other value (including None) is returned unchanged.
    """

    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain (byte) string.
    Any other value (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return s.decode("utf-8")
    return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A view of the attributes of the element Node given on construction.
    The attribute list is re-read from the native node on every access.
    """

    def __init__(self, node):
        self.node = node

    def getNamedItem(self, name):

        """
        Return the first attribute whose localName equals 'name', or None.
        """

        for attr in self.values():
            if name == attr.localName:
                return attr
        # NOTE: Check exception possibilities here.
        return None

    def getNamedItemNS(self, ns, localName):

        """
        Return the first attribute matching both 'ns' and 'localName', or
        None.
        """

        for attr in self.values():
            if localName == attr.localName and ns == attr.namespaceURI:
                return attr
        # NOTE: Check exception possibilities here.
        return None

    def setNamedItem(self, node):
        "Set the attribute described by 'node' on the underlying element."
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        "Namespace-aware variant of setNamedItem."
        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store 'node' under 'name'; raise KeyError if 'name' is not the
        node's own nodeName.
        """

        if name != node.nodeName:
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):
        # NOTE: To be implemented.
        pass

    def values(self):
        "Return a list of Node objects, one per attribute of the element."
        attributes = []
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):

    """
    Split a qualified 'name' into (prefix, localName).

    A name without a colon yields (None, name). A name with more than one
    colon yields (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        return parts[0], parts[1]
    else:
        # NOTE: Should raise an exception.
        return None, None

class TemporaryNode(object):

    """
    A placeholder for a node (currently only attributes) which has been
    created but not yet attached to an element, holding only its namespace,
    qualified name and node type.
    """

    def __init__(self, ns, name, nodeType):
        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix, self.localName = _get_prefix_and_localName(self.name)

class Node(object):

    "A DOM-style wrapper around a native libxml2 node."

    # Mapping from the libxml2 node type names to xml.dom node type codes.

    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the native libxml2 'node'. The optional 'ownerElement' and
        'doctype' are stored as plain attributes of the same names.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):
        "Return the wrapped libxml2 node."
        return self._node

    # Property implementations.

    def _ownerDocument(self):
        # NOTE: A wrapper is always returned, even around a missing
        # NOTE: document, because the create* methods are invoked through
        # NOTE: it (see importNode) and do not need a native document.
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        """
        Internal namespace information retrieval: return the native
        namespace object of the node, or None if it has none.
        """

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is None:
            return None
        return to_unicode(ns.content)

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is None:
            return None
        return to_unicode(ns.name)

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is None:
            return self._localName()
        return prefix + ":" + self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        _parent = self._node.parent
        # A detached node has no parent: report None rather than a wrapper
        # around nothing, which would fail on first use.
        if _parent is None:
            return None
        return Node(_parent)

    def _previousSibling(self):
        _sibling = self._node.prev
        if _sibling is None:
            return None
        return Node(_sibling)

    def _nextSibling(self):
        _sibling = self._node.next
        if _sibling is None:
            return None
        return Node(_sibling)

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):
        "Return the attribute value as Unicode, or None if absent."
        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):
        "Return the attribute value as Unicode, or None if absent."
        return to_unicode(self._node.prop(name))

    def getAttributeNodeNS(self, ns, localName):
        return self.attributes.getNamedItemNS(ns, localName)

    def getAttributeNode(self, localName):
        # NOTE: Needs verifying.
        return self.attributes.getNamedItem(localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' in namespace 'ns' to
        'value'.

        A prefixed name declares a new namespace on this element. An
        unprefixed name reuses the element's own namespace when 'ns'
        matches it; otherwise the attribute is set without a namespace.
        """

        # libxml2 needs byte strings, not Unicode objects.
        ns, name, value = [from_unicode(s) for s in (ns, name, value)]

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # Use _getNs rather than self._node.ns() directly: the latter raises
        # libxml2.treeError when the element has no namespace at all.
        _ns = None
        if ns is not None:
            _ns = self._getNs()

        if _ns is not None and ns == _ns.content:
            self._node.setNsProp(_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            # NOTE: This also catches the case where None is the element's
            # NOTE: namespace and is also used for the attribute.
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        "Set the attribute 'name' to 'value'."
        name, value = [from_unicode(s) for s in (name, value)]
        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    # Node creation.

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element Node with the qualified 'name',
        placed in namespace 'ns' unless 'ns' is None.
        """

        ns, name = [from_unicode(s) for s in (ns, name)]

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        # NOTE: Does it make sense to set the namespace if it is empty?
        if ns is not None:
            _ns = _node.newNs(ns, prefix)
            _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):
        "Return a new, unattached element Node called 'name'."
        name = from_unicode(name)

        # The original referred to an undefined 'localName' here.
        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):
        "Return a TemporaryNode describing an attribute in namespace 'ns'."
        ns, name = [from_unicode(s) for s in (ns, name)]
        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):
        "Return a TemporaryNode describing an attribute without a namespace."
        name = from_unicode(name)

        # The original referred to an undefined 'ns' here.
        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):
        "Return a new, unattached text Node containing 'value'."
        value = from_unicode(value)
        return Node(libxml2.newText(value))

    # Tree manipulation.

    def _add_node(self, tmp):

        """
        Create on this element the native attribute described by the
        TemporaryNode 'tmp' and return it. None is returned for any node
        type other than an attribute.
        """

        if tmp.nodeType != xml.dom.Node.ATTRIBUTE_NODE:
            return None

        if tmp.ns is not None:
            _child = self._node.newNsProp(None, tmp.localName, None)
            _ns = _child.newNs(tmp.ns, tmp.prefix)
            _child.setNs(_ns)
        else:
            # xmlNode.newProp takes (name, value); there is no namespace
            # argument as there is for newNsProp.
            _child = self._node.newProp(tmp.name, None)
        return _child

    def _detached_native_node(self, tmp):

        """
        Return a native node for 'tmp' which is ready to be linked into
        the tree: the wrapped node for text and element nodes, otherwise a
        freshly created and immediately unlinked node.
        """

        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            return tmp._node
        _child = self._add_node(tmp)
        _child.unlinkNode()
        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node' created through this node's owner
        document. Elements are copied with their attributes and, if 'deep'
        is true, with their supported descendants. Text and attribute
        nodes are also supported; any other node type raises ValueError.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' immediately before 'oldNode' and return the inserted
        Node. If 'oldNode' is None, 'tmp' is appended instead, as the DOM
        specifies.
        """

        if oldNode is None:
            return self.appendChild(tmp)
        _child = self._detached_native_node(tmp)
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):
        "Replace 'oldNode' with 'tmp', returning the replaced Node."
        _child = self._detached_native_node(tmp)
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):
        "Append 'tmp' to this node and return the appended Node."
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):
        "Unlink 'tmp' from the tree and return it."
        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):
        "Return whether 'other' wraps a node with the same node path."
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        # Comparisons such as "node == None" should be false, not an error.
        if not isinstance(other, Node):
            return NotImplemented
        return self.isSameNode(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context
        node and return the results as a list of Node objects. The optional
        'namespaces' dictionary maps prefixes to namespace URIs. The
        'variables' argument is accepted but not yet used.
        """

        context = self.ownerDocument.as_native_node().xpathNewContext()
        try:
            context.setContextNode(self.as_native_node())
            # NOTE: Discover namespaces from the node.
            for prefix, ns in (namespaces or {}).items():
                context.xpathRegisterNs(prefix, ns)
            return [Node(_node) for _node in context.xpathEval(expr)]
        finally:
            # The bindings do not free XPath contexts automatically.
            context.xpathFreeContext()

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Document types are not supported: always return None."
    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document Node. If 'localName' is not None, a root element
    of that name in 'namespaceURI' is created and appended.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):

    """
    Parse a document from an object with a 'read' method, or else from the
    file named by the given string, returning the document Node.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read())
    return parseFile(stream_or_string)

def _parse_with_context(context):
    "Run the parser 'context' and return the resulting document Node."
    # NOTE: Switching off validation and remote DTD resolution.
    context.validate(0)
    context.ctxtUseOptions(0)
    context.parseDocument()
    return Node(context.doc())

def parseFile(s):
    "Parse the file named 's' and return the document Node."
    return _parse_with_context(libxml2.createFileParserCtxt(s))

def parseString(s):
    "Parse the document text 's' and return the document Node."
    return _parse_with_context(libxml2.createMemoryParserCtxt(s, len(s)))

def parseURI(uri):
    "Parse the document found at 'uri' and return the document Node."
    return Node(libxml2.parseURI(uri))

def toString(node, encoding=None):
    "Return 'node' serialised as a string, optionally in 'encoding'."
    if encoding is None:
        return node.as_native_node().serialize()
    return node.as_native_node().serialize(encoding)

def toStream(node, stream=None, encoding=None):
    "Write the serialised 'node' to 'stream' (sys.stdout if None)."
    # Test for None explicitly so that a falsy stream object is still used.
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node, encoding))

# vim: tabstop=4 expandtab shiftwidth=4