#!/usr/bin/env python

"""
DOM macros for virtual libxml2 node methods and properties.

Each Node_* function takes a libxml2 node (or a TemporaryNode placeholder
for attributes that have not yet been attached to an element) as its first
argument and implements the DOM method or property of the same name.
"""

import xml.dom
import libxml2


class _TemporaryNs:

    """
    Placeholder for a namespace belonging to a TemporaryNode.

    Exposes the two attributes which this module reads from namespace
    objects: 'content' (the namespace URI) and 'name' (the prefix).
    """

    def __init__(self, content, name):
        self.content = content
        self.name = name


class TemporaryNode:

    """
    Placeholder for an attribute created via Node_createAttribute or
    Node_createAttributeNS before it is attached to an element.
    """

    def __init__(self, ns, name):
        # 'ns' is either None or an object providing 'content' and 'name'.
        self._ns = ns
        self.name = name
        self.type = "attribute"

    def ns(self):
        "Return the namespace object given at construction (possibly None)."
        return self._ns


# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    "Return 's' encoded as UTF-8 if it is a Unicode string, else unchanged."

    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s


def to_unicode(s):

    "Return 's' decoded from UTF-8 if it is a plain string, else unchanged."

    if isinstance(s, type("")):
        return unicode(s, encoding="utf-8")
    return s


def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon, returning a (prefix, localName)
    tuple. The prefix is None where 'name' contains no colon. A name with
    more than one colon yields (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        return parts[0], parts[1]
    # NOTE: Should raise an exception.
    return None, None


# Mapping from libxml2 node type names to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }


def Node_ownerDocument(node):
    "Return the document to which 'node' belongs."
    return node.doc


def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2 node types which have no entry in the mapping.
    """

    return _nodeTypes[node.type]


def Node_childNodes(node):

    "Return a list of the children of 'node' in document order."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = node.children
    while child is not None:
        child_nodes.append(child)
        child = child.next
    return child_nodes


def Node_attributes(node):

    """
    Return a dictionary mapping (namespace URI, name) tuples to the attribute
    nodes of 'node'. The namespace URI is None for attributes without a
    namespace.
    """

    attributes = {}
    attr = node.properties
    while attr is not None:
        ns = _getNs(attr)
        if ns is not None:
            attributes[(ns.content, attr.name)] = attr
        else:
            attributes[(None, attr.name)] = attr
        attr = attr.next
    return attributes


def _getNs(node):

    "Internal namespace information retrieval."

    try:
        return node.ns()
    except libxml2.treeError:
        return None


def Node_namespaceURI(node):

    "Return the namespace URI of 'node' as Unicode, or None if it has none."

    ns = _getNs(node)
    if ns is not None:
        return to_unicode(ns.content)
    return None


def Node_nodeValue(node):
    "Return the content of 'node' as Unicode."
    return to_unicode(node.content)


def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None if absent."

    ns = _getNs(node)
    if ns is not None:
        return to_unicode(ns.name)
    return None


def Node_nodeName(node):

    "Return the name of 'node', qualified by its prefix where it has one."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)


def Node_tagName(node):

    "Return the qualified name of 'node' if it is an element, else None."

    if node.type == "element":
        return Node_nodeName(node)
    return None


def Node_localName(node):
    "Return the unqualified name of 'node' as Unicode."
    return to_unicode(node.name)


def Node_parentNode(node):

    "Return the parent of 'node', or None where 'node' is a document."

    if node.type == "document_xml":
        return None
    return node.parent


def Node_previousSibling(node):
    "Return the sibling preceding 'node', or None if there is none."
    return node.prev


def Node_nextSibling(node):
    "Return the sibling following 'node', or None if there is none."
    return node.next


def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has a value for the attribute with the given
    namespace 'ns' and 'localName'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None


def Node_hasAttribute(node, name):

    "Return whether 'node' has a value for the attribute called 'name'."

    return Node_getAttribute(node, name) is not None


def Node_getAttributeNS(node, ns, localName):

    """
    Return, as Unicode, the value obtained from 'node' for the attribute with
    the given namespace 'ns' and 'localName'.
    """

    return to_unicode(node.nsProp(localName, ns))


def Node_getAttribute(node, name):

    """
    Return, as Unicode, the value obtained from 'node' for the attribute
    called 'name'.
    """

    return to_unicode(node.prop(name))


def Node_getAttributeNodeNS(node, ns, localName):
    # NOTE(review): This returns the same result as Node_getAttributeNS
    # NOTE(review): without Unicode conversion, which appears to be the
    # NOTE(review): attribute value rather than an attribute node. Confirm.
    return node.nsProp(localName, ns)


def Node_getAttributeNode(node, name):
    # NOTE: Needs verifying.
    return node.prop(name)


def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in the namespace 'ns' on
    'node' to 'value'.

    Where 'name' has a prefix, a new namespace declaration is made on 'node'.
    Where it has no prefix but 'ns' matches the namespace of 'node', that
    namespace is reused. Otherwise, the attribute is set without a namespace.
    """

    # NOTE: Need to convert from Unicode.
    ns, name, value = map(from_unicode, [ns, name, value])

    prefix, localName = _get_prefix_and_localName(name)

    # Elements without a namespace yield None here instead of raising an
    # exception, letting them fall through to the final case below.
    node_ns = _getNs(node)

    if prefix is not None:
        node.setNsProp(node.newNs(ns, prefix), localName, value)
    elif ns is not None and node_ns is not None and ns == node_ns.content:
        node.setNsProp(node_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        node.setNsProp(None, localName, value)


def Node_setAttribute(node, name, value):

    "Set the attribute called 'name' on 'node' to 'value'."

    # NOTE: Need to convert from Unicode.
    name, value = map(from_unicode, [name, value])

    node.setProp(name, value)


def Node_setAttributeNodeNS(node, ns, name, attr):

    "Set the namespaced attribute on 'node' using the value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, ns, name, Node_nodeValue(attr))


def Node_setAttributeNode(node, name, attr):

    "Set the attribute called 'name' on 'node' using the value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, name, Node_nodeValue(attr))


def Node_createElementNS(node, ns, name):

    """
    Return a new, unattached element with the qualified 'name'. Where 'ns'
    is not None, a namespace with that URI and the prefix from 'name' is
    declared on the new element and assigned to it.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2.newNode(localName)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = new_node.newNs(ns, prefix)
        new_node.setNs(new_ns)
    return new_node


def Node_createElement(node, name):

    "Return a new, unattached element called 'name'."

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    return libxml2.newNode(name)


def Node_createAttributeNS(node, ns, name):

    """
    Return a TemporaryNode representing an unattached attribute with the
    qualified 'name' in the namespace 'ns'.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        # No element exists yet upon which a real namespace could be
        # declared, so the URI and prefix are recorded in a placeholder.
        new_ns = _TemporaryNs(ns, prefix)
    else:
        new_ns = None
    return TemporaryNode(new_ns, localName)


def Node_createAttribute(node, name):

    """
    Return a TemporaryNode representing an unattached attribute called
    'name' without a namespace.
    """

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    return TemporaryNode(None, name)


def Node_createTextNode(node, value):

    "Return a new, unattached text node containing 'value'."

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2.newText(value)


def Node_createComment(node, value):

    "Return a new, unattached comment node containing 'value'."

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2.newComment(value)


def _add_node(node, tmp):

    """
    Create on 'node' an attribute corresponding to the TemporaryNode 'tmp',
    returning the new attribute.
    """

    # The namespace must be obtained by calling the method: the bound method
    # object itself is never None.
    if tmp.ns() is not None:
        uri, prefix, localName = map(from_unicode,
            [Node_namespaceURI(tmp), Node_prefix(tmp), Node_localName(tmp)])
        child = node.newNsProp(None, localName, None)
        # NOTE(review): The namespace is declared on the attribute itself;
        # NOTE(review): libxml2 may only accept declarations on elements.
        # NOTE(review): Confirm against the libxml2 version in use.
        ns = child.newNs(uri, prefix)
        child.setNs(ns)
    else:
        # newProp accepts only a name and a value.
        child = node.newProp(from_unicode(tmp.name), None)

    return child


def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling preceding 'oldNode', returning the result of
    the insertion. TemporaryNode objects are not inserted: None is returned.
    """

    if isinstance(tmp, TemporaryNode):
        return None
    return oldNode.addPrevSibling(tmp)


def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', returning the result of the replacement.
    TemporaryNode objects are not used as replacements: None is returned.
    """

    if isinstance(tmp, TemporaryNode):
        return None
    return oldNode.replaceNode(tmp)


def Node_appendChild(node, tmp):
    "Add 'tmp' as a child of 'node', returning the result of the addition."
    return node.addChild(tmp)


def Node_removeChild(node, child):
    "Unlink 'child' from the tree."
    child.unlinkNode()


def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of the evaluation.

    The optional 'namespaces' dictionary maps prefixes to namespace URIs to
    be made available to the expression. The 'variables' parameter is
    accepted but not currently used.
    """

    context = Node_ownerDocument(node).xpathNewContext()
    try:
        context.setContextNode(node)
        # NOTE: Discover namespaces from the node.
        for prefix, ns in (namespaces or {}).items():
            context.xpathRegisterNs(prefix, ns)
        return context.xpathEval(expr)
    finally:
        # Release the context explicitly, even if evaluation fails.
        context.xpathFreeContext()


# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Document types are not supported: None is always returned."
    return None


def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element of
    that name in the namespace 'namespaceURI' is created and added. The
    'doctype' parameter is not used.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2.newDoc("1.0")
    if localName is not None:
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    return d


def parse(stream_or_string):

    """
    Parse and return a document. Objects with a 'read' attribute are read
    and their contents parsed; anything else is passed to parseFile.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read())
    return parseFile(stream_or_string)


def parseFile(s):

    "Parse the file identified by 's', returning the document."

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2.createFileParserCtxt(s)
    context.validate(0)
    context.ctxtUseOptions(0)
    context.parseDocument()
    return context.doc()


def parseString(s):

    "Parse the document text in 's', returning the document."

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2.createMemoryParserCtxt(s, len(s))
    context.validate(0)
    context.ctxtUseOptions(0)
    context.parseDocument()
    return context.doc()


def parseURI(uri):
    "Return the result of libxml2.parseURI for 'uri'."
    return libxml2.parseURI(uri)

# vim: tabstop=4 expandtab shiftwidth=4