#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.
"""

import xml.dom
import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod.

    Unicode objects are encoded as UTF-8 byte strings. Any other value is
    returned unchanged, provided that unicode(s) succeeds; if that conversion
    fails with a UnicodeError, a TypeError is raised instead.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return the plain string 's' decoded from UTF-8 as a Unicode object. Values
    which are not plain strings (including None) are returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the libxml2 namespace object 'ns' as Unicode, or None
    where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at ":" and return a (prefix, localName) tuple.
    The prefix is None where 'name' contains no colon. Where 'name' contains
    more than one colon, (None, None) is returned.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Return a tuple, as the other branches do, rather than the list.
        return tuple(parts)
    else:
        # NOTE: Should raise an exception.
        return None, None

# Mapping from the type labels reported by libxml2mod.type to DOM node type
# constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to a type label. Several labels
# share a constant, so the labels are visited in sorted order to make the
# surviving label the same on every run.

_reverseNodeTypes = {}
for label, value in sorted(_nodeTypes.items()):
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):

    """
    Return the document recorded by libxml2mod for 'node', or 'node' itself
    where no document is reported.
    """

    return libxml2mod.doc(node) or node

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised where
    libxml2mod reports a type label which is not present in _nodeTypes.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    """
    Return a list of the children of 'node' in document order, leaving out
    any document type nodes.
    """

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute
    nodes of 'node'. The namespace is None for attributes without one.
    """

    attributes = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        ns = libxml2mod.xmlNodeGetNs(attr)
        if ns is not None:
            ns_uri = get_ns(ns)
        else:
            ns_uri = None
        attributes[(ns_uri, libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)
    return attributes

def Node_namespaceURI(node):

    """
    Return the namespace of 'node' as Unicode, or None where the node has no
    namespace or the namespace is empty.
    """

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return get_ns(ns)

def Node_nodeValue(node):

    "Return the content of 'node' as reported by libxml2mod, as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value', converting it from Unicode."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this.
# The DOM 'data' property is provided by the same function as nodeValue.

Node_data = Node_nodeValue

def Node_prefix(node):

    """
    Return the name of the namespace object of 'node' (its prefix) as Unicode,
    or None where the node has no namespace object.
    """

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))

def Node_nodeName(node):

    """
    Return the name of 'node', qualified as "prefix:localName" where the node
    has a prefix.
    """

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element; otherwise None."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the name of 'node' as reported by libxml2mod, as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None where 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the sibling preceding 'node', or None where there is none."

    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the sibling following 'node', or None where there is none."

    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the internal subset reported by libxml2mod for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    "Return whether 'node' has a value for 'localName' in namespace 'ns'."

    return Node_getAttributeNS(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has a value for the attribute called 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute 'localName' in namespace 'ns' on 'node'
    as Unicode, or whatever non-string value libxml2mod reports where it has
    no value to give.
    """

    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    """
    Return the value of the attribute called 'name' on 'node' as Unicode, or
    whatever non-string value libxml2mod reports where it has no value to
    give.
    """

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node stored under ('ns', 'localName') for 'node'.
    A KeyError is raised where no such attribute is present.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node stored under (None, 'name') for 'node'.
    A KeyError is raised where no such attribute is present.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Create an attribute on 'node' with the qualified 'name' and the given
    'value'. Where 'ns' matches the namespace of 'node', that namespace object
    is reused; otherwise, where 'name' has a prefix, a new namespace
    declaration for 'ns' is made on 'node'; otherwise the attribute is created
    without a namespace.
    """

    # NOTE: Need to convert from Unicode.
    ns, name, value = map(from_unicode, [ns, name, value])

    prefix, localName = _get_prefix_and_localName(name)

    # NOTE: Might need to be xmlSetNsProp.
    if ns is not None and ns == libxml2mod.xmlNodeGetContent(libxml2mod.xmlNodeGetNs(node)):
        libxml2mod.xmlNewNsProp(node, libxml2mod.xmlNodeGetNs(node), localName, value)
    elif prefix is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
        libxml2mod.xmlNewNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlNewNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute called 'name' on 'node' to 'value'."

    # NOTE: Need to convert from Unicode.
    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Copy the namespace, name and value of the attribute node 'attr' onto
    'node' using Node_setAttributeNS.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    """
    Copy the name and value of the attribute node 'attr' onto 'node' using
    Node_setAttribute.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute 'localName' in namespace 'ns' from 'node'. A KeyError
    is raised (by Node_getAttributeNodeNS) where no such attribute is present.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute called 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name'. Where 'ns' is not None, a
    namespace declaration using the prefix from 'name' is made on the element
    and set as its namespace; otherwise the element is given no namespace and
    an empty "xmlns" attribute. The 'node' argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(new_node, ns, prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    else:
        #new_ns = libxml2mod.xmlNewNs(new_node, "", prefix)
        #libxml2mod.xmlSetNs(new_node, new_ns)
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    "Return a new element called 'name'. The 'node' argument is not used."

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Create an attribute with the qualified 'name' and no value on 'node' and
    return it. Where 'ns' is not None, a namespace declaration using the prefix
    from 'name' is made on 'node' and used for the attribute.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    """
    Create an attribute called 'name' with no namespace and no value on 'node'
    and return it.
    """

    # The name is passed on unconverted: Node_createAttributeNS performs the
    # conversion from Unicode itself, and converting here as well would make
    # from_unicode reject the already-encoded form of a non-ASCII name.

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node holding 'value'. The 'node' argument is not used."

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment holding 'value'. The 'node' argument is not used."

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_insertBefore(node, tmp, oldNode):

    """
    Add 'tmp' as the sibling preceding 'oldNode' and return the result given
    by libxml2mod. The 'node' argument is not used.
    """

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp' and return the result given by libxml2mod.
    The 'node' argument is not used.
    """

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node' and return the result given by libxml2mod."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree. The 'node' argument is not used."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other', made using the creation
    functions of this module. Elements are copied along with their attributes
    and, where 'deep' is true, their child nodes; text and comment nodes are
    copied by value. A ValueError is raised for any other type of node.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    # Report the type label of the node rather than the node object itself.
    raise ValueError("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node offering the standard DOM attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue, data),
    made using the creation functions of this module. Elements are copied
    along with their attributes and, where 'deep' is true, their child nodes;
    text and comment nodes are copied by value. A ValueError is raised for any
    other type of node.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    # Node types without a known label must still produce the ValueError
    # rather than a KeyError from the label lookup.
    type_label = _reverseNodeTypes.get(other.nodeType, "unknown")
    raise ValueError("Node type '%s' (%d) not supported." % (type_label, other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result given by libxml2mod. The optional 'namespaces'
    dictionary maps prefixes to namespaces for use in the expression. The
    'variables' argument is currently ignored.
    """

    # NOTE: Need to convert from Unicode.
    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    # Free the context even if registration or evaluation raises.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    # NOTE: Need to convert from Unicode.
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new version 1.0 document. Where 'localName' is not None, a root
    element is created from 'namespaceURI' and 'localName' and added to the
    document. Where 'doctype' is not None, an internal subset is created from
    its localName, publicId and systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0):

    """
    Parse 'stream_or_string' and return the resulting document. An object with
    a 'read' attribute is read completely and its contents parsed; any other
    value is passed to parseFile. Where 'html' is true, the HTML parser is
    used.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html)
    return parseFile(stream_or_string, html)

def _parseXMLContext(context):

    """
    Run the XML parser 'context' with pedantic mode and validation switched
    off and with the no-error, no-warning and no-network options set, and
    return the document obtained from it.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parseFile(s, html=0):

    """
    Parse the file 's' (as understood by libxml2mod) and return the resulting
    document. Where 'html' is true, the HTML parser is used.
    """

    if not html:
        return _parseXMLContext(libxml2mod.xmlCreateFileParserCtxt(s))
    return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0):

    """
    Parse the document text held in the string 's' and return the resulting
    document. Where 'html' is true, the HTML parser is used.
    """

    if not html:
        return _parseXMLContext(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0):

    """
    Parse the XML document found at 'uri' and return the resulting document.
    A NotImplementedError is raised where 'html' is true.
    """

    if html:
        raise NotImplementedError("parseURI does not yet support HTML")
    return _parseXMLContext(libxml2mod.xmlCreateURLParserCtxt(uri, 0))

def toString(node, encoding=None, prettyprint=0):

    """
    Return the serialised form of 'node' produced by libxml2mod using the
    given 'encoding' and 'prettyprint' settings.
    """

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream' using its write method."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    """
    Have libxml2mod save 'node' to 'f' using the given 'encoding' and
    'prettyprint' settings.
    """

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4