#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.
"""

import xml.dom
import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod.

    Unicode objects are encoded as UTF-8. Any other value is returned
    unchanged, provided that 'unicode(s)' succeeds; otherwise a TypeError is
    raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Decode the byte string 's' as UTF-8, returning a Unicode object. Values
    which are not byte strings (such as None) are returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace object 'ns' as Unicode, or None where
    the content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))

    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon, returning a (prefix, localName)
    tuple. The prefix is None where 'name' has no colon. Names with more than
    one colon produce (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        # Always provide a tuple, like the other branches.
        return tuple(parts)

    # NOTE: Should raise an exception.
    return None, None

def _find_namespace(node, ns, prefix):

    """
    Return a namespace object associated with 'node' which matches both 'ns'
    and 'prefix', or None if there is no such object. The declarations on the
    node are searched first, followed by the namespace of the node itself.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Return a true value if the namespace object 'current' has content equal
    to 'ns' and a name equal to 'prefix'.
    """

    current_ns = libxml2mod.xmlNodeGetContent(current)
    current_prefix = libxml2mod.name(current)
    return ns == current_ns and prefix == current_prefix

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Create a namespace declaration on 'node' for 'ns' and 'prefix', returning
    the new namespace object. Where 'prefix' is None, a declaration is only
    made if 'set_default' is true; otherwise, None is returned.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

# Mapping from the type labels reported by libxml2mod.type to DOM node type
# constants. Labels missing from this table cause Node_nodeType to raise
# KeyError.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Reverse mapping for diagnostics. Several labels share a DOM node type, so
# which of those labels is recorded depends on dictionary iteration order.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):

    "Return the document of 'node', or 'node' itself where none is reported."

    return libxml2mod.doc(node) or node

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2 node types which have no entry in the type table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute
    nodes of 'node'. The namespace is None for attributes without one; the
    name is used exactly as provided by libxml2mod.name.
    """

    attributes = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        attributes[(Node_namespaceURI(attr), libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)
    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node' as Unicode, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return get_ns(ns)

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this.

# Character data access shares the implementation of nodeValue.

Node_data = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None without one."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))

def Node_nodeName(node):

    "Return the name of 'node', qualified by its prefix where it has one."

    prefix = Node_prefix(node)
    local_name = Node_localName(node)
    if prefix is None:
        return local_name
    return prefix + ":" + local_name

def Node_tagName(node):

    "Return the node name for element nodes and None for all other nodes."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the name of 'node' as Unicode, as provided by libxml2mod.name."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None for XML document nodes."

    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None if there is no such node."

    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None if there is no such node."

    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    "Return whether an attribute value is found for 'ns' and 'localName'."

    return Node_getAttributeNS(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether an attribute value is found for 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value found by xmlGetNsProp for 'localName' in 'ns' on 'node',
    decoded to Unicode where a byte string is obtained.
    """

    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    """
    Return the value found by xmlGetProp for 'name' on 'node', decoded to
    Unicode where a byte string is obtained.
    """

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node of 'node' registered under (ns, localName).
    A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node of 'node' registered under (None, name).
    A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node'
    to 'value'. Attributes of the form xmlns:prefix in the XMLNS namespace
    produce a namespace declaration instead of an attribute.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)

    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)

    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute called 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, name and value of 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute registered under (ns, localName) from 'node'. Nothing
    is done if there is no such attribute, in keeping with the behaviour of
    Node_removeAttribute, which does not raise an exception in that case.
    """

    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute called 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in the namespace 'ns'. The
    element is not attached to anything; 'node' is not used.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)

    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)

    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")

    return new_node

def Node_createElement(node, name):

    "Return a new, unattached element called 'name'; 'node' is not used."

    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute on 'node' with the qualified 'name' in the
    namespace 'ns', created with no value.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)

    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None

    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Return a new attribute on 'node' called 'name', without a namespace."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'; 'node' is not used."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'; 'node' is not used."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_insertBefore(node, tmp, oldNode):

    "Add 'tmp' as the previous sibling of 'oldNode'; 'node' is not used."

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'; 'node' is not used."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree; 'node' is not used."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2 node 'other' made using the creation
    functions in this module, copying the children of elements as well if
    'deep' is true. Only element, text and comment nodes are supported: a
    ValueError is raised for other node types.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    # Report the type label and not the node object itself.
    raise ValueError("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node accessed through DOM attributes such as
    nodeType and childNodes, made using the creation functions in this
    module. The children of elements are copied as well if 'deep' is true.
    Only element, text and comment nodes are supported: a ValueError is
    raised for other node types.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    # Node types without a known label must still produce the ValueError and
    # not a KeyError from the label lookup.
    raise ValueError("Node type '%s' (%d) not supported." % (
        _reverseNodeTypes.get(other.nodeType, "unknown"), other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces for use in the expression. The 'variables'
    parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Release the context even if registration or evaluation fails.
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. If 'localName' is not None, a root element of that
    name in 'namespaceURI' is added. If 'doctype' is not None, an internal
    subset is created from its localName, publicId and systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0):

    """
    Parse the given 'stream_or_string'. Objects with a 'read' attribute are
    read completely and parsed as strings; anything else is passed to
    parseFile. HTML parsing is used if 'html' is true.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read(), html)
    return parseFile(stream_or_string, html)

def _parse_xml_context(context):

    """
    Parse a document using the parser 'context', returning the document. A
    ValueError is raised if 'context' is None.
    """

    # NOTE(review): the creation functions appear to return None upon failure
    # NOTE(review): (missing file, empty string); confirm against libxml2mod.
    if context is None:
        raise ValueError("Could not create a parser context for the given input.")

    # NOTE: Switching off validation and remote DTD resolution.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parseFile(s, html=0):

    """
    Parse the file with the filename 's', returning a document. The HTML
    parser is used if 'html' is true.
    """

    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateFileParserCtxt(s))
    return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0):

    """
    Parse the document text 's', returning a document. The HTML parser is used
    if 'html' is true.
    """

    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0):

    """
    Parse the resource at 'uri', returning a document. HTML parsing is not
    supported: a NotImplementedError is raised if 'html' is true.
    """

    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0))
    raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    "Return the result of serialising 'node' using libxml2mod.serializeNode."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4