#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod.

    Unicode objects are encoded as UTF-8. Any other value is returned
    unchanged, provided that 'unicode(s)' succeeds without an explicit
    encoding; otherwise a TypeError is raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain string; any other value
    (including None and existing Unicode objects) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def check_document(node, tmp):

    """
    Raise xml.dom.WrongDocumentErr (a DOMException subclass) if 'tmp' has an
    owner document which is not the owner document of 'node'. A 'tmp' without
    an owner document is always accepted.
    """

    tmp_document = Node_ownerDocument(tmp)

    # NOTE(review): This is an identity test, so it presumably relies on
    # NOTE(review): libxml2mod handing back the same Python object for the same
    # NOTE(review): document each time - confirm against the bindings in use.
    if tmp_document is not None and Node_ownerDocument(node) is not tmp_document:
        # xml.dom.DOMException refuses to be instantiated directly (doing so
        # raises RuntimeError), hence the use of the specific subclass.
        raise xml.dom.WrongDocumentErr()

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as a Unicode
    object, or None where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at ":" and return a (prefix, localName) tuple.
    Names without a colon yield a prefix of None; names with more than one
    colon yield (None, None).
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        return tuple(t)
    else:
        # NOTE: Should raise an exception.
        return None, None

def _ns_defs(node):

    "Generate the namespace definition nodes declared on the given 'node'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        yield current
        current = libxml2mod.next(current)

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    for current in _ns_defs(node):
        if libxml2mod.name(current) == prefix:
            return current
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions declared on 'node' are tried first, followed
    by the namespace of 'node' itself. None is returned where nothing matches.
    """

    for current in _ns_defs(node):
        if _check_namespace(current, ns, prefix):
            return current

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning a true value if the namespace matches and 'prefix' is either None
    or equal to the prefix of the definition.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    return ns == current_ns and (prefix is None or prefix == current_prefix)

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).

    Return the new definition node, or None where no definition was made.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return the first prefix of the form "NS0", "NS1", ... which is not used by
    any namespace definition declared on 'node'. The 'ns' parameter is not
    consulted.
    """

    prefixes = [libxml2mod.name(current) for current in _ns_defs(node)]
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# A mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# The reverse mapping. Where several labels share a constant, whichever label
# is visited last by the iteration below is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):

    "Return the document of 'node' as reported by libxml2mod.doc."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels which are absent from the _nodeTypes mapping.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', leaving out document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node', where the namespace is None for attributes without one.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        attributes[(Node_namespaceURI(current), libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node' as a Unicode object, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return get_ns(ns)

def Node_nodeValue(node):

    "Return the content of 'node' as a Unicode object."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value', encoding Unicode values as UTF-8."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

# The data and textContent properties are provided by the nodeValue macro.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as a Unicode object, or None."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))

def Node_nodeName(node):

    "Return the name of 'node', qualified with its prefix where it has one."

    prefix = Node_prefix(node)
    local_name = Node_localName(node)
    if prefix is not None:
        return prefix + ":" + local_name
    return local_name

def Node_tagName(node):

    "Return the qualified name of 'node' if it is an element, None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the name of 'node', as reported by libxml2mod, as a Unicode object."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None where 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the sibling preceding 'node', or None if there is no such node."

    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the sibling following 'node', or None if there is no such node."

    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the internal subset of 'node' as given by xmlGetIntSubset."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and 'localName',
    or a namespace definition for 'ns' whose prefix is 'localName'.
    """

    return (Node_getAttributeNS(node, ns, localName) is not None or
        _find_namespace(node, ns, localName) is not None)

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName', or None if it is absent. Where 'ns' is the xmlns namespace,
    the namespace declared on 'node' for the prefix 'localName' is returned
    instead.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is None:
            return None
        return get_ns(ns_def)
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute 'name' on 'node' as given by xmlGetProp."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError is
    raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' having no namespace and the given
    'name'. A KeyError is raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node' to
    'value', finding or making a namespace declaration on 'node' as required.
    Setting xmlns:prefix attributes only affects the declarations on 'node'.
    """

    ns = from_unicode(ns)
    name = from_unicode(name)
    value = from_unicode(value)
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)

    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)

    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute 'name' on 'node' to 'value' using xmlSetProp."

    name = from_unicode(name)
    value = from_unicode(value)
    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Copy the namespace, name and value of 'attr' onto 'node' as an attribute."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Copy the name and value of 'attr' onto 'node' as an attribute."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'. A
    KeyError is raised by the attribute lookup where no such attribute exists.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute 'name' from 'node' using xmlUnsetProp."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in namespace 'ns', carrying
    its own namespace declaration. The 'node' parameter is not consulted.
    """

    ns = from_unicode(ns)
    name = from_unicode(name)

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)

    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)

    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")

    return new_node

def Node_createElement(node, name):

    """
    Return a new element with the given 'name' and no namespace handling. The
    'node' parameter is not consulted.
    """

    return libxml2mod.xmlNewNode(from_unicode(name))

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute node created on 'node' with the qualified 'name' in
    namespace 'ns', finding or making a namespace declaration on 'node' where
    'ns' is not None.
    """

    ns = from_unicode(ns)
    name = from_unicode(name)

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    new_ns = None
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Return a new attribute node created on 'node' with 'name' and no namespace."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node holding 'value'. The 'node' is not consulted."

    return libxml2mod.xmlNewText(from_unicode(value))

def Node_createComment(node, value):

    "Return a new comment node holding 'value'. The 'node' is not consulted."

    return libxml2mod.xmlNewComment(from_unicode(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling preceding 'oldNode', after checking that 'tmp'
    may be used with the document of 'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', after checking that 'tmp' may be used with
    the document of 'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    """
    Add 'tmp' as a child of 'node', after checking that 'tmp' may be used with
    the document of 'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its surroundings. The 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using the creation macros
    with 'node', copying descendants as well if 'deep' is true.

    Elements (with their attributes), text nodes and comments are supported;
    xml.dom.NotSupportedErr is raised for any other kind of node.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    # xml.dom.DOMException refuses to be instantiated directly (doing so raises
    # RuntimeError), hence the use of the specific subclass. The message
    # reports the libxml2mod type label rather than the node object itself.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', an object providing the DOM attributes nodeType,
    namespaceURI, tagName, attributes, childNodes, nodeValue and data, made
    using the creation macros with 'node'. Descendants are copied as well if
    'deep' is true.

    Elements (with their attributes), text nodes and comments are supported;
    xml.dom.NotSupportedErr is raised for any other kind of node.
    """

    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    # Node types without a libxml2mod label (CDATA sections, for example) must
    # not provoke a KeyError which would hide the intended exception.
    label = _reverseNodeTypes.get(node_type, "unknown")
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (label, node_type))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return whatever xmlXPathEval produces. The optional 'namespaces' dictionary
    maps prefixes to namespaces to be registered for the evaluation. The
    'variables' parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Make sure that the context is released even if registration or evaluation
    # fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element with
    that name in 'namespaceURI' is added; where 'doctype' is not None, an
    internal subset is created from its localName, publicId and systemId
    attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def _parse_xml_context(context):

    """
    Parse the XML document associated with the parser 'context' and return the
    resulting document.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parse(stream_or_string, html=0):

    """
    Parse 'stream_or_string' and return the document: objects with a read
    attribute are read completely and parsed as a string; anything else is
    treated as a filename. A true value for 'html' selects the HTML parser.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read(), html)
    return parseFile(stream_or_string, html)

def parseFile(s, html=0):

    """
    Parse the file named 's' and return the document, using the HTML parser if
    'html' is true.
    """

    if html:
        return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)
    return _parse_xml_context(libxml2mod.xmlCreateFileParserCtxt(s))

def parseString(s, html=0):

    """
    Parse the document text 's' and return the document, using the HTML parser
    if 'html' is true.
    """

    if html:
        # NOTE: URL given as None.
        html_url = None
        return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)
    return _parse_xml_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))

def parseURI(uri, html=0):

    """
    Parse the XML resource at 'uri' and return the document. NotImplementedError
    is raised if 'html' is true.
    """

    if html:
        raise NotImplementedError("parseURI does not yet support HTML")
    return _parse_xml_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0))

def toString(node, encoding=None, prettyprint=0):

    "Return the serialisation of 'node' produced by libxml2mod.serializeNode."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialisation of 'node' to 'stream' using its write method."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save the serialisation of 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4