#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod.

    Unicode objects are encoded as UTF-8. Any other value is returned
    unchanged, but only after checking that it can be converted to Unicode
    implicitly; if that conversion fails, TypeError is raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain string; any other value
    (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None if that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    if out_ns:
        return out_ns
    return None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. The prefix is None when 'name' contains no colon.

    Raises xml.dom.NamespaceErr if 'name' contains more than one colon, since
    such a name cannot be split into a single prefix and local name.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        return parts[0], parts[1]
    # Previously (None, None) was returned here, which then reached libxml2
    # as a missing name; report the malformed name explicitly instead.
    raise xml.dom.NamespaceErr("Malformed qualified name: %r" % (name,))

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'.

    The definitions declared on 'node' are examined first, followed by the
    namespace that 'node' itself uses. None is returned if neither matches.
    """

    # Special treatment for XML namespace.

    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    # Look through the definitions made on the node itself.

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    # Fall back to the namespace actually used by the node.

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix'.

    Return a true value if the namespace matches 'ns' and either 'prefix' is
    None (meaning that any prefix is acceptable) or the prefix also matches.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    return ns == current_ns and (prefix is None or prefix == current_prefix)

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).

    Return the new definition node, or None if no definition was made.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS<number>" which is not used by any of the
    namespace definitions made on 'node'. The 'ns' argument is not consulted.
    """

    # Collect the prefixes already declared on the node.

    prefixes = []
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        prefixes.append(libxml2mod.name(current))
        current = libxml2mod.next(current)

    # Try successive numbers until an unused prefix is found.

    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to a libxml2mod label. Where
# several labels share a constant, only one of them is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes reports 'node' and 'other' as equal."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document associated with 'node'."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised if
    libxml2mod reports a type label which is not present in the type table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    """
    Return a list of the child nodes of 'node' in document order, leaving out
    any document type nodes.
    """

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace URI, name) tuples to the attribute
    nodes of 'node'. The namespace URI is None for attributes which have no
    namespace. Namespace declarations are not included.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            ns_uri = get_ns(ns)
        else:
            ns_uri = None
        attributes[(ns_uri, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace URI used by 'node', or None if it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None if it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    return None

def Node_nodeName(node):

    """
    Return the name of 'node', qualified with its prefix and a colon if the
    node has a prefix.
    """

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the qualified name of 'node' if it is an element, None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the unqualified name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    """
    Return the parent of 'node', or None if 'node' is None or is an XML
    document node.
    """

    if node is None or libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None if there is no such node."

    if node is None:
        return None
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None if there is no such node."

    if node is None:
        return None
    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the internal subset of the document 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and
    'localName'. A namespace definition on 'node' for 'ns' whose prefix equals
    'localName' also counts as a match.
    """

    # NOTE(review): The second test treats 'localName' as a prefix; confirm
    # NOTE(review): that this is intended for non-xmlns namespaces.
    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName' as Unicode, or None if it is absent.

    When 'ns' is the xmlns namespace, 'localName' is treated as a prefix and
    the namespace declared for that prefix on 'node' is returned instead.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        return None
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute 'name' on 'node' as Unicode."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError
    is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' with the given 'name' and no
    namespace. A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in the namespace 'ns' on
    'node' to 'value', making a namespace declaration on 'node' if no suitable
    one exists.

    Setting an "xmlns:prefix" attribute in the xmlns namespace only makes the
    corresponding namespace declaration, and does nothing if it is already
    present.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)

    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        # Remove any conflicting attribute.
        if Node_hasAttributeNS(node, ns, localName):
            Node_removeAttributeNS(node, ns, localName)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)

    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])
    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, name and value of the
    attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of the attribute node 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'.
    Nothing is done if 'node' has no such attribute.
    """

    # Node_hasAttributeNS can report a match for a namespace definition when
    # no attribute node exists, so a missing node must be tolerated here.
    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in the namespace 'ns'. The
    element is not attached to 'node', which is not consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    "Return a new element with the given 'name'; 'node' is not consulted."

    name = from_unicode(name)
    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Create an attribute with the qualified 'name' in the namespace 'ns' on
    'node' and return the new attribute node. A namespace declaration is made
    on 'node' if 'ns' is not None and no suitable declaration is found.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Create an attribute 'name' without a namespace on 'node' and return it."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'; 'node' is not consulted."

    value = from_unicode(value)
    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'; 'node' is not consulted."

    value = from_unicode(value)
    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section containing 'value', created for the document
    associated with 'node'.
    """

    value = from_unicode(value)
    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' immediately before 'oldNode'. If 'oldNode' is None, 'tmp' is
    instead added as the last child of 'node', as DOM requires for a null
    reference child.
    """

    if oldNode is None:
        return libxml2mod.xmlAddChild(node, tmp)
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'; 'node' is not consulted."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its surroundings; 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other', made using the creation
    macros with 'node' as context. Elements (with their attributes), text
    nodes, comments and CDATA sections are supported; the children of an
    element are copied as well if 'deep' is true.

    Raises xml.dom.NotSupportedErr for any other kind of node.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # xml.dom.DOMException cannot be instantiated directly, so the specific
    # subclass is raised; it is still caught by handlers for DOMException.
    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes.get(node_type, "unknown"), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', which is accessed through the standard DOM
    attributes (nodeType, attributes, childNodes and so on), made using the
    creation macros with 'node' as context. The supported node types are the
    same as for Node_importNode.

    Raises xml.dom.NotSupportedErr for any other kind of node.
    """

    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # Foreign DOM nodes may have types which are absent from the type table
    # (document fragments, for example), hence the tolerant lookup.
    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes.get(node_type, "unknown"), node_type))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result. The optional 'namespaces' dictionary maps prefixes to
    namespace URIs for use in the expression; 'variables' is currently
    ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Release the context even if registration or evaluation fails.
        libxml2mod.xmlXPathFreeContext(context)

# Exceptions.

class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        return str(self)

    def __str__(self):

        """
        Return a description based on the first constructor argument, which is
        expected to be one of the error codes defined on this class.
        """

        # Tolerate construction without arguments.
        if self.args:
            exctype = self.args[0]
        else:
            exctype = None

        if exctype == self.PARSE_ERR:
            return "Parse error: LSException(%d)" % exctype
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: LSException(%d)" % exctype
        else:
            return Exception.__repr__(self)

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new XML 1.0 document. If 'localName' is not None, a root element
    with that name in 'namespaceURI' is added; if 'doctype' is not None, an
    internal subset is created from its localName, publicId and systemId.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def _parse_xml_context(context, unfinished):

    """
    Configure the parser 'context', run it and return the resulting document.
    LSException is raised if no context could be created, or if the document
    is not well-formed and 'unfinished' is false.
    """

    # NOTE(review): libxml2mod is expected to give None when a context cannot
    # NOTE(review): be created (a missing file, for example) - confirm.
    if context is None:
        raise LSException(LSException.PARSE_ERR)

    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if unfinished or Parser_well_formed(context):
        return doc
    raise LSException(LSException.PARSE_ERR)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse a document from 'stream_or_string': an object with a read method
    has its entire contents parsed as a string, whereas anything else is
    treated as a filename. The remaining arguments are passed on to
    parseString or parseFile.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file named 's' and return the document. If 'html' is true, the
    HTML parser is used with 'htmlencoding'. Otherwise LSException is raised
    for a document which is not well-formed, unless 'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateFileParserCtxt(s), unfinished)
    return libxml2mod.htmlReadFile(s, htmlencoding, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the document text 's' and return the document. If 'html' is true,
    the HTML parser is used with 'htmlencoding'. Otherwise LSException is
    raised for a document which is not well-formed, unless 'unfinished' is
    true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)), unfinished)

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the XML document found at 'uri' and return the document.
    LSException is raised for a document which is not well-formed, unless
    'unfinished' is true. NotImplementedError is raised if 'html' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0), unfinished)
    raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    "Return the serialised form of 'node' using 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save the serialised form of 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

# Parser option flags passed to the HTML and XML parsers.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

def Parser_push():

    "Return a new push parser context with no handler, initial data or filename."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context):

    """
    Configure the parser 'context': pedantic mode and validation are switched
    off, and the no-error, no-warning and no-network options are set.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)

def Parser_feed(context, s, terminate=1):

    """
    Feed the string 's' to the push parser 'context'.

    The 'terminate' flag is passed to xmlParseChunk. Its default of 1 keeps
    the previous behaviour, where every call marks its chunk as the last one;
    pass 0 for all but the final chunk when feeding a document in pieces.
    """

    libxml2mod.xmlParseChunk(context, s, len(s), terminate)

def Parser_well_formed(context):

    "Return the well-formedness status recorded in the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the complete document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# vim: tabstop=4 expandtab shiftwidth=4