#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom
from libxml2dom.errors import DOMError

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# Global reconfiguration. This enables prettyprinting.

libxml2mod.xmlKeepBlanksDefault(0)

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod. Unicode objects
    are encoded as UTF-8. Any other value is returned unchanged provided that
    it can be converted to Unicode using the default encoding; otherwise a
    TypeError is raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    else:
        # The string might contain non-ASCII characters, thus upsetting libxml2
        # as it encounters a non-UTF-8 string.
        try:
            unicode(s)
        except UnicodeError:
            # Use the call form of raise: the "raise Class, value" statement
            # form is specific to Python 2.
            raise TypeError("Please use Unicode for non-ASCII data.")
        return s

def to_unicode(s):

    """
    Return the plain string 's' decoded from UTF-8 as a Unicode object. Any
    other kind of value is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    if out_ns:
        return out_ns
    else:
        return None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return the prefix and local
    name. An unprefixed name yields None as the prefix; a name containing more
    than one colon yields (None, None).
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        return t
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'.
    """

    # Special treatment for XML namespace.

    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    # Search the definitions made on the node itself.

    new_ns = None
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            new_ns = current
            break
        current = libxml2mod.next(current)

    # Fall back to the namespace employed by the node.

    if new_ns is None:
        node_ns = libxml2mod.xmlNodeGetNs(node)
        if node_ns is not None and _check_namespace(node_ns, ns, prefix):
            new_ns = node_ns
    return new_ns

def _check_namespace(current, ns, prefix):

    "Check the 'current' namespace definition node against 'ns' and 'prefix'."

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    # A prefix of None matches a definition having any prefix.
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    else:
        return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).
    """

    if prefix is not None or set_default:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return new_ns

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS0", "NS1", and so on, which is not employed
    by any namespace definition made on 'node'. The 'ns' parameter is not
    consulted.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    prefixes = []
    while current is not None:
        current_prefix = libxml2mod.name(current)
        prefixes.append(current_prefix)
        current = libxml2mod.next(current)
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# A mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# The reverse mapping. Where several labels share a constant, only one of the
# labels is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes yields zero for 'node' and 'other'."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document associated with 'node'."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels not present in the mapping.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    node = libxml2mod.children(node)
    while node is not None:
        # Remove doctypes.
        if Node_nodeType(node) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(node)
        node = libxml2mod.next(node)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node'. Attributes without a namespace employ None as the namespace.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node', or None where no namespace is set."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    else:
        return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to the given 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the prefix of the namespace of 'node', or None without a namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    else:
        return None

def Node_nodeName(node):

    "Return the name of 'node', qualified by its prefix where one is defined."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    else:
        return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element; otherwise None."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    else:
        return None

def Node_localName(node):

    "Return the unqualified name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    """
    Return the parent of 'node', or None where 'node' is None or is an XML
    document node.
    """

    if node is None or libxml2mod.type(node) == "document_xml":
        return None
    else:
        return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None where no such node exists."

    if node is not None and libxml2mod.prev(node) is not None:
        return libxml2mod.prev(node)
    else:
        return None

def Node_nextSibling(node):

    "Return the node following 'node', or None where no such node exists."

    if node is not None and libxml2mod.next(node) is not None:
        return libxml2mod.next(node)
    else:
        return None

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for the document 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and 'localName',
    or a namespace definition for 'ns' using 'localName' as the prefix.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName', or None where no such attribute exists. Where 'ns' is the
    xmlns namespace, 'localName' is treated as a prefix and the namespace
    defined for that prefix on 'node' is returned instead.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        else:
            return None
    else:
        return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute on 'node' with the given 'name'."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' with the given 'ns' and 'localName'. A
    KeyError is raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' with the given 'name' and no namespace.
    A KeyError is raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute on 'node' with the given 'ns' and qualified 'name' to the
    given 'value', finding or making a namespace definition as necessary.
    Setting an xmlns-prefixed attribute in the xmlns namespace makes a
    namespace definition instead of an attribute.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        new_ns = _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        # Remove any conflicting attribute.
        if Node_hasAttributeNS(node, ns, localName):
            Node_removeAttributeNS(node, ns, localName)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute on 'node' with the given 'name' to the given 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Set an attribute on 'node' using the namespace, name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute on 'node' with the given 'ns' and 'localName'. A
    KeyError is raised by the attribute lookup where no such attribute exists.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute on 'node' with the given 'name'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the given 'ns' and qualified 'name', making a
    namespace definition on the new element. The 'node' parameter is not
    consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    """
    Return a new element with the given 'name'. The 'node' parameter is not
    consulted.
    """

    name = from_unicode(name)

    new_node = libxml2mod.xmlNewNode(name)
    return new_node

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute made on 'node' with the given 'ns' and qualified
    'name', finding or making a namespace definition on 'node' where 'ns' is
    not None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    new_node = libxml2mod.xmlNewNsProp(node, new_ns, localName, None)
    return new_node

def Node_createAttribute(node, name):

    "Return a new attribute made on 'node' with the given 'name'."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    """
    Return a new text node with the given 'value'. The 'node' parameter is not
    consulted.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    """
    Return a new comment node with the given 'value'. The 'node' parameter is
    not consulted.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section with the given 'value', made using the document
    associated with 'node'.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling preceding 'oldNode', returning the inserted
    node. The 'node' parameter is not consulted.
    """

    # Work around libxml2 tendency to merge text nodes and free nodes silently.

    if libxml2mod.type(tmp) == "text":
        placeholder = libxml2mod.xmlNewNode("tmp")
        placeholder = libxml2mod.xmlAddPrevSibling(oldNode, placeholder)
        libxml2mod.xmlReplaceNode(placeholder, tmp)
        return tmp
    else:
        return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', returning the result of xmlReplaceNode. The
    'node' parameter is not consulted.
    """

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node', returning the added node."

    # Work around libxml2 tendency to merge text nodes and free nodes silently.

    if libxml2mod.type(tmp) == "text":
        placeholder = libxml2mod.xmlNewNode("tmp")
        placeholder = libxml2mod.xmlAddChild(node, placeholder)
        libxml2mod.xmlReplaceNode(placeholder, tmp)
        return tmp
    else:
        return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree. The 'node' parameter is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using 'node', copying
    the descendants of an element where 'deep' is set to a true value.
    Elements, text nodes, comments and CDATA sections are supported; an
    xml.dom.NotSupportedErr is raised for other mapped node types.
    """

    if Node_nodeType(other) == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif Node_nodeType(other) == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif Node_nodeType(other) == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif Node_nodeType(other) == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # Report the type label of the node, not the opaque node object itself, so
    # that the message resembles that produced by Node_importNode_DOM.

    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (libxml2mod.type(other), Node_nodeType(other))
        )

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of the DOM node object 'other' made using 'node', copying the
    descendants of an element where 'deep' is set to a true value. Elements,
    text nodes, comments and CDATA sections are supported; an
    xml.dom.NotSupportedErr is raised for other mapped node types.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes[other.nodeType], other.nodeType)
        )

def Node_getElementById(doc, identifier):

    """
    Return the parent of the node found by xmlGetID in 'doc' for the given
    'identifier', or None where nothing is found.
    """

    node = libxml2mod.xmlGetID(doc, identifier)
    if node is None:
        return None
    else:
        return Node_parentNode(node)

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of the evaluation. The optional 'namespaces'
    dictionary maps prefixes to namespaces for use in the expression. The
    'variables' parameter is currently not used.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.

    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

def Node_xinclude(node):

    """
    Process XInclude directives for 'node', returning the result of the
    processing. An XIncludeException is raised where the result is -1.
    """

    result = libxml2mod.xmlXIncludeProcessFlags(node, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    if result == -1:
        raise XIncludeException()
    else:
        return result

# Exceptions.

class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        exctype, excdata = self.args[0:2]
        return "LSException(%d, %r)" % (exctype, excdata)

    def __str__(self):
        exctype, excdata = self.args[0:2]
        if exctype == self.PARSE_ERR:
            return "Parse error: %s" % excdata
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: %s" % excdata
        else:
            return repr(self)

class XIncludeException(Exception):

    "Unstandardised XInclude exception."

    pass

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element is
    made using 'namespaceURI' and 'localName'; where 'doctype' is not None, an
    internal subset is made using its localName, publicId and systemId
    attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the given 'stream_or_string', where an object having a read method
    is read and parsed as a string, and where anything else is treated as a
    filename. The remaining parameters are passed on to parseString or
    parseFile.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote)

def parseFile(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the file having the filename 's', returning a document. Where 'html'
    is set to a true value, the HTML parser is used with 'htmlencoding'. An
    LSException is raised where parsing fails.
    """

    if not html:
        context = libxml2mod.xmlCreateFileParserCtxt(s)
        return _parseXML(context, unfinished, validate, remote)
    else:
        d = libxml2mod.htmlReadFile(s, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))
        if d is None:
            raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR, get_parse_error_message()))
        return d

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the string 's', returning a document. Where 'html' is set to a true
    value, the HTML parser is used with 'htmlencoding'. An LSException is
    raised where parsing fails.
    """

    if not html:
        context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
        return _parseXML(context, unfinished, validate, remote)
    else:
        # NOTE: URL given as None.
        html_url = None
        d = libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))
        if d is None:
            raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR, get_parse_error_message()))
        return d

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the resource having the given 'uri', returning a document. HTML
    parsing is not supported and causes a NotImplementedError to be raised.
    """

    if not html:
        context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
        return _parseXML(context, unfinished, validate, remote)
    else:
        # Use the call form of raise: the "raise Class, value" statement form
        # is specific to Python 2.
        raise NotImplementedError("parseURI does not yet support HTML")

def _parseXML(context, unfinished, validate, remote):

    """
    Parse a document using the given parser 'context', returning the document
    and freeing the context. An LSException is raised where 'context' is None,
    where 'validate' is set and the document is not valid, or where a fatal
    error occurred, although not for an unfinished tag where 'unfinished' is
    set.
    """

    if context is None:
        raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR))

    # Perform all work inside the guarded section so that the context is freed
    # even if configuration or parsing raises an exception.

    error = None

    try:
        Parser_configure(context, validate, remote)
        Parser_parse(context)
        doc = Parser_document(context)
        error = Parser_error()

        if validate and not Parser_valid(context):

            # NOTE: May not be the correct exception.

            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document did not validate"
                    ))

        elif unfinished and (error is None or Parser_errorCode(error) == XML_ERR_TAG_NOT_FINISHED):

            # NOTE: There may be other unfinished conditions.

            return doc

        elif error is not None and Parser_errorLevel(error) == XML_ERR_FATAL:
            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document caused fatal error"
                    ))

        else:

            # NOTE: Could provide non-fatal errors or warnings.

            return doc

    finally:
        Parser_resetError(error)
        libxml2mod.xmlFreeParserCtxt(context)

def toString(node, encoding=None, prettyprint=0):

    "Return 'node' serialised using the given 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to the given 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using the given 'encoding' and 'prettyprint'."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_DTDVALID = 16
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

XML_ERR_NONE = 0
XML_ERR_WARNING = 1
XML_ERR_ERROR = 2
XML_ERR_FATAL = 3

XML_ERR_TAG_NOT_FINISHED = 77

def html_net_flag(remote):

    "Return the HTML parser flag forbidding network access unless 'remote' is set."

    if remote:
        return 0
    else:
        return HTML_PARSE_NONET

def xml_net_flag(remote):

    "Return the XML parser flag forbidding network access unless 'remote' is set."

    if remote:
        return 0
    else:
        return XML_PARSE_NONET

def xml_validate_flag(validate):

    "Return the XML parser flag requesting validation if 'validate' is set."

    if validate:
        return XML_PARSE_DTDVALID
    else:
        return 0

def get_parse_error_message():

    """
    Return a message describing the last error in terms of its filename, line
    and text, or None where no error is recorded.
    """

    error = Parser_error()
    if error is not None:
        filename = libxml2mod.xmlErrorGetFile(error)
        if filename is None:
            filename = "<string>"
        else:
            filename = repr(filename)
        line = libxml2mod.xmlErrorGetLine(error)
        error_message = libxml2mod.xmlErrorGetMessage(error).strip()
        return "Filename %s, line %d: %s" % (filename, line, error_message)
    else:
        return None

def Parser_error():

    "Return the last error recorded by libxml2."

    return libxml2mod.xmlGetLastError()

def Parser_resetError(error):

    "Reset the given 'error', or the last recorded error where 'error' is None."

    if error is None:
        return libxml2mod.xmlResetLastError()
    else:
        return libxml2mod.xmlResetError(error)

def Parser_errorLevel(error):

    "Return the level of the given 'error'."

    return libxml2mod.xmlErrorGetLevel(error)

def Parser_errorCode(error):

    "Return the code of the given 'error'."

    return libxml2mod.xmlErrorGetCode(error)

def Parser_push():

    "Return a new push parser context."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context, validate=0, remote=0):

    """
    Configure the parser 'context', suppressing error and warning reporting,
    forbidding network access unless 'remote' is set to a true value, and
    requesting validation if 'validate' is set to a true value.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    #libxml2mod.xmlParserSetValidate(context, validate)
    libxml2mod.xmlCtxtUseOptions(context,
        XML_PARSE_NOERROR | XML_PARSE_NOWARNING | xml_net_flag(remote) | xml_validate_flag(validate))

def Parser_feed(context, s, terminate=1):

    """
    Feed the string 's' to the push parser 'context'. The 'terminate' value is
    passed to xmlParseChunk as its final argument; the default of 1 preserves
    the value which was previously always supplied.
    """

    libxml2mod.xmlParseChunk(context, s, len(s), terminate)

def Parser_well_formed(context):

    "Return the well-formedness status held by the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_valid(context):

    "Return the validity status held by the parser 'context'."

    return libxml2mod.xmlParserGetIsValid(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# Schema and validation helper functions and classes.
# NOTE: Should potentially combine these with other definitions.

RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
XMLSCHEMA_NS = "http://www.w3.org/2001/XMLSchema"

def Document_schema(doc, namespaceURI):

    """
    Return a schema parsed from the document 'doc', where 'namespaceURI'
    selects RELAX NG, Schematron or XML Schema. None is returned for any other
    'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewDocParserCtxt(doc))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewDocParserCtxt(doc))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewDocParserCtxt(doc))
    else:
        return None

def Document_schemaFromString(s, namespaceURI):

    """
    Return a schema parsed from the string 's', where 'namespaceURI' selects
    RELAX NG, Schematron or XML Schema. None is returned for any other
    'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewMemParserCtxt(s, len(s)))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewMemParserCtxt(s, len(s)))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewMemParserCtxt(s, len(s)))
    else:
        return None

def Document_validate(schema, doc, error_handler, namespaceURI):

    """
    Validate 'doc' against 'schema', where 'namespaceURI' selects RELAX NG,
    Schematron or XML Schema validation, reporting problems to 'error_handler'
    and returning whether validation succeeded. Zero is returned for any other
    'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Document_validateRelaxNG(schema, doc, error_handler)
    elif namespaceURI == SCHEMATRON_NS:
        return Document_validateSchematron(schema, doc, error_handler)
    elif namespaceURI == XMLSCHEMA_NS:
        return Document_validateSchema(schema, doc, error_handler)
    else:
        return 0

def Document_validateRelaxNG(schema, doc, error_handler):

    """
    Validate 'doc' against the RELAX NG 'schema', reporting problems to
    'error_handler' and returning whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlRelaxNGNewValidCtxt(schema)
    handler = ValidationHandler(error_handler)
    libxml2mod.xmlRelaxNGSetValidErrors(validator_context, handler.error, handler.warning, None)
    try:
        status = libxml2mod.xmlRelaxNGValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlRelaxNGFreeValidCtxt(validator_context)

def Document_validateSchematron(schema, doc, error_handler):

    """
    Validate 'doc' against the Schematron 'schema', reporting problems to
    'error_handler' and returning whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchematronNewValidCtxt(schema)
    handler = ValidationHandler(error_handler)
    libxml2mod.xmlSchematronSetValidErrors(validator_context, handler.error, handler.warning, None)
    try:
        status = libxml2mod.xmlSchematronValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchematronFreeValidCtxt(validator_context)

def Document_validateSchema(schema, doc, error_handler):

    """
    Validate 'doc' against the XML Schema 'schema', reporting problems to
    'error_handler' and returning whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchemaNewValidCtxt(schema)
    handler = ValidationHandler(error_handler)
    libxml2mod.xmlSchemaSetValidErrors(validator_context, handler.error, handler.warning, None)
    try:
        status = libxml2mod.xmlSchemaValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchemaFreeValidCtxt(validator_context)

def Schema_parseRelaxNG(context):

    "Return the RELAX NG schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlRelaxNGParse(context)
    finally:
        libxml2mod.xmlRelaxNGFreeParserCtxt(context)

def Schema_parseSchematron(context):

    "Return the Schematron schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlSchematronParse(context)
    finally:
        libxml2mod.xmlSchematronFreeParserCtxt(context)

def Schema_parseSchema(context):

    "Return the XML Schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlSchemaParse(context)
    finally:
        libxml2mod.xmlSchemaFreeParserCtxt(context)

def Schema_free(schema, namespaceURI):

    """
    Free the given 'schema', where 'namespaceURI' selects the RELAX NG,
    Schematron or XML Schema function to use. Nothing is done for any other
    'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        libxml2mod.xmlRelaxNGFree(schema)
    elif namespaceURI == SCHEMATRON_NS:
        libxml2mod.xmlSchematronFree(schema)
    elif namespaceURI == XMLSCHEMA_NS:
        libxml2mod.xmlSchemaFree(schema)

class ValidationHandler:

    """
    A handler which collects validation errors and warnings and passes them to a
    DOMErrorHandler.
    """

    def __init__(self, error_handler):
        self.error_handler = error_handler

    def error(self, msg, arg):

        "Report 'msg' to the error handler as a fatal error, ignoring 'arg'."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_FATAL_ERROR, msg.strip()))

    def warning(self, msg, arg):

        "Report 'msg' to the error handler as a warning, ignoring 'arg'."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_WARNING, msg.strip()))

# vim: tabstop=4 expandtab shiftwidth=4