#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom
from libxml2dom.errors import DOMError

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# Global reconfiguration. This enables prettyprinting.

libxml2mod.xmlKeepBlanksDefault(0)

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod: Unicode objects
    are encoded as UTF-8, whereas any other value is returned unchanged provided
    that 'unicode(s)' succeeds. Raise TypeError where that conversion fails.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        # The call form of raise is equivalent to the old statement form.
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return the plain string 's' decoded from UTF-8 as a Unicode object; any
    other kind of value (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or None
    where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    if out_ns:
        return out_ns
    return None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. Names without a colon yield (None, name); names with more than one
    colon yield (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Always return a tuple, like the other branches.
        return tuple(parts)
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions declared on 'node' are searched first, then
    the namespace of 'node' itself is considered. Return None if nothing
    matches.
    """

    # Special treatment for XML namespace.

    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    # Search the declarations made on the node.

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    # Fall back to the namespace used by the node itself.

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning 1 for a match and 0 otherwise. A 'prefix' of None matches any
    prefix.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    else:
        return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter). Return the new definition node, or None where nothing was made.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return the first prefix of the form "NS0", "NS1", ... which is not used by
    a namespace definition declared on 'node'. The 'ns' parameter is not
    consulted.
    """

    prefixes = []
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        prefixes.append(libxml2mod.name(current))
        current = libxml2mod.next(current)

    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to a libxml2mod label. Where several
# labels share a constant, whichever is visited last by the loop is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes yields 0 for 'node' and 'other'."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document recorded by libxml2mod for 'node'."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels not present in the type mapping.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node'. The namespace is None for attributes having no namespace node.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node' as Unicode, or None where it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.
Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None where it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    return None

def Node_nodeName(node):

    "Return the name of 'node', qualified with its prefix where one exists."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element, or None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the unqualified name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None for a missing node or an XML document node."

    if node is None or libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None where there is no such node."

    if node is None:
        return None
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None where there is no such node."

    if node is None:
        return None
    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for the document 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute in namespace 'ns' named 'localName'
    or, failing that, a namespace definition for 'ns' whose prefix is
    'localName'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute of 'node' in namespace 'ns' with the
    given 'localName'. For the xmlns namespace, the namespace declared on
    'node' for the prefix 'localName' is returned instead. None is returned
    where no declaration is found.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        return None
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute of 'node' with the given 'name'."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node of 'node' for 'ns' and 'localName', raising
    KeyError where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node of 'node' with the given 'name' and no namespace,
    raising KeyError where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node' to
    'value', finding or making a namespace declaration on 'node' as required.
    Setting an xmlns-prefixed name in the xmlns namespace makes a namespace
    declaration instead of an attribute.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        # Remove any conflicting attribute. Node_hasAttributeNS is also true
        # where only a matching declaration exists; the removal is then a no-op.
        if Node_hasAttributeNS(node, ns, localName):
            Node_removeAttributeNS(node, ns, localName)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute with the given 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Copy the namespace, name and value of 'attr' onto 'node' as an attribute."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Copy the name and value of 'attr' onto 'node' as an attribute."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute of 'node' for 'ns' and 'localName'. Nothing happens
    where no such attribute exists.
    """

    # Missing attributes are tolerated so that this is safe to call whenever
    # Node_hasAttributeNS is true merely because of a namespace declaration.
    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute with the given 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in namespace 'ns', carrying
    whatever namespace declaration is needed. The 'node' parameter is not
    consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    "Return a new element with the given 'name'. The 'node' parameter is not consulted."

    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute with the qualified 'name' in namespace 'ns', made on
    'node' with no value. A namespace declaration is found or made on 'node'
    where 'ns' is not None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Return a new attribute with the given 'name' and no namespace, made on 'node'."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'. The 'node' parameter is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'. The 'node' parameter is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    "Return a new CDATA section containing 'value' for the document owning 'node'."

    value = from_unicode(value)

    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    "Add 'tmp' as the previous sibling of 'oldNode'. The 'node' parameter is not consulted."

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'. The 'node' parameter is not consulted."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its surroundings. The 'node' parameter is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made for use with 'node'.
    Elements (with their attributes and, if 'deep' is true, their children),
    text, comments and CDATA sections are supported. NotSupportedErr is raised
    for other node types.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # Report the libxml2mod type label rather than the opaque node object.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', an object offering the DOM node attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue, data),
    made for use with 'node'. The supported node types are those supported by
    Node_importNode. NotSupportedErr is raised for other node types.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # Node types absent from the reverse mapping (document fragments, for
    # example) must still produce NotSupportedErr rather than KeyError.
    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes.get(other.nodeType, "unknown"), other.nodeType)
        )

def Node_getElementById(doc, identifier):

    """
    Return the parent of the node found by xmlGetID in 'doc' for 'identifier',
    or None where nothing is found.
    """

    node = libxml2mod.xmlGetID(doc, identifier)
    if node is None:
        return None
    return Node_parentNode(node)

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result from xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces registered for the evaluation. The 'variables'
    parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation raises an exception.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

def Node_xinclude(node):

    """
    Process XInclude directives for 'node' with errors, warnings and network
    access suppressed, returning the result of xmlXIncludeProcessFlags. Raise
    XIncludeException where that result is -1.
    """

    result = libxml2mod.xmlXIncludeProcessFlags(node, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    if result == -1:
        raise XIncludeException()
    return result

# Exceptions.

class LSException(Exception):

    """
    DOM Level 3 Load/Save exception. Instances are expected to be created with
    two arguments: the exception type code and associated data.
    """

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        exctype, excdata = self.args[0:2]
        return "LSException(%d, %r)" % (exctype, excdata)

    def __str__(self):
        exctype, excdata = self.args[0:2]
        if exctype == self.PARSE_ERR:
            return "Parse error: %s" % excdata
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: %s" % excdata
        else:
            return repr(self)

class XIncludeException(Exception):

    "Unstandardised XInclude exception."

    pass

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new version 1.0 document. Where 'localName' is not None, a root
    element with that name in 'namespaceURI' is added. Where 'doctype' is not
    None, an internal subset is created from its localName, publicId and
    systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse 'stream_or_string' and return the resulting document. Objects having
    a 'read' attribute are read completely and handled by parseString; anything
    else is passed to parseFile. The remaining parameters are passed on to
    those functions.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote)

def parseFile(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the file named 's' and return the resulting document. Where 'html' is
    true, the HTML reader is used with 'htmlencoding', and 'unfinished' and
    'validate' are not consulted. Network access is disabled unless 'remote' is
    true.
    """

    if not html:
        context = libxml2mod.xmlCreateFileParserCtxt(s)
        return _parseXML(context, unfinished, validate, remote)
    else:
        return libxml2mod.htmlReadFile(s, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the string 's' and return the resulting document. Where 'html' is
    true, the HTML reader is used with 'htmlencoding', and 'unfinished' and
    'validate' are not consulted. Network access is disabled unless 'remote' is
    true.
    """

    if not html:
        context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
        return _parseXML(context, unfinished, validate, remote)
    else:
        # NOTE: URL given as None.
        html_url = None
        return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the XML resource at 'uri' and return the resulting document. HTML
    parsing is not supported and causes NotImplementedError to be raised.
    """

    if not html:
        context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
        return _parseXML(context, unfinished, validate, remote)
    else:
        raise NotImplementedError("parseURI does not yet support HTML")

def _parseXML(context, unfinished, validate, remote):

    """
    Parse using the given parser 'context' and return the resulting document,
    always freeing the context and resetting the recorded error afterwards.

    LSException is raised where 'context' is None, where 'validate' is true and
    the document is not valid, or where a fatal error was recorded. Where
    'unfinished' is true, a document with no error or with only an unfinished
    tag error is returned without complaint.
    """

    if context is None:
        raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR))

    # Configuration and parsing occur inside the try block so that the context
    # is freed even if they raise an exception.

    error = None

    try:
        Parser_configure(context, validate, remote)
        Parser_parse(context)
        doc = Parser_document(context)
        error = Parser_error()

        if validate and not Parser_valid(context):

            # NOTE: May not be the correct exception.

            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document did not validate"
                    ))

        elif unfinished and (error is None or Parser_errorCode(error) == XML_ERR_TAG_NOT_FINISHED):

            # NOTE: There may be other unfinished conditions.

            return doc

        elif error is not None and Parser_errorLevel(error) == XML_ERR_FATAL:
            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document caused fatal error"
                    ))

        else:

            # NOTE: Could provide non-fatal errors or warnings.

            return doc

    finally:
        Parser_resetError(error)
        libxml2mod.xmlFreeParserCtxt(context)

def toString(node, encoding=None, prettyprint=0):

    "Return the serialised form of 'node' using 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save the serialised form of 'node' to 'f' using saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_DTDVALID = 16
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

XML_ERR_NONE = 0
XML_ERR_WARNING = 1
XML_ERR_ERROR = 2
XML_ERR_FATAL = 3

XML_ERR_TAG_NOT_FINISHED = 77

def html_net_flag(remote):

    "Return 0 where 'remote' is true, or HTML_PARSE_NONET otherwise."

    if remote:
        return 0
    return HTML_PARSE_NONET

def xml_net_flag(remote):

    "Return 0 where 'remote' is true, or XML_PARSE_NONET otherwise."

    if remote:
        return 0
    return XML_PARSE_NONET

def xml_validate_flag(validate):

    "Return XML_PARSE_DTDVALID where 'validate' is true, or 0 otherwise."

    if validate:
        return XML_PARSE_DTDVALID
    return 0

def get_parse_error_message():

    """
    Return a message describing the last recorded error, giving the filename,
    line and error text, or None where no error is recorded.
    """

    error = Parser_error()
    if error is None:
        return None

    filename = libxml2mod.xmlErrorGetFile(error)
    if filename is None:
        filename = "<string>"
    else:
        filename = repr(filename)
    line = libxml2mod.xmlErrorGetLine(error)
    # Tolerate an error having no message text.
    error_message = (libxml2mod.xmlErrorGetMessage(error) or "").strip()
    return "Filename %s, line %d: %s" % (filename, line, error_message)

def Parser_error():

    "Return the last error recorded by libxml2mod."

    return libxml2mod.xmlGetLastError()

def Parser_resetError(error):

    "Reset 'error', or the last recorded error where 'error' is None."

    if error is None:
        return libxml2mod.xmlResetLastError()
    return libxml2mod.xmlResetError(error)

def Parser_errorLevel(error):

    "Return the level of 'error'."

    return libxml2mod.xmlErrorGetLevel(error)

def Parser_errorCode(error):

    "Return the code of 'error'."

    return libxml2mod.xmlErrorGetCode(error)
def Parser_push():

    "Return a new push parser context created with no handler, data or filename."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context, validate, remote):

    """
    Configure the parser 'context': pedantic mode is switched off, errors and
    warnings are suppressed, network access is disabled unless 'remote' is
    true, and DTD validation is enabled where 'validate' is true.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    #libxml2mod.xmlParserSetValidate(context, validate)
    libxml2mod.xmlCtxtUseOptions(context,
        XML_PARSE_NOERROR | XML_PARSE_NOWARNING | xml_net_flag(remote) | xml_validate_flag(validate))

def Parser_feed(context, s):

    "Supply the string 's' to the parser 'context' using xmlParseChunk."

    # NOTE(review): The final argument is presumably libxml2's "terminate"
    # NOTE(review): flag and is always 1 here - confirm that this is intended.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status recorded in the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_valid(context):

    "Return the validity status recorded in the parser 'context'."

    return libxml2mod.xmlParserGetIsValid(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# Schema and validation helper functions and classes.
# NOTE: Should potentially combine these with other definitions.

RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
XMLSCHEMA_NS = "http://www.w3.org/2001/XMLSchema"

def Document_schema(doc, namespaceURI):

    """
    Return a schema parsed from the document 'doc', where 'namespaceURI'
    selects the kind of schema (RELAX NG, Schematron or XML Schema). None is
    returned for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewDocParserCtxt(doc))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewDocParserCtxt(doc))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewDocParserCtxt(doc))
    else:
        return None

def Document_schemaFromString(s, namespaceURI):

    """
    Return a schema parsed from the string 's', where 'namespaceURI' selects
    the kind of schema (RELAX NG, Schematron or XML Schema). None is returned
    for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewMemParserCtxt(s, len(s)))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewMemParserCtxt(s, len(s)))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewMemParserCtxt(s, len(s)))
    else:
        return None

def Document_validate(schema, doc, error_handler, namespaceURI):

    """
    Validate 'doc' against 'schema', whose kind is selected by 'namespaceURI',
    reporting problems to 'error_handler'. Return whether validation succeeded,
    or 0 where 'namespaceURI' is not a recognised schema namespace.
    """

    if namespaceURI == RELAXNG_NS:
        return Document_validateRelaxNG(schema, doc, error_handler)
    elif namespaceURI == SCHEMATRON_NS:
        return Document_validateSchematron(schema, doc, error_handler)
    elif namespaceURI == XMLSCHEMA_NS:
        return Document_validateSchema(schema, doc, error_handler)
    else:
        return 0

def Document_validateRelaxNG(schema, doc, error_handler):

    """
    Validate 'doc' against the RELAX NG 'schema', reporting problems to
    'error_handler', and return whether the validation status was 0.
    """

    validator_context = libxml2mod.xmlRelaxNGNewValidCtxt(schema)

    # Register the callbacks inside the try block so that the context is freed
    # even if registration fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlRelaxNGSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlRelaxNGValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlRelaxNGFreeValidCtxt(validator_context)

def Document_validateSchematron(schema, doc, error_handler):

    """
    Validate 'doc' against the Schematron 'schema', reporting problems to
    'error_handler', and return whether the validation status was 0.
    """

    validator_context = libxml2mod.xmlSchematronNewValidCtxt(schema)

    # Register the callbacks inside the try block so that the context is freed
    # even if registration fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchematronSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchematronValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchematronFreeValidCtxt(validator_context)

def Document_validateSchema(schema, doc, error_handler):

    """
    Validate 'doc' against the XML Schema 'schema', reporting problems to
    'error_handler', and return whether the validation status was 0.
    """

    validator_context = libxml2mod.xmlSchemaNewValidCtxt(schema)

    # Register the callbacks inside the try block so that the context is freed
    # even if registration fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchemaSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchemaValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchemaFreeValidCtxt(validator_context)

def Schema_parseRelaxNG(context):

    "Return the RELAX NG schema parsed using 'context', always freeing 'context'."

    try:
        return libxml2mod.xmlRelaxNGParse(context)
    finally:
        libxml2mod.xmlRelaxNGFreeParserCtxt(context)

def Schema_parseSchematron(context):

    "Return the Schematron schema parsed using 'context', always freeing 'context'."

    try:
        return libxml2mod.xmlSchematronParse(context)
    finally:
        libxml2mod.xmlSchematronFreeParserCtxt(context)

def Schema_parseSchema(context):

    "Return the XML Schema parsed using 'context', always freeing 'context'."

    try:
        return libxml2mod.xmlSchemaParse(context)
    finally:
        libxml2mod.xmlSchemaFreeParserCtxt(context)

def Schema_free(schema, namespaceURI):

    """
    Free 'schema' using the function appropriate to the kind of schema selected
    by 'namespaceURI'. Nothing is done for an unrecognised 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        libxml2mod.xmlRelaxNGFree(schema)
    elif namespaceURI == SCHEMATRON_NS:
        libxml2mod.xmlSchematronFree(schema)
    elif namespaceURI == XMLSCHEMA_NS:
        libxml2mod.xmlSchemaFree(schema)

class ValidationHandler:

    """
    A handler which collects validation errors and warnings and passes them to a
    DOMErrorHandler.
    """

    def __init__(self, error_handler):

        "Initialise the handler with 'error_handler', an object providing a handleError method."

        self.error_handler = error_handler

    def error(self, msg, arg):

        "Report 'msg', stripped of surrounding whitespace, as a fatal error. The 'arg' parameter is ignored."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_FATAL_ERROR, msg.strip()))

    def warning(self, msg, arg):

        "Report 'msg', stripped of surrounding whitespace, as a warning. The 'arg' parameter is ignored."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_WARNING, msg.strip()))

# vim: tabstop=4 expandtab shiftwidth=4