libxml2dom (file libxml2dom/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU Lesser General Public License as published by the Free    10 Software Foundation; either version 3 of the License, or (at your option) any    11 later version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more    16 details.    17     18 You should have received a copy of the GNU Lesser General Public License along    19 with this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 __version__ = "0.4.7"    23     24 from libxml2dom.macrolib import *    25 from libxml2dom.macrolib import \    26     createDocument as Node_createDocument, \    27     parseString as Node_parseString, parseURI as Node_parseURI, \    28     parseFile as Node_parseFile, \    29     toString as Node_toString, toStream as Node_toStream, \    30     toFile as Node_toFile    31 import urllib # for parseURI in HTML mode    32 import libxml2dom.errors    33     34 # Standard namespaces.    35     36 XML_NAMESPACE = xml.dom.XML_NAMESPACE    37     38 # Default namespace bindings for XPath.    39     40 default_ns = {    41     "xml" : XML_NAMESPACE    42     }    43     44 class Implementation(object):    45     46     "Contains an abstraction over the DOM implementation."    
47     48     def createDocumentType(self, localName, publicId, systemId):    49         return DocumentType(localName, publicId, systemId)    50     51     def createDocument(self, namespaceURI, localName, doctype):    52         return Document(Node_createDocument(namespaceURI, localName, doctype), self)    53     54     # Wrapping of documents.    55     56     def adoptDocument(self, node):    57         return Document(node, self)    58     59     # Factory functions.    60     61     def get_node(self, _node, context_node):    62     63         # Return the existing document.    64     65         if Node_nodeType(_node) == context_node.DOCUMENT_NODE:    66             return context_node.ownerDocument    67     68         # Return an attribute using the parent of the attribute as the owner    69         # element.    70     71         elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:    72             return Attribute(_node, self, context_node.ownerDocument,    73                 self.get_node(Node_parentNode(_node), context_node))    74     75         # Return other nodes.    76     77         else:    78             return Node(_node, self, context_node.ownerDocument)    79     80     def get_node_or_none(self, _node, context_node):    81         if _node is None:    82             return None    83         else:    84             return self.get_node(_node, context_node)    85     86 # Attribute and node list wrappers.    87     88 class NamedNodeMap(object):    89     90     """    91     A wrapper around Node objects providing DOM and dictionary convenience    92     methods.    
93     """    94     95     def __init__(self, node, impl):    96         self.node = node    97         self.impl = impl    98     99     def getNamedItem(self, name):   100         return self.node.getAttributeNode(name)   101    102     def getNamedItemNS(self, ns, localName):   103         return self.node.getAttributeNodeNS(ns, localName)   104    105     def setNamedItem(self, node):   106         try:   107             old = self.getNamedItem(node.nodeName)   108         except KeyError:   109             old = None   110         self.node.setAttributeNode(node)   111         return old   112    113     def setNamedItemNS(self, node):   114         try:   115             old = self.getNamedItemNS(node.namespaceURI, node.localName)   116         except KeyError:   117             old = None   118         self.node.setAttributeNodeNS(node)   119         return old   120    121     def removeNamedItem(self, name):   122         try:   123             old = self.getNamedItem(name)   124         except KeyError:   125             old = None   126         self.node.removeAttribute(name)   127         return old   128    129     def removeNamedItemNS(self, ns, localName):   130         try:   131             old = self.getNamedItemNS(ns, localName)   132         except KeyError:   133             old = None   134         self.node.removeAttributeNS(ns, localName)   135         return old   136    137     # Iterator emulation.   138    139     def __iter__(self):   140         return NamedNodeMapIterator(self)   141    142     # Dictionary emulation methods.   143    144     def __getitem__(self, name):   145         return self.getNamedItem(name)   146    147     def __setitem__(self, name, node):   148         if name == node.nodeName:   149             self.setNamedItem(node)   150         else:   151             raise KeyError, name   152    153     def __delitem__(self, name):   154         # NOTE: To be implemented.   
155         pass   156    157     def values(self):   158         return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   159    160     def keys(self):   161         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   162    163     def items(self):   164         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   165    166     def __repr__(self):   167         return str(self)   168    169     def __str__(self):   170         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   171    172     def _length(self):   173         return len(self.values())   174    175     length = property(_length)   176    177 class NamedNodeMapIterator(object):   178    179     "An iterator over a NamedNodeMap."   180    181     def __init__(self, nodemap):   182         self.nodemap = nodemap   183         self.items = self.nodemap.items()   184    185     def next(self):   186         if self.items:   187             current = self.items[0][1]   188             self.items = self.items[1:]   189             return current   190         else:   191             raise StopIteration   192    193 class NodeList(list):   194    195     "A wrapper around node lists."   196    197     def item(self, index):   198         return self[index]   199    200     def _length(self):   201         return len(self)   202    203     length = property(_length)   204    205 # Node classes.   206    207 class Node(object):   208    209     """   210     A DOM-style wrapper around libxml2mod objects.   
211     """   212    213     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   214     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   215     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   216     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   217     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   218     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   219     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   220     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   221     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   222     TEXT_NODE = xml.dom.Node.TEXT_NODE   223    224     def __init__(self, node, impl=None, ownerDocument=None):   225         self._node = node   226         self.impl = impl or default_impl   227         self.ownerDocument = ownerDocument   228    229     def as_native_node(self):   230         return self._node   231    232     def _nodeType(self):   233         return Node_nodeType(self._node)   234    235     def _childNodes(self):   236    237         # NOTE: Consider a generator instead.   
238    239         return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])   240    241     def _firstChild(self):   242         return (self.childNodes or [None])[0]   243    244     def _lastChild(self):   245         return (self.childNodes or [None])[-1]   246    247     def _attributes(self):   248         return NamedNodeMap(self, self.impl)   249    250     def _namespaceURI(self):   251         return Node_namespaceURI(self._node)   252    253     def _textContent(self):   254         return Node_textContent(self._node)   255    256     def _nodeValue(self):   257         if self.nodeType in null_value_node_types:   258             return None   259         return Node_nodeValue(self._node)   260    261     def _setNodeValue(self, value):   262         Node_setNodeValue(self._node, value)   263    264     def _prefix(self):   265         return Node_prefix(self._node)   266    267     def _nodeName(self):   268         return Node_nodeName(self._node)   269    270     def _tagName(self):   271         return Node_tagName(self._node)   272    273     def _localName(self):   274         return Node_localName(self._node)   275    276     def _parentNode(self):   277         return self.impl.get_node_or_none(Node_parentNode(self._node), self)   278    279     def _previousSibling(self):   280         return self.impl.get_node_or_none(Node_previousSibling(self._node), self)   281    282     def _nextSibling(self):   283         return self.impl.get_node_or_none(Node_nextSibling(self._node), self)   284    285     def _doctype(self):   286         _doctype = Node_doctype(self._node)   287         if _doctype is not None:   288             return self.impl.get_node(_doctype, self)   289         else:   290             return None   291    292     def _publicId(self):   293         # NOTE: To be fixed when the libxml2mod API has been figured out.   
294         if self.nodeType != self.DOCUMENT_TYPE_NODE:   295             return None   296         declaration = self.toString()   297         return self._findId(declaration, "PUBLIC")   298    299     def _systemId(self):   300         # NOTE: To be fixed when the libxml2mod API has been figured out.   301         if self.nodeType != self.DOCUMENT_TYPE_NODE:   302             return None   303         declaration = self.toString()   304         if self._findId(declaration, "PUBLIC"):   305             return self._findIdValue(declaration, 0)   306         return self._findId(declaration, "SYSTEM")   307    308     # NOTE: To be removed when the libxml2mod API has been figured out.   309    310     def _findId(self, declaration, identifier):   311         i = declaration.find(identifier)   312         if i == -1:   313             return None   314         return self._findIdValue(declaration, i)   315    316     def _findIdValue(self, declaration, i):   317         q = declaration.find('"', i)   318         if q == -1:   319             return None   320         q2 = declaration.find('"', q + 1)   321         if q2 == -1:   322             return None   323         return declaration[q+1:q2]   324    325     def hasAttributeNS(self, ns, localName):   326         return Node_hasAttributeNS(self._node, ns, localName)   327    328     def hasAttribute(self, name):   329         return Node_hasAttribute(self._node, name)   330    331     def getAttributeNS(self, ns, localName):   332         return Node_getAttributeNS(self._node, ns, localName)   333    334     def getAttribute(self, name):   335         return Node_getAttribute(self._node, name)   336    337     def getAttributeNodeNS(self, ns, localName):   338         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)   339    340     def getAttributeNode(self, localName):   341         return Attribute(Node_getAttributeNode(self._node, localName), 
self.impl, self.ownerDocument, self)   342    343     def setAttributeNS(self, ns, name, value):   344         Node_setAttributeNS(self._node, ns, name, value)   345    346     def setAttribute(self, name, value):   347         Node_setAttribute(self._node, name, value)   348    349     def setAttributeNodeNS(self, node):   350         Node_setAttributeNodeNS(self._node, node._node)   351    352     def setAttributeNode(self, node):   353         Node_setAttributeNode(self._node, node._node)   354    355     def removeAttributeNS(self, ns, localName):   356         Node_removeAttributeNS(self._node, ns, localName)   357    358     def removeAttribute(self, name):   359         Node_removeAttribute(self._node, name)   360    361     def createElementNS(self, ns, name):   362         return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)   363    364     def createElement(self, name):   365         return self.impl.get_node(Node_createElement(self._node, name), self)   366    367     def createAttributeNS(self, ns, name):   368         tmp = self.createElement("tmp")   369         return Attribute(Node_createAttributeNS(tmp._node, self.impl, ns, name))   370    371     def createAttribute(self, name):   372         tmp = self.createElement("tmp")   373         return Attribute(Node_createAttribute(tmp._node, name), self.impl)   374    375     def createTextNode(self, value):   376         return self.impl.get_node(Node_createTextNode(self._node, value), self)   377    378     def createComment(self, value):   379         return self.impl.get_node(Node_createComment(self._node, value), self)   380    381     def createCDATASection(self, value):   382         return self.impl.get_node(Node_createCDATASection(self._node, value), self)   383    384     def importNode(self, node, deep):   385         if hasattr(node, "as_native_node"):   386             return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)   387     
    else:   388             return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)   389    390     def cloneNode(self, deep):   391         # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).   392         return self.importNode(self, deep)   393    394     def insertBefore(self, tmp, oldNode):   395         if tmp.ownerDocument != self.ownerDocument:   396             raise xml.dom.WrongDocumentErr()   397         if oldNode.parentNode != self:   398             raise xml.dom.NotFoundErr()   399         if hasattr(tmp, "as_native_node"):   400             return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   401         else:   402             return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)   403    404     def replaceChild(self, tmp, oldNode):   405         if tmp.ownerDocument != self.ownerDocument:   406             raise xml.dom.WrongDocumentErr()   407         if oldNode.parentNode != self:   408             raise xml.dom.NotFoundErr()   409         if hasattr(tmp, "as_native_node"):   410             return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   411         else:   412             return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)   413    414     def appendChild(self, tmp):   415         if tmp.ownerDocument != self.ownerDocument:   416             raise xml.dom.WrongDocumentErr()   417         if hasattr(tmp, "as_native_node"):   418             return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)   419         else:   420             return self.impl.get_node(Node_appendChild(self._node, tmp), self)   421    422     def removeChild(self, tmp):   423         if hasattr(tmp, "as_native_node"):   424             Node_removeChild(self._node, 
tmp.as_native_node())   425         else:   426             Node_removeChild(self._node, tmp)   427         return tmp   428    429     def getElementById(self, identifier):   430         _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)   431         if _node is None:   432             return None   433         else:   434             return self.impl.get_node(_node, self)   435    436     def getElementsByTagName(self, tagName):   437         return self.xpath(".//" + tagName)   438    439     def getElementsByTagNameNS(self, namespaceURI, localName):   440         return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})   441    442     def normalize(self):   443         text_nodes = []   444         for node in self.childNodes:   445             if node.nodeType == node.TEXT_NODE:   446                 text_nodes.append(node)   447             elif len(text_nodes) != 0:   448                 self._normalize(text_nodes)   449                 text_nodes = []   450         if len(text_nodes) != 0:   451             self._normalize(text_nodes)   452    453     def _normalize(self, text_nodes):   454         texts = []   455         for text_node in text_nodes[:-1]:   456             texts.append(text_node.nodeValue)   457             self.removeChild(text_node)   458         texts.append(text_nodes[-1].nodeValue)   459         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   460    461     childNodes = property(_childNodes)   462     firstChild = property(_firstChild)   463     lastChild = property(_lastChild)   464     value = data = nodeValue = property(_nodeValue, _setNodeValue)   465     textContent = property(_textContent)   466     name = nodeName = property(_nodeName)   467     tagName = property(_tagName)   468     namespaceURI = property(_namespaceURI)   469     prefix = property(_prefix)   470     localName = property(_localName)   471     parentNode = property(_parentNode)  
 472     nodeType = property(_nodeType)   473     attributes = property(_attributes)   474     previousSibling = property(_previousSibling)   475     nextSibling = property(_nextSibling)   476     doctype = property(_doctype)   477     publicId = property(_publicId)   478     systemId = property(_systemId)   479    480     # NOTE: To be fixed - these being doctype-specific values.   481    482     entities = {}   483     notations = {}   484    485     def isSameNode(self, other):   486         return self == other   487    488     def __hash__(self):   489         return hash(self.localName)   490    491     def __eq__(self, other):   492         return isinstance(other, Node) and Node_equals(self._node, other._node)   493    494     def __ne__(self, other):   495         return not (self == other)   496    497     # 4DOM extensions to the usual PyXML API.   498     # NOTE: To be finished.   499    500     def xpath(self, expr, variables=None, namespaces=None):   501    502         """   503         Evaluate the given expression 'expr' using the optional 'variables' and   504         'namespaces' mappings.   505         """   506    507         ns = {}   508         ns.update(default_ns)   509         ns.update(namespaces or {})   510         result = Node_xpath(self._node, expr, variables, ns)   511         if isinstance(result, str):   512             return to_unicode(result)   513         elif hasattr(result, "__len__"):   514             return NodeList([self.impl.get_node(_node, self) for _node in result])   515         else:   516             return result   517    518     # Other extensions to the usual PyXML API.   519    520     def xinclude(self):   521    522         """   523         Process XInclude declarations within the document, returning the number   524         of substitutions performed (zero or more), raising an XIncludeException   525         otherwise.   
526         """   527    528         return Node_xinclude(self._node)   529    530     # Convenience methods.   531    532     def toString(self, encoding=None, prettyprint=0):   533         return toString(self, encoding, prettyprint)   534    535     def toStream(self, stream, encoding=None, prettyprint=0):   536         toStream(self, stream, encoding, prettyprint)   537    538     def toFile(self, f, encoding=None, prettyprint=0):   539         toFile(self, f, encoding, prettyprint)   540    541 # Attribute nodes.   542    543 class Attribute(Node):   544    545     "A class providing attribute access."   546    547     def __init__(self, node, impl, ownerDocument=None, ownerElement=None):   548         Node.__init__(self, node, impl, ownerDocument)   549         self.ownerElement = ownerElement   550    551     def _parentNode(self):   552         return self.ownerElement   553    554     parentNode = property(_parentNode)   555    556 # Document housekeeping mechanisms.   557    558 class _Document:   559    560     """   561     An abstract class providing document-level housekeeping and distinct   562     functionality. Configuration of the document is also supported.   563     See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration   564     """   565    566     # Constants from    567     # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL   568    569     VAL_TRUE = 5   570     VAL_FALSE = 6   571     VAL_UNKNOWN = 7   572    573     def __init__(self, node, impl):   574         self._node = node   575         self.implementation = self.impl = impl   576         self.error_handler = libxml2dom.errors.DOMErrorHandler()   577    578     # Standard DOM properties and their implementations.   
579    580     def _documentElement(self):   581         return self.xpath("*")[0]   582    583     def _ownerDocument(self):   584         return self   585    586     def __del__(self):   587         #print "Freeing document", self._node   588         libxml2mod.xmlFreeDoc(self._node)   589    590     documentElement = property(_documentElement)   591     ownerDocument = property(_ownerDocument)   592    593     # DOM Level 3 Core DOMConfiguration methods.   594    595     def setParameter(self, name, value):   596         if name == "error-handler":   597             raise xml.dom.NotSupportedErr()   598         raise xml.dom.NotFoundErr()   599    600     def getParameter(self, name):   601         if name == "error-handler":   602             return self.error_handler   603         raise xml.dom.NotFoundErr()   604    605     def canSetParameter(self, name, value):   606         return 0   607    608     def _parameterNames(self):   609         return []   610    611     # Extensions to the usual PyXML API.   612    613     def validate(self, doc):   614    615         """   616         Validate the document against the given schema document, 'doc'.   617         """   618    619         validation_ns = doc.documentElement.namespaceURI   620    621         if hasattr(doc, "as_native_node"):   622             _schema = Document_schema(doc.as_native_node(), validation_ns)   623         else:   624             _schema = Document_schemaFromString(doc.toString(), validation_ns)   625         try:   626             self.error_handler.reset()   627             return Document_validate(_schema, self._node, self.error_handler, validation_ns)   628         finally:   629             Schema_free(_schema, validation_ns)   630    631     # DOM Level 3 Validation methods.   632    633     def validateDocument(self, doc):   634    635         """   636         Validate the document against the given schema document, 'doc'.   
637         See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument   638         """   639    640         return self.validate(doc) and self.VAL_TRUE or self.VAL_FALSE   641    642 class Document(_Document, Node):   643    644     """   645     A generic document class. Specialised document classes should inherit from   646     the _Document class and their own variation of Node.   647     """   648    649     pass   650    651 class DocumentType(object):   652    653     "A class providing a container for document type information."   654    655     def __init__(self, localName, publicId, systemId):   656         self.name = self.localName = localName   657         self.publicId = publicId   658         self.systemId = systemId   659    660         # NOTE: Nothing is currently provided to support the following   661         # NOTE: attributes.   662    663         self.entities = {}   664         self.notations = {}   665    666 # Constants.   667    668 null_value_node_types = [   669     Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,   670     Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE   671     ]   672    673 # Utility functions.   674    675 def createDocumentType(localName, publicId, systemId):   676     return default_impl.createDocumentType(localName, publicId, systemId)   677    678 def createDocument(namespaceURI, localName, doctype):   679     return default_impl.createDocument(namespaceURI, localName, doctype)   680    681 def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):   682    683     """   684     Parse the given 'stream_or_string', where the supplied object can either be   685     a stream (such as a file or stream object), or a string (containing the   686     filename of a document). The optional parameters described below should be   687     provided as keyword arguments.   
688    689     If the optional 'html' parameter is set to a true value, the content to be   690     parsed will be treated as being HTML rather than XML. If the optional   691     'htmlencoding' is specified, HTML parsing will be performed with the   692     document encoding assumed to that specified.   693    694     If the optional 'unfinished' parameter is set to a true value, unfinished   695     documents will be parsed, even though such documents may be missing content   696     such as closing tags.   697    698     If the optional 'validate' parameter is set to a true value, an attempt will   699     be made to validate the parsed document.   700    701     If the optional 'remote' parameter is set to a true value, references to   702     remote documents (such as DTDs) will be followed in order to obtain such   703     documents.   704    705     A document object is returned by this function.   706     """   707    708     impl = impl or default_impl   709    710     if hasattr(stream_or_string, "read"):   711         stream = stream_or_string   712         return parseString(stream.read(), html=html, htmlencoding=htmlencoding,   713             unfinished=unfinished, validate=validate, remote=remote, impl=impl)   714     else:   715         return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,   716             unfinished=unfinished, validate=validate, remote=remote, impl=impl)   717    718 def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):   719    720     """   721     Parse the file having the given 'filename'. The optional parameters   722     described below should be provided as keyword arguments.   723    724     If the optional 'html' parameter is set to a true value, the content to be   725     parsed will be treated as being HTML rather than XML. 
If the optional   726     'htmlencoding' is specified, HTML parsing will be performed with the   727     document encoding assumed to that specified.   728    729     If the optional 'unfinished' parameter is set to a true value, unfinished   730     documents will be parsed, even though such documents may be missing content   731     such as closing tags.   732    733     If the optional 'validate' parameter is set to a true value, an attempt will   734     be made to validate the parsed document.   735    736     If the optional 'remote' parameter is set to a true value, references to   737     remote documents (such as DTDs) will be followed in order to obtain such   738     documents.   739    740     A document object is returned by this function.   741     """   742    743     impl = impl or default_impl   744     return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,   745         unfinished=unfinished, validate=validate, remote=remote))   746    747 def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):   748    749     """   750     Parse the content of the given string 's'. The optional parameters described   751     below should be provided as keyword arguments.   752    753     If the optional 'html' parameter is set to a true value, the content to be   754     parsed will be treated as being HTML rather than XML. If the optional   755     'htmlencoding' is specified, HTML parsing will be performed with the   756     document encoding assumed to that specified.   757    758     If the optional 'unfinished' parameter is set to a true value, unfinished   759     documents will be parsed, even though such documents may be missing content   760     such as closing tags.   761    762     If the optional 'validate' parameter is set to a true value, an attempt will   763     be made to validate the parsed document.   
764    765     If the optional 'remote' parameter is set to a true value, references to   766     remote documents (such as DTDs) will be followed in order to obtain such   767     documents.   768    769     A document object is returned by this function.   770     """   771    772     impl = impl or default_impl   773     return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,   774         unfinished=unfinished, validate=validate, remote=remote))   775    776 def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):   777    778     """   779     Parse the content found at the given 'uri'. The optional parameters   780     described below should be provided as keyword arguments.   781    782     If the optional 'html' parameter is set to a true value, the content to be   783     parsed will be treated as being HTML rather than XML. If the optional   784     'htmlencoding' is specified, HTML parsing will be performed with the   785     document encoding assumed to that specified.   786    787     If the optional 'unfinished' parameter is set to a true value, unfinished   788     documents will be parsed, even though such documents may be missing content   789     such as closing tags.   790    791     If the optional 'validate' parameter is set to a true value, an attempt will   792     be made to validate the parsed document.   793    794     If the optional 'remote' parameter is set to a true value, references to   795     remote documents (such as DTDs) will be followed in order to obtain such   796     documents.   797    798     XML documents are retrieved using libxml2's own network capabilities; HTML   799     documents are retrieved using the urllib module provided by Python. 
To   800     retrieve either kind of document using Python's own modules for this purpose   801     (such as urllib), open a stream and pass it to the parse function:   802    803     f = urllib.urlopen(uri)   804     try:   805         doc = libxml2dom.parse(f, html)   806     finally:   807         f.close()   808    809     A document object is returned by this function.   810     """   811    812     if html:   813         f = urllib.urlopen(uri)   814         try:   815             return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,   816                 validate=validate, remote=remote, impl=impl)   817         finally:   818             f.close()   819     else:   820         impl = impl or default_impl   821         return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding,   822             unfinished=unfinished, validate=validate, remote=remote))   823    824 def toString(node, encoding=None, prettyprint=0):   825    826     """   827     Return a string containing the serialised form of the given 'node' and its   828     children. The optional 'encoding' can be used to override the default   829     character encoding used in the serialisation. The optional 'prettyprint'   830     indicates whether the serialised form is prettyprinted or not (the default   831     setting).   832     """   833    834     return Node_toString(node.as_native_node(), encoding, prettyprint)   835    836 def toStream(node, stream, encoding=None, prettyprint=0):   837    838     """   839     Write the serialised form of the given 'node' and its children to the given   840     'stream'. The optional 'encoding' can be used to override the default   841     character encoding used in the serialisation. The optional 'prettyprint'   842     indicates whether the serialised form is prettyprinted or not (the default   843     setting).   
844     """   845    846     Node_toStream(node.as_native_node(), stream, encoding, prettyprint)   847    848 def toFile(node, filename, encoding=None, prettyprint=0):   849    850     """   851     Write the serialised form of the given 'node' and its children to a file   852     having the given 'filename'. The optional 'encoding' can be used to override   853     the default character encoding used in the serialisation. The optional   854     'prettyprint' indicates whether the serialised form is prettyprinted or not   855     (the default setting).   856     """   857    858     Node_toFile(node.as_native_node(), filename, encoding, prettyprint)   859    860 def adoptNodes(nodes, impl=None):   861    862     """   863     A special utility method which adopts the given low-level 'nodes' and which   864     returns a list of high-level equivalents. This is currently experimental and   865     should not be casually used.   866     """   867    868     impl = impl or default_impl   869    870     if len(nodes) == 0:   871         return []   872     doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))   873     results = []   874     for node in nodes:   875         results.append(Node(node, impl, doc))   876     return results   877    878 def getDOMImplementation():   879    880     "Return the default DOM implementation."   881    882     return default_impl   883    884 # Single instance of the implementation.   885    886 default_impl = Implementation()   887    888 # vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py