libxml2dom (file libxml2dom/__init__.py)

     #!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.4.7"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import urllib # for parseURI in HTML mode
import libxml2dom.errors

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType holding the given details."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Return a new Document wrapping the low-level document created from the
        given 'namespaceURI', 'localName' and 'doctype'.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Return a Document wrapping the given low-level document 'node'."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a high-level wrapper for the low-level '_node', using the
        'context_node' to supply the owner document (and the node type
        constants used to classify '_node').
        """

        node_type = Node_nodeType(_node)

        # Return the existing document.

        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        elif node_type == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Return other nodes.

        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but return None where the low-level '_node' is None."

        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):

        """
        Initialise the map for the given high-level 'node' (whose attributes
        are exposed) and the implementation 'impl' used to wrap attributes.
        """

        self.node = node
        self.impl = impl

    # DOM methods, each delegating to the attribute methods of the wrapped
    # node. The set and remove methods look up the previous attribute first
    # and return the result of that lookup (or None if the lookup raised a
    # KeyError).

    def getNamedItem(self, name):
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):
        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):
        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):
        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):

        "Return an iterator over the attribute nodes in this map."

        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' in the map, raising a KeyError if the given
        'name' does not match the node's own name.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # Call form of raise: equivalent to the statement form, but also
            # valid syntax for later Python versions.
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented.
        pass

    def values(self):

        "Return a list of Attribute objects, one per attribute of the node."

        return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples for the attributes."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):

        "Initialise the iterator with a snapshot of the items of 'nodemap'."

        self.nodemap = nodemap
        self.items = self.nodemap.items()

    def __iter__(self):

        # Iterators are expected to be iterable themselves, returning self.

        return self

    def next(self):

        """
        Return the next attribute node (the value part of each item), raising
        StopIteration when no items remain.
        """

        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

    # Alias supporting the iterator protocol naming of later Python versions.

    __next__ = next

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):

        "Return the node at the given 'index' (using list indexing)."

        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper around the low-level 'node'. Where 'impl' is
        omitted (or false), the module's default implementation is used. The
        optional 'ownerDocument' is the high-level document owning the node.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the low-level node wrapped by this object."

        return self._node

    # Property implementations. Most delegate directly to the corresponding
    # Node_* function, wrapping any resulting low-level nodes using the
    # implementation object.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # An empty child list is replaced by [None] so that None is returned.

        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types have no value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of a
        document type node, or None for other nodes or where no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of a
        document type node, or None for other nodes or where no such
        identifier is found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # Where a public identifier is present, the system identifier is the
        # quoted literal following it. Skip the first quoted literal (the
        # public identifier) before searching; searching from the start of
        # the declaration would return the public identifier itself.
        # NOTE: Like the other searches here, this assumes double-quoted
        # NOTE: literals in the serialised declaration.

        i = declaration.find("PUBLIC")
        if i != -1:
            q = declaration.find('"', i)
            if q != -1:
                q2 = declaration.find('"', q + 1)
                if q2 != -1:
                    return self._findIdValue(declaration, q2 + 1)
        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following the first occurrence
        of 'identifier' in 'declaration', or None if either is missing.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the text between the first pair of double quotes found in
        'declaration' at or after index 'i', or None if there is no such pair.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access: wrappers around the corresponding Node_* functions.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation: new low-level nodes are wrapped using this node as the
    # context.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute for the given namespace 'ns' and 'name',
        created on a temporary element.
        """

        tmp = self.createElement("tmp")

        # The implementation belongs to the Attribute wrapper, not to the
        # low-level creation function (compare createAttribute below).

        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        "Return a new Attribute with the given 'name', created on a temporary element."

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import the given 'node', which may be a node from this package
        (providing as_native_node) or some other DOM node, returning the
        wrapped result. The 'deep' flag is passed on to the import function.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    # Tree modification.

    def insertBefore(self, tmp, oldNode):

        """
        Insert the node 'tmp' before the existing child 'oldNode', returning
        the wrapped result of the insertion. Where 'oldNode' is None, 'tmp' is
        appended to the children of this node.

        A WrongDocumentErr is raised if 'tmp' belongs to a different document;
        a NotFoundErr is raised if 'oldNode' is not a child of this node.
        """

        # The DOM permits a null reference child, meaning "insert at the end".

        if oldNode is None:
            return self.appendChild(tmp)

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with the node 'tmp', returning
        the wrapped result of the replacement.

        A WrongDocumentErr is raised if 'tmp' belongs to a different document;
        a NotFoundErr is raised if 'oldNode' is not a child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)
        else:
            return self.impl.get_node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append the node 'tmp' to this node, returning the wrapped result. A
        WrongDocumentErr is raised if 'tmp' belongs to a different document.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if hasattr(tmp, "as_native_node"):
            return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)
        else:
            return self.impl.get_node(Node_appendChild(self._node, tmp), self)

    def removeChild(self, tmp):

        "Remove the child 'tmp' from this node, returning 'tmp' itself."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    # Searching.

    def getElementById(self, identifier):

        """
        Return the wrapped element found for 'identifier' in the owner
        document, or None if the lookup yields no node.
        """

        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        if _node is None:
            return None
        else:
            return self.impl.get_node(_node, self)

    def getElementsByTagName(self, tagName):

        "Return the result of the XPath expression './/' followed by 'tagName'."

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):

        """
        Return the result of an XPath search below this node for 'localName'
        using a prefix bound to the given 'namespaceURI'.
        """

        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        """
        Merge each run of adjacent text node children of this node into a
        single text node.
        """

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:

                # A non-text node ends the current run of text nodes.

                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given run of 'text_nodes' with a single new text node
        containing their concatenated values: all but the last are removed,
        and the last is replaced by the new node.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: As class attributes, these dictionaries are shared by all nodes.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):

        # Equality is decided by the low-level nodes, not the wrappers.

        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings.

        The given 'namespaces' are added to (and may override) the default
        namespace bindings. String results are converted using to_unicode,
        results having a length are returned as a NodeList of wrapped nodes,
        and other results are returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods, delegating to the module-level functions.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute as for Node, additionally recording the
        optional 'ownerElement' to which the attribute belongs.
        """

        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The recorded owner element is reported as the parent.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality. Configuration of the document is also supported.
    See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration
    """

    # Constants from
    # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL

    VAL_TRUE = 5
    VAL_FALSE = 6
    VAL_UNKNOWN = 7

    def __init__(self, node, impl):

        """
        Initialise the document around the low-level document 'node' and the
        implementation 'impl', creating an error handler for the document.
        """

        self._node = node
        self.implementation = self.impl = impl
        self.error_handler = libxml2dom.errors.DOMErrorHandler()

    # Standard DOM properties and their implementations.

    def _documentElement(self):

        # The first result of the "*" expression evaluated on the document.

        return self.xpath("*")[0]

    def _ownerDocument(self):
        return self

    def __del__(self):

        # Free the low-level document when this wrapper is discarded.

        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

    # DOM Level 3 Core DOMConfiguration methods.

    def setParameter(self, name, value):

        """
        Reject attempts to set parameters: a NotSupportedErr is raised for
        "error-handler" and a NotFoundErr for any other 'name'.
        """

        if name == "error-handler":
            raise xml.dom.NotSupportedErr()
        raise xml.dom.NotFoundErr()

    def getParameter(self, name):

        """
        Return the error handler for the "error-handler" parameter, raising a
        NotFoundErr for any other 'name'.
        """

        if name == "error-handler":
            return self.error_handler
        raise xml.dom.NotFoundErr()

    def canSetParameter(self, name, value):
        return 0

    def _parameterNames(self):
        return []

    # Extensions to the usual PyXML API.

    def validate(self, doc):

        """
        Validate the document against the given schema document, 'doc'.

        Where 'doc' provides as_native_node, the schema is built from its
        low-level node; otherwise, the schema is built from the result of
        calling toString on 'doc'. The error handler is reset before
        validation, and the schema is freed whether or not validation
        completes.
        """

        if hasattr(doc, "as_native_node"):
            _schema = Document_schema(doc.as_native_node())
        else:
            _schema = Document_schemaFromString(doc.toString())
        try:
            self.error_handler.reset()
            return Document_validate(_schema, self._node, self.error_handler)
        finally:
            Schema_free(_schema)

    # DOM Level 3 Validation methods.

    def validateDocument(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument

        VAL_TRUE is returned where validate gives a true result; VAL_FALSE is
        returned otherwise.
        """

        return self.validate(doc) and self.VAL_TRUE or self.VAL_FALSE

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):

        "Initialise the container with the given name and identifiers."

        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    The optional 'impl' is the implementation used to wrap the parsed
    document; the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Objects providing a read method are treated as streams; anything else
    # is treated as a filename.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    The optional 'impl' is the implementation used to wrap the parsed
    document; the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    The optional 'impl' is the implementation used to wrap the parsed
    document; the default implementation is used if it is omitted.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    The optional 'impl' is the implementation used to wrap the parsed
    document; the default implementation is used if it is omitted.

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this purpose
    (such as urllib), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()

    A document object is returned by this function.
    """

    if html:

        # Retrieve HTML using urllib, making sure that the stream is closed.

        f = urllib.urlopen(uri)
        try:
            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
                validate=validate, remote=remote, impl=impl)
        finally:
            f.close()
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.

    The document of the first node is adopted and used as the owner document
    of every returned node. An empty list is returned for empty 'nodes'.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []
    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    results = []
    for node in nodes:
        results.append(Node(node, impl, doc))
    return results

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py