libxml2dom (file libxml2dom/__init_

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU Lesser General Public License as published by the Free    10 Software Foundation; either version 3 of the License, or (at your option) any    11 later version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more    16 details.    17     18 You should have received a copy of the GNU Lesser General Public License along    19 with this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 __version__ = "0.5"    23     24 from libxml2dom.macrolib import *    25 from libxml2dom.macrolib import \    26     createDocument as Node_createDocument, \    27     parseString as Node_parseString, parseURI as Node_parseURI, \    28     parseFile as Node_parseFile, \    29     toString as Node_toString, toStream as Node_toStream, \    30     toFile as Node_toFile    31 import urllib # for parseURI in HTML mode    32 import libxml2dom.errors    33     34 # Standard namespaces.    35     36 XML_NAMESPACE = xml.dom.XML_NAMESPACE    37     38 # Default namespace bindings for XPath.    39     40 default_ns = {    41     "xml" : XML_NAMESPACE    42     }    43     44 class Implementation(object):    45     46     "Contains an abstraction over the DOM implementation."    
47     48     def createDocumentType(self, localName, publicId, systemId):    49         return DocumentType(localName, publicId, systemId)    50     51     def createDocument(self, namespaceURI, localName, doctype):    52         return Document(Node_createDocument(namespaceURI, localName, doctype), self)    53     54     # Wrapping of documents.    55     56     def adoptDocument(self, node):    57         return Document(node, self)    58     59     # Factory functions.    60     61     def get_node(self, _node, context_node):    62     63         # Return the existing document.    64     65         if Node_nodeType(_node) == context_node.DOCUMENT_NODE:    66             return context_node.ownerDocument    67     68         # Return an attribute using the parent of the attribute as the owner    69         # element.    70     71         elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:    72             return Attribute(_node, self, context_node.ownerDocument,    73                 self.get_node(Node_parentNode(_node), context_node))    74     75         # Return other nodes.    76     77         else:    78             return Node(_node, self, context_node.ownerDocument)    79     80     def get_node_or_none(self, _node, context_node):    81         if _node is None:    82             return None    83         else:    84             return self.get_node(_node, context_node)    85     86 # Attribute and node list wrappers.    87     88 class NamedNodeMap(object):    89     90     """    91     A wrapper around Node objects providing DOM and dictionary convenience    92     methods.    
93     """    94     95     def __init__(self, node, impl):    96         self.node = node    97         self.impl = impl    98     99     def getNamedItem(self, name):   100         return self.node.getAttributeNode(name)   101    102     def getNamedItemNS(self, ns, localName):   103         return self.node.getAttributeNodeNS(ns, localName)   104    105     def setNamedItem(self, node):   106         try:   107             old = self.getNamedItem(node.nodeName)   108         except KeyError:   109             old = None   110         self.node.setAttributeNode(node)   111         return old   112    113     def setNamedItemNS(self, node):   114         try:   115             old = self.getNamedItemNS(node.namespaceURI, node.localName)   116         except KeyError:   117             old = None   118         self.node.setAttributeNodeNS(node)   119         return old   120    121     def removeNamedItem(self, name):   122         try:   123             old = self.getNamedItem(name)   124         except KeyError:   125             old = None   126         self.node.removeAttribute(name)   127         return old   128    129     def removeNamedItemNS(self, ns, localName):   130         try:   131             old = self.getNamedItemNS(ns, localName)   132         except KeyError:   133             old = None   134         self.node.removeAttributeNS(ns, localName)   135         return old   136    137     # Iterator emulation.   138    139     def __iter__(self):   140         return NamedNodeMapIterator(self)   141    142     # Dictionary emulation methods.   143    144     def __getitem__(self, name):   145         return self.getNamedItem(name)   146    147     def __setitem__(self, name, node):   148         if name == node.nodeName:   149             self.setNamedItem(node)   150         else:   151             raise KeyError, name   152    153     def __delitem__(self, name):   154         # NOTE: To be implemented.   
155         pass   156    157     def values(self):   158         return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   159    160     def keys(self):   161         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   162    163     def items(self):   164         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   165    166     def __repr__(self):   167         return str(self)   168    169     def __str__(self):   170         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   171    172     def _length(self):   173         return len(self.values())   174    175     length = property(_length)   176    177 class NamedNodeMapIterator(object):   178    179     "An iterator over a NamedNodeMap."   180    181     def __init__(self, nodemap):   182         self.nodemap = nodemap   183         self.items = self.nodemap.items()   184    185     def next(self):   186         if self.items:   187             current = self.items[0][1]   188             self.items = self.items[1:]   189             return current   190         else:   191             raise StopIteration   192    193 class NodeList(list):   194    195     "A wrapper around node lists."   196    197     def item(self, index):   198         return self[index]   199    200     def _length(self):   201         return len(self)   202    203     length = property(_length)   204    205 # Node classes.   206    207 class Node(object):   208    209     """   210     A DOM-style wrapper around libxml2mod objects.   
211     """   212    213     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   214     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   215     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   216     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   217     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   218     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   219     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   220     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   221     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   222     TEXT_NODE = xml.dom.Node.TEXT_NODE   223    224     def __init__(self, node, impl=None, ownerDocument=None):   225         self._node = node   226         self.impl = impl or default_impl   227         self.ownerDocument = ownerDocument   228    229     def as_native_node(self):   230         return self._node   231    232     def _nodeType(self):   233         return Node_nodeType(self._node)   234    235     def _childNodes(self):   236    237         # NOTE: Consider a generator instead.   
238    239         return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])   240    241     def _firstChild(self):   242         return (self.childNodes or [None])[0]   243    244     def _lastChild(self):   245         return (self.childNodes or [None])[-1]   246    247     def _attributes(self):   248         return NamedNodeMap(self, self.impl)   249    250     def _namespaceURI(self):   251         return Node_namespaceURI(self._node)   252    253     def _textContent(self):   254         return Node_textContent(self._node)   255    256     def _nodeValue(self):   257         if self.nodeType in null_value_node_types:   258             return None   259         return Node_nodeValue(self._node)   260    261     def _setNodeValue(self, value):   262         Node_setNodeValue(self._node, value)   263    264     def _prefix(self):   265         return Node_prefix(self._node)   266    267     def _nodeName(self):   268         return Node_nodeName(self._node)   269    270     def _tagName(self):   271         return Node_tagName(self._node)   272    273     def _localName(self):   274         return Node_localName(self._node)   275    276     def _parentNode(self):   277         return self.impl.get_node_or_none(Node_parentNode(self._node), self)   278    279     def _previousSibling(self):   280         return self.impl.get_node_or_none(Node_previousSibling(self._node), self)   281    282     def _nextSibling(self):   283         return self.impl.get_node_or_none(Node_nextSibling(self._node), self)   284    285     def _doctype(self):   286         _doctype = Node_doctype(self._node)   287         if _doctype is not None:   288             return self.impl.get_node(_doctype, self)   289         else:   290             return None   291    292     def _publicId(self):   293         # NOTE: To be fixed when the libxml2mod API has been figured out.   
294         if self.nodeType != self.DOCUMENT_TYPE_NODE:   295             return None   296         declaration = self.toString()   297         return self._findId(declaration, "PUBLIC")   298    299     def _systemId(self):   300         # NOTE: To be fixed when the libxml2mod API has been figured out.   301         if self.nodeType != self.DOCUMENT_TYPE_NODE:   302             return None   303         declaration = self.toString()   304         if self._findId(declaration, "PUBLIC"):   305             return self._findIdValue(declaration, 0)   306         return self._findId(declaration, "SYSTEM")   307    308     # NOTE: To be removed when the libxml2mod API has been figured out.   309    310     def _findId(self, declaration, identifier):   311         i = declaration.find(identifier)   312         if i == -1:   313             return None   314         return self._findIdValue(declaration, i)   315    316     def _findIdValue(self, declaration, i):   317         q = declaration.find('"', i)   318         if q == -1:   319             return None   320         q2 = declaration.find('"', q + 1)   321         if q2 == -1:   322             return None   323         return declaration[q+1:q2]   324    325     def hasAttributeNS(self, ns, localName):   326         return Node_hasAttributeNS(self._node, ns, localName)   327    328     def hasAttribute(self, name):   329         return Node_hasAttribute(self._node, name)   330    331     def getAttributeNS(self, ns, localName):   332         return Node_getAttributeNS(self._node, ns, localName)   333    334     def getAttribute(self, name):   335         return Node_getAttribute(self._node, name)   336    337     def getAttributeNodeNS(self, ns, localName):   338         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)   339    340     def getAttributeNode(self, localName):   341         return Attribute(Node_getAttributeNode(self._node, localName), 
self.impl, self.ownerDocument, self)   342    343     def setAttributeNS(self, ns, name, value):   344         Node_setAttributeNS(self._node, ns, name, value)   345    346     def setAttribute(self, name, value):   347         Node_setAttribute(self._node, name, value)   348    349     def setAttributeNodeNS(self, node):   350         Node_setAttributeNodeNS(self._node, node._node)   351    352     def setAttributeNode(self, node):   353         Node_setAttributeNode(self._node, node._node)   354    355     def removeAttributeNS(self, ns, localName):   356         Node_removeAttributeNS(self._node, ns, localName)   357    358     def removeAttribute(self, name):   359         Node_removeAttribute(self._node, name)   360    361     def createElementNS(self, ns, name):   362         return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)   363    364     def createElement(self, name):   365         return self.impl.get_node(Node_createElement(self._node, name), self)   366    367     def createAttributeNS(self, ns, name):   368         tmp = self.createElement("tmp")   369         return Attribute(Node_createAttributeNS(tmp._node, self.impl, ns, name))   370    371     def createAttribute(self, name):   372         tmp = self.createElement("tmp")   373         return Attribute(Node_createAttribute(tmp._node, name), self.impl)   374    375     def createTextNode(self, value):   376         return self.impl.get_node(Node_createTextNode(self._node, value), self)   377    378     def createComment(self, value):   379         return self.impl.get_node(Node_createComment(self._node, value), self)   380    381     def createCDATASection(self, value):   382         return self.impl.get_node(Node_createCDATASection(self._node, value), self)   383    384     def importNode(self, node, deep):   385         if hasattr(node, "as_native_node"):   386             return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)   387     
    else:   388             return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)   389    390     def cloneNode(self, deep):   391         # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).   392         return self.importNode(self, deep)   393    394     def insertBefore(self, tmp, oldNode):   395         if tmp.ownerDocument != self.ownerDocument:   396             raise xml.dom.WrongDocumentErr()   397         if oldNode.parentNode != self:   398             raise xml.dom.NotFoundErr()   399    400         # Nodes must be from this implementation before insertion.   401    402         if not hasattr(tmp, "as_native_node"):   403             raise xml.dom.WrongDocumentErr()   404    405         return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   406    407     def replaceChild(self, tmp, oldNode):   408         if tmp.ownerDocument != self.ownerDocument:   409             raise xml.dom.WrongDocumentErr()   410         if oldNode.parentNode != self:   411             raise xml.dom.NotFoundErr()   412    413         # Nodes must be from this implementation before insertion.   414    415         if not hasattr(tmp, "as_native_node"):   416             raise xml.dom.WrongDocumentErr()   417    418         return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   419    420     def appendChild(self, tmp):   421         if tmp.ownerDocument != self.ownerDocument:   422             raise xml.dom.WrongDocumentErr()   423    424         # Nodes must be from this implementation before insertion.   
425    426         if not hasattr(tmp, "as_native_node"):   427             raise xml.dom.WrongDocumentErr()   428    429         return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)   430    431     def removeChild(self, tmp):   432    433         # Nodes must be from this implementation in order to be removed.   434    435         if not hasattr(tmp, "as_native_node"):   436             raise xml.dom.WrongDocumentErr()   437    438         Node_removeChild(self._node, tmp.as_native_node())   439         return tmp   440    441     def getElementById(self, identifier):   442         _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)   443         if _node is None:   444             return None   445         else:   446             return self.impl.get_node(_node, self)   447    448     def getElementsByTagName(self, tagName):   449         return self.xpath(".//" + tagName)   450    451     def getElementsByTagNameNS(self, namespaceURI, localName):   452         return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})   453    454     def normalize(self):   455         text_nodes = []   456         for node in self.childNodes:   457             if node.nodeType == node.TEXT_NODE:   458                 text_nodes.append(node)   459             elif len(text_nodes) != 0:   460                 self._normalize(text_nodes)   461                 text_nodes = []   462         if len(text_nodes) != 0:   463             self._normalize(text_nodes)   464    465     def _normalize(self, text_nodes):   466         texts = []   467         for text_node in text_nodes[:-1]:   468             texts.append(text_node.nodeValue)   469             self.removeChild(text_node)   470         texts.append(text_nodes[-1].nodeValue)   471         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   472    473     childNodes = property(_childNodes)   474     firstChild = 
property(_firstChild)   475     lastChild = property(_lastChild)   476     value = data = nodeValue = property(_nodeValue, _setNodeValue)   477     textContent = property(_textContent)   478     name = nodeName = property(_nodeName)   479     tagName = property(_tagName)   480     namespaceURI = property(_namespaceURI)   481     prefix = property(_prefix)   482     localName = property(_localName)   483     parentNode = property(_parentNode)   484     nodeType = property(_nodeType)   485     attributes = property(_attributes)   486     previousSibling = property(_previousSibling)   487     nextSibling = property(_nextSibling)   488     doctype = property(_doctype)   489     publicId = property(_publicId)   490     systemId = property(_systemId)   491    492     # NOTE: To be fixed - these being doctype-specific values.   493    494     entities = {}   495     notations = {}   496    497     def isSameNode(self, other):   498         return self == other   499    500     def __hash__(self):   501         return hash(self.localName)   502    503     def __eq__(self, other):   504         return isinstance(other, Node) and Node_equals(self._node, other._node)   505    506     def __ne__(self, other):   507         return not (self == other)   508    509     # 4DOM extensions to the usual PyXML API.   510     # NOTE: To be finished.   511    512     def xpath(self, expr, variables=None, namespaces=None):   513    514         """   515         Evaluate the given expression 'expr' using the optional 'variables' and   516         'namespaces' mappings.   
517         """   518    519         ns = {}   520         ns.update(default_ns)   521         ns.update(namespaces or {})   522         result = Node_xpath(self._node, expr, variables, ns)   523         if isinstance(result, str):   524             return to_unicode(result)   525         elif hasattr(result, "__len__"):   526             return NodeList([self.impl.get_node(_node, self) for _node in result])   527         else:   528             return result   529    530     # Other extensions to the usual PyXML API.   531    532     def xinclude(self):   533    534         """   535         Process XInclude declarations within the document, returning the number   536         of substitutions performed (zero or more), raising an XIncludeException   537         otherwise.   538         """   539    540         return Node_xinclude(self._node)   541    542     # Convenience methods.   543    544     def toString(self, encoding=None, prettyprint=0):   545         return toString(self, encoding, prettyprint)   546    547     def toStream(self, stream, encoding=None, prettyprint=0):   548         toStream(self, stream, encoding, prettyprint)   549    550     def toFile(self, f, encoding=None, prettyprint=0):   551         toFile(self, f, encoding, prettyprint)   552    553 # Attribute nodes.   554    555 class Attribute(Node):   556    557     "A class providing attribute access."   558    559     def __init__(self, node, impl, ownerDocument=None, ownerElement=None):   560         Node.__init__(self, node, impl, ownerDocument)   561         self.ownerElement = ownerElement   562    563     def _parentNode(self):   564         return self.ownerElement   565    566     parentNode = property(_parentNode)   567    568 # Document housekeeping mechanisms.   569    570 class _Document:   571    572     """   573     An abstract class providing document-level housekeeping and distinct   574     functionality. Configuration of the document is also supported.   
575     See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration   576     """   577    578     # Constants from    579     # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL   580    581     VAL_TRUE = 5   582     VAL_FALSE = 6   583     VAL_UNKNOWN = 7   584    585     def __init__(self, node, impl):   586         self._node = node   587         self.implementation = self.impl = impl   588         self.error_handler = libxml2dom.errors.DOMErrorHandler()   589    590     # Standard DOM properties and their implementations.   591    592     def _documentElement(self):   593         return self.xpath("*")[0]   594    595     def _ownerDocument(self):   596         return self   597    598     def __del__(self):   599         #print "Freeing document", self._node   600         libxml2mod.xmlFreeDoc(self._node)   601    602     documentElement = property(_documentElement)   603     ownerDocument = property(_ownerDocument)   604    605     # DOM Level 3 Core DOMConfiguration methods.   606    607     def setParameter(self, name, value):   608         if name == "error-handler":   609             raise xml.dom.NotSupportedErr()   610         raise xml.dom.NotFoundErr()   611    612     def getParameter(self, name):   613         if name == "error-handler":   614             return self.error_handler   615         raise xml.dom.NotFoundErr()   616    617     def canSetParameter(self, name, value):   618         return 0   619    620     def _parameterNames(self):   621         return []   622    623     # Extensions to the usual PyXML API.   624    625     def validate(self, doc):   626    627         """   628         Validate the document against the given schema document, 'doc'.   
629         """   630    631         validation_ns = doc.documentElement.namespaceURI   632    633         if hasattr(doc, "as_native_node"):   634             _schema = Document_schema(doc.as_native_node(), validation_ns)   635         else:   636             _schema = Document_schemaFromString(doc.toString(), validation_ns)   637         try:   638             self.error_handler.reset()   639             return Document_validate(_schema, self._node, self.error_handler, validation_ns)   640         finally:   641             Schema_free(_schema, validation_ns)   642    643     # DOM Level 3 Validation methods.   644    645     def validateDocument(self, doc):   646    647         """   648         Validate the document against the given schema document, 'doc'.   649         See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument   650         """   651    652         return self.validate(doc) and self.VAL_TRUE or self.VAL_FALSE   653    654 class Document(_Document, Node):   655    656     """   657     A generic document class. Specialised document classes should inherit from   658     the _Document class and their own variation of Node.   659     """   660    661     pass   662    663 class DocumentType(object):   664    665     "A class providing a container for document type information."   666    667     def __init__(self, localName, publicId, systemId):   668         self.name = self.localName = localName   669         self.publicId = publicId   670         self.systemId = systemId   671    672         # NOTE: Nothing is currently provided to support the following   673         # NOTE: attributes.   674    675         self.entities = {}   676         self.notations = {}   677    678 # Constants.   
# Node types for which the nodeValue property is always None.

null_value_node_types = [
    Node.DOCUMENT_NODE,
    Node.DOCUMENT_TYPE_NODE,
    Node.ELEMENT_NODE,
    Node.ENTITY_NODE,
    Node.ENTITY_REFERENCE_NODE,
    Node.NOTATION_NODE,
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type object using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string' and return a document object. An object
    providing a 'read' attribute is treated as a stream whose entire content
    is read and parsed; anything else is treated as the filename of a
    document. The options below should be given as keyword arguments.

    html            a true value causes the content to be parsed as HTML
                    rather than XML
    htmlencoding    the document encoding to be assumed when parsing HTML
    unfinished      a true value permits the parsing of unfinished documents
                    (those missing content such as closing tags)
    validate        a true value requests an attempt to validate the parsed
                    document
    remote          a true value permits references to remote documents
                    (such as DTDs) to be followed
    impl            the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    settings = dict(html=html, htmlencoding=htmlencoding, unfinished=unfinished,
        validate=validate, remote=remote, impl=impl or default_impl)

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, **settings)

    return parseString(stream_or_string.read(), **settings)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename' and return a document object.
    The options below should be given as keyword arguments.

    html            a true value causes the content to be parsed as HTML
                    rather than XML
    htmlencoding    the document encoding to be assumed when parsing HTML
    unfinished      a true value permits the parsing of unfinished documents
                    (those missing content such as closing tags)
    validate        a true value requests an attempt to validate the parsed
                    document
    remote          a true value permits references to remote documents
                    (such as DTDs) to be followed
    impl            the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    wrapper = impl or default_impl
    native_doc = Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote)
    return wrapper.adoptDocument(native_doc)

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's' and return a document object.
    The options below should be given as keyword arguments.

    html            a true value causes the content to be parsed as HTML
                    rather than XML
    htmlencoding    the document encoding to be assumed when parsing HTML
    unfinished      a true value permits the parsing of unfinished documents
                    (those missing content such as closing tags)
    validate        a true value requests an attempt to validate the parsed
                    document
    remote          a true value permits references to remote documents
                    (such as DTDs) to be followed
    impl            the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    wrapper = impl or default_impl
    native_doc = Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote)
    return wrapper.adoptDocument(native_doc)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri' and return a document object.
    The options below should be given as keyword arguments.

    html            a true value causes the content to be parsed as HTML
                    rather than XML
    htmlencoding    the document encoding to be assumed when parsing HTML
    unfinished      a true value permits the parsing of unfinished documents
                    (those missing content such as closing tags)
    validate        a true value requests an attempt to validate the parsed
                    document
    remote          a true value permits references to remote documents
                    (such as DTDs) to be followed
    impl            the implementation used to wrap the document (the default
                    implementation if omitted)

    XML documents are retrieved using libxml2's own network capabilities; HTML
    documents are retrieved using the urllib module provided by Python. To
    retrieve either kind of document using Python's own modules for this
    purpose (such as urllib), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()
    """

    if not html:
        wrapper = impl or default_impl
        native_doc = Node_parseURI(uri, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote)
        return wrapper.adoptDocument(native_doc)

    # HTML content is fetched with urllib and parsed as a stream.

    source = urllib.urlopen(uri)
    try:
        return parse(source, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    finally:
        source.close()

def toString(node, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, returning the result as a
    string. The optional 'encoding' overrides the default character encoding
    of the serialisation; the optional 'prettyprint' flag selects whether the
    output is prettyprinted (it is not by default).
    """

    native = node.as_native_node()
    return Node_toString(native, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, writing the result to the
    given 'stream'. The optional 'encoding' overrides the default character
    encoding of the serialisation; the optional 'prettyprint' flag selects
    whether the output is prettyprinted (it is not by default).
    """

    native = node.as_native_node()
    Node_toStream(native, stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' and its children, writing the result to a file
    having the given 'filename'. The optional 'encoding' overrides the default
    character encoding of the serialisation; the optional 'prettyprint' flag
    selects whether the output is prettyprinted (it is not by default).
    """

    native = node.as_native_node()
    Node_toFile(native, filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility function which wraps the given low-level 'nodes',
    returning a list of high-level equivalents that share a document wrapper
    obtained from the first node. This is currently experimental and should
    not be casually used.
    """

    wrapper = impl or default_impl

    if not len(nodes):
        return []

    # All nodes are given the document of the first node as their owner.

    owner = wrapper.adoptDocument(libxml2mod.doc(nodes[0]))
    return [Node(native, wrapper, owner) for native in nodes]

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/init.py