libxml2dom (file libxml2dom/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005 Paul Boddie <paul@boddie.org.uk>     7      8 This library is free software; you can redistribute it and/or     9 modify it under the terms of the GNU Lesser General Public    10 License as published by the Free Software Foundation; either    11 version 2.1 of the License, or (at your option) any later version.    12     13 This library is distributed in the hope that it will be useful,    14 but WITHOUT ANY WARRANTY; without even the implied warranty of    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    16 Lesser General Public License for more details.    17     18 You should have received a copy of the GNU Lesser General Public    19 License along with this library; if not, write to the Free Software    20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA    21 """    22     23 __version__ = "0.3.3"    24     25 from libxml2dom.macrolib import *    26 from libxml2dom.macrolib import \    27     createDocument as Node_createDocument, \    28     parseString as Node_parseString, parseURI as Node_parseURI, \    29     parseFile as Node_parseFile, \    30     toString as Node_toString, toStream as Node_toStream, \    31     toFile as Node_toFile    32     33 # Attribute and node list wrappers.    34     35 class NamedNodeMap(object):    36     37     """    38     A wrapper around Node objects providing DOM and dictionary convenience    39     methods.    
40     """    41     42     def __init__(self, node):    43         self.node = node    44     45     def getNamedItem(self, name):    46         return self.node.getAttributeNode(name)    47     48     def getNamedItemNS(self, ns, localName):    49         return self.node.getAttributeNodeNS(ns, localName)    50     51     def setNamedItem(self, node):    52         try:    53             old = self.getNamedItem(node.nodeName)    54         except KeyError:    55             old = None    56         self.node.setAttributeNode(node)    57         return old    58     59     def setNamedItemNS(self, node):    60         try:    61             old = self.getNamedItemNS(node.namespaceURI, node.localName)    62         except KeyError:    63             old = None    64         self.node.setAttributeNodeNS(node)    65         return old    66     67     def removeNamedItem(self, name):    68         try:    69             old = self.getNamedItem(name)    70         except KeyError:    71             old = None    72         self.node.removeAttribute(name)    73         return old    74     75     def removeNamedItemNS(self, ns, localName):    76         try:    77             old = self.getNamedItemNS(ns, localName)    78         except KeyError:    79             old = None    80         self.node.removeAttributeNS(ns, localName)    81         return old    82     83     # Dictionary emulation methods.    84     85     def __getitem__(self, name):    86         return self.getNamedItem(name)    87     88     def __setitem__(self, name, node):    89         if name == node.nodeName:    90             self.setNamedItem(node)    91         else:    92             raise KeyError, name    93     94     def __delitem__(self, name):    95         # NOTE: To be implemented.    
96         pass    97     98     def values(self):    99         return [Attribute(_node, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   100    101     def keys(self):   102         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   103    104     def items(self):   105         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   106    107     def __repr__(self):   108         return str(self)   109    110     def __str__(self):   111         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   112    113 class NodeList(list):   114    115     "A wrapper around node lists."   116    117     def item(self, index):   118         return self[index]   119    120     def length(self):   121         return len(self)   122    123 # Node classes.   124    125 class Node(object):   126    127     """   128     A DOM-style wrapper around libxml2mod objects.   129     """   130    131     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   132     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   133     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   134     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   135     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   136     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   137     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   138     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   139     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   140     TEXT_NODE = xml.dom.Node.TEXT_NODE   141    142     def __init__(self, node, ownerDocument=None):   143         self._node = node   144         self.ownerDocument = ownerDocument   145    146     def as_native_node(self):   147         return self._node   148    149     def _nodeType(self):   150         return Node_nodeType(self._node)   151    152     def _childNodes(self):   153    154         # NOTE: Consider a generator instead.  
 155    156         return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])   157    158     def _attributes(self):   159         return NamedNodeMap(self)   160    161     def _namespaceURI(self):   162         return Node_namespaceURI(self._node)   163    164     def _nodeValue(self):   165         return Node_nodeValue(self._node)   166    167     def _setNodeValue(self, value):   168         Node_setNodeValue(self._node, value)   169    170     def _prefix(self):   171         return Node_prefix(self._node)   172    173     def _nodeName(self):   174         return Node_nodeName(self._node)   175    176     def _tagName(self):   177         return Node_tagName(self._node)   178    179     def _localName(self):   180         return Node_localName(self._node)   181    182     def _parentNode(self):   183         return get_node(Node_parentNode(self._node), self)   184    185     def _previousSibling(self):   186         return Node(Node_previousSibling(self._node), self.ownerDocument)   187    188     def _nextSibling(self):   189         return Node(Node_nextSibling(self._node), self.ownerDocument)   190    191     def _doctype(self):   192         return Node(Node_doctype(self._node), self.ownerDocument)   193    194     def _publicId(self):   195         # NOTE: To be fixed when the libxml2mod API has been figured out.   196         if self.nodeType != self.DOCUMENT_TYPE_NODE:   197             return None   198         declaration = self.toString()   199         return self._findId(declaration, "PUBLIC")   200    201     def _systemId(self):   202         # NOTE: To be fixed when the libxml2mod API has been figured out.   
203         if self.nodeType != self.DOCUMENT_TYPE_NODE:   204             return None   205         declaration = self.toString()   206         if self._findId(declaration, "PUBLIC"):   207             return self._findIdValue(declaration, 0)   208         return self._findId(declaration, "SYSTEM")   209    210     # NOTE: To be removed when the libxml2mod API has been figured out.   211    212     def _findId(self, declaration, identifier):   213         i = declaration.find(identifier)   214         if i == -1:   215             return None   216         return self._findIdValue(declaration, i)   217    218     def _findIdValue(self, declaration, i):   219         q = declaration.find('"', i)   220         if q == -1:   221             return None   222         q2 = declaration.find('"', q + 1)   223         if q2 == -1:   224             return None   225         return declaration[q+1:q2]   226    227     def hasAttributeNS(self, ns, localName):   228         return Node_hasAttributeNS(self._node, ns, localName)   229    230     def hasAttribute(self, name):   231         return Node_hasAttribute(self._node, name)   232    233     def getAttributeNS(self, ns, localName):   234         return Node_getAttributeNS(self._node, ns, localName)   235    236     def getAttribute(self, name):   237         return Node_getAttribute(self._node, name)   238    239     def getAttributeNodeNS(self, ns, localName):   240         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)   241    242     def getAttributeNode(self, localName):   243         return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)   244    245     def setAttributeNS(self, ns, name, value):   246         Node_setAttributeNS(self._node, ns, name, value)   247    248     def setAttribute(self, name, value):   249         Node_setAttribute(self._node, name, value)   250    251     def setAttributeNodeNS(self, node):   252         
Node_setAttributeNodeNS(self._node, node._node)   253    254     def setAttributeNode(self, node):   255         Node_setAttributeNode(self._node, node._node)   256    257     def removeAttributeNS(self, ns, localName):   258         Node_removeAttributeNS(self._node, ns, localName)   259    260     def removeAttribute(self, name):   261         Node_removeAttribute(self._node, name)   262    263     def createElementNS(self, ns, name):   264         return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)   265    266     def createElement(self, name):   267         return Node(Node_createElement(self._node, name), self.ownerDocument)   268    269     def createAttributeNS(self, ns, name):   270         tmp = self.createElement("tmp")   271         return Attribute(Node_createAttributeNS(tmp._node, ns, name))   272    273     def createAttribute(self, name):   274         tmp = self.createElement("tmp")   275         return Attribute(Node_createAttribute(tmp._node, name))   276    277     def createTextNode(self, value):   278         return Node(Node_createTextNode(self._node, value), self.ownerDocument)   279    280     def createComment(self, value):   281         return Node(Node_createComment(self._node, value), self.ownerDocument)   282    283     def importNode(self, node, deep):   284         if hasattr(node, "as_native_node"):   285             return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)   286         else:   287             return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)   288    289     def insertBefore(self, tmp, oldNode):   290         if hasattr(tmp, "as_native_node"):   291             return Node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   292         else:   293             return Node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   294    295     def 
replaceChild(self, tmp, oldNode):   296         if hasattr(tmp, "as_native_node"):   297             return Node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   298         else:   299             return Node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   300    301     def appendChild(self, tmp):   302         if hasattr(tmp, "as_native_node"):   303             return Node(Node_appendChild(self._node, tmp.as_native_node()), self.ownerDocument)   304         else:   305             return Node(Node_appendChild(self._node, tmp), self.ownerDocument)   306    307     def removeChild(self, tmp):   308         if hasattr(tmp, "as_native_node"):   309             Node_removeChild(self._node, tmp.as_native_node())   310         else:   311             Node_removeChild(self._node, tmp)   312    313     def getElementsByTagName(self, tagName):   314         return self.xpath("//" + tagName)   315    316     def getElementsByTagNameNS(self, namespaceURI, localName):   317         return self.xpath("//ns:" + localName, namespaces={"ns" : namespaceURI})   318    319     def normalize(self):   320         text_nodes = []   321         for node in self.childNodes:   322             if node.nodeType == node.TEXT_NODE:   323                 text_nodes.append(node)   324             elif len(text_nodes) != 0:   325                 self._normalize(text_nodes)   326                 text_nodes = []   327         if len(text_nodes) != 0:   328             self._normalize(text_nodes)   329    330     def _normalize(self, text_nodes):   331         texts = []   332         for text_node in text_nodes[:-1]:   333             texts.append(text_node.nodeValue)   334             self.removeChild(text_node)   335         texts.append(text_nodes[-1].nodeValue)   336         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   337    338     childNodes = 
property(_childNodes)   339     value = data = nodeValue = property(_nodeValue, _setNodeValue)   340     name = nodeName = property(_nodeName)   341     tagName = property(_tagName)   342     namespaceURI = property(_namespaceURI)   343     prefix = property(_prefix)   344     localName = property(_localName)   345     parentNode = property(_parentNode)   346     nodeType = property(_nodeType)   347     attributes = property(_attributes)   348     previousSibling = property(_previousSibling)   349     nextSibling = property(_nextSibling)   350     doctype = property(_doctype)   351     publicId = property(_publicId)   352     systemId = property(_systemId)   353    354     # NOTE: To be fixed - these being doctype-specific values.   355    356     entities = {}   357     notations = {}   358    359     #def isSameNode(self, other):   360     #    return self._node.nodePath() == other._node.nodePath()   361    362     #def __eq__(self, other):   363     #    return self._node.nodePath() == other._node.nodePath()   364    365     # 4DOM extensions to the usual PyXML API.   366     # NOTE: To be finished.   367    368     def xpath(self, expr, variables=None, namespaces=None):   369         result = Node_xpath(self._node, expr, variables, namespaces)   370         if hasattr(result, "__len__"):   371             return NodeList([get_node(_node, self) for _node in result])   372         else:   373             return result   374    375     # Convenience methods.   376    377     def toString(self, encoding=None, prettyprint=0):   378         return toString(self, encoding, prettyprint)   379    380     def toStream(self, stream, encoding=None, prettyprint=0):   381         toStream(self, stream, encoding, prettyprint)   382    383     def toFile(self, f, encoding=None, prettyprint=0):   384         toFile(self, f, encoding, prettyprint)   385    386 # Attribute nodes.   387    388 class Attribute(Node):   389    390     "A class providing attribute access."   
391    392     def __init__(self, node, ownerDocument=None, ownerElement=None):   393         Node.__init__(self, node, ownerDocument)   394         self.ownerElement = ownerElement   395    396     def _parentNode(self):   397         return self.ownerElement   398    399     parentNode = property(_parentNode)   400    401 # Document housekeeping mechanisms.   402    403 class Document(Node):   404    405     "A class providing document-level housekeeping."   406    407     def __init__(self, node):   408         self._node = node   409    410     def _ownerDocument(self):   411         return self   412    413     def _parentNode(self):   414         return None   415    416     def __del__(self):   417         #print "Freeing document", self._node   418         libxml2mod.xmlFreeDoc(self._node)   419    420     ownerDocument = property(_ownerDocument)   421     parentNode = property(_parentNode)   422    423 class DocumentType(object):   424    425     "A class providing a container for document type information."   426    427     def __init__(self, localName, publicId, systemId):   428         self.name = self.localName = localName   429         self.publicId = publicId   430         self.systemId = systemId   431    432         # NOTE: Nothing is currently provided to support the following   433         # NOTE: attributes.   434    435         self.entities = {}   436         self.notations = {}   437    438 # Factory functions.   439    440 def get_node(_node, context_node):   441     if Node_nodeType(_node) == context_node.DOCUMENT_NODE:   442         return context_node.ownerDocument   443     elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:   444         return Attribute(_node, context_node.ownerDocument, context_node)   445     else:   446         return Node(_node, context_node.ownerDocument)   447    448 # Utility functions.   
449    450 def createDocumentType(localName, publicId, systemId):   451     return DocumentType(localName, publicId, systemId)   452    453 def createDocument(namespaceURI, localName, doctype):   454     return Document(Node_createDocument(namespaceURI, localName, doctype))   455    456 def parse(stream_or_string, html=0):   457    458     """   459     Parse the given 'stream_or_string', where the supplied object can either be   460     a stream (such as a file or stream object), or a string (containing the   461     filename of a document). If the optional 'html' parameter is set to a true   462     value, the content to be parsed will be treated as being HTML rather than   463     XML.   464    465     A document object is returned by this function.   466     """   467    468     if hasattr(stream_or_string, "read"):   469         stream = stream_or_string   470         return parseString(stream.read(), html)   471     else:   472         return parseFile(stream_or_string, html)   473    474 def parseFile(filename, html=0):   475    476     """   477     Parse the file having the given 'filename'. If the optional 'html' parameter   478     is set to a true value, the content to be parsed will be treated as being   479     HTML rather than XML.   480    481     A document object is returned by this function.   482     """   483    484     return Document(Node_parseFile(filename, html))   485    486 def parseString(s, html=0):   487    488     """   489     Parse the content of the given string 's'. If the optional 'html' parameter   490     is set to a true value, the content to be parsed will be treated as being   491     HTML rather than XML.   492    493     A document object is returned by this function.   494     """   495    496     return Document(Node_parseString(s, html))   497    498 def parseURI(uri, html=0):   499    500     """   501     Parse the content found at the given 'uri'. 
If the optional 'html' parameter   502     is set to a true value, the content to be parsed will be treated as being   503     HTML rather than XML.   504    505     The parseURI does not currently work with HTML. Use parse with a stream   506     object instead. For example:   507    508     d = parse(urllib.urlopen("http://www.python.org"), html=1)   509    510     A document object is returned by this function.   511     """   512    513     return Document(Node_parseURI(uri, html))   514    515 def toString(node, encoding=None, prettyprint=0):   516    517     """   518     Return a string containing the serialised form of the given 'node' and its   519     children. The optional 'encoding' can be used to override the default   520     character encoding used in the serialisation. The optional 'prettyprint'   521     indicates whether the serialised form is prettyprinted or not (the default   522     setting).   523     """   524    525     return Node_toString(node.as_native_node(), encoding, prettyprint)   526    527 def toStream(node, stream, encoding=None, prettyprint=0):   528    529     """   530     Write the serialised form of the given 'node' and its children to the given   531     'stream'. The optional 'encoding' can be used to override the default   532     character encoding used in the serialisation. The optional 'prettyprint'   533     indicates whether the serialised form is prettyprinted or not (the default   534     setting).   535     """   536    537     Node_toStream(node.as_native_node(), stream, encoding, prettyprint)   538    539 def toFile(node, filename, encoding=None, prettyprint=0):   540    541     """   542     Write the serialised form of the given 'node' and its children to a file   543     having the given 'filename'. The optional 'encoding' can be used to override   544     the default character encoding used in the serialisation. 
The optional   545     'prettyprint' indicates whether the serialised form is prettyprinted or not   546     (the default setting).   547     """   548    549     Node_toFile(node.as_native_node(), filename, encoding, prettyprint)   550    551 def adoptNodes(nodes):   552    553     """   554     A special utility method which adopts the given low-level 'nodes' and which   555     returns a list of high-level equivalents. This is currently experimental and   556     should not be casually used.   557     """   558    559     if len(nodes) == 0:   560         return []   561     doc = Document(libxml2mod.doc(nodes[0]))   562     results = []   563     for node in nodes:   564         results.append(Node(node, doc))   565     return results   566    567 # vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py