# HG changeset patch # User paulb # Date 1173730818 0 # Node ID e631f2826b8ca191b973bc32be83d53c495ea2ab # Parent f7db5375fa9b3d69cc7bca3aa328b7d8e853f618 [project @ 2007-03-12 20:20:18 by paulb] Fixed getElementsByTagName(NS). Made parseURI work for HTML documents. diff -r f7db5375fa9b -r e631f2826b8c libxml2dom/__init__.py --- a/libxml2dom/__init__.py Mon Mar 12 20:20:01 2007 +0000 +++ b/libxml2dom/__init__.py Mon Mar 12 20:20:18 2007 +0000 @@ -29,6 +29,7 @@ parseFile as Node_parseFile, \ toString as Node_toString, toStream as Node_toStream, \ toFile as Node_toFile +import urllib # for parseURI in HTML mode class Implementation(object): @@ -345,10 +346,10 @@ Node_removeChild(self._node, tmp) def getElementsByTagName(self, tagName): - return self.xpath("//" + tagName) + return self.xpath(".//" + tagName) def getElementsByTagNameNS(self, namespaceURI, localName): - return self.xpath("//ns:" + localName, namespaces={"ns" : namespaceURI}) + return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI}) def normalize(self): text_nodes = [] @@ -540,16 +541,29 @@ is set to a true value, the content to be parsed will be treated as being HTML rather than XML. - The parseURI does not currently work with HTML. Use parse with a stream - object instead. For example: + XML documents are retrieved using libxml2's own network capabilities; HTML + documents are retrieved using the urllib module provided by Python. To + retrieve either kind of document using Python's own modules for this purpose + (such as urllib), open a stream and pass it to the parse function: - d = parse(urllib.urlopen("http://www.python.org"), html=1) + f = urllib.urlopen(uri) + try: + doc = libxml2dom.parse(f, html) + finally: + f.close() A document object is returned by this function. """ - impl = impl or default_impl - return Document(Node_parseURI(uri, html), impl) + if html: + f = urllib.urlopen(uri) + try: + return parse(f, html, impl) + finally: + f.close() + else: + impl = impl or default_impl + return Document(Node_parseURI(uri, html), impl) def toString(node, encoding=None, prettyprint=0):