1.1 --- a/README.txt Thu Jul 18 17:54:37 2013 +0200
1.2 +++ b/README.txt Thu Jul 18 18:03:47 2013 +0200
1.3 @@ -91,7 +91,8 @@
1.4 New in libxml2dom 0.5.1 (Changes since libxml2dom 0.5)
1.5 ------------------------------------------------------
1.6
1.7 - * Fixed the document encoding for HTML documents retrieved using parseURI.
1.8 + * Changed the parsing of HTML documents retrieved using parseURI to use the
1.9 + libxml2 network retrieval support.
1.10
1.11 New in libxml2dom 0.5 (Changes since libxml2dom 0.4.7)
1.12 ------------------------------------------------------
2.1 --- a/libxml2dom/__init__.py Thu Jul 18 17:54:37 2013 +0200
2.2 +++ b/libxml2dom/__init__.py Thu Jul 18 18:03:47 2013 +0200
2.3 @@ -28,7 +28,6 @@
2.4 parseFile as Node_parseFile, \
2.5 toString as Node_toString, toStream as Node_toStream, \
2.6 toFile as Node_toFile
2.7 -import urllib2 # for parseURI in HTML mode
2.8 import libxml2dom.errors
2.9
2.10 # Standard namespaces.
2.11 @@ -813,11 +812,9 @@
2.12 remote documents (such as DTDs) will be followed in order to obtain such
2.13 documents.
2.14
2.15 - XML documents are retrieved using libxml2's own network capabilities; HTML
2.16 - documents are retrieved using the urllib2 module provided by Python. To
2.17 - retrieve either kind of document using Python's own modules for this purpose
2.18 - (such as urllib or urllib2), open a stream and pass it to the parse
2.19 - function:
2.20 + Documents are retrieved using libxml2's own network capabilities. To
2.21 + retrieve documents using Python's own modules for this purpose (such as
2.22 + urllib or urllib2), open a stream and pass it to the parse function:
2.23
2.24 f = urllib.urlopen(uri)
2.25 try:
2.26 @@ -829,13 +826,8 @@
2.27 """
2.28
2.29 if html:
2.30 - f = urllib2.urlopen(uri)
2.31 - try:
2.32 - htmlencoding = f.headers.get("content-type", htmlencoding)
2.33 - return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
2.34 - validate=validate, remote=remote, impl=impl)
2.35 - finally:
2.36 - f.close()
2.37 + return parseFile(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
2.38 + validate=validate, remote=remote, impl=impl)
2.39 else:
2.40 impl = impl or default_impl
2.41 return impl.adoptDocument(Node_parseURI(uri, unfinished=unfinished,