paulb@46 | 1 | #!/usr/bin/env python |
paulb@46 | 2 | |
paulb@46 | 3 | """ |
paulb@55 | 4 | DOM macros for virtual libxml2mod node methods and properties. |
paulb@188 | 5 | |
paulb@227 | 6 | Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk> |
paulb@188 | 7 | |
paulb@293 | 8 | This program is free software; you can redistribute it and/or modify it under |
paulb@293 | 9 | the terms of the GNU Lesser General Public License as published by the Free |
paulb@293 | 10 | Software Foundation; either version 3 of the License, or (at your option) any |
paulb@293 | 11 | later version. |
paulb@188 | 12 | |
paulb@293 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paulb@293 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paulb@293 | 15 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
paulb@293 | 16 | details. |
paulb@188 | 17 | |
paulb@293 | 18 | You should have received a copy of the GNU Lesser General Public License along |
paulb@293 | 19 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paulb@46 | 20 | """ |
paulb@46 | 21 | |
paulb@46 | 22 | import xml.dom |
paulb@214 | 23 | |
paulb@214 | 24 | # Try the conventional import first. |
paulb@214 | 25 | |
paulb@214 | 26 | try: |
paulb@214 | 27 | import libxml2mod |
paulb@214 | 28 | except ImportError: |
paulb@214 | 29 | from libxmlmods import libxml2mod |
paulb@46 | 30 | |
paulb@46 | 31 | # NOTE: libxml2 seems to use UTF-8 throughout. |
paulb@287 | 32 | # NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize |
paulb@46 | 33 | |
def from_unicode(s):

    """
    Return 's' in the form handed to libxml2mod: Unicode objects are encoded
    as UTF-8, anything else is returned unchanged after checking that it can
    be converted to Unicode using the default codec. Raise TypeError if that
    conversion fails.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # A plain string holding non-ASCII bytes would reach libxml2 as a
    # non-UTF-8 string, so such values are rejected here.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s
paulb@46 | 45 | |
def to_unicode(s):

    """
    Return a Unicode object decoded from 's' as UTF-8 when 's' is a plain
    string; any other value (including None) is returned unchanged.
    """

    if not isinstance(s, str):
        return s
    return unicode(s, encoding="utf-8")
paulb@46 | 51 | |
def get_ns(ns):

    """
    Return the content of the namespace node 'ns' as Unicode, or None where
    the content is empty (the empty namespace).
    """

    uri = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # An empty (or missing) value denotes the empty namespace.
    return uri or None
paulb@170 | 59 | |
paulb@46 | 60 | def _get_prefix_and_localName(name): |
paulb@46 | 61 | t = name.split(":") |
paulb@46 | 62 | if len(t) == 1: |
paulb@46 | 63 | return None, name |
paulb@46 | 64 | elif len(t) == 2: |
paulb@46 | 65 | return t |
paulb@46 | 66 | else: |
paulb@46 | 67 | # NOTE: Should raise an exception. |
paulb@46 | 68 | return None, None |
paulb@46 | 69 | |
def _find_namespace_for_prefix(node, prefix):

    """
    Return the first namespace definition node on 'node' whose name equals
    'prefix', or None if there is no such definition.
    """

    ns_def = libxml2mod.xmlNodeGetNsDefs(node)
    # Walk the definition chain until it ends or a matching name is met.
    while ns_def is not None and libxml2mod.name(ns_def) != prefix:
        ns_def = libxml2mod.next(ns_def)
    return ns_def
paulb@196 | 80 | |
def _find_namespace(node, ns, prefix):

    """
    Return a namespace definition node usable on 'node' for the given 'ns'
    and 'prefix', or None if none matches. The definitions declared on 'node'
    are examined first, then the namespace attached to 'node' itself.
    """

    # The XML namespace is looked up via the document rather than the node's
    # own declarations.
    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    # Declarations made on the node.
    candidate = libxml2mod.xmlNodeGetNsDefs(node)
    while candidate is not None:
        if _check_namespace(candidate, ns, prefix):
            return candidate
        candidate = libxml2mod.next(candidate)

    # The namespace the node itself is in.
    own_ns = libxml2mod.xmlNodeGetNs(node)
    if own_ns is not None and _check_namespace(own_ns, ns, prefix):
        return own_ns
    return None
paulb@179 | 105 | |
def _check_namespace(current, ns, prefix):

    """
    Return 1 if the namespace definition node 'current' has the namespace
    'ns' and, where 'prefix' is not None, also the given 'prefix'; return 0
    otherwise.
    """

    declared_uri = get_ns(current)
    declared_prefix = libxml2mod.name(current)
    if ns != declared_uri:
        return 0
    # A prefix of None accepts any declared prefix.
    if prefix is not None and prefix != declared_prefix:
        return 0
    return 1
paulb@179 | 116 | |
def _make_namespace(node, ns, prefix, set_default=0):

    """
    Create and return a new namespace definition on 'node' for 'ns' and
    'prefix'. Where 'prefix' is None, a definition is only created if
    'set_default' is true; otherwise nothing is created and None is returned.
    """

    if prefix is None and not set_default:
        return None
    return libxml2mod.xmlNewNs(node, ns, prefix)
paulb@179 | 131 | |
def _get_invented_prefix(node, ns):

    """
    Return the first name of the form "NS0", "NS1", ... which is not already
    the name of a namespace definition on 'node'. The 'ns' argument is not
    consulted.
    """

    # Collect the names of the definitions declared on the node.
    taken = []
    ns_def = libxml2mod.xmlNodeGetNsDefs(node)
    while ns_def is not None:
        taken.append(libxml2mod.name(ns_def))
        ns_def = libxml2mod.next(ns_def)

    # Count upwards until an unused name is found.
    number = 0
    candidate = "NS%d" % number
    while candidate in taken:
        number += 1
        candidate = "NS%d" % number
    return candidate
paulb@192 | 145 | |
# Mapping from the type labels looked up by Node_nodeType (the values of
# libxml2mod.type) to the xml.dom.Node node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Reverse mapping from node type constants to labels. Several labels share a
# constant (the two document labels; "doctype" and "dtd"), so for those
# constants the label retained is whichever the dictionary yields last.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label
paulb@134 | 165 | |
def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes reports zero for 'node' and 'other'."

    comparison = libxml2mod.xmlXPathCmpNodes(node, other)
    return comparison == 0
paulb@277 | 168 | |
def Node_ownerDocument(node):

    "Return the document recorded for 'node' by libxml2mod."

    document = libxml2mod.doc(node)
    return document
paulb@46 | 171 | |
def Node_nodeType(node):

    """
    Return the xml.dom.Node type constant for 'node', found by looking up the
    node's libxml2mod type label in _nodeTypes. A label missing from that
    mapping causes a KeyError.
    """

    label = libxml2mod.type(node)
    return _nodeTypes[label]
paulb@46 | 174 | |
def Node_childNodes(node):

    """
    Return a list of the children of 'node' in sibling order, leaving out any
    document type nodes.
    """

    # NOTE: Consider a generator instead.

    children = []
    child = libxml2mod.children(node)
    while child is not None:
        is_doctype = Node_nodeType(child) == xml.dom.Node.DOCUMENT_TYPE_NODE
        # Doctypes are not exposed as children.
        if not is_doctype:
            children.append(child)
        child = libxml2mod.next(child)
    return children
paulb@46 | 187 | |
def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute
    nodes of 'node'. The namespace is None for attributes without one.
    Namespace declarations (xmlns attributes) are not included.
    """

    found = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        attr_ns = libxml2mod.xmlNodeGetNs(attr)
        if attr_ns is None:
            uri = None
        else:
            uri = get_ns(attr_ns)
        found[(uri, libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)

    # NOTE: Including xmlns attributes would need real nodes for the
    # NOTE: namespace definitions obtained from xmlNodeGetNsDefs.

    return found
paulb@46 | 212 | |
def Node_namespaceURI(node):

    "Return the namespace of 'node', or None if it has no namespace node."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return get_ns(ns)
paulb@46 | 219 | |
def Node_nodeValue(node):

    "Return the content of 'node' as obtained from xmlNodeGetContent, as Unicode."

    content = libxml2mod.xmlNodeGetContent(node)
    return to_unicode(content)
paulb@46 | 222 | |
paulb@87 | 223 | # NOTE: This is not properly exposed in the libxml2macro interface as the |
paulb@87 | 224 | # NOTE: writable form of nodeValue. |
paulb@87 | 225 | |
def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value' using xmlNodeSetContent."

    # NOTE: Cannot set attribute node values.
    encoded = from_unicode(value)
    libxml2mod.xmlNodeSetContent(node, encoded)
paulb@87 | 229 | |
# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

# Both names below are bound to the very same function object as
# Node_nodeValue, so they return the node content for any kind of node.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue
paulb@237 | 236 | |
def Node_prefix(node):

    """
    Return the name of the namespace node of 'node' as Unicode, or None if
    'node' has no namespace node.
    """

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))
paulb@46 | 243 | |
def Node_nodeName(node):

    """
    Return the name of 'node': "prefix:localName" where the node has a
    prefix, otherwise just the local name.
    """

    prefix = Node_prefix(node)
    local_name = Node_localName(node)
    if prefix is None:
        return local_name
    return prefix + ":" + local_name
paulb@46 | 250 | |
def Node_tagName(node):

    "Return the node name of 'node' if it is an element, None otherwise."

    if libxml2mod.type(node) != "element":
        return None
    return Node_nodeName(node)
paulb@46 | 256 | |
def Node_localName(node):

    "Return the libxml2mod name of 'node' as Unicode."

    raw_name = libxml2mod.name(node)
    return to_unicode(raw_name)
paulb@46 | 259 | |
def Node_parentNode(node):

    """
    Return the parent of 'node'. Nodes whose type label is "document_xml"
    always yield None.
    """

    if libxml2mod.type(node) != "document_xml":
        return libxml2mod.parent(node)
    return None
paulb@46 | 265 | |
def Node_previousSibling(node):

    "Return the sibling preceding 'node', or None if there is none."

    sibling = libxml2mod.prev(node)
    if sibling is None:
        return None
    return sibling
paulb@46 | 271 | |
def Node_nextSibling(node):

    "Return the sibling following 'node', or None if there is none."

    sibling = libxml2mod.next(node)
    if sibling is None:
        return None
    return sibling
paulb@46 | 277 | |
def Node_doctype(node):

    "Return the result of xmlGetIntSubset for 'node' (the internal subset)."

    subset = libxml2mod.xmlGetIntSubset(node)
    return subset
paulb@155 | 280 | |
def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has a value for the attribute with the given 'ns'
    and 'localName', or has a namespace definition matching 'ns' with
    'localName' as its prefix.
    """

    if Node_getAttributeNS(node, ns, localName) is not None:
        return True
    return _find_namespace(node, ns, localName) is not None
paulb@46 | 284 | |
def Node_hasAttribute(node, name):

    "Return whether Node_getAttribute yields a value for 'name' on 'node'."

    value = Node_getAttribute(node, name)
    return value is not None
paulb@46 | 287 | |
def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute of 'node' with the given 'ns' and
    'localName'. For the xmlns namespace, the value is the namespace declared
    on 'node' for the prefix 'localName', or None without such a declaration.
    """

    if ns != xml.dom.XMLNS_NAMESPACE:
        return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

    # Namespace declarations are held as definitions, not as properties.
    ns_def = _find_namespace_for_prefix(node, localName)
    if ns_def is None:
        return None
    return get_ns(ns_def)
paulb@46 | 297 | |
def Node_getAttribute(node, name):

    "Return the result of xmlGetProp for 'name' on 'node', as Unicode."

    raw_value = libxml2mod.xmlGetProp(node, name)
    return to_unicode(raw_value)
paulb@46 | 300 | |
def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node of 'node' recorded under ('ns', 'localName') by
    Node_attributes. A missing attribute causes a KeyError.
    """

    # NOTE: Needs verifying.
    attributes = Node_attributes(node)
    key = (ns, localName)
    return attributes[key]
paulb@46 | 304 | |
def Node_getAttributeNode(node, name):

    """
    Return the attribute node of 'node' recorded under (None, 'name') by
    Node_attributes. A missing attribute causes a KeyError.
    """

    # NOTE: Needs verifying.
    attributes = Node_attributes(node)
    key = (None, name)
    return attributes[key]
paulb@46 | 308 | |
def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node'
    to 'value'. An "xmlns:prefix" name in the xmlns namespace creates a
    namespace definition instead of an attribute, unless a matching one is
    already found.
    """

    ns, name, value = [from_unicode(item) for item in (ns, name, value)]
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if not _find_namespace(node, value, localName):
            _make_namespace(node, value, localName, set_default=0)
        return

    # NOTE: Needs verifying: what should happen to the namespace?
    # NOTE: This also catches the case where None is the element's
    # NOTE: namespace and is also used for the attribute.
    if ns is None:
        libxml2mod.xmlSetNsProp(node, None, localName, value)
        return

    # For other attributes, find or make a namespace declaration...
    declaration = _find_namespace(node, ns, prefix)
    if declaration is None:
        # Invent a prefix for unprefixed attributes with namespaces.
        if prefix is None:
            prefix = _get_invented_prefix(node, ns)
        declaration = _make_namespace(node, ns, prefix, set_default=0)

    # ...remove any conflicting attribute, and set the new one.
    if Node_hasAttributeNS(node, ns, localName):
        Node_removeAttributeNS(node, ns, localName)
    libxml2mod.xmlSetNsProp(node, declaration, localName, value)
paulb@46 | 340 | |
def Node_setAttribute(node, name, value):

    "Set the attribute 'name' on 'node' to 'value' using xmlSetProp."

    encoded_name = from_unicode(name)
    encoded_value = from_unicode(value)
    libxml2mod.xmlSetProp(node, encoded_name, encoded_value)
paulb@55 | 345 | |
def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, node name and value of
    the attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    ns = Node_namespaceURI(attr)
    name = Node_nodeName(attr)
    value = Node_nodeValue(attr)
    Node_setAttributeNS(node, ns, name, value)
paulb@55 | 349 | |
def Node_setAttributeNode(node, attr):

    """
    Set an attribute on 'node' using the node name and value of the attribute
    node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    name = Node_nodeName(attr)
    value = Node_nodeValue(attr)
    Node_setAttribute(node, name, value)
paulb@46 | 353 | |
def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'.
    Where 'node' has no such attribute, nothing is done.
    """

    # The attribute lookup fails with KeyError for a missing attribute; DOM
    # requires removal of a missing attribute to have no effect. This matters
    # to Node_setAttributeNS, whose Node_hasAttributeNS test may also succeed
    # for namespace declarations which are not attribute nodes.
    try:
        attr = Node_getAttributeNodeNS(node, ns, localName)
    except KeyError:
        return
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))
paulb@92 | 357 | |
def Node_removeAttribute(node, name):

    "Remove the attribute 'name' from 'node' using xmlUnsetProp."

    encoded_name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, encoded_name)
paulb@92 | 361 | |
def Node_createElementNS(node, ns, name):

    """
    Create and return a new element with the qualified 'name' in namespace
    'ns'. The 'node' argument is not used in creating the element.
    """

    ns, name = [from_unicode(item) for item in (ns, name)]
    prefix, localName = _get_prefix_and_localName(name)
    element = libxml2mod.xmlNewNode(localName)

    # With neither namespace nor prefix, clear the namespace and record an
    # empty xmlns attribute.
    if ns is None and prefix is None:
        libxml2mod.xmlSetNs(element, None)
        Node_setAttribute(element, "xmlns", "")
        return element

    # A non-empty namespace gets a declaration (a default one if there is no
    # prefix); an empty namespace with a prefix gets a "null" declaration.
    if ns is not None:
        uri, make_default = ns, 1
    else:
        uri, make_default = "", 0

    declaration = _find_namespace(element, uri, prefix)
    if declaration is None:
        declaration = _make_namespace(element, uri, prefix, set_default=make_default)
    libxml2mod.xmlSetNs(element, declaration)
    return element
paulb@46 | 384 | |
def Node_createElement(node, name):

    """
    Create and return a new element called 'name' using xmlNewNode. The
    'node' argument is not used.
    """

    return libxml2mod.xmlNewNode(from_unicode(name))
paulb@46 | 390 | |
def Node_createAttributeNS(node, ns, name):

    """
    Create an attribute with the qualified 'name' in namespace 'ns' on 'node'
    using xmlNewNsProp with no value, returning the new attribute node.
    """

    ns, name = [from_unicode(item) for item in (ns, name)]
    prefix, localName = _get_prefix_and_localName(name)

    # NOTE: Does it make sense to set the namespace if it is empty?
    declaration = None
    if ns is not None:
        declaration = _find_namespace(node, ns, prefix)
        if declaration is None:
            declaration = _make_namespace(node, ns, prefix, set_default=0)

    return libxml2mod.xmlNewNsProp(node, declaration, localName, None)
paulb@46 | 404 | |
def Node_createAttribute(node, name):

    """
    Create an attribute called 'name' without a namespace on 'node' by
    delegating to Node_createAttributeNS.
    """

    # NOTE: xmlNewProp does not seem to work.
    encoded_name = from_unicode(name)
    return Node_createAttributeNS(node, None, encoded_name)
paulb@46 | 410 | |
def Node_createTextNode(node, value):

    """
    Create and return a new text node holding 'value' using xmlNewText. The
    'node' argument is not used.
    """

    return libxml2mod.xmlNewText(from_unicode(value))
paulb@46 | 415 | |
def Node_createComment(node, value):

    """
    Create and return a new comment node holding 'value' using xmlNewComment.
    The 'node' argument is not used.
    """

    return libxml2mod.xmlNewComment(from_unicode(value))
paulb@46 | 420 | |
def Node_createCDATASection(node, value):

    """
    Create and return a new CDATA section holding 'value', using
    xmlNewCDataBlock with the owner document of 'node'.
    """

    data = from_unicode(value)
    document = Node_ownerDocument(node)
    # The length passed is that of the converted (byte) string.
    return libxml2mod.xmlNewCDataBlock(document, data, len(data))
paulb@251 | 425 | |
def Node_insertBefore(node, tmp, oldNode):

    """
    Add 'tmp' as the sibling preceding 'oldNode' using xmlAddPrevSibling,
    returning that function's result. The 'node' argument is not used.
    """

    inserted = libxml2mod.xmlAddPrevSibling(oldNode, tmp)
    return inserted
paulb@46 | 428 | |
def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp' using xmlReplaceNode, returning that
    function's result. The 'node' argument is not used.
    """

    replaced = libxml2mod.xmlReplaceNode(oldNode, tmp)
    return replaced
paulb@46 | 431 | |
def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node' using xmlAddChild, returning its result."

    appended = libxml2mod.xmlAddChild(node, tmp)
    return appended
paulb@46 | 434 | |
def Node_removeChild(node, child):

    """
    Unlink 'child' using xmlUnlinkNode. The 'node' argument is not used and
    nothing is returned.
    """

    libxml2mod.xmlUnlinkNode(child)
paulb@46 | 437 | |
def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using the creation
    functions of this module with 'node' as the context. Elements are copied
    together with their attributes and, where 'deep' is true, their children;
    text, comment and CDATA section nodes are copied by value.

    Raise xml.dom.NotSupportedErr (a DOMException with the code
    NOT_SUPPORTED_ERR) for any other kind of node.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # xml.dom.DOMException itself refuses to be instantiated, so the specific
    # subclass is raised. The message names the type label, not the node
    # object.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))
paulb@73 | 462 | |
def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node offering the usual DOM attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue,
    data), made using the creation functions of this module with 'node' as
    the context. Elements are copied together with their attributes and,
    where 'deep' is true, their children; text, comment and CDATA section
    nodes are copied by value.

    Raise xml.dom.NotSupportedErr (a DOMException with the code
    NOT_SUPPORTED_ERR) for any other kind of node.
    """

    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # xml.dom.DOMException itself refuses to be instantiated, so the specific
    # subclass is raised. Node types absent from the label mapping (document
    # fragments, for example) must not cause a KeyError here.
    label = _reverseNodeTypes.get(node_type, "unknown")
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (label, node_type))
paulb@73 | 487 | |
def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces which are registered for use in 'expr'. The
    'variables' argument is accepted but currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation raises.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: Variable registration functions are not exposed in current
        # NOTE: versions of libxml2.
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)
paulb@46 | 505 | |
paulb@269 | 506 | # Exceptions. |
paulb@269 | 507 | |
class LSException(Exception):

    """
    DOM Level 3 Load/Save exception. The first constructor argument is
    expected to be one of the codes PARSE_ERR or SERIALIZE_ERR.
    """

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        return str(self)

    def __str__(self):

        """
        Return a description of the exception based on the code given as the
        first argument, or the generic exception representation where the
        code is absent or not recognised.
        """

        # An exception made without arguments has no code to inspect.
        exctype = None
        if self.args:
            exctype = self.args[0]

        if exctype == self.PARSE_ERR:
            return "Parse error: LSException(%d)" % exctype
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: LSException(%d)" % exctype
        else:
            return Exception.__repr__(self)
paulb@300 | 526 | |
paulb@46 | 527 | # Utility functions. |
paulb@46 | 528 | |
def createDocument(namespaceURI, localName, doctype):

    """
    Create and return a new version 1.0 document. Where 'localName' is not
    None, a root element is created with 'namespaceURI' and 'localName' and
    appended; where 'doctype' is not None, an internal subset is created from
    its localName, publicId and systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = libxml2mod.xmlNewDoc("1.0")

    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        Node_appendChild(document, Node_createElementNS(document, namespaceURI, localName))

    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(document, doctype.localName, doctype.publicId, doctype.systemId)

    return document
paulb@46 | 539 | |
def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the given 'stream_or_string' and return the document. An object
    with a 'read' attribute is read and parsed with parseString; anything
    else is passed to parseFile. The other arguments are passed on unchanged.
    """

    options = {"html" : html, "htmlencoding" : htmlencoding, "unfinished" : unfinished}
    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, **options)
    return parseString(stream_or_string.read(), **options)
paulb@46 | 546 | |
def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file named 's' and return the document. With 'html' set, the
    HTML reader is used with 'htmlencoding'. Otherwise, raise an LSException
    with the code PARSE_ERR if the parser does not report the document as
    well-formed, unless 'unfinished' is set.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        return libxml2mod.htmlReadFile(s, htmlencoding, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

    context = libxml2mod.xmlCreateFileParserCtxt(s)
    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if not unfinished and not Parser_well_formed(context):
        raise LSException(LSException.PARSE_ERR)
    return doc
paulb@46 | 560 | |
def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the document text 's' and return the document. With 'html' set, the
    HTML reader is used with 'htmlencoding' and no URL. Otherwise, raise an
    LSException with the code PARSE_ERR if the parser does not report the
    document as well-formed, unless 'unfinished' is set.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        # NOTE: URL given as None.
        html_url = None
        html_options = HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET
        return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding, html_options)

    context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if not unfinished and not Parser_well_formed(context):
        raise LSException(LSException.PARSE_ERR)
    return doc
paulb@46 | 577 | |
def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the resource at 'uri' and return the document. Raise an LSException
    with the code PARSE_ERR if the parser does not report the document as
    well-formed, unless 'unfinished' is set. HTML is not supported: with
    'html' set, NotImplementedError is raised.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        raise NotImplementedError("parseURI does not yet support HTML")

    context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if not unfinished and not Parser_well_formed(context):
        raise LSException(LSException.PARSE_ERR)
    return doc
paulb@55 | 591 | |
def toString(node, encoding=None, prettyprint=0):

    """
    Return the result of libxml2mod.serializeNode for 'node' with the given
    'encoding' and 'prettyprint' setting.
    """

    serialized = libxml2mod.serializeNode(node, encoding, prettyprint)
    return serialized
paulb@73 | 594 | |
def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of 'node', as produced by toString with the
    given 'encoding' and 'prettyprint' setting, to 'stream'.
    """

    serialized = toString(node, encoding, prettyprint)
    stream.write(serialized)
paulb@73 | 597 | |
def toFile(node, f, encoding=None, prettyprint=0):

    """
    Pass 'node' and 'f' to libxml2mod.saveNodeTo together with the given
    'encoding' and 'prettyprint' setting. Nothing is returned.
    """

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)
paulb@46 | 600 | |
paulb@273 | 601 | # libxml2mod constants and helper functions. |
paulb@103 | 602 | |
# Parser option flags, written as the individual bits they occupy. The HTML
# and XML variants of each option have the same value.

HTML_PARSE_NOERROR = 1 << 5
HTML_PARSE_NOWARNING = 1 << 6
HTML_PARSE_NONET = 1 << 11
XML_PARSE_NOERROR = 1 << 5
XML_PARSE_NOWARNING = 1 << 6
XML_PARSE_NONET = 1 << 11
paulb@103 | 609 | |
def Parser_push():

    """
    Create and return a push parser context using xmlCreatePushParser with no
    handler, an empty initial chunk and no filename.
    """

    context = libxml2mod.xmlCreatePushParser(None, "", 0, None)
    return context
paulb@273 | 612 | |
def Parser_configure(context):

    """
    Configure the parser 'context': pedantic mode and validation are switched
    off, and the NOERROR, NOWARNING and NONET options are applied.
    """

    options = XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, options)
paulb@273 | 617 | |
def Parser_feed(context, s, terminate=1):

    """
    Feed the string 's' to the push parser 'context' using xmlParseChunk.

    The 'terminate' value is passed as the final argument of xmlParseChunk,
    where libxml2 documents it as the end-of-document indicator. The default
    of 1 keeps the original behaviour of marking every chunk as the last one;
    pass 0 for all but the final chunk when feeding a document in pieces.
    """

    libxml2mod.xmlParseChunk(context, s, len(s), terminate)
paulb@273 | 620 | |
def Parser_well_formed(context):

    "Return the well-formedness indicator held by the parser 'context'."

    well_formed = libxml2mod.xmlParserGetWellFormed(context)
    return well_formed
paulb@273 | 623 | |
def Parser_document(context):

    "Return the document held by the parser 'context'."

    document = libxml2mod.xmlParserGetDoc(context)
    return document
paulb@273 | 626 | |
def Parser_parse(context):

    """
    Run xmlParseDocument on the parser 'context'. Nothing is returned; the
    outcome is obtained using Parser_document and Parser_well_formed.
    """

    libxml2mod.xmlParseDocument(context)
paulb@273 | 629 | |
paulb@46 | 630 | # vim: tabstop=4 expandtab shiftwidth=4 |