paulb@46 | 1 | #!/usr/bin/env python |
paulb@46 | 2 | |
paulb@46 | 3 | """ |
paulb@55 | 4 | DOM macros for virtual libxml2mod node methods and properties. |
paulb@46 | 5 | """ |
paulb@46 | 6 | |
paulb@46 | 7 | import xml.dom |
paulb@55 | 8 | import libxml2mod |
paulb@46 | 9 | |
class TemporaryNode:

    """
    A lightweight stand-in for an attribute, as returned by
    Node_createAttribute and Node_createAttributeNS, holding only the details
    supplied at creation time.
    """

    def __init__(self, ns, name):
        # The node kind is fixed: these placeholders only describe attributes.
        self.type = "attribute"
        self.name = name
        # No value is known until a caller assigns one.
        self.nodeValue = None
        # The namespace is kept private and exposed through ns().
        self._ns = ns

    def ns(self):
        "Return the namespace object given when the placeholder was created."
        return self._ns
paulb@46 | 19 | |
paulb@46 | 20 | # NOTE: libxml2 seems to use UTF-8 throughout. |
paulb@46 | 21 | |
def from_unicode(s):
    """
    Return 's' encoded as UTF-8 if it is a Unicode string (or an instance of a
    Unicode string subclass); return any other value, including None,
    unchanged.
    """

    # isinstance is used rather than an exact type comparison so that
    # subclasses of the Unicode string type are encoded as well.
    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s
paulb@46 | 27 | |
def to_unicode(s):
    """
    Return 's' decoded from UTF-8 if it is a byte string (or an instance of a
    byte string subclass); return any other value, including None and values
    which are already Unicode, unchanged.
    """

    # 'bytes' is the plain string type on Python 2.6+, so this decodes exactly
    # the values the previous exact-type test decoded, plus subclasses, and it
    # does not depend on the 'unicode' builtin being available.
    if isinstance(s, bytes):
        return s.decode("utf-8")
    return s
paulb@46 | 33 | |
def _get_prefix_and_localName(name):
    """
    Split the qualified 'name' into a (prefix, localName) tuple.

    A name without a colon yields (None, name). A name with exactly one colon
    yields the text before and after it. A name with more than one colon is
    not a valid qualified name and causes xml.dom.NamespaceErr to be raised
    instead of silently producing (None, None), which callers would otherwise
    pass on to libxml2 as a missing element or attribute name.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        # Always hand back a tuple so every branch has the same return type.
        return tuple(parts)
    raise xml.dom.NamespaceErr("Malformed qualified name: %r" % (name,))
paulb@46 | 43 | |
# Mapping from the type names reported by libxml2mod.type to the DOM node type
# constants. Looked up by Node_nodeType.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    # CDATA sections, document fragments and HTML documents are included so
    # that asking for their node type does not raise KeyError.
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }
paulb@46 | 57 | |
def Node_ownerDocument(node):
    "Return the document reported by libxml2mod.doc for the given 'node'."
    document = libxml2mod.doc(node)
    return document
paulb@46 | 60 | |
def Node_nodeType(node):
    """
    Return the DOM node type constant for 'node', found by looking up the
    type name reported by libxml2mod.type in the _nodeTypes table.

    KeyError is raised if the reported type name has no entry in the table.
    """

    # No 'global' declaration is needed to read a module-level name; the
    # previous declaration also misspelled the table's name.
    return _nodeTypes[libxml2mod.type(node)]
paulb@46 | 64 | |
def Node_childNodes(node):
    """
    Return a list of the children of 'node', in the order obtained by
    starting at libxml2mod.children and following libxml2mod.next.
    """

    def _walk(current):
        # Follow the sibling chain until it runs out.
        while current is not None:
            yield current
            current = libxml2mod.next(current)

    # NOTE: Consider a generator instead.

    return list(_walk(libxml2mod.children(node)))
paulb@46 | 75 | |
def Node_attributes(node):
    """
    Return a dictionary mapping (namespace content, name) tuples to the
    attribute nodes of 'node'. The namespace part of a key is None for an
    attribute without a namespace. Key strings are stored exactly as
    libxml2mod returns them (they are not passed through to_unicode).
    """

    found = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        attr_ns = libxml2mod.xmlNodeGetNs(attr)
        if attr_ns is None:
            uri = None
        else:
            uri = libxml2mod.xmlNodeGetContent(attr_ns)
        found[(uri, libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)
    return found
paulb@46 | 87 | |
def Node_namespaceURI(node):
    """
    Return the content of the namespace of 'node' as Unicode, or None if
    libxml2mod reports no namespace for the node.
    """

    namespace = libxml2mod.xmlNodeGetNs(node)
    if namespace is None:
        return None
    return to_unicode(libxml2mod.xmlNodeGetContent(namespace))
paulb@46 | 94 | |
def Node_nodeValue(node):
    "Return the content of 'node', converted with to_unicode."
    content = libxml2mod.xmlNodeGetContent(node)
    return to_unicode(content)
paulb@46 | 97 | |
def Node_prefix(node):
    """
    Return the name of the namespace of 'node' (used as the node's prefix) as
    Unicode, or None if libxml2mod reports no namespace for the node.
    """

    namespace = libxml2mod.xmlNodeGetNs(node)
    if namespace is None:
        return None
    return to_unicode(libxml2mod.name(namespace))
paulb@46 | 104 | |
def Node_nodeName(node):
    """
    Return the name of 'node': "prefix:localName" when Node_prefix yields a
    prefix, and just the local name otherwise.
    """

    prefix = Node_prefix(node)
    if prefix is None:
        return Node_localName(node)
    return prefix + ":" + Node_localName(node)
paulb@46 | 111 | |
def Node_tagName(node):
    """
    Return the node name of 'node' if libxml2mod reports its type as
    "element"; return None for any other kind of node.
    """

    if libxml2mod.type(node) != "element":
        return None
    return Node_nodeName(node)
paulb@46 | 117 | |
def Node_localName(node):
    "Return the name reported by libxml2mod for 'node', as Unicode."
    raw_name = libxml2mod.name(node)
    return to_unicode(raw_name)
paulb@46 | 120 | |
def Node_parentNode(node):
    """
    Return the parent of 'node' as reported by libxml2mod.parent, except for
    nodes whose type is "document_xml", for which None is always returned.
    """

    is_document = libxml2mod.type(node) == "document_xml"
    if not is_document:
        return libxml2mod.parent(node)
    return None
paulb@46 | 126 | |
def Node_previousSibling(node):
    """
    Return the sibling before 'node' as reported by libxml2mod.prev, or None
    if there is no such sibling.
    """

    # libxml2mod.prev already yields None when there is no sibling, so one
    # call suffices; testing the result and then calling again was redundant.
    return libxml2mod.prev(node)
paulb@46 | 132 | |
def Node_nextSibling(node):
    """
    Return the sibling after 'node' as reported by libxml2mod.next, or None
    if there is no such sibling.
    """

    # libxml2mod.next already yields None when there is no sibling, so one
    # call suffices; testing the result and then calling again was redundant.
    return libxml2mod.next(node)
paulb@46 | 138 | |
def Node_hasAttributeNS(node, ns, localName):
    """
    Return whether 'node' has an attribute with the given namespace 'ns' and
    'localName', judged by Node_getAttributeNS returning something other than
    None.
    """

    # The node must be passed through: Node_getAttributeNS takes three
    # arguments, and omitting the node raised TypeError on every call.
    return Node_getAttributeNS(node, ns, localName) is not None
paulb@46 | 141 | |
def Node_hasAttribute(node, name):
    """
    Return whether 'node' has an attribute with the given 'name', judged by
    Node_getAttribute returning something other than None.
    """

    # The node must be passed through: Node_getAttribute takes two arguments,
    # and omitting the node raised TypeError on every call.
    return Node_getAttribute(node, name) is not None
paulb@46 | 144 | |
def Node_getAttributeNS(node, ns, localName):
    """
    Return the value, as Unicode, of the attribute of 'node' having the given
    namespace 'ns' and 'localName', or None if libxml2mod finds no such
    attribute.
    """

    # Convert from Unicode before calling into libxml2, as the attribute
    # setters in this module do, so that non-ASCII names are passed as UTF-8.
    ns, localName = map(from_unicode, [ns, localName])

    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))
paulb@46 | 147 | |
def Node_getAttribute(node, name):
    """
    Return the value, as Unicode, of the attribute of 'node' having the given
    'name', or None if libxml2mod finds no such attribute.
    """

    # Convert from Unicode before calling into libxml2, as the attribute
    # setters in this module do, so that non-ASCII names are passed as UTF-8.
    name = from_unicode(name)

    return to_unicode(libxml2mod.xmlGetProp(node, name))
paulb@46 | 150 | |
def Node_getAttributeNodeNS(node, ns, localName):
    """
    Return the attribute node of 'node' having the given namespace 'ns' and
    'localName', or None if there is no such attribute.
    """

    # Convert from Unicode before calling into libxml2.
    ns, localName = map(from_unicode, [ns, localName])

    # xmlGetNsProp yields the attribute's value (a string); xmlHasNsProp is
    # the libxml2 call which yields the attribute node itself.
    # NOTE: Needs verifying: libxml2 documents that xmlHasNsProp may also
    # NOTE: yield a DTD attribute declaration supplying a default value.
    return libxml2mod.xmlHasNsProp(node, localName, ns)
paulb@46 | 154 | |
def Node_getAttributeNode(node, name):
    """
    Return the attribute node of 'node' having the given 'name', or None if
    there is no such attribute.
    """

    # Convert from Unicode before calling into libxml2.
    name = from_unicode(name)

    # xmlGetProp yields the attribute's value (a string); xmlHasProp is the
    # libxml2 call which yields the attribute node itself.
    # NOTE: Needs verifying: libxml2 documents that xmlHasProp may also yield
    # NOTE: a DTD attribute declaration supplying a default value.
    return libxml2mod.xmlHasProp(node, name)
paulb@46 | 158 | |
def Node_setAttributeNS(node, ns, name, value):
    """
    Add an attribute with the qualified 'name' and the given 'value' to
    'node', choosing the attribute's namespace as follows:

      * if 'name' carries a prefix, a new namespace for 'ns' and that prefix
        is created on 'node' and used;
      * otherwise, if 'ns' is not None and equals the content of the node's
        own namespace, the node's namespace is used;
      * otherwise, no namespace is used.
    """

    # NOTE: Need to convert from Unicode.
    ns = from_unicode(ns)
    name = from_unicode(name)
    value = from_unicode(value)

    prefix, localName = _get_prefix_and_localName(name)

    if prefix is not None:
        attr_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    elif ns is not None and ns == libxml2mod.xmlNodeGetContent(libxml2mod.xmlNodeGetNs(node)):
        attr_ns = libxml2mod.xmlNodeGetNs(node)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        attr_ns = None

    # NOTE: Might need to be xmlSetNsProp.
    libxml2mod.xmlNewNsProp(node, attr_ns, localName, value)
paulb@46 | 175 | |
def Node_setAttribute(node, name, value):
    """
    Set the attribute called 'name' on 'node' to 'value' using
    libxml2mod.xmlSetProp, after converting both strings from Unicode.
    """

    # NOTE: Need to convert from Unicode.
    encoded_name = from_unicode(name)
    encoded_value = from_unicode(value)

    libxml2mod.xmlSetProp(node, encoded_name, encoded_value)
paulb@55 | 181 | |
def _add_node(node, tmp):
    """
    Create an attribute on 'node' from the details held by 'tmp' and return
    the created attribute.

    NOTE(review): nothing in this module calls this function, and it looks
    unfinished; the individual concerns are marked below and need confirming
    before it is relied upon.
    """

    # NOTE(review): if 'tmp' is a TemporaryNode, 'tmp.ns' is a bound method and
    # NOTE(review): so is never None; 'tmp.ns()' was presumably intended.
    if tmp.ns is not None:
        # NOTE(review): Node_localName, Node_namespaceURI and Node_prefix pass
        # NOTE(review): 'tmp' straight to libxml2mod; presumably that only
        # NOTE(review): works for real libxml2 nodes - verify.
        child = libxml2mod.xmlNewNsProp(node, None, Node_localName(tmp), None)
        ns = libxml2mod.xmlNewNs(child, Node_namespaceURI(tmp), Node_prefix(tmp))
        # NOTE(review): the rest of this module uses libxml2mod.xmlSetNs;
        # NOTE(review): confirm that xmlNodeSetNs exists in libxml2mod.
        libxml2mod.xmlNodeSetNs(child, ns)
    else:
        # NOTE(review): no value argument is supplied here, unlike the
        # NOTE(review): xmlNewNsProp call above - confirm the expected arguments.
        child = libxml2mod.xmlNewProp(node, libxml2mod.name(tmp))

    return child
paulb@46 | 191 | |
def Node_setAttributeNodeNS(node, ns, name, attr):
    """
    Set the attribute with namespace 'ns' and qualified 'name' on 'node' to
    the nodeValue of 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    value = attr.nodeValue # Node_nodeValue(attr)
    Node_setAttributeNS(node, ns, name, value)
paulb@46 | 195 | |
def Node_setAttributeNode(node, name, attr):
    "Set the attribute called 'name' on 'node' to the nodeValue of 'attr'."

    # NOTE: Not actually putting the node on the element.
    value = attr.nodeValue # Node_nodeValue(attr)
    Node_setAttribute(node, name, value)
paulb@46 | 199 | |
def Node_createElementNS(node, ns, name):
    """
    Create and return a new element named by the local part of the qualified
    'name'. When 'ns' is not None, a namespace for 'ns' and the name's prefix
    is created on the new element and set as its namespace. The 'node'
    argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    ns = from_unicode(ns)
    name = from_unicode(name)

    prefix, localName = _get_prefix_and_localName(name)
    element = libxml2mod.xmlNewNode(localName)

    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is None:
        return element

    libxml2mod.xmlSetNs(element, libxml2mod.xmlNewNs(element, ns, prefix))
    return element
paulb@46 | 211 | |
def Node_createElement(node, name):
    """
    Create and return a new element with the given 'name'. The 'node'
    argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewNode(from_unicode(name))
paulb@46 | 218 | |
def Node_createAttributeNS(node, ns, name):
    """
    Return a TemporaryNode describing an attribute with the namespace 'ns'
    and the qualified 'name'. The placeholder carries a namespace object for
    'ns' and the name's prefix, or None when 'ns' is None, together with the
    local part of the name. The 'node' argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        # No element exists yet to carry the declaration (the name previously
        # used here was undefined and raised NameError), so ask libxml2 for a
        # namespace which is not attached to any node.
        # NOTE: Needs verifying: nothing visible here frees this namespace.
        new_ns = libxml2mod.xmlNewNs(None, ns, prefix)
    else:
        new_ns = None
    return TemporaryNode(new_ns, localName)
paulb@46 | 230 | |
def Node_createAttribute(node, name):
    """
    Return a TemporaryNode describing an attribute with the given 'name' and
    no namespace. The 'node' argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    encoded_name = from_unicode(name)

    placeholder = TemporaryNode(None, encoded_name)
    return placeholder
paulb@46 | 236 | |
def Node_createTextNode(node, value):
    """
    Create and return a new text node holding 'value'. The 'node' argument
    is not used.
    """

    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewText(from_unicode(value))
paulb@46 | 242 | |
def Node_createComment(node, value):
    """
    Create and return a new comment node holding 'value'. The 'node'
    argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewComment(from_unicode(value))
paulb@46 | 248 | |
def Node_insertBefore(node, tmp, oldNode):
    """
    Insert 'tmp' as the sibling immediately before 'oldNode' and return the
    result of libxml2mod.xmlAddPrevSibling. Nothing is inserted and None is
    returned if 'tmp' is a TemporaryNode. The 'node' argument is not used.
    """

    # Placeholder attributes are not libxml2 nodes and cannot be inserted.
    if isinstance(tmp, TemporaryNode):
        return None
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)
paulb@46 | 254 | |
def Node_replaceChild(node, tmp, oldNode):
    """
    Replace 'oldNode' with 'tmp' and return the result of
    libxml2mod.xmlReplaceNode. Nothing is replaced and None is returned if
    'tmp' is a TemporaryNode. The 'node' argument is not used.
    """

    # Placeholder attributes are not libxml2 nodes and cannot be substituted.
    if isinstance(tmp, TemporaryNode):
        return None
    return libxml2mod.xmlReplaceNode(oldNode, tmp)
paulb@46 | 260 | |
def Node_appendChild(node, tmp):
    """
    Add 'tmp' as a child of 'node' and return the result of
    libxml2mod.xmlAddChild. Nothing is added and None is returned if 'tmp' is
    a TemporaryNode.
    """

    # Placeholder attributes are plain Python objects, not libxml2 nodes, so
    # they must not be handed to libxml2; this mirrors the guard used by
    # Node_insertBefore and Node_replaceChild.
    if isinstance(tmp, TemporaryNode):
        return None
    return libxml2mod.xmlAddChild(node, tmp)
paulb@46 | 263 | |
def Node_removeChild(node, child):
    """
    Detach 'child' from its parent using libxml2's xmlUnlinkNode. Nothing is
    returned, and the 'node' argument is not used.
    """

    # The low-level module exposes this operation under libxml2's own
    # function name, as with the other xml* calls made in this module.
    libxml2mod.xmlUnlinkNode(child)
paulb@46 | 266 | |
def Node_xpath(node, expr, variables=None, namespaces=None):
    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of libxml2mod.xmlXPathEval.

    The optional 'namespaces' dictionary maps prefixes to namespace URIs, and
    each entry is registered with the XPath context before evaluation. The
    'variables' argument is accepted but not currently used.
    """

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # libxml2's xmlXPathEval takes the expression first and the context
        # second.
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Release the context whether or not evaluation succeeded.
        libxml2mod.xmlXPathFreeContext(context)
paulb@46 | 275 | |
paulb@46 | 276 | # Utility functions. |
paulb@46 | 277 | |
def createDocumentType(localName, publicId, systemId):
    """
    Placeholder: no document type is created and None is always returned,
    whatever 'localName', 'publicId' and 'systemId' are.
    """

    return None
paulb@46 | 280 | |
def createDocument(namespaceURI, localName, doctype):
    """
    Create and return a new document. If 'localName' is not None, a root
    element with that name and the given 'namespaceURI' is created and
    appended to the document. The 'doctype' argument is not used.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = libxml2mod.xmlNewDoc("1.0")
    if localName is None:
        return document

    Node_appendChild(document, Node_createElementNS(document, namespaceURI, localName))
    return document
paulb@46 | 288 | |
def parse(stream_or_string):
    """
    Parse a document and return it. An argument with a 'read' attribute is
    treated as a stream whose entire content is read and given to
    parseString; anything else is given to parseFile.
    """

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string)
    return parseString(stream_or_string.read())
paulb@46 | 295 | |
def parseFile(s):
    """
    Parse the file named by 's' and return the resulting document.

    IOError is raised if libxml2 cannot create a parser context for the file.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2mod.xmlCreateFileParserCtxt(s)
    if context is None:
        # Do not hand a missing context on to the calls below.
        raise IOError("Could not create a parser for file %r" % (s,))
    try:
        libxml2mod.xmlParserSetValidate(context, 0)
        libxml2mod.xmlCtxtUseOptions(context, 0)
        libxml2mod.xmlParseDocument(context)
        return libxml2mod.xmlParserGetDoc(context)
    finally:
        # The document outlives its parser context, so the context can be
        # released here instead of being leaked.
        libxml2mod.xmlFreeParserCtxt(context)
paulb@46 | 303 | |
def parseString(s):
    """
    Parse the document text held in the string 's' and return the resulting
    document.

    ValueError is raised if libxml2 cannot create a parser context for the
    string.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
    if context is None:
        # Do not hand a missing context on to the calls below.
        raise ValueError("Could not create a parser for the given string")
    try:
        libxml2mod.xmlParserSetValidate(context, 0)
        libxml2mod.xmlCtxtUseOptions(context, 0)
        libxml2mod.xmlParseDocument(context)
        return libxml2mod.xmlParserGetDoc(context)
    finally:
        # The document outlives its parser context, so the context can be
        # released here instead of being leaked.
        libxml2mod.xmlFreeParserCtxt(context)
paulb@46 | 311 | |
def parseURI(uri):
    """
    Parse the resource identified by 'uri' and return the resulting document.

    IOError is raised if libxml2 cannot create a parser context for the URI.
    """

    # The parameter is called 'uri' (the name previously used here was
    # undefined), and libxml2's xmlCreateURLParserCtxt also takes an options
    # argument; 0 matches the options applied below.
    context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
    if context is None:
        # Do not hand a missing context on to the calls below.
        raise IOError("Could not create a parser for URI %r" % (uri,))
    try:
        libxml2mod.xmlParserSetValidate(context, 0)
        libxml2mod.xmlCtxtUseOptions(context, 0)
        libxml2mod.xmlParseDocument(context)
        return libxml2mod.xmlParserGetDoc(context)
    finally:
        # The document outlives its parser context, so the context can be
        # released here instead of being leaked.
        libxml2mod.xmlFreeParserCtxt(context)
paulb@55 | 318 | |
def toFile(doc, s):
    "Save the document 'doc' to the file named by 's'."
    filename = s
    libxml2mod.xmlSaveFile(filename, doc)
paulb@46 | 321 | |
paulb@46 | 322 | # vim: tabstop=4 expandtab shiftwidth=4 |