#!/usr/bin/env python

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. The final component, which carries no
index, names the attribute that receives the field value.
For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    Raised when a field path names an element whose name conflicts with
    the element already present at that position in the instance document.
    The exception arguments are the existing element name, the requested
    name, the list of sibling elements and the zero-based index.
    """

    pass

class Fields:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    _path_separator = "/"
    _pair_separator = "#"
    _selector_indicator = "="

    def __init__(self, encoding="utf-8"):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).
        """

        self.encoding = encoding

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents
        and is updated in place: a new document is created for each model
        name which is not yet present. The 'fields' are a sequence of
        (field name, value) pairs.

        Selector fields (those containing "=") and fields lacking a valid
        top-level "name#index" component are ignored. A ValueError is raised
        where an index is not an integer, and a FieldsError is raised where
        a path names an element conflicting with an existing one.
        """

        for field, value in fields:

            # Ignore selectors.

            if self._selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Convert the value to Unicode if necessary.
            # isinstance is used so that subclasses of the plain string type
            # are decoded as well.

            if isinstance(value, str):
                value = unicode(value, encoding=self.encoding)

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self._new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.
            # Components with more than one pair separator match neither
            # case below and are skipped.

            for component in components:
                t = component.split(self._pair_separator)
                if len(t) == 1:

                    # A component without an index names the attribute which
                    # receives the value; nothing may follow it.

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], value)
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name maps to a list of nodes, one for each selector
        field bearing that name. Fields without the "=" indicator, paths
        lacking a valid top-level "name#index" component, and paths whose
        model has no document (or a None document) are ignored. A ValueError
        is raised where an index is not an integer.
        """

        for field, value in fields:

            # Process selectors only.
            # Only the first indicator separates the name from the path;
            # any further indicators remain part of the path, although they
            # really should not exist there.

            selector_components = field.split(self._selector_indicator, 1)
            if len(selector_components) < 2:
                continue

            selector_name, path = selector_components

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(self._pair_separator)
                if len(t) == 1:

                    # Select attribute.
                    # NOTE(review): presumably this yields None where the
                    # NOTE(review): attribute is absent, in which case None
                    # NOTE(review): is recorded below - confirm.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:

                        # NOTE(review): the node reached so far is still
                        # NOTE(review): recorded below - confirm intended.

                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            selectors.setdefault(selector_name, []).append(node)

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. A FieldsError is raised if an element
        with a different name already occupies the position. Note that the
        element name "placeholder" is reserved for marking unfilled
        positions: any such element is replaced by one with the given 'name'.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        # Replace a placeholder with a real element of the requested name.

        if existing.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
            return new_node

        # Otherwise, the existing element must agree with the request.

        if existing.localName != name:
            raise FieldsError(existing.localName, name, elements, index)

        return existing

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if 'field' contains no path
        separator or if its top-level component is not of the form
        "name#index".
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(self._path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(self._pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The text before the first separator is discarded.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements named
        "placeholder" are appended until the list is long enough.
        """

        owner = document.ownerDocument
        missing = index + 1 - len(document.xpath("*"))

        # A non-positive count yields an empty range, leaving the node as is.

        for i in range(missing):
            document.appendChild(owner.createElementNS(EMPTY_NAMESPACE, "placeholder"))

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list of (field name, value) pairs.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def _new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

if __name__ == "__main__":

    d = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987")
        ]

    import time
    import sys, cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = Fields("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(d)
    print("Building time %s" % (time.time() - t))

    # Look up the document before opening the output file so that an unknown
    # instance name does not leave an empty file behind, and make sure that
    # the file is closed (and thus flushed) whatever happens.

    t = time.time()
    document = documents[args["instance-name"]]
    stream = open(args["plain-output"], "wb")
    try:
        libxml2dom.toStream(document, stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(d, documents)))

# vim: tabstop=4 expandtab shiftwidth=4