#!/usr/bin/env python

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    Raised when a field path names an element at a position which is
    already occupied by an element with a different name.
    """

    pass

class Fields:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    _path_separator = "/"
    _pair_separator = "#"
    _selector_indicator = "="

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance
        documents and is updated in place: a new document is created
        (see 'new_instance') for each model name not yet present.
        The 'fields' parameter is a sequence of (field name, value)
        pairs. Selector fields (those containing "=") and fields whose
        names do not start with a "/name#index" component are ignored.

        A non-numeric element index causes the ValueError from the
        integer conversion to propagate. A FieldsError is raised by
        '_enter_element' where a path conflicts with an existing
        element.
        """

        for field, value in fields:

            # Ignore selectors.

            if self._selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Convert from lists if necessary.

            if self.values_are_lists:
                value = value[0]

            # Convert byte strings to Unicode if necessary. On Python 2,
            # 'bytes' is the plain string type.

            if isinstance(value, bytes):
                value = value.decode(self.encoding)

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.
            # Components containing more than one "#" match neither case
            # below and are skipped without ending the traversal.

            for component in components:
                parts = component.split(self._pair_separator)

                if len(parts) == 1:

                    # A bare name denotes an attribute and ends the path.

                    node.setAttributeNS(EMPTY_NAMESPACE, parts[0], value)
                    break

                elif len(parts) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = parts[0], int(parts[1]) - 1

                    # Abandon the field if the index is not positive.

                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name is mapped to a list to which one entry is
        appended for every matching field. Fields without "=" are
        ignored, as are selectors whose model has no document (or a
        None entry) in 'documents'.

        NOTE: If a path ends in an attribute which does not exist, the
        NOTE: appended entry is whatever getAttributeNodeNS returned for
        NOTE: it. If an element index is not positive, traversal stops
        NOTE: and the node reached so far is appended.
        """

        for field, value in fields:

            # Process selectors only.
            # Everything after the first "=" is the path; the indicator
            # really should not occur within the path itself.

            selector_components = field.split(self._selector_indicator, 1)
            if len(selector_components) < 2:
                continue

            selector_name, path = selector_components

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                parts = component.split(self._pair_separator)

                if len(parts) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, parts[0])
                    break

                elif len(parts) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = parts[0], int(parts[1]) - 1
                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            selectors.setdefault(selector_name, []).append(node)

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the element at that position. A FieldsError is raised,
        carrying (existing name, requested name, child elements, index),
        if the position already holds an element with another name.

        NOTE: Any child whose local name is "placeholder" is treated as
        NOTE: an unfilled slot and replaced, whatever created it.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        if existing.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
            return new_node

        if existing.localName != name:
            raise FieldsError(existing.localName, name, elements, index)

        return existing

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' contains no path separator or
        if its first component is not of the form "name#index".
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(self._path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(self._pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped from the result.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements
        named "placeholder" are appended until enough children exist.
        """

        existing_count = len(document.xpath("*"))
        for _ in range(existing_count, index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields', a sequence of (field name, value)
        pairs as accepted by 'complete_documents'.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = Fields("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t,))

    # Write the requested instance, making sure that the output file is
    # closed even if serialisation fails.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        libxml2dom.toStream(documents[args["instance-name"]], stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t,))

    print("Selectors %s" % (repr(fields.get_selectors(items, documents)),))

# vim: tabstop=4 expandtab shiftwidth=4