#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    "An error raised when a field cannot be applied to an instance document."

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents
        and is updated in place: a new instance is created (using the
        'new_instance' method) for each model name not already present.
        The 'fields' are a sequence of (field name, value) pairs.

        A FieldsError is raised if a field refers to an element position
        which is already occupied by an element with a different name.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name denotes an attribute on the current
                    # element and ends the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # A name and index pair denotes a child element.

                    entered = self._enter_indexed_element(node, field, t[0], t[1])
                    if entered is None:
                        break
                    node = entered

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element is appended for
                    # each value, with the value stored in an attribute.

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name is mapped to a list of the selected nodes; the
        'selectors' dictionary is updated in place. Fields which refer to
        models not found in 'documents' are ignored.

        A FieldsError is raised if a selector path refers to an element
        position which is already occupied by an element with a different
        name.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.
                    # Where the index is not usable, the node reached so far
                    # is the one selected.

                    entered = self._enter_indexed_element(node, field, t[0], t[1])
                    if entered is None:
                        break
                    node = entered

            if selector_name not in selectors:
                selectors[selector_name] = []
            selectors[selector_name].append(node)

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a
        byte string; return any other kind of value unchanged.
        """

        if isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name', returning
        the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_indexed_element(self, node, field, name, position):

        """
        From 'node' enter the child element with the given 'name' at the
        one-based 'position' (given as text taken from 'field').

        Return the entered element, or None if 'position' indicates a
        place before the first element. A FieldsError mentioning 'field'
        is raised if a differently named element occupies the position.
        A ValueError is raised if 'position' is not an integer.
        """

        # Convert from one-based indexing (the position() function) to
        # zero-based indexing.

        index = int(position) - 1
        if index < 0:
            return None

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:

            # Report the index as written in the field (one-based) so that
            # the message agrees with the field name quoted alongside it.

            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index + 1, exc.args[0]))

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        A FieldsError is raised if the element found at 'index' is neither
        a placeholder nor an element with the given 'name'; the first
        argument of the exception is the name of the element found.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        found = elements[index]

        # Placeholders are replaced by a real element of the requested name.

        if found.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, found)
            return new_node

        if found.localName != name:
            raise FieldsError(found.localName, name, elements, index)

        return found

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' has no path separator or if its
        first path component is not a name and index pair.
        """

        # Get the components of the field name.
        # Using the notation from the module documentation...
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The text before the first separator is discarded.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements
        named "placeholder" are appended to fill the missing positions.
        """

        existing = len(document.xpath("*"))
        for i in range(existing, index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list (a sequence of field name and
        value pairs).
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form, passing all arguments to the field processor
        and starting with no parameters and no documents.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Store the given 'parameters' dictionary and rebuild the documents
        from its items.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        "Return the parameters most recently stored in this form."

        return self.parameters

    def get_documents(self):

        "Return the dictionary mapping model names to documents."

        return self.documents

    def get_selectors(self):

        """
        Return a dictionary mapping selector names to the parts of this
        form's documents selected by the stored parameters.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents)

    def new_instance(self, name):

        """
        Return an instance root of the given 'name' in a new document,
        also recording it in this form's documents.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the requested instance, making sure that the output file is
    # closed afterwards.

    t = time.time()
    with open(args["plain-output"], "wb") as stream:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4