#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    "An error raised when a field cannot be placed in an instance document."

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a byte
        string; return any other kind of value unchanged.
        """

        # On Python 2.6+ 'bytes' is an alias of 'str', so plain Python 2
        # strings and Python 3 bytes objects are both decoded here.

        if isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' dictionary (mapping model names to
        instance documents) using the 'fields' items list, which is a
        sequence of (field name, value) pairs. Documents are created using
        'new_instance' for models not yet present in 'documents'.

        A FieldsError is raised if a field refers to a position already
        occupied by an element with a different name.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name: store the value as an attribute on the
                    # current node. Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_field_element(field, node, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element per value, each
                    # carrying the value in the attribute named by t[2].

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        The 'fields' are supplied as a sequence of (field name, value) pairs;
        only field names containing the selector indicator are considered.
        Each entry in 'selectors' is a list of the selected nodes.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element, skipping selectors for which no
            # document is available.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_field_element(field, node, name, index)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            # The selector name is always registered, even if nothing was
            # selected for it.

            if selector_name not in selectors:
                selectors[selector_name] = []
            if node is not None:
                selectors[selector_name].append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name', returning the
        new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_field_element(self, field, node, name, index):

        """
        On behalf of the given 'field', enter the element with the given
        'name' at the given 'index' within 'node', creating it if necessary,
        and return it.

        A FieldsError mentioning the field is raised if an element with a
        different name already occupies the position.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:
            raise FieldsError(
                "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        A FieldsError is raised if the element found at the position has a
        different name; its first argument is the name that was found.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Replace the padding element with the real one.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None
        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if 'field' does not have the
        expected form.
        """

        # Get the components of the field name. The separators are taken from
        # the Constants module; the examples here use those shown in the
        # module documentation.
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The part before the first path separator is discarded.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements named
        "placeholder" are appended as padding.
        """

        elements = document.xpath("*")
        i = len(elements)
        while i <= index:
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list (a sequence of field name and
        value pairs).
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters, which are passed on to the FieldProcessor initialiser.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, rebuilding
        the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    def get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name', considering only
        those parameters whose names start with 'name' followed by the
        selector indicator. Return a dictionary, as produced by
        FieldProcessor.get_selectors, mapping selector names to collections of
        selected elements. If 'create' is set to a true value (unlike the
        default), the selected elements will be created in the form data
        document if not already present.
        """

        # The stored request parameters must be searched here; the matching
        # items are collected in a separate list.

        prefix = name + Constants.selector_indicator
        matching = [
            (parameter_name, value)
            for parameter_name, value in self.parameters.items()
            if parameter_name.startswith(prefix)
            ]
        return FieldProcessor.get_selectors(self, matching, self.documents, create)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    def set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

if __name__ == "__main__":

    # Test data exercising attributes, nested elements, sparse indexes,
    # selectors and multivalued fields.
    # NOTE(review): the "???" in the zoo name looks like mis-encoded non-ASCII
    # test characters - confirm against the original file.

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("_action_nasty=/zoo$1/cage$3", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t,))

    # Look up the document before opening the output file so that an unknown
    # instance name does not leave an empty file behind.

    t = time.time()
    document = documents[args["instance-name"]]
    with open(args["plain-output"], "wb") as stream:
        document.toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t,))

    print("Selectors %s" % (repr(fields.get_selectors(items, documents)),))

# vim: tabstop=4 expandtab shiftwidth=4