#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005, 2006, 2007, 2008, 2009 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import sys

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE
try:
    set
except NameError:
    from sets import Set as set

FILE_NAMESPACE = "http://www.boddie.org.uk/ns/xmltools/file-upload"

class FieldsError(Exception):

    "An error raised when a field cannot be placed in an instance document."

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list, this
        being a sequence of (field name, value) pairs.

        Include a special entry in 'documents' for the key None,
        referencing a list of file upload tuples of the form
        (filename, content).

        Raise FieldsError if an element named in a field conflicts with an
        element already present at the same position.
        """

        files = []

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name denotes an attribute and ends the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    self._set_attribute(node, t[0], value, files)
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element_for_field(node, name, index, field)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one element per value, each bearing
                    # the value in the attribute named after the doubled
                    # separator.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        self._set_attribute(subnode, t[2], subvalue, files)

        documents[None] = files

    def _set_attribute(self, node, name, value, files):

        """
        Set an attribute on 'node' having the given 'name' and 'value', adding
        entries to the 'files' list if file upload fields are detected.

        Values which are neither strings nor objects providing 'content' and
        'filename' attributes are ignored.
        """

        # Convert the value to Unicode if necessary.

        if type(value) == type(""):
            value = unicode(value, encoding=self.encoding)

        # Remove CR characters, ignoring non-textual parameters.

        if isinstance(value, (str, unicode)):
            node.setAttributeNS(EMPTY_NAMESPACE, name, value.replace("\r", ""))

        # Handle file uploads having certain attributes. The namespaced
        # attribute records the position of the upload in 'files'.

        elif hasattr(value, "content") and hasattr(value, "filename"):
            node.setAttributeNS(FILE_NAMESPACE, "file:" + name, str(len(files)))
            node.setAttributeNS(EMPTY_NAMESPACE, name, value.filename)
            files.append((value.filename, value.content))

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        Raise FieldsError if 'create' is set and an element to be created
        conflicts with an element already present at the same position.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element, skipping absent documents.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_element_for_field(node, name, index, field)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            # The selector name is always registered, even if nothing was
            # selected for it.

            if selector_name not in selectors:
                selectors[selector_name] = []
            if node is not None:
                selectors[selector_name].append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name', returning the
        new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Raise FieldsError if the element already at 'index' has a different
        name; the first exception argument is the name which was found.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _enter_element_for_field(self, node, name, index, field):

        """
        Enter the element with the given 'name' and 'index' within 'node' as
        done by '_enter_element', reporting any conflict in terms of the
        'field' being processed.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError:

            # The exception is fetched via sys.exc_info so that no
            # version-specific "except ... as/," syntax is needed.

            exc = sys.exc_info()[1]
            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None
        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if 'field' does not have the
        expected form.
        """

        # Get the components of the field name. The examples assume the
        # separators shown in the module docstring ("/" and "$").
        # Example: /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped from the result.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements named
        "placeholder" are appended to fill any gap.
        """

        elements = document.xpath("*")
        i = len(elements)
        while i <= index:
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list. The dictionary also maps None to
        the list of file upload tuples.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

        # NOTE(review): nothing visible in this module adds names to this
        # set; presumably it is maintained by other code - confirm.

        self.new_documents = set()

        # Activity-related attributes.

        self.current_activity = None

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, rebuilding
        the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    # NOTE: Was get_document.

    def _get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name', returning a
        dictionary mapping selector names to collections of selected elements,
        restricted to the parameters whose names start with 'name' followed by
        the selector indicator. If 'create' is set to a true value (unlike the
        default), the selected elements will be created in the form data
        document if not already present.
        """

        # The stored request parameters must be consulted here: iterating
        # over the (empty) result list itself can never work.

        prefix = name + Constants.selector_indicator
        parameters = []
        for parameter_name, value in self.parameters.items():
            if parameter_name.startswith(prefix):
                parameters.append((parameter_name, value))
        return FieldProcessor.get_selectors(self, parameters, self.documents, create)

    def get_files(self):

        """
        Get the uploaded file details as a list of tuples of the form
        (filename, content).
        """

        return self.documents.get(None, [])

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    # NOTE: Was set_document.

    def _set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

    # Support for activities and the main/default document.

    def set_activity(self, name):

        "Set the current activity to the given 'name'."

        self.current_activity = name

    def get_activity(self):

        "Return the name of the current activity, or None if none was set."

        return self.current_activity

    # NOTE: Signatures are flexible to support the older methods above.

    def set_document(self, name_or_doc, doc=None):

        """
        Store a document in the container. With two arguments, 'name_or_doc'
        is the name under which 'doc' is stored; with one argument,
        'name_or_doc' is the document, stored under the current activity name.
        """

        if doc is not None:
            self._set_document(name_or_doc, doc)
        else:
            self._set_document(self.current_activity, name_or_doc)

    def get_document(self, name=None):

        """
        Return the document stored under 'name', or under the current activity
        name if 'name' is omitted. Return None if no such document exists.
        """

        if name is None:
            return self._get_document(self.current_activity)
        else:
            return self._get_document(name)

    def is_new_document(self, name=None):

        """
        Return whether 'name', or the current activity name if 'name' is
        omitted, is recorded in the container's collection of new documents.
        """

        if name is None:
            return self.current_activity in self.new_documents
        else:
            return name in self.new_documents

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("_action_nasty=/zoo$1/cage$3", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Close the output file explicitly once the document has been written.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4