#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import sys

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

# Older interpreters have no built-in set type; fall back to the sets module.

try:
    set
except NameError:
    from sets import Set as set

class FieldsError(Exception):

    """
    Raised when a field path cannot be applied to an instance document, for
    example when an element of a different name already occupies the
    requested position.
    """

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents and
        is updated in place; a new instance is created (using 'new_instance')
        for each model name encountered which has no document yet. The
        'fields' are a sequence of (field name, value) pairs. Fields which
        contain the selector indicator are ignored here.

        A FieldsError is raised if a path component names an element at a
        position already occupied by an element with a different name. A
        ValueError is raised by the integer conversion if an index in a field
        name is not numeric.
        """

        for field, value in fields:

            # Ignore selectors.

            if field.find(Constants.selector_indicator) != -1:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name: the attribute which receives the value.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    self._set_text_attribute(node, t[0], value)
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_field_element(node, name, index, field)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element per value, each
                    # carrying the value in the named attribute.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        self._set_text_attribute(subnode, t[2], subvalue)

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        Each selector name is mapped to a list of selected nodes. A name is
        given an (initially empty) list as soon as its path refers to an
        available document, even if no node is ultimately selected.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element, skipping missing or empty documents.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_field_element(node, name, index, field)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            if selector_name not in selectors:
                selectors[selector_name] = []
            if node is not None:
                selectors[selector_name].append(node)

    def _set_text_attribute(self, node, name, value):

        """
        Set the attribute with the given 'name' on 'node' to the given 'value'
        if the value is textual, decoding plain strings using the configured
        encoding and removing CR characters. Non-textual values are ignored.
        """

        # Convert the value to Unicode if necessary.

        if isinstance(value, str):
            value = unicode(value, encoding=self.encoding)

        # Remove CR characters, ignoring non-textual parameters.

        if isinstance(value, (str, unicode)):
            node.setAttributeNS(EMPTY_NAMESPACE, name, value.replace("\r", ""))

    def _enter_field_element(self, node, name, index, field):

        """
        Enter the element with the given 'name' at the zero-based 'index'
        within 'node' (see '_enter_element'), raising a FieldsError which
        mentions the originating 'field' if this is not possible.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError:

            # The first argument of the original error is the name of the
            # element actually found at the position. Note that the index
            # reported is the zero-based one.

            exc = sys.exc_info()[1]
            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'.
        Return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. A FieldsError is raised, with the name of
        the element found as its first argument, if the position is occupied
        by an element whose name is not 'name'.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        # Placeholders are replaced by a real element of the requested name.

        if existing.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)

        # Existing elements must already have the requested name.

        else:
            new_node = existing
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the new or existing element.

        return new_node

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None

        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' contains no path separator or if its
        top-level component is not a name and index pair.
        """

        # Get the components of the field name, shown here using the notation
        # from the module documentation.
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped from the result.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.

        Elements named "placeholder" are appended until the child element at
        'index' exists.
        """

        elements = document.xpath("*")
        for i in range(len(elements), index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list (a sequence of name, value pairs).
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}
        self.new_documents = set()

        # Activity-related attributes.

        self.current_activity = None

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, rebuilding
        the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    # NOTE: Was get_document.

    def _get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name', considering only
        those request parameters whose names start with 'name' followed by the
        selector indicator. If 'create' is set to a true value (unlike the
        default), the selected elements will be created in the form data
        document if not already present.

        Return a dictionary of the same form as that provided by
        'get_selectors', mapping selector names to collections of selected
        elements.
        """

        prefix = name + Constants.selector_indicator

        # Filter the stored request parameters, not the list being built.

        parameters = []
        for parameter_name, value in self.parameters.items():
            if parameter_name.startswith(prefix):
                parameters.append((parameter_name, value))
        return FieldProcessor.get_selectors(self, parameters, self.documents, create)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    # NOTE: Was set_document.

    def _set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

    # Support for activities and the main/default document.

    def set_activity(self, name):

        "Set the current activity to the given 'name'."

        self.current_activity = name

    def get_activity(self):

        "Return the name of the current activity, or None if none was set."

        return self.current_activity

    # NOTE: Signatures are flexible to support the older methods above.

    def set_document(self, name_or_doc, doc=None):

        """
        Store a document in the container. With two arguments, 'name_or_doc'
        is the name under which 'doc' is stored. With a single argument (or
        where 'doc' is None), 'name_or_doc' is the document itself and it is
        stored under the name of the current activity.
        """

        if doc is not None:
            self._set_document(name_or_doc, doc)
        else:
            self._set_document(self.current_activity, name_or_doc)

    def get_document(self, name=None):

        """
        Return the document stored under the given 'name', or under the name
        of the current activity if 'name' is omitted or None. Return None if
        no such document exists.
        """

        if name is None:
            return self._get_document(self.current_activity)
        else:
            return self._get_document(name)

    def is_new_document(self, name=None):

        """
        Return whether the given 'name', or the name of the current activity
        if 'name' is omitted or None, is recorded in the container's
        'new_documents' set. Nothing in this module adds to that set; it is
        presumably maintained by users of the container.
        """

        if name is None:
            return self.current_activity in self.new_documents
        else:
            return name in self.new_documents

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("_action_nasty=/zoo$1/cage$3", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the chosen instance, making sure that the output file is closed.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4