#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

  /name1$n1/name2
  /name1$n1/name2$n2/name3
  /name1$n1/name2$n2/name3$n3/name4
  ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

  /zoo$1/name
  /zoo$1/cage$1/name
  /zoo$1/cage$2/name
  /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

  /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

  _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE
try:
    set
except NameError:
    from sets import Set as set

class FieldsError(Exception):

    "Raised when a field path conflicts with an existing document element."

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary is modified in place: a new instance
        document is added for each model name not already present, and
        elements and attributes are created according to each field path.

        A FieldsError is raised if a path names an element at a position
        already occupied by an element with a different name.
        """

        for field, value in fields:

            # Ignore selectors.

            if field.find(Constants.selector_indicator) != -1:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name denotes an attribute on the current node
                    # and ends the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_text(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_field_element(node, name, index, field)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one appended element per value,
                    # each carrying the value in the named attribute.

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_text(subvalue))

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        Each selector name maps to a list of selected nodes. A selector
        whose path cannot be resolved still receives an entry (an empty
        list if no other field contributed a node to it).
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_field_element(node, name, index, field)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            if selector_name not in selectors:
                selectors[selector_name] = []
            if node is not None:
                selectors[selector_name].append(node)

    def _to_text(self, value):

        """
        Return the field 'value' as text with CR characters removed.

        Byte strings are decoded using the processor's encoding; other
        values are assumed to be text already. On Python 2, 'bytes' is the
        plain 'str' type, so Unicode objects are passed through undecoded.
        """

        if isinstance(value, bytes):
            value = value.decode(self.encoding)
        return value.replace("\r", "")

    def _enter_field_element(self, node, name, index, field):

        """
        Enter the element with the given 'name' and 'index' within 'node'
        as described for '_enter_element', reporting any naming conflict
        as a FieldsError which mentions the originating 'field'.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:
            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'.
        Return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Raise FieldsError if the position is already occupied by an
        element with a different name; the first exception argument is
        the name of the element which was found.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Swap the placeholder for a real element of the wanted name.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None
        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if 'field' does not have
        the expected form.
        """

        # Get the components of the field name.
        # The examples below assume the separators shown in the module
        # docstring ("/" and "$"); the actual values come from Constants.
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Missing
        positions are filled with "placeholder" elements.
        """

        elements = document.xpath("*")
        i = len(elements)
        while i <= index:
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters, which are passed on to FieldProcessor.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}
        self.new_documents = set()

        # Activity-related attributes.

        self.current_activity = None

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container and
        rebuild the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    # NOTE: Was get_document.

    def _get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name', returning a
        dictionary mapping selector names to collections of selected elements,
        restricted to parameters whose names start with 'name' followed by the
        selector indicator. If 'create' is set to a true value (unlike the
        default), the selected elements will be created in the form data
        document if not already present.
        """

        # Filter the stored request parameters; the matches are collected in
        # a separate list so that the stored parameters are what is iterated.

        prefix = name + Constants.selector_indicator
        selected = [
            (parameter_name, value)
            for parameter_name, value in self.parameters.items()
            if parameter_name.startswith(prefix)
            ]
        return FieldProcessor.get_selectors(self, selected, self.documents, create)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    # NOTE: Was set_document.

    def _set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

    # Support for activities and the main/default document.

    def set_activity(self, name):

        "Set the current activity to 'name'."

        self.current_activity = name

    def get_activity(self):

        "Return the name of the current activity, or None if none was set."

        return self.current_activity

    # NOTE: Signatures are flexible to support the older methods above.

    def set_document(self, name_or_doc, doc=None):

        """
        Store a document in the container. With two arguments, 'name_or_doc'
        is the name and 'doc' the document; with one argument, 'name_or_doc'
        is the document and it is stored under the current activity's name.
        """

        if doc is not None:
            self._set_document(name_or_doc, doc)
        else:
            self._set_document(self.current_activity, name_or_doc)

    def get_document(self, name=None):

        """
        Return the document with the given 'name', or the document stored
        under the current activity's name if 'name' is omitted. Return None
        if no such document exists.
        """

        if name is None:
            return self._get_document(self.current_activity)
        else:
            return self._get_document(name)

    def is_new_document(self, name=None):

        """
        Return whether the document with the given 'name' (or that of the
        current activity if 'name' is omitted) is recorded in the container's
        collection of new documents.
        """

        if name is None:
            return self.current_activity in self.new_documents
        else:
            return name in self.new_documents

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("_action_nasty=/zoo$1/cage$3", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys, cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the requested instance document, closing the output file
    # once serialisation has finished.

    t = time.time()
    with open(args["plain-output"], "wb") as stream:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4