1 #!/usr/bin/env python 2 # -*- coding: iso-8859-1 -*- 3 4 """ 5 Interpretation of field collections from sources such as HTTP request parameter 6 dictionaries. 7 8 Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk> 9 10 This library is free software; you can redistribute it and/or 11 modify it under the terms of the GNU Lesser General Public 12 License as published by the Free Software Foundation; either 13 version 2.1 of the License, or (at your option) any later version. 14 15 This library is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Lesser General Public License for more details. 19 20 You should have received a copy of the GNU Lesser General Public 21 License along with this library; if not, write to the Free Software 22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 23 24 -------- 25 26 Classes which process field collections, producing instance documents. Each 27 field entry consists of a field name mapped to a string value, where the field 28 name may have the following formats: 29 30 /name1$n1/name2 31 /name1$n1/name2$n2/name3 32 /name1$n1/name2$n2/name3$n3/name4 33 ... 34 35 The indexes n1, n2, n3, ... indicate the position of elements (starting from 1) 36 in the entire element list, whose elements may have different names. For 37 example: 38 39 /zoo$1/name 40 /zoo$1/cage$1/name 41 /zoo$1/cage$2/name 42 /zoo$1/funding$3/contributor$1/name 43 44 Where multiple values can be collected for a given field, the following notation 45 is employed: 46 47 /package$1/categories$1/category$$value 48 49 Some fields may contain the "=" string. This string is reserved and all text 50 following it is meant to specify a path into a particular document. For example: 51 52 _action_add_animal=/zoo$1/cage$2 53 """ 54 55 import Constants 56 import libxml2dom 57 from xml.dom import EMPTY_NAMESPACE 58 59 class FieldsError(Exception): 60 pass 61 62 class FieldProcessor: 63 64 """ 65 A class which converts fields in the documented form to XML 66 instance documents. 67 """ 68 69 def __init__(self, encoding="utf-8", values_are_lists=0): 70 71 """ 72 Initialise the fields processor with the given 'encoding', 73 which is optional and which only applies to field data in 74 Python string form (and not Unicode objects). 75 76 If the optional 'values_are_lists' parameter is set to true 77 then each actual field value will be obtained by taking the 78 first element from each supplied field value. 79 """ 80 81 self.encoding = encoding 82 self.values_are_lists = values_are_lists 83 84 def complete_documents(self, documents, fields): 85 86 """ 87 Complete the given 'documents' using the 'fields' items list. 88 """ 89 90 for field, value in fields: 91 92 # Ignore selectors. 93 94 if field.find(Constants.selector_indicator) != -1: 95 continue 96 97 model_name, components = self._get_model_name_and_components(field) 98 if model_name is None: 99 continue 100 101 # Get a new instance document if none has been made for the 102 # model. 103 104 if not documents.has_key(model_name): 105 documents[model_name] = self.new_instance(model_name) 106 node = documents[model_name] 107 108 # Traverse the components within the instance. 109 110 for component in components: 111 t = component.split(Constants.pair_separator) 112 if len(t) == 1: 113 114 # Convert from lists if necessary. 115 116 if self.values_are_lists: 117 value = value[0] 118 119 # Convert the value to Unicode if necessary. 120 121 if type(value) == type(""): 122 value = unicode(value, encoding=self.encoding) 123 124 # Remove CR characters. 125 126 node.setAttributeNS(EMPTY_NAMESPACE, t[0], value.replace("\r", "")) 127 break 128 129 elif len(t) == 2: 130 131 # Convert from one-based indexing (the position() 132 # function) to zero-based indexing. 133 134 name, index = t[0], int(t[1]) - 1 135 if index < 0: 136 break 137 try: 138 node = self._enter_element(node, name, index) 139 except FieldsError, exc: 140 raise FieldsError, "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % ( 141 field, name, index, exc.args[0]) 142 143 elif len(t) == 3 and t[1] == "": 144 145 # Multivalued fields. 146 147 if not self.values_are_lists: 148 values = [value] 149 else: 150 values = value 151 152 name = t[0] 153 for subvalue in values: 154 subnode = self._append_element(node, name) 155 156 # Convert the value to Unicode if necessary. 157 158 if type(subvalue) == type(""): 159 subvalue = unicode(subvalue, encoding=self.encoding) 160 161 # Remove CR characters. 162 163 subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], subvalue.replace("\r", "")) 164 165 def complete_selectors(self, selectors, fields, documents, create): 166 167 """ 168 Fill in the given 'selectors' dictionary using the given 169 'fields' so that it contains mappings from selector names to 170 parts of the specified 'documents'. If 'create' is set to a 171 true value, selected elements will be created if not already 172 present; otherwise, ignore such selectors. 173 """ 174 175 for field, value in fields: 176 177 # Process selectors only. 178 179 selector_components = field.split(Constants.selector_indicator) 180 if len(selector_components) < 2: 181 continue 182 183 # Get the selector name and path. 184 # Note that the joining of the components uses the separator, 185 # but the separator really should not exist in the path. 186 187 selector_name = selector_components[0] 188 path = Constants.selector_indicator.join(selector_components[1:]) 189 190 model_name, components = self._get_model_name_and_components(path) 191 if model_name is None: 192 continue 193 194 # Go to the instance element. 195 196 if not documents.has_key(model_name) or documents[model_name] is None: 197 continue 198 199 node = documents[model_name] 200 201 # Traverse the path to find the part of the document to be 202 # selected. 203 204 for component in components: 205 t = component.split(Constants.pair_separator) 206 if len(t) == 1: 207 208 # Select attribute. 209 210 node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0]) 211 break 212 213 elif len(t) == 2: 214 215 # Convert from one-based indexing (the position() function) 216 # to zero-based indexing. 217 218 name, index = t[0], int(t[1]) - 1 219 if index < 0: 220 break 221 222 # If create is set, create selected elements. 223 224 if create: 225 try: 226 node = self._enter_element(node, name, index) 227 except FieldsError, exc: 228 raise FieldsError, "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % ( 229 field, name, index, exc.args[0]) 230 231 # Where a node cannot be found, do not create a selector. 232 233 else: 234 node = self._find_element(node, name, index) 235 if node is None: 236 break 237 238 if not selectors.has_key(selector_name): 239 selectors[selector_name] = [] 240 if node is not None: 241 selectors[selector_name].append(node) 242 243 def _append_element(self, node, name): 244 245 """ 246 Within 'node' append an element with the given 'name'. 247 """ 248 249 new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name) 250 node.appendChild(new_node) 251 return new_node 252 253 def _enter_element(self, node, name, index): 254 255 """ 256 From 'node' enter the element with the given 'name' at the 257 given 'index' position amongst the child elements. Create 258 missing child elements if necessary. 259 """ 260 261 self._ensure_elements(node, index) 262 263 elements = node.xpath("*") 264 if elements[index].localName == "placeholder": 265 new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name) 266 node.replaceChild(new_node, elements[index]) 267 else: 268 new_node = elements[index] 269 if new_node.localName != name: 270 raise FieldsError, (new_node.localName, name, elements, index) 271 272 # Enter the newly-created element. 273 274 return new_node 275 276 def _find_element(self, node, name, index): 277 278 """ 279 From 'node' find the element with the given 'name' at the 280 given 'index' position amongst the child elements. Return 281 None if no such element exists. 282 """ 283 284 elements = node.xpath("*") 285 try: 286 new_node = elements[index] 287 if new_node.localName != name: 288 return None 289 except IndexError: 290 return None 291 return new_node 292 293 def _get_model_name_and_components(self, field): 294 295 """ 296 From 'field', return the model name and components which 297 describe the path within the instance document associated 298 with that model. 299 """ 300 301 # Get the components of the field name. 302 # Example: /name1#n1/name2#n2/name3 303 # Expected: ['', 'name1#n1', 'name2#n2', 'name3'] 304 305 components = field.split(Constants.path_separator) 306 if len(components) < 2: 307 return None, None 308 309 # Extract the model name from the top-level element 310 # specification. 311 # Expected: ['name1', 'n1'] 312 313 model_name_and_index = components[1].split(Constants.pair_separator) 314 if len(model_name_and_index) != 2: 315 return None, None 316 317 # Expected: 'name1', ['', 'name1#n1', 'name2#n2', 'name3'] 318 319 return model_name_and_index[0], components[1:] 320 321 def _ensure_elements(self, document, index): 322 323 """ 324 In the given 'document', extend the child elements list 325 so that a node can be stored at the given 'index'. 326 """ 327 328 elements = document.xpath("*") 329 i = len(elements) 330 while i <= index: 331 new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder") 332 document.appendChild(new_node) 333 i += 1 334 335 def make_documents(self, fields): 336 337 """ 338 Make a dictionary mapping model names to new documents prepared 339 from the given 'fields' dictionary. 340 """ 341 342 documents = {} 343 self.complete_documents(documents, fields) 344 345 # Fix the dictionary to return the actual document root. 346 347 for model_name, instance_root in documents.items(): 348 documents[model_name] = instance_root 349 return documents 350 351 def get_selectors(self, fields, documents, create=0): 352 353 """ 354 Get a dictionary containing a mapping of selector names to 355 selected parts of the given 'documents'. If 'create' is set 356 to a true value, selected elements will be created if not 357 already present. 358 """ 359 360 selectors = {} 361 self.complete_selectors(selectors, fields, documents, create) 362 return selectors 363 364 def new_instance(self, name): 365 366 "Return an instance root of the given 'name' in a new document." 367 368 return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None) 369 370 # An alias for the older method name. 371 372 new_document = new_instance 373 374 # NOTE: Legacy name exposure. 375 376 Fields = FieldProcessor 377 378 class Form(FieldProcessor): 379 380 "A collection of documents processed from form fields." 381 382 def __init__(self, *args, **kw): 383 384 """ 385 Initialise the form data container with the general 'args' and 'kw' 386 parameters. 387 """ 388 389 FieldProcessor.__init__(self, *args, **kw) 390 self.parameters = {} 391 self.documents = {} 392 393 def set_parameters(self, parameters): 394 395 "Set the request 'parameters' (or fields) in the container." 396 397 self.parameters = parameters 398 self.documents = self.make_documents(self.parameters.items()) 399 400 def get_parameters(self): 401 402 """ 403 Get the request parameters (or fields) from the container. Note that 404 these parameters comprise the raw form field values submitted in a 405 request rather than the structured form data. 406 407 Return a dictionary mapping parameter names to values. 408 """ 409 410 return self.parameters 411 412 def get_documents(self): 413 414 """ 415 Get the form data documents from the container, returning a dictionary 416 mapping document names to DOM-style document objects. 417 """ 418 419 return self.documents 420 421 def get_document(self, name): 422 423 """ 424 Get the form data document with the given 'name' from the container, 425 returning a DOM-style document object if such a document exists, or None 426 if no such document can be found. 427 """ 428 429 return self.documents.get(name) 430 431 def get_selectors(self, create=0): 432 433 """ 434 Get the form data selectors from the container, returning a dictionary 435 mapping selector names to collections of selected elements. If 'create' 436 is set to a true value (unlike the default), the selected elements will 437 be created in the form data document if not already present. 438 """ 439 440 return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create) 441 442 def get_selector(self, name, create=0): 443 444 """ 445 Get the form data selectors for the given 'name', returning a collection 446 of selected elements. If 'create' is set to a true value (unlike the 447 default), the selected elements will be created in the form data 448 document if not already present. 449 """ 450 451 parameters = [] 452 for parameter_name, value in parameters.items(): 453 if parameter_name.startswith(name + Constants.selector_indicator): 454 parameters.append((parameter_name, value)) 455 return FieldProcessor.get_selectors(self, parameters, self.documents, create) 456 457 def new_instance(self, name): 458 459 """ 460 Make a new document with the given 'name', storing it in the container 461 and returning the document. 462 """ 463 464 doc = FieldProcessor.new_instance(self, name) 465 self.documents[name] = doc 466 return doc 467 468 # An alias for the older method name. 469 470 new_document = new_instance 471 472 def set_document(self, name, doc): 473 474 """ 475 Store in the container under the given 'name' the supplied document 476 'doc'. 477 """ 478 479 self.documents[name] = doc 480 481 if __name__ == "__main__": 482 483 items = [ 484 ("_action_update", "Some value"), 485 ("_action_delete=/zoo$1/cage$2", "Some value"), 486 ("_action_nasty=/zoo$1/cage$3", "Some value"), 487 ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input. 488 ("/zoo$1/name", "The Zoo ???"), 489 ("/zoo$1/cage$1/name", "reptiles"), 490 ("/zoo$1/cage$1/capacity", "5"), 491 ("/zoo$1/cage$1/animal$1/name", "Monty"), 492 ("/zoo$1/cage$1/animal$1/species$1/name", "Python"), 493 ("/zoo$1/cage$1/animal$1/property$2/name", "texture"), 494 ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"), 495 ("/zoo$1/cage$1/animal$1/property$3/name", "length"), 496 ("/zoo$1/cage$1/animal$1/property$3/value", "5m"), 497 ("/zoo$1/cage$1/animal$2/name", "Vincent"), 498 ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"), 499 ("/zoo$1/cage$1/animal$2/property$2/name", "colour"), 500 ("/zoo$1/cage$1/animal$2/property$2/value", "variable"), 501 ("/zoo$1/cage$1/animal$2/property$3/name", "length"), 502 ("/zoo$1/cage$1/animal$2/property$3/value", "1m"), 503 ("/zoo$1/cage$2/name", "mammals"), 504 ("/zoo$1/cage$2/capacity", "25"), 505 ("/zoo$1/cage$2/animal$1/name", "Simon"), 506 ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"), 507 ("/zoo$1/cage$2/animal$2/name", "Leonard"), 508 ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"), 509 ("/zoo$1/cage$2/animal$2/property$2/name", "danger"), 510 ("/zoo$1/cage$2/animal$2/property$2/value", "high"), 511 ("/zoo$1/funding$3/type", "private"), 512 ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"), 513 ("/zoo$1/funding$3/contributor$1/amount", "543210.987"), 514 ("/zoo$1/funding$3/contributor$1/industry$$type", "animals") 515 ] 516 517 import time 518 import sys, cmdsyntax 519 520 # Find the documents. 521 522 syntax = cmdsyntax.Syntax(""" 523 --plain-output=OUTPUT_FILE 524 --instance-name=NAME 525 """) 526 527 syntax_matches = syntax.get_args(sys.argv[1:]) 528 529 try: 530 args = syntax_matches[0] 531 except IndexError: 532 print syntax.syntax 533 sys.exit(1) 534 535 # Create an object to interpret the test data. 536 537 fields = FieldProcessor("iso-8859-1") 538 539 t = time.time() 540 documents = fields.make_documents(items) 541 print "Building time", time.time() - t 542 543 t = time.time() 544 documents[args["instance-name"]].toStream(stream=open(args["plain-output"], "wb"), encoding="utf-8") 545 print "Prettyprinting time", time.time() - t 546 547 print "Selectors", repr(fields.get_selectors(items, documents)) 548 549 # vim: tabstop=4 expandtab shiftwidth=4