#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package#1/categories#1/category##value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    Raised when a field refers to an element whose name differs from that of
    the element already present at the same position in the document.
    """

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' as Unicode text: byte strings are decoded using the
        configured encoding; any other object is returned unchanged.
        """

        # NOTE: 'bytes' is an alias of 'str' on Python 2.6+, so plain strings
        # NOTE: are decoded there, whereas Python 3 text passes through.

        if isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents and
        is updated in place; a new document is created (using 'new_instance')
        for each model not yet present. Fields containing the selector
        indicator, and fields whose names do not start with an indexed
        top-level element, are ignored.

        A ValueError is raised by the integer conversion if an element index
        in a field name is not numeric, and a FieldsError is raised if a
        field names an element differently from one already stored at the
        same position.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name terminates the path and denotes an
                    # attribute on the current element.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1

                    # Abandon fields with a zero or negative position.

                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element is appended for
                    # each value, with the value stored in an attribute.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name maps to a list of nodes; the 'selectors'
        dictionary is updated in place. Selectors referring to models which
        have no document in 'documents' are ignored.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1

                    # NOTE: With a zero or negative position, traversal stops
                    # NOTE: and the node reached so far is the one selected.

                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            if selector_name not in selectors:
                selectors[selector_name] = []
            selectors[selector_name].append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'. Return the
        new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. A FieldsError is raised if the element
        already found at 'index' is not a placeholder and has a different
        name.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Replace the placeholder with the real element.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' has no path separator or if its
        top-level component is not a single name/index pair.
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading (empty) component is dropped from the result.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.

        Elements named "placeholder" are appended until there are at least
        'index' + 1 child elements.
        """

        elements = document.xpath("*")
        i = len(elements)
        while i <= index:
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form with the same arguments as FieldProcessor,
        starting with no parameters and no documents.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Store the given 'parameters' dictionary and rebuild the documents
        from its items.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        "Return the parameters last given to 'set_parameters'."

        return self.parameters

    def get_documents(self):

        "Return the dictionary mapping model names to documents."

        return self.documents

    def get_selectors(self):

        """
        Return a dictionary mapping selector names to the parts of this
        form's documents selected by the stored parameters.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents)

    def new_instance(self, name):

        """
        Return a new instance document for 'name', also recording it in
        this form's documents.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987"),
            ("/zoo#1/funding#3/contributor#1/industry##type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    t = time.time()

    # Look up the document before opening the output file, then make sure
    # that the file is closed once the document has been written.

    document = documents[args["instance-name"]]
    with open(args["plain-output"], "wb") as stream:
        document.toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4