XSLTools

XSLForms/Fields.py

580:5429063823d3
2007-06-05 paulb [project @ 2007-06-05 23:18:56 by paulb] Fixed CR character handling, removing such characters (typically occurring in CRLF sequences) so that recipients of the eventual documents do not double up newlines.
     1 #!/usr/bin/env python     2 # -*- coding: iso-8859-1 -*-     3      4 """     5 Interpretation of field collections from sources such as HTTP request parameter     6 dictionaries.     7      8 Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>     9     10 This library is free software; you can redistribute it and/or    11 modify it under the terms of the GNU Lesser General Public    12 License as published by the Free Software Foundation; either    13 version 2.1 of the License, or (at your option) any later version.    14     15 This library is distributed in the hope that it will be useful,    16 but WITHOUT ANY WARRANTY; without even the implied warranty of    17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    18 Lesser General Public License for more details.    19     20 You should have received a copy of the GNU Lesser General Public    21 License along with this library; if not, write to the Free Software    22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA    23     24 --------    25     26 Classes which process field collections, producing instance documents. Each    27 field entry consists of a field name mapped to a string value, where the field    28 name may have the following formats:    29     30     /name1$n1/name2    31     /name1$n1/name2$n2/name3    32     /name1$n1/name2$n2/name3$n3/name4    33     ...    34     35 The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)    36 in the entire element list, whose elements may have different names. For    37 example:    38     39     /zoo$1/name    40     /zoo$1/cage$1/name    41     /zoo$1/cage$2/name    42     /zoo$1/funding$3/contributor$1/name    43     44 Where multiple values can be collected for a given field, the following notation    45 is employed:    46     47     /package$1/categories$1/category$$value    48     49 Some fields may contain the "=" string. This string is reserved and all text    50 following it is meant to specify a path into a particular document. For example:    51     52     _action_add_animal=/zoo$1/cage$2    53 """    54     55 import Constants    56 import libxml2dom    57 from xml.dom import EMPTY_NAMESPACE    58     59 class FieldsError(Exception):    60     pass    61     62 class FieldProcessor:    63     64     """    65     A class which converts fields in the documented form to XML    66     instance documents.    67     """    68     69     def __init__(self, encoding="utf-8", values_are_lists=0):    70     71         """    72         Initialise the fields processor with the given 'encoding',    73         which is optional and which only applies to field data in    74         Python string form (and not Unicode objects).    75     76         If the optional 'values_are_lists' parameter is set to true    77         then each actual field value will be obtained by taking the    78         first element from each supplied field value.    79         """    80     81         self.encoding = encoding    82         self.values_are_lists = values_are_lists    83     84     def complete_documents(self, documents, fields):    85     86         """    87         Complete the given 'documents' using the 'fields' items list.    88         """    89     90         for field, value in fields:    91     92             # Ignore selectors.    93     94             if field.find(Constants.selector_indicator) != -1:    95                 continue    96     97             model_name, components = self._get_model_name_and_components(field)    98             if model_name is None:    99                 continue   100    101             # Get a new instance document if none has been made for the   102             # model.   103    104             if not documents.has_key(model_name):   105                 documents[model_name] = self.new_instance(model_name)   106             node = documents[model_name]   107    108             # Traverse the components within the instance.   109    110             for component in components:   111                 t = component.split(Constants.pair_separator)   112                 if len(t) == 1:   113    114                     # Convert from lists if necessary.   115    116                     if self.values_are_lists:   117                         value = value[0]   118    119                     # Convert the value to Unicode if necessary.   120    121                     if type(value) == type(""):   122                         value = unicode(value, encoding=self.encoding)   123    124                     # Remove CR characters.   125    126                     node.setAttributeNS(EMPTY_NAMESPACE, t[0], value.replace("\r", ""))   127                     break   128    129                 elif len(t) == 2:   130    131                     # Convert from one-based indexing (the position()   132                     # function) to zero-based indexing.   133    134                     name, index = t[0], int(t[1]) - 1   135                     if index < 0:   136                         break   137                     try:   138                         node = self._enter_element(node, name, index)   139                     except FieldsError, exc:   140                         raise FieldsError, "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (   141                             field, name, index, exc.args[0])   142    143                 elif len(t) == 3 and t[1] == "":   144    145                     # Multivalued fields.   146    147                     if not self.values_are_lists:   148                         values = [value]   149                     else:   150                         values = value   151    152                     name = t[0]   153                     for subvalue in values:   154                         subnode = self._append_element(node, name)   155    156                         # Convert the value to Unicode if necessary.   157    158                         if type(subvalue) == type(""):   159                             subvalue = unicode(subvalue, encoding=self.encoding)   160    161                         # Remove CR characters.   162    163                         subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], subvalue.replace("\r", ""))   164    165     def complete_selectors(self, selectors, fields, documents, create):   166    167         """   168         Fill in the given 'selectors' dictionary using the given   169         'fields' so that it contains mappings from selector names to   170         parts of the specified 'documents'. If 'create' is set to a   171         true value, selected elements will be created if not already   172         present; otherwise, ignore such selectors.   173         """   174    175         for field, value in fields:   176    177             # Process selectors only.   178    179             selector_components = field.split(Constants.selector_indicator)   180             if len(selector_components) < 2:   181                 continue   182    183             # Get the selector name and path.   184             # Note that the joining of the components uses the separator,   185             # but the separator really should not exist in the path.   186    187             selector_name = selector_components[0]   188             path = Constants.selector_indicator.join(selector_components[1:])   189    190             model_name, components = self._get_model_name_and_components(path)   191             if model_name is None:   192                 continue   193    194             # Go to the instance element.   195    196             if not documents.has_key(model_name) or documents[model_name] is None:   197                 continue   198     199             node = documents[model_name]   200    201             # Traverse the path to find the part of the document to be   202             # selected.   203    204             for component in components:   205                 t = component.split(Constants.pair_separator)   206                 if len(t) == 1:   207    208                     # Select attribute.   209    210                     node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])   211                     break   212    213                 elif len(t) == 2:   214    215                     # Convert from one-based indexing (the position() function)   216                     # to zero-based indexing.   217    218                     name, index = t[0], int(t[1]) - 1   219                     if index < 0:   220                         break   221    222                     # If create is set, create selected elements.   223    224                     if create:   225                         try:   226                             node = self._enter_element(node, name, index)   227                         except FieldsError, exc:   228                             raise FieldsError, "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (   229                                 field, name, index, exc.args[0])   230    231                     # Where a node cannot be found, do not create a selector.   232    233                     else:   234                         node = self._find_element(node, name, index)   235                         if node is None:   236                             break   237    238             if not selectors.has_key(selector_name):   239                 selectors[selector_name] = []   240             if node is not None:   241                 selectors[selector_name].append(node)   242    243     def _append_element(self, node, name):   244    245         """   246         Within 'node' append an element with the given 'name'.   247         """   248    249         new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)   250         node.appendChild(new_node)   251         return new_node   252    253     def _enter_element(self, node, name, index):   254    255         """   256         From 'node' enter the element with the given 'name' at the   257         given 'index' position amongst the child elements. Create   258         missing child elements if necessary.   259         """   260    261         self._ensure_elements(node, index)   262    263         elements = node.xpath("*")   264         if elements[index].localName == "placeholder":   265             new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)   266             node.replaceChild(new_node, elements[index])   267         else:   268             new_node = elements[index]   269             if new_node.localName != name:   270                 raise FieldsError, (new_node.localName, name, elements, index)   271    272         # Enter the newly-created element.   273    274         return new_node   275    276     def _find_element(self, node, name, index):   277    278         """   279         From 'node' find the element with the given 'name' at the   280         given 'index' position amongst the child elements. Return   281         None if no such element exists.   282         """   283    284         elements = node.xpath("*")   285         try:   286             new_node = elements[index]   287             if new_node.localName != name:   288                 return None   289         except IndexError:   290             return None   291         return new_node   292    293     def _get_model_name_and_components(self, field):   294    295         """   296         From 'field', return the model name and components which   297         describe the path within the instance document associated   298         with that model.   299         """   300    301         # Get the components of the field name.   302         # Example:  /name1#n1/name2#n2/name3   303         # Expected: ['', 'name1#n1', 'name2#n2', 'name3']   304    305         components = field.split(Constants.path_separator)   306         if len(components) < 2:   307             return None, None   308    309         # Extract the model name from the top-level element   310         # specification.   311         # Expected: ['name1', 'n1']   312    313         model_name_and_index = components[1].split(Constants.pair_separator)   314         if len(model_name_and_index) != 2:   315             return None, None   316    317         # Expected: 'name1', ['', 'name1#n1', 'name2#n2', 'name3']   318    319         return model_name_and_index[0], components[1:]   320    321     def _ensure_elements(self, document, index):   322    323         """   324         In the given 'document', extend the child elements list   325         so that a node can be stored at the given 'index'.   326         """   327    328         elements = document.xpath("*")   329         i = len(elements)   330         while i <= index:   331             new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")   332             document.appendChild(new_node)   333             i += 1   334    335     def make_documents(self, fields):   336    337         """   338         Make a dictionary mapping model names to new documents prepared   339         from the given 'fields' dictionary.   340         """   341    342         documents = {}   343         self.complete_documents(documents, fields)   344    345         # Fix the dictionary to return the actual document root.   346    347         for model_name, instance_root in documents.items():   348             documents[model_name] = instance_root   349         return documents   350    351     def get_selectors(self, fields, documents, create=0):   352    353         """   354         Get a dictionary containing a mapping of selector names to   355         selected parts of the given 'documents'. If 'create' is set   356         to a true value, selected elements will be created if not   357         already present.   358         """   359    360         selectors = {}   361         self.complete_selectors(selectors, fields, documents, create)   362         return selectors   363    364     def new_instance(self, name):   365    366         "Return an instance root of the given 'name' in a new document."   367    368         return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)   369    370     # An alias for the older method name.   371    372     new_document = new_instance   373    374 # NOTE: Legacy name exposure.   375    376 Fields = FieldProcessor   377    378 class Form(FieldProcessor):   379    380     "A collection of documents processed from form fields."   381    382     def __init__(self, *args, **kw):   383    384         """   385         Initialise the form data container with the general 'args' and 'kw'   386         parameters.   387         """   388    389         FieldProcessor.__init__(self, *args, **kw)   390         self.parameters = {}   391         self.documents = {}   392    393     def set_parameters(self, parameters):   394    395         "Set the request 'parameters' (or fields) in the container."   396    397         self.parameters = parameters   398         self.documents = self.make_documents(self.parameters.items())   399    400     def get_parameters(self):   401    402         """   403         Get the request parameters (or fields) from the container. Note that   404         these parameters comprise the raw form field values submitted in a   405         request rather than the structured form data.   406    407         Return a dictionary mapping parameter names to values.   408         """   409    410         return self.parameters   411    412     def get_documents(self):   413    414         """   415         Get the form data documents from the container, returning a dictionary   416         mapping document names to DOM-style document objects.   417         """   418    419         return self.documents   420    421     def get_document(self, name):   422    423         """   424         Get the form data document with the given 'name' from the container,   425         returning a DOM-style document object if such a document exists, or None   426         if no such document can be found.   427         """   428    429         return self.documents.get(name)   430    431     def get_selectors(self, create=0):   432    433         """   434         Get the form data selectors from the container, returning a dictionary   435         mapping selector names to collections of selected elements. If 'create'   436         is set to a true value (unlike the default), the selected elements will   437         be created in the form data document if not already present.   438         """   439    440         return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)   441    442     def get_selector(self, name, create=0):   443    444         """   445         Get the form data selectors for the given 'name', returning a collection   446         of selected elements. If 'create' is set to a true value (unlike the   447         default), the selected elements will be created in the form data   448         document if not already present.   449         """   450    451         parameters = []   452         for parameter_name, value in parameters.items():   453             if parameter_name.startswith(name + Constants.selector_indicator):   454                 parameters.append((parameter_name, value))   455         return FieldProcessor.get_selectors(self, parameters, self.documents, create)   456    457     def new_instance(self, name):   458    459         """   460         Make a new document with the given 'name', storing it in the container   461         and returning the document.   462         """   463    464         doc = FieldProcessor.new_instance(self, name)   465         self.documents[name] = doc   466         return doc   467    468     # An alias for the older method name.   469    470     new_document = new_instance   471    472     def set_document(self, name, doc):   473    474         """   475         Store in the container under the given 'name' the supplied document   476         'doc'.   477         """   478    479         self.documents[name] = doc   480    481 if __name__ == "__main__":   482    483     items = [   484             ("_action_update", "Some value"),   485             ("_action_delete=/zoo$1/cage$2", "Some value"),   486             ("_action_nasty=/zoo$1/cage$3", "Some value"),   487             ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.   488             ("/zoo$1/name", "The Zoo ???"),   489             ("/zoo$1/cage$1/name", "reptiles"),   490             ("/zoo$1/cage$1/capacity", "5"),   491             ("/zoo$1/cage$1/animal$1/name", "Monty"),   492             ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),   493             ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),   494             ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),   495             ("/zoo$1/cage$1/animal$1/property$3/name", "length"),   496             ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),   497             ("/zoo$1/cage$1/animal$2/name", "Vincent"),   498             ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),   499             ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),   500             ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),   501             ("/zoo$1/cage$1/animal$2/property$3/name", "length"),   502             ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),   503             ("/zoo$1/cage$2/name", "mammals"),   504             ("/zoo$1/cage$2/capacity", "25"),   505             ("/zoo$1/cage$2/animal$1/name", "Simon"),   506             ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),   507             ("/zoo$1/cage$2/animal$2/name", "Leonard"),   508             ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),   509             ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),   510             ("/zoo$1/cage$2/animal$2/property$2/value", "high"),   511             ("/zoo$1/funding$3/type", "private"),   512             ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),   513             ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),   514             ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")   515         ]   516    517     import time   518     import sys, cmdsyntax   519    520     # Find the documents.   521    522     syntax = cmdsyntax.Syntax("""   523         --plain-output=OUTPUT_FILE   524         --instance-name=NAME   525         """)   526    527     syntax_matches = syntax.get_args(sys.argv[1:])   528    529     try:   530         args = syntax_matches[0]   531     except IndexError:   532         print syntax.syntax   533         sys.exit(1)   534    535     # Create an object to interpret the test data.   536    537     fields = FieldProcessor("iso-8859-1")   538    539     t = time.time()   540     documents = fields.make_documents(items)   541     print "Building time", time.time() - t   542    543     t = time.time()   544     documents[args["instance-name"]].toStream(stream=open(args["plain-output"], "wb"), encoding="utf-8")   545     print "Prettyprinting time", time.time() - t   546    547     print "Selectors", repr(fields.get_selectors(items, documents))   548    549 # vim: tabstop=4 expandtab shiftwidth=4