imip-agent

vContent.py

0:4bc0a825daad
2014-09-21 Paul Boddie An iMIP agent for Postfix.
     1 #!/usr/bin/env python     2      3 """     4 Parsing of vCard, vCalendar and iCalendar files.     5      6 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20     21 --------    22     23 References:    24     25 RFC 5545: Internet Calendaring and Scheduling Core Object Specification    26           (iCalendar)    27           http://tools.ietf.org/html/rfc5545    28     29 RFC 2445: Internet Calendaring and Scheduling Core Object Specification    30           (iCalendar)    31           http://tools.ietf.org/html/rfc2445    32     33 RFC 2425: A MIME Content-Type for Directory Information    34           http://tools.ietf.org/html/rfc2425    35     36 RFC 2426: vCard MIME Directory Profile    37           http://tools.ietf.org/html/rfc2426    38 """    39     40 try:    41     set    42 except NameError:    43     from sets import Set as set    44     45 # Encoding-related imports.    46     47 import base64, quopri    48 import codecs    49     50 # Tokenisation help.    51     52 import re    53     54 # Configuration.    55     56 default_encoding = "utf-8"    57     58 # Reader and parser classes.    59     60 class Reader:    61     62     "A simple class wrapping a file, providing simple pushback capabilities."    63     64     def __init__(self, f, non_standard_newline=0):    65     66         """    67         Initialise the object with the file 'f'. If 'non_standard_newline' is    68         set to a true value (unlike the default), lines ending with CR will be    69         treated as complete lines.    70         """    71     72         self.f = f    73         self.non_standard_newline = non_standard_newline    74         self.lines = []    75         self.line_number = 1 # about to read line 1    76     77     def close(self):    78     79         "Close the reader."    80     81         self.f.close()    82     83     def pushback(self, line):    84     85         """    86         Push the given 'line' back so that the next line read is actually the    87         given 'line' and not the next line from the underlying file.    88         """    89     90         self.lines.append(line)    91         self.line_number -= 1    92     93     def readline(self):    94     95         """    96         If no pushed-back lines exist, read a line directly from the file.    97         Otherwise, read from the list of pushed-back lines.    98         """    99    100         self.line_number += 1   101         if self.lines:   102             return self.lines.pop()   103         else:   104             # Sanity check for broken lines (\r instead of \r\n or \n).   105             line = self.f.readline()   106             while line.endswith("\r") and not self.non_standard_newline:   107                 s = self.f.readline()   108                 if not s:   109                     break   110                 line += s   111             if line.endswith("\r") and self.non_standard_newline:   112                 return line + "\n"   113             else:   114                 return line   115    116     def read_content_line(self):   117    118         """   119         Read an entire content line, itself potentially consisting of many   120         physical lines of text, returning a string.   121         """   122    123         # Skip blank lines.   124    125         line = self.readline()   126         while line:   127             line_stripped = line.rstrip("\r\n")   128             if not line_stripped:   129                 line = self.readline()   130             else:   131                 break   132         else:   133             return ""   134    135         # Strip all appropriate whitespace from the right end of each line.   136         # For subsequent lines, remove the first whitespace character.   137         # See section 4.1 of the iCalendar specification.   138    139         lines = [line_stripped]   140    141         line = self.readline()   142         while line.startswith(" ") or line.startswith("\t"):   143             lines.append(line[1:].rstrip("\r\n"))   144             line = self.readline()   145    146         # Since one line too many will have been read, push the line back into   147         # the file.   148    149         if line:   150             self.pushback(line)   151    152         return "".join(lines)   153    154     def get_content_line(self):   155    156         "Return a content line object for the current line."   157    158         return ContentLine(self.read_content_line())   159    160 class ContentLine:   161    162     "A content line which can be searched."   163    164     SEPARATORS = re.compile('[;:"]')   165     SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')   166    167     def __init__(self, text):   168         self.text = text   169         self.start = 0   170    171     def __repr__(self):   172         return "ContentLine(%r)" % self.text   173    174     def get_remaining(self):   175    176         "Get the remaining text from the content line."   177    178         return self.text[self.start:]   179    180     def search(self, targets):   181    182         """   183         Find one of the 'targets' in the text, returning the string from the   184         current position up to the target found, along with the target string,   185         using a tuple of the form (string, target). If no target was found,   186         return the entire string together with a target of None.   187    188         The 'targets' parameter must be a regular expression object or an object   189         compatible with the API of such objects.   190         """   191    192         text = self.text   193         start = pos = self.start   194         length = len(text)   195    196         # Remember the first target.   197    198         first = None   199         first_pos = None   200         in_quoted_region = 0   201    202         # Process the text, looking for the targets.   203    204         while pos < length:   205             match = targets.search(text, pos)   206    207             # Where nothing matches, end the search.   208    209             if match is None:   210                 pos = length   211    212             # Where a double quote matches, toggle the region state.   213    214             elif match.group() == '"':   215                 in_quoted_region = not in_quoted_region   216                 pos = match.end()   217    218             # Where something else matches outside a region, stop searching.   219    220             elif not in_quoted_region:   221                 first = match.group()   222                 first_pos = match.start()   223                 break   224    225             # Otherwise, keep looking for the end of the region.   226    227             else:   228                 pos = match.end()   229    230         # Where no more input can provide the targets, return a special result.   231    232         else:   233             self.start = length   234             return text[start:], None   235    236         self.start = match.end()   237         return text[start:first_pos], first   238    239 class StreamParser:   240    241     "A stream parser for content in vCard/vCalendar/iCalendar-like formats."   242    243     def __init__(self, f):   244    245         "Initialise the parser for the given file 'f'."   246    247         self.f = f   248    249     def close(self):   250    251         "Close the reader."   252    253         self.f.close()   254    255     def __iter__(self):   256    257         "Return self as the iterator."   258    259         return self   260    261     def next(self):   262    263         """   264         Return the next content item in the file as a tuple of the form   265         (name, parameters, values).   266         """   267    268         return self.parse_content_line()   269    270     def decode_content(self, value):   271    272         "Decode the given 'value', replacing quoted characters."   273    274         return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")   275    276     # Internal methods.   277    278     def parse_content_line(self):   279    280         """   281         Return the name, parameters and value information for the current   282         content line in the file being parsed.   283         """   284    285         f = self.f   286         line_number = f.line_number   287         line = f.get_content_line()   288    289         # Read the property name.   290    291         name, sep = line.search(line.SEPARATORS)   292         name = name.strip()   293    294         if not name and sep is None:   295             raise StopIteration   296    297         # Read the parameters.   298    299         parameters = {}   300    301         while sep == ";":   302    303             # Find the actual modifier.   304    305             parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)   306             parameter_name = parameter_name.strip()   307    308             if sep == "=":   309                 parameter_value, sep = line.search(line.SEPARATORS)   310                 parameter_value = parameter_value.strip()   311             else:   312                 parameter_value = None   313    314             # Append a key, value tuple to the parameters list.   315    316             parameters[parameter_name] = parameter_value   317    318         # Get the value content.   319    320         if sep != ":":   321             raise ValueError, (line_number, line)   322    323         # Obtain and decode the value.   324    325         value = self.decode(name, parameters, line.get_remaining())   326    327         return name, parameters, value   328    329     def decode(self, name, parameters, value):   330    331         "Decode using 'name' and 'parameters' the given 'value'."   332    333         encoding = parameters.get("ENCODING")   334         charset = parameters.get("CHARSET")   335    336         value = self.decode_content(value)   337    338         if encoding == "QUOTED-PRINTABLE":   339             return unicode(quopri.decodestring(value), charset or "iso-8859-1")   340         elif encoding == "BASE64":   341             return base64.decodestring(value)   342         else:   343             return value   344    345 class ParserBase:   346    347     "An abstract parser for content in vCard/vCalendar/iCalendar-like formats."   348    349     def __init__(self):   350    351         "Initialise the parser."   352    353         self.names = []   354    355     def parse(self, f, parser_cls=None):   356    357         "Parse the contents of the file 'f'."   358    359         parser = (parser_cls or StreamParser)(f)   360    361         for name, parameters, value in parser:   362    363             if name == "BEGIN":   364                 self.names.append(value)   365                 self.startComponent(value, parameters)   366    367             elif name == "END":   368                 start_name = self.names.pop()   369                 if start_name != value:   370                     raise ParseError, "Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (   371                         start_name, value, f.line_number)   372    373                 self.endComponent(value)   374    375             else:   376                 self.handleProperty(name, parameters, value)   377    378 class Parser(ParserBase):   379    380     "A SAX-like parser for vCard/vCalendar/iCalendar-like formats."   381    382     def __init__(self):   383         ParserBase.__init__(self)   384         self.components = []   385    386     def startComponent(self, name, parameters):   387    388         """   389         Add the component with the given 'name' and 'parameters', recording an   390         empty list of children as part of the component's content.   391         """   392    393         component = self.handleProperty(name, parameters)   394         self.components.append(component)   395         return component   396    397     def endComponent(self, name):   398    399         """   400         End the component with the given 'name' by removing it from the active   401         component stack. If only one component exists on the stack, retain it   402         for later inspection.   403         """   404    405         if len(self.components) > 1:   406             return self.components.pop()   407    408         # Or return the only element.   409    410         elif self.components:   411             return self.components[0]   412    413     def handleProperty(self, name, parameters, value=None):   414    415         """   416         Record the property with the given 'name', 'parameters' and optional   417         'value' as part of the current component's children.   418         """   419    420         component = self.makeComponent(name, parameters, value)   421         self.attachComponent(component)   422         return component   423    424     # Component object construction/manipulation methods.   425    426     def attachComponent(self, component):   427    428         "Attach the given 'component' to its parent."   429    430         if self.components:   431             component_name, component_parameters, component_children = self.components[-1]   432             component_children.append(component)   433    434     def makeComponent(self, name, parameters, value=None):   435    436         """   437         Make a component object from the given 'name', 'parameters' and optional   438         'value'.   439         """   440    441         return (name, parameters, value or [])   442    443     # Public methods.   444    445     def parse(self, f, parser_cls=None):   446    447         "Parse the contents of the file 'f'."   448    449         ParserBase.parse(self, f, parser_cls)   450         return self.components[0]   451    452 # Writer classes.   453    454 class Writer:   455    456     "A simple class wrapping a file, providing simple output capabilities."   457    458     default_line_length = 76   459    460     def __init__(self, write, line_length=None):   461    462         """   463         Initialise the object with the given 'write' operation. If 'line_length'   464         is set, the length of written lines will conform to the specified value   465         instead of the default value.    466         """   467    468         self._write = write   469         self.line_length = line_length or self.default_line_length   470         self.char_offset = 0   471    472     def write(self, text):   473    474         "Write the 'text' to the file."   475    476         write = self._write   477         line_length = self.line_length   478    479         i = 0   480         remaining = len(text)   481    482         while remaining:   483             space = line_length - self.char_offset   484             if remaining > space:   485                 write(text[i:i + space])   486                 write("\r\n ")   487                 self.char_offset = 1   488                 i += space   489                 remaining -= space   490             else:   491                 write(text[i:])   492                 self.char_offset += remaining   493                 i += remaining   494                 remaining = 0   495    496     def end_line(self):   497    498         "End the current content line."   499    500         if self.char_offset > 0:   501             self.char_offset = 0   502             self._write("\r\n")   503    504 class StreamWriter:   505    506     "A stream writer for content in vCard/vCalendar/iCalendar-like formats."   507    508     def __init__(self, f):   509    510         "Initialise the stream writer with the given 'f' stream object."   511    512         self.f = f   513    514     def write(self, name, parameters, value):   515    516         """   517         Write a content line, serialising the given 'name', 'parameters' and   518         'value' information.   519         """   520    521         self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))   522    523     # Internal methods.   524    525     def write_content_line(self, name, encoded_parameters, encoded_value):   526    527         """   528         Write a content line for the given 'name', 'encoded_parameters' and   529         'encoded_value' information.   530         """   531    532         f = self.f   533    534         f.write(name)   535         for param_name, param_value in encoded_parameters.items():   536             f.write(";")   537             f.write(param_name)   538             f.write("=")   539             f.write(param_value)   540         f.write(":")   541         f.write(encoded_value)   542         f.end_line()   543    544     def encode_quoted_parameter_value(self, value):   545    546         "Encode the given 'value'."   547    548         return '"%s"' % value   549    550     def encode_value(self, name, parameters, value):   551    552         """   553         Encode using 'name' and 'parameters' the given 'value' so that the   554         resulting encoded form employs any specified character encodings.   555         """   556    557         encoding = parameters.get("ENCODING")   558         charset = parameters.get("CHARSET")   559    560         if encoding == "QUOTED-PRINTABLE":   561             value = quopri.encodestring(value.encode(charset or "iso-8859-1"))   562         elif encoding == "BASE64":   563             value = base64.encodestring(value)   564    565         return self.encode_content(value)   566    567     # Overrideable methods.   568    569     def encode_parameters(self, parameters):   570    571         """   572         Encode the given 'parameters' according to the vCalendar specification.   573         """   574    575         encoded_parameters = {}   576    577         for param_name, param_value in parameters.items():   578    579             # Basic format support merely involves quoting values which seem to   580             # need it. Other more specific formats may define exactly which   581             # parameters should be quoted.   582    583             if ContentLine.SEPARATORS.search(param_value):   584                 param_value = self.encode_quoted_parameter_value(param_value)   585    586             encoded_parameters[param_name] = param_value   587    588         return encoded_parameters   589    590     def encode_content(self, value):   591    592         "Encode the given 'value', quoting characters."   593    594         return value.replace("\n", "\\n")   595    596 # Utility functions.   597    598 def is_input_stream(stream_or_string):   599     return hasattr(stream_or_string, "read")   600    601 def get_input_stream(stream_or_string, encoding=None):   602     if is_input_stream(stream_or_string):   603         return stream_or_string   604     else:   605         return codecs.open(stream_or_string, encoding=(encoding or default_encoding))   606    607 def get_output_stream(stream_or_string, encoding=None):   608     if hasattr(stream_or_string, "write"):   609         return stream_or_string   610     else:   611         return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))   612    613 # Public functions.   614    615 def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   616    617     """   618     Parse the resource data found through the use of the 'stream_or_string',   619     which is either a stream providing Unicode data (the codecs module can be   620     used to open files or to wrap streams in order to provide Unicode data) or a   621     filename identifying a file to be parsed.   622    623     The optional 'encoding' can be used to specify the character encoding used   624     by the file to be parsed.   625    626     The optional 'non_standard_newline' can be set to a true value (unlike the   627     default) in order to attempt to process files with CR as the end of line   628     character.   629    630     As a result of parsing the resource, the root node of the imported resource   631     is returned.   632     """   633    634     stream = get_input_stream(stream_or_string, encoding)   635     reader = Reader(stream, non_standard_newline)   636    637     # Parse using the reader.   638    639     try:   640         parser = (parser_cls or Parser)()   641         return parser.parse(reader)   642    643     # Close any opened streams.   644    645     finally:   646         if not is_input_stream(stream_or_string):   647             reader.close()   648    649 def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   650    651     """   652     Parse the resource data found through the use of the 'stream_or_string',   653     which is either a stream providing Unicode data (the codecs module can be   654     used to open files or to wrap streams in order to provide Unicode data) or a   655     filename identifying a file to be parsed.   656    657     The optional 'encoding' can be used to specify the character encoding used   658     by the file to be parsed.   659    660     The optional 'non_standard_newline' can be set to a true value (unlike the   661     default) in order to attempt to process files with CR as the end of line   662     character.   663    664     An iterator is returned which provides event tuples describing parsing   665     events of the form (name, parameters, value).   666     """   667    668     stream = get_input_stream(stream_or_string, encoding)   669     reader = Reader(stream, non_standard_newline)   670     parser = (parser_cls or StreamParser)(reader)   671     return parser   672    673 def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):   674    675     """   676     Return a writer which will either send data to the resource found through   677     the use of 'stream_or_string' or using the given 'write' operation.   678    679     The 'stream_or_string' parameter may be either a stream accepting Unicode   680     data (the codecs module can be used to open files or to wrap streams in   681     order to accept Unicode data) or a filename identifying a file to be   682     written.   683    684     The optional 'encoding' can be used to specify the character encoding used   685     by the file to be written.   686    687     The optional 'line_length' can be used to specify how long lines should be   688     in the resulting data.   689     """   690    691     if stream_or_string:   692         stream = get_output_stream(stream_or_string, encoding)   693         _writer = Writer(stream.write, line_length)   694     elif write:   695         _writer = Writer(write, line_length)   696     else:   697         raise IOError, "No stream, filename or write operation specified."   698    699     return (writer_cls or StreamWriter)(_writer)   700    701 # vim: tabstop=4 expandtab shiftwidth=4