imip-agent (file vContent.py at d559de7bdc6c)

     1 #!/usr/bin/env python     2      3 """     4 Parsing of vCard, vCalendar and iCalendar files.     5      6 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,     7               2014, 2015, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21     22 --------    23     24 References:    25     26 RFC 5545: Internet Calendaring and Scheduling Core Object Specification    27           (iCalendar)    28           http://tools.ietf.org/html/rfc5545    29     30 RFC 2445: Internet Calendaring and Scheduling Core Object Specification    31           (iCalendar)    32           http://tools.ietf.org/html/rfc2445    33     34 RFC 2425: A MIME Content-Type for Directory Information    35           http://tools.ietf.org/html/rfc2425    36     37 RFC 2426: vCard MIME Directory Profile    38           http://tools.ietf.org/html/rfc2426    39 """    40     41 try:    42     set    43 except NameError:    44     from sets import Set as set    45     46 # Encoding-related imports.    47     48 import base64, quopri    49 import codecs    50     51 # Tokenisation help.    52     53 import re    54     55 # Configuration.    56     57 default_encoding = "utf-8"    58     59 class ParseError(Exception):    60     61     "General parsing errors."    62     63     pass    64     65 class WriteError(Exception):    66     67     "General writing errors."    68     69     pass    70     71 # Reader and parser classes.    72     73 class Reader:    74     75     "A simple class wrapping a file, providing simple pushback capabilities."    76     77     def __init__(self, f, non_standard_newline=0):    78     79         """    80         Initialise the object with the file 'f'. If 'non_standard_newline' is    81         set to a true value (unlike the default), lines ending with CR will be    82         treated as complete lines.    83         """    84     85         self.f = f    86         self.non_standard_newline = non_standard_newline    87         self.lines = []    88         self.line_number = 1 # about to read line 1    89     90     def close(self):    91     92         "Close the reader."    93     94         self.f.close()    95     96     def pushback(self, line):    97     98         """    99         Push the given 'line' back so that the next line read is actually the   100         given 'line' and not the next line from the underlying file.   101         """   102    103         self.lines.append(line)   104         self.line_number -= 1   105    106     def readline(self):   107    108         """   109         If no pushed-back lines exist, read a line directly from the file.   110         Otherwise, read from the list of pushed-back lines.   111         """   112    113         self.line_number += 1   114         if self.lines:   115             return self.lines.pop()   116         else:   117             # Sanity check for broken lines (\r instead of \r\n or \n).   118             line = self.f.readline()   119             while line.endswith("\r") and not self.non_standard_newline:   120                 s = self.f.readline()   121                 if not s:   122                     break   123                 line += s   124             if line.endswith("\r") and self.non_standard_newline:   125                 return line + "\n"   126             else:   127                 return line   128    129     def read_content_line(self):   130    131         """   132         Read an entire content line, itself potentially consisting of many   133         physical lines of text, returning a string.   134         """   135    136         # Skip blank lines.   137    138         line = self.readline()   139         while line:   140             line_stripped = line.rstrip("\r\n")   141             if not line_stripped:   142                 line = self.readline()   143             else:   144                 break   145         else:   146             return ""   147    148         # Strip all appropriate whitespace from the right end of each line.   149         # For subsequent lines, remove the first whitespace character.   150         # See section 4.1 of the iCalendar specification.   151    152         lines = [line_stripped]   153    154         line = self.readline()   155         while line.startswith(" ") or line.startswith("\t"):   156             lines.append(line[1:].rstrip("\r\n"))   157             line = self.readline()   158    159         # Since one line too many will have been read, push the line back into   160         # the file.   161    162         if line:   163             self.pushback(line)   164    165         return "".join(lines)   166    167     def get_content_line(self):   168    169         "Return a content line object for the current line."   170    171         return ContentLine(self.read_content_line())   172    173 class ContentLine:   174    175     "A content line which can be searched."   176    177     SEPARATORS = re.compile('[;:"]')   178     SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')   179    180     def __init__(self, text):   181         self.text = text   182         self.start = 0   183    184     def __repr__(self):   185         return "ContentLine(%r)" % self.text   186    187     def get_remaining(self):   188    189         "Get the remaining text from the content line."   190    191         return self.text[self.start:]   192    193     def search(self, targets):   194    195         """   196         Find one of the 'targets' in the text, returning the string from the   197         current position up to the target found, along with the target string,   198         using a tuple of the form (string, target). If no target was found,   199         return the entire string together with a target of None.   200    201         The 'targets' parameter must be a regular expression object or an object   202         compatible with the API of such objects.   203         """   204    205         text = self.text   206         start = pos = self.start   207         length = len(text)   208    209         # Remember the first target.   210    211         first = None   212         first_pos = None   213         in_quoted_region = 0   214    215         # Process the text, looking for the targets.   216    217         while pos < length:   218             match = targets.search(text, pos)   219    220             # Where nothing matches, end the search.   221    222             if match is None:   223                 pos = length   224    225             # Where a double quote matches, toggle the region state.   226    227             elif match.group() == '"':   228                 in_quoted_region = not in_quoted_region   229                 pos = match.end()   230    231             # Where something else matches outside a region, stop searching.   232    233             elif not in_quoted_region:   234                 first = match.group()   235                 first_pos = match.start()   236                 break   237    238             # Otherwise, keep looking for the end of the region.   239    240             else:   241                 pos = match.end()   242    243         # Where no more input can provide the targets, return a special result.   244    245         else:   246             self.start = length   247             return text[start:], None   248    249         self.start = match.end()   250         return text[start:first_pos], first   251    252 class StreamParser:   253    254     "A stream parser for content in vCard/vCalendar/iCalendar-like formats."   255    256     def __init__(self, f):   257    258         "Initialise the parser for the given file 'f'."   259    260         self.f = f   261    262     def close(self):   263    264         "Close the reader."   265    266         self.f.close()   267    268     def __iter__(self):   269    270         "Return self as the iterator."   271    272         return self   273    274     def next(self):   275    276         """   277         Return the next content item in the file as a tuple of the form   278         (name, parameters, values).   279         """   280    281         return self.parse_content_line()   282    283     def decode_content(self, name, value):   284    285         """   286         Decode for property 'name' the given 'value', replacing quoted   287         characters.   288         """   289    290         return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")   291    292     # Internal methods.   293    294     def parse_content_line(self):   295    296         """   297         Return the name, parameters and value information for the current   298         content line in the file being parsed.   299         """   300    301         f = self.f   302         line_number = f.line_number   303         line = f.get_content_line()   304    305         # Read the property name.   306    307         name, sep = line.search(line.SEPARATORS)   308         name = name.strip()   309    310         if not name and sep is None:   311             raise StopIteration   312    313         # Read the parameters.   314    315         parameters = {}   316    317         while sep == ";":   318    319             # Find the actual modifier.   320    321             parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)   322             parameter_name = parameter_name.strip()   323    324             if sep == "=":   325                 parameter_value, sep = line.search(line.SEPARATORS)   326                 parameter_value = parameter_value.strip()   327             else:   328                 parameter_value = None   329    330             # Append a key, value tuple to the parameters list.   331    332             parameters[parameter_name] = parameter_value   333    334         # Get the value content.   335    336         if sep != ":":   337             raise ValueError, (line_number, line)   338    339         # Obtain and decode the value.   340    341         value = self.decode(name, parameters, line.get_remaining())   342    343         return name, parameters, value   344    345     def decode(self, name, parameters, value):   346    347         "Decode using 'name' and 'parameters' the given 'value'."   348    349         encoding = parameters.get("ENCODING")   350         charset = parameters.get("CHARSET")   351    352         value = self.decode_content(name, value)   353    354         if encoding == "QUOTED-PRINTABLE":   355             return unicode(quopri.decodestring(value), charset or "iso-8859-1")   356         elif encoding == "BASE64":   357             return base64.decodestring(value)   358         else:   359             return value   360    361 class ParserBase:   362    363     "An abstract parser for content in vCard/vCalendar/iCalendar-like formats."   364    365     def __init__(self):   366    367         "Initialise the parser."   368    369         self.names = []   370    371     def parse(self, f, parser_cls=None):   372    373         "Parse the contents of the file 'f'."   374    375         parser = (parser_cls or StreamParser)(f)   376    377         for name, parameters, value in parser:   378    379             if name == "BEGIN":   380                 self.names.append(value)   381                 self.startComponent(value, parameters)   382    383             elif name == "END":   384                 start_name = self.names.pop()   385                 if start_name != value:   386                     raise ParseError, "Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (   387                         start_name, value, f.line_number)   388    389                 self.endComponent(value)   390    391             else:   392                 self.handleProperty(name, parameters, value)   393    394 class Parser(ParserBase):   395    396     "A SAX-like parser for vCard/vCalendar/iCalendar-like formats."   397    398     def __init__(self):   399         ParserBase.__init__(self)   400         self.components = []   401    402     def startComponent(self, name, parameters):   403    404         """   405         Add the component with the given 'name' and 'parameters', recording an   406         empty list of children as part of the component's content.   407         """   408    409         component = self.handleProperty(name, parameters)   410         self.components.append(component)   411         return component   412    413     def endComponent(self, name):   414    415         """   416         End the component with the given 'name' by removing it from the active   417         component stack. If only one component exists on the stack, retain it   418         for later inspection.   419         """   420    421         if len(self.components) > 1:   422             return self.components.pop()   423    424         # Or return the only element.   425    426         elif self.components:   427             return self.components[0]   428    429     def handleProperty(self, name, parameters, value=None):   430    431         """   432         Record the property with the given 'name', 'parameters' and optional   433         'value' as part of the current component's children.   434         """   435    436         component = self.makeComponent(name, parameters, value)   437         self.attachComponent(component)   438         return component   439    440     # Component object construction/manipulation methods.   441    442     def attachComponent(self, component):   443    444         "Attach the given 'component' to its parent."   445    446         if self.components:   447             component_name, component_parameters, component_children = self.components[-1]   448             component_children.append(component)   449    450     def makeComponent(self, name, parameters, value=None):   451    452         """   453         Make a component object from the given 'name', 'parameters' and optional   454         'value'.   455         """   456    457         return (name, parameters, value or [])   458    459     # Public methods.   460    461     def parse(self, f, parser_cls=None):   462    463         "Parse the contents of the file 'f'."   464    465         ParserBase.parse(self, f, parser_cls)   466         try:   467             return self.components[0]   468         except IndexError:   469             raise ParseError, "No vContent component found in file."   470    471 # Writer classes.   472    473 class Writer:   474    475     "A simple class wrapping a file, providing simple output capabilities."   476    477     default_line_length = 76   478    479     def __init__(self, write, line_length=None):   480    481         """   482         Initialise the object with the given 'write' operation. If 'line_length'   483         is set, the length of written lines will conform to the specified value   484         instead of the default value.    485         """   486    487         self._write = write   488         self.line_length = line_length or self.default_line_length   489         self.char_offset = 0   490    491     def write(self, text):   492    493         "Write the 'text' to the file."   494    495         write = self._write   496         line_length = self.line_length   497    498         i = 0   499         remaining = len(text)   500    501         while remaining:   502             space = line_length - self.char_offset   503             if remaining > space:   504                 write(text[i:i + space])   505                 write("\r\n ")   506                 self.char_offset = 1   507                 i += space   508                 remaining -= space   509             else:   510                 write(text[i:])   511                 self.char_offset += remaining   512                 i += remaining   513                 remaining = 0   514    515     def end_line(self):   516    517         "End the current content line."   518    519         if self.char_offset > 0:   520             self.char_offset = 0   521             self._write("\r\n")   522    523 class StreamWriter:   524    525     "A stream writer for content in vCard/vCalendar/iCalendar-like formats."   526    527     def __init__(self, f):   528    529         "Initialise the stream writer with the given 'f' stream object."   530    531         self.f = f   532    533     def append(self, record):   534         self.write(*record)   535    536     def write(self, name, parameters, value):   537    538         """   539         Write a content line, serialising the given 'name', 'parameters' and   540         'value' information.   541         """   542    543         self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))   544    545     # Internal methods.   546    547     def write_content_line(self, name, encoded_parameters, encoded_value):   548    549         """   550         Write a content line for the given 'name', 'encoded_parameters' and   551         'encoded_value' information.   552         """   553    554         f = self.f   555    556         f.write(name)   557         for param_name, param_value in encoded_parameters.items():   558             f.write(";")   559             f.write(param_name)   560             f.write("=")   561             f.write(param_value)   562         f.write(":")   563         f.write(encoded_value)   564         f.end_line()   565    566     def encode_quoted_parameter_value(self, value):   567    568         "Encode the given 'value'."   569    570         return '"%s"' % value   571    572     def encode_value(self, name, parameters, value):   573    574         """   575         Encode using 'name' and 'parameters' the given 'value' so that the   576         resulting encoded form employs any specified character encodings.   577         """   578    579         encoding = parameters.get("ENCODING")   580         charset = parameters.get("CHARSET")   581    582         try:   583             if encoding == "QUOTED-PRINTABLE":   584                 value = quopri.encodestring(value.encode(charset or "iso-8859-1"))   585             elif encoding == "BASE64":   586                 value = base64.encodestring(value)   587    588             return self.encode_content(name, value)   589         except TypeError:   590             raise WriteError, "Property %r value with parameters %r cannot be encoded: %r" % (name, parameters, value)   591    592     # Overrideable methods.   593    594     def encode_parameters(self, parameters):   595    596         """   597         Encode the given 'parameters' according to the vCalendar specification.   598         """   599    600         encoded_parameters = {}   601    602         for param_name, param_value in parameters.items():   603    604             # Basic format support merely involves quoting values which seem to   605             # need it. Other more specific formats may define exactly which   606             # parameters should be quoted.   607    608             if ContentLine.SEPARATORS.search(param_value):   609                 param_value = self.encode_quoted_parameter_value(param_value)   610    611             encoded_parameters[param_name] = param_value   612    613         return encoded_parameters   614    615     def encode_content(self, name, value):   616    617         "Encode for property 'name' the given 'value', quoting characters."   618    619         return (value or "").replace("\n", "\\n")   620    621 # Utility functions.   622    623 def is_input_stream(stream_or_string):   624     return hasattr(stream_or_string, "read")   625    626 def get_input_stream(stream_or_string, encoding=None):   627     if is_input_stream(stream_or_string):   628         if isinstance(stream_or_string, codecs.StreamReader):   629             return stream_or_string   630         else:   631             return codecs.getreader(encoding or default_encoding)(stream_or_string)   632     else:   633         return codecs.open(stream_or_string, encoding=(encoding or default_encoding))   634    635 def get_output_stream(stream_or_string, encoding=None):   636     if hasattr(stream_or_string, "write"):   637         if isinstance(stream_or_string, codecs.StreamWriter):   638             return stream_or_string   639         else:   640             return codecs.getwriter(encoding or default_encoding)(stream_or_string)   641     else:   642         return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))   643    644 def items_to_dict(items, sections=None):   645    646     """   647     Return the given 'items' as a dictionary mapping names to tuples of the form   648     (value, attributes). Where 'sections' is provided, only items whose names   649     occur in the given 'sections' collection will be treated as groups or   650     sections of definitions.   651     """   652    653     d = {}   654     for name, attr, value in items:   655         if not d.has_key(name):   656             d[name] = []   657         if isinstance(value, list) and (not sections or name in sections):   658             d[name].append((items_to_dict(value, sections), attr))   659         else:   660             d[name].append((value, attr))   661     return d   662    663 def dict_to_items(d):   664    665     """   666     Return 'd' converted to a list of items suitable for serialisation using   667     iterwrite.   668     """   669    670     items = []   671     for name, value in d.items():   672         if isinstance(value, list):   673             for v, a in value:   674                 if isinstance(v, dict):   675                     items.append((name, a, dict_to_items(v)))   676                 else:   677                     items.append((name, a, v))   678         else:   679             v, a = value   680             items.append((name, a, dict_to_items(v)))   681     return items   682    683 # Public functions.   684    685 def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   686    687     """   688     Parse the resource data found through the use of the 'stream_or_string',   689     which is either a stream providing Unicode data (the codecs module can be   690     used to open files or to wrap streams in order to provide Unicode data) or a   691     filename identifying a file to be parsed.   692    693     The optional 'encoding' can be used to specify the character encoding used   694     by the file to be parsed.   695    696     The optional 'non_standard_newline' can be set to a true value (unlike the   697     default) in order to attempt to process files with CR as the end of line   698     character.   699    700     As a result of parsing the resource, the root node of the imported resource   701     is returned.   702     """   703    704     stream = get_input_stream(stream_or_string, encoding)   705     reader = Reader(stream, non_standard_newline)   706    707     # Parse using the reader.   708    709     try:   710         parser = (parser_cls or Parser)()   711         return parser.parse(reader)   712    713     # Close any opened streams.   714    715     finally:   716         if not is_input_stream(stream_or_string):   717             reader.close()   718    719 def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   720    721     """   722     Parse the resource data found through the use of the 'stream_or_string',   723     which is either a stream providing Unicode data (the codecs module can be   724     used to open files or to wrap streams in order to provide Unicode data) or a   725     filename identifying a file to be parsed.   726    727     The optional 'encoding' can be used to specify the character encoding used   728     by the file to be parsed.   729    730     The optional 'non_standard_newline' can be set to a true value (unlike the   731     default) in order to attempt to process files with CR as the end of line   732     character.   733    734     An iterator is returned which provides event tuples describing parsing   735     events of the form (name, parameters, value).   736     """   737    738     stream = get_input_stream(stream_or_string, encoding)   739     reader = Reader(stream, non_standard_newline)   740     parser = (parser_cls or StreamParser)(reader)   741     return parser   742    743 def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):   744    745     """   746     Return a writer which will either send data to the resource found through   747     the use of 'stream_or_string' or using the given 'write' operation.   748    749     The 'stream_or_string' parameter may be either a stream accepting Unicode   750     data (the codecs module can be used to open files or to wrap streams in   751     order to accept Unicode data) or a filename identifying a file to be   752     written.   753    754     The optional 'encoding' can be used to specify the character encoding used   755     by the file to be written.   756    757     The optional 'line_length' can be used to specify how long lines should be   758     in the resulting data.   759     """   760    761     if stream_or_string:   762         stream = get_output_stream(stream_or_string, encoding)   763         _writer = Writer(stream.write, line_length)   764     elif write:   765         _writer = Writer(write, line_length)   766     else:   767         raise IOError, "No stream, filename or write operation specified."   768    769     return (writer_cls or StreamWriter)(_writer)   770    771 def to_dict(node, sections=None):   772    773     "Return the 'node' converted to a dictionary representation."   774    775     name, attr, items = node   776     return {name : (isinstance(items, list) and items_to_dict(items, sections) or items, attr)}   777    778 def to_node(d):   779    780     "Return 'd' converted to a items-based representation."   781    782     return dict_to_items(d)[0]   783    784 # vim: tabstop=4 expandtab shiftwidth=4