imip-agent (file imiptools/text.py at 68e9e6ca2284)

     1 #!/usr/bin/env python     2      3 """     4 Parsing of textual content.     5      6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 import codecs    23 import re    24     25 # Parsing of lines to obtain functions and arguments.    26     27 line_pattern_str = (    28                    r"(?:"    29                    r"(?:'(.*?)')" # single-quoted text    30                    r"|"    31                    r'(?:"(.*?)")' # double-quoted text    32                    r"|"    33                    r"([^\s]+)"    # non-whitespace characters    34                    r")+"    35                    r"(?:\s+|$)"   # optional trailing whitespace before line end    36                    )    37     38 line_pattern = re.compile(line_pattern_str)    39     40 def parse_line(text):    41     42     """    43     Parse the given 'text', returning a list of words separated by whitespace in    44     the input, where whitespace may occur inside words if quoted using single or    45     double quotes.    46     47     Hello world     -> ['Hello', 'world']    48     Hello ' world'  -> ['Hello', ' world']    49     Hello' 'world   -> ["'Hello'", "'world']    50     """    51     52     parts = []    53     54     # Match the components of each part.    55     56     for match in line_pattern.finditer(text):    57     58         # Combine the components by traversing the matching groups.    59     60         parts.append(reduce(lambda a, b: (a or "") + (b or ""), match.groups()))    61     62     return parts    63     64 # Parsing of tabular files.    65     66 def set_defaults(t, empty_defaults):    67     68     """    69     In the list 't', replace values that are empty or absent with defaults    70     provided by the 'empty_defaults' collection whose entries are of the form    71     (index, value).    72     """    73     74     for i, default in empty_defaults:    75         if i >= len(t):    76             t += [None] * (i - len(t) + 1)    77         if not t[i]:    78             t[i] = default    79     return t    80     81 def get_table(filename, empty_defaults=None, tab_separated=True):    82     83     """    84     From the file having the given 'filename', return a list of tuples    85     representing the file's contents.    86     87     The 'empty_defaults' is a list of (index, value) tuples indicating the    88     default value where a column either does not exist or provides an empty    89     value.    90     91     If 'tab_separated' is specified and is a false value, line parsing using    92     the imiptools.text.parse_line function will be performed instead of    93     splitting each line of the file using tab characters as separators.    94     """    95     96     f = codecs.open(filename, "rb", encoding="utf-8")    97     try:    98         return get_table_from_stream(f, empty_defaults, tab_separated)    99     finally:   100         f.close()   101    102 def get_table_from_stream(f, empty_defaults=None, tab_separated=True):   103    104     """   105     Return a list of tuples representing the contents of the stream 'f'.   106    107     The 'empty_defaults' is a list of (index, value) tuples indicating the   108     default value where a column either does not exist or provides an empty   109     value.   110    111     If 'tab_separated' is specified and is a false value, line parsing using   112     the imiptools.text.parse_line function will be performed instead of   113     splitting each line of the file using tab characters as separators.   114     """   115    116     l = []   117    118     for line in f.readlines():   119         line = line.strip(" \r\n")   120    121         if tab_separated:   122             t = line.split("\t")   123         else:   124             t = parse_line(line)   125    126         if empty_defaults:   127             t = set_defaults(t, empty_defaults)   128         l.append(tuple(t))   129    130     return l   131    132 # vim: tabstop=4 expandtab shiftwidth=4