#!/usr/bin/env python

"""
Reader and accessor classes for indexing.

Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

def _ansi_c_quote_body(text):

    """
    Return 'text' escaped for use between the quotes of a shell $'...' word.

    The escaping is derived from repr, which already writes control characters
    and backslashes in a form that $'...' understands. Any string prefix is
    dropped, and single quotes that repr left bare are escaped.
    """

    literal = repr(text)
    quote = literal[-1]

    # Skip any prefix (such as u or b) preceding the opening quote.

    body = literal[literal.index(quote) + 1:-1]

    # With double quotes chosen by repr, single quotes are not yet escaped.

    if quote == '"':
        body = body.replace("'", "\\'")
    return body

def _identity(term):

    "Return 'term' unchanged."

    return term

class TextFile:

    "A wrapper around text files."

    def __init__(self, f, accessor):

        """
        Initialise the wrapper with the file object 'f' and the 'accessor' used
        to obtain a key from each record.
        """

        self.f = f
        self.accessor = accessor

    def seek(self, pos):

        "Move to position 'pos' in the underlying file."

        self.f.seek(pos)

    def get_records(self):

        """
        Return an iterator yielding (key, record) tuples for each line read from
        the current position in the file.
        """

        # Iterating over the file directly works wherever xreadlines did and
        # also on file objects that do not provide that method.

        return FileIterator(iter(self.f), self.accessor)

class FileIterator:

    "An iterator over records employing record accessors."

    def __init__(self, records, accessor):

        """
        Initialise the iterator with an iterable of 'records' and the 'accessor'
        providing a get_key method for such records.
        """

        self.records = records
        self.accessor = accessor
        self.iterator = None

    def __iter__(self):

        "Begin iterating over the records, returning this object."

        self.iterator = iter(self.records)
        return self

    def next(self):

        """
        Return a (key, record) tuple for the next record, raising StopIteration
        when the records are exhausted.
        """

        # Permit use of next without an explicit call to iter.

        if self.iterator is None:
            iter(self)
        record = next(self.iterator)
        return self.accessor.get_key(record), record

    # Support both the older and the newer iterator protocols.

    __next__ = next

class DelimitedRecord:

    "An accessor using a delimiter to split a record."

    def __init__(self, keys=None, delimiter=None, numeric=0):

        """
        Initialise the accessor using a sequence of 'keys' indicating the
        columns in each record that provide the values in the eventual compound
        key provided by each record, along with a 'delimiter' indicating how
        such columns are identified. If 'numeric' is set to a true value, keys
        will be interpreted as numbers.

        Where 'keys' is omitted or empty, the first column is used. Where
        'delimiter' is None, records are split on whitespace.
        """

        self.keys = keys or [0]
        self.delimiter = delimiter
        self.numeric = numeric

        # Define a conversion method.

        if numeric:
            self.convert = self.convert_numeric
        else:
            self.convert = _identity

    def convert_numeric(self, term):

        "Return a list of integers converted from the values in 'term'."

        return [int(value) for value in term]

    def get_key(self, record):

        """
        Return the compound key for 'record' as a list of the selected column
        values, converted to integers if the accessor is numeric.

        NOTE: the record is split as given, so with an explicit delimiter any
        line ending remains attached to the final column.
        """

        values = record.split(self.delimiter)
        return self.convert([values[key] for key in self.keys])

    def get_sort_command(self):

        """
        Return a sort command, as a string, ordering records according to the
        delimiter, numeric setting and key columns of this accessor.
        """

        if self.delimiter:
            delimiter_option = " -t $'%s'" % _ansi_c_quote_body(self.delimiter)
        else:
            delimiter_option = ""

        if self.numeric:
            numeric_option = " -n"
        else:
            numeric_option = ""

        # Columns are numbered from one by the sort program.

        key_options = "".join(
            [(" -k %d,%d" % (key + 1, key + 1)) for key in self.keys]
            )

        return "sort%s%s%s" % (delimiter_option, numeric_option, key_options)

# vim: tabstop=4 expandtab shiftwidth=4