#!/usr/bin/env python

"""
Reader and accessor classes for indexing.

Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

class File:

    "A generic file wrapper."

    def __init__(self, f, accessor):

        """
        Initialise the wrapper with the underlying file-like object 'f' and the
        'accessor' used to obtain keys from the records read from it.
        """

        self.f = f
        self.accessor = accessor

    def seek(self, pos):

        "Move the underlying file to the given 'pos'."

        self.f.seek(pos)

class TextFile(File):

    "A wrapper around text files."

    def get_records(self):

        """
        Return an iterator yielding (key, record) tuples, one for each line of
        the underlying file.
        """

        # Iterate over the file object directly: this yields the same lines as
        # the deprecated xreadlines method, which is absent in Python 3.

        return Iterator(self.f, self.accessor)

class Iterator:

    "An iterator over records employing record accessors."

    def __init__(self, records, accessor):

        """
        Initialise the iterator with the given iterable of 'records' and the
        'accessor' providing a get_key method applied to each record.
        """

        self.records = records
        self.accessor = accessor
        self.iterator = None

    def __iter__(self):

        "Obtain a fresh iterator over the records and return this object."

        self.iterator = iter(self.records)
        return self

    def next(self):

        """
        Return a (key, record) tuple for the next record. StopIteration from
        the underlying iterator is propagated when the records are exhausted.
        """

        # Permit direct calls to this method without a preceding iter call.

        if self.iterator is None:
            iter(self)

        # The built-in function works with both the Python 2 and Python 3
        # iterator protocols.

        record = next(self.iterator)
        return self.accessor.get_key(record), record

    # Support the Python 3 iterator protocol alongside the Python 2 one.

    __next__ = next

class DelimitedRecord:

    "An accessor using a delimiter to split a record."

    def __init__(self, keys=None, delimiter=None, numeric=0):

        """
        Initialise the accessor using a sequence of 'keys' indicating the
        columns in each record that provide the values in the eventual compound
        key provided by each record, along with a 'delimiter' indicating how
        such columns are identified. If 'numeric' is set to a true value, keys
        will be interpreted as numbers.

        Where 'keys' is omitted or empty, the first column (column 0) is used.
        Where 'delimiter' is None, records are split on whitespace.
        """

        self.keys = keys or [0]
        self.delimiter = delimiter
        self.numeric = numeric

        # Define a conversion method.

        self.convert = self.convert_numeric if numeric else (lambda x: x)

    def convert_numeric(self, term):

        "Return a list containing the values in 'term' converted to integers."

        # A list is built explicitly since map returns a lazy object in
        # Python 3 which cannot be compared or traversed more than once.

        return [int(value) for value in term]

    def get_key(self, record):

        """
        Return the compound key for the given 'record' as a list of the values
        found in the configured key columns, converted if numeric keys were
        requested.
        """

        values = record.split(self.delimiter)
        return self.convert([values[key] for key in self.keys])

    def get_sort_command(self):

        """
        Return a 'sort' command line string selecting the configured delimiter,
        numeric ordering and key columns.
        """

        # The delimiter is written using its escaped representation inside
        # $'...' quoting.

        if self.delimiter:
            delimiter_option = " -t $'%s'" % repr(self.delimiter)[1:-1]
        else:
            delimiter_option = ""

        numeric_option = " -n" if self.numeric else ""

        # Columns are numbered from zero here but from one in the command.

        key_options = "".join(
            [" -k %d,%d" % (key + 1, key + 1) for key in self.keys]
            )

        return "sort%s%s%s" % (delimiter_option, numeric_option, key_options)

# vim: tabstop=4 expandtab shiftwidth=4