#!/usr/bin/env python

"""
Reader and accessor classes for indexing.

Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
paul@6 | 20 | |
class File:

    "A generic file wrapper."

    def __init__(self, f, accessor):

        """
        Initialise the wrapper with the underlying file-like object 'f' and an
        'accessor'. Both are stored unchanged as attributes of the same names;
        this class itself only uses 'f'.
        """

        self.f = f
        self.accessor = accessor

    def seek(self, pos):

        """
        Move the underlying file to position 'pos' by delegating to its own
        'seek' method. Nothing is returned.
        """

        self.f.seek(pos)
paul@6 | 31 | |
class TextFile(File):

    "A wrapper around text files."

    def get_records(self):

        """
        Return an Iterator over the lines of the wrapped file, using the
        accessor given at initialisation to obtain each line's key.
        """

        # Iterating over the file object directly replaces the long-deprecated
        # xreadlines method, which is not available on Python 3 file objects.

        return Iterator(iter(self.f), self.accessor)
paul@8 | 38 | |
class Iterator:

    "An iterator over records employing record accessors."

    def __init__(self, records, accessor):

        """
        Initialise the iterator with an iterable of 'records' and an 'accessor'
        providing a 'get_key' method that is applied to each record.
        """

        self.records = records
        self.accessor = accessor

        # The underlying iterator is only obtained when iteration starts.

        self.iterator = None

    def __iter__(self):

        """
        Obtain a new underlying iterator from the records and return this
        object. Whether this restarts iteration from the first record depends
        on whether the records object itself can be iterated over repeatedly.
        """

        self.iterator = iter(self.records)
        return self

    def next(self):

        """
        Return a (key, record) tuple for the next record, where the key is
        produced by the accessor's 'get_key' method. StopIteration from the
        underlying iterator is propagated when the records are exhausted.
        """

        if self.iterator is None:
            iter(self)

        # Within a method, 'next' refers to the built-in function and not to
        # this method, so this works with Python 2 and Python 3 iterators.

        record = next(self.iterator)
        return self.accessor.get_key(record), record

    # Support the Python 3 iterator protocol in addition to the Python 2 one.

    __next__ = next
paul@6 | 57 | |
class DelimitedRecord:

    "An accessor using a delimiter to split a record."

    def __init__(self, keys=None, delimiter=None, numeric=0):

        """
        Initialise the accessor using a sequence of 'keys' indicating the
        columns in each record that provide the values in the eventual compound
        key provided by each record, along with a 'delimiter' indicating how
        such columns are identified. If 'numeric' is set to a true value, keys
        will be interpreted as numbers.

        Where 'keys' is omitted or empty, the first column (index 0) is used.
        Where 'delimiter' is None, records are split on whitespace.
        """

        self.keys = keys or [0]
        self.delimiter = delimiter
        self.numeric = numeric

        # Define a conversion method.

        if numeric:
            self.convert = self.convert_numeric
        else:
            self.convert = lambda x: x

    def convert_numeric(self, term):

        """
        Return a list containing the integer value of each element of 'term'.
        A ValueError is raised by 'int' for any element that is not a number.
        """

        # A list is built explicitly because 'map' is lazy on Python 3.

        return [int(value) for value in term]

    def get_key(self, record):

        """
        Split 'record' using the delimiter and return a list of the values
        found in the key columns, converted to integers if the accessor is
        numeric. An IndexError is raised if the record lacks a key column.
        """

        values = record.split(self.delimiter)
        return self.convert([values[key] for key in self.keys])

    def get_sort_command(self):

        """
        Return a 'sort' command line string that selects the same delimiter,
        numeric mode and key columns as this accessor. Column indexes are
        converted from the zero-based values used here to the one-based values
        used by 'sort'.
        """

        options = []

        if self.delimiter:
            options.append(" -t %s" % self._quote_delimiter())
        if self.numeric:
            options.append(" -n")
        for key in self.keys:
            options.append(" -k %d,%d" % (key + 1, key + 1))

        return "sort" + "".join(options)

    def _quote_delimiter(self):

        "Return the delimiter as a shell word of the form $'...'."

        # The repr of the delimiter supplies backslash escapes for control
        # characters such as tabs, and the $'...' form interprets them.

        text = repr(self.delimiter)

        # Skip any literal prefix (such as "u" or "b") before the opening
        # quote.

        positions = [p for p in (text.find("'"), text.find('"')) if p >= 0]
        start = 0
        if positions:
            start = min(positions)

        inner = text[start + 1:-1]

        # When repr chooses double quotes, single quotes inside are left
        # unescaped and would otherwise terminate the shell word early.

        if text[start:start + 1] == '"':
            inner = inner.replace("'", "\\'")

        return "$'%s'" % inner
paul@9 | 93 | |
# vim: tabstop=4 expandtab shiftwidth=4