paul@6 | 1 | #!/usr/bin/env python |
paul@6 | 2 | |
paul@6 | 3 | """ |
paul@6 | 4 | Reader and accessor classes for indexing. |
paul@6 | 5 | |
paul@6 | 6 | Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk> |
paul@6 | 7 | |
paul@6 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@6 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@6 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@6 | 11 | version. |
paul@6 | 12 | |
paul@6 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT ANY |
paul@6 | 14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
paul@6 | 15 | PARTICULAR PURPOSE. See the GNU General Public License for more details. |
paul@6 | 16 | |
paul@6 | 17 | You should have received a copy of the GNU General Public License along |
paul@6 | 18 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paul@6 | 19 | """ |
paul@6 | 20 | |
class TextFile:

    "A text file paired with the accessor used to obtain keys from its records."

    def __init__(self, f, accessor):

        """
        Initialise the wrapper with the file object 'f' and the record
        'accessor' that is handed on to the iterators created by get_records.
        """

        self.f, self.accessor = f, accessor

    def seek(self, pos):

        "Reposition the wrapped file at 'pos'."

        underlying = self.f
        underlying.seek(pos)

    def get_records(self):

        """
        Return a FileIterator over the wrapped file, employing this wrapper's
        accessor.
        """

        records = FileIterator(self.f, self.accessor)
        return records
paul@8 | 34 | |
class FileIterator:

    "An iterator over records employing record accessors."

    def __init__(self, resource, accessor):

        """
        Initialise the iterator with a 'resource' providing one record per
        iteration step (such as an open text file providing lines) and an
        'accessor' whose get_key method obtains the key from each record.
        """

        self.resource = resource
        self.accessor = accessor
        self.iterator = None

    def __iter__(self):

        "Begin iterating over the resource, returning this object."

        # Iterating over the resource directly does for files what the
        # deprecated xreadlines method did, but also works with Python 3 file
        # objects and with any other iterable of records.

        self.iterator = iter(self.resource)
        return self

    def __next__(self):

        """
        Return a (key, record) tuple for the next record in the resource, with
        the key being obtained from the record using the accessor.

        StopIteration is raised when the resource has no more records.
        """

        # Permit use of next without a preceding call to iter.

        if self.iterator is None:
            iter(self)

        record = next(self.iterator)
        return self.accessor.get_key(record), record

    # Support the Python 2 iterator protocol under its original name.

    next = __next__
paul@6 | 53 | |
class DelimitedRecord:

    "An accessor using a delimiter to split a record."

    def __init__(self, keys=None, delimiter=None, numeric=0):

        """
        Initialise the accessor using a sequence of 'keys' indicating the
        columns in each record that provide the values in the eventual compound
        key provided by each record, along with a 'delimiter' indicating how
        such columns are identified. If 'numeric' is set to a true value, keys
        will be interpreted as numbers.

        Where 'keys' is omitted or empty, the first column alone provides the
        key. Where 'delimiter' is None, records are split in the manner of the
        string split method without a separator.
        """

        self.keys = keys or [0]
        self.delimiter = delimiter

        # Define a conversion method.

        if numeric:
            self.convert = self.convert_numeric
        else:
            self.convert = lambda term: term

    def convert_numeric(self, term):

        """
        Return a list containing the values in 'term' converted to integers.
        ValueError is raised for a value that int cannot convert.
        """

        # A list is built explicitly since map only provides a lazy iterator in
        # Python 3, which would not be usable as a comparable key.

        return [int(value) for value in term]

    def get_key(self, record):

        """
        Return the compound key for the given 'record' as a list of the values
        found in the key columns, converted if numeric keys were requested.

        The record is split as given, without any line ending being removed
        first: with an explicit delimiter, a value taken from the last column
        retains any trailing newline.

        IndexError is raised if the record has too few columns to provide a
        key column.
        """

        values = record.split(self.delimiter)
        return self.convert([values[key] for key in self.keys])
paul@6 | 81 | |
paul@6 | 82 | # vim: tabstop=4 expandtab shiftwidth=4 |