# HG changeset patch # User Paul Boddie # Date 1317494955 -7200 # Node ID 8795a00cfb265c33316a994ed63128e1dff959a3 # Parent ce60e75cb65b88934e93582ac35fc3e989762625 Separated value conversion from the extraction of individual key values. diff -r ce60e75cb65b -r 8795a00cfb26 simplex/accessors.py --- a/simplex/accessors.py Sat Oct 01 19:33:58 2011 +0200 +++ b/simplex/accessors.py Sat Oct 01 20:49:15 2011 +0200 @@ -22,26 +22,20 @@ "An accessor using a delimiter to split a record." - def __init__(self, keys=None, delimiter=None, numeric=0): + def __init__(self, keys=None, delimiter=None, converter=None): """ Initialise the accessor using a sequence of 'keys' indicating the columns in each record that provide the values in the eventual compound key provided by each record, along with a 'delimiter' indicating how - such columns are identified. If 'numeric' is set to a true value, keys - will be interpreted as numbers. + such columns are identified. If 'converter' is specified, this will be + used to convert the retrieved data. """ self.keys = keys or [0] self.delimiter = delimiter - self.numeric = numeric - - # Define a conversion method. - - self.convert = numeric and self.convert_numeric or (lambda x: x) - - def convert_numeric(self, term): - return map(int, term) + self.converter = converter + self.convert = converter and converter.convert or (lambda x: x) def get_key(self, record): values = record.split(self.delimiter) @@ -56,8 +50,18 @@ return "sort%s%s%s" % ( self.delimiter and (" -t $'%s'" % repr(self.delimiter)[1:-1]) or "", - self.numeric and " -n" or "", + self.converter and self.converter.get_sort_options() or "", "".join([(" -k %d,%d" % (key + 1, key + 1)) for key in self.keys]) ) +class ConvertNumeric: + + "Convert numeric values to integers." + + def convert(self, term): + return map(int, term) + + def get_sort_options(self): + return " -n" + # vim: tabstop=4 expandtab shiftwidth=4 diff -r ce60e75cb65b -r 8795a00cfb26 test_indexed.py --- a/test_indexed.py Sat Oct 01 19:33:58 2011 +0200 +++ b/test_indexed.py Sat Oct 01 20:49:15 2011 +0200 @@ -13,8 +13,11 @@ sys.exit(1) f = open(filename) -accessor = DelimitedRecord(keys, numeric=(numeric == "true")) +accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None)) reader = TextFile(f, Iterator(accessor)) + +print "Sort command:", accessor.get_sort_command() + try: t = time.time() l = make_index(reader, int(interval)) diff -r ce60e75cb65b -r 8795a00cfb26 test_scan.py --- a/test_scan.py Sat Oct 01 19:33:58 2011 +0200 +++ b/test_scan.py Sat Oct 01 20:49:15 2011 +0200 @@ -13,8 +13,11 @@ sys.exit(1) f = open(filename) -accessor = DelimitedRecord(keys, numeric=(numeric == "true")) +accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None)) reader = TextFile(f, Iterator(accessor)) + +print "Sort command:", accessor.get_sort_command() + try: for term in terms: reader.seek(0)