1.1 --- a/simplex/__init__.py Sat Oct 01 15:07:17 2011 +0200
1.2 +++ b/simplex/__init__.py Sat Oct 01 16:01:22 2011 +0200
1.3 @@ -101,9 +101,15 @@
1.4 """
1.5
1.6 for record in reader.get_records():
1.7 - if term == accessor.get_key(record):
1.8 + key = accessor.get_key(record)
1.9 + if term == key:
1.10 return record
1.11
1.12 + # Short-circuit failed searches (assumes records are in ascending key order - TODO confirm).
1.13 +
1.14 + elif term < key:
1.15 + return None
1.16 +
1.17 return None
1.18
1.19 def groups(l, length):
2.1 --- a/simplex/readers.py Sat Oct 01 15:07:17 2011 +0200
2.2 +++ b/simplex/readers.py Sat Oct 01 16:01:22 2011 +0200
2.3 @@ -35,20 +35,25 @@
2.4
2.5 "An accessor using a delimiter to split a record."
2.6
2.7 - def __init__(self, keys=None, delimiter=None):
2.8 + def __init__(self, keys=None, delimiter=None, numeric=0):
2.9
2.10 """
2.11 Initialise the accessor using a sequence of 'keys' indicating the
2.12 columns in each record that provide the values in the eventual compound
2.13 key provided by each record, along with a 'delimiter' indicating how
2.14 - such columns are identified.
2.15 + such columns are identified. If 'numeric' is set to a true value, each
2.16 + key value will be converted to an integer using int.
2.17 """
2.18
2.19 self.keys = keys or [0]
2.20 self.delimiter = delimiter
2.21 + self.convert = numeric and self.convert_numeric or (lambda x: x)
2.22 +
2.23 + def convert_numeric(self, term):
2.24 + return map(int, term)
2.25
2.26 def get_key(self, record):
2.27 values = record.split(self.delimiter)
2.28 - return [values[key] for key in self.keys]
2.29 + return self.convert([values[key] for key in self.keys])
2.30
2.31 # vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/test_indexed.py Sat Oct 01 15:07:17 2011 +0200
3.2 +++ b/test_indexed.py Sat Oct 01 16:01:22 2011 +0200
3.3 @@ -5,8 +5,8 @@
3.4
3.5 try:
3.6 separator = sys.argv.index("--")
3.7 - filename, interval = sys.argv[1:3]
3.8 - keys = map(int, sys.argv[3:separator])
3.9 + filename, numeric, interval = sys.argv[1:4]
3.10 + keys = map(int, sys.argv[4:separator])
3.11 terms = groups(sys.argv[separator+1:], len(keys))
3.12 except (IndexError, ValueError):
3.13 - print >>sys.stderr, "Usage: %s <filename> <interval> <key>... -- <term value>..." % sys.argv[0]
3.14 + print >>sys.stderr, "Usage: %s <filename> <numeric: true|false> <interval> <key>... -- <term value>..." % sys.argv[0]
3.15 @@ -14,7 +14,7 @@
3.16
3.17 f = open(filename)
3.18 reader = TextFile(f)
3.19 -accessor = DelimitedRecord(keys)
3.20 +accessor = DelimitedRecord(keys, numeric=(numeric == "true"))
3.21 try:
3.22 t = time.time()
3.23 l = make_index(reader, accessor, int(interval))
3.24 @@ -24,7 +24,7 @@
3.25
3.26 for term in terms:
3.27 t = time.time()
3.28 - line = find_with_index(reader, accessor, l, term)
3.29 + line = find_with_index(reader, accessor, l, accessor.convert(term))
3.30 if line:
3.31 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
3.32
4.1 --- a/test_scan.py Sat Oct 01 15:07:17 2011 +0200
4.2 +++ b/test_scan.py Sat Oct 01 16:01:22 2011 +0200
4.3 @@ -5,8 +5,8 @@
4.4
4.5 try:
4.6 separator = sys.argv.index("--")
4.7 - filename = sys.argv[1]
4.8 - keys = map(int, sys.argv[2:separator])
4.9 + filename, numeric = sys.argv[1:3]
4.10 + keys = map(int, sys.argv[3:separator])
4.11 terms = groups(sys.argv[separator+1:], len(keys))
4.12 except (IndexError, ValueError):
4.13 - print >>sys.stderr, "Usage: %s <filename> <key>... -- <term value>..." % sys.argv[0]
4.14 + print >>sys.stderr, "Usage: %s <filename> <numeric: true|false> <key>... -- <term value>..." % sys.argv[0]
4.15 @@ -14,13 +14,13 @@
4.16
4.17 f = open(filename)
4.18 reader = TextFile(f)
4.19 -accessor = DelimitedRecord(keys)
4.20 +accessor = DelimitedRecord(keys, numeric=(numeric == "true"))
4.21 try:
4.22 for term in terms:
4.23 reader.seek(0)
4.24
4.25 t = time.time()
4.26 - line = find_in_file(reader, accessor, term)
4.27 + line = find_in_file(reader, accessor, accessor.convert(term))
4.28 if line:
4.29 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
4.30 finally: