1.1 --- a/simplex/__init__.py Sat Oct 01 16:01:22 2011 +0200
1.2 +++ b/simplex/__init__.py Sat Oct 01 16:40:29 2011 +0200
1.3 @@ -31,13 +31,12 @@
1.4 from simplex.readers import *
1.5 import bisect
1.6
1.7 -def make_index(reader, accessor, interval):
1.8 +def make_index(reader, interval):
1.9
1.10 """
1.11 - Index a resource whose 'reader' provides records and whose 'accessor' can
1.12 - yield the key for such records, creating an index entry for a record after a
1.13 - given number of records, defined by 'interval', have been read since the
1.14 - last entry was produced.
1.15 + Index a resource whose 'reader' provides records and keys, creating an index
1.16 + entry for a record after a given number of records, defined by 'interval',
1.17 + have been read since the last entry was produced.
1.18 """
1.19
1.20 l = []
1.21 @@ -46,8 +45,7 @@
1.22 current_key = None
1.23 start_pos = 0
1.24
1.25 - for i, record in enumerate(reader.get_records()):
1.26 - key = accessor.get_key(record)
1.27 + for i, (key, record) in enumerate(reader.get_records()):
1.28
1.29 # Where duplicate keys are permitted, the first record employing the key
1.30 # must be available as an index entry. Otherwise, records preceding the
1.31 @@ -65,13 +63,12 @@
1.32
1.33 return l
1.34
1.35 -def find_with_index(reader, accessor, l, term):
1.36 +def find_with_index(reader, l, term):
1.37
1.38 """
1.39 - Find in the resource whose 'reader' provides records and whose 'accessor'
1.40 - can yield the key for such records, using the given index list 'l', the
1.41 - given 'term', returning a record employing the term or None if no such
1.42 - record was found.
1.43 + In the resource whose 'reader' provides records and keys, using the given
1.44 + index list 'l', find the given 'term', returning a record employing the term
1.45 + or None if no such record was found.
1.46 """
1.47
1.48 i = bisect.bisect_left(l, (term, None))
1.49 @@ -90,18 +87,17 @@
1.50 found, pos = l[i]
1.51
1.52 reader.seek(pos)
1.53 - return find_in_file(reader, accessor, term)
1.54 + return find_in_file(reader, term)
1.55
1.56 -def find_in_file(reader, accessor, term):
1.57 +def find_in_file(reader, term):
1.58
1.59 """
1.60 - Find in the resource whose 'reader' provides records and whose 'accessor'
1.61 - can yield the key for such records, the given 'term', returning a record
1.62 - employing the term or None if no such record was found.
1.63 + In the resource whose 'reader' provides records and keys, find the given
1.64 + 'term', returning a record employing the term or None if no such record was
1.65 + found.
1.66 """
1.67
1.68 - for record in reader.get_records():
1.69 - key = accessor.get_key(record)
1.70 + for key, record in reader.get_records():
1.71 if term == key:
1.72 return record
1.73
2.1 --- a/simplex/readers.py Sat Oct 01 16:01:22 2011 +0200
2.2 +++ b/simplex/readers.py Sat Oct 01 16:40:29 2011 +0200
2.3 @@ -22,14 +22,34 @@
2.4
2.5 "A wrapper around text files."
2.6
2.7 - def __init__(self, f):
2.8 + def __init__(self, f, accessor):
2.9 self.f = f
2.10 + self.accessor = accessor
2.11
2.12 def seek(self, pos):
2.13 self.f.seek(pos)
2.14
2.15 def get_records(self):
2.16 - return self.f.xreadlines()
2.17 + return FileIterator(self.f, self.accessor)
2.18 +
2.19 +class FileIterator:
2.20 +
2.21 + "An iterator yielding (key, record) pairs, with each key obtained from the accessor."
2.22 +
2.23 + def __init__(self, resource, accessor):
2.24 + self.resource = resource
2.25 + self.accessor = accessor
2.26 + self.iterator = None
2.27 +
2.28 + def __iter__(self):
2.29 + self.iterator = iter(self.resource.xreadlines())
2.30 + return self
2.31 +
2.32 + def next(self):
2.33 + if self.iterator is None:
2.34 + iter(self)
2.35 + record = self.iterator.next()
2.36 + return self.accessor.get_key(record), record
2.37
2.38 class DelimitedRecord:
2.39
2.40 @@ -47,6 +67,9 @@
2.41
2.42 self.keys = keys or [0]
2.43 self.delimiter = delimiter
2.44 +
2.45 + # Define a conversion method.
2.46 +
2.47 self.convert = numeric and self.convert_numeric or (lambda x: x)
2.48
2.49 def convert_numeric(self, term):
3.1 --- a/test_indexed.py Sat Oct 01 16:01:22 2011 +0200
3.2 +++ b/test_indexed.py Sat Oct 01 16:40:29 2011 +0200
3.3 @@ -13,18 +13,18 @@
3.4 sys.exit(1)
3.5
3.6 f = open(filename)
3.7 -reader = TextFile(f)
3.8 accessor = DelimitedRecord(keys, numeric=(numeric == "true"))
3.9 +reader = TextFile(f, accessor)
3.10 try:
3.11 t = time.time()
3.12 - l = make_index(reader, accessor, int(interval))
3.13 + l = make_index(reader, int(interval))
3.14 print "Indexed in %s seconds." % (time.time() - t)
3.15
3.16 # Now use the index.
3.17
3.18 for term in terms:
3.19 t = time.time()
3.20 - line = find_with_index(reader, accessor, l, accessor.convert(term))
3.21 + line = find_with_index(reader, l, accessor.convert(term))
3.22 if line:
3.23 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
3.24
4.1 --- a/test_scan.py Sat Oct 01 16:01:22 2011 +0200
4.2 +++ b/test_scan.py Sat Oct 01 16:40:29 2011 +0200
4.3 @@ -13,14 +13,14 @@
4.4 sys.exit(1)
4.5
4.6 f = open(filename)
4.7 -reader = TextFile(f)
4.8 accessor = DelimitedRecord(keys, numeric=(numeric == "true"))
4.9 +reader = TextFile(f, accessor)
4.10 try:
4.11 for term in terms:
4.12 reader.seek(0)
4.13
4.14 t = time.time()
4.15 - line = find_in_file(reader, accessor, accessor.convert(term))
4.16 + line = find_in_file(reader, accessor.convert(term))
4.17 if line:
4.18 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
4.19 finally: