1.1 --- a/iixr/fields.py Fri Oct 02 00:22:10 2009 +0200
1.2 +++ b/iixr/fields.py Sat Oct 03 03:03:32 2009 +0200
1.3 @@ -21,6 +21,8 @@
1.4 from iixr.files import *
1.5 from bisect import bisect_right # to find terms in the dictionary index
1.6
1.7 +DOCUMENT_CACHE_LIMIT = 10000
1.8 +
1.9 class FieldWriter(FileWriter):
1.10
1.11 "Writing field data to files."
1.12 @@ -176,6 +178,7 @@
1.13 self.field_reader = field_reader
1.14 self.field_index_reader = field_index_reader
1.15
1.16 + self.cache = {}
1.17 self.docs = []
1.18 try:
1.19 while 1:
1.20 @@ -219,6 +222,9 @@
1.21
1.22 "Read the fields of the document with the given 'docnum'."
1.23
1.24 + if self.cache.has_key(docnum):
1.25 + return self.cache[docnum]
1.26 +
1.27 i = bisect_right(self.docs, (docnum, self.max_offset)) - 1
1.28
1.29 # Get the entry position providing the term or one preceding it.
1.30 @@ -243,6 +249,16 @@
1.31 # If the document is found, return the fields.
1.32
1.33 if docnum == found_docnum:
1.34 +
1.35 + # Store the fields in the cache, first removing one arbitrarily chosen
1.36 + # entry if the cache has reached its size limit.
1.37 +
1.38 + keys = self.cache.keys()
1.39 +
1.40 + if len(keys) == DOCUMENT_CACHE_LIMIT:
1.41 + del self.cache[keys[0]]
1.42 +
1.43 + self.cache[docnum] = fields
1.44 return fields
1.45 else:
1.46 return None