1.1 --- a/iixr/index.py Sat Sep 19 01:43:35 2009 +0200
1.2 +++ b/iixr/index.py Sat Sep 19 21:36:32 2009 +0200
1.3 @@ -234,14 +234,41 @@
1.4 self.reader = IndexReader(self.pathname)
1.5 return self.reader
1.6
1.7 + def _get_partitions(self, prefix):
1.8 +
1.9 + """
1.10 + Return a set of partition identifiers using 'prefix' to identify
1.11 + relevant files.
1.12 + """
1.13 +
1.14 + prefix_length = len(prefix)
1.15 +
1.16 + partitions = set()
1.17 + for filename in listdir(self.pathname):
1.18 + if filename.startswith(prefix):
1.19 + partitions.add(filename[prefix_length:])
1.20 + return partitions
1.21 +
1.22 + def get_term_partitions(self):
1.23 +
1.24 + "Return a set of term partition identifiers."
1.25 +
1.26 + return self._get_partitions("terms-")
1.27 +
1.28 + def get_field_partitions(self):
1.29 +
1.30 + "Return a set of field partition identifiers."
1.31 +
1.32 + return self._get_partitions("fields-")
1.33 +
1.34 def merge(self):
1.35
1.36 "Merge/optimise index partitions."
1.37
1.38 - self.merge_terms()
1.39 - self.merge_fields()
1.40 + self._merge_terms()
1.41 + self._merge_fields()
1.42
1.43 - def merge_terms(self, interval=TERM_INTERVAL, doc_interval=DOCUMENT_INTERVAL):
1.44 + def _merge_terms(self, interval=TERM_INTERVAL, doc_interval=DOCUMENT_INTERVAL):
1.45
1.46 """
1.47 Merge term dictionaries using the given indexing 'interval' and
1.48 @@ -249,13 +276,10 @@
1.49 """
1.50
1.51 readers = []
1.52 - partitions = set()
1.53 + partitions = self.get_term_partitions()
1.54
1.55 - for filename in listdir(self.pathname):
1.56 - if filename.startswith("terms-"): # 6 character prefix
1.57 - partition = filename[6:]
1.58 - readers.append(get_term_reader(self.pathname, partition))
1.59 - partitions.add(partition)
1.60 + for partition in partitions:
1.61 + readers.append(get_term_reader(self.pathname, partition))
1.62
1.63 # Write directly to a dictionary.
1.64
1.65 @@ -280,18 +304,15 @@
1.66 if partition != "merged":
1.67 rename_term_files(self.pathname, partition, "merged")
1.68
1.69 - def merge_fields(self, interval=FIELD_INTERVAL):
1.70 + def _merge_fields(self, interval=FIELD_INTERVAL):
1.71
1.72 "Merge field dictionaries using the given indexing 'interval'."
1.73
1.74 readers = []
1.75 - partitions = set()
1.76 + partitions = self.get_field_partitions()
1.77
1.78 - for filename in listdir(self.pathname):
1.79 - if filename.startswith("fields-"): # 7 character prefix
1.80 - partition = filename[7:]
1.81 - readers.append(get_field_reader(self.pathname, partition))
1.82 - partitions.add(partition)
1.83 + for partition in partitions:
1.84 + readers.append(get_field_reader(self.pathname, partition))
1.85
1.86 # Write directly to a dictionary.
1.87
1.88 @@ -316,6 +337,18 @@
1.89 if partition != "merged":
1.90 rename_field_files(self.pathname, partition, "merged")
1.91
1.92 + def update(self, other_indexes):
1.93 +
1.94 + "Copy the content of the 'other_indexes' into this index and merge."
1.95 +
1.96 + for i, index in enumerate(other_indexes):
1.97 + for partition in index.get_term_partitions():
1.98 + copy_term_files(index.pathname, partition, self.pathname, "-added-%d" % i)
1.99 + for partition in index.get_field_partitions():
1.100 + copy_field_files(index.pathname, partition, self.pathname, "-added-%d" % i)
1.101 +
1.102 + self.merge()
1.103 +
1.104 def close(self):
1.105 if self.reader is not None:
1.106 self.reader.close()