# HG changeset patch # User Paul Boddie # Date 1251322562 -7200 # Node ID 247345e7c82fbd8513674fe492fbc4645fe9a604 # Parent 17ffd3434b27adec5c26106ce6ea186a1e9f08ab Added position readers and writers to term dictionary instances, permitting the reading and writing of term and position data. diff -r 17ffd3434b27 -r 247345e7c82f iixr.py --- a/iixr.py Wed Aug 26 22:51:29 2009 +0200 +++ b/iixr.py Wed Aug 26 23:36:02 2009 +0200 @@ -356,9 +356,10 @@ "Writing term dictionaries." - def __init__(self, info_writer, index_writer, interval): + def __init__(self, info_writer, index_writer, position_writer, interval): self.info_writer = info_writer self.index_writer = index_writer + self.position_writer = position_writer self.interval = interval self.entry = 0 @@ -376,17 +377,29 @@ self.entry += 1 + def write_term_positions(self, term, doc_positions): + + """ + Write the given 'term' and the 'doc_positions' recording the documents + and positions at which the term is found. + """ + + offset = self.position_writer.write_all_positions(doc_positions) + self.write_term(term, offset) + def close(self): self.info_writer.close() self.index_writer.close() + self.position_writer.close() class TermDictionaryReader: "Reading term dictionaries." - def __init__(self, info_reader, index_reader): + def __init__(self, info_reader, index_reader, position_reader): self.info_reader = info_reader self.index_reader = index_reader + self.position_reader = position_reader self.terms = [] try: @@ -436,8 +449,19 @@ else: return None + def find_positions(self, term): + + "Return the documents and positions at which the given 'term' is found." + + offset = self.find(term) + if offset is None: + return None + else: + return self.position_reader.read_all_positions(offset) + def close(self): self.info_reader.close() self.index_reader.close() + self.position_reader.close() # vim: tabstop=4 expandtab shiftwidth=4 diff -r 17ffd3434b27 -r 247345e7c82f test.py --- a/test.py Wed Aug 26 22:51:29 2009 +0200 +++ b/test.py Wed Aug 26 23:36:02 2009 +0200 @@ -2,7 +2,7 @@ import iixr -numbers = [12345678, 0, 1] +numbers = [12345678, 0, 1, 127, 128, 255, 256] f = open("test", "wb") w = iixr.FileWriter(f) @@ -115,24 +115,65 @@ w = iixr.TermWriter(f) f2 = open("testI", "wb") w2 = iixr.TermIndexWriter(f2) -w3 = iixr.TermDictionaryWriter(w, w2, 3) +f3 = open("testP", "wb") +w3 = iixr.PositionWriter(f3) +wd = iixr.TermDictionaryWriter(w, w2, w3, 3) for term, offset in terms: - w3.write_term(term, offset) -w3.close() + wd.write_term(term, offset) +wd.close() f = open("test", "rb") r = iixr.TermReader(f) f2 = open("testI", "rb") r2 = iixr.TermIndexReader(f2) -r3 = iixr.TermDictionaryReader(r, r2) +f3 = open("testP", "rb") +r3 = iixr.PositionReader(f3) +rd = iixr.TermDictionaryReader(r, r2, r3) terms_reversed = terms[:] terms_reversed.reverse() for term, offset in terms_reversed: - o = r3.find(term) + o = rd.find(term) print offset == o, offset, o for term in ("dog", "dingo"): - o = r3.find(term) + o = rd.find(term) print o is None, o -r3.close() +rd.close() + +terms_with_positions = [ + ("aardvark", [(1, [2, 45, 96]), (20, [13])]), + ("anteater", [(1, [43, 44])]), + ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]), + ("bull", [(6, [128]), (16, [12])]), + ("bulldog", [(43, [17, 19, 256, 512])]), + ("cat", [(123, [12, 145, 196]), (1200, [113])]) + ] + +f = open("test", "wb") +w = iixr.TermWriter(f) +f2 = open("testI", "wb") +w2 = iixr.TermIndexWriter(f2) +f3 = open("testP", "wb") +w3 = iixr.PositionWriter(f3) +wd = iixr.TermDictionaryWriter(w, w2, w3, 3) +for term, doc_positions in terms_with_positions: + wd.write_term_positions(term, doc_positions) +wd.close() + +f = open("test", "rb") +r = iixr.TermReader(f) +f2 = open("testI", "rb") +r2 = iixr.TermIndexReader(f2) +f3 = open("testP", "rb") +r3 = iixr.PositionReader(f3) +rd = iixr.TermDictionaryReader(r, r2, r3) +terms_reversed = terms_with_positions[:] +terms_reversed.reverse() +for term, doc_positions in terms_reversed: + dp = rd.find_positions(term) + print doc_positions == dp, doc_positions, dp +for term in ("dog", "dingo"): + dp = rd.find_positions(term) + print dp is None, dp +rd.close() # vim: tabstop=4 expandtab shiftwidth=4