1.1 --- a/test.py Sat Aug 29 21:15:47 2009 +0200
1.2 +++ b/test.py Sat Aug 29 22:12:25 2009 +0200
1.3 @@ -54,9 +54,8 @@
1.4 w = iixr.PositionWriter(f)
1.5 offsets = []
1.6 for doc_positions in all_doc_positions:
1.7 - offsets.append(
1.8 - w.write_all_positions(doc_positions)
1.9 - )
1.10 + offset, frequency = w.write_all_positions(doc_positions)
1.11 + offsets.append(offset)
1.12 w.close()
1.13
1.14 f = open("test", "rb")
1.15 @@ -145,51 +144,55 @@
1.16 # Test terms.
1.17
1.18 terms = [
1.19 - ("aardvark", 100000123),
1.20 - ("anteater", 100000456),
1.21 - ("badger", 100000789),
1.22 - ("bull", 1000001234),
1.23 - ("bulldog", 1000002345),
1.24 - ("cat", 1000003456)
1.25 + # term offset frequency
1.26 + ("aardvark", 100000123, 1),
1.27 + ("anteater", 100000456, 2),
1.28 + ("badger", 100000789, 13),
1.29 + ("bull", 1000001234, 59),
1.30 + ("bulldog", 1000002345, 99),
1.31 + ("cat", 1000003456, 89)
1.32 ]
1.33
1.34 f = open("test", "wb")
1.35 w = iixr.TermWriter(f)
1.36 -for term, offset in terms:
1.37 - w.write_term(term, offset)
1.38 +for term, offset, frequency in terms:
1.39 + w.write_term(term, offset, frequency)
1.40 w.close()
1.41
1.42 f = open("test", "rb")
1.43 r = iixr.TermReader(f)
1.44 -for term, offset in terms:
1.45 - t, o = r.read_term()
1.46 +for term, offset, frequency in terms:
1.47 + t, o, fr = r.read_term()
1.48 print term == t, term, t
1.49 print offset == o, offset, o
1.50 + print frequency == fr, frequency, fr
1.51 r.close()
1.52
1.53 # Test terms in index files.
1.54
1.55 indexed_terms = [
1.56 - ("aardvark", 100000123, 200000321),
1.57 - ("anteater", 100000456, 200000654),
1.58 - ("badger", 100000789, 200000987),
1.59 - ("bull", 1000001234, 200004321),
1.60 - ("bulldog", 1000002345, 200005432),
1.61 - ("cat", 1000003456, 200006543)
1.62 + # term offset frequency info_offset
1.63 + ("aardvark", 100000123, 1, 200000321),
1.64 + ("anteater", 100000456, 2, 200000654),
1.65 + ("badger", 100000789, 13, 200000987),
1.66 + ("bull", 1000001234, 59, 200004321),
1.67 + ("bulldog", 1000002345, 99, 200005432),
1.68 + ("cat", 1000003456, 89, 200006543)
1.69 ]
1.70
1.71 f = open("test", "wb")
1.72 w = iixr.TermIndexWriter(f)
1.73 -for term, offset, info_offset in indexed_terms:
1.74 - w.write_term(term, offset, info_offset)
1.75 +for term, offset, frequency, info_offset in indexed_terms:
1.76 + w.write_term(term, offset, frequency, info_offset)
1.77 w.close()
1.78
1.79 f = open("test", "rb")
1.80 r = iixr.TermIndexReader(f)
1.81 -for term, offset, info_offset in indexed_terms:
1.82 - t, o, i = r.read_term()
1.83 +for term, offset, frequency, info_offset in indexed_terms:
1.84 + t, o, fr, i = r.read_term()
1.85 print term == t, term, t
1.86 print offset == o, offset, o
1.87 + print frequency == fr, frequency, fr
1.88 print info_offset == i, info_offset, i
1.89 r.close()
1.90
1.91 @@ -202,8 +205,8 @@
1.92 f3 = open("testP", "wb")
1.93 w3 = iixr.PositionWriter(f3)
1.94 wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
1.95 -for term, offset in terms:
1.96 - wd._write_term(term, offset)
1.97 +for term, offset, frequency in terms:
1.98 + wd._write_term(term, offset, frequency)
1.99 wd.close()
1.100
1.101 f = open("test", "rb")
1.102 @@ -215,12 +218,13 @@
1.103 rd = iixr.TermDictionaryReader(r, r2, r3)
1.104 terms_reversed = terms[:]
1.105 terms_reversed.reverse()
1.106 -for term, offset in terms_reversed:
1.107 - o = rd._find_term(term)
1.108 +for term, offset, frequency in terms_reversed:
1.109 + o, fr = rd._find_term(term)
1.110 print offset == o, offset, o
1.111 + print frequency == fr, frequency, fr
1.112 for term in ("dog", "dingo"):
1.113 - o = rd._find_term(term)
1.114 - print o is None, o
1.115 + t = rd._find_term(term)
1.116 + print t is None, t
1.117 rd.close()
1.118
1.119 # Test dictionaries with term and position data.
1.120 @@ -274,10 +278,10 @@
1.121 ]
1.122
1.123 doc_tests = [
1.124 - ("Every", [(2, [0]), (14, [0])]),
1.125 - ("good", [(2, [1]), (13, [1])]),
1.126 - ("deserves", [(2, [3]), (13, [3])]),
1.127 - ("sea", [(36, [2, 6])])
1.128 + ("Every", 2, [(2, [0]), (14, [0])]),
1.129 + ("good", 2, [(2, [1]), (13, [1])]),
1.130 + ("deserves", 2, [(2, [3]), (13, [3])]),
1.131 + ("sea", 2, [(36, [2, 6])])
1.132 ]
1.133
1.134 index = iixr.Index("test_index")
1.135 @@ -289,9 +293,11 @@
1.136 wi.close()
1.137
1.138 rd = index.get_reader()
1.139 -for term, doc_positions in doc_tests:
1.140 +for term, frequency, doc_positions in doc_tests:
1.141 dp = rd.find_positions(term)
1.142 print doc_positions == dp, doc_positions, dp
1.143 + fr = rd.get_frequency(term)
1.144 + print frequency == fr, frequency, fr
1.145 for docnum, text in docs:
1.146 df = rd.get_fields(docnum)
1.147 print text == df[0], text, df[0]