#!/usr/bin/env python

"""
Round-trip exercise script for the reader and writer classes of iixr.

Each section writes sample data to scratch files in the current directory
("test", "testI" and "testP"), reads the data back through the matching
reader, and prints one line per comparison: the result of the equality test
followed by the expected and the actual value. A fully successful run
therefore prints only lines starting with "True".

The print function is imported from __future__ so that the script parses on
both Python 2.6+ and Python 3 while producing the same output as the former
print statements. Whether iixr itself runs on Python 3 is not something this
file can establish.
"""

from __future__ import print_function

import iixr


def check(expected, actual):
    """Print whether 'actual' equals 'expected', followed by both values."""
    print(expected == actual, expected, actual)


def check_missing(value):
    """Print whether 'value' is None, followed by the value itself."""
    print(value is None, value)


def open_dictionary_writer():
    """
    Open the three scratch files for writing and return a
    TermDictionaryWriter built from a TermWriter ("test"), a TermIndexWriter
    ("testI") and a PositionWriter ("testP").
    """
    term_writer = iixr.TermWriter(open("test", "wb"))
    index_writer = iixr.TermIndexWriter(open("testI", "wb"))
    position_writer = iixr.PositionWriter(open("testP", "wb"))
    # NOTE(review): the final argument (3) is presumably the term index
    # interval -- confirm against the iixr.TermDictionaryWriter signature.
    return iixr.TermDictionaryWriter(
        term_writer, index_writer, position_writer, 3
    )


def open_dictionary_reader():
    """
    Open the three scratch files for reading and return a
    TermDictionaryReader built from a TermReader ("test"), a TermIndexReader
    ("testI") and a PositionReader ("testP").
    """
    term_reader = iixr.TermReader(open("test", "rb"))
    index_reader = iixr.TermIndexReader(open("testI", "rb"))
    position_reader = iixr.PositionReader(open("testP", "rb"))
    return iixr.TermDictionaryReader(
        term_reader, index_reader, position_reader
    )


# --- Plain numbers: FileWriter / FileReader ---------------------------------

numbers = [12345678, 0, 1, 127, 128, 255, 256]

writer = iixr.FileWriter(open("test", "wb"))
for number in numbers:
    writer.write_number(number)
writer.close()

reader = iixr.FileReader(open("test", "rb"))
for number in numbers:
    check(number, reader.read_number())
reader.close()

# --- Positions, one document at a time: PositionWriter / PositionReader ----

all_doc_positions = [
    [
        (123, [1, 3, 5, 15, 25]),
        (124, [0, 100]),
    ],
    [
        (78, [9]),
        (196, [10, 11]),
    ],
]

writer = iixr.PositionWriter(open("test", "wb"))
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        writer.write_positions(docnum, positions)
    # reset() is called once per group of documents, after the group has
    # been written.
    # NOTE(review): presumably this restarts per-group encoding state, since
    # the second group starts at a lower document number -- confirm.
    writer.reset()
writer.close()

reader = iixr.PositionReader(open("test", "rb"))
for doc_positions in all_doc_positions:
    for docnum, positions in doc_positions:
        read_docnum, read_positions = reader.read_positions()
        check(docnum, read_docnum)
        check(positions, read_positions)
    # Mirror the writer: reset after each group of documents.
    reader.reset()
reader.close()

# --- Positions, one group at a time, read back by recorded offset ----------

writer = iixr.PositionWriter(open("test", "wb"))
offsets = [
    writer.write_all_positions(doc_positions)
    for doc_positions in all_doc_positions
]
writer.close()

reader = iixr.PositionReader(open("test", "rb"))
# Read the groups in the opposite order to that in which they were written,
# using the values returned by write_all_positions.
for offset, doc_positions in zip(
    reversed(offsets), reversed(all_doc_positions)
):
    check(doc_positions, reader.read_all_positions(offset))
reader.close()

# --- Terms with offsets: TermWriter / TermReader ---------------------------

terms = [
    ("aardvark", 100000123),
    ("anteater", 100000456),
    ("badger", 100000789),
    ("bull", 1000001234),
    ("bulldog", 1000002345),
    ("cat", 1000003456),
]

writer = iixr.TermWriter(open("test", "wb"))
for term, offset in terms:
    writer.write_term(term, offset)
writer.close()

reader = iixr.TermReader(open("test", "rb"))
for term, offset in terms:
    read_term, read_offset = reader.read_term()
    check(term, read_term)
    check(offset, read_offset)
reader.close()

# --- Indexed terms: TermIndexWriter / TermIndexReader ----------------------

indexed_terms = [
    ("aardvark", 100000123, 200000321),
    ("anteater", 100000456, 200000654),
    ("badger", 100000789, 200000987),
    ("bull", 1000001234, 200004321),
    ("bulldog", 1000002345, 200005432),
    ("cat", 1000003456, 200006543),
]

writer = iixr.TermIndexWriter(open("test", "wb"))
for term, offset, info_offset in indexed_terms:
    writer.write_term(term, offset, info_offset)
writer.close()

reader = iixr.TermIndexReader(open("test", "rb"))
for term, offset, info_offset in indexed_terms:
    read_term, read_offset, read_info_offset = reader.read_term()
    check(term, read_term)
    check(offset, read_offset)
    check(info_offset, read_info_offset)
reader.close()

# --- Term dictionary lookups by term: find_term ----------------------------

dictionary_writer = open_dictionary_writer()
for term, offset in terms:
    dictionary_writer.write_term(term, offset)
dictionary_writer.close()

dictionary_reader = open_dictionary_reader()
# Look the terms up in the opposite order to that in which they were written.
for term, offset in reversed(terms):
    check(offset, dictionary_reader.find_term(term))
# Terms that were never written are expected to produce None.
for term in ("dog", "dingo"):
    check_missing(dictionary_reader.find_term(term))
dictionary_reader.close()

# --- Term dictionary lookups of positions: find_positions ------------------

terms_with_positions = [
    ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
    ("anteater", [(1, [43, 44])]),
    ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
    ("bull", [(6, [128]), (16, [12])]),
    ("bulldog", [(43, [17, 19, 256, 512])]),
    ("cat", [(123, [12, 145, 196]), (1200, [113])]),
]

dictionary_writer = open_dictionary_writer()
for term, doc_positions in terms_with_positions:
    dictionary_writer.write_term_positions(term, doc_positions)
dictionary_writer.close()

dictionary_reader = open_dictionary_reader()
for term, doc_positions in reversed(terms_with_positions):
    check(doc_positions, dictionary_reader.find_positions(term))
# Terms that were never written are expected to produce None.
for term in ("dog", "dingo"):
    check_missing(dictionary_reader.find_positions(term))
dictionary_reader.close()

# --- Indexing whole documents: IndexWriter ---------------------------------

docs = [
    (1, "The cat sat on the mat"),
    (2, "Every good boy deserves football"),
    (13, "One good turn deserves another"),
    (14, "Every man for himself"),
    (25, "Red sky at night shepherd's delight"),
    (36, "She sells sea shells on the sea shore"),
]

# Expected (docnum, positions) lists for a selection of terms from 'docs',
# where a position is the index of the word in the whitespace-split text.
doc_tests = [
    ("Every", [(2, [0]), (14, [0])]),
    ("good", [(2, [1]), (13, [1])]),
    ("deserves", [(2, [3]), (13, [3])]),
    ("sea", [(36, [2, 6])]),
]

index_writer = iixr.IndexWriter(open_dictionary_writer())
for docnum, text in docs:
    for position, term in enumerate(text.split()):
        index_writer.add_position(term, docnum, position)
# Only the IndexWriter is closed here, exactly as before; the dictionary
# writer it wraps is not closed separately by this script.
index_writer.close()

dictionary_reader = open_dictionary_reader()
for term, doc_positions in doc_tests:
    check(doc_positions, dictionary_reader.find_positions(term))
dictionary_reader.close()

# vim: tabstop=4 expandtab shiftwidth=4