1.1 --- a/test.py Fri Aug 28 01:15:17 2009 +0200
1.2 +++ b/test.py Sat Aug 29 02:15:29 2009 +0200
1.3 @@ -2,6 +2,8 @@
1.4
1.5 import iixr
1.6
1.7 +# Test basic data types.
1.8 +
1.9 numbers = [12345678, 0, 1, 127, 128, 255, 256]
1.10
1.11 f = open("test", "wb")
1.12 @@ -17,6 +19,8 @@
1.13 print number == n, number, n
1.14 r.close()
1.15
1.16 +# Test positions.
1.17 +
1.18 all_doc_positions = [
1.19 [
1.20 (123, [1, 3, 5, 15, 25]),
1.21 @@ -64,30 +68,82 @@
1.22 print doc_positions == dp, doc_positions, dp
1.23 r.close()
1.24
1.25 +# Test fields.
1.26 +
1.27 doc_fields = [
1.28 - ["testing", "fields", "stored", "compressed"],
1.29 - ["fields", "for a second", "document"]
1.30 + (123, ["testing", "fields", "stored", "compressed"]),
1.31 + (456, ["fields", "for a second", "document"]),
1.32 + (789, ["field value"]),
1.33 + (1234, []),
1.34 + (2345, ["abc", "def"]),
1.35 + (3456, ["apple", "banana", "cherry"]),
1.36 + (4567, ["drue", "eple"])
1.37 ]
1.38
1.39 f = open("testF", "wb")
1.40 w = iixr.FieldWriter(f)
1.41 -offsets = []
1.42 -for fields in doc_fields:
1.43 - offsets.append(w.write_fields(fields))
1.44 +for docnum, fields in doc_fields:
1.45 + w.write_fields(docnum, fields)
1.46 w.close()
1.47
1.48 f = open("testF", "rb")
1.49 r = iixr.FieldReader(f)
1.50 -for fields in doc_fields:
1.51 - df = r.read_fields()
1.52 - print fields == df, fields, df
1.53 -offsets.reverse()
1.54 -doc_fields.reverse()
1.55 -for offset, fields in zip(offsets, doc_fields):
1.56 - df = r.read_doc_fields(offset)
1.57 +for docnum, fields in doc_fields:
1.58 + dn, df = r.read_fields()
1.59 + print docnum == dn, docnum, dn
1.60 print fields == df, fields, df
1.61 r.close()
1.62
1.63 +# Test field index files.
1.64 +
1.65 +indexed_docs = [
1.66 + (123, 100000987),
1.67 + (456, 100004321),
1.68 + (789, 100008765)
1.69 + ]
1.70 +
1.71 +f = open("testFI", "wb")
1.72 +w = iixr.FieldIndexWriter(f)
1.73 +for docnum, offset in indexed_docs:
1.74 + w.write_document(docnum, offset)
1.75 +w.close()
1.76 +
1.77 +f = open("testFI", "rb")
1.78 +r = iixr.FieldIndexReader(f)
1.79 +for docnum, offset in indexed_docs:
1.80 + dn, o = r.read_document()
1.81 + print docnum == dn, docnum, dn
1.82 + print offset == o, offset, o
1.83 +r.close()
1.84 +
1.85 +# Test field dictionaries.
1.86 +
1.87 +f = open("testF", "wb")
1.88 +w = iixr.FieldWriter(f)
1.89 +f2 = open("testFI", "wb")
1.90 +w2 = iixr.FieldIndexWriter(f2)
1.91 +wd = iixr.FieldDictionaryWriter(w, w2, 3)
1.92 +for docnum, fields in doc_fields:
1.93 + wd.write_fields(docnum, fields)
1.94 +wd.close()
1.95 +
1.96 +f = open("testF", "rb")
1.97 +r = iixr.FieldReader(f)
1.98 +f2 = open("testFI", "rb")
1.99 +r2 = iixr.FieldIndexReader(f2)
1.100 +rd = iixr.FieldDictionaryReader(r, r2)
1.101 +doc_fields_reversed = doc_fields[:]
1.102 +doc_fields_reversed.reverse()
1.103 +for docnum, fields in doc_fields_reversed:
1.104 + df = rd.read_fields(docnum)
1.105 + print fields == df, fields, df
1.106 +for docnum in (13579, 246810):
1.107 + df = rd.read_fields(docnum)
1.108 + print df is None, df
1.109 +rd.close()
1.110 +
1.111 +# Test terms.
1.112 +
1.113 terms = [
1.114 ("aardvark", 100000123),
1.115 ("anteater", 100000456),
1.116 @@ -111,6 +167,8 @@
1.117 print offset == o, offset, o
1.118 r.close()
1.119
1.120 +# Test terms in index files.
1.121 +
1.122 indexed_terms = [
1.123 ("aardvark", 100000123, 200000321),
1.124 ("anteater", 100000456, 200000654),
1.125 @@ -135,6 +193,8 @@
1.126 print info_offset == i, info_offset, i
1.127 r.close()
1.128
1.129 +# Test dictionaries with only term data.
1.130 +
1.131 f = open("test", "wb")
1.132 w = iixr.TermWriter(f)
1.133 f2 = open("testI", "wb")
1.134 @@ -143,7 +203,7 @@
1.135 w3 = iixr.PositionWriter(f3)
1.136 wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
1.137 for term, offset in terms:
1.138 - wd.write_term(term, offset)
1.139 + wd._write_term(term, offset)
1.140 wd.close()
1.141
1.142 f = open("test", "rb")
1.143 @@ -156,13 +216,15 @@
1.144 terms_reversed = terms[:]
1.145 terms_reversed.reverse()
1.146 for term, offset in terms_reversed:
1.147 - o = rd.find_term(term)
1.148 + o = rd._find_term(term)
1.149 print offset == o, offset, o
1.150 for term in ("dog", "dingo"):
1.151 - o = rd.find_term(term)
1.152 + o = rd._find_term(term)
1.153 print o is None, o
1.154 rd.close()
1.155
1.156 +# Test dictionaries with term and position data.
1.157 +
1.158 terms_with_positions = [
1.159 ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
1.160 ("anteater", [(1, [43, 44])]),
1.161 @@ -200,6 +262,8 @@
1.162 print dp is None, dp
1.163 rd.close()
1.164
1.165 +# Test high-level index operations.
1.166 +
1.167 docs = [
1.168 (1, "The cat sat on the mat"),
1.169 (2, "Every good boy deserves football"),