1.1 --- a/iixr.py Thu Sep 10 23:19:13 2009 +0200
1.2 +++ b/iixr.py Fri Sep 11 00:01:53 2009 +0200
1.3 @@ -45,6 +45,33 @@
1.4 compressors = [("b", bz2.compress), ("z", zlib.compress)]
1.5 decompressors = {"b" : bz2.decompress, "z" : zlib.decompress}
1.6
1.7 +# Utility functions.
1.8 +
def vint(number):

    """
    Return 'number' encoded as a variable length string.

    The number is split into 7-bit groups, emitted from the least to the most
    significant group, one character per group. Every character except the
    last has its high bit (128) set to signal that more groups follow.

    A ValueError is raised if 'number' is negative.
    """

    # Negative numbers are not supported.

    if number < 0:
        raise ValueError("Number %r is negative." % number)

    # Special case: one byte containing a 7-bit number. This also covers zero,
    # for which the loop below would produce no output at all.

    if number < 128:
        return chr(number)

    # Encode the number from least to most significant 7-bit groups.

    parts = []

    while number != 0:
        lsd = number & 127
        number = number >> 7

        # Flag every group except the final one as "more to follow".

        if number != 0:
            lsd |= 128
        parts.append(chr(lsd))

    return "".join(parts)
1.33 +
1.34 # Foundation classes.
1.35
1.36 class File:
1.37 @@ -78,30 +105,7 @@
1.38
1.39 "Write 'number' to the file using a variable length encoding."
1.40
1.41 - # Negative numbers are not supported.
1.42 -
1.43 - if number < 0:
1.44 - raise ValueError, "Number %r is negative." % number
1.45 -
1.46 - # Special case: one byte containing a 7-bit number.
1.47 -
1.48 - elif number < 128:
1.49 - self.f.write(chr(number))
1.50 - return
1.51 -
1.52 - # Write the number from least to most significant digits.
1.53 -
1.54 - bytes = []
1.55 -
1.56 - while number != 0:
1.57 - lsd = number & 127
1.58 - number = number >> 7
1.59 - if number != 0:
1.60 - lsd |= 128
1.61 - bytes.append(chr(lsd))
1.62 -
1.63 - record = "".join(bytes)
1.64 - self.f.write(record)
1.65 + self.f.write(vint(number))
1.66
1.67 def write_string(self, s, compress=0):
1.68
1.69 @@ -129,15 +133,13 @@
1.70 else:
1.71 flag = "-"
1.72
1.73 - # Record whether compression was used.
1.74 -
1.75 - self.f.write(flag)
1.76 + else:
1.77 + flag = ""
1.78
1.79 # Write the length of the data before the data itself.
1.80
1.81 length = len(s)
1.82 - self.write_number(length)
1.83 - self.f.write(s)
1.84 + self.f.write(flag + vint(length) + s)
1.85
1.86 class FileReader(File):
1.87
1.88 @@ -228,14 +230,15 @@
1.89 # Record the offset of this record.
1.90
1.91 offset = self.f.tell()
1.92 + output = []
1.93
1.94 # Write the document number delta.
1.95
1.96 - self.write_number(docnum - self.last_docnum)
1.97 + output.append(vint(docnum - self.last_docnum))
1.98
1.99 # Write the number of positions.
1.100
1.101 - self.write_number(len(positions))
1.102 + output.append(vint(len(positions)))
1.103
1.104 # Make sure that the positions are sorted.
1.105
1.106 @@ -246,11 +249,14 @@
1.107 last = 0
1.108 for position in positions:
1.109 pos = position - last
1.110 - self.write_number(pos)
1.111 + output.append(vint(pos))
1.112 last = position
1.113
1.114 + # Actually write the data.
1.115 +
1.116 + self.f.write("".join(output))
1.117 +
1.118 self.last_docnum = docnum
1.119 -
1.120 return offset
1.121
1.122 class PositionOpener(FileOpener):
1.123 @@ -289,20 +295,25 @@
1.124 # Record the offset of this record.
1.125
1.126 offset = self.f.tell()
1.127 + output = []
1.128
1.129 # Write the document number delta.
1.130
1.131 - self.write_number(docnum - self.last_docnum)
1.132 + output.append(vint(docnum - self.last_docnum))
1.133 self.last_docnum = docnum
1.134
1.135 # Write the position file offset delta.
1.136
1.137 - self.write_number(pos_offset - self.last_pos_offset)
1.138 + output.append(vint(pos_offset - self.last_pos_offset))
1.139 self.last_pos_offset = pos_offset
1.140
1.141 # Write the document count.
1.142
1.143 - self.write_number(count)
1.144 + output.append(vint(count))
1.145 +
1.146 + # Actually write the data.
1.147 +
1.148 + self.f.write("".join(output))
1.149
1.150 return offset
1.151