# HG changeset patch # User Paul Boddie # Date 1251319889 -7200 # Node ID 17ffd3434b27adec5c26106ce6ea186a1e9f08ab # Parent 5ef1ed1945938a5504bb45e6b3d98cd833714abe Switched to a "VInt" (variable-length integer) representation for numbers and string lengths. diff -r 5ef1ed194593 -r 17ffd3434b27 iixr.py --- a/iixr.py Tue Aug 25 23:53:20 2009 +0200 +++ b/iixr.py Wed Aug 26 22:51:29 2009 +0200 @@ -53,38 +53,23 @@ # Special case: one byte containing zero. elif number == 0: - self.f.write(chr(1) + chr(0)) + self.f.write(chr(0)) return # Write the number from least to most significant digits. - nbytes = 0 bytes = [] while number != 0: - lsd = number & 255 + lsd = number & 127 + number = number >> 7 + if number != 0: + lsd |= 128 bytes.append(chr(lsd)) - number = number >> 8 - nbytes += 1 - # Too large numbers are not supported. - - if nbytes > 255: - raise ValueError, "Number %r is too large." % number - - bytes.insert(0, chr(nbytes)) record = "".join(bytes) self.f.write(record) - def write_unsigned_byte(self, number): - - "Write 'number' to the file using a single byte." - - if not (0 <= number <= 255): - raise ValueError, "Number %r is out of range." % number - - self.f.write(chr(number)) - def write_string(self, s): "Write 's' to the file, recording its length." @@ -94,7 +79,7 @@ if not (0 <= length <= 255): raise ValueError, "String %r is too long." % s - self.write_unsigned_byte(length) + self.write_number(length) self.f.write(s) class FileReader(File): @@ -105,39 +90,31 @@ "Read a number from the file." - nbytes = self.read_unsigned_byte() - # Read each byte, adding it to the number. - bytes = self.f.read(nbytes) - - i = 0 shift = 0 number = 0 + more = 1 - while i < nbytes: - csd = ord(bytes[i]) + while more: + byte = self.f.read(1) + if not byte: + raise EOFError + + csd = ord(byte) + more = csd & 128 != 0 + if more: + csd &= 127 number += (csd << shift) - shift += 8 - i += 1 + shift += 7 return number - def read_unsigned_byte(self): - - "Read a number from the file, consuming a single byte." - - s = self.f.read(1) - if not s: - raise EOFError - - return ord(s) - def read_string(self): "Read a string from the file." - length = self.read_unsigned_byte() + length = self.read_number() return self.f.read(length) # Specific classes. @@ -282,7 +259,7 @@ common = len(commonprefix([self.last_term, term])) suffix = term[common:] - self.write_unsigned_byte(common) + self.write_number(common) self.write_string(suffix) # Write the offset delta. @@ -310,7 +287,7 @@ # Read the prefix length and term suffix. - common = self.read_unsigned_byte() + common = self.read_number() suffix = self.read_string() self.last_term = self.last_term[:common] + suffix