# HG changeset patch # User Paul Boddie # Date 1225663585 -3600 # Node ID c408f51100a9c18e2ba13fdb0ab6f7189c2dbdfb # Parent 7eeb730fcbdbb63a88fe5235bd176432ec05f9db Overhauled the reading and writing to more properly handle folded lines, introducing a ContentLine class for parsing whole content lines, and making a separate Writer class which is able to transparently fold lines for the StreamWriter class. Added iterwrite functions, although their name could be better chosen. Updated the tests to more properly test reading and to test writing. diff -r 7eeb730fcbdb -r c408f51100a9 tests/test_calendar_stream.py --- a/tests/test_calendar_stream.py Sun Nov 02 04:06:23 2008 +0100 +++ b/tests/test_calendar_stream.py Sun Nov 02 23:06:25 2008 +0100 @@ -6,7 +6,7 @@ out = codecs.open("tmp.ics", "w", encoding="utf-8") try: doc = vCalendar.iterparse(f) - w = vCalendar.vCalendarStreamWriter(out) + w = vCalendar.iterwrite(out) for name, parameters, value in doc: print "%r, %r, %r" % (name, parameters, value) w.write(name, parameters, value) diff -r 7eeb730fcbdb -r c408f51100a9 tests/test_card_stream.py --- a/tests/test_card_stream.py Sun Nov 02 04:06:23 2008 +0100 +++ b/tests/test_card_stream.py Sun Nov 02 23:06:25 2008 +0100 @@ -6,7 +6,7 @@ out = codecs.open("tmp.vcf", "w", encoding="utf-8") try: doc = vContent.iterparse(f) - w = vContent.StreamWriter(out) + w = vContent.iterwrite(out) for name, parameters, value in doc: print "%r, %r, %r" % (name, parameters, value) w.write(name, parameters, value) diff -r 7eeb730fcbdb -r c408f51100a9 tests/test_reader.py --- a/tests/test_reader.py Sun Nov 02 04:06:23 2008 +0100 +++ b/tests/test_reader.py Sun Nov 02 23:06:25 2008 +0100 @@ -4,45 +4,50 @@ import StringIO s = StringIO.StringIO("""PROP:p1=v1;p2 -=v2;p21;p3=v3;"p4"="v4";"p5=v5 -;p5=v5":"hello -world" + =v2;p21;p3=v3;"p4"="v4";"p5=v5 + ;p5=v5":"hello + world\\nend test" """) r = vContent.Reader(s) +line = r.get_content_line() +print repr(line.text) -data = r.read_until(r.SEPARATORS) +data = line.search(line.SEPARATORS) print data assert data == ("PROP", ":") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data assert data == ("p1", "=") -data = r.read_until(r.SEPARATORS) +data = line.search(line.SEPARATORS) print data assert data == ("v1", ";") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data -assert data == ("p2\n", "=") -data = r.read_until(r.SEPARATORS) +assert data == ("p2", "=") +data = line.search(line.SEPARATORS) print data assert data == ("v2", ";") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data assert data == ("p21", ";") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data assert data == ("p3", "=") -data = r.read_until(r.SEPARATORS) +data = line.search(line.SEPARATORS) print data assert data == ("v3", ";") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data assert data == ('"p4"', "=") -data = r.read_until(r.SEPARATORS) +data = line.search(line.SEPARATORS) print data assert data == ('"v4"', ";") -data = r.read_until(r.SEPARATORS_PLUS_EQUALS) +data = line.search(line.SEPARATORS_PLUS_EQUALS) print data -assert data == ('"p5=v5\n;p5=v5"', ":") +assert data == ('"p5=v5;p5=v5"', ":") +data = line.get_remaining() +print repr(data) +assert data == '"hello world\\nend test"' # vim: tabstop=4 expandtab shiftwidth=4 diff -r 7eeb730fcbdb -r c408f51100a9 vCalendar.py --- a/vCalendar.py Sun Nov 02 04:06:23 2008 +0100 +++ b/vCalendar.py Sun Nov 02 23:06:25 2008 +0100 @@ -195,4 +195,7 @@ return vContent.iterparse(f, non_standard_newline, vCalendarStreamParser) +def iterwrite(f, line_length=None): + return vContent.iterwrite(f, line_length, vCalendarStreamWriter) + # vim: tabstop=4 expandtab shiftwidth=4 diff -r 7eeb730fcbdb -r c408f51100a9 vContent.py --- a/vContent.py Sun Nov 02 04:06:23 2008 +0100 +++ b/vContent.py Sun Nov 02 23:06:25 2008 +0100 @@ -52,9 +52,6 @@ "A simple class wrapping a file, providing simple pushback capabilities." - SEPARATORS = re.compile('[;:"]') - SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]') - def __init__(self, f, non_standard_newline=0): """ @@ -66,7 +63,7 @@ self.f = f self.non_standard_newline = non_standard_newline self.lines = [] - self.line_number = 0 + self.line_number = 1 # about to read line 1 def pushback(self, line): @@ -98,23 +95,69 @@ else: return line - def read_until(self, targets): + def read_content_line(self): """ - Read from the stream until one of the 'targets' is seen. Return the - string from the current position up to the target found, along with the - target string, using a tuple of the form (string, target). If no target - was found, return the entire string together with a target of None. + Read an entire content line, itself potentially consisting of many + physical lines of text. """ - # Remember the entire text read and the index of the current line in - # that text. + line = self.readline() - lines = [] + # Strip all appropriate whitespace from the right end of each line. + # For subsequent lines, remove the first whitespace character. + # See section 4.1 of the iCalendar specification. + + lines = [line.rstrip("\r\n")] line = self.readline() - lines.append(line) - start = 0 + while line.startswith(" ") or line.startswith("\t"): + lines.append(line[1:].rstrip("\r\n")) + line = self.readline() + + # Since one line too many will have been read, push the line back into + # the file. + + if line: + self.pushback(line) + + return "".join(lines) + + def get_content_line(self): + + "Return a content line object for the current line." + + return ContentLine(self.read_content_line()) + +class ContentLine: + + "A content line which can be searched." + + SEPARATORS = re.compile('[;:"]') + SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]') + + def __init__(self, text): + self.text = text + self.start = 0 + + def get_remaining(self): + + "Get the remaining text from the content line." + + return self.text[self.start:] + + def search(self, targets): + + """ + Find one of the 'targets' in the text, returning the string from the + current position up to the target found, along with the target string, + using a tuple of the form (string, target). If no target was found, + return the entire string together with a target of None. + """ + + text = self.text + start = pos = self.start + length = len(text) # Remember the first target. @@ -122,23 +165,21 @@ first_pos = None in_quoted_region = 0 - # Process each line, looking for the targets. + # Process the text, looking for the targets. - while line != "": - match = targets.search(line, start) + while pos < length: + match = targets.search(text, pos) - # Where nothing matches, get the next line. + # Where nothing matches, end the search. if match is None: - line = self.readline() - lines.append(line) - start = 0 + pos = length # Where a double quote matches, toggle the region state. elif match.group() == '"': in_quoted_region = not in_quoted_region - start = match.end() + pos = match.end() # Where something else matches outside a region, stop searching. @@ -150,25 +191,16 @@ # Otherwise, keep looking for the end of the region. else: - start = match.end() + pos = match.end() # Where no more input can provide the targets, return a special result. else: - text = "".join(lines) - return text, None - - # Push back the text after the target. + self.start = length + return text[start:], None - after_target = lines[-1][first_pos + len(first):] - self.pushback(after_target) - - # Produce the lines until the matching line, together with the portion - # of the matching line before the target. - - lines[-1] = lines[-1][:first_pos] - text = "".join(lines) - return text, first + self.start = match.end() + return text[start:first_pos], first class StreamParser: @@ -211,24 +243,30 @@ """ f = self.f + line_number = f.line_number + line = f.get_content_line() - parameters = {} - name, sep = f.read_until(f.SEPARATORS) + # Read the property name. + name, sep = line.search(line.SEPARATORS) name = name.strip() if not name and sep is None: raise StopIteration + # Read the parameters. + + parameters = {} + while sep == ";": # Find the actual modifier. - parameter_name, sep = f.read_until(f.SEPARATORS_PLUS_EQUALS) + parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS) parameter_name = parameter_name.strip() if sep == "=": - parameter_value, sep = f.read_until(f.SEPARATORS) + parameter_value, sep = line.search(line.SEPARATORS) parameter_value = parameter_value.strip() else: parameter_value = None @@ -240,27 +278,11 @@ # Get the value content. if sep != ":": - raise ValueError, f.line_number - - # Strip all appropriate whitespace from the right end of each line. - # For subsequent lines, remove the first whitespace character. - # See section 4.1 of the iCalendar specification. + raise ValueError, line_number - line = f.readline() - value_lines = [line.rstrip("\r\n")] - line = f.readline() - while line != "" and line[0] in [" ", "\t"]: - value_lines.append(line.rstrip("\r\n")[1:]) - line = f.readline() + # Obtain and decode the value. - # Since one line too many will have been read, push the line back into the - # file. - - f.pushback(line) - - # Decode the value. - - value = self.decode(name, parameters, "".join(value_lines)) + value = self.decode(name, parameters, line.get_remaining()) return name, parameters, value @@ -384,16 +406,65 @@ # Writer classes. +class Writer: + + "A simple class wrapping a file, providing simple output capabilities." + + default_line_length = 76 + + def __init__(self, f, line_length=None): + + """ + Initialise the object with the file 'f'. If 'line_length' is set, the + length of written lines will conform to the specified value instead of + the default value. + """ + + self.f = f + self.line_length = line_length or self.default_line_length + self.char_offset = 0 + + def write(self, text): + + "Write the 'text' to the file." + + f = self.f + line_length = self.line_length + + i = 0 + remaining = len(text) + + while remaining: + space = line_length - self.char_offset + if remaining > space: + f.write(text[i:i + space]) + f.write("\r\n ") + self.char_offset = 1 + i += space + remaining -= space + else: + f.write(text[i:]) + self.char_offset += remaining + i += remaining + remaining = 0 + + def end_line(self): + + "End the current content line." + + if self.char_offset > 0: + self.char_offset = 0 + self.f.write("\r\n") + class StreamWriter: "A stream writer for content in vCard/vCalendar/iCalendar-like formats." - def __init__(self, f, line_length=76): + def __init__(self, f): "Initialise the parser for the given file 'f'." self.f = f - self.line_length = line_length def write(self, name, parameters, value): @@ -405,12 +476,14 @@ f = self.f f.write(name) - self.write_parameters(parameters) + for parameter_name, parameter_value in parameters.items(): + f.write(";") + f.write(parameter_name) + f.write("=") + f.write(parameter_value) f.write(":") - - for line in self.fold(self.encode(name, parameters, value)): - f.write(line) - f.write("\r\n") + f.write(self.encode(name, parameters, value)) + f.end_line() def encode_content(self, value): @@ -420,18 +493,6 @@ # Internal methods. - def write_parameters(self, parameters): - - "Write the given 'parameters'." - - f = self.f - - for parameter_name, parameter_value in parameters.items(): - f.write(";") - f.write(parameter_name) - f.write("=") - f.write(parameter_value) - def encode(self, name, parameters, value): "Encode using 'name' and 'parameters' the given 'value'." @@ -446,22 +507,6 @@ return self.encode_content(value) - def fold(self, text): - - "Fold the given 'text'." - - line_length = self.line_length - i = 0 - lines = [] - - line = text[i:i+line_length] - while line: - lines.append(line) - i += line_length - line = text[i:i+line_length] - - return lines - # Public functions. def parse(f, non_standard_newline=0, parser_cls=None): @@ -502,4 +547,9 @@ parser = (parser_cls or StreamParser)(reader) return iter(parser) +def iterwrite(f, line_length=None, writer_cls=None): + _writer = Writer(f, line_length) + writer = (writer_cls or StreamWriter)(_writer) + return writer + # vim: tabstop=4 expandtab shiftwidth=4