vContent (annotate vContent.py in ffea599ea31b)

vContent

Annotated vContent.py

39:ffea599ea31b

2014-09-22

Paul Boddie

Added missing ParseError class.

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Parsing of vCard, vCalendar and iCalendar files.
paul@0	5
paul@39	6	Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,
paul@39	7	2014 Paul Boddie <paul@boddie.org.uk>
paul@0	8
paul@0	9	This program is free software; you can redistribute it and/or modify it under
paul@14	10	the terms of the GNU General Public License as published by the Free Software
paul@14	11	Foundation; either version 3 of the License, or (at your option) any later
paul@14	12	version.
paul@0	13
paul@0	14	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	15	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@14	16	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	17	details.
paul@0	18
paul@14	19	You should have received a copy of the GNU General Public License along with
paul@14	20	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	21
paul@0	22	--------
paul@0	23
paul@0	24	References:
paul@0	25
paul@16	26	RFC 5545: Internet Calendaring and Scheduling Core Object Specification
paul@16	27	(iCalendar)
paul@18	28	http://tools.ietf.org/html/rfc5545
paul@16	29
paul@0	30	RFC 2445: Internet Calendaring and Scheduling Core Object Specification
paul@0	31	(iCalendar)
paul@18	32	http://tools.ietf.org/html/rfc2445
paul@0	33
paul@0	34	RFC 2425: A MIME Content-Type for Directory Information
paul@18	35	http://tools.ietf.org/html/rfc2425
paul@0	36
paul@0	37	RFC 2426: vCard MIME Directory Profile
paul@18	38	http://tools.ietf.org/html/rfc2426
paul@0	39	"""
paul@0	40
paul@4	41	try:
paul@4	42	set
paul@4	43	except NameError:
paul@4	44	from sets import Set as set
paul@4	45
paul@0	46	# Encoding-related imports.
paul@0	47
paul@0	48	import base64, quopri
paul@9	49	import codecs
paul@0	50
paul@4	51	# Tokenisation help.
paul@4	52
paul@4	53	import re
paul@4	54
paul@9	55	# Configuration.
paul@9	56
paul@9	57	default_encoding = "utf-8"
paul@9	58
class ParseError(Exception):

    """
    An error raised when content cannot be parsed, such as when BEGIN and END
    declarations do not match.
    """
paul@39	64
paul@7	65	# Reader and parser classes.
paul@0	66
class Reader:

    """
    A wrapper around a file-like object which supports the pushing back of
    lines and the assembly of folded content lines.
    """

    def __init__(self, f, non_standard_newline=0):

        """
        Wrap the file 'f'. Where 'non_standard_newline' is given as a true
        value (it is false by default), a line ending with a lone CR is
        regarded as complete instead of being joined to the text following it.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline

        # Pushed-back lines, with the most recently pushed line last.

        self.lines = []

        # The number of the line about to be read.

        self.line_number = 1

    def close(self):

        "Close the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Return 'line' to the reader so that the next readline call yields it
        instead of consulting the underlying file.
        """

        self.line_number -= 1
        self.lines.append(line)

    def readline(self):

        """
        Return the most recently pushed-back line if there is one, otherwise
        the next line from the underlying file.
        """

        self.line_number += 1

        if self.lines:
            return self.lines.pop()

        line = self.f.readline()

        if self.non_standard_newline:

            # A lone CR terminates the line: supply the missing LF.

            if line.endswith("\r"):
                line += "\n"
            return line

        # Otherwise, a lone CR indicates a broken line (\r instead of \r\n or
        # \n), so keep appending text until a proper ending or the end of the
        # file is reached.

        while line.endswith("\r"):
            more = self.f.readline()
            if not more:
                break
            line += more

        return line

    def read_content_line(self):

        """
        Return a complete content line as a single string, joining any folded
        continuation lines. An empty string is returned at the end of the
        file.
        """

        # Advance past blank lines to the first line with some content.

        line = self.readline()
        first = line.rstrip("\r\n")

        while line and not first:
            line = self.readline()
            first = line.rstrip("\r\n")

        if not line:
            return ""

        # Continuation lines start with a space or tab: that first character
        # is dropped, as is the line ending.
        # See section 4.1 of the iCalendar specification.

        parts = [first]
        line = self.readline()

        while line[:1] in (" ", "\t"):
            parts.append(line[1:].rstrip("\r\n"))
            line = self.readline()

        # The line which ended the loop belongs to the next content line, so
        # hand it back unless the end of the file was reached.

        if line:
            self.pushback(line)

        return "".join(parts)

    def get_content_line(self):

        "Return the next content line wrapped in a ContentLine object."

        text = self.read_content_line()
        return ContentLine(text)
paul@8	166
class ContentLine:

    """
    The text of a content line together with a position from which searches
    for separators proceed.
    """

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):

        "Initialise the content line with 'text', searching from its start."

        self.text = text
        self.start = 0

    def __repr__(self):
        return "ContentLine(%r)" % self.text

    def get_remaining(self):

        "Return the text which has not yet been consumed by searching."

        return self.text[self.start:]

    def search(self, targets):

        """
        Search from the current position for the first of the 'targets' found
        outside any double-quoted region, returning a tuple of the form
        (string, target) where the string is the text preceding the target.
        The position is then moved beyond the target. Where no target is
        found, the rest of the text is returned with a target of None and the
        position is moved to the end of the text.

        The 'targets' parameter must be a regular expression object or an
        object compatible with the API of such objects. For quoted regions to
        be recognised, it must also match the double quote character.
        """

        text = self.text
        begin = self.start
        end = len(text)

        cursor = begin
        quoted = False

        while cursor < end:
            found = targets.search(text, cursor)

            # Nothing more can match in the remaining text.

            if found is None:
                break

            if found.group() != '"':

                # A target outside a quoted region ends the search.

                if not quoted:
                    self.start = found.end()
                    return text[begin:found.start()], found.group()

            # A double quote opens or closes a quoted region.

            else:
                quoted = not quoted

            # Quotes and targets inside quoted regions are skipped.

            cursor = found.end()

        self.start = end
        return text[begin:], None
paul@0	245
class StreamParser:

    """
    A stream parser for content in vCard/vCalendar/iCalendar-like formats.

    Instances are iterators yielding (name, parameters, value) tuples, one for
    each content line obtained from the reader.
    """

    def __init__(self, f):

        """
        Initialise the parser for the given reader 'f', which must provide a
        'line_number' attribute together with 'get_content_line' and 'close'
        methods.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised when no more
        content is available.
        """

        return self.parse_content_line()

    def __next__(self):

        "Support the Python 3 iterator protocol by delegating to 'next'."

        return self.next()

    def decode_content(self, value):

        """
        Decode the given 'value', removing CR characters and replacing quoted
        newlines (written as backslash followed by N or n) with real newlines.
        """

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        The parameters are provided as a dictionary mapping names to values,
        with None being used as the value of any parameter given without one.

        StopIteration is raised where the content line is empty. ValueError is
        raised, carrying the line number recorded before the content line was
        read along with the content line itself, where no colon introducing
        the value is found.
        """

        f = self.f
        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()
            else:
                parameter_value = None

            # Record the value against the parameter name.

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":

            # The call form of raise is valid in both Python 2 and Python 3.

            raise ValueError(line_number, line)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'.

        Where the ENCODING parameter is QUOTED-PRINTABLE, a Unicode string is
        returned, decoded using the CHARSET parameter or iso-8859-1 in its
        absence. Where it is BASE64, the decoded binary data is returned.
        Otherwise, the value is returned with only quoted characters replaced.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(value)

        if encoding == "QUOTED-PRINTABLE":

            # Decoding the byte string explicitly avoids the 'unicode'
            # built-in which only exists in Python 2.

            return quopri.decodestring(value).decode(charset or "iso-8859-1")

        elif encoding == "BASE64":

            # base64.decodestring was removed in Python 3.9.

            return base64.b64decode(value)

        else:
            return value
paul@0	351
class ParserBase:

    """
    An abstract parser for content in vCard/vCalendar/iCalendar-like formats.

    Subclasses must provide the 'startComponent', 'endComponent' and
    'handleProperty' methods which are called as content is parsed.
    """

    def __init__(self):

        "Initialise the parser."

        # The names of the components which have been started but not ended.

        self.names = []

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' if specified
        (or StreamParser otherwise) to obtain (name, parameters, value) items.

        For each BEGIN item, 'startComponent' is called with the value and
        parameters; for each END item, 'endComponent' is called with the
        value; for all other items, 'handleProperty' is called with the name,
        parameters and value.

        ParseError is raised where an END item does not match the most recent
        unmatched BEGIN item or where there is no such BEGIN item.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":

                # Report an END without any open component as a parsing error
                # instead of letting the empty list raise IndexError.

                if not self.names:
                    raise ParseError("END declaration (%r) without a matching BEGIN at line %d." % (
                        value, f.line_number))

                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError("Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)
paul@2	384
class Parser(ParserBase):

    """
    A SAX-like parser for vCard/vCalendar/iCalendar-like formats which builds
    a tree of tuples, each of the form (name, parameters, value) where the
    value of a component is the list of its children.
    """

    def __init__(self):

        "Initialise the parser with an empty stack of active components."

        ParserBase.__init__(self)
        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content. The new
        component is attached to any currently active component, becomes the
        active component itself, and is returned.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack, returning it. If only one component exists on the
        stack, retain it for later inspection and return it. If the stack is
        empty, None is returned.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children, returning the
        object made to represent it.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        """
        Attach the given 'component' to its parent, this being the component
        at the top of the active component stack. Where no component is
        active, nothing is done.
        """

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'. Where no 'value' is given, a new empty list is used in its
        place.
        """

        # NOTE(review): any false value, including an empty string, is also
        # replaced by a new list here - confirm that this is intended for
        # properties with empty values.

        return (name, parameters, value or [])

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' if specified to
        obtain the content items, and return the outermost component.

        ParseError is raised where the content provides no component at all,
        as is the case for empty input.
        """

        ParserBase.parse(self, f, parser_cls)

        # Report missing content as a parsing error instead of letting the
        # empty list raise IndexError.

        if not self.components:
            raise ParseError("No component found in the parsed content.")

        return self.components[0]
paul@0	458
paul@7	459	# Writer classes.
paul@7	460
class Writer:

    """
    A simple class wrapping a write operation, folding output so that written
    lines do not exceed a given length.
    """

    default_line_length = 76

    def __init__(self, write, line_length=None):

        """
        Initialise the object with the given 'write' operation. If 'line_length'
        is set, the length of written lines will conform to the specified value
        instead of the default value.

        ValueError is raised where the resulting line length is less than 2:
        continuation lines start with a space, so a shorter length would leave
        no room for content and folding could never make progress.
        """

        self._write = write
        self.line_length = line_length or self.default_line_length

        if self.line_length < 2:
            raise ValueError("line_length must be at least 2, not %r" % (self.line_length,))

        # The number of characters already written on the current line.

        self.char_offset = 0

    def write(self, text):

        """
        Write the 'text' using the write operation. Wherever the current line
        becomes full, a line break and a single space are written before the
        rest of the text, with the space counting towards the length of the
        continuation line.
        """

        write = self._write
        line_length = self.line_length

        i = 0
        remaining = len(text)

        while remaining:
            space = line_length - self.char_offset

            # Fill the current line and start a continuation line.

            if remaining > space:
                write(text[i:i + space])
                write("\r\n ")
                self.char_offset = 1
                i += space
                remaining -= space

            # Or write everything that is left.

            else:
                write(text[i:])
                self.char_offset += remaining
                i += remaining
                remaining = 0

    def end_line(self):

        """
        End the current content line by writing a line break, doing nothing
        if no text has been written on the current line.
        """

        if self.char_offset > 0:
            self.char_offset = 0
            self._write("\r\n")
paul@8	510
class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer with the given 'f' object which must
        provide 'write' and 'end_line' methods.
        """

        self.f = f

    def append(self, record):

        "Write the given 'record', a (name, parameters, value) tuple."

        self.write(*record)

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. A parameter whose value is None is
        written using its name alone.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)

            # Parameters without values cannot be written as name=value.

            if param_value is not None:
                f.write("=")
                f.write(param_value)

        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        Where the ENCODING parameter is QUOTED-PRINTABLE, the value is encoded
        using the CHARSET parameter, or iso-8859-1 in its absence, before
        being converted to the quoted-printable form. Where it is BASE64, the
        value must be binary data.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        if encoding == "QUOTED-PRINTABLE":
            value = self._as_text(quopri.encodestring(value.encode(charset or "iso-8859-1")))

        elif encoding == "BASE64":

            # b64encode produces no line breaks: those inserted by the removed
            # encodestring function would be escaped by encode_content and
            # end up as literal characters inside the payload.

            value = self._as_text(base64.b64encode(value))

        return self.encode_content(value)

    def _as_text(self, data):

        """
        Return the ASCII-only encoded 'data' as text, converting the byte
        strings produced by the encoding functions in Python 3.
        """

        if not isinstance(data, str):
            data = data.decode("ascii")
        return data

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary. Parameters having None as a value are
        preserved as they are.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted.

            if param_value is not None:

                # Values already enclosed in quotes must not be quoted again.

                already_quoted = len(param_value) >= 2 and \
                    param_value.startswith('"') and param_value.endswith('"')

                if not already_quoted and ContentLine.SEPARATORS.search(param_value):
                    param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, value):

        """
        Encode the given 'value', quoting newline characters by writing each
        as a backslash followed by n.
        """

        return value.replace("\n", "\\n")
paul@11	605
paul@9	606	# Utility functions.
paul@9	607
def is_input_stream(stream_or_string):

    """
    Return whether 'stream_or_string' provides a 'read' attribute and can thus
    be regarded as a stream to be read instead of a filename.
    """

    provides_read = hasattr(stream_or_string, "read")
    return provides_read
paul@9	610
def get_input_stream(stream_or_string, encoding=None):

    """
    Return 'stream_or_string' itself if it is an input stream. Otherwise,
    treat it as a filename and return the file opened using the 'encoding'
    or, if that is not specified, the default encoding.
    """

    if not is_input_stream(stream_or_string):
        chosen_encoding = encoding or default_encoding
        return codecs.open(stream_or_string, encoding=chosen_encoding)

    return stream_or_string
paul@9	616
def get_output_stream(stream_or_string, encoding=None):

    """
    Return 'stream_or_string' itself if it provides a 'write' attribute.
    Otherwise, treat it as a filename and return the file opened for writing
    using the 'encoding' or, if that is not specified, the default encoding.
    """

    if not hasattr(stream_or_string, "write"):
        chosen_encoding = encoding or default_encoding
        return codecs.open(stream_or_string, "w", encoding=chosen_encoding)

    return stream_or_string
paul@9	622
paul@0	623	# Public functions.
paul@0	624
def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource indicated by 'stream_or_string' and return the result
    of parsing, this being the root node of the resource when the default
    parser is used.

    The 'stream_or_string' is either a stream providing Unicode data (the
    codecs module can be used to open files or to wrap streams in order to
    provide Unicode data) or a filename identifying a file to be parsed.

    The optional 'encoding' indicates the character encoding used by a file
    identified by filename.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' is instantiated without arguments in place of
    the Parser class if specified.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)
    parser_factory = parser_cls or Parser

    try:
        return parser_factory().parse(reader)

    # Only close streams opened here on behalf of the caller.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()
paul@9	658
def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Return an iterator over the resource indicated by 'stream_or_string',
    providing event tuples of the form (name, parameters, value).

    The 'stream_or_string' is either a stream providing Unicode data (the
    codecs module can be used to open files or to wrap streams in order to
    provide Unicode data) or a filename identifying a file to be parsed.

    The optional 'encoding' indicates the character encoding used by a file
    identified by filename.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' is instantiated with the reader in place of the
    StreamParser class if specified.
    """

    reader = Reader(get_input_stream(stream_or_string, encoding), non_standard_newline)
    parser_factory = parser_cls or StreamParser
    return parser_factory(reader)
paul@5	682
def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation. Where
    both are given, 'stream_or_string' takes precedence.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' is instantiated in place of the StreamWriter
    class if specified.

    IOError is raised where neither 'stream_or_string' nor 'write' is given.
    """

    if stream_or_string:
        stream = get_output_stream(stream_or_string, encoding)
        _writer = Writer(stream.write, line_length)
    elif write:
        _writer = Writer(write, line_length)
    else:

        # The call form of raise is valid in both Python 2 and Python 3.

        raise IOError("No stream, filename or write operation specified.")

    return (writer_cls or StreamWriter)(_writer)
paul@8	710
paul@0	711	# vim: tabstop=4 expandtab shiftwidth=4