1.1 --- a/EventAggregatorSupport.py Sat Nov 12 00:53:01 2011 +0100
1.2 +++ b/EventAggregatorSupport.py Fri Nov 25 00:04:53 2011 +0100
1.3 @@ -20,7 +20,7 @@
1.4 import re
1.5 import bisect
1.6 import operator
1.7 -import urllib
1.8 +import urllib, urllib2
1.9
1.10 try:
1.11 from cStringIO import StringIO
1.12 @@ -103,7 +103,7 @@
1.13
1.14 # Content type parsing.
1.15
1.16 -encoding_regexp_str = ur'charset=(?P<encoding>[-A-Za-z0-9]+)'
1.17 +encoding_regexp_str = ur'(?P<content_type>[^\s;]*)(?:;\s*charset=(?P<encoding>[-A-Za-z0-9]+))?'
1.18 encoding_regexp = re.compile(encoding_regexp_str)
1.19
1.20 # Simple content parsing.
1.21 @@ -144,12 +144,12 @@
1.22 else:
1.23 return None
1.24
1.25 -def getContentEncoding(content_type):
1.26 +def getContentTypeAndEncoding(content_type):
1.27 m = encoding_regexp.search(content_type)
1.28 if m:
1.29 - return m.group("encoding")
1.30 + return m.group("content_type"), m.group("encoding")
1.31 else:
1.32 - return None
1.33 + return None, None
1.34
1.35 def int_or_none(x):
1.36 if x is None:
1.37 @@ -1240,7 +1240,9 @@
1.38
1.39 for source in sources:
1.40 try:
1.41 - url, format = sources_dict[source].split()
1.42 + details = sources_dict[source].split()
1.43 + url = details[0]
1.44 + format = (details[1:] or ["ical"])[0]
1.45 except (KeyError, ValueError):
1.46 pass
1.47 else:
1.48 @@ -1257,10 +1259,13 @@
1.49 url = url.replace("{end}", urllib.quote_plus(calendar_end and str(calendar_end) or ""))
1.50
1.51 # Get a parser.
1.52 +                # NOTE: The parser could instead be chosen dynamically, based on
1.53 +                # NOTE: the content type reported by the remote source.
1.54
1.55 if format == "ical" and vCalendar is not None:
1.56 parser = vCalendar.parse
1.57 resource_cls = EventCalendar
1.58 + required_content_type = "text/calendar"
1.59 else:
1.60 continue
1.61
1.62 @@ -1283,15 +1288,22 @@
1.63
1.64 # Access the remote data source.
1.65
1.66 - cache_entry.open(mode="w")
1.67 - f = urllib.urlopen(url)
1.68 try:
1.69 - cache_entry.write(url + "\n")
1.70 - cache_entry.write((f.headers.get("content-type") or "") + "\n")
1.71 - cache_entry.write(f.read())
1.72 - finally:
1.73 - cache_entry.close()
1.74 - f.close()
1.75 + f = urllib2.urlopen(url)
1.76 + cache_entry.open(mode="w")
1.77 + try:
1.78 + cache_entry.write(url + "\n")
1.79 + cache_entry.write((f.headers.get("content-type") or "") + "\n")
1.80 + cache_entry.write(f.read())
1.81 + finally:
1.82 + cache_entry.close()
1.83 + f.close()
1.84 +
1.85 +                    # If the remote source cannot be retrieved (IOError), ignore it.
1.86 +                    # NOTE: Such failures could be logged or reported somewhere.
1.87 +
1.88 + except IOError:
1.89 + continue
1.90
1.91 # Open the cache entry and read it.
1.92
1.93 @@ -1306,7 +1318,16 @@
1.94 f = StringIO(data)
1.95 try:
1.96 url = f.readline()
1.97 - encoding = getContentEncoding(f.readline())
1.98 +
1.99 + # Get the content type and encoding, making sure that the data
1.100 + # can be parsed.
1.101 +
1.102 + content_type, encoding = getContentTypeAndEncoding(f.readline())
1.103 + if content_type != required_content_type:
1.104 + continue
1.105 +
1.106 + # Send the data to the parser.
1.107 +
1.108 uf = codecs.getreader(encoding or "utf-8")(f)
1.109 try:
1.110 resources.append(resource_cls(url, parser(uf)))
2.1 --- a/README.txt Sat Nov 12 00:53:01 2011 +0100
2.2 +++ b/README.txt Fri Nov 25 00:04:53 2011 +0100
2.3 @@ -298,6 +298,12 @@
2.4 time zone information for the correct interpretation of time information in
2.5 those summaries. Thus, it is highly recommended that pytz be installed.
2.6
2.7 +New in EventAggregator 0.8.2 (Changes since EventAggregator 0.8.1)
2.8 +------------------------------------------------------------------
2.9 +
2.10 + * Improved the error handling around remote event source data retrieval,
2.11 + introducing handling of missing resources and unsupported content types.
2.12 +
2.13 New in EventAggregator 0.8.1 (Changes since EventAggregator 0.8)
2.14 ----------------------------------------------------------------
2.15
3.1 --- a/TO_DO.txt Sat Nov 12 00:53:01 2011 +0100
3.2 +++ b/TO_DO.txt Fri Nov 25 00:04:53 2011 +0100
3.3 @@ -113,3 +113,10 @@
3.4 way of avoiding repetition of the same events described in different places is
3.5 for authors to include a UID property identifying each event, using the same
3.6 value regardless of where the event is being published.
3.7 +
3.8 +Remote Source Timeouts
3.9 +----------------------
3.10 +
3.11 +Sometimes, network problems can cause delays in accessing remote sources. The
3.12 +library should support either a timeout mechanism or asynchronous retrieval of
3.13 +remote source data.