MoinSupport (annotate MoinRemoteSupport.py in 687ce88f8005)

MoinSupport

Annotated MoinRemoteSupport.py

114:687ce88f8005

2014-01-31

Paul Boddie

Fetch a resource if its cache entry is of zero size, for whatever reason.

paul@18	1	# -- coding: iso-8859-1 --
paul@18	2	"""
paul@18	3	MoinMoin - MoinRemoteSupport library
paul@18	4
paul@111	5	@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
paul@18	6	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@18	7	"""
paul@18	8
paul@52	9	from ContentTypeSupport import getContentTypeAndEncoding
paul@18	10	from MoinMoin.action import cache
paul@113	11	from MoinMoin import caching, log
paul@113	12	from email.parser import Parser
paul@113	13	from email.mime.multipart import MIMEMultipart
paul@113	14	from urllib import splithost, splitpasswd, splitport, splituser, unquote_plus
paul@113	15	from urlparse import urlsplit
paul@21	16	import urllib2, time
paul@113	17	import imaplib
paul@113	18
paul@113	19	logging = log.getLogger(__name__)
paul@18	20
def getCachedResource(request, url, arena, scope, max_cache_age, reader=None):

    """
    Using the given 'request', return the resource data for the given 'url',
    accessing a cache entry with the given 'arena' and 'scope' where the data
    has already been downloaded. The 'max_cache_age' indicates the length in
    seconds that a cache entry remains valid.

    If the optional 'reader' object is given, it will be used to access the
    'url' and write the downloaded data to a cache entry. Otherwise, a standard
    URL reader will be used. A reader is called as reader(url, cache_entry) and
    signals a failed download by raising IOError; any other exception is
    propagated to the caller after the cache entry has been closed.

    If the resource cannot be downloaded and cached, None is returned.
    Otherwise, the form of the data is as follows:

      url <newline>
      [ content-type-header ] <newline>
      [ other-header <newline> ]*
      <newline>
      content-body
    """

    reader = reader or urlreader

    # See if the URL is cached.

    cache_key = cache.key(request, content=url)
    cache_entry = caching.CacheEntry(request, arena, cache_key, scope=scope)

    # If no entry exists, if the entry is empty, or if the entry is older than
    # the specified age, create one with the response from the URL.

    now = time.time()
    mtime = cache_entry.mtime()

    # NOTE: The URL could be checked and the 'If-Modified-Since' header
    # NOTE: (see MoinMoin.action.pollsistersites) could be checked.

    if not cache_entry.exists() or cache_entry.size() == 0 or now - mtime >= max_cache_age:

        # Access the remote data source.

        cache_entry.open(mode="w")
        failed = False

        try:
            try:
                # Read from the source and write to the cache.

                reader(url, cache_entry)

            except IOError:
                failed = True

        finally:
            cache_entry.close()

        # In case of a failed download, discard the entry and return None.
        # The entry is removed only after it has been closed: removing it
        # while it is still open for writing would let the subsequent close
        # store whatever partial data had been written, leaving an empty or
        # truncated entry behind.
        # NOTE(review): this assumes CacheEntry.close() is what makes written
        # NOTE(review): data visible; confirm against MoinMoin.caching.

        if failed:
            if cache_entry.exists():
                cache_entry.remove()
            return None

    # Open the cache entry and read it.

    cache_entry.open()
    try:
        return cache_entry.read()
    finally:
        cache_entry.close()
paul@18	88
def urlreader(url, cache_entry):

    """
    Open the given 'url' and store the response in the 'cache_entry': first
    the URL and response headers (via writeCacheHeaders), then the response
    body. The response is always closed, even if reading or writing fails.
    """

    response = urllib2.urlopen(url)

    try:
        # The headers must precede the body in the cache entry.

        writeCacheHeaders(url, response.headers, cache_entry)

        body = response.read()
        cache_entry.write(body)

    finally:
        response.close()
paul@111	99
def imapreader(url, cache_entry):

    """
    Retrieve data associated with the given 'url' using the IMAP protocol
    specifically, writing it to the 'cache_entry'.

    The 'url' supplies the connection details: an "imaps" scheme selects an
    SSL connection (any other scheme a plain one), the network location
    provides "user:password@host[:port]", and each path component names a
    folder to select in turn.

    All messages found by searching the finally selected folder are attached
    to a single multipart message which is written to the 'cache_entry' after
    the usual cache headers.

    IOError is raised if the URL has no credentials, if a folder cannot be
    selected or if the search fails. Errors raised by imaplib itself are not
    converted and propagate to the caller.
    """

    # NOTE: Should use something like pykolab.imap_utf7.encode here.

    enc = lambda s: s.encode("utf-7")

    # The URL maps to credentials and folder details.

    scheme, netloc, path, query, fragment = urlsplit(url)
    credentials, location = splituser(netloc)

    # Without credentials there is nothing to log in with: report this in the
    # same way as other retrieval failures instead of failing with a TypeError
    # when splitting the missing credentials.

    if credentials is None:
        logging.warning("No credentials given for IMAP host: %s" % location)
        raise IOError("no credentials given in IMAP URL")

    username, password = splitpasswd(credentials)
    username = unquote_plus(username)
    password = unquote_plus(password or "")

    host, port = splitport(location)
    folders = [unquote_plus(part) for part in path.split("/")[1:]]

    # Connect to the IMAP server.

    if scheme == "imaps":
        cls = imaplib.IMAP4_SSL
    else:
        cls = imaplib.IMAP4

    if port is None:
        connection = cls(host)
    else:
        connection = cls(host, int(port))

    try:
        # Log in inside the protected region so that a failed login still
        # causes the connection to be shut down by the logout below.

        connection.login(username, password)

        # Descend to the desired folder.

        folder = None

        for folder in folders:
            code, response = connection.select(enc(folder), readonly=True)
            if code != "OK":
                logging.warning("Could not enter folder: %s" % folder)
                raise IOError

        # Search for all messages.
        # NOTE: This could also be parameterised.

        code, response = connection.search(None, "(ALL)")

        if code != "OK":
            logging.warning("Could not search folder: %s" % folder)
            raise IOError

        # For each result, obtain the full message, but embed it in a larger
        # multipart message.

        message = MIMEMultipart()

        writeCacheHeaders(url, message, cache_entry)

        # Splitting on arbitrary whitespace yields no message numbers at all
        # for an empty search result, whereas splitting on " " would yield a
        # single empty number and provoke an invalid fetch command.

        numbers = (response and response[0] or "").split()

        for n in numbers:

            # BODY.PEEK[] is the IMAP4rev1 way of fetching the whole message
            # without marking it as seen; RFC822.PEEK is obsolete syntax that
            # not every server accepts.

            code, response = connection.fetch(n, "(BODY.PEEK[])")

            if code == "OK" and response:

                # Write the message payload into the multipart message for
                # later processing. Only (envelope, body) pairs carry message
                # data; other response elements are plain strings and are
                # ignored.

                for data in response:
                    if isinstance(data, tuple) and len(data) == 2:
                        envelope, body = data
                        message.attach(Parser().parsestr(body))
            else:
                # Message numbers are strings, so they are formatted with %s.

                logging.warning("Could not obtain message %s from folder %s" % (n, folder))

        cache_entry.write(message.as_string())

    finally:
        connection.logout()
paul@113	179
def writeCacheHeaders(url, headers, cache_entry):

    """
    For the given 'url', write it and the given 'headers' to the given
    'cache_entry'.

    The 'headers' object must provide 'get' and 'items' methods. The output
    consists of the URL on the first line, the content type (or an empty line
    if there is none) on the second line, each remaining header on a line of
    its own as "name: value", and finally an empty line.

    Header values spanning several lines are unfolded so that each header
    occupies exactly one line: an embedded line break would otherwise be
    indistinguishable from the start of another header or, if followed by
    nothing, from the end of the headers.
    """

    def unfold(value):

        "Return 'value' as a string with any line breaks removed."

        # The formatting operation converts non-string values in the same way
        # as the "name: value" formatting below would.

        return "".join(("%s" % value).splitlines())

    cache_entry.write(url + "\n")
    cache_entry.write(unfold(headers.get("content-type") or "") + "\n")

    # The content type has its own line above and is not repeated here.

    for key, value in headers.items():
        if key.lower() != "content-type":
            cache_entry.write("%s: %s\n" % (key, unfold(value)))

    cache_entry.write("\n")
paul@113	193
def getCachedResourceMetadata(f):

    """
    Return metadata for the given resource file-like object 'f', leaving 'f'
    positioned after the empty line that terminates the headers.

    The result is a tuple of the form (url, content_type, encoding, metadata)
    where 'url' is the first line exactly as read (including any line
    terminator), 'content_type' and 'encoding' are obtained from the second
    line using getContentTypeAndEncoding, and 'metadata' is a dictionary
    mapping the names of the remaining headers to their values.

    A header line starting with whitespace is treated as a continuation of the
    preceding header, and any other line without a colon is ignored, so that a
    cache entry containing folded headers does not cause a ValueError.
    """

    url = f.readline()
    content_type, encoding = getContentTypeAndEncoding(f.readline())

    metadata = {}
    key = None
    line = f.readline()

    # The headers end at the first empty line or at the end of the file.

    while line.strip():

        if line[0] in " \t" and key is not None:

            # A continuation line extends the value of the previous header.

            metadata[key] = "%s %s" % (metadata[key], line.strip())

        elif ":" in line:
            key, value = [v.strip() for v in line.split(":", 1)]
            metadata[key] = value

        line = f.readline()

    return url, content_type, encoding, metadata
paul@52	210
paul@18	211	# vim: tabstop=4 expandtab shiftwidth=4