paul@18 | 1 | # -*- coding: iso-8859-1 -*- |
paul@18 | 2 | """ |
paul@18 | 3 | MoinMoin - MoinRemoteSupport library |
paul@18 | 4 | |
paul@111 | 5 | @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk> |
paul@18 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@18 | 7 | """ |
paul@18 | 8 | |
paul@52 | 9 | from ContentTypeSupport import getContentTypeAndEncoding |
paul@18 | 10 | from MoinMoin.action import cache |
paul@113 | 11 | from MoinMoin import caching, log |
paul@113 | 12 | from email.parser import Parser |
paul@113 | 13 | from email.mime.multipart import MIMEMultipart |
paul@113 | 14 | from urllib import splithost, splitpasswd, splitport, splituser, unquote_plus |
paul@113 | 15 | from urlparse import urlsplit |
paul@21 | 16 | import urllib2, time |
paul@113 | 17 | import imaplib |
paul@113 | 18 | |
# Module-level logger obtained from MoinMoin's own 'log' module. Within this
# file the name 'logging' refers to this logger object (used below via
# logging.warning), not to the standard library module of the same name.

logging = log.getLogger(__name__)
paul@18 | 20 | |
def getCachedResource(request, url, arena, scope, max_cache_age, reader=None):

    """
    Using the given 'request', return the resource data for the given 'url',
    accessing a cache entry with the given 'arena' and 'scope' where the data
    has already been downloaded. The 'max_cache_age' indicates the length in
    seconds that a cache entry remains valid.

    If the optional 'reader' object is given, it will be used to access the
    'url' and write the downloaded data to a cache entry. It is called as
    reader(url, cache_entry) and should raise IOError to signal that the
    resource could not be obtained. Otherwise, a standard URL reader will be
    used.

    If the resource cannot be downloaded and cached (the reader raised
    IOError), None is returned and no cache entry is left behind. Any other
    exception raised by the reader is propagated to the caller, again after
    the incomplete cache entry has been discarded.

    Otherwise, the form of the data is as follows:

    url <newline>
    [ content-type-header ] <newline>
    [ other-header <newline> ]*
    <newline>
    content-body
    """

    reader = reader or urlreader

    # See if the URL is cached.

    cache_key = cache.key(request, content=url)
    cache_entry = caching.CacheEntry(request, arena, cache_key, scope=scope)

    # If no entry exists, if it is empty, or if the entry is older than the
    # specified age, create one with the response from the URL.

    now = time.time()
    mtime = cache_entry.mtime()

    # NOTE: The URL could be checked and the 'If-Modified-Since' header
    # NOTE: (see MoinMoin.action.pollsistersites) could be checked.

    if not cache_entry.exists() or cache_entry.size() == 0 or now - mtime >= max_cache_age:

        # Access the remote data source.

        cache_entry.open(mode="w")
        downloaded = False

        try:
            try:
                # Read from the source and write to the cache.

                reader(url, cache_entry)
                downloaded = True

            # A failed download is reported to the caller as None below.

            except IOError:
                pass

        finally:

            # Always close before removing: removing an entry which is still
            # open for writing may fail on some platforms, and closing
            # afterwards may bring a partially written entry back.
            # NOTE(review): confirm against CacheEntry.close() semantics.

            cache_entry.close()

            # Never leave an incomplete entry behind, whatever went wrong,
            # since it would otherwise be served as valid data later.

            if not downloaded and cache_entry.exists():
                cache_entry.remove()

        if not downloaded:
            return None

    # Open the cache entry and read it.

    cache_entry.open()
    try:
        return cache_entry.read()
    finally:
        cache_entry.close()
paul@18 | 88 | |
def urlreader(url, cache_entry):

    """
    Open the given 'url' using the standard URL opener and store the response
    in the 'cache_entry': the URL and response headers are written first,
    followed by the complete response body.
    """

    response = urllib2.urlopen(url)

    try:
        # The header section must precede the payload in the cache entry.

        writeCacheHeaders(url, response.headers, cache_entry)

        payload = response.read()
        cache_entry.write(payload)

    finally:
        response.close()
paul@111 | 99 | |
def imapreader(url, cache_entry):

    """
    Retrieve data associated with the given 'url' using the IMAP protocol
    specifically, writing it to the 'cache_entry'.

    The 'url' is expected to have the following form, where the username,
    password and folder names may be URL-encoded, and where the "imaps" scheme
    selects an SSL connection:

    imap[s]://username:password@host[:port]/folder[/folder...]

    Each folder in the path is selected in turn, all messages in the last
    selected folder are fetched, and the messages are written to the
    'cache_entry' as the parts of a single multipart message, preceded by the
    usual cache headers.

    IOError is raised if the URL provides no credentials or no folder, if a
    folder cannot be selected, or if the folder cannot be searched. Errors
    raised by imaplib itself (such as a failed login) are not converted and
    propagate to the caller. A message that cannot be fetched is logged and
    skipped.
    """

    # NOTE: Should use something like pykolab.imap_utf7.encode here.

    enc = lambda s: s.encode("utf-7")

    # The URL maps to credentials and folder details.

    scheme, netloc, path, query, fragment = urlsplit(url)
    credentials, location = splituser(netloc)

    # Without credentials there is nothing to log in with. The URL itself is
    # never logged since it may contain a password.

    if credentials is None:
        logging.warning("No credentials given for IMAP server: %s" % location)
        raise IOError

    username, password = splitpasswd(credentials)
    username = unquote_plus(username)
    password = password and unquote_plus(password) or ""

    host, port = splitport(location)

    # Ignore empty path components such as those produced by the leading
    # slash or by a trailing slash.

    folders = [unquote_plus(name) for name in path.split("/") if name]

    if not folders:
        logging.warning("No folder given for IMAP server: %s" % location)
        raise IOError

    # Connect and log in to the IMAP server.

    if scheme == "imaps":
        cls = imaplib.IMAP4_SSL
    else:
        cls = imaplib.IMAP4

    if port is None:
        i = cls(host)
    else:
        i = cls(host, int(port))

    # From here on, the connection must always be shut down again, even when
    # logging in fails.

    try:
        i.login(username, password)

        # Descend to the desired folder.

        folder = None

        for folder in folders:
            code, response = i.select(enc(folder), readonly=True)
            if code != "OK":
                logging.warning("Could not enter folder: %s" % folder)
                raise IOError

        # Search for all messages.
        # NOTE: This could also be parameterised.

        code, response = i.search(None, "(ALL)")

        if code != "OK":
            logging.warning("Could not search folder: %s" % folder)
            raise IOError

        # For each result, obtain the full message, but embed it in a larger
        # multipart message.

        message = MIMEMultipart()

        # NOTE(review): The URL, including any credentials in it, is written
        # NOTE(review): to the cache entry here as part of the cache format.

        writeCacheHeaders(url, message, cache_entry)

        # An empty search result is an empty string: splitting on arbitrary
        # whitespace yields no message numbers at all instead of a single
        # empty one.

        numbers = response and response[0] and response[0].split() or []

        for n in numbers:
            code, response = i.fetch(n, "(RFC822.PEEK)")

            if code == "OK" and response:

                # Write the message payload into the message for later
                # processing. Only (envelope, body) pairs carry message data;
                # other items, such as the closing parenthesis string, are
                # ignored.

                for data in response:
                    if isinstance(data, tuple) and len(data) == 2:
                        envelope, body = data
                        message.attach(Parser().parsestr(body))
            else:
                # Message numbers are strings, so they are formatted as such.

                logging.warning("Could not obtain message %s from folder %s" % (n, folder))

        cache_entry.write(message.as_string())

    finally:
        i.logout()
paul@113 | 179 | |
def _unfoldHeaderValue(value):

    """
    Return the given header 'value' as a single line of text, joining the
    stripped, non-empty lines of a folded (multi-line) value with spaces.
    """

    text = "%s" % (value,)
    return " ".join([part.strip() for part in text.splitlines() if part.strip()])

def writeCacheHeaders(url, headers, cache_entry):

    """
    For the given 'url', write it and the given 'headers' to the given
    'cache_entry'.

    The 'headers' object must provide the 'get' and 'items' methods of a
    mapping from header names to values. The output consists of the URL line,
    the content type line (empty if there is no such header), one
    "name: value" line for each other header, and a terminating blank line.

    Since the format is line-oriented, header values spanning several lines
    are unfolded so that each header occupies exactly one line.
    """

    cache_entry.write(url + "\n")
    cache_entry.write(_unfoldHeaderValue(headers.get("content-type") or "") + "\n")

    # The content type has its own line above and is not repeated.

    for key, value in headers.items():
        if key.lower() != "content-type":
            cache_entry.write("%s: %s\n" % (key, _unfoldHeaderValue(value)))

    cache_entry.write("\n")
paul@113 | 193 | |
def getCachedResourceMetadata(f):

    """
    Return metadata for the given resource file-like object 'f' as a tuple of
    the form (url, content_type, encoding, metadata).

    The first line read from 'f' is returned unchanged as the URL, the second
    line is interpreted by getContentTypeAndEncoding, and the following lines
    up to the first blank line (or the end of the data) are collected in the
    metadata dictionary mapping header names to values. Afterwards, 'f' is
    positioned at the start of the content body.

    Lines starting with whitespace are treated as continuations of the
    preceding header, and any other line without a colon is ignored, so that
    a malformed header section does not prevent the resource from being read.
    """

    url = f.readline()
    content_type, encoding = getContentTypeAndEncoding(f.readline())

    metadata = {}
    last_key = None
    line = f.readline()

    while line.strip():

        # A continuation line extends the value of the previous header.

        if line[0] in " \t" and last_key is not None:
            metadata[last_key] = "%s %s" % (metadata[last_key], line.strip())

        elif ":" in line:
            key, value = [v.strip() for v in line.split(":", 1)]
            metadata[key] = value
            last_key = key

        # Anything else cannot be interpreted as a header and is skipped.

        line = f.readline()

    return url, content_type, encoding, metadata
paul@52 | 210 | |
paul@18 | 211 | # vim: tabstop=4 expandtab shiftwidth=4 |