# -*- coding: iso-8859-1 -*-
"""
MoinMoin - ContentTypeSupport library

@copyright: 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""
paul@36 | 8 | |
paul@36 | 9 | import re |
paul@36 | 10 | |
# Content type parsing.

# A Content-Type header value is matched as a media type (everything up to the
# first whitespace or semicolon), optionally followed immediately by a charset
# parameter. The charset value may be enclosed in double quotes and may contain
# the punctuation found in registered charset names (for example "Shift_JIS"),
# so that such names are not truncated at the first unusual character.
#
# A plain raw string is used rather than a ur'' literal because the latter is a
# syntax error on Python 3; the pattern only contains ASCII characters.

encoding_regexp_str = r'(?P<content_type>[^\s;]*)(?:;\s*charset="?(?P<encoding>[-A-Za-z0-9_.:+]+)"?)?'
encoding_regexp = re.compile(encoding_regexp_str)
paul@36 | 15 | |
# Accept header parsing.

# The pattern locates the quality parameter (";q=") which separates a media
# range and its own parameters from any parameters following it. A plain raw
# string is used rather than a ur'' literal because the latter is a syntax
# error on Python 3; the pattern only contains ASCII characters.

accept_regexp_str = r';\s*q='
accept_regexp = re.compile(accept_regexp_str)
paul@36 | 20 | |
paul@36 | 21 | # Content/media type and preferences support. |
paul@36 | 22 | |
class MediaRange:

    """
    A content/media type value which supports whole categories of data.

    A media range such as "text/*;level=1" is divided into a media type
    ("text/*"), a category ("text"), a subcategory ("*") and a mapping of the
    parameters qualifying the media type ({"level": "1"}).
    """

    def __init__(self, media_range, accept_parameters=None):

        """
        Initialise the media range using the 'media_range' string, which may
        include parameters separated by semicolons, and the optional
        'accept_parameters' mapping (holding values such as "q") which is stored
        as given, or as an empty mapping if omitted.
        """

        self.media_range = media_range
        self.accept_parameters = accept_parameters or {}

        parts = media_range.split(";")

        # Whitespace around the media type (as in "text/html ;level=1") is
        # removed since it would otherwise become part of the subcategory and
        # prevent any match.

        self.media_type = parts[0].strip()
        self.parameters = getMappingFromParameterStrings(parts[1:])

        # The media type is divided into category and subcategory.

        parts = self.media_type.split("/")
        self.category = parts[0]
        self.subcategory = "/".join(parts[1:])

    def get_parts(self):

        "Return the category, subcategory parts."

        return self.category, self.subcategory

    def get_specificity(self):

        """
        Return the specificity of the media type in terms of the scope of the
        category and subcategory, and also in terms of any qualifying
        parameters.

        Where wildcards are present, the result is the negated number of
        wildcard parts (-1 or -2); otherwise, it is the number of parameters.
        """

        wildcards = list(self.get_parts()).count("*")

        if wildcards:
            return -wildcards
        else:
            return len(self.parameters)

    def permits(self, other):

        """
        Return whether this media type permits the use of the 'other' media type
        if suggested as suitable content.

        The 'other' value may be a MediaRange or a string from which one is
        made. Wildcards are only honoured in this media range. Where this range
        has no wildcards, its parameters must either be absent or be equal to
        those of 'other'.
        """

        if not isinstance(other, MediaRange):
            other = MediaRange(other)

        category = categoryPermits(self.category, other.category)
        subcategory = categoryPermits(self.subcategory, other.subcategory)

        if category and subcategory:
            if "*" not in (category, subcategory):
                return not self.parameters or self.parameters == other.parameters
            else:
                return True
        else:
            return False

    def __eq__(self, other):

        """
        Return whether this media type is effectively the same as the 'other'
        media type.

        Wildcards in either media type are honoured. Where no wildcard was
        involved in the match, the parameters must be equal unless either side
        has none. Values which are neither MediaRange objects nor strings are
        never equal to a media range.
        """

        if not isinstance(other, MediaRange):

            # Values without string behaviour (such as None) cannot describe a
            # media range: report inequality instead of failing.

            try:
                other = MediaRange(other)
            except AttributeError:
                return False

        category = categoryMatches(self.category, other.category)
        subcategory = categoryMatches(self.subcategory, other.subcategory)

        if category and subcategory:
            if "*" not in (category, subcategory):
                return self.parameters == other.parameters or \
                    not self.parameters or not other.parameters
            else:
                return True
        else:
            return False

    def __ne__(self, other):

        "Return the opposite of the equality test with 'other'."

        return not self.__eq__(other)

    def __hash__(self):

        # NOTE(review): the hash uses the original string whereas equality also
        # honours wildcards, so objects comparing equal can hash differently.

        return hash(self.media_range)

    def __repr__(self):
        return "MediaRange(%r)" % self.media_range
paul@36 | 111 | |
def categoryMatches(this, that):

    """
    Return the basis of a match between 'this' and 'that' or False if the given
    categories do not match.

    The basis is 'this' where it is a wildcard ("*") or equal to 'that', or
    'that' where only 'that' is a wildcard. Empty categories never match, not
    even each other.
    """

    # The test on 'this' itself keeps empty values from being reported as the
    # basis of a match.

    if this and (this == "*" or this == that):
        return this

    if that == "*":
        return that

    return False
paul@36 | 121 | |
def categoryPermits(this, that):

    """
    Return whether 'this' category permits 'that' category. Where 'this' is a
    wildcard ("*"), 'that' should always match. A value of False is returned if
    the categories do not otherwise match.

    On success, 'this' is returned, so that callers can tell whether a wildcard
    was responsible for the match. Empty categories never permit anything.
    """

    # The test on 'this' itself keeps an empty value from being returned as if
    # it were a successful match.

    if this and (this == "*" or this == that):
        return this

    return False
paul@36 | 131 | |
def getMappingFromParameterStrings(l):

    """
    Return a mapping representing the list of "name=value" strings given by 'l'.

    Whitespace around names and values is discarded. Only the first "=" in a
    string separates the name from the value, and a string without "=" yields
    an empty value. Strings without a name, such as the empty string left
    behind by a trailing semicolon, are ignored.
    """

    parameters = {}

    for parameter in l:
        parts = parameter.split("=", 1)
        name = parts[0].strip()

        # A nameless entry carries no information but would still count as a
        # parameter wherever parameters are counted or compared.

        if not name:
            continue

        # Joining the remaining parts yields an empty value where no "=" was
        # present.

        parameters[name] = "=".join(parts[1:]).strip()

    return parameters
paul@36 | 147 | |
def getContentPreferences(accept):

    """
    Return the preferences for content/media types extracted from the given
    'accept' header value. The result is a ContentPreferences object wrapping
    one MediaRange per media range found in the header, in header order, each
    holding its "q" and extension parameters as its accept parameters.

    A missing (None) or empty header value produces no preferences, and fields
    lacking a media type (as produced by stray commas) are ignored.

    See: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
    """

    preferences = []

    for field in (accept or "").split(","):

        # The media type with parameters (defined by the "media-range") is
        # separated from any other parameters (defined as "accept-extension"
        # parameters) by a quality parameter.

        fparts = accept_regexp.split(field)

        # The first part is always the media type.

        media_type = fparts[0].strip()

        # Without a media type there is nothing to which a preference could
        # apply.

        if not media_type:
            continue

        # Any other parts can be interpreted as extension parameters. The "q="
        # text consumed by the split is restored before the parameters are
        # separated at each semicolon.

        if len(fparts) > 1:
            fparts = ("q=" + ";q=".join(fparts[1:])).split(";")
        else:
            fparts = []

        parameters = getMappingFromParameterStrings(fparts)
        preferences.append(MediaRange(media_type, parameters))

    return ContentPreferences(preferences)
paul@36 | 187 | |
class ContentPreferences:

    "A wrapper around content preference information."

    def __init__(self, preferences):

        """
        Initialise the wrapper with the given 'preferences': a list of media
        range objects providing get_specificity, permits and accept_parameters.
        """

        self.preferences = preferences

    def __iter__(self):

        "Return an iterator over the preferences in their original order."

        return iter(self.preferences)

    def get_ordered(self, by_quality=0):

        """
        Return a list of content/media types in descending order of preference.
        If 'by_quality' is set to a true value, the "q" value will be used as
        the primary measure of preference; otherwise, only the specificity will
        be considered.

        The result is a list of lists, each inner list holding the media ranges
        sharing the same level of preference. A "q" value which is not a number
        causes a ValueError when 'by_quality' is set.
        """

        ordered = {}

        for media_range in self.preferences:
            specificity = media_range.get_specificity()

            if by_quality:
                q = float(media_range.accept_parameters.get("q", "1"))
                key = q, specificity
            else:
                key = specificity

            ordered.setdefault(key, []).append(media_range)

        # Return the preferences in descending order of quality and specificity.

        return [ordered[key] for key in sorted(ordered, reverse=True)]

    def get_acceptable_types(self, available):

        """
        Return content/media types from those in the 'available' list supported
        by the known preferences grouped by preference level in descending order
        of preference.

        Each available type is judged by the most specific media range
        permitting it. Types whose deciding range has a "q" value of zero are
        not acceptable and are omitted. Within each level, types appear in the
        order in which they were first given in 'available'.
        """

        matches = {}

        # Duplicates are discarded while the order of the available types is
        # kept, so that the result does not depend on set iteration order.

        remaining = []
        seen = set()

        for available_type in available:
            if available_type not in seen:
                seen.add(available_type)
                remaining.append(available_type)

        for level in self.get_ordered():
            for media_range in level:

                # Attempt to match available types.

                found = []
                unmatched = []

                for available_type in remaining:
                    if media_range.permits(available_type):
                        found.append(available_type)
                    else:
                        unmatched.append(available_type)

                if not found:
                    continue

                # A quality of zero marks the matched types as not acceptable
                # (see RFC 2616, section 3.9), so they are not recorded.

                q = float(media_range.accept_parameters.get("q", "1"))

                if q > 0:
                    matches.setdefault(q, []).extend(found)

                # Stop looking for matches for matched available types: less
                # specific media ranges must not override this decision.

                remaining = unmatched

        # Sort the matches in descending order of quality.

        return [matches[q] for q in sorted(matches, reverse=True)]

    def get_preferred_types(self, available):

        """
        Return the preferred content/media types from those in the 'available'
        list, given the known preferences. An empty list is returned if none of
        the available types is acceptable.
        """

        preferred = self.get_acceptable_types(available)
        if preferred:
            return preferred[0]
        else:
            return []
paul@36 | 281 | |
paul@36 | 282 | # Content type parsing. |
paul@36 | 283 | |
def getContentTypeAndEncoding(content_type):

    """
    Return a tuple with the content/media type and encoding, extracted from the
    given 'content_type' header value.

    The encoding is None where no charset is found directly after the media
    type. A missing (None) header value produces (None, None).
    """

    if content_type is None:
        return None, None

    # Surrounding whitespace is removed first: the media type is matched at the
    # start of the value and would otherwise be reported as an empty string for
    # a value beginning with whitespace.

    m = encoding_regexp.search(content_type.strip())
    if m:
        return m.group("content_type"), m.group("encoding")
    else:
        return None, None
paul@36 | 296 | |
paul@36 | 297 | # vim: tabstop=4 expandtab shiftwidth=4 |