# HG changeset patch
# User Paul Boddie
# Date 1327357400 -3600
# Node ID 9bde0ff6e584dcb5603ee322a9338d0ab99a76a1
# Parent c2b1444e4a466b9a0210a024e5f70ec458f3e8a4
Added some support for content/media type and content preference handling.

diff -r c2b1444e4a46 -r 9bde0ff6e584 MoinSupport.py
--- a/MoinSupport.py	Sun Jan 22 00:46:00 2012 +0100
+++ b/MoinSupport.py	Mon Jan 23 23:23:20 2012 +0100
@@ -21,9 +21,20 @@
 encoding_regexp_str = ur'(?P<content_type>[^\s;]*)(?:;\s*charset=(?P<encoding>[-A-Za-z0-9]+))?'
 encoding_regexp = re.compile(encoding_regexp_str)
 
+# Accept header parsing.
+
+accept_regexp_str = ur';\s*q='
+accept_regexp = re.compile(accept_regexp_str)
+
 # Utility functions.
 
 def getContentTypeAndEncoding(content_type):
+
+    """
+    Return a tuple with the content/media type and encoding, extracted from the
+    given 'content_type' header value.
+    """
+
     m = encoding_regexp.search(content_type)
     if m:
         return m.group("content_type"), m.group("encoding")
@@ -147,6 +158,231 @@
     else:
         return request.path
 
+# Content/media type and preferences support.
+
+class MediaRange:
+
+    "A content/media type value which supports whole categories of data."
+
+    def __init__(self, media_range, accept_parameters=None):
+        self.media_range = media_range
+        self.accept_parameters = accept_parameters or {}
+
+        parts = media_range.split(";")
+        self.media_type = parts[0]
+        self.parameters = getMappingFromParameterStrings(parts[1:])
+
+        # The media type is divided into category and subcategory.
+
+        parts = self.media_type.split("/")
+        self.category = parts[0]
+        self.subcategory = "/".join(parts[1:])
+
+    def get_parts(self):
+        return self.category, self.subcategory
+
+    def get_specificity(self):
+        if "*" in self.get_parts():
+            return -list(self.get_parts()).count("*")
+        else:
+            return len(self.parameters)
+
+    def permits(self, other):
+        if not isinstance(other, MediaRange):
+            other = MediaRange(other)
+
+        category = categoryPermits(self.category, other.category)
+        subcategory = categoryPermits(self.subcategory, other.subcategory)
+
+        if category and subcategory:
+            if "*" not in (category, subcategory):
+                return not self.parameters or self.parameters == other.parameters
+            else:
+                return True
+        else:
+            return False
+
+    def __eq__(self, other):
+        if not isinstance(other, MediaRange):
+            other = MediaRange(other)
+
+        category = categoryMatches(self.category, other.category)
+        subcategory = categoryMatches(self.subcategory, other.subcategory)
+
+        if category and subcategory:
+            if "*" not in (category, subcategory):
+                return self.parameters == other.parameters or \
+                    not self.parameters or not other.parameters
+            else:
+                return True
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash(self.media_range)
+
+    def __repr__(self):
+        return "MediaRange(%r)" % self.media_range
+
+def categoryMatches(this, that):
+
+    """
+    Return the basis of a match between 'this' and 'that' or False if the given
+    categories do not match.
+    """
+
+    return (this == "*" or this == that) and this or \
+        that == "*" and that or False
+
+def categoryPermits(this, that):
+
+    """
+    Return whether 'this' category permits 'that' category. Where 'this' is a
+    wildcard ("*"), 'that' should always match. A value of False is returned if
+    the categories do not otherwise match.
+    """
+
+    return (this == "*" or this == that) and this or False
+
+def getMappingFromParameterStrings(l):
+
+    """
+    Return a mapping representing the list of "name=value" strings given by 'l'.
+    """
+
+    parameters = {}
+
+    for parameter in l:
+        parts = parameter.split("=")
+        name = parts[0].strip()
+        value = "=".join(parts[1:]).strip()
+        parameters[name] = value
+
+    return parameters
+
+def getContentPreferences(accept):
+
+    """
+    Return a mapping from media types to parameters for content/media types
+    extracted from the given 'accept' header value. The mapping is returned in
+    the form of a list of (media type, parameters) tuples.
+
+    See: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
+    """
+
+    preferences = []
+
+    for field in accept.split(","):
+
+        # The media type with parameters (defined by the "media-range") is
+        # separated from any other parameters (defined as "accept-extension"
+        # parameters) by a quality parameter.
+
+        fparts = accept_regexp.split(field)
+
+        # The first part is always the media type.
+
+        media_type = fparts[0].strip()
+
+        # Any other parts can be interpreted as extension parameters.
+
+        if len(fparts) > 1:
+            fparts = ("q=" + ";q=".join(fparts[1:])).split(";")
+        else:
+            fparts = []
+
+        # Each field in the preferences can incorporate parameters separated by
+        # semicolon characters.
+
+        parameters = getMappingFromParameterStrings(fparts)
+        media_range = MediaRange(media_type, parameters)
+        preferences.append(media_range)
+
+    return ContentPreferences(preferences)
+
+class ContentPreferences:
+
+    "A wrapper around content preference information."
+
+    def __init__(self, preferences):
+        self.preferences = preferences
+
+    def __iter__(self):
+        return iter(self.preferences)
+
+    def get_ordered(self, by_quality=0):
+
+        """
+        Return a list of content/media types in descending order of preference.
+        If 'by_quality' is set to a true value, the "q" value will be used as
+        the primary measure of preference; otherwise, only the specificity will
+        be considered.
+        """
+
+        ordered = {}
+
+        for media_range in self.preferences:
+            specificity = media_range.get_specificity()
+
+            if by_quality:
+                q = float(media_range.accept_parameters.get("q", "1"))
+                key = q, specificity
+            else:
+                key = specificity
+
+            if not ordered.has_key(key):
+                ordered[key] = []
+
+            ordered[key].append(media_range)
+
+        # Return the preferences in descending order of quality and specificity.
+
+        keys = ordered.keys()
+        keys.sort(reverse=True)
+        return [ordered[key] for key in keys]
+
+    def get_preferred_type(self, available):
+
+        """
+        Return the preferred content/media type from those in the 'available'
+        list, given the known preferences.
+        """
+
+        matches = {}
+        available = set(available[:])
+
+        for level in self.get_ordered():
+            for media_range in level:
+
+                # Attempt to match available types.
+
+                found = set()
+                for available_type in available:
+                    if media_range.permits(available_type):
+                        q = float(media_range.accept_parameters.get("q", "1"))
+                        if not matches.has_key(q):
+                            matches[q] = []
+                        matches[q].append(available_type)
+                        found.add(available_type)
+
+                # Stop looking for matches for matched available types.
+
+                if found:
+                    available.difference_update(found)
+
+        # Sort the matches in descending order of quality.
+
+        all_q = matches.keys()
+
+        if all_q:
+            all_q.sort(reverse=True)
+            return matches[all_q[0]]
+        else:
+            return None
+
 # Page access functions.
 
 def getPageURL(page):
diff -r c2b1444e4a46 -r 9bde0ff6e584 tests/test_mediarange.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_mediarange.py	Mon Jan 23 23:23:20 2012 +0100
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+"""
+A test of content preferences using examples from the HTTP 1.1 specification.
+See: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
+"""
+
+from MoinSupport import MediaRange
+
+mr1 = MediaRange("audio/*")
+mr2 = MediaRange("audio/basic")
+print mr1 == mr2, ":", mr1, "==", mr2
+print mr1.permits(mr2), ":", mr1, "permits", mr2
+
+mr1 = MediaRange("text/plain")
+mr2 = MediaRange("text/html")
+print mr1 != mr2, ":", mr1, "!=", mr2
+print not mr1.permits(mr2), ":", mr1, "does not permit", mr2
+
+mr1 = MediaRange("text/*")
+mr2 = MediaRange("text/html")
+mr3 = MediaRange("text/html;level=1")
+mr4 = MediaRange("*/*")
+mr5 = MediaRange("text/html;level=2")
+print mr1 == mr2, ":", mr1, "==", mr2
+print mr1.permits(mr2), ":", mr1, "permits", mr2
+print not mr2.permits(mr1), ":", mr2, "does not permit", mr1
+print mr1 == mr3, ":", mr1, "==", mr3
+print mr1.permits(mr3), ":", mr1, "permits", mr3
+print mr1 == mr4, ":", mr1, "==", mr4
+print mr1 == mr5, ":", mr1, "==", mr5
+print mr2 == mr3, ":", mr2, "==", mr3
+print mr2.permits(mr3), ":", mr2, "permits", mr3
+print mr2 == mr4, ":", mr2, "==", mr4
+print mr2 == mr5, ":", mr2, "==", mr5
+print mr3 == mr4, ":", mr3, "==", mr4
+print mr3 != mr5, ":", mr3, "!=", mr5
+print mr4 == mr5, ":", mr4, "==", mr5
+print mr4.permits(mr5), ":", mr4, "permits", mr5
+print not mr5.permits(mr4), ":", mr5, "does not permit", mr4
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r c2b1444e4a46 -r 9bde0ff6e584 tests/test_preferences.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_preferences.py	Mon Jan 23 23:23:20 2012 +0100
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+"""
+A test of content preferences using examples from the HTTP 1.1 specification.
+See: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
+"""
+
+from MoinSupport import getContentPreferences
+
+s0 = "audio/*; q=0.2, audio/basic"
+prefs = getContentPreferences(s0)
+oprefs = prefs.get_ordered(True)
+expected = [
+    ["audio/basic"],
+    ["audio/*"]
+    ]
+print oprefs == expected, ":", oprefs, "==", expected
+
+s1 = "text/plain; q=0.5, text/html, text/x-dvi; q=0.8, text/x-c"
+prefs = getContentPreferences(s1)
+oprefs = prefs.get_ordered(True)
+expected = [
+    ["text/html", "text/x-c"],  # equal quality, specificity
+    ["text/x-dvi"],
+    ["text/plain"]
+    ]
+print oprefs == expected, ":", oprefs, "==", expected
+
+s2 = "text/*, text/html, text/html;level=1, */*"
+prefs = getContentPreferences(s2)
+oprefs = prefs.get_ordered(True)
+expected = [
+    ["text/html;level=1"],  # specificity is 1
+    ["text/html"],          # specificity is 0
+    ["text/*"],             # specificity is -1
+    ["*/*"]                 # specificity is -2
+    ]
+print oprefs == expected, ":", oprefs, "==", expected
+
+s3 = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
+prefs = getContentPreferences(s3)
+oprefs = prefs.get_ordered(True)
+expected = [
+    ["text/html;level=1"],
+    ["text/html"],          # specificity is 1
+    ["*/*"],                # specificity is -2
+    ["text/html;level=2"],
+    ["text/*"]
+    ]
+print oprefs == expected, ":", oprefs, "==", expected
+
+available = [
+    "text/html;level=1",
+    "text/html"
+    ]
+expected = ["text/html;level=1"]
+print prefs.get_preferred_type(available) == expected, ":", prefs.get_preferred_type(available), "==", expected
+
+available = [
+    "text/plain",
+    "image/jpeg"
+    ]
+expected = ["image/jpeg"]
+print prefs.get_preferred_type(available) == expected, ":", prefs.get_preferred_type(available), "==", expected
+
+available = [
+    "text/html;level=2",
+    "text/html;level=3"
+    ]
+expected = ["text/html;level=3"]
+print prefs.get_preferred_type(available) == expected, ":", prefs.get_preferred_type(available), "==", expected
+
+# vim: tabstop=4 expandtab shiftwidth=4