# HG changeset patch # User Paul Boddie # Date 1368468089 -7200 # Node ID feb90c7c4432acee53091e9ca75cb62e07115907 # Parent 97257310c06b4a4ffed89c5e092dedb6f9c3f97a Added tolerance of plain XML feed data, the type of which is, in any case, detected by the parser. Introduced avoidance of "source" element information that can confuse the gathering of title and link information. Added handling of XHTML content and the rendering of XHTML and plain text content. Widened the acceptable "type" attribute values according to the Atom specification. diff -r 97257310c06b -r feb90c7c4432 macros/SharedContent.py --- a/macros/SharedContent.py Sat May 11 00:39:47 2013 +0200 +++ b/macros/SharedContent.py Mon May 13 20:01:29 2013 +0200 @@ -99,7 +99,7 @@ feed = StringIO(data) _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed) - if content_type not in ("application/atom+xml", "application/rss+xml"): + if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"): raise FeedContentTypeError try: @@ -109,12 +109,13 @@ feed_type = None update = None + in_source = False events = xml.dom.pulldom.parse(feed) for event, value in events: - if event == xml.dom.pulldom.START_ELEMENT: + if not in_source and event == xml.dom.pulldom.START_ELEMENT: tagname = value.localName # Detect the feed type and items. @@ -132,6 +133,13 @@ update = Update() + # Detect source declarations. + + elif feed_type == "atom" and tagname == "source": + in_source = True + + # Handle item elements. + elif tagname == "title": events.expandNode(value) if update: @@ -149,8 +157,11 @@ elif feed_type == "atom" and tagname == "content": events.expandNode(value) if update: - update.content = text(value) update.content_type = value.getAttribute("type") + if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): + update.content = value.toxml() + else: + update.content = text(value) elif feed_type == "atom" and tagname == "updated" or \ feed_type == "rss" and tagname == "pubDate": @@ -173,6 +184,9 @@ update = None + elif feed_type == "atom" and tagname == "source": + in_source = False + finally: feed.close() @@ -244,13 +258,18 @@ for update in updates: # Emit content where appropriate. - # NOTE: HTML should be sanitised. + # NOTE: HTML and XHTML should be sanitised. if show_content: append(fmt.div(on=1, css_class="moinshare-update")) append(fmt.div(on=1, css_class="moinshare-content")) - if update.content and update.content_type == "html": - append(fmt.rawHTML(unescape(update.content))) + if update.content: + if update.content_type in ("html", "text/html"): + append(fmt.rawHTML(unescape(update.content))) + elif update.content_type in ("xhtml", "application/xhtml+xml"): + append(fmt.rawHTML(update.content)) + elif update.content_type in ("text", "text/plain"): + append(fmt.text(update.content)) append(fmt.div(on=0)) append(fmt.div(on=1, css_class="moinshare-date")) append(fmt.text(str(update.updated)))