#!/usr/bin/env python

# HG changeset patch
# User Paul Boddie
# Date 1333053306 -7200
# Node ID c27598946b4cc04b9878139af664c50aaab8f94e
# Originally added as xmlread.py (Thu Mar 29 22:35:06 2012 +0200).

"""
A SAX-based simplified XML parsing library.

Parser keeps a stack of the currently open elements, their attributes and
their text while a document is read. ConfigurableParser dispatches each
completed element to a handler registered under the element's name.
"""

import xml.sax
import xml.sax.handler


class Parser(xml.sax.handler.ContentHandler):

    """
    A basic parser, tracking elements and attributes.

    Three parallel stacks are maintained, one entry per open element, with the
    innermost element last:

      elements    the element names
      attributes  the attribute objects passed to startElement
      text        lists of the character data fragments seen directly inside
                  each element (not inside its descendants)

    Subclasses override handleElement, which is called as each element ends.
    """

    def __init__(self):
        # ContentHandler is an old-style class in Python 2, so the base
        # initialiser is invoked explicitly rather than through super().
        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):

        "Push the element 'name' and its 'attrs' onto the stacks."

        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):

        "Record 'content' as text belonging to the innermost open element."

        self.text[-1].append(content)

    def endElement(self, name):

        """
        Handle the completed element 'name', then pop it from the stacks.

        handleElement runs before the stacks are popped, so the element being
        closed is still the last entry of each stack when it is handled.
        """

        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):

        "Hook called when the element 'name' ends; does nothing by default."

        pass

    def parse(self, f):

        """
        Parse the XML document 'f', using this object as the content handler.

        'f' is passed unchanged to the SAX reader's parse method, so it may be
        an open file-like object or anything else that method accepts (such as
        a filename). External general entities are disabled. Errors reported
        by the reader are raised by the default ErrorHandler.

        If 'f' has a close method, it is called once parsing has finished,
        whether or not parsing succeeded.
        """

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
            parser.setFeature(xml.sax.handler.feature_external_ges, 0)
            parser.parse(f)
        finally:
            # Filenames and InputSource objects have nothing to close; calling
            # f.close() unconditionally would raise AttributeError here and
            # could mask a genuine parsing error.
            close = getattr(f, "close", None)
            if close is not None:
                close()


class ConfigurableParser(Parser):

    """
    A parser which can be configured to handle elements individually.

    Handlers are kept in a dictionary mapping element names to callables. The
    key None registers a fallback used for elements without a handler of their
    own. Each handler is called as:

      handler(name, elements, attributes, text)

    where 'elements' and 'attributes' are the parser's own stacks (including
    the element being handled as the last entry) and 'text' is the element's
    directly contained character data joined into one string. The stacks are
    passed without copying: a handler wanting to retain them must copy them.
    """

    def __init__(self, handlers=None):

        "Initialise the parser with an optional dictionary of 'handlers'."

        Parser.__init__(self)
        self.handlers = handlers or {}

    def __setitem__(self, name, handler):

        "Register 'handler' for elements called 'name' (None for a fallback)."

        self.handlers[name] = handler

    def update(self, handlers):

        "Register every entry of the 'handlers' dictionary."

        self.handlers.update(handlers)

    def handleElement(self, name):

        """
        Call the handler registered for 'name', or else the fallback handler
        registered under None. At most one handler is called.
        """

        for n in (name, None):
            handler = self.handlers.get(n)
            if handler:
                handler(name, self.elements, self.attributes, "".join(self.text[-1]))
                break

# vim: tabstop=4 expandtab shiftwidth=4