1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/xmlread.py Thu Mar 29 22:35:06 2012 +0200
1.3 @@ -0,0 +1,62 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +import xml.sax
1.7 +
class Parser(xml.sax.handler.ContentHandler):

    """
    A basic SAX content handler tracking elements, attributes and text.

    While a document is being parsed, three parallel stacks describe the
    path from the root element to the element currently being processed:

    elements   -- the element names
    attributes -- the attribute collections given for those elements
    text       -- for each element, a list of the text fragments seen
                  directly inside it so far

    Subclasses override handleElement to act on each completed element.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):

        "Push the element 'name' and its 'attrs' onto the stacks."

        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):

        "Record the text 'content' against the innermost open element."

        self.text[-1].append(content)

    def endElement(self, name):

        "Handle the completed element 'name' and then pop it."

        # The handler runs before popping so that it can still inspect the
        # element's own name, attributes and text at the top of the stacks.
        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):

        "Hook called for each completed element 'name'; does nothing here."

        pass

    def parse(self, f):

        """
        Parse the XML document 'f' using this object as the content handler.

        'f' may be anything accepted by the SAX reader's parse method: a
        file-like object, a filename or URL string, or an InputSource.
        If 'f' provides a close method, it is called when parsing finishes,
        whether or not parsing succeeded.

        Errors are reported through the default xml.sax ErrorHandler, and
        external general entities are not loaded.
        """

        # Remember the stack depth on entry so that a failed parse can be
        # unwound without disturbing an enclosing parse that may be using
        # this same handler.
        depth = len(self.elements)

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
            parser.setFeature(xml.sax.handler.feature_external_ges, 0)
            parser.parse(f)
        finally:
            # After a successful parse the stacks are already back at 'depth';
            # after a failure this discards the elements left open.
            del self.elements[depth:]
            del self.attributes[depth:]
            del self.text[depth:]

            # Filenames and InputSource objects have no close method.
            close = getattr(f, "close", None)
            if close is not None:
                close()
1.43 +
class ConfigurableParser(Parser):

    """
    A parser which can be configured to handle elements individually.

    Handlers are kept in a dictionary mapping element names to callables.
    The key None selects a default handler used for any element that has
    no usable handler of its own.
    """

    def __init__(self, handlers=None):

        """
        Initialise the parser with the optional 'handlers' dictionary.

        A supplied dictionary is used directly, not copied, so later changes
        made through this object are visible to the caller and vice versa.
        """

        Parser.__init__(self)

        # Test against None explicitly: "handlers or {}" would replace a
        # caller's empty dictionary with a private one, so that handlers
        # added to the caller's dictionary afterwards would be ignored.
        if handlers is None:
            handlers = {}
        self.handlers = handlers

    def __setitem__(self, name, handler):

        "Register 'handler' for elements called 'name' (None for default)."

        self.handlers[name] = handler

    def update(self, handlers):

        "Add or replace handlers using the 'handlers' dictionary."

        self.handlers.update(handlers)

    def handleElement(self, name):

        """
        Dispatch the completed element 'name' to a handler.

        The handler registered for 'name' is tried first, then the default
        handler registered under None. The first usable one is called with
        the element name, the element stack, the attribute stack, and the
        element's own text joined into a single string. A false entry is
        treated as absent. If no handler is usable, nothing happens.
        """

        for key in (name, None):
            handler = self.handlers.get(key)
            if handler:
                handler(name, self.elements, self.attributes, "".join(self.text[-1]))
                break
1.64 +
1.65 +# vim: tabstop=4 expandtab shiftwidth=4