xmlread

Changeset

0:c27598946b4c
2012-03-29 Paul Boddie raw files shortlog changelog graph A SAX-based simplified XML parsing library.
xmlread.py (file)
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xmlread.py	Thu Mar 29 22:35:06 2012 +0200
     1.3 @@ -0,0 +1,62 @@
     1.4 +#!/usr/bin/env python
     1.5 +
     1.6 +import xml.sax
     1.7 +
     1.8 +class Parser(xml.sax.handler.ContentHandler):
     1.9 +
    1.10 +    "A basic parser, tracking elements and attributes."
    1.11 +
    1.12 +    def __init__(self):
    1.13 +        self.elements = []
    1.14 +        self.attributes = []
    1.15 +        self.text = []
    1.16 +
    1.17 +    def startElement(self, name, attrs):
    1.18 +        self.elements.append(name)
    1.19 +        self.attributes.append(attrs)
    1.20 +        self.text.append([])
    1.21 +
    1.22 +    def characters(self, content):
    1.23 +        self.text[-1].append(content)
    1.24 +
    1.25 +    def endElement(self, name):
    1.26 +        self.handleElement(name)
    1.27 +        self.elements.pop()
    1.28 +        self.attributes.pop()
    1.29 +        self.text.pop()
    1.30 +
    1.31 +    def handleElement(self, name):
    1.32 +        pass
    1.33 +
    1.34 +    def parse(self, f):
    1.35 +        try:
    1.36 +            parser = xml.sax.make_parser()
    1.37 +            parser.setContentHandler(self)
    1.38 +            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
    1.39 +            parser.setFeature(xml.sax.handler.feature_external_ges, 0)
    1.40 +            parser.parse(f)
    1.41 +        finally:
    1.42 +            f.close()
    1.43 +
    1.44 +class ConfigurableParser(Parser):
    1.45 +
    1.46 +    "A parser which can be configured to handle elements individually."
    1.47 +
    1.48 +    def __init__(self, handlers=None):
    1.49 +        Parser.__init__(self)
    1.50 +        self.handlers = handlers or {}
    1.51 +
    1.52 +    def __setitem__(self, name, handler):
    1.53 +        self.handlers[name] = handler
    1.54 +
    1.55 +    def update(self, handlers):
    1.56 +        self.handlers.update(handlers)
    1.57 +
    1.58 +    def handleElement(self, name):
    1.59 +        for n in (name, None):
    1.60 +            handler = self.handlers.get(n)
    1.61 +            if handler:
    1.62 +                handler(name, self.elements, self.attributes, "".join(self.text[-1]))
    1.63 +                break
    1.64 +
    1.65 +# vim: tabstop=4 expandtab shiftwidth=4