paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@2 | 3 | """ |
paul@2 | 4 | A SAX-based parser framework. |
paul@2 | 5 | |
paul@2 | 6 | Copyright (C) 2012 Paul Boddie <paul@boddie.org.uk> |
paul@2 | 7 | |
paul@2 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@2 | 9 | the terms of the GNU Lesser General Public License as published by the Free |
paul@2 | 10 | Software Foundation; either version 3 of the License, or (at your option) any |
paul@2 | 11 | later version. |
paul@2 | 12 | |
paul@2 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@2 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@2 | 15 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
paul@2 | 16 | details. |
paul@2 | 17 | |
paul@2 | 18 | You should have received a copy of the GNU Lesser General Public License along |
paul@2 | 19 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paul@2 | 20 | """ |
paul@2 | 21 | |
paul@2 | 22 | __version__ = "0.1" |
paul@2 | 23 | |
paul@0 | 24 | import xml.sax |
paul@0 | 25 | |
class Parser(xml.sax.handler.ContentHandler):

    """
    A basic parser, tracking elements and attributes.

    While a document is being parsed, three parallel stacks are maintained,
    each holding one entry per currently open element (outermost first):

      * 'elements' holds the element names
      * 'attributes' holds the attributes object supplied for each element
      * 'text' holds a list of character data fragments for each element

    Subclasses override 'handleElement' to act upon each completed element.
    """

    def __init__(self):

        "Initialise the handler with empty element, attribute and text stacks."

        # The base class is invoked explicitly (rather than via super) so that
        # this also works where ContentHandler is an old-style class.

        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startDocument(self):

        """
        Begin a new document with empty stacks. A previous parse which was
        aborted by an error leaves entries for unclosed elements behind, and
        these must not leak into the paths reported for the next document.
        """

        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):

        """
        Record the opening of the element having the given 'name' and 'attrs',
        starting a new, empty list of text fragments for it.
        """

        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):

        """
        Add the character data 'content' to the fragments of the innermost
        open element. Data may arrive in several pieces, hence the list.
        """

        self.text[-1].append(content)

    def endElement(self, name):

        """
        Handle the completed element having the given 'name' and then remove
        it from the stacks.
        """

        # The element is handled before being popped so that handleElement
        # still sees it as the last entry of each stack.

        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):

        """
        Handle a completed element having the given 'name'. This default
        implementation does nothing; subclasses provide the behaviour.
        """

        pass

    def parse(self, f):

        """
        Parse content from the file object 'f' using reasonable defaults:
        this object receives the content events, the standard xml.sax error
        handler is installed, and the external general entities feature is
        switched off.

        The file object 'f' is always closed, whether or not parsing succeeds.
        """

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())

            # Do not let documents pull in external general entities.

            parser.setFeature(xml.sax.handler.feature_external_ges, 0)
            parser.parse(f)
        finally:
            f.close()
paul@0 | 67 | |
class ConfigurableParser(Parser):

    "A parser which can be configured to handle elements individually."

    def __init__(self, handlers=None):

        """
        Initialise the parser with the optional 'handlers' dictionary mapping
        element names to callables. The None key, if present, provides the
        handler used for elements lacking a specific handler.

        A supplied dictionary is retained, not copied, so handlers registered
        through this object also appear in that dictionary and vice versa.
        """

        Parser.__init__(self)

        # Only a missing argument yields a fresh dictionary: a supplied empty
        # dictionary is retained just as a populated one would be.

        if handlers is None:
            handlers = {}
        self.handlers = handlers

    def __setitem__(self, name, handler):

        """
        Register 'handler' for elements having the given 'name', or as the
        fallback handler if 'name' is None.
        """

        self.handlers[name] = handler

    def update(self, handlers):

        """
        Register all the handlers in the given 'handlers' dictionary,
        replacing any existing handlers registered for the same names.
        """

        self.handlers.update(handlers)

    def handleElement(self, name):

        """
        Handle a completed element having the given 'name'. If a handler has
        been registered for the name on this object, the handler will be invoked
        with...

          * 'name' (the current element name)
          * the path to and including the current element (a list of names)
          * the attributes for elements in the path (a list of attributes
            objects, one for each element)
          * the text fragments for elements in the path (a list of lists, one
            list of fragments for each element)
          * the final textual content for the current element

        Where a handler has been registered for None, it will be called for any
        element without a specific handler. A name whose registered handler is
        None is treated as having no specific handler.

        The lists given to the handler are this object's own working stacks,
        which change as parsing continues; a handler needing to keep them
        should take copies.
        """

        # Try the specific handler first, then the fallback; at most one of
        # them is invoked.

        for key in (name, None):
            handler = self.handlers.get(key)

            # Test for absence explicitly so that a callable which happens to
            # evaluate as false is not silently ignored.

            if handler is not None:
                handler(name, self.elements, self.attributes, self.text, "".join(self.text[-1]))
                break
paul@0 | 106 | |
paul@0 | 107 | # vim: tabstop=4 expandtab shiftwidth=4 |