#!/usr/bin/env python

"""
A SAX-based parser framework.

Copyright (C) 2012 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.1"

import xml.sax
import xml.sax.handler

class Parser(xml.sax.handler.ContentHandler):

    """
    A basic parser, tracking elements and attributes.

    While a document is being parsed, three parallel stacks are maintained,
    each having one entry per currently open element (outermost first):

      * 'elements' holds the element names
      * 'attributes' holds the attributes object given for each element
      * 'text' holds a list of text fragments seen directly inside each
        element

    Subclasses override 'handleElement' to act upon completed elements.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startDocument(self):

        """
        Discard any state left over from a previous parsing operation, so
        that an instance can be reused even if an earlier parse failed before
        all of its elements were closed.
        """

        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):

        "Push the element 'name' and its 'attrs' onto the tracking stacks."

        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):

        "Record the text fragment 'content' against the innermost element."

        self.text[-1].append(content)

    def endElement(self, name):

        """
        Dispatch the completed element 'name' to 'handleElement' and then
        remove it from the tracking stacks.
        """

        # The element is handled before being popped so that the stacks still
        # describe the full path to (and including) the element.

        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):

        "Handle a completed element having the given 'name'."

        pass

    def parse(self, f):

        """
        Parse content from the file object 'f' using reasonable defaults.

        External general entities are not processed. Errors reported by the
        parser are raised as exceptions by the standard error handler. The
        file object 'f' is always closed, whether or not parsing succeeds.
        """

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
            parser.setFeature(xml.sax.handler.feature_external_ges, 0)
            parser.parse(f)
        finally:
            f.close()

class ConfigurableParser(Parser):

    "A parser which can be configured to handle elements individually."

    def __init__(self, handlers=None):

        """
        Initialise the parser with an optional 'handlers' dictionary mapping
        element names (or None for a default handler) to callables. A supplied
        dictionary is used directly, not copied, even when it is empty.
        """

        Parser.__init__(self)

        # Test for None explicitly: an empty dictionary supplied by the caller
        # must be retained so that later additions to it are still seen.

        if handlers is None:
            handlers = {}
        self.handlers = handlers

    def __setitem__(self, name, handler):

        "Register 'handler' for elements having the given 'name'."

        self.handlers[name] = handler

    def update(self, handlers):

        "Register all of the name-to-handler mappings in 'handlers'."

        self.handlers.update(handlers)

    def handleElement(self, name):

        """
        Handle a completed element having the given 'name'. If a handler has
        been registered for the name on this object, the handler will be invoked
        with...

          * 'name' (the current element name)
          * the path to and including the current element (a list of names)
          * the attributes for elements in the path (a list of dictionaries, one
            for each element)
          * the text fragments for elements in the path (a list of lists, one
            list of fragments for each element)
          * the final textual content for the current element

        Where a handler has been registered for None, it will be called for any
        element without a specific handler.

        The path, attributes and text fragment lists are the parser's own
        working lists and are modified as parsing continues; a handler wishing
        to retain them must take copies. A handler entry which is false (such
        as None) is treated as if it had not been registered.
        """

        for n in (name, None):
            handler = self.handlers.get(n)
            if handler:
                handler(name, self.elements, self.attributes, self.text, "".join(self.text[-1]))
                break

# vim: tabstop=4 expandtab shiftwidth=4