1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/markup.py Sun Oct 26 00:45:58 2014 +0200
1.3 @@ -0,0 +1,527 @@
1.4 +# This code is in the public domain, it comes
1.5 +# with absolutely no warranty and you can do
1.6 +# absolutely whatever you want with it.
1.7 +
# Release metadata; both values are substituted into the module docstring below.
__version__ = '1.9'
__date__ = '1 October 2012'

# The docstring is assigned explicitly (instead of being a bare string literal
# at the top of the file) so that the version and date can be interpolated.
__doc__ = """
This is markup.py - a Python module that attempts to
make it easier to generate HTML/XML from a Python program
in an intuitive, lightweight, customizable and pythonic way.

The code is in the public domain.

Version: %s as of %s.

Documentation and further info is at http://markup.sourceforge.net/

Please send bug reports, feature requests, enhancement
ideas or questions to nogradi at gmail dot com.

Installation: drop markup.py somewhere into your Python path.
""" % (__version__, __date__)
1.26 +
# Python 2 / Python 3 compatibility shim.
#
# On Python 2 the names `basestring` and `long` are builtins and the `string`
# module supplies helper functions.  On Python 3 the first lookup raises
# NameError, in which case the equivalent Python 3 types are bound to the
# same names so the rest of the module can use them unconditionally.
try:
    basestring
    long
    import string
except NameError:
    # python 3
    basestring = str
    long = int
    string = str
1.34 +
1.35 +# tags which are reserved python keywords will be referred
1.36 +# to by a leading underscore otherwise we end up with a syntax error
1.37 +import keyword
1.38 +
class element:
    """This class handles the addition of a new element.

    An element is bound to a tag name and, optionally, to a parent page.
    With a parent, rendered markup is appended to ``parent.content`` and the
    tag is validated against the parent's ``onetags`` / ``twotags`` /
    ``deptags`` lists.  Without a parent (the 'oneliner' use case) the
    rendered markup is returned as a string and no validation is done.
    """

    def __init__(self, tag, case='lower', parent=None):
        """Store the parent and normalise the tag name.

        tag    -- element name
        case   -- 'upper' or 'lower' convert the tag name accordingly;
                  'given' (or any other value) keeps it exactly as passed
        parent -- the owning page instance, or None for standalone rendering
        """
        self.parent = parent

        if case == 'upper':
            self.tag = tag.upper()
        elif case == 'lower':
            self.tag = tag.lower()
        else:
            # 'given' and unrecognised values both leave the name untouched
            self.tag = tag

    def __call__(self, *args, **kwargs):
        """Render the element, once per padded argument/attribute combination.

        At most one positional argument (the text between the tags) is
        accepted; keyword arguments become attributes.  Sequences are
        expanded by _argsdicts into several elements.

        Returns the joined markup when there is no parent, otherwise None
        (the markup is appended to the parent's content).

        Raises ArgumentError, ClosingError, DeprecationError or
        InvalidElementError depending on the arguments and the parent's mode.
        """
        if len(args) > 1:
            raise ArgumentError(self.tag)

        parent = self.parent

        # if class_ was defined in parent it should be added to every element
        if parent is not None and parent.class_ is not None:
            kwargs.setdefault('class_', parent.class_)

        if parent is None:
            # standalone rendering: a call without content gives <tag />
            single = len(args) == 0
            rendered = [self.render(self.tag, single, myarg, mydict)
                        for myarg, mydict in _argsdicts(args, kwargs)]
            return '\n'.join(rendered)

        if self.tag in parent.twotags:
            for myarg, mydict in _argsdicts(args, kwargs):
                self.render(self.tag, False, myarg, mydict)
        elif self.tag in parent.onetags:
            if len(args) != 0:
                # elements without a closing tag cannot hold content
                raise ClosingError(self.tag)
            for myarg, mydict in _argsdicts(args, kwargs):
                # here myarg is always None, because len( args ) = 0
                self.render(self.tag, True, myarg, mydict)
        elif parent.mode == 'strict_html' and self.tag in parent.deptags:
            raise DeprecationError(self.tag)
        else:
            raise InvalidElementError(self.tag, parent.mode)

    def render(self, tag, single, between, kwargs):
        """Append the actual tags to content.

        tag     -- element name to write
        single  -- when True and there is no content, emit '<tag ... />'
        between -- text placed between opening and closing tag, or None
        kwargs  -- attribute name/value pairs; a value of None produces a
                   bare attribute, other values are passed through escape()

        Returns the markup when the element has no parent; otherwise the
        markup is appended to the parent's content and None is returned.
        """
        out = "<%s" % tag
        for key, value in list(kwargs.items()):
            if value is not None:
                # strip this so class_ will mean class, etc.
                key = key.strip('_')
                # special cases whose real attribute name contains a dash
                if key == 'http_equiv':
                    key = 'http-equiv'
                elif key == 'accept_charset':
                    key = 'accept-charset'
                out = "%s %s=\"%s\"" % (out, key, escape(value))
            else:
                # when value is None that means stuff like <... checked>
                out = "%s %s" % (out, key)

        if between is not None:
            out = "%s>%s</%s>" % (out, between, tag)
        elif single:
            out = "%s />" % out
        else:
            out = "%s>" % out

        if self.parent is not None:
            self.parent.content.append(out)
        else:
            return out

    def close(self):
        """Append a closing tag unless element has only opening tag.

        Raises ClosingError for elements without a closing tag and
        DeprecationError for deprecated elements in 'strict_html' mode.
        """
        if self.tag in self.parent.twotags:
            self.parent.content.append("</%s>" % self.tag)
        elif self.tag in self.parent.onetags:
            raise ClosingError(self.tag)
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError(self.tag)

    def open(self, **kwargs):
        """Append an opening tag.

        Keyword arguments become attributes of the opening tag.  Raises
        DeprecationError for deprecated elements in 'strict_html' mode.
        """
        if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
            self.render(self.tag, False, None, kwargs)
        # the mode lives on the parent page; element itself has no 'mode'
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError(self.tag)
1.127 +
class page:
    """This is our main class representing a document. Elements are added
    as attributes of an instance of this class.

    Any attribute that is not otherwise defined on the instance is turned by
    __getattr__ into an element bound to this page, so ``doc.p( 'text' )``
    appends ``<p>text</p>`` to the document content.
    """

    def __init__(self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None):
        """Stuff that affects the whole document.

        mode -- 'strict_html'   for HTML 4.01 (default)
                'html'          alias for 'strict_html'
                'loose_html'    to allow some deprecated elements
                'xml'           to allow arbitrary elements

        case -- 'lower'         element names will be printed in lower case (default)
                'upper'         they will be printed in upper case
                'given'         element names will be printed as they are given

        onetags --              list or tuple of valid elements with opening tags only
        twotags --              list or tuple of valid elements with both opening and closing tags
                                these two keyword arguments may be used to select
                                the set of valid elements in 'xml' mode
                                invalid elements will raise appropriate exceptions

        separator --            string to place between added elements, defaults to newline

        class_ --               a class that will be added to every element if defined

        Raises ModeError for an unknown mode and CustomizationError when only
        one of onetags / twotags is given in 'xml' mode."""

        valid_onetags = ["AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM"]
        valid_twotags = ["A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
                         "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
                         "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
                         "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
                         "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
                         "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
                         "TT", "UL", "VAR"]
        deprecated_onetags = ["BASEFONT", "ISINDEX"]
        deprecated_twotags = ["APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U"]

        def both_cases(tags):
            # accept upper- and lower-case spellings: upper-case names first,
            # followed by their lower-case copies
            return list(tags) + [tag.lower() for tag in tags]

        self.header = []
        self.content = []
        self.footer = []
        self.case = case
        self.separator = separator

        # init( ) sets it to True so we know that </body></html> has to be printed at the end
        self._full = False
        self.class_ = class_

        if mode == 'strict_html' or mode == 'html':
            self.onetags = both_cases(valid_onetags)
            self.twotags = both_cases(valid_twotags)
            self.deptags = both_cases(deprecated_onetags + deprecated_twotags)
            self.mode = 'strict_html'
        elif mode == 'loose_html':
            self.onetags = both_cases(valid_onetags + deprecated_onetags)
            self.twotags = both_cases(valid_twotags + deprecated_twotags)
            self.mode = mode
        elif mode == 'xml':
            if onetags and twotags:
                self.onetags = onetags
                self.twotags = twotags
            elif (onetags and not twotags) or (twotags and not onetags):
                raise CustomizationError()
            else:
                # no customisation: every element name is acceptable
                self.onetags = russell()
                self.twotags = russell()
            self.mode = mode
        else:
            raise ModeError(mode)

    def __getattr__(self, attr):
        """Turn an unknown attribute name into an element bound to this page.

        Names that both start and end with a double underscore are never
        treated as tags, and a name with a leading underscore is accepted
        only when the rest of it is a reserved Python keyword (so that
        e.g. ``_del`` can stand for the <del> element).  Raises
        AttributeError otherwise.
        """
        # dunder names are special method lookups, not tags
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        # tag with single underscore should be a reserved keyword
        if attr.startswith('_'):
            attr = attr.lstrip('_')
            if attr not in keyword.kwlist:
                raise AttributeError(attr)

        return element(attr, case=self.case, parent=self)

    def __str__(self):
        """Join header, content and footer (plus closing tags after init)."""
        if self._full and (self.mode == 'strict_html' or self.mode == 'loose_html'):
            end = ['</body>', '</html>']
        else:
            end = []

        return self.separator.join(self.header + self.content + self.footer + end)

    def __call__(self, escape=False):
        """Return the document as a string.

        escape --   False   print normally
                    True    replace < and > by &lt; and &gt;
                            the default escape sequences in most browsers"""

        # the parameter shadows the module-level escape(), hence the _escape alias
        if escape:
            return _escape(self.__str__())
        else:
            return self.__str__()

    def add(self, text):
        """This is an alias to addcontent."""
        self.addcontent(text)

    def addfooter(self, text):
        """Add some text to the bottom of the document"""
        self.footer.append(text)

    def addheader(self, text):
        """Add some text to the top of the document"""
        self.header.append(text)

    def addcontent(self, text):
        """Add some text to the main part of the document"""
        self.content.append(text)

    def init(self, lang='en', css=None, metainfo=None, title=None, header=None,
             footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None, base=None):
        """This method is used for complete documents with appropriate
        doctype, encoding, title, etc information. For an HTML/XML snippet
        omit this method.

        lang --     language, usually a two character string, will appear
                    as <html lang='en'> in html mode (ignored in xml mode)

        css --      Cascading Style Sheet filename as a string or a list of
                    strings for multiple css files (ignored in xml mode)

        metainfo -- a dictionary in the form { 'name':'content' } to be inserted
                    into meta element(s) as <meta name='name' content='content'>
                    (ignored in xml mode)

        base     -- set the <base href="..."> tag in <head>

        bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
                    as attributes of the <body> element as <body key='value' ... >
                    (ignored in xml mode)

        script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
                    or a list of [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for all

        title --    the title of the document as a string to be inserted into
                    a title element as <title>my title</title> (ignored in xml mode)

        header --   some text to be inserted right after the <body> element
                    (ignored in xml mode)

        footer --   some text to be inserted right before the </body> element
                    (ignored in xml mode)

        charset --  a string defining the character set, will be inserted into a
                    <meta http-equiv='Content-Type' content='text/html; charset=myset'>
                    element (ignored in xml mode)

        encoding -- a string defining the encoding, will be put into to first line of
                    the document as <?xml version='1.0' encoding='myencoding' ?> in
                    xml mode (ignored in html mode)

        doctype --  the document type string, defaults to
                    <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
                    in html mode (ignored in xml mode)"""

        # remember that __str__ has to emit </body></html> at the end
        self._full = True

        if self.mode == 'strict_html' or self.mode == 'loose_html':
            if doctype is None:
                doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
            self.header.append(doctype)
            self.html(lang=lang)
            self.head()
            if charset is not None:
                self.meta(http_equiv='Content-Type', content="text/html; charset=%s" % charset)
            if metainfo is not None:
                self.metainfo(metainfo)
            if css is not None:
                self.css(css)
            if title is not None:
                self.title(title)
            if script is not None:
                self.scripts(script)
            if base is not None:
                self.base(href='%s' % base)
            self.head.close()
            if bodyattrs is not None:
                self.body(**bodyattrs)
            else:
                self.body()
            if header is not None:
                self.content.append(header)
            if footer is not None:
                self.footer.append(footer)

        elif self.mode == 'xml':
            if doctype is None:
                if encoding is not None:
                    doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
                else:
                    doctype = "<?xml version='1.0' ?>"
            self.header.append(doctype)

    def css(self, filelist):
        """This convenience function is only useful for html.
        It adds css stylesheet(s) to the document via the <link> element.

        filelist -- a single filename as a string, or an iterable of filenames"""

        if isinstance(filelist, basestring):
            self.link(href=filelist, rel='stylesheet', type='text/css', media='all')
        else:
            for filename in filelist:
                self.link(href=filename, rel='stylesheet', type='text/css', media='all')

    def metainfo(self, mydict):
        """This convenience function is only useful for html.
        It adds meta information via the <meta> element, the argument is
        a dictionary of the form { 'name':'content' }.

        Raises TypeError when the argument is not a dictionary."""

        if isinstance(mydict, dict):
            for name, content in list(mydict.items()):
                self.meta(name=name, content=content)
        else:
            raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.")

    def scripts(self, mydict):
        """Only useful in html, mydict is dictionary of src:type pairs or a list
        of script sources [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for type.
        Will be rendered as <script type='text/type' src=src></script>

        A single source given as a plain string is treated as a one-element
        list.  Raises TypeError when the argument cannot be iterated; errors
        raised while adding the <script> elements themselves are propagated
        unchanged."""

        if isinstance(mydict, dict):
            for src, script_type in list(mydict.items()):
                self.script('', src=src, type='text/%s' % script_type)
            return

        if isinstance(mydict, basestring):
            # a bare string would otherwise be iterated character by character
            mydict = [mydict]

        # only the iterability check is guarded, so that markup errors coming
        # from self.script() are not misreported as a TypeError
        try:
            sources = iter(mydict)
        except TypeError:
            raise TypeError("Script should be given a dictionary of src:type pairs or a list of javascript src's.")

        for src in sources:
            self.script('', src=src, type='text/javascript')
1.372 +
1.373 +
class _oneliner:
    """Factory for standalone elements that render straight to a string.

    Attribute access on an instance yields an element without a parent, so
    calling it returns the markup immediately and no page instance has to
    be created."""

    def __init__(self, case='lower'):
        """Remember the case convention handed on to every element created."""
        self.case = case

    def __getattr__(self, attr):
        """Build a parentless element named after the requested attribute.

        Dunder names are refused, and a name with a leading underscore is
        accepted only when the remainder is a reserved Python keyword.
        Raises AttributeError in both refusal cases."""
        looks_like_dunder = attr.startswith("__") and attr.endswith("__")
        if looks_like_dunder:
            raise AttributeError(attr)

        if attr[:1] == '_':
            # the underscore prefix exists only to spell keyword-named tags
            attr = attr.lstrip('_')
            if attr not in keyword.kwlist:
                raise AttributeError(attr)

        return element(attr, case=self.case, parent=None)


# Ready-made instances, one per supported case convention.
oneliner = _oneliner(case='lower')
upper_oneliner = _oneliner(case='upper')
given_oneliner = _oneliner(case='given')
1.398 +
def _argsdicts(args, mydict):
    """Generate ( content, attributes ) pairs, padding shorter inputs.

    args is a tuple holding zero or one positional argument and mydict maps
    attribute names to values.  Every input is converted with _totuple; one
    pair is produced per index up to the longest input, and an input that
    has run out keeps contributing its last item."""

    positional_count = len(args)
    if positional_count > 1:
        raise Exception("We should have never gotten here.")
    if positional_count == 1:
        contents = _totuple(args[0])
    else:
        contents = (None,)

    padded = [(name, _totuple(raw)) for name, raw in mydict.items()]
    rounds = max([len(contents)] + [len(values) for _, values in padded])

    def pick(sequence, index):
        # past the end of a sequence its last item is repeated
        if index < len(sequence):
            return sequence[index]
        return sequence[-1]

    for index in range(rounds):
        attributes = dict((name, pick(values, index)) for name, values in padded)
        yield pick(contents, index), attributes
1.427 +
# Numeric types that _totuple converts to their string form.  `long` is a
# builtin on Python 2 only, so looking it up is guarded for Python 3.
try:
    _NUMBER_TYPES = (int, long, float)
except NameError:
    _NUMBER_TYPES = (int, float)


def _totuple(x):
    """Utility stuff to convert string, int, long, float, None or anything to a usable tuple.

    A string becomes a one-item tuple holding that string, a number becomes
    a one-item tuple holding its str() form, None becomes ( None, ) and any
    other value is passed to tuple()."""

    if isinstance(x, basestring):
        return (x,)
    if isinstance(x, _NUMBER_TYPES):
        return (str(x),)
    if x is None:
        return (None,)
    return tuple(x)
1.441 +
def escape(text, newline=False):
    """Escape special html characters.

    text    -- value to escape; anything that is not a string is returned
               unchanged
    newline -- when True, newline characters are additionally replaced
               by <br>

    The characters & > < " ' are replaced by character references so the
    result can be placed inside a double- or single-quoted attribute value."""

    if isinstance(text, basestring):
        # the ampersand has to go first, otherwise the ampersands introduced
        # by the following replacements would be escaped a second time
        text = text.replace('&', '&amp;')
        text = text.replace('>', '&gt;')
        text = text.replace('<', '&lt;')
        text = text.replace('"', '&quot;')
        # a numeric reference keeps an apostrophe an apostrophe
        text = text.replace("'", '&#39;')
        if newline:
            text = text.replace('\n', '<br>')

    return text


# Alias used where a parameter called `escape` shadows the function.
_escape = escape
1.463 +
def unescape(text):
    """Inverse of escape.

    Replaces the character references for > < " ' and & by the characters
    themselves; anything that is not a string is returned unchanged."""

    if isinstance(text, basestring):
        text = text.replace('&gt;', '>')
        text = text.replace('&lt;', '<')
        text = text.replace('&quot;', '"')
        text = text.replace('&#39;', "'")
        # the ampersand has to go last, otherwise an escaped reference such
        # as '&amp;lt;' would be decoded twice and end up as '<'
        text = text.replace('&amp;', '&')

    return text
1.478 +
class dummy:
    """An empty class whose instances serve as plain attribute holders."""


# Ready-made HTML 4.01 document type declarations, reachable as
# doctype.strict, doctype.loose and doctype.frameset.
doctype = dummy()
doctype.strict = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'
doctype.loose = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">'
doctype.frameset = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">'
1.487 +
class russell:
    """A container stand-in for which every membership test succeeds."""

    def __contains__(self, item):
        """Report any item whatsoever as contained."""
        return True
1.493 +
1.494 +
class MarkupError(Exception):
    """All our exceptions subclass this."""

    def __str__(self):
        # subclasses store their text in self.message; when the base class is
        # raised directly there may be no such attribute, so fall back to the
        # standard exception text instead of failing inside str()
        try:
            return self.message
        except AttributeError:
            return Exception.__str__(self)


class ClosingError(MarkupError):
    """Content was given to, or a closing tag requested for, an element that has no closing tag."""

    def __init__(self, tag):
        self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag


class OpeningError(MarkupError):
    """An element can not be opened."""

    def __init__(self, tag):
        self.message = "The element '%s' can not be opened." % tag


class ArgumentError(MarkupError):
    """An element was called with more than one non-keyword argument."""

    def __init__(self, tag):
        self.message = "The element '%s' was called with more than one non-keyword argument." % tag


class InvalidElementError(MarkupError):
    """An element is not valid in the current mode."""

    def __init__(self, tag, mode):
        self.message = "The element '%s' is not valid for your mode '%s'." % (tag, mode)


class DeprecationError(MarkupError):
    """A deprecated element was used."""

    def __init__(self, tag):
        self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag


class ModeError(MarkupError):
    """An unknown mode was requested."""

    def __init__(self, mode):
        self.message = "Mode '%s' is invalid, possible values: strict_html, html (alias for strict_html), loose_html, xml." % mode


class CustomizationError(MarkupError):
    """Only one of 'onetags' and 'twotags' was supplied when customizing the allowed elements."""

    def __init__(self):
        self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
1.527 +
if __name__ == '__main__':
    # Executed as a script: just write the module documentation to stdout.
    from sys import stdout
    stdout.write(__doc__)