# This code is in the public domain, it comes
# with absolutely no warranty and you can do
# absolutely whatever you want with it.

__date__ = '1 October 2012'
__version__ = '1.9'
__doc__= """
This is markup.py - a Python module that attempts to
make it easier to generate HTML/XML from a Python program
in an intuitive, lightweight, customizable and pythonic way.

The code is in the public domain.

Version: %s as of %s.

Documentation and further info is at http://markup.sourceforge.net/

Please send bug reports, feature requests, enhancement
ideas or questions to nogradi at gmail dot com.

Installation: drop markup.py somewhere into your Python path.
""" % ( __version__, __date__ )

try:
    basestring
    import string
except NameError:
    # python 3: these python 2 names no longer exist, alias them so the
    # rest of the module can use them unconditionally
    basestring = str
    string = str
    long = int

# tags which are reserved python keywords will be referred
# to by a leading underscore otherwise we end up with a syntax error
import keyword


def _tag_from_attribute( attr ):
    """Translate an attribute name into the element name it stands for.

    Dunder names are refused so that protocol lookups done by Python itself
    (copy, pickle, ...) are not mistaken for elements. A name with leading
    underscore(s) is only accepted when, with the underscores removed, it is
    a reserved python keyword (e.g. _class, _for).

    Raises AttributeError for names that can not be an element."""

    if attr.startswith( "__" ) and attr.endswith( "__" ):
        raise AttributeError( attr )
    if attr.startswith( '_' ):
        attr = attr.lstrip( '_' )
        if attr not in keyword.kwlist:
            raise AttributeError( attr )

    return attr


def _with_lowercase( tags ):
    """Return a new list holding the given tags followed by their lower case forms."""

    return list( tags ) + [ tag.lower( ) for tag in tags ]


class element:
    """This class handles the addition of a new element.

    With a parent page the rendered markup is appended to the content of
    that page; without a parent (oneliner mode) it is returned as a string."""

    def __init__( self, tag, case='lower', parent=None ):
        """tag -- name of the element

        case -- 'upper' or 'lower' to force the case of the tag, any other
                value (including 'given') leaves the tag as it was given

        parent -- the page instance this element belongs to, or None"""

        self.parent = parent

        if case == 'upper':
            self.tag = tag.upper( )
        elif case == 'lower':
            self.tag = tag.lower( )
        else:
            self.tag = tag

    def __call__( self, *args, **kwargs ):
        """Render the element, at most one non-keyword argument is accepted.

        The non-keyword argument is the text between the opening and closing
        tags, keyword arguments become attributes. Sequences are expanded
        into several elements by _argsdicts.

        Returns the rendered string in oneliner mode, None otherwise.

        Raises ArgumentError, ClosingError, DeprecationError or
        InvalidElementError."""

        if len( args ) > 1:
            raise ArgumentError( self.tag )

        if self.parent is None:
            # oneliner mode: no validation, without content the element is self-closing
            single = len( args ) == 0
            rendered = [ self.render( self.tag, single, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ]
            return '\n'.join( rendered )

        # if class_ was defined in parent it should be added to every element
        if self.parent.class_ is not None and 'class_' not in kwargs:
            kwargs['class_'] = self.parent.class_

        if self.tag in self.parent.twotags:
            for myarg, mydict in _argsdicts( args, kwargs ):
                self.render( self.tag, False, myarg, mydict )
        elif self.tag in self.parent.onetags:
            if len( args ) == 0:
                for myarg, mydict in _argsdicts( args, kwargs ):
                    self.render( self.tag, True, myarg, mydict )    # here myarg is always None, because len( args ) = 0
            else:
                raise ClosingError( self.tag )
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError( self.tag )
        else:
            raise InvalidElementError( self.tag, self.parent.mode )

    def render( self, tag, single, between, kwargs ):
        """Append the actual tags to content.

        tag -- the element name to print
        single -- True to print a self-closing tag when there is no content
        between -- text between the opening and closing tags or None
        kwargs -- dictionary of attributes, a None value gives a bare attribute

        The string is appended to the parent's content, or returned if
        there is no parent."""

        out = "<%s" % tag
        for key, value in kwargs.items( ):
            if value is not None:               # when value is None that means stuff like <... checked>
                key = key.strip( '_' )          # strip this so class_ will mean class, etc.
                if key == 'http_equiv':         # special cases, maybe change _ to - overall?
                    key = 'http-equiv'
                elif key == 'accept_charset':
                    key = 'accept-charset'
                out = "%s %s=\"%s\"" % ( out, key, escape( value ) )
            else:
                out = "%s %s" % ( out, key )
        if between is not None:
            out = "%s>%s</%s>" % ( out, between, tag )
        elif single:
            out = "%s />" % out
        else:
            out = "%s>" % out
        if self.parent is not None:
            self.parent.content.append( out )
        else:
            return out

    def close( self ):
        """Append a closing tag unless element has only opening tag."""

        if self.tag in self.parent.twotags:
            self.parent.content.append( "</%s>" % self.tag )
        elif self.tag in self.parent.onetags:
            raise ClosingError( self.tag )
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError( self.tag )

    def open( self, **kwargs ):
        """Append an opening tag."""

        if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
            self.render( self.tag, False, None, kwargs )
        # the mode lives on the page, an element has no mode of its own
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError( self.tag )


class page:
    """This is our main class representing a document. Elements are added
    as attributes of an instance of this class."""

    def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ):
        """Stuff that affects the whole document.

        mode -- 'strict_html'   for HTML 4.01 (default)
                'html'          alias for 'strict_html'
                'loose_html'    to allow some deprecated elements
                'xml'           to allow arbitrary elements

        case -- 'lower'         element names will be printed in lower case (default)
                'upper'         they will be printed in upper case
                'given'         element names will be printed as they are given

        onetags --              list or tuple of valid elements with opening tags only
        twotags --              list or tuple of valid elements with both opening and closing tags
                                these two keyword arguments may be used to select
                                the set of valid elements in 'xml' mode
                                invalid elements will raise appropriate exceptions

        separator --            string to place between added elements, defaults to newline

        class_ --               a class that will be added to every element if defined"""

        valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ]
        valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
                "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
                "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
                "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
                "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
                "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
                "TT", "UL", "VAR" ]
        deprecated_onetags = [ "BASEFONT", "ISINDEX" ]
        deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ]

        self.header = [ ]
        self.content = [ ]
        self.footer = [ ]
        self.case = case
        self.separator = separator

        # init( ) sets it to True so we know that </body></html> has to be printed at the end
        self._full = False
        self.class_= class_

        if mode == 'strict_html' or mode == 'html':
            self.onetags = _with_lowercase( valid_onetags )
            self.twotags = _with_lowercase( valid_twotags )
            self.deptags = _with_lowercase( deprecated_onetags + deprecated_twotags )
            self.mode = 'strict_html'
        elif mode == 'loose_html':
            self.onetags = _with_lowercase( valid_onetags + deprecated_onetags )
            self.twotags = _with_lowercase( valid_twotags + deprecated_twotags )
            self.mode = mode
        elif mode == 'xml':
            if onetags and twotags:
                self.onetags = onetags
                self.twotags = twotags
            elif ( onetags and not twotags ) or ( twotags and not onetags ):
                raise CustomizationError( )
            else:
                # nothing was customized, every element is valid
                self.onetags = russell( )
                self.twotags = russell( )
            self.mode = mode
        else:
            raise ModeError( mode )

    def __getattr__( self, attr ):
        """Return an element named after the attribute, bound to this page.

        Only called for names that are not real attributes or methods."""

        return element( _tag_from_attribute( attr ), case=self.case, parent=self )

    def __str__( self ):

        if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ):
            end = [ '</body>', '</html>' ]
        else:
            end = [ ]

        return self.separator.join( self.header + self.content + self.footer + end )

    def __call__( self, escape=False ):
        """Return the document as a string.

        escape --   False   print normally
                    True    replace < and > by &lt; and &gt;
                            the default escape sequences in most browsers"""

        # the argument shadows the module level escape function, hence the alias
        if escape:
            return _escape( self.__str__( ) )
        else:
            return self.__str__( )

    def add( self, text ):
        """This is an alias to addcontent."""
        self.addcontent( text )

    def addfooter( self, text ):
        """Add some text to the bottom of the document"""
        self.footer.append( text )

    def addheader( self, text ):
        """Add some text to the top of the document"""
        self.header.append( text )

    def addcontent( self, text ):
        """Add some text to the main part of the document"""
        self.content.append( text )


    def init( self, lang='en', css=None, metainfo=None, title=None, header=None,
              footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None, base=None ):
        """This method is used for complete documents with appropriate
        doctype, encoding, title, etc information. For an HTML/XML snippet
        omit this method.

        lang --     language, usually a two character string, will appear
                    as <html lang='en'> in html mode (ignored in xml mode)

        css --      Cascading Style Sheet filename as a string or a list of
                    strings for multiple css files (ignored in xml mode)

        metainfo -- a dictionary in the form { 'name':'content' } to be inserted
                    into meta element(s) as <meta name='name' content='content'>
                    (ignored in xml mode)

        base     -- set the <base href="..."> tag in <head>

        bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
                    as attributes of the <body> element as <body key='value' ... >
                    (ignored in xml mode)

        script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
                    or a list of [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for all

        title --    the title of the document as a string to be inserted into
                    a title element as <title>my title</title> (ignored in xml mode)

        header --   some text to be inserted right after the <body> element
                    (ignored in xml mode)

        footer --   some text to be inserted right before the </body> element
                    (ignored in xml mode)

        charset --  a string defining the character set, will be inserted into a
                    <meta http-equiv='Content-Type' content='text/html; charset=myset'>
                    element (ignored in xml mode)

        encoding -- a string defining the encoding, will be put into to first line of
                    the document as <?xml version='1.0' encoding='myencoding' ?> in
                    xml mode (ignored in html mode)

        doctype --  the document type string, defaults to
                    <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
                    in html mode (ignored in xml mode)"""

        self._full = True

        if self.mode == 'strict_html' or self.mode == 'loose_html':
            if doctype is None:
                doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
            self.header.append( doctype )
            self.html( lang=lang )
            self.head( )
            if charset is not None:
                self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset )
            if metainfo is not None:
                self.metainfo( metainfo )
            if css is not None:
                self.css( css )
            if title is not None:
                self.title( title )
            if script is not None:
                self.scripts( script )
            if base is not None:
                self.base( href='%s' % base )
            self.head.close()
            if bodyattrs is not None:
                self.body( **bodyattrs )
            else:
                self.body( )
            if header is not None:
                self.content.append( header )
            if footer is not None:
                self.footer.append( footer )

        elif self.mode == 'xml':
            if doctype is None:
                if encoding is not None:
                    doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
                else:
                    doctype = "<?xml version='1.0' ?>"
            self.header.append( doctype )

    def css( self, filelist ):
        """This convenience function is only useful for html.
        It adds css stylesheet(s) to the document via the <link> element.

        filelist -- a single filename as a string or an iterable of filenames"""

        if isinstance( filelist, basestring ):
            filelist = [ filelist ]
        for filename in filelist:
            self.link( href=filename, rel='stylesheet', type='text/css', media='all' )

    def metainfo( self, mydict ):
        """This convenience function is only useful for html.
        It adds meta information via the <meta> element, the argument is
        a dictionary of the form { 'name':'content' }.

        Raises TypeError if the argument is not a dictionary."""

        if not isinstance( mydict, dict ):
            raise TypeError( "Metainfo should be called with a dictionary argument of name:content pairs." )

        for name, content in mydict.items( ):
            self.meta( name=name, content=content )

    def scripts( self, mydict ):
        """Only useful in html, mydict is dictionary of src:type pairs or a list
        of script sources [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for type.
        A single string is treated as a list with that one source.
        Will be rendered as <script type='text/type' src=src></script>

        Raises TypeError if the argument is neither a dictionary nor iterable."""

        if isinstance( mydict, dict ):
            for src, kind in mydict.items( ):
                self.script( '', src=src, type='text/%s' % kind )
            return

        if isinstance( mydict, basestring ):
            # do not iterate over the characters of a lone source
            mydict = [ mydict ]

        # only the iterability check is guarded, so errors raised while
        # rendering the elements are not disguised as a TypeError
        try:
            sources = iter( mydict )
        except TypeError:
            raise TypeError( "Script should be given a dictionary of src:type pairs or a list of javascript src's." )

        for src in sources:
            self.script( '', src=src, type='text/javascript' )


class _oneliner:
    """An instance of oneliner returns a string corresponding to one element.
    This class can be used to write 'oneliners' that return a string
    immediately so there is no need to instantiate the page class."""

    def __init__( self, case='lower' ):
        self.case = case

    def __getattr__( self, attr ):
        """Return a parentless element named after the attribute."""

        return element( _tag_from_attribute( attr ), case=self.case, parent=None )

oneliner = _oneliner( case='lower' )
upper_oneliner = _oneliner( case='upper' )
given_oneliner = _oneliner( case='given' )

def _argsdicts( args, mydict ):
    """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1.

    Every value is turned into a tuple by _totuple, the longest tuple sets
    the number of ( argument, dictionary ) pairs that are yielded and the
    shorter ones are padded by repeating their last item."""

    if len( args ) == 0:
        args = None,
    elif len( args ) == 1:
        args = _totuple( args[0] )
    else:
        raise Exception( "We should have never gotten here." )

    mykeys = list( mydict.keys( ) )
    myvalues = [ _totuple( value ) for value in mydict.values( ) ]

    maxlength = max( [ len( item ) for item in [ args ] + myvalues ] )

    for i in range( maxlength ):
        thisdict = { }
        for key, value in zip( mykeys, myvalues ):
            try:
                thisdict[ key ] = value[i]
            except IndexError:
                thisdict[ key ] = value[-1]
        try:
            thisarg = args[i]
        except IndexError:
            thisarg = args[-1]

        yield thisarg, thisdict

def _totuple( x ):
    """Utility stuff to convert string, int, long, float, None or anything to a usable tuple.

    Strings and None are wrapped as they are, numbers are converted to a
    string first, anything else is handed to tuple( )."""

    if isinstance( x, basestring ):
        out = x,
    elif isinstance( x, ( int, long, float ) ):
        out = str( x ),
    elif x is None:
        out = None,
    else:
        out = tuple( x )

    return out

def escape( text, newline=False ):
    """Escape special html characters.

    text -- the string to escape, anything that is not a string is returned unchanged
    newline -- if True newlines are replaced by <br> as well"""

    if isinstance( text, basestring ):
        # the ampersand has to go first, otherwise the ampersands of the
        # entities inserted below would be escaped again
        text = text.replace( '&', '&amp;' )
        text = text.replace( '>', '&gt;' )
        text = text.replace( '<', '&lt;' )
        text = text.replace( '\"', '&quot;' )
        text = text.replace( '\'', '&#39;' )
        if newline:
            text = text.replace( '\n', '<br>' )

    return text

_escape = escape

def unescape( text ):
    """Inverse of escape.

    Anything that is not a string is returned unchanged. A <br> inserted
    by escape( text, newline=True ) is not converted back."""

    if isinstance( text, basestring ):
        text = text.replace( '&gt;', '>' )
        text = text.replace( '&lt;', '<' )
        text = text.replace( '&quot;', '\"' )
        text = text.replace( '&#39;', '\'' )
        # the ampersand has to go last, otherwise an escaped entity
        # like &amp;lt; would be decoded twice
        text = text.replace( '&amp;', '&' )

    return text

class dummy:
    """A dummy class for attaching attributes."""
    pass

# ready made doctype strings that can be given to page.init( doctype=... )
doctype = dummy( )
doctype.frameset = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">"""
doctype.strict = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"""
doctype.loose = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">"""

class russell:
    """A dummy class that contains anything."""

    def __contains__( self, item ):
        return True


class MarkupError( Exception ):
    """All our exceptions subclass this."""
    def __init__( self, message='' ):
        # hand the message to Exception too so that args is populated
        Exception.__init__( self, message )
        self.message = message

    def __str__( self ):
        return self.message

class ClosingError( MarkupError ):
    def __init__( self, tag ):
        MarkupError.__init__( self, "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag )

class OpeningError( MarkupError ):
    def __init__( self, tag ):
        MarkupError.__init__( self, "The element '%s' can not be opened." % tag )

class ArgumentError( MarkupError ):
    def __init__( self, tag ):
        MarkupError.__init__( self, "The element '%s' was called with more than one non-keyword argument." % tag )

class InvalidElementError( MarkupError ):
    def __init__( self, tag, mode ):
        MarkupError.__init__( self, "The element '%s' is not valid for your mode '%s'." % ( tag, mode ) )

class DeprecationError( MarkupError ):
    def __init__( self, tag ):
        MarkupError.__init__( self, "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag )

class ModeError( MarkupError ):
    def __init__( self, mode ):
        MarkupError.__init__( self, "Mode '%s' is invalid, possible values: strict_html, html (alias for strict_html), loose_html, xml." % mode )

class CustomizationError( MarkupError ):
    def __init__( self ):
        MarkupError.__init__( self, "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'." )

if __name__ == '__main__':
    import sys
    sys.stdout.write( __doc__ )