# This code is in the public domain, it comes
# with absolutely no warranty and you can do
# absolutely whatever you want with it.

__date__ = '16 March 2015'
__version__ = '1.10'
__doc__ = """
This is markup.py - a Python module that attempts to
make it easier to generate HTML/XML from a Python program
in an intuitive, lightweight, customizable and pythonic way.
It works with both python 2 and 3.

The code is in the public domain.

Version: %s as of %s.

Documentation and further info is at http://markup.sourceforge.net/

Please send bug reports, feature requests, enhancement
ideas or questions to nogradi at gmail dot com.

Installation: drop markup.py somewhere into your Python path.
""" % (__version__, __date__)

try:
    basestring
    import string
except NameError:
    # python 3: the name lookup above fails, so alias the python 2 names
    basestring = str
    string = str
    long = int

# tags which are reserved python keywords will be referred
# to by a leading underscore otherwise we end up with a syntax error
import keyword


class element:
    """This class handles the addition of a new element.

    An instance is bound to one tag name and, optionally, to the page it
    belongs to.  With a parent page the rendered markup is appended to the
    page's content; without a parent (the oneliner case) the rendered
    markup is returned as a string.
    """

    def __init__(self, tag, case='lower', parent=None):
        """Store the parent and normalise the tag name.

        tag    -- the element name
        case   -- 'upper' or 'lower' to force the case of the tag; 'given'
                  (or any other value) keeps the tag exactly as passed
        parent -- the page instance that collects the output, or None
        """
        self.parent = parent

        if case == 'upper':
            self.tag = tag.upper()
        elif case == 'lower':
            self.tag = tag.lower()
        else:
            self.tag = tag

    def __call__(self, *args, **kwargs):
        """Render the element with at most one positional argument.

        The positional argument is the text between the opening and closing
        tags, keyword arguments become attributes.  Sequences are expanded
        by _argsdicts into one element per item.

        Returns the markup joined by newlines when there is no parent,
        otherwise appends to the parent's content and returns None.

        Raises ArgumentError for more than one positional argument and,
        with a parent, ClosingError, DeprecationError or InvalidElementError
        depending on how the tag is classified by the parent.
        """
        if len(args) > 1:
            raise ArgumentError(self.tag)

        if self.parent is None:
            # oneliner: no validation of the tag, a call without text
            # produces a self closing element
            single = len(args) == 0
            rendered = [self.render(self.tag, single, myarg, mydict)
                        for myarg, mydict in _argsdicts(args, kwargs)]
            return '\n'.join(rendered)

        # if class_ was defined in parent it should be added to every element
        if self.parent.class_ is not None and 'class_' not in kwargs:
            kwargs['class_'] = self.parent.class_

        if self.tag in self.parent.twotags:
            for myarg, mydict in _argsdicts(args, kwargs):
                self.render(self.tag, False, myarg, mydict)
        elif self.tag in self.parent.onetags:
            if args:
                raise ClosingError(self.tag)
            # here myarg is always None, because len( args ) = 0
            for myarg, mydict in _argsdicts(args, kwargs):
                self.render(self.tag, True, myarg, mydict)
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError(self.tag)
        else:
            raise InvalidElementError(self.tag, self.parent.mode)

    def render(self, tag, single, between, kwargs):
        """Append the actual tags to content.

        tag     -- the element name to print
        single  -- if True and there is no text, close the tag with ' />'
        between -- text placed between opening and closing tag, or None
                   for an opening (or self closing) tag only
        kwargs  -- attribute name/value pairs; a value of None prints the
                   bare attribute name

        Attribute values and the text are passed through escape().
        Returns the markup when there is no parent, otherwise appends it to
        the parent's content.
        """
        out = "<%s" % tag
        for key, value in list(kwargs.items()):
            if value is not None:
                # strip this so class_ will mean class, etc.
                key = key.strip('_')
                # special cases, maybe change _ to - overall?
                if key == 'http_equiv':
                    key = 'http-equiv'
                elif key == 'accept_charset':
                    key = 'accept-charset'
                out = "%s %s=\"%s\"" % (out, key, escape(value))
            else:
                # when value is None that means stuff like <... checked>
                out = "%s %s" % (out, key)

        if between is not None:
            out = "%s>%s</%s>" % (out, escape(between), tag)
        elif single:
            out = "%s />" % out
        else:
            out = "%s>" % out

        if self.parent is not None:
            self.parent.content.append(out)
        else:
            return out

    def close(self):
        """Append a closing tag unless element has only opening tag.

        Raises ClosingError for elements without a closing tag and
        DeprecationError for deprecated elements in strict_html mode.
        """
        if self.tag in self.parent.twotags:
            self.parent.content.append("</%s>" % self.tag)
        elif self.tag in self.parent.onetags:
            raise ClosingError(self.tag)
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError(self.tag)

    def open(self, **kwargs):
        """Append an opening tag.

        Raises DeprecationError for deprecated elements in strict_html mode.
        """
        if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
            self.render(self.tag, False, None, kwargs)
        # the mode lives on the page, an element has no mode of its own
        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
            raise DeprecationError(self.tag)


class page:
    """This is our main class representing a document. Elements are added
    as attributes of an instance of this class."""

    def __init__(self, mode='strict_html', case='lower', onetags=None, twotags=None,
                 separator='\n', class_=None):
        """Stuff that effects the whole document.

        mode -- 'strict_html'   for HTML 4.01 (default)
                'html'          alias for 'strict_html'
                'loose_html'    to allow some deprecated elements
                'xml'           to allow arbitrary elements

        case -- 'lower'         element names will be printed in lower case (default)
                'upper'         they will be printed in upper case
                'given'         element names will be printed as they are given

        onetags --              list or tuple of valid elements with opening tags only
        twotags --              list or tuple of valid elements with both opening and closing tags
                                these two keyword arguments may be used to select
                                the set of valid elements in 'xml' mode
                                invalid elements will raise appropriate exceptions

        separator --            string to place between added elements, defaults to newline

        class_ --               a class that will be added to every element if defined"""

        valid_onetags = ["AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM"]
        valid_twotags = ["A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
                         "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
                         "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
                         "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
                         "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
                         "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
                         "TT", "UL", "VAR"]
        deprecated_onetags = ["BASEFONT", "ISINDEX"]
        deprecated_twotags = ["APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U"]

        def both_cases(tags):
            # accept every tag in upper and in lower case
            return tags + [tag.lower() for tag in tags]

        self.header = []
        self.content = []
        self.footer = []
        self.case = case
        self.separator = separator

        # init( ) sets it to True so we know that </body></html> has to be printed at the end
        self._full = False
        self.class_ = class_

        if mode == 'strict_html' or mode == 'html':
            self.onetags = both_cases(valid_onetags)
            self.twotags = both_cases(valid_twotags)
            self.deptags = both_cases(deprecated_onetags + deprecated_twotags)
            self.mode = 'strict_html'
        elif mode == 'loose_html':
            self.onetags = both_cases(valid_onetags + deprecated_onetags)
            self.twotags = both_cases(valid_twotags + deprecated_twotags)
            self.mode = mode
        elif mode == 'xml':
            if onetags and twotags:
                self.onetags = onetags
                self.twotags = twotags
            elif onetags or twotags:
                # only one of the two was customized
                raise CustomizationError()
            else:
                self.onetags = russell()
                self.twotags = russell()
            self.mode = mode
        else:
            raise ModeError(mode)

    def __getattr__(self, attr):
        """Return an element bound to this page for any unknown attribute."""

        # names with leading and trailing double underscore are not tags
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        # tag with single underscore should be a reserved keyword
        if attr.startswith('_'):
            attr = attr.lstrip('_')
            if attr not in keyword.kwlist:
                raise AttributeError(attr)

        return element(attr, case=self.case, parent=self)

    def __str__(self):
        """Join header, content and footer with the separator; a document
        started with init( ) in an html mode is closed with </body></html>."""

        if self._full and (self.mode == 'strict_html' or self.mode == 'loose_html'):
            end = ['</body>', '</html>']
        else:
            end = []

        return self.separator.join(self.header + self.content + self.footer + end)

    def __call__(self, escape=False):
        """Return the document as a string.

        escape --   False   print normally
                    True    replace < and > by &lt; and &gt;
                            the default escape sequences in most browsers
                            (the module level escape function is applied,
                            which also replaces &, double and single quotes)"""

        if escape:
            return _escape(self.__str__())
        else:
            return self.__str__()

    def add(self, text):
        """This is an alias to addcontent."""
        self.addcontent(text)

    def addfooter(self, text):
        """Add some text to the bottom of the document"""
        self.footer.append(escape(text))

    def addheader(self, text):
        """Add some text to the top of the document"""
        self.header.append(escape(text))

    def addcontent(self, text):
        """Add some text to the main part of the document"""
        self.content.append(escape(text))

    def init(self, lang='en', css=None, metainfo=None, title=None, header=None,
             footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None,
             script=None, base=None):
        """This method is used for complete documents with appropriate
        doctype, encoding, title, etc information. For an HTML/XML snippet
        omit this method.

        lang --     language, usually a two character string, will appear
                    as <html lang='en'> in html mode (ignored in xml mode)

        css --      Cascading Style Sheet filename as a string or a list of
                    strings for multiple css files (ignored in xml mode)

        metainfo -- a dictionary in the form { 'name':'content' } to be inserted
                    into meta element(s) as <meta name='name' content='content'>
                    (ignored in xml mode)

        base     -- set the <base href="..."> tag in <head>

        bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
                    as attributes of the <body> element as <body key='value' ... >
                    (ignored in xml mode)

        script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
                    or a list of [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for all

        title --    the title of the document as a string to be inserted into
                    a title element as <title>my title</title> (ignored in xml mode)

        header --   some text to be inserted right after the <body> element
                    (ignored in xml mode)

        footer --   some text to be inserted right before the </body> element
                    (ignored in xml mode)

        charset --  a string defining the character set, will be inserted into a
                    <meta http-equiv='Content-Type' content='text/html; charset=myset'>
                    element (ignored in xml mode)

        encoding -- a string defining the encoding, will be put into to first line of
                    the document as <?xml version='1.0' encoding='myencoding' ?> in
                    xml mode (ignored in html mode)

        doctype --  the document type string, defaults to
                    <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
                    in html mode (ignored in xml mode)"""

        self._full = True

        if self.mode == 'strict_html' or self.mode == 'loose_html':
            if doctype is None:
                doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
            self.header.append(doctype)
            self.html(lang=lang)
            self.head()
            if charset is not None:
                self.meta(http_equiv='Content-Type', content="text/html; charset=%s" % charset)
            if metainfo is not None:
                self.metainfo(metainfo)
            if css is not None:
                self.css(css)
            if title is not None:
                self.title(title)
            if script is not None:
                self.scripts(script)
            if base is not None:
                self.base(href='%s' % base)
            self.head.close()
            if bodyattrs is not None:
                self.body(**bodyattrs)
            else:
                self.body()
            # header and footer are inserted verbatim, without escaping
            if header is not None:
                self.content.append(header)
            if footer is not None:
                self.footer.append(footer)

        elif self.mode == 'xml':
            if doctype is None:
                if encoding is not None:
                    doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
                else:
                    doctype = "<?xml version='1.0' ?>"
            self.header.append(doctype)

    def css(self, filelist):
        """This convenience function is only useful for html.
        It adds css stylesheet(s) to the document via the <link> element."""

        if isinstance(filelist, basestring):
            self.link(href=filelist, rel='stylesheet', type='text/css', media='all')
        else:
            for filename in filelist:
                self.link(href=filename, rel='stylesheet', type='text/css', media='all')

    def metainfo(self, mydict):
        """This convenience function is only useful for html.
        It adds meta information via the <meta> element, the argument is
        a dictionary of the form { 'name':'content' }."""

        if isinstance(mydict, dict):
            for name, content in list(mydict.items()):
                self.meta(name=name, content=content)
        else:
            raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.")

    def scripts(self, mydict):
        """Only useful in html, mydict is dictionary of src:type pairs or a list
        of script sources [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for type.
        Will be rendered as <script type='text/type' src=src></script>"""

        if isinstance(mydict, dict):
            for src, script_type in list(mydict.items()):
                self.script('', src=src, type='text/%s' % script_type)
        else:
            # only a non-iterable argument is reported as a wrong type,
            # errors raised while rendering are not masked
            try:
                sources = iter(mydict)
            except TypeError:
                raise TypeError("Script should be given a dictionary of src:type pairs or a list of javascript src's.")
            for src in sources:
                self.script('', src=src, type='text/javascript')


class _oneliner:
    """An instance of oneliner returns a string corresponding to one element.
    This class can be used to write 'oneliners' that return a string
    immediately so there is no need to instantiate the page class."""

    def __init__(self, case='lower'):
        self.case = case

    def __getattr__(self, attr):
        """Return a parent-less element for any unknown attribute."""

        # names with leading and trailing double underscore are not tags
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        # tag with single underscore should be a reserved keyword
        if attr.startswith('_'):
            attr = attr.lstrip('_')
            if attr not in keyword.kwlist:
                raise AttributeError(attr)

        return element(attr, case=self.case, parent=None)


oneliner = _oneliner(case='lower')
upper_oneliner = _oneliner(case='upper')
given_oneliner = _oneliner(case='given')


def _argsdicts(args, mydict):
    """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1.

    Yields ( text, attributes ) pairs, one per element to render.  The
    number of pairs is the length of the longest sequence among the text
    and the attribute values; shorter sequences repeat their last item."""

    if len(args) == 0:
        args = None,
    elif len(args) == 1:
        args = _totuple(args[0])
    else:
        raise Exception("We should have never gotten here.")

    mykeys = list(mydict.keys())
    myvalues = [_totuple(value) for value in mydict.values()]

    maxlength = max(len(item) for item in [args] + myvalues)

    for i in range(maxlength):
        thisdict = {}
        for key, value in zip(mykeys, myvalues):
            try:
                thisdict[key] = value[i]
            except IndexError:
                thisdict[key] = value[-1]
        try:
            thisarg = args[i]
        except IndexError:
            thisarg = args[-1]

        yield thisarg, thisdict


def _totuple(x):
    """Utility stuff to convert string, int, long, float, None or anything to a usable tuple."""

    if isinstance(x, basestring):
        out = x,
    elif isinstance(x, (int, long, float)):
        out = str(x),
    elif x is None:
        out = None,
    else:
        out = tuple(x)

    return out


def escape(text, newline=False):
    """Escape special html characters.

    Replaces &, >, <, double and single quotes by their entities and, if
    newline is True, newlines by <br>.  Anything that is not a string is
    returned unchanged."""

    if isinstance(text, basestring):
        # the ampersand has to go first, otherwise the ampersands of the
        # entities inserted below would be escaped again
        text = text.replace('&', '&amp;')
        text = text.replace('>', '&gt;')
        text = text.replace('<', '&lt;')
        text = text.replace('\"', '&quot;')
        text = text.replace('\'', '&#39;')
        if newline:
            text = text.replace('\n', '<br>')

    return text


_escape = escape


def unescape(text):
    """Inverse of escape.

    Anything that is not a string is returned unchanged.  A <br> is not
    turned back into a newline."""

    if isinstance(text, basestring):
        text = text.replace('&gt;', '>')
        text = text.replace('&lt;', '<')
        text = text.replace('&quot;', '\"')
        text = text.replace('&#39;', '\'')
        # the ampersand has to go last, otherwise an escaped entity such
        # as &amp;lt; would be decoded twice
        text = text.replace('&amp;', '&')

    return text


class dummy:
    """A dummy class for attaching attributes."""
    pass


doctype = dummy()
doctype.frameset = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">"""
doctype.strict = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">"""
doctype.loose = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">"""


class russell:
    """A dummy class that contains anything."""

    def __contains__(self, item):
        return True


class MarkupError(Exception):
    """All our exceptions subclass this."""

    def __init__(self, message=''):
        # keep the message attribute and also hand the text to Exception
        # so that args is populated
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message


class ClosingError(MarkupError):
    def __init__(self, tag):
        MarkupError.__init__(self, "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag)


class OpeningError(MarkupError):
    def __init__(self, tag):
        MarkupError.__init__(self, "The element '%s' can not be opened." % tag)


class ArgumentError(MarkupError):
    def __init__(self, tag):
        MarkupError.__init__(self, "The element '%s' was called with more than one non-keyword argument." % tag)


class InvalidElementError(MarkupError):
    def __init__(self, tag, mode):
        MarkupError.__init__(self, "The element '%s' is not valid for your mode '%s'." % (tag, mode))


class DeprecationError(MarkupError):
    def __init__(self, tag):
        MarkupError.__init__(self, "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag)


class ModeError(MarkupError):
    def __init__(self, mode):
        MarkupError.__init__(self, "Mode '%s' is invalid, possible values: strict_html, html (alias for strict_html), loose_html, xml." % mode)


class CustomizationError(MarkupError):
    def __init__(self):
        MarkupError.__init__(self, "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'.")


if __name__ == '__main__':
    import sys
    sys.stdout.write(__doc__)