MoinMessage (annotate emailfix/generator.py in 49da801a72e4)

# Copyright (C) 2001-2010 Python Software Foundation

paul@139

2

# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator']

import re

paul@139

9

import sys

paul@139

10

import time

paul@139

11

import random

paul@139

12

import warnings

from cStringIO import StringIO

paul@139

15

from emailfix.header import Header

# Joiner used when building "_handle_<maintype>_<subtype>" method names.
UNDERSCORE = '_'
# Default line separator for generated output.
NL = '\n'

# Matches "From " at the start of any line; used for mbox From_ mangling.
fcre = re.compile(r'^From ', re.MULTILINE)
# Matches a bare LF, i.e. a newline that is not already preceded by CR.
nlre = re.compile(r'(?<!\r)\n', re.MULTILINE)

def _is8bitstring(s):

paul@139

24

    if isinstance(s, str):

paul@139

25

        try:

paul@139

26

            unicode(s, 'us-ascii')

paul@139

27

        except UnicodeError:

paul@139

28

            return True

paul@139

29

    return False

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """

    # Line separator used until flatten() installs the caller's choice.  A
    # class-level default keeps the protected _write()/_handle_*() methods
    # usable even when they are invoked without going through flatten().
    _NL = NL

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=NL):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        linesep is the string written after each generated header line and
        boundary delimiter, and substituted for every bare LF in text
        payloads, preambles and epilogues.  It defaults to NL and stays in
        effect on this generator for subsequent calls.

        Note that for subobjects, no From_ line is printed.
        """
        self._NL = linesep
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + self._NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Invoke the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg followed by the separating blank line."""
        for h, v in msg.items():
            self.write('%s: ' % h)
            if isinstance(v, Header):
                # Header instances know what to do.  This test comes first so
                # that a Header value is also handled when wrapping has been
                # disabled (maxheaderlen == 0, e.g. inside multipart/signed);
                # handing the instance to nlre.sub() would fail because it is
                # not a string.
                self.write(v.encode(linesep=self._NL) + self._NL)
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v + self._NL)
            elif self._maxheaderlen == 0:
                # Explicit no-wrapping: only normalize embedded line endings.
                self.write(nlre.sub(self._NL, v) + self._NL)
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                self.write(Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode(
                    linesep=self._NL) + self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, mangling From_ lines if so configured.

        Raises TypeError when the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(nlre.sub(self._NL, payload))

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the subparts of msg separated by boundary delimiters."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            preamble = msg.preamble
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            # Use the same line-ending normalization as text payloads so the
            # output does not mix bare LFs with the requested separator.
            self.write(nlre.sub(self._NL, preamble) + self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            epilogue = msg.epilogue
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            # Normalized for the same reason as the preamble above.
            self.write(nlre.sub(self._NL, epilogue))

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(self._NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self.write(self._NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        self.write(payload)

_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Write one line of output for every non-multipart part of msg.

        Text parts contribute their decoded payload; any other part
        contributes the configured format string expanded with details of
        that part.  Multipart containers themselves produce no output.
        """
        # Terminate each line with the separator chosen in flatten() rather
        # than a hard-coded '\n'; fall back to NL if flatten() was bypassed.
        linesep = getattr(self, '_NL', NL)
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                text = part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                continue
            else:
                text = self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
            # '%s' stringifies the value the same way the print statement
            # used previously did (including a None payload).
            self.write('%s%s' % (text, linesep))

# Helper

paul@139

360

_width = len(repr(sys.maxint-1))

paul@139

361

_fmt = '%%0%dd' % _width

def _make_boundary(text=None):

paul@139

364

    # Craft a random boundary.  If text is given, ensure that the chosen

paul@139

365

    # boundary doesn't appear in the text.

paul@139

366

    token = random.randrange(sys.maxint)

paul@139

367

    boundary = ('=' * 15) + (_fmt % token) + '=='

paul@139

368

    if text is None:

paul@139

369

        return boundary

paul@139

370

    b = boundary

paul@139

371

    counter = 0

paul@139

372

    while True:

paul@139

373

        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)

paul@139

374

        if not cre.search(text):

paul@139

375

            break

paul@139

376

        b = boundary + '.' + str(counter)

paul@139

377

        counter += 1

paul@139

378

    return b

paul@139	1	# Copyright (C) 2001-2010 Python Software Foundation
paul@139	2	# Contact: email-sig@python.org
paul@139	3
paul@139	4	"""Classes to generate plain text from a message object tree."""
paul@139	5
paul@139	6	__all__ = ['Generator', 'DecodedGenerator']
paul@139	7
paul@139	8	import re
paul@139	9	import sys
paul@139	10	import time
paul@139	11	import random
paul@139	12	import warnings
paul@139	13
paul@139	14	from cStringIO import StringIO
paul@139	15	from emailfix.header import Header
paul@139	16
paul@139	17	UNDERSCORE = '_'
paul@139	18	NL = '\n'
paul@139	19
paul@139	20	fcre = re.compile(r'^From ', re.MULTILINE)
paul@139	21	nlre = re.compile(r'(?<!\r)\n', re.MULTILINE)
paul@139	22
paul@139	23	def _is8bitstring(s):
paul@139	24	if isinstance(s, str):
paul@139	25	try:
paul@139	26	unicode(s, 'us-ascii')
paul@139	27	except UnicodeError:
paul@139	28	return True
paul@139	29	return False
paul@139	30
paul@139	31
paul@139	32
paul@139	33	class Generator:
paul@139	34	"""Generates output from a Message object tree.
paul@139	35
paul@139	36	This basic generator writes the message to the given file object as plain
paul@139	37	text.
paul@139	38	"""
paul@139	39	#
paul@139	40	# Public interface
paul@139	41	#
paul@139	42
paul@139	43	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
paul@139	44	"""Create the generator for message flattening.
paul@139	45
paul@139	46	outfp is the output file-like object for writing the message to. It
paul@139	47	must have a write() method.
paul@139	48
paul@139	49	Optional mangle_from_ is a flag that, when True (the default), escapes
paul@139	50	From_ lines in the body of the message by putting a `>' in front of
paul@139	51	them.
paul@139	52
paul@139	53	Optional maxheaderlen specifies the longest length for a non-continued
paul@139	54	header. When a header line is longer (in characters, with tabs
paul@139	55	expanded to 8 spaces) than maxheaderlen, the header will split as
paul@139	56	defined in the Header class. Set maxheaderlen to zero to disable
paul@139	57	header wrapping. The default is 78, as recommended (but not required)
paul@139	58	by RFC 2822.
paul@139	59	"""
paul@139	60	self._fp = outfp
paul@139	61	self._mangle_from_ = mangle_from_
paul@139	62	self._maxheaderlen = maxheaderlen
paul@139	63
paul@139	64	def write(self, s):
paul@139	65	# Just delegate to the file object
paul@139	66	self._fp.write(s)
paul@139	67
paul@139	68	def flatten(self, msg, unixfrom=False, linesep=NL):
paul@139	69	"""Print the message object tree rooted at msg to the output file
paul@139	70	specified when the Generator instance was created.
paul@139	71
paul@139	72	unixfrom is a flag that forces the printing of a Unix From_ delimiter
paul@139	73	before the first object in the message tree. If the original message
paul@139	74	has no From_ delimiter, a `standard' one is crafted. By default, this
paul@139	75	is False to inhibit the printing of any From_ delimiter.
paul@139	76
paul@139	77	Note that for subobjects, no From_ line is printed.
paul@139	78	"""
paul@139	79	self._NL = linesep
paul@139	80	if unixfrom:
paul@139	81	ufrom = msg.get_unixfrom()
paul@139	82	if not ufrom:
paul@139	83	ufrom = 'From nobody ' + time.ctime(time.time())
paul@139	84	self.write(ufrom + self._NL)
paul@139	85	self._write(msg)
paul@139	86
paul@139	87	def clone(self, fp):
paul@139	88	"""Clone this generator with the exact same options."""
paul@139	89	return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
paul@139	90
paul@139	91	#
paul@139	92	# Protected interface - undocumented ;/
paul@139	93	#
paul@139	94
paul@139	95	def _write(self, msg):
paul@139	96	# We can't write the headers yet because of the following scenario:
paul@139	97	# say a multipart message includes the boundary string somewhere in
paul@139	98	# its body. We'd have to calculate the new boundary /before/ we write
paul@139	99	# the headers so that we can write the correct Content-Type:
paul@139	100	# parameter.
paul@139	101	#
paul@139	102	# The way we do this, so as to make the _handle_*() methods simpler,
paul@139	103	# is to cache any subpart writes into a StringIO. The we write the
paul@139	104	# headers and the StringIO contents. That way, subpart handlers can
paul@139	105	# Do The Right Thing, and can still modify the Content-Type: header if
paul@139	106	# necessary.
paul@139	107	oldfp = self._fp
paul@139	108	try:
paul@139	109	self._fp = sfp = StringIO()
paul@139	110	self._dispatch(msg)
paul@139	111	finally:
paul@139	112	self._fp = oldfp
paul@139	113	# Write the headers. First we see if the message object wants to
paul@139	114	# handle that itself. If not, we'll do it generically.
paul@139	115	meth = getattr(msg, '_write_headers', None)
paul@139	116	if meth is None:
paul@139	117	self._write_headers(msg)
paul@139	118	else:
paul@139	119	meth(self)
paul@139	120	self._fp.write(sfp.getvalue())
paul@139	121
paul@139	122	def _dispatch(self, msg):
paul@139	123	# Get the Content-Type: for the message, then try to dispatch to
paul@139	124	# self._handle_<maintype>_<subtype>(). If there's no handler for the
paul@139	125	# full MIME type, then dispatch to self._handle_<maintype>(). If
paul@139	126	# that's missing too, then dispatch to self._writeBody().
paul@139	127	main = msg.get_content_maintype()
paul@139	128	sub = msg.get_content_subtype()
paul@139	129	specific = UNDERSCORE.join((main, sub)).replace('-', '_')
paul@139	130	meth = getattr(self, '_handle_' + specific, None)
paul@139	131	if meth is None:
paul@139	132	generic = main.replace('-', '_')
paul@139	133	meth = getattr(self, '_handle_' + generic, None)
paul@139	134	if meth is None:
paul@139	135	meth = self._writeBody
paul@139	136	meth(msg)
paul@139	137
paul@139	138	#
paul@139	139	# Default handlers
paul@139	140	#
paul@139	141
paul@139	142	def _write_headers(self, msg):
paul@139	143	for h, v in msg.items():
paul@139	144	self.write('%s: ' % h)
paul@139	145	if self._maxheaderlen == 0:
paul@139	146	# Explicit no-wrapping
paul@139	147	if _is8bitstring(v):
paul@139	148	self.write(v + self._NL)
paul@139	149	else:
paul@139	150	self.write(nlre.sub(self._NL, v) + self._NL)
paul@139	151	elif isinstance(v, Header):
paul@139	152	# Header instances know what to do
paul@139	153	self.write(v.encode(linesep=self._NL) + self._NL)
paul@139	154	elif _is8bitstring(v):
paul@139	155	# If we have raw 8bit data in a byte string, we have no idea
paul@139	156	# what the encoding is. There is no safe way to split this
paul@139	157	# string. If it's ascii-subset, then we could do a normal
paul@139	158	# ascii split, but if it's multibyte then we could break the
paul@139	159	# string. There's no way to know so the least harm seems to
paul@139	160	# be to not split the string and risk it being too long.
paul@139	161	self.write(v + self._NL)
paul@139	162	else:
paul@139	163	# Header's got lots of smarts, so use it. Note that this is
paul@139	164	# fundamentally broken though because we lose idempotency when
paul@139	165	# the header string is continued with tabs. It will now be
paul@139	166	# continued with spaces. This was reversedly broken before we
paul@139	167	# fixed bug 1974. Either way, we lose.
paul@139	168	self.write(Header(
paul@139	169	v, maxlinelen=self._maxheaderlen, header_name=h).encode(
paul@139	170	linesep=self._NL) + self._NL)
paul@139	171	# A blank line always separates headers from body
paul@139	172	self.write(self._NL)
paul@139	173
paul@139	174	#
paul@139	175	# Handlers for writing types and subtypes
paul@139	176	#
paul@139	177
paul@139	178	def _handle_text(self, msg):
paul@139	179	payload = msg.get_payload()
paul@139	180	if payload is None:
paul@139	181	return
paul@139	182	if not isinstance(payload, basestring):
paul@139	183	raise TypeError('string payload expected: %s' % type(payload))
paul@139	184	if self._mangle_from_:
paul@139	185	payload = fcre.sub('>From ', payload)
paul@139	186	self.write(nlre.sub(self._NL, payload))
paul@139	187
paul@139	188	# Default body handler
paul@139	189	_writeBody = _handle_text
paul@139	190
paul@139	191	def _handle_multipart(self, msg):
paul@139	192	# The trick here is to write out each part separately, merge them all
paul@139	193	# together, and then make sure that the boundary we've chosen isn't
paul@139	194	# present in the payload.
paul@139	195	msgtexts = []
paul@139	196	subparts = msg.get_payload()
paul@139	197	if subparts is None:
paul@139	198	subparts = []
paul@139	199	elif isinstance(subparts, basestring):
paul@139	200	# e.g. a non-strict parse of a message with no starting boundary.
paul@139	201	self.write(subparts)
paul@139	202	return
paul@139	203	elif not isinstance(subparts, list):
paul@139	204	# Scalar payload
paul@139	205	subparts = [subparts]
paul@139	206	for part in subparts:
paul@139	207	s = StringIO()
paul@139	208	g = self.clone(s)
paul@139	209	g.flatten(part, unixfrom=False, linesep=self._NL)
paul@139	210	msgtexts.append(s.getvalue())
paul@139	211	# BAW: What about boundaries that are wrapped in double-quotes?
paul@139	212	boundary = msg.get_boundary()
paul@139	213	if not boundary:
paul@139	214	# Create a boundary that doesn't appear in any of the
paul@139	215	# message texts.
paul@139	216	alltext = self._NL.join(msgtexts)
paul@139	217	boundary = _make_boundary(alltext)
paul@139	218	msg.set_boundary(boundary)
paul@139	219	# If there's a preamble, write it out, with a trailing CRLF
paul@139	220	if msg.preamble is not None:
paul@139	221	if self._mangle_from_:
paul@139	222	preamble = fcre.sub('>From ', msg.preamble)
paul@139	223	else:
paul@139	224	preamble = msg.preamble
paul@139	225	self.write(preamble + self._NL)
paul@139	226	# dash-boundary transport-padding CRLF
paul@139	227	self.write('--' + boundary + self._NL)
paul@139	228	# body-part
paul@139	229	if msgtexts:
paul@139	230	self.write(msgtexts.pop(0))
paul@139	231	# *encapsulation
paul@139	232	# --> delimiter transport-padding
paul@139	233	# --> CRLF body-part
paul@139	234	for body_part in msgtexts:
paul@139	235	# delimiter transport-padding CRLF
paul@139	236	self.write(self._NL + '--' + boundary + self._NL)
paul@139	237	# body-part
paul@139	238	self.write(body_part)
paul@139	239	# close-delimiter transport-padding
paul@139	240	self.write(self._NL + '--' + boundary + '--')
paul@139	241	if msg.epilogue is not None:
paul@139	242	self.write(self._NL)
paul@139	243	if self._mangle_from_:
paul@139	244	epilogue = fcre.sub('>From ', msg.epilogue)
paul@139	245	else:
paul@139	246	epilogue = msg.epilogue
paul@139	247	self.write(epilogue)
paul@139	248
paul@139	249	def _handle_multipart_signed(self, msg):
paul@139	250	# The contents of signed parts has to stay unmodified in order to keep
paul@139	251	# the signature intact per RFC1847 2.1, so we disable header wrapping.
paul@139	252	# RDM: This isn't enough to completely preserve the part, but it helps.
paul@139	253	old_maxheaderlen = self._maxheaderlen
paul@139	254	try:
paul@139	255	self._maxheaderlen = 0
paul@139	256	self._handle_multipart(msg)
paul@139	257	finally:
paul@139	258	self._maxheaderlen = old_maxheaderlen
paul@139	259
paul@139	260	def _handle_message_delivery_status(self, msg):
paul@139	261	# We can't just write the headers directly to self's file object
paul@139	262	# because this will leave an extra newline between the last header
paul@139	263	# block and the boundary. Sigh.
paul@139	264	blocks = []
paul@139	265	for part in msg.get_payload():
paul@139	266	s = StringIO()
paul@139	267	g = self.clone(s)
paul@139	268	g.flatten(part, unixfrom=False, linesep=self._NL)
paul@139	269	text = s.getvalue()
paul@139	270	lines = text.split(self._NL)
paul@139	271	# Strip off the unnecessary trailing empty line
paul@139	272	if lines and lines[-1] == '':
paul@139	273	blocks.append(self._NL.join(lines[:-1]))
paul@139	274	else:
paul@139	275	blocks.append(text)
paul@139	276	# Now join all the blocks with an empty line. This has the lovely
paul@139	277	# effect of separating each block with an empty line, but not adding
paul@139	278	# an extra one after the last one.
paul@139	279	self.write(self._NL.join(blocks))
paul@139	280
paul@139	281	def _handle_message(self, msg):
paul@139	282	s = StringIO()
paul@139	283	g = self.clone(s)
paul@139	284	# The payload of a message/rfc822 part should be a multipart sequence
paul@139	285	# of length 1. The zeroth element of the list should be the Message
paul@139	286	# object for the subpart. Extract that object, stringify it, and
paul@139	287	# write it out.
paul@139	288	# Except, it turns out, when it's a string instead, which happens when
paul@139	289	# and only when HeaderParser is used on a message of mime type
paul@139	290	# message/rfc822. Such messages are generated by, for example,
paul@139	291	# Groupwise when forwarding unadorned messages. (Issue 7970.) So
paul@139	292	# in that case we just emit the string body.
paul@139	293	payload = msg.get_payload()
paul@139	294	if isinstance(payload, list):
paul@139	295	g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
paul@139	296	payload = s.getvalue()
paul@139	297	self.write(payload)
paul@139	298
paul@139	299
paul@139	300
paul@139	301	_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
paul@139	302
paul@139	303	class DecodedGenerator(Generator):
paul@139	304	"""Generates a text representation of a message.
paul@139	305
paul@139	306	Like the Generator base class, except that non-text parts are substituted
paul@139	307	with a format string representing the part.
paul@139	308	"""
paul@139	309	def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
paul@139	310	"""Like Generator.__init__() except that an additional optional
paul@139	311	argument is allowed.
paul@139	312
paul@139	313	Walks through all subparts of a message. If the subpart is of main
paul@139	314	type `text', then it prints the decoded payload of the subpart.
paul@139	315
paul@139	316	Otherwise, fmt is a format string that is used instead of the message
paul@139	317	payload. fmt is expanded with the following keywords (in
paul@139	318	%(keyword)s format):
paul@139	319
paul@139	320	type : Full MIME type of the non-text part
paul@139	321	maintype : Main MIME type of the non-text part
paul@139	322	subtype : Sub-MIME type of the non-text part
paul@139	323	filename : Filename of the non-text part
paul@139	324	description: Description associated with the non-text part
paul@139	325	encoding : Content transfer encoding of the non-text part
paul@139	326
paul@139	327	The default value for fmt is None, meaning
paul@139	328
paul@139	329	[Non-text (%(type)s) part of message omitted, filename %(filename)s]
paul@139	330	"""
paul@139	331	Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
paul@139	332	if fmt is None:
paul@139	333	self._fmt = _FMT
paul@139	334	else:
paul@139	335	self._fmt = fmt
paul@139	336
paul@139	337	def _dispatch(self, msg):
paul@139	338	for part in msg.walk():
paul@139	339	maintype = part.get_content_maintype()
paul@139	340	if maintype == 'text':
paul@139	341	print >> self, part.get_payload(decode=True)
paul@139	342	elif maintype == 'multipart':
paul@139	343	# Just skip this
paul@139	344	pass
paul@139	345	else:
paul@139	346	print >> self, self._fmt % {
paul@139	347	'type' : part.get_content_type(),
paul@139	348	'maintype' : part.get_content_maintype(),
paul@139	349	'subtype' : part.get_content_subtype(),
paul@139	350	'filename' : part.get_filename('[no filename]'),
paul@139	351	'description': part.get('Content-Description',
paul@139	352	'[no description]'),
paul@139	353	'encoding' : part.get('Content-Transfer-Encoding',
paul@139	354	'[no encoding]'),
paul@139	355	}
paul@139	356
paul@139	357
paul@139	358
paul@139	359	# Helper
paul@139	360	_width = len(repr(sys.maxint-1))
paul@139	361	_fmt = '%%0%dd' % _width
paul@139	362
paul@139	363	def _make_boundary(text=None):
paul@139	364	# Craft a random boundary. If text is given, ensure that the chosen
paul@139	365	# boundary doesn't appear in the text.
paul@139	366	token = random.randrange(sys.maxint)
paul@139	367	boundary = ('=' * 15) + (_fmt % token) + '=='
paul@139	368	if text is None:
paul@139	369	return boundary
paul@139	370	b = boundary
paul@139	371	counter = 0
paul@139	372	while True:
paul@139	373	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
paul@139	374	if not cre.search(text):
paul@139	375	break
paul@139	376	b = boundary + '.' + str(counter)
paul@139	377	counter += 1
paul@139	378	return b

MoinMessage

Annotated emailfix/generator.py