1.1 --- a/pyparser/pyparse.py Sat Feb 04 00:12:06 2017 +0100
1.2 +++ b/pyparser/pyparse.py Sat Feb 04 15:35:58 2017 +0100
1.3 @@ -1,6 +1,13 @@
1.4 from pyparser import parser, pytokenizer, pygram, error
1.5 from pyparser import consts
1.6
def recode_to_utf8(bytes, encoding):
    """Re-encode source text from ``encoding`` into UTF-8.

    ``bytes`` is decoded with the named codec and the decoded text is then
    encoded again as UTF-8, which is the value returned.

    Raises ``error.SyntaxError`` when the codec's decode step produces
    something other than a ``unicode`` object. Exceptions raised by the
    decode call itself are not handled here and propagate to the caller.
    """
    decoded = bytes.decode(encoding)
    # Only a genuine unicode result can be re-encoded as UTF-8.
    if isinstance(decoded, unicode):
        return decoded.encode("utf-8")
    raise error.SyntaxError("codec did not return a unicode object")
1.13 +
1.14 def _normalize_encoding(encoding):
1.15 """returns normalized name for <encoding>
1.16
1.17 @@ -96,6 +103,17 @@
1.18 filename=compile_info.filename)
1.19 else:
1.20 enc = _normalize_encoding(_check_for_encoding(textsrc))
1.21 + if enc is not None and enc != 'utf-8':
1.22 + try:
1.23 + textsrc = recode_to_utf8(textsrc, enc)
1.24 + except LookupError as e:
1.25 + # if the codec is not found, LookupError is raised.
1.26 + raise error.SyntaxError("Unknown encoding: %s" % enc,
1.27 + filename=compile_info.filename)
1.28 + # Transform unicode errors into SyntaxError
1.29 + except UnicodeDecodeError as e:
1.30 + message = str(e)
1.31 + raise error.SyntaxError(message)
1.32
1.33 flags = compile_info.flags
1.34