1.1 --- a/pyparser/pyparse.py	Sat Feb 04 00:12:06 2017 +0100
     1.2 +++ b/pyparser/pyparse.py	Sat Feb 04 15:35:58 2017 +0100
     1.3 @@ -1,6 +1,13 @@
     1.4  from pyparser import parser, pytokenizer, pygram, error
     1.5  from pyparser import consts
     1.6  
     1.7 +def recode_to_utf8(bytes, encoding):
     1.8 +    """Return `bytes` decoded with `encoding` and re-encoded as UTF-8."""
     1.9 +    decoded = bytes.decode(encoding)
    1.10 +    if isinstance(decoded, unicode):
    1.11 +        return decoded.encode("utf-8")
    1.12 +    raise error.SyntaxError("codec did not return a unicode object")
    1.13 +
    1.14  def _normalize_encoding(encoding):
    1.15      """returns normalized name for <encoding>
    1.16  
    1.17 @@ -96,6 +103,17 @@
    1.18                                          filename=compile_info.filename)
    1.19          else:
    1.20              enc = _normalize_encoding(_check_for_encoding(textsrc))
    1.21 +            if enc is not None and enc != 'utf-8':
    1.22 +                try:
    1.23 +                    textsrc = recode_to_utf8(textsrc, enc)
    1.24 +                except LookupError as e:
    1.25 +                    # if the codec is not found, LookupError is raised.
    1.26 +                    raise error.SyntaxError("Unknown encoding: %s" % enc,
    1.27 +                                            filename=compile_info.filename)
    1.28 +                # Transform unicode errors into SyntaxError
    1.29 +                except UnicodeDecodeError as e:
    1.30 +                    message = str(e)
    1.31 +                    raise error.SyntaxError(message)
    1.32  
    1.33          flags = compile_info.flags
    1.34