# HG changeset patch # User Paul Boddie # Date 1486163447 -3600 # Node ID f91b467ef5687586a2903a77dac00be8e6530bd2 # Parent 9ec67eea98b56125b0e5fe828b0344ae1a000136 Removed recoding to UTF-8 since this failed for ISO-8859-15, causing UTF-8 recodings of byte sequences to occur, not producing such undesirable data for ISO-8859-1 only because of it being special-cased. This change may break other ASCII-incompatible encodings because UTF-8 is likely to be the safe form of such data, permitting the parser to understand it, and without such recoding the parser will no longer recognise the grammar's tokens. diff -r 9ec67eea98b5 -r f91b467ef568 compiler/transformer.py --- a/compiler/transformer.py Fri Feb 03 23:25:00 2017 +0100 +++ b/compiler/transformer.py Sat Feb 04 00:10:47 2017 +0100 @@ -669,11 +669,6 @@ def decode_literal(self, lit): if self.encoding: - # this is particularly fragile & a bit of a - # hack... changes in compile.c:parsestr and - # tokenizer.c must be reflected here. - if self.encoding not in ['utf-8', 'iso-8859-1']: - lit = unicode(lit, 'utf-8').encode(self.encoding) return eval("# coding: %s\n%s" % (self.encoding, lit)) else: return eval(lit) diff -r 9ec67eea98b5 -r f91b467ef568 pyparser/pyparse.py --- a/pyparser/pyparse.py Fri Feb 03 23:25:00 2017 +0100 +++ b/pyparser/pyparse.py Sat Feb 04 00:10:47 2017 +0100 @@ -1,13 +1,6 @@ from pyparser import parser, pytokenizer, pygram, error from pyparser import consts -def recode_to_utf8(bytes, encoding): - text = bytes.decode(encoding) - if not isinstance(text, unicode): - raise error.SyntaxError("codec did not return a unicode object") - recoded = text.encode("utf-8") - return recoded - def _normalize_encoding(encoding): """returns normalized name for @@ -103,17 +96,6 @@ filename=compile_info.filename) else: enc = _normalize_encoding(_check_for_encoding(textsrc)) - if enc is not None and enc not in ('utf-8', 'iso-8859-1'): - try: - textsrc = recode_to_utf8(textsrc, enc) - except LookupError as e: - # if the codec is not found, LookupError is raised. - raise error.SyntaxError("Unknown encoding: %s" % enc, - filename=compile_info.filename) - # Transform unicode errors into SyntaxError - except UnicodeDecodeError as e: - message = str(e) - raise error.SyntaxError(message) flags = compile_info.flags