# HG changeset patch # User Paul Boddie # Date 1483903239 -3600 # Node ID 051349b537b21177327177f4c7667675e08b1102 # Parent 28e2996df412498f7cb5256a5d10221e51b48649 Added a modified version of pyparser (from PyPy) that provides parser module functionality without relying on CPython. Added licensing information for the pyparser and compiler packages. diff -r 28e2996df412 -r 051349b537b2 compiler/transformer.py --- a/compiler/transformer.py Sun Jan 08 00:27:02 2017 +0100 +++ b/compiler/transformer.py Sun Jan 08 20:20:39 2017 +0100 @@ -26,8 +26,8 @@ # and replace OWNER, ORGANIZATION, and YEAR as appropriate. from compiler.ast import * -import parser -import symbol +import pyparser.pyparse as parser +from pyparser.pygram import syms as symbol import token class WalkerError(StandardError): diff -r 28e2996df412 -r 051349b537b2 docs/COPYING.txt --- a/docs/COPYING.txt Sun Jan 08 00:27:02 2017 +0100 +++ b/docs/COPYING.txt Sun Jan 08 20:20:39 2017 +0100 @@ -16,3 +16,25 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . + +Licence Details for compiler +---------------------------- + +See LICENCE-Python.txt for the licensing details applying to the compiler +package. + +The compiler package has been modified to only provide essential abstract +syntax tree support for Lichen. The following applies to these modifications: + +Copyright (C) 2014, 2015, 2016 Paul Boddie + +Licence Details for pyparser +---------------------------- + +See LICENSE-PyPy.txt for the licensing details applying to the pyparser +package. + +The pyparser package has been modified to work with the modified compiler +package. The following applies to these modifications: + +Copyright (C) 2016 Paul Boddie diff -r 28e2996df412 -r 051349b537b2 docs/LICENCE-Python.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/LICENCE-Python.txt Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,254 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? 
(1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are retained +in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. 
By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. 
+ + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff -r 28e2996df412 -r 051349b537b2 docs/LICENSE-PyPy.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/LICENSE-PyPy.txt Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,479 @@ +License +======= + +Except when otherwise stated (look for LICENSE files in directories or +information at the beginning of each file) all software and documentation in +the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy', +'py', and '_pytest' directories is licensed as follows: + + The MIT License + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ + +PyPy Copyright holders 2003-2017 +----------------------------------- + +Except when otherwise stated (look for LICENSE files or information at +the beginning of each file) the files in the 'pypy' directory are each +copyrighted by one or more of the following people and organizations: + + Armin Rigo + Maciej Fijalkowski + Carl Friedrich Bolz + Amaury Forgeot d'Arc + Antonio Cuni + Samuele Pedroni + Matti Picus + Alex Gaynor + Philip Jenvey + Ronan Lamy + Brian Kearns + Richard Plangger + Michael Hudson + Manuel Jacob + David Schneider + Holger Krekel + Christian Tismer + Hakan Ardo + Benjamin Peterson + Anders Chrigstrom + Eric van Riet Paap + Wim Lavrijsen + Richard Emslie + Alexander Schremmer + Dan Villiom Podlaski Christiansen + Remi Meier + Lukas Diekmann + Sven Hager + Anders Lehmann + Aurelien Campeas + Niklaus Haldimann + Camillo Bruni + Laura Creighton + Romain Guillebert + Toon Verwaest + Leonardo Santagada + Seo Sanghyeon + Ronny Pfannschmidt + Justin Peel + Raffael Tfirst + David Edelsohn + Anders Hammarquist + Jakub Gustak + Gregor Wegberg + Guido Wesdorp + Lawrence Oluyede + Bartosz Skowron + Daniel Roberts + Niko Matsakis + Adrien Di Mascio + Alexander Hesse + Ludovic Aubry + Jacob Hallen + Jason Creighton + Mark Young + Alex Martelli + Spenser Bauman + Michal Bendowski + stian + Jan de Mooij + Tyler Wade + Vincent Legoll + Michael Foord + Stephan Diehl + Stefan Schwarzer + Valentino Volonghi + Tomek Meka + Stefano Rivera + Patrick Maupin + Devin Jeanpierre + Bob Ippolito + Bruno Gola + David Malcolm + Jean-Paul Calderone + Timo Paulssen + Edd Barrett + Squeaky + Marius Gedminas + Alexandre Fayolle + Simon Burton + Martin Matusiak + Nicolas Truessel + Konstantin Lopuhin + Wenzhu Man + John Witulski + Laurence Tratt + Ivan Sichmann Freitas + Greg Price + Dario Bertini + Mark Pearse + Simon Cross + Jeremy Thurgood + Andreas Stührk + Tobias Pape + Jean-Philippe St. Pierre + Guido van Rossum + Pavel Vinogradov + PaweÅ‚ Piotr Przeradowski + Paul deGrandis + Ilya Osadchiy + marky1991 + Tobias Oberstein + Adrian Kuhn + Boris Feigin + tav + Taavi Burns + Georg Brandl + Bert Freudenberg + Stian Andreassen + Wanja Saatkamp + Gerald Klix + Mike Blume + Oscar Nierstrasz + Stefan H. 
Muller + Rami Chowdhury + Eugene Oden + Henry Mason + Vasily Kuznetsov + Preston Timmons + David Ripton + Jeff Terrace + Tim Felgentreff + Dusty Phillips + Lukas Renggli + Guenter Jantzen + William Leslie + Ned Batchelder + Anton Gulenko + Amit Regmi + Ben Young + Jasper Schulz + Nicolas Chauvat + Andrew Durdin + Andrew Chambers + Sergey Matyunin + Michael Schneider + Nicholas Riley + Jason Chu + Igor Trindade Oliveira + Yichao Yu + Rocco Moretti + Gintautas Miliauskas + Michael Twomey + Lucian Branescu Mihaila + anatoly techtonik + Gabriel Lavoie + Olivier Dormond + Jared Grubb + Karl Bartel + Wouter van Heyst + Brian Dorsey + Victor Stinner + Andrews Medina + Sebastian PawluÅ› + Stuart Williams + Daniel Patrick + Aaron Iles + Toby Watson + Antoine Pitrou + Christian Hudon + Michael Cheng + Justas Sadzevicius + Gasper Zejn + Neil Shepperd + Stanislaw Halik + Mikael Schönenberg + Berkin Ilbeyi + Faye Zhao + Elmo Mäntynen + Jonathan David Riehl + Anders Qvist + Corbin Simpson + Chirag Jadwani + Beatrice During + Alex Perry + Vaibhav Sood + Alan McIntyre + Reuben Cummings + Alexander Sedov + p_zieschang@yahoo.de + Attila Gobi + Christopher Pope + Aaron Gallagher + Florin Papa + Christian Tismer + Marc Abramowitz + Dan Stromberg + Arjun Naik + Valentina Mukhamedzhanova + Stefano Parmesan + touilleMan + Alexis Daboville + Jens-Uwe Mager + Carl Meyer + Karl Ramm + Pieter Zieschang + Gabriel + Lukas Vacek + Kunal Grover + Andrew Dalke + Sylvain Thenault + Jakub Stasiak + Nathan Taylor + Vladimir Kryachko + Omer Katz + Mark Williams + Jacek Generowicz + Alejandro J. Cura + Jacob Oscarson + Travis Francis Athougies + Ryan Gonzalez + Ian Foote + Kristjan Valur Jonsson + David Lievens + Neil Blakey-Milner + Lutz Paelike + Lucio Torre + Lars Wassermann + Philipp Rustemeuer + Henrik Vendelbo + Richard Lancaster + Yasir Suhail + Dan Buch + Miguel de Val Borro + Artur Lisiecki + Sergey Kishchenko + Ignas Mikalajunas + Alecsandru Patrascu + Christoph Gerum + Martin Blais + Lene Wagner + Catalin Gabriel Manciu + Tomo Cocoa + Kim Jin Su + rafalgalczynski@gmail.com + Toni Mattis + Amber Brown + Lucas Stadler + Julian Berman + Markus Holtermann + roberto@goyle + Yury V. 
Zaytsev + Anna Katrina Dominguez + Bobby Impollonia + Vasantha Ganesh K + Andrew Thompson + florinpapa + Yusei Tahara + Aaron Tubbs + Ben Darnell + Roberto De Ioris + Logan Chien + Juan Francisco Cantero Hurtado + Ruochen Huang + Jeong YunWon + Godefroid Chappelle + Joshua Gilbert + Dan Colish + Christopher Armstrong + Michael Hudson-Doyle + Anders Sigfridsson + Nikolay Zinov + Jason Michalski + Floris Bruynooghe + Laurens Van Houtven + Akira Li + Gustavo Niemeyer + Stephan Busemann + RafaÅ‚ GaÅ‚czyÅ„ski + Matt Bogosian + timo + Christian Muirhead + Berker Peksag + James Lan + Volodymyr Vladymyrov + shoma hosaka + Ben Mather + Niclas Olofsson + Matthew Miller + Rodrigo Araújo + halgari + Boglarka Vezer + Chris Pressey + Buck Golemon + Diana Popa + Konrad Delong + Dinu Gherman + Chris Lambacher + coolbutuseless@gmail.com + Daniil Yarancev + Jim Baker + Dan Crosta + Nikolaos-Digenis Karagiannis + James Robert + Armin Ronacher + Brett Cannon + Donald Stufft + yrttyr + aliceinwire + OlivierBlanvillain + Dan Sanders + Zooko Wilcox-O Hearn + Tomer Chachamu + Christopher Groskopf + Asmo Soinio + jiaaro + Mads Kiilerich + Antony Lee + Jason Madden + Daniel Neuhäuser + reubano@gmail.com + Yaroslav Fedevych + Jim Hunziker + Markus Unterwaditzer + Even Wiik Thomassen + jbs + squeaky + soareschen + Jonas Pfannschmidt + Kurt Griffiths + Mike Bayer + Stefan Marr + Flavio Percoco + Kristoffer Kleine + Michael Chermside + Anna Ravencroft + pizi + remarkablerocket + Andrey Churin + Zearin + Eli Stevens + Tobias Diaz + Julien Phalip + Roman Podoliaka + Dan Loewenherz + werat + + Heinrich-Heine University, Germany + Open End AB (formerly AB Strakt), Sweden + merlinux GmbH, Germany + tismerysoft GmbH, Germany + Logilab Paris, France + DFKI GmbH, Germany + Impara, Germany + Change Maker, Sweden + University of California Berkeley, USA + Google Inc. + King's College London + +The PyPy Logo as used by http://speed.pypy.org and others was created +by Samuel Reis and is distributed on terms of Creative Commons Share Alike +License. + +License for 'lib-python/2.7' +============================ + +Except when otherwise stated (look for LICENSE files or copyright/license +information at the beginning of each file) the files in the 'lib-python/2.7' +directory are all copyrighted by the Python Software Foundation and licensed +under the terms that you can find here: https://docs.python.org/2/license.html + +License for 'pypy/module/unicodedata/' +====================================== + +The following files are from the website of The Unicode Consortium +at http://www.unicode.org/. For the terms of use of these files, see +http://www.unicode.org/terms_of_use.html . Or they are derived from +files from the above website, and the same terms of use apply. + + CompositionExclusions-*.txt + EastAsianWidth-*.txt + LineBreak-*.txt + UnicodeData-*.txt + UnihanNumeric-*.txt + +License for 'dotviewer/font/' +============================= + +Copyright (C) 2008 The Android Open Source Project + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Detailed license information is contained in the NOTICE file in the +directory. + + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and +acknowledgements for third-party software incorporated in the PyPy +distribution. + +License for 'Tcl/Tk' +-------------------- + +This copy of PyPy contains library code that may, when used, result in +the Tcl/Tk library to be loaded. PyPy also includes code that may be +regarded as being a copy of some parts of the Tcl/Tk header files. +You may see a copy of the License for Tcl/Tk in the file +`lib_pypy/_tkinter/license.terms` included here. + +License for 'bzip2' +------------------- + +This copy of PyPy may be linked (dynamically or statically) with the +bzip2 library. You may see a copy of the License for bzip2/libbzip2 at + + http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html + +License for 'openssl' +--------------------- + +This copy of PyPy may be linked (dynamically or statically) with the +openssl library. You may see a copy of the License for OpenSSL at + + https://www.openssl.org/source/license.html + +License for 'gdbm' +------------------ + +The gdbm module includes code from gdbm.h, which is distributed under +the terms of the GPL license version 2 or any later version. Thus the +gdbm module, provided in the file lib_pypy/gdbm.py, is redistributed +under the terms of the GPL license as well. + +License for 'rpython/rlib/rvmprof/src' +-------------------------------------- + +The code is based on gperftools. You may see a copy of the License for it at + + https://github.com/gperftools/gperftools/blob/master/COPYING diff -r 28e2996df412 -r 051349b537b2 pyparser/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/__init__.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,1 @@ +# empty diff -r 28e2996df412 -r 051349b537b2 pyparser/automata.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/automata.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,120 @@ +# ______________________________________________________________________ +"""Module automata + +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED +TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists +instead of a unique nested one) + +$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $ +""" +# ______________________________________________________________________ +# Module level definitions + +# PYPY Modification: removed the EMPTY class as it's not needed here + + +# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython +# dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into +# more compact strings. +DEFAULT = object() + +# PYPY Modification : removed all automata functions (any, maybe, +# newArcPair, etc.) 
+ +ERROR_STATE = chr(255) + +class DFA: + # ____________________________________________________________ + def __init__(self, states, accepts, start = 0): + """ NOT_RPYTHON """ + assert len(states) < 255 # no support for huge amounts of states + # construct string for looking up state transitions + string_states = [] * len(states) + # compute maximum + maximum = 0 + for state in states: + for key in state: + if key == DEFAULT: + continue + maximum = max(ord(key), maximum) + self.max_char = maximum + 1 + + defaults = [] + for i, state in enumerate(states): + default = ERROR_STATE + if DEFAULT in state: + default = chr(state[DEFAULT]) + defaults.append(default) + string_state = [default] * self.max_char + for key, value in state.iteritems(): + if key == DEFAULT: + continue + assert len(key) == 1 + assert ord(key) < self.max_char + string_state[ord(key)] = chr(value) + string_states.extend(string_state) + self.states = "".join(string_states) + self.defaults = "".join(defaults) + self.accepts = accepts + self.start = start + + # ____________________________________________________________ + + def _next_state(self, item, crntState): + if ord(item) >= self.max_char: + return self.defaults[crntState] + else: + return self.states[crntState * self.max_char + ord(item)] + + def recognize(self, inVec, pos = 0): + crntState = self.start + lastAccept = False + i = pos + for i in range(pos, len(inVec)): + item = inVec[i] + accept = self.accepts[crntState] + crntState = self._next_state(item, crntState) + if crntState != ERROR_STATE: + pass + elif accept: + return i + elif lastAccept: + # This is now needed b/c of exception cases where there are + # transitions to dead states + return i - 1 + else: + return -1 + crntState = ord(crntState) + lastAccept = accept + # if self.states[crntState][1]: + if self.accepts[crntState]: + return i + 1 + elif lastAccept: + return i + else: + return -1 + +# ______________________________________________________________________ + +class NonGreedyDFA (DFA): + + def recognize(self, inVec, pos = 0): + crntState = self.start + i = pos + for i in range(pos, len(inVec)): + item = inVec[i] + accept = self.accepts[crntState] + if accept: + return i + crntState = self._next_state(item, crntState) + if crntState == ERROR_STATE: + return -1 + crntState = ord(crntState) + i += 1 + if self.accepts[crntState]: + return i + else: + return -1 + +# ______________________________________________________________________ +# End of automata.py diff -r 28e2996df412 -r 051349b537b2 pyparser/consts.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/consts.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,8 @@ +""" +Various flags used during the compilation process. +""" + +PyCF_SOURCE_IS_UTF8 = 0x0100 +PyCF_DONT_IMPLY_DEDENT = 0x0200 +PyCF_ONLY_AST = 0x0400 +PyCF_ACCEPT_NULL_BYTES = 0x10000000 # PyPy only, for compile() diff -r 28e2996df412 -r 051349b537b2 pyparser/data/Grammar2.5 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/data/Grammar2.5 Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,148 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. 
+ +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Commands for Kees Blom's railroad program +#diagram:token NAME +#diagram:token NUMBER +#diagram:token STRING +#diagram:token NEWLINE +#diagram:token ENDMARKER +#diagram:token INDENT +#diagram:output\input python.bla +#diagram:token DEDENT +#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm +#diagram:rules + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +funcdef: [decorators] 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME [('as' | NAME) NAME] +dotted_as_name: dotted_name [('as' | NAME) NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' test [ with_var ] ':' suite +with_var: ('as' | NAME) expr +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [',' test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_gexp: test ( gen_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictmaker: test ':' test (',' test ':' test)* [','] + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) +argument: test [gen_for] | test '=' test # Really [keyword '='] test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +gen_iter: gen_for | gen_if +gen_for: 'for' exprlist 'in' or_test [gen_iter] +gen_if: 'if' old_test [gen_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff -r 28e2996df412 -r 051349b537b2 pyparser/data/Grammar2.7 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/data/Grammar2.7 Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,143 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. 
+ +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( comp_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictmaker: test ':' test (',' test ':' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. 
+argument: test [comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff -r 28e2996df412 -r 051349b537b2 pyparser/error.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/error.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,40 @@ + +class SyntaxError(Exception): + """Base class for exceptions raised by the parser.""" + + def __init__(self, msg, lineno=0, offset=0, text=None, filename=None, + lastlineno=0): + self.msg = msg + self.lineno = lineno + self.offset = offset + self.text = text + self.filename = filename + self.lastlineno = lastlineno + + def __str__(self): + return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, + self.lineno, + self.offset, + self.text) + +class IndentationError(SyntaxError): + pass + +class ASTError(Exception): + def __init__(self, msg, ast_node ): + self.msg = msg + self.ast_node = ast_node + + +class TokenError(SyntaxError): + + def __init__(self, msg, line, lineno, column, tokens, lastlineno=0): + SyntaxError.__init__(self, msg, lineno, column, line, + lastlineno=lastlineno) + self.tokens = tokens + +class TokenIndentationError(IndentationError): + + def __init__(self, msg, line, lineno, column, tokens): + SyntaxError.__init__(self, msg, lineno, column, line) + self.tokens = tokens diff -r 28e2996df412 -r 051349b537b2 pyparser/genpytokenize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/genpytokenize.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,340 @@ +#! /usr/bin/env python +"""Module genPytokenize + +Generates finite state automata for recognizing Python tokens. These are hand +coded versions of the regular expressions originally appearing in Ping's +tokenize module in the Python standard library. + +When run from the command line, this should pretty print the DFA machinery. 
+ +$Id: genPytokenize.py,v 1.1 2003/10/02 17:37:17 jriehl Exp $ +""" + +from pyparser.pylexer import * +from pyparser.automata import NonGreedyDFA, DFA, DEFAULT + +def makePyPseudoDFA (): + import string + states = [] + def makeEOL(): + return group(states, + newArcPair(states, "\n"), + chain(states, + newArcPair(states, "\r"), + maybe(states, newArcPair(states, "\n")))) + # ____________________________________________________________ + def makeLineCont (): + return chain(states, + newArcPair(states, "\\"), + makeEOL()) + # ____________________________________________________________ + # Ignore stuff + def makeWhitespace (): + return any(states, groupStr(states, " \f\t")) + # ____________________________________________________________ + def makeComment (): + return chain(states, + newArcPair(states, "#"), + any(states, notGroupStr(states, "\r\n"))) + # ____________________________________________________________ + #ignore = chain(states, + # makeWhitespace(), + # any(states, chain(states, + # makeLineCont(), + # makeWhitespace())), + # maybe(states, makeComment())) + # ____________________________________________________________ + # Names + name = chain(states, + groupStr(states, string.letters + "_"), + any(states, groupStr(states, + string.letters + string.digits + "_"))) + # ____________________________________________________________ + # Digits + def makeDigits (): + return groupStr(states, "0123456789") + # ____________________________________________________________ + # Integer numbers + hexNumber = chain(states, + newArcPair(states, "0"), + groupStr(states, "xX"), + atleastonce(states, + groupStr(states, "0123456789abcdefABCDEF")), + maybe(states, groupStr(states, "lL"))) + octNumber = chain(states, + newArcPair(states, "0"), + maybe(states, + chain(states, + groupStr(states, "oO"), + groupStr(states, "01234567"))), + any(states, groupStr(states, "01234567")), + maybe(states, groupStr(states, "lL"))) + binNumber = chain(states, + newArcPair(states, "0"), + groupStr(states, "bB"), + atleastonce(states, groupStr(states, "01")), + maybe(states, groupStr(states, "lL"))) + decNumber = chain(states, + groupStr(states, "123456789"), + any(states, makeDigits()), + maybe(states, groupStr(states, "lL"))) + intNumber = group(states, hexNumber, octNumber, binNumber, decNumber) + # ____________________________________________________________ + # Exponents + def makeExp (): + return chain(states, + groupStr(states, "eE"), + maybe(states, groupStr(states, "+-")), + atleastonce(states, makeDigits())) + # ____________________________________________________________ + # Floating point numbers + def makeFloat (): + pointFloat = chain(states, + group(states, + chain(states, + atleastonce(states, makeDigits()), + newArcPair(states, "."), + any(states, makeDigits())), + chain(states, + newArcPair(states, "."), + atleastonce(states, makeDigits()))), + maybe(states, makeExp())) + expFloat = chain(states, + atleastonce(states, makeDigits()), + makeExp()) + return group(states, pointFloat, expFloat) + # ____________________________________________________________ + # Imaginary numbers + imagNumber = group(states, + chain(states, + atleastonce(states, makeDigits()), + groupStr(states, "jJ")), + chain(states, + makeFloat(), + groupStr(states, "jJ"))) + # ____________________________________________________________ + # Any old number. 
+ number = group(states, imagNumber, makeFloat(), intNumber) + # ____________________________________________________________ + # Funny + operator = group(states, + chain(states, + chainStr(states, "**"), + maybe(states, newArcPair(states, "="))), + chain(states, + chainStr(states, ">>"), + maybe(states, newArcPair(states, "="))), + chain(states, + chainStr(states, "<<"), + maybe(states, newArcPair(states, "="))), + chainStr(states, "<>"), + chainStr(states, "!="), + chain(states, + chainStr(states, "//"), + maybe(states, newArcPair(states, "="))), + chain(states, + groupStr(states, "+-*/%&|^=<>"), + maybe(states, newArcPair(states, "="))), + newArcPair(states, "~")) + bracket = groupStr(states, "[](){}") + special = group(states, + makeEOL(), + groupStr(states, "@:;.,`")) + funny = group(states, operator, bracket, special) + # ____________________________________________________________ + def makeStrPrefix (): + return chain(states, + maybe(states, groupStr(states, "uUbB")), + maybe(states, groupStr(states, "rR"))) + # ____________________________________________________________ + contStr = group(states, + chain(states, + makeStrPrefix(), + newArcPair(states, "'"), + any(states, + notGroupStr(states, "\r\n'\\")), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, + notGroupStr(states, "\r\n'\\")))), + group(states, + newArcPair(states, "'"), + makeLineCont())), + chain(states, + makeStrPrefix(), + newArcPair(states, '"'), + any(states, + notGroupStr(states, '\r\n"\\')), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, + notGroupStr(states, '\r\n"\\')))), + group(states, + newArcPair(states, '"'), + makeLineCont()))) + triple = chain(states, + makeStrPrefix(), + group(states, + chainStr(states, "'''"), + chainStr(states, '"""'))) + pseudoExtras = group(states, + makeLineCont(), + makeComment(), + triple) + pseudoToken = chain(states, + makeWhitespace(), + group(states, + newArcPair(states, EMPTY), + pseudoExtras, number, funny, contStr, name)) + dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken) + return DFA(dfaStates, dfaAccepts), dfaStates + +# ______________________________________________________________________ + +def makePyEndDFAMap (): + states = [] + single = chain(states, + any(states, notGroupStr(states, "'\\")), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, notGroupStr(states, "'\\")))), + newArcPair(states, "'")) + states, accepts = nfaToDfa(states, *single) + singleDFA = DFA(states, accepts) + states_singleDFA = states + states = [] + double = chain(states, + any(states, notGroupStr(states, '"\\')), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, notGroupStr(states, '"\\')))), + newArcPair(states, '"')) + states, accepts = nfaToDfa(states, *double) + doubleDFA = DFA(states, accepts) + states_doubleDFA = states + states = [] + single3 = chain(states, + any(states, notGroupStr(states, "'\\")), + any(states, + chain(states, + group(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT)), + chain(states, + newArcPair(states, "'"), + notChainStr(states, "''"))), + any(states, notGroupStr(states, "'\\")))), + chainStr(states, "'''")) + states, accepts = nfaToDfa(states, *single3) + single3DFA = NonGreedyDFA(states, accepts) + states_single3DFA = states + states = [] + double3 = chain(states, + any(states, notGroupStr(states, '"\\')), + 
any(states, + chain(states, + group(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT)), + chain(states, + newArcPair(states, '"'), + notChainStr(states, '""'))), + any(states, notGroupStr(states, '"\\')))), + chainStr(states, '"""')) + states, accepts = nfaToDfa(states, *double3) + double3DFA = NonGreedyDFA(states, accepts) + states_double3DFA = states + map = {"'" : (singleDFA, states_singleDFA), + '"' : (doubleDFA, states_doubleDFA), + "r" : None, + "R" : None, + "u" : None, + "U" : None, + "b" : None, + "B" : None} + for uniPrefix in ("", "u", "U", "b", "B", ): + for rawPrefix in ("", "r", "R"): + prefix = uniPrefix + rawPrefix + map[prefix + "'''"] = (single3DFA, states_single3DFA) + map[prefix + '"""'] = (double3DFA, states_double3DFA) + return map + +# ______________________________________________________________________ + +def output(name, dfa_class, dfa, states): + import textwrap + lines = [] + i = 0 + for line in textwrap.wrap(repr(dfa.accepts), width = 50): + if i == 0: + lines.append("accepts = ") + else: + lines.append(" ") + lines.append(line) + lines.append("\n") + i += 1 + import StringIO + lines.append("states = [\n") + for numstate, state in enumerate(states): + lines.append(" # ") + lines.append(str(numstate)) + lines.append('\n') + s = StringIO.StringIO() + i = 0 + for k, v in sorted(state.items()): + i += 1 + if k == DEFAULT: + k = "automata.DEFAULT" + else: + k = repr(k) + s.write(k) + s.write('::') + s.write(repr(v)) + if i < len(state): + s.write(', ') + s.write('},') + i = 0 + if len(state) <= 4: + text = [s.getvalue()] + else: + text = textwrap.wrap(s.getvalue(), width=36) + for line in text: + line = line.replace('::', ': ') + if i == 0: + lines.append(' {') + else: + lines.append(' ') + lines.append(line) + lines.append('\n') + i += 1 + lines.append(" ]\n") + lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class)) + return ''.join(lines) + +def main (): + pseudoDFA, states_pseudoDFA = makePyPseudoDFA() + print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA) + endDFAMap = makePyEndDFAMap() + dfa, states = endDFAMap['"""'] + print output("double3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'''"] + print output("single3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'"] + print output("singleDFA", "DFA", dfa, states) + dfa, states = endDFAMap["\""] + print output("doubleDFA", "DFA", dfa, states) + +# ______________________________________________________________________ + +if __name__ == "__main__": + main() diff -r 28e2996df412 -r 051349b537b2 pyparser/metaparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/metaparser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,357 @@ +""" +Makes a parser from a grammar source. + +Inspired by Guido van Rossum's pgen2. 
+""" + +import StringIO +import tokenize +import token + +from pyparser import parser + + +class PgenError(Exception): + + def __init__(self, msg, location=None): + Exception.__init__(self, msg) + self.location = location + + +class NFA(object): + + def __init__(self): + self.arcs = [] + + def arc(self, to_state, label=None): + self.arcs.append((label, to_state)) + + def find_unlabeled_states(self, into): + if self in into: + return + into.add(self) + for label, state in self.arcs: + if label is None: + state.find_unlabeled_states(into) + + +class DFA(object): + + def __init__(self, nfa_set, final_state): + self.nfas = nfa_set + self.is_final = final_state in nfa_set + self.arcs = {} + + def arc(self, next, label): + self.arcs[label] = next + + def unify_state(self, old, new): + for label, state in self.arcs.iteritems(): + if state is old: + self.arcs[label] = new + + def __repr__(self): + return "" % self.arcs + + def __eq__(self, other): + if not isinstance(other, DFA): + # This shouldn't really happen. + return NotImplemented + if other.is_final != self.is_final: + return False + if len(self.arcs) != len(other.arcs): + return False + for label, state in self.arcs.iteritems(): + try: + other_state = other.arcs[label] + except KeyError: + return False + else: + if other_state is not state: + return False + return True + + +def nfa_to_dfa(start, end): + """Convert an NFA to a DFA(s) + + Each DFA is initially a set of NFA states without labels. We start with the + DFA for the start NFA. Then we add labeled arcs to it pointing to another + set of NFAs (the next state). Finally, we do the same thing to every DFA + that is found and return the list of states. + """ + base_nfas = set() + start.find_unlabeled_states(base_nfas) + state_stack = [DFA(base_nfas, end)] + for state in state_stack: + arcs = {} + for nfa in state.nfas: + for label, sub_nfa in nfa.arcs: + if label is not None: + sub_nfa.find_unlabeled_states(arcs.setdefault(label, set())) + for label, nfa_set in arcs.iteritems(): + for st in state_stack: + if st.nfas == nfa_set: + break + else: + st = DFA(nfa_set, end) + state_stack.append(st) + state.arc(st, label) + return state_stack + +def simplify_dfa(dfa): + changed = True + while changed: + changed = False + for i, state in enumerate(dfa): + for j in xrange(i + 1, len(dfa)): + other_state = dfa[j] + if state == other_state: + del dfa[j] + for sub_state in dfa: + sub_state.unify_state(other_state, state) + changed = True + break + + +class ParserGenerator(object): + """NOT_RPYTHON""" + + def __init__(self, grammar_source): + self.start_symbol = None + self.dfas = {} + stream = StringIO.StringIO(grammar_source) + self.token_stream = tokenize.generate_tokens(stream.readline) + self.parse() + self.first = {} + self.add_first_sets() + + def build_grammar(self, grammar_cls): + gram = grammar_cls() + gram.start = self.start_symbol + names = self.dfas.keys() + names.sort() + names.remove(self.start_symbol) + names.insert(0, self.start_symbol) + # First, build symbol and id mappings. + for name in names: + i = 256 + len(gram.symbol_ids) + gram.symbol_ids[name] = i + gram.symbol_names[i] = name + # Then, iterate through again and finalize labels. 
+ for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in state.arcs.iteritems(): + arcs.append((self.make_label(gram, label), dfa.index(next))) + states.append((arcs, state.is_final)) + gram.dfas.append((states, self.make_first(gram, name))) + assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256 + gram.start = gram.symbol_ids[self.start_symbol] + return gram + + def make_label(self, gram, label): + label_index = len(gram.labels) + if label[0].isalpha(): + # Either a symbol or a token. + if label in gram.symbol_ids: + if label in gram.symbol_to_label: + return gram.symbol_to_label[label] + else: + gram.labels.append(gram.symbol_ids[label]) + gram.symbol_to_label[label] = label_index + return label_index + elif label.isupper(): + token_index = gram.TOKENS[label] + if token_index in gram.token_ids: + return gram.token_ids[token_index] + else: + gram.labels.append(token_index) + gram.token_ids[token_index] = label_index + return label_index + else: + # Probably a rule without a definition. + raise PgenError("no such rule: %r" % (label,)) + else: + # A keyword or operator. + value = label.strip("\"'") + if value[0].isalpha(): + if value in gram.keyword_ids: + return gram.keyword_ids[value] + else: + gram.labels.append(gram.KEYWORD_TOKEN) + gram.keyword_ids[value] = label_index + return label_index + else: + try: + token_index = gram.OPERATOR_MAP[value] + except KeyError: + raise PgenError("no such operator: %r" % (value,)) + if token_index in gram.token_ids: + return gram.token_ids[token_index] + else: + gram.labels.append(token_index) + gram.token_ids[token_index] = label_index + return label_index + + def make_first(self, gram, name): + original_firsts = self.first[name] + firsts = dict() + for label in original_firsts: + firsts[self.make_label(gram, label)] = None + return firsts + + def add_first_sets(self): + for name, dfa in self.dfas.iteritems(): + if name not in self.first: + self.get_first(name, dfa) + + def get_first(self, name, dfa): + self.first[name] = None + state = dfa[0] + all_labels = set() + overlap_check = {} + for label, sub_state in state.arcs.iteritems(): + if label in self.dfas: + if label in self.first: + new_labels = self.first[label] + if new_labels is None: + raise PgenError("recursion in rule: %r" % (name,)) + else: + new_labels = self.get_first(label, self.dfas[label]) + all_labels.update(new_labels) + overlap_check[label] = new_labels + else: + all_labels.add(label) + overlap_check[label] = set((label,)) + inverse = {} + for label, their_first in overlap_check.iteritems(): + for sub_label in their_first: + if sub_label in inverse: + raise PgenError("ambiguous symbol with label %s" + % (label,)) + inverse[sub_label] = label + self.first[name] = all_labels + return all_labels + + def expect(self, token_type, value=None): + if token_type != self.type: + expected = token.tok_name[token_type] + got = token.tok_name[self.type] + raise PgenError("expected token %s but got %s" % (expected, got), + self.location) + current_value = self.value + if value is not None: + if value != current_value: + msg = "expected %r but got %r" % (value, current_value) + raise PgenError(msg,self.location) + self.advance_token() + return current_value + + def test_token(self, token_type, value): + if self.type == token_type and self.value == value: + return True + return False + + def advance_token(self): + data = self.token_stream.next() + # Ignore comments and non-logical newlines. 
+ while data[0] in (tokenize.NL, tokenize.COMMENT): + data = self.token_stream.next() + self.type, self.value = data[:2] + self.location = data[2:] + + def parse(self): + self.advance_token() + while self.type != token.ENDMARKER: + # Skip over whitespace. + while self.type == token.NEWLINE: + self.advance_token() + name, start_state, end_state = self.parse_rule() + dfa = nfa_to_dfa(start_state, end_state) + simplify_dfa(dfa) + self.dfas[name] = dfa + if self.start_symbol is None: + self.start_symbol = name + + def parse_rule(self): + # RULE: NAME ':' ALTERNATIVES + name = self.expect(token.NAME) + self.expect(token.OP, ":") + start_state, end_state = self.parse_alternatives() + self.expect(token.NEWLINE) + return name, start_state, end_state + + def parse_alternatives(self): + # ALTERNATIVES: ITEMS ('|' ITEMS)* + first_state, end_state = self.parse_items() + if self.test_token(token.OP, "|"): + # Link all alternatives into a enclosing set of states. + enclosing_start_state = NFA() + enclosing_end_state = NFA() + enclosing_start_state.arc(first_state) + end_state.arc(enclosing_end_state) + while self.test_token(token.OP, "|"): + self.advance_token() + sub_start_state, sub_end_state = self.parse_items() + enclosing_start_state.arc(sub_start_state) + sub_end_state.arc(enclosing_end_state) + first_state = enclosing_start_state + end_state = enclosing_end_state + return first_state, end_state + + def parse_items(self): + # ITEMS: ITEM+ + first_state, end_state = self.parse_item() + while self.type in (token.STRING, token.NAME) or \ + self.test_token(token.OP, "(") or \ + self.test_token(token.OP, "["): + sub_first_state, new_end_state = self.parse_item() + end_state.arc(sub_first_state) + end_state = new_end_state + return first_state, end_state + + def parse_item(self): + # ITEM: '[' ALTERNATIVES ']' | ATOM ['+' | '*'] + if self.test_token(token.OP, "["): + self.advance_token() + start_state, end_state = self.parse_alternatives() + self.expect(token.OP, "]") + # Bypass the rule if this is optional. + start_state.arc(end_state) + return start_state, end_state + else: + atom_state, next_state = self.parse_atom() + # Check for a repeater. + if self.type == token.OP and self.value in ("+", "*"): + next_state.arc(atom_state) + repeat = self.value + self.advance_token() + if repeat == "*": + # Optionally repeated + return atom_state, atom_state + else: + # Required + return atom_state, next_state + else: + return atom_state, next_state + + def parse_atom(self): + # ATOM: '(' ALTERNATIVES ')' | NAME | STRING + if self.test_token(token.OP, "("): + self.advance_token() + rule = self.parse_alternatives() + self.expect(token.OP, ")") + return rule + elif self.type in (token.NAME, token.STRING): + atom_state = NFA() + next_state = NFA() + atom_state.arc(next_state, self.value) + self.advance_token() + return atom_state, next_state + else: + invalid = token.tok_name[self.type] + raise PgenError("unexpected token: %s" % (invalid,), + self.location) diff -r 28e2996df412 -r 051349b537b2 pyparser/parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/parser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,287 @@ +""" +A CPython inspired RPython parser. +""" + + +class Grammar(object): + """ + Base Grammar object. + + Pass this to ParserGenerator.build_grammar to fill it with useful values for + the Parser. 
+ """ + + def __init__(self): + self.symbol_ids = {} + self.symbol_names = {} + self.symbol_to_label = {} + self.keyword_ids = {} + self.dfas = [] + self.labels = [0] + self.token_ids = {} + self.start = -1 + + def shared_copy(self): + new = self.__class__() + new.symbol_ids = self.symbol_ids + new.symbols_names = self.symbol_names + new.keyword_ids = self.keyword_ids + new.dfas = self.dfas + new.labels = self.labels + new.token_ids = self.token_ids + return new + + def _freeze_(self): + # Remove some attributes not used in parsing. + try: + del self.symbol_to_label + del self.symbol_names + del self.symbol_ids + except AttributeError: + pass + return True + + +class Node(object): + + __slots__ = ("type", ) + + def __init__(self, type): + self.type = type + + def __eq__(self, other): + raise NotImplementedError("abstract base class") + + def __ne__(self, other): + return not self == other + + def get_value(self): + return None + + def get_child(self, i): + raise NotImplementedError("abstract base class") + + def num_children(self): + return 0 + + def append_child(self, child): + raise NotImplementedError("abstract base class") + + def get_lineno(self): + raise NotImplementedError("abstract base class") + + def get_column(self): + raise NotImplementedError("abstract base class") + + +class Terminal(Node): + __slots__ = ("value", "lineno", "column") + def __init__(self, type, value, lineno, column): + Node.__init__(self, type) + self.value = value + self.lineno = lineno + self.column = column + + def __repr__(self): + return "Terminal(type=%s, value=%r)" % (self.type, self.value) + + def __eq__(self, other): + # For tests. + return (type(self) == type(other) and + self.type == other.type and + self.value == other.value) + + def get_value(self): + return self.value + + def get_lineno(self): + return self.lineno + + def get_column(self): + return self.column + + +class AbstractNonterminal(Node): + __slots__ = () + + def get_lineno(self): + return self.get_child(0).get_lineno() + + def get_column(self): + return self.get_child(0).get_column() + + def __eq__(self, other): + # For tests. 
+ # grumble, annoying + if not isinstance(other, AbstractNonterminal): + return False + if self.type != other.type: + return False + if self.num_children() != other.num_children(): + return False + for i in range(self.num_children()): + if self.get_child(i) != other.get_child(i): + return False + return True + + +class Nonterminal(AbstractNonterminal): + __slots__ = ("_children", ) + def __init__(self, type, children): + Node.__init__(self, type) + self._children = children + + def __repr__(self): + return "Nonterminal(type=%s, children=%r)" % (self.type, self._children) + + def get_child(self, i): + return self._children[i] + + def num_children(self): + return len(self._children) + + def append_child(self, child): + self._children.append(child) + + +class Nonterminal1(AbstractNonterminal): + __slots__ = ("_child", ) + def __init__(self, type, child): + Node.__init__(self, type) + self._child = child + + def __repr__(self): + return "Nonterminal(type=%s, children=[%r])" % (self.type, self._child) + + def get_child(self, i): + assert i == 0 or i == -1 + return self._child + + def num_children(self): + return 1 + + def append_child(self, child): + assert 0, "should be unreachable" + + + +class ParseError(Exception): + + def __init__(self, msg, token_type, value, lineno, column, line, + expected=-1): + self.msg = msg + self.token_type = token_type + self.value = value + self.lineno = lineno + self.column = column + self.line = line + self.expected = expected + + def __str__(self): + return "ParserError(%s, %r)" % (self.token_type, self.value) + + +class Parser(object): + + def __init__(self, grammar): + self.grammar = grammar + self.root = None + self.stack = None + + def prepare(self, start=-1): + """Setup the parser for parsing. + + Takes the starting symbol as an argument. + """ + if start == -1: + start = self.grammar.start + self.root = None + current_node = Nonterminal(start, []) + self.stack = [] + self.stack.append((self.grammar.dfas[start - 256], 0, current_node)) + + def add_token(self, token_type, value, lineno, column, line): + label_index = self.classify(token_type, value, lineno, column, line) + sym_id = 0 # for the annotator + while True: + dfa, state_index, node = self.stack[-1] + states, first = dfa + arcs, is_accepting = states[state_index] + for i, next_state in arcs: + sym_id = self.grammar.labels[i] + if label_index == i: + # We matched a non-terminal. + self.shift(next_state, token_type, value, lineno, column) + state = states[next_state] + # While the only possible action is to accept, pop nodes off + # the stack. + while state[1] and not state[0]: + self.pop() + if not self.stack: + # Parsing is done. + return True + dfa, state_index, node = self.stack[-1] + state = dfa[0][state_index] + return False + elif sym_id >= 256: + sub_node_dfa = self.grammar.dfas[sym_id - 256] + # Check if this token can start a child node. + if label_index in sub_node_dfa[1]: + self.push(sub_node_dfa, next_state, sym_id, lineno, + column) + break + else: + # We failed to find any arcs to another state, so unless this + # state is accepting, it's invalid input. + if is_accepting: + self.pop() + if not self.stack: + raise ParseError("too much input", token_type, value, + lineno, column, line) + else: + # If only one possible input would satisfy, attach it to the + # error. 
+ if len(arcs) == 1: + expected = sym_id + else: + expected = -1 + raise ParseError("bad input", token_type, value, lineno, + column, line, expected) + + def classify(self, token_type, value, lineno, column, line): + """Find the label for a token.""" + if token_type == self.grammar.KEYWORD_TOKEN: + label_index = self.grammar.keyword_ids.get(value, -1) + if label_index != -1: + return label_index + label_index = self.grammar.token_ids.get(token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token_type, value, lineno, column, + line) + return label_index + + def shift(self, next_state, token_type, value, lineno, column): + """Shift a non-terminal and prepare for the next state.""" + dfa, state, node = self.stack[-1] + new_node = Terminal(token_type, value, lineno, column) + node.append_child(new_node) + self.stack[-1] = (dfa, next_state, node) + + def push(self, next_dfa, next_state, node_type, lineno, column): + """Push a terminal and adjust the current state.""" + dfa, state, node = self.stack[-1] + new_node = Nonterminal(node_type, []) + self.stack[-1] = (dfa, next_state, node) + self.stack.append((next_dfa, 0, new_node)) + + def pop(self): + """Pop an entry off the stack and make its node a child of the last.""" + dfa, state, node = self.stack.pop() + if self.stack: + # we are now done with node, so we can store it more efficiently if + # it has just one child + if node.num_children() == 1: + node = Nonterminal1(node.type, node.get_child(0)) + self.stack[-1][2].append_child(node) + else: + self.root = node diff -r 28e2996df412 -r 051349b537b2 pyparser/pygram.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pygram.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,43 @@ +import os +from pyparser import parser, pytoken, metaparser + +class PythonGrammar(parser.Grammar): + + KEYWORD_TOKEN = pytoken.python_tokens["NAME"] + TOKENS = pytoken.python_tokens + OPERATOR_MAP = pytoken.python_opmap + +def _get_python_grammar(): + here = os.path.dirname(__file__) + fp = open(os.path.join(here, "data", "Grammar2.7")) + try: + gram_source = fp.read() + finally: + fp.close() + pgen = metaparser.ParserGenerator(gram_source) + return pgen.build_grammar(PythonGrammar) + + +python_grammar = _get_python_grammar() +python_grammar_no_print = python_grammar.shared_copy() +python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() +del python_grammar_no_print.keyword_ids["print"] + +class _Tokens(object): + pass + +for tok_name, idx in pytoken.python_tokens.iteritems(): + setattr(_Tokens, tok_name, idx) +tokens = _Tokens() + +class _Symbols(object): + pass +rev_lookup = {} +for sym_name, idx in python_grammar.symbol_ids.iteritems(): + setattr(_Symbols, sym_name, idx) + rev_lookup[idx] = sym_name +syms = _Symbols() +syms._rev_lookup = rev_lookup # for debugging +syms.sym_name = rev_lookup # for symbol module compatibility + +del _get_python_grammar, _Tokens, tok_name, sym_name, idx diff -r 28e2996df412 -r 051349b537b2 pyparser/pylexer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pylexer.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,240 @@ +# Used by genpytokenize.py to generate the parser in pytokenize.py +from pyparser.automata import DFA, DEFAULT + +class EMPTY: pass + +def newArcPair (states, transitionLabel): + s1Index = len(states) + s2Index = s1Index + 1 + states.append([(transitionLabel, s2Index)]) + states.append([]) + return s1Index, s2Index + +# ______________________________________________________________________ + +def chain 
(states, *stateIndexPairs): + if len(stateIndexPairs) > 1: + start, lastFinish = stateIndexPairs[0] + for nStart, nFinish in stateIndexPairs[1:]: + states[lastFinish].append((EMPTY, nStart)) + lastFinish = nFinish + return start, nFinish + else: + return stateIndexPairs[0] + + +# ______________________________________________________________________ + +def chainStr (states, str): + return chain(states, *map(lambda x : newArcPair(states, x), str)) + +# ______________________________________________________________________ + +def notChainStr (states, str): + """XXX I'm not sure this is how it should be done, but I'm going to + try it anyway. Note that for this case, I require only single character + arcs, since I would have to basically invert all accepting states and + non-accepting states of any sub-NFA's. + """ + assert len(str) > 0 + arcs = map(lambda x : newArcPair(states, x), str) + finish = len(states) + states.append([]) + start, lastFinish = arcs[0] + states[start].append((EMPTY, finish)) + for crntStart, crntFinish in arcs[1:]: + states[lastFinish].append((EMPTY, crntStart)) + states[crntStart].append((EMPTY, finish)) + return start, finish + +# ______________________________________________________________________ + +def group (states, *stateIndexPairs): + if len(stateIndexPairs) > 1: + start = len(states) + finish = start + 1 + startList = [] + states.append(startList) + states.append([]) + for eStart, eFinish in stateIndexPairs: + startList.append((EMPTY, eStart)) + states[eFinish].append((EMPTY, finish)) + return start, finish + else: + return stateIndexPairs[0] + +# ______________________________________________________________________ + +def groupStr (states, str): + return group(states, *map(lambda x : newArcPair(states, x), str)) + +# ______________________________________________________________________ + +def notGroup (states, *stateIndexPairs): + """Like group, but will add a DEFAULT transition to a new end state, + causing anything in the group to not match by going to a dead state. + XXX I think this is right... 
+ """ + start, dead = group(states, *stateIndexPairs) + finish = len(states) + states.append([]) + states[start].append((DEFAULT, finish)) + return start, finish + +# ______________________________________________________________________ + +def notGroupStr (states, str): + return notGroup(states, *map(lambda x : newArcPair(states, x), str)) +# ______________________________________________________________________ + +def any (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[finish].append((EMPTY, start)) + return start, start + +# ______________________________________________________________________ + +def maybe (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[start].append((EMPTY, finish)) + return start, finish + +# ______________________________________________________________________ + +def atleastonce (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[finish].append((EMPTY, start)) + return start, finish + +# ______________________________________________________________________ + +def closure (states, start, result = 0L): + if None == result: + result = 0L + if 0 == (result & (1L << start)): + result |= (1L << start) + for label, arrow in states[start]: + if label == EMPTY: + result |= closure(states, arrow, result) + return result + +# ______________________________________________________________________ + +def nfaToDfa (states, start, finish): + tempStates = [] + startClosure = closure(states, start) + crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))] + tempStates.append(crntTempState) + index = 0 + while index < len(tempStates): + crntTempState = tempStates[index] + crntClosure, crntArcs, crntAccept = crntTempState + for index2 in range(0, len(states)): + if 0 != (crntClosure & (1L << index2)): + for label, nfaArrow in states[index2]: + if label == EMPTY: + continue + foundTempArc = False + for tempArc in crntArcs: + if tempArc[0] == label: + foundTempArc = True + break + if not foundTempArc: + tempArc = [label, -1, 0L] + crntArcs.append(tempArc) + tempArc[2] = closure(states, nfaArrow, tempArc[2]) + for arcIndex in range(0, len(crntArcs)): + label, arrow, targetStates = crntArcs[arcIndex] + targetFound = False + arrow = 0 + for destTempState in tempStates: + if destTempState[0] == targetStates: + targetFound = True + break + arrow += 1 + if not targetFound: + assert arrow == len(tempStates) + newState = [targetStates, [], 0 != (targetStates & + (1L << finish))] + tempStates.append(newState) + crntArcs[arcIndex][1] = arrow + index += 1 + tempStates = simplifyTempDfa(tempStates) + states = finalizeTempDfa(tempStates) + return states + +# ______________________________________________________________________ + +def sameState (s1, s2): + """sameState(s1, s2) + Note: + state := [ nfaclosure : Long, [ arc ], accept : Boolean ] + arc := [ label, arrow : Int, nfaClosure : Long ] + """ + if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]): + return False + for arcIndex in range(0, len(s1[1])): + arc1 = s1[1][arcIndex] + arc2 = s2[1][arcIndex] + if arc1[:-1] != arc2[:-1]: + return False + return True + +# ______________________________________________________________________ + +def simplifyTempDfa (tempStates): + """simplifyTempDfa (tempStates) + """ + changes = True + deletedStates = [] + while changes: + changes = False + for i in range(1, len(tempStates)): + if i in deletedStates: + continue + for j in range(0, i): + if j in deletedStates: + continue + if 
sameState(tempStates[i], tempStates[j]): + deletedStates.append(i) + for k in range(0, len(tempStates)): + if k in deletedStates: + continue + for arc in tempStates[k][1]: + if arc[1] == i: + arc[1] = j + changes = True + break + for stateIndex in deletedStates: + tempStates[stateIndex] = None + return tempStates +# ______________________________________________________________________ + +def finalizeTempDfa (tempStates): + """finalizeTempDfa (tempStates) + + Input domain: + tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ] + tempArc := [ label, arrow, nfaClosure ] + + Output domain: + state := [ arcMap, accept : Boolean ] + """ + states = [] + accepts = [] + stateMap = {} + tempIndex = 0 + for tempIndex in range(0, len(tempStates)): + tempState = tempStates[tempIndex] + if None != tempState: + stateMap[tempIndex] = len(states) + states.append({}) + accepts.append(tempState[2]) + for tempIndex in stateMap.keys(): + stateBitset, tempArcs, accepting = tempStates[tempIndex] + newIndex = stateMap[tempIndex] + arcMap = states[newIndex] + for tempArc in tempArcs: + arcMap[tempArc[0]] = stateMap[tempArc[1]] + return states, accepts + diff -r 28e2996df412 -r 051349b537b2 pyparser/pyparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pyparse.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,200 @@ +from pyparser import parser, pytokenizer, pygram, error +from pyparser import consts + +def recode_to_utf8(bytes, encoding): + text = bytes.decode(encoding) + if not isinstance(text, unicode): + raise error.SyntaxError("codec did not return a unicode object") + recoded = text.encode("utf-8") + return recoded + +def _normalize_encoding(encoding): + """returns normalized name for + + see dist/src/Parser/tokenizer.c 'get_normal_name()' + for implementation details / reference + + NOTE: for now, parser.suite() raises a MemoryError when + a bad encoding is used. (SF bug #979739) + """ + if encoding is None: + return None + # lower() + '_' / '-' conversion + encoding = encoding.replace('_', '-').lower() + if encoding == 'utf-8' or encoding.startswith('utf-8-'): + return 'utf-8' + for variant in ['latin-1', 'iso-latin-1', 'iso-8859-1']: + if (encoding == variant or + encoding.startswith(variant + '-')): + return 'iso-8859-1' + return encoding + +def _check_for_encoding(s): + eol = s.find('\n') + if eol < 0: + return _check_line_for_encoding(s)[0] + enc, again = _check_line_for_encoding(s[:eol]) + if enc or not again: + return enc + eol2 = s.find('\n', eol + 1) + if eol2 < 0: + return _check_line_for_encoding(s[eol + 1:])[0] + return _check_line_for_encoding(s[eol + 1:eol2])[0] + + +def _check_line_for_encoding(line): + """returns the declared encoding or None""" + i = 0 + for i in range(len(line)): + if line[i] == '#': + break + if line[i] not in ' \t\014': + return None, False # Not a comment, don't read the second line. + return pytokenizer.match_encoding_declaration(line[i:]), True + + +class CompileInfo(object): + """Stores information about the source being compiled. + + * filename: The filename of the source. + * mode: The parse mode to use. ('exec', 'eval', or 'single') + * flags: Parser and compiler flags. + * encoding: The source encoding. 
+ """ + + def __init__(self, filename, mode="exec", flags=0): + self.filename = filename + self.mode = mode + self.encoding = None + self.flags = flags + + +_targets = { +'eval' : pygram.syms.eval_input, +'single' : pygram.syms.single_input, +'exec' : pygram.syms.file_input, +} + +class PythonParser(parser.Parser): + + def __init__(self, grammar=pygram.python_grammar): + parser.Parser.__init__(self, grammar) + + def parse_source(self, textsrc, compile_info): + """Main entry point for parsing Python source. + + Everything from decoding the source to tokenizing to building the parse + tree is handled here. + """ + # Detect source encoding. + enc = None + if textsrc.startswith("\xEF\xBB\xBF"): + textsrc = textsrc[3:] + enc = 'utf-8' + # If an encoding is explicitly given check that it is utf-8. + decl_enc = _check_for_encoding(textsrc) + if decl_enc and decl_enc != "utf-8": + raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc, + filename=compile_info.filename) + elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8: + enc = 'utf-8' + if _check_for_encoding(textsrc) is not None: + raise error.SyntaxError("coding declaration in unicode string", + filename=compile_info.filename) + else: + enc = _normalize_encoding(_check_for_encoding(textsrc)) + if enc is not None and enc not in ('utf-8', 'iso-8859-1'): + try: + textsrc = recode_to_utf8(textsrc, enc) + except LookupError as e: + # if the codec is not found, LookupError is raised. + raise error.SyntaxError("Unknown encoding: %s" % enc, + filename=compile_info.filename) + # Transform unicode errors into SyntaxError + except UnicodeDecodeError as e: + message = str(e) + raise error.SyntaxError(message) + + flags = compile_info.flags + + # The tokenizer is very picky about how it wants its input. + source_lines = textsrc.splitlines(True) + if source_lines and not source_lines[-1].endswith("\n"): + source_lines[-1] += '\n' + if textsrc and textsrc[-1] == "\n": + flags &= ~consts.PyCF_DONT_IMPLY_DEDENT + + self.prepare(_targets[compile_info.mode]) + tp = 0 + try: + try: + # Note: we no longer pass the CO_FUTURE_* to the tokenizer, + # which is expected to work independently of them. It's + # certainly the case for all futures in Python <= 2.7. + tokens = pytokenizer.generate_tokens(source_lines, flags) + + self.grammar = pygram.python_grammar + + for tp, value, lineno, column, line in tokens: + if self.add_token(tp, value, lineno, column, line): + break + except error.TokenError as e: + e.filename = compile_info.filename + raise + except parser.ParseError as e: + # Catch parse errors, pretty them up and reraise them as a + # SyntaxError. + new_err = error.IndentationError + if tp == pygram.tokens.INDENT: + msg = "unexpected indent" + elif e.expected == pygram.tokens.INDENT: + msg = "expected an indented block" + else: + new_err = error.SyntaxError + msg = "invalid syntax" + raise new_err(msg, e.lineno, e.column, e.line, + compile_info.filename) + else: + tree = self.root + finally: + # Avoid hanging onto the tree. 
+ self.root = None + if enc is not None: + compile_info.encoding = enc + return tree + +def parse(filename): + """returns the parsed contents of """ + info = CompileInfo(filename) + f = open(filename) + try: + return PythonParser().parse_source(f.read(), info) + finally: + f.close() + +def suite(text): + """returns the parsed form of the given program """ + info = CompileInfo("") + return PythonParser().parse_source(text, info) + +def expr(text): + """returns the parsed form of the given expression """ + info = CompileInfo("", "single") + return PythonParser().parse_source(text, info) + +def st2tuple(tree, line_info=True, col_info=False): + """returns in tuple form for the compiler package""" + if isinstance(tree, parser.AbstractNonterminal): + l = [tree.type] + for i in range(0, tree.num_children()): + l.append(st2tuple(tree.get_child(i))) + return tuple(l) + elif isinstance(tree, parser.Terminal): + l = [tree.type, tree.value] + if line_info: + l.append(tree.get_lineno()) + if col_info: + l.append(tree.get_column()) + return tuple(l) + else: + raise TypeError, tree diff -r 28e2996df412 -r 051349b537b2 pyparser/pytoken.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytoken.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,71 @@ +"""Python token definitions.""" + +python_tokens = {} +python_opmap = {} + +def _add_tok(name, *values): + index = len(python_tokens) + assert index < 256 + python_tokens[name] = index + for value in values: + python_opmap[value] = index + +_add_tok('ENDMARKER') +_add_tok('NAME') +_add_tok('NUMBER') +_add_tok('STRING') +_add_tok('NEWLINE') +_add_tok('INDENT') +_add_tok('DEDENT') +_add_tok('LPAR', "(") +_add_tok('RPAR', ")") +_add_tok('LSQB', "[") +_add_tok('RSQB', "]") +_add_tok('COLON', ":") +_add_tok('COMMA', "," ) +_add_tok('SEMI', ";" ) +_add_tok('PLUS', "+" ) +_add_tok('MINUS', "-" ) +_add_tok('STAR', "*" ) +_add_tok('SLASH', "/" ) +_add_tok('VBAR', "|" ) +_add_tok('AMPER', "&" ) +_add_tok('LESS', "<" ) +_add_tok('GREATER', ">" ) +_add_tok('EQUAL', "=" ) +_add_tok('DOT', "." ) +_add_tok('PERCENT', "%" ) +_add_tok('BACKQUOTE', "`" ) +_add_tok('LBRACE', "{" ) +_add_tok('RBRACE', "}" ) +_add_tok('EQEQUAL', "==" ) +_add_tok('NOTEQUAL', "!=", "<>" ) +_add_tok('LESSEQUAL', "<=" ) +_add_tok('GREATEREQUAL', ">=" ) +_add_tok('TILDE', "~" ) +_add_tok('CIRCUMFLEX', "^" ) +_add_tok('LEFTSHIFT', "<<" ) +_add_tok('RIGHTSHIFT', ">>" ) +_add_tok('DOUBLESTAR', "**" ) +_add_tok('PLUSEQUAL', "+=" ) +_add_tok('MINEQUAL', "-=" ) +_add_tok('STAREQUAL', "*=" ) +_add_tok('SLASHEQUAL', "/=" ) +_add_tok('PERCENTEQUAL', "%=" ) +_add_tok('AMPEREQUAL', "&=" ) +_add_tok('VBAREQUAL', "|=" ) +_add_tok('CIRCUMFLEXEQUAL', "^=" ) +_add_tok('LEFTSHIFTEQUAL', "<<=" ) +_add_tok('RIGHTSHIFTEQUAL', ">>=" ) +_add_tok('DOUBLESTAREQUAL', "**=" ) +_add_tok('DOUBLESLASH', "//" ) +_add_tok('DOUBLESLASHEQUAL',"//=" ) +_add_tok('AT', "@" ) +_add_tok('OP') +_add_tok('ERRORTOKEN') + +# extra PyPy-specific tokens +_add_tok("COMMENT") +_add_tok("NL") + +del _add_tok diff -r 28e2996df412 -r 051349b537b2 pyparser/pytokenize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytokenize.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,375 @@ +# ______________________________________________________________________ +"""Module pytokenize + +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED +TO BE ANNOTABLE (Mainly made lists homogeneous) + +This is a modified version of Ka-Ping Yee's tokenize module found in the +Python standard library. 
+ +The primary modification is the removal of the tokenizer's dependence on the +standard Python regular expression module, which is written in C. The regular +expressions have been replaced with hand built DFA's using the +basil.util.automata module. + +$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ +""" +# ______________________________________________________________________ + +from pyparser import automata + +__all__ = [ "tokenize" ] + +# ______________________________________________________________________ +# Automatically generated DFA's + +accepts = [True, True, True, True, True, True, True, True, + True, True, False, True, True, True, True, False, + False, False, True, False, False, True, False, + False, True, False, True, False, True, False, + False, True, False, False, True, True, True, + False, False, True, False, False, False, True] +states = [ + # 0 + {'\t': 0, '\n': 13, '\x0c': 0, + '\r': 14, ' ': 0, '!': 10, '"': 16, + '#': 18, '%': 12, '&': 12, "'": 15, + '(': 13, ')': 13, '*': 7, '+': 12, + ',': 13, '-': 12, '.': 6, '/': 11, + '0': 4, '1': 5, '2': 5, '3': 5, + '4': 5, '5': 5, '6': 5, '7': 5, + '8': 5, '9': 5, ':': 13, ';': 13, + '<': 9, '=': 12, '>': 8, '@': 13, + 'A': 1, 'B': 2, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, + 'U': 2, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '[': 13, '\\': 17, + ']': 13, '^': 12, '_': 1, '`': 13, + 'a': 1, 'b': 2, 'c': 1, 'd': 1, + 'e': 1, 'f': 1, 'g': 1, 'h': 1, + 'i': 1, 'j': 1, 'k': 1, 'l': 1, + 'm': 1, 'n': 1, 'o': 1, 'p': 1, + 'q': 1, 'r': 3, 's': 1, 't': 1, + 'u': 2, 'v': 1, 'w': 1, 'x': 1, + 'y': 1, 'z': 1, '{': 13, '|': 12, + '}': 13, '~': 13}, + # 1 + {'0': 1, '1': 1, '2': 1, '3': 1, + '4': 1, '5': 1, '6': 1, '7': 1, + '8': 1, '9': 1, 'A': 1, 'B': 1, + 'C': 1, 'D': 1, 'E': 1, 'F': 1, + 'G': 1, 'H': 1, 'I': 1, 'J': 1, + 'K': 1, 'L': 1, 'M': 1, 'N': 1, + 'O': 1, 'P': 1, 'Q': 1, 'R': 1, + 'S': 1, 'T': 1, 'U': 1, 'V': 1, + 'W': 1, 'X': 1, 'Y': 1, 'Z': 1, + '_': 1, 'a': 1, 'b': 1, 'c': 1, + 'd': 1, 'e': 1, 'f': 1, 'g': 1, + 'h': 1, 'i': 1, 'j': 1, 'k': 1, + 'l': 1, 'm': 1, 'n': 1, 'o': 1, + 'p': 1, 'q': 1, 'r': 1, 's': 1, + 't': 1, 'u': 1, 'v': 1, 'w': 1, + 'x': 1, 'y': 1, 'z': 1}, + # 2 + {'"': 16, "'": 15, '0': 1, '1': 1, + '2': 1, '3': 1, '4': 1, '5': 1, + '6': 1, '7': 1, '8': 1, '9': 1, + 'A': 1, 'B': 1, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, + 'U': 1, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, + 'b': 1, 'c': 1, 'd': 1, 'e': 1, + 'f': 1, 'g': 1, 'h': 1, 'i': 1, + 'j': 1, 'k': 1, 'l': 1, 'm': 1, + 'n': 1, 'o': 1, 'p': 1, 'q': 1, + 'r': 3, 's': 1, 't': 1, 'u': 1, + 'v': 1, 'w': 1, 'x': 1, 'y': 1, + 'z': 1}, + # 3 + {'"': 16, "'": 15, '0': 1, '1': 1, + '2': 1, '3': 1, '4': 1, '5': 1, + '6': 1, '7': 1, '8': 1, '9': 1, + 'A': 1, 'B': 1, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 1, 'S': 1, 'T': 1, + 'U': 1, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, + 'b': 1, 'c': 1, 'd': 1, 'e': 1, + 'f': 1, 'g': 1, 'h': 1, 'i': 1, + 'j': 1, 'k': 1, 'l': 1, 'm': 1, + 'n': 1, 'o': 1, 'p': 1, 'q': 1, + 'r': 1, 's': 1, 't': 1, 'u': 1, + 'v': 1, 'w': 1, 'x': 1, 'y': 1, + 'z': 1}, + # 4 + {'.': 24, '0': 21, '1': 21, '2': 21, + '3': 21, '4': 21, '5': 21, '6': 21, + '7': 21, '8': 23, '9': 23, 'B': 22, + 'E': 25, 
'J': 13, 'L': 13, 'O': 20, + 'X': 19, 'b': 22, 'e': 25, 'j': 13, + 'l': 13, 'o': 20, 'x': 19}, + # 5 + {'.': 24, '0': 5, '1': 5, '2': 5, + '3': 5, '4': 5, '5': 5, '6': 5, + '7': 5, '8': 5, '9': 5, 'E': 25, + 'J': 13, 'L': 13, 'e': 25, 'j': 13, + 'l': 13}, + # 6 + {'0': 26, '1': 26, '2': 26, '3': 26, + '4': 26, '5': 26, '6': 26, '7': 26, + '8': 26, '9': 26}, + # 7 + {'*': 12, '=': 13}, + # 8 + {'=': 13, '>': 12}, + # 9 + {'<': 12, '=': 13, '>': 13}, + # 10 + {'=': 13}, + # 11 + {'/': 12, '=': 13}, + # 12 + {'=': 13}, + # 13 + {}, + # 14 + {'\n': 13}, + # 15 + {automata.DEFAULT: 30, '\n': 27, + '\r': 27, "'": 28, '\\': 29}, + # 16 + {automata.DEFAULT: 33, '\n': 27, + '\r': 27, '"': 31, '\\': 32}, + # 17 + {'\n': 13, '\r': 14}, + # 18 + {automata.DEFAULT: 18, '\n': 27, '\r': 27}, + # 19 + {'0': 34, '1': 34, '2': 34, '3': 34, + '4': 34, '5': 34, '6': 34, '7': 34, + '8': 34, '9': 34, 'A': 34, 'B': 34, + 'C': 34, 'D': 34, 'E': 34, 'F': 34, + 'a': 34, 'b': 34, 'c': 34, 'd': 34, + 'e': 34, 'f': 34}, + # 20 + {'0': 35, '1': 35, '2': 35, '3': 35, + '4': 35, '5': 35, '6': 35, '7': 35}, + # 21 + {'.': 24, '0': 21, '1': 21, '2': 21, + '3': 21, '4': 21, '5': 21, '6': 21, + '7': 21, '8': 23, '9': 23, 'E': 25, + 'J': 13, 'L': 13, 'e': 25, 'j': 13, + 'l': 13}, + # 22 + {'0': 36, '1': 36}, + # 23 + {'.': 24, '0': 23, '1': 23, '2': 23, + '3': 23, '4': 23, '5': 23, '6': 23, + '7': 23, '8': 23, '9': 23, 'E': 25, + 'J': 13, 'e': 25, 'j': 13}, + # 24 + {'0': 24, '1': 24, '2': 24, '3': 24, + '4': 24, '5': 24, '6': 24, '7': 24, + '8': 24, '9': 24, 'E': 37, 'J': 13, + 'e': 37, 'j': 13}, + # 25 + {'+': 38, '-': 38, '0': 39, '1': 39, + '2': 39, '3': 39, '4': 39, '5': 39, + '6': 39, '7': 39, '8': 39, '9': 39}, + # 26 + {'0': 26, '1': 26, '2': 26, '3': 26, + '4': 26, '5': 26, '6': 26, '7': 26, + '8': 26, '9': 26, 'E': 37, 'J': 13, + 'e': 37, 'j': 13}, + # 27 + {}, + # 28 + {"'": 13}, + # 29 + {automata.DEFAULT: 40, '\n': 13, '\r': 14}, + # 30 + {automata.DEFAULT: 30, '\n': 27, + '\r': 27, "'": 13, '\\': 29}, + # 31 + {'"': 13}, + # 32 + {automata.DEFAULT: 41, '\n': 13, '\r': 14}, + # 33 + {automata.DEFAULT: 33, '\n': 27, + '\r': 27, '"': 13, '\\': 32}, + # 34 + {'0': 34, '1': 34, '2': 34, '3': 34, + '4': 34, '5': 34, '6': 34, '7': 34, + '8': 34, '9': 34, 'A': 34, 'B': 34, + 'C': 34, 'D': 34, 'E': 34, 'F': 34, + 'L': 13, 'a': 34, 'b': 34, 'c': 34, + 'd': 34, 'e': 34, 'f': 34, 'l': 13}, + # 35 + {'0': 35, '1': 35, '2': 35, '3': 35, + '4': 35, '5': 35, '6': 35, '7': 35, + 'L': 13, 'l': 13}, + # 36 + {'0': 36, '1': 36, 'L': 13, 'l': 13}, + # 37 + {'+': 42, '-': 42, '0': 43, '1': 43, + '2': 43, '3': 43, '4': 43, '5': 43, + '6': 43, '7': 43, '8': 43, '9': 43}, + # 38 + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39}, + # 39 + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39, 'J': 13, 'j': 13}, + # 40 + {automata.DEFAULT: 40, '\n': 27, + '\r': 27, "'": 13, '\\': 29}, + # 41 + {automata.DEFAULT: 41, '\n': 27, + '\r': 27, '"': 13, '\\': 32}, + # 42 + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '8': 43, '9': 43}, + # 43 + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '8': 43, '9': 43, 'J': 13, 'j': 13}, + ] +pseudoDFA = automata.DFA(states, accepts) + +accepts = [False, False, False, False, False, True] +states = [ + # 0 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, + # 1 + {automata.DEFAULT: 4, '"': 3, '\\': 2}, + # 2 + {automata.DEFAULT: 4}, + # 3 + {automata.DEFAULT: 4, '"': 
5, '\\': 2}, + # 4 + {automata.DEFAULT: 4, '"': 1, '\\': 2}, + # 5 + {automata.DEFAULT: 4, '"': 5, '\\': 2}, + ] +double3DFA = automata.NonGreedyDFA(states, accepts) + +accepts = [False, False, False, False, False, True] +states = [ + # 0 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, + # 1 + {automata.DEFAULT: 4, "'": 3, '\\': 2}, + # 2 + {automata.DEFAULT: 4}, + # 3 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, + # 4 + {automata.DEFAULT: 4, "'": 1, '\\': 2}, + # 5 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, + ] +single3DFA = automata.NonGreedyDFA(states, accepts) + +accepts = [False, True, False, False] +states = [ + # 0 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, + # 1 + {}, + # 2 + {automata.DEFAULT: 3}, + # 3 + {automata.DEFAULT: 3, "'": 1, '\\': 2}, + ] +singleDFA = automata.DFA(states, accepts) + +accepts = [False, True, False, False] +states = [ + # 0 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, + # 1 + {}, + # 2 + {automata.DEFAULT: 3}, + # 3 + {automata.DEFAULT: 3, '"': 1, '\\': 2}, + ] +doubleDFA = automata.DFA(states, accepts) + +#_______________________________________________________________________ +# End of automatically generated DFA's + +endDFAs = {"'" : singleDFA, + '"' : doubleDFA, + 'r' : None, + 'R' : None, + 'u' : None, + 'U' : None, + 'b' : None, + 'B' : None} + +for uniPrefix in ("", "u", "U", "b", "B"): + for rawPrefix in ("", "r", "R"): + prefix = uniPrefix + rawPrefix + endDFAs[prefix + "'''"] = single3DFA + endDFAs[prefix + '"""'] = double3DFA + +whiteSpaceStatesAccepts = [True] +whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}] +whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts) + +# ______________________________________________________________________ +# COPIED: + +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "b'''", 'b"""', "B'''", 'B"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""', + "br'''", 'br"""', "Br'''", 'Br"""', + "bR'''", 'bR"""', "BR'''", 'BR"""'): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "b'", 'b"', "B'", 'B"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"', + "br'", 'br"', "Br'", 'Br"', + "bR'", 'bR"', "BR'", 'BR"'): + single_quoted[t] = t + +tabsize = 8 + +# PYPY MODIFICATION: removed TokenError class as it's not needed here + +# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here + +# PYPY MODIFICATION: removed printtoken() as it's not needed here + +# PYPY MODIFICATION: removed tokenize() as it's not needed here + +# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here + +# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified +# in pythonlexer.py + +# PYPY MODIFICATION: removed main() as it's not needed here + +# ______________________________________________________________________ +# End of pytokenize.py + diff -r 28e2996df412 -r 051349b537b2 pyparser/pytokenizer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytokenizer.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,273 @@ +from pyparser import automata +from pyparser.pygram import tokens +from pyparser.pytoken import python_opmap +from pyparser.error import TokenError, TokenIndentationError +from pyparser.pytokenize import tabsize, whiteSpaceDFA, \ + triple_quoted, endDFAs, single_quoted, pseudoDFA +from pyparser import consts + +NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' +NUMCHARS = '0123456789' 
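
The quote-prefix tables defined in pytokenize.py above, and imported at the top of this module, are what let the tokenizer scan string literals without the re module: every legal opening quote, with any combination of u/b/r prefixes, appears as a key in triple_quoted or single_quoted, and endDFAs maps it to the automaton that locates the matching closing quote. A rough usage sketch, relying only on the recognize() behaviour that the tokenizer loop further down in this module depends on (the end offset of a match, or a negative value when there is none):

    from pyparser.pytokenize import endDFAs, triple_quoted, single_quoted

    # Every prefixed opening quote is registered.
    assert "r'''" in triple_quoted
    assert "b'" in single_quoted

    dfa = endDFAs["'''"]                  # non-greedy DFA for a closing '''
    text = "body of the literal''' trailing code"
    end = dfa.recognize(text)
    if end >= 0:
        literal_tail = text[:end]         # includes the closing quotes
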
+ALNUMCHARS = NAMECHARS + NUMCHARS +EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.' +WHITESPACES = ' \t\n\r\v\f' + +def match_encoding_declaration(comment): + """returns the declared encoding or None + + This function is a replacement for : + >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)") + >>> py_encoding.search(comment) + """ + index = comment.find('coding') + if index < 0: + return None + next_char = comment[index + 6] + if next_char not in ':=': + return None + end_of_decl = comment[index + 7:] + index = 0 + for char in end_of_decl: + if char not in WHITESPACES: + break + index += 1 + else: + return None + encoding = '' + for char in end_of_decl[index:]: + if char in EXTENDED_ALNUMCHARS: + encoding += char + else: + break + if encoding != '': + return encoding + return None + + +DUMMY_DFA = automata.DFA([], []) + +def generate_tokens(lines, flags): + """ + This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since + the original function is not RPYTHON (uses yield) + It was also slightly modified to generate Token instances instead + of the original 5-tuples -- it's now a 4-tuple of + + * the Token instance + * the whole line as a string + * the line number (the real one, counting continuation lines) + * the position on the line of the end of the token. + + Original docstring :: + + The generate_tokens() generator requires one argment, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + logical line; continuation lines are included. 
+ """ + token_list = [] + lnum = parenlev = continued = 0 + namechars = NAMECHARS + numchars = NUMCHARS + contstr, needcont = '', 0 + contline = None + indents = [0] + last_comment = '' + parenlevstart = (0, 0, "") + + # make the annotator happy + endDFA = DUMMY_DFA + # make the annotator happy + line = '' + pos = 0 + lines.append("") + strstart = (0, 0, "") + for line in lines: + lnum = lnum + 1 + line = universal_newline(line) + pos, max = 0, len(line) + + if contstr: + if not line: + raise TokenError( + "EOF while scanning triple-quoted string literal", + strstart[2], strstart[0], strstart[1]+1, + token_list, lnum-1) + endmatch = endDFA.recognize(line) + if endmatch >= 0: + pos = end = endmatch + tok = (tokens.STRING, contstr + line[:end], strstart[0], + strstart[1], line) + token_list.append(tok) + last_comment = '' + contstr, needcont = '', 0 + contline = None + elif (needcont and not line.endswith('\\\n') and + not line.endswith('\\\r\n')): + tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + strstart[1], line) + token_list.append(tok) + last_comment = '' + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if line[pos] in '#\r\n': + # skip comments or blank lines + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + last_comment = '' + while column < indents[-1]: + indents = indents[:-1] + token_list.append((tokens.DEDENT, '', lnum, pos, line)) + last_comment = '' + if column != indents[-1]: + err = "unindent does not match any outer indentation level" + raise TokenIndentationError(err, line, lnum, 0, token_list) + + else: # continued statement + if not line: + if parenlev > 0: + lnum1, start1, line1 = parenlevstart + raise TokenError("parenthesis is never closed", line1, + lnum1, start1 + 1, token_list, lnum) + raise TokenError("EOF in multi-line statement", line, + lnum, 0, token_list) + continued = 0 + + while pos < max: + pseudomatch = pseudoDFA.recognize(line, pos) + if pseudomatch >= 0: # scan for tokens + # JDR: Modified + start = whiteSpaceDFA.recognize(line, pos) + if start < 0: + start = pos + end = pseudomatch + + if start == end: + raise TokenError("Unknown character", line, + lnum, start + 1, token_list) + + pos = end + token, initial = line[start:end], line[start] + if initial in numchars or \ + (initial == '.' 
and token != '.'): # ordinary number + token_list.append((tokens.NUMBER, token, lnum, start, line)) + last_comment = '' + elif initial in '\r\n': + if parenlev <= 0: + tok = (tokens.NEWLINE, last_comment, lnum, start, line) + token_list.append(tok) + last_comment = '' + elif initial == '#': + # skip comment + last_comment = token + elif token in triple_quoted: + endDFA = endDFAs[token] + endmatch = endDFA.recognize(line, pos) + if endmatch >= 0: # all on one line + pos = endmatch + token = line[start:pos] + tok = (tokens.STRING, token, lnum, start, line) + token_list.append(tok) + last_comment = '' + else: + strstart = (lnum, start, line) + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start, line) + endDFA = (endDFAs[initial] or endDFAs[token[1]] or + endDFAs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + tok = (tokens.STRING, token, lnum, start, line) + token_list.append(tok) + last_comment = '' + elif initial in namechars: # ordinary name + token_list.append((tokens.NAME, token, lnum, start, line)) + last_comment = '' + elif initial == '\\': # continued stmt + continued = 1 + else: + if initial in '([{': + if parenlev == 0: + parenlevstart = (lnum, start, line) + parenlev = parenlev + 1 + elif initial in ')]}': + parenlev = parenlev - 1 + if parenlev < 0: + raise TokenError("unmatched '%s'" % initial, line, + lnum, start + 1, token_list) + if token in python_opmap: + punct = python_opmap[token] + else: + punct = tokens.OP + token_list.append((punct, token, lnum, start, line)) + last_comment = '' + else: + start = whiteSpaceDFA.recognize(line, pos) + if start < 0: + start = pos + if start= 0 and line[-2] == '\r' and line[-1] == '\n': + return line[:line_len_m2] + '\n' + line_len_m1 = len(line) - 1 + if line_len_m1 >= 0 and line[-1] == '\r': + return line[:line_len_m1] + '\n' + return line diff -r 28e2996df412 -r 051349b537b2 pyparser/test/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/__init__.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,1 @@ + diff -r 28e2996df412 -r 051349b537b2 pyparser/test/expressions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/expressions.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,510 @@ +""" +list of tested expressions / suites (used by test_parser and test_astbuilder) +""" + +constants = [ + "0", + "7", + "-3", + "053", + "0x18", + "14L", + "1.0", + "3.9", + "-3.6", + "1.8e19", + "90000000000000", + "90000000000000.", + "3j" + ] + +expressions = [ + "x = a + 1", + "x = 1 - a", + "x = a * b", + "x = a ** 2", + "x = a / b", + "x = a & b", + "x = a | b", + "x = a ^ b", + "x = a // b", + "x = a * b + 1", + "x = a + 1 * b", + "x = a * b / c", + "x = a * (1 + c)", + "x, y, z = 1, 2, 3", + "x = 'a' 'b' 'c'", + "del foo", + "del foo[bar]", + "del foo.bar", + "l[0]", + "k[v,]", + "m[a,b]", + "a.b.c[d]", + "file('some.txt').read()", + "a[0].read()", + "a[1:1].read()", + "f('foo')('bar')('spam')", + "f('foo')('bar')('spam').read()[0]", + "a.b[0][0]", + "a.b[0][:]", + "a.b[0][::]", + "a.b[0][0].pop()[0].push('bar')('baz').spam", + "a.b[0].read()[1][2].foo().spam()[0].bar", + "a**2", + "a**2**2", + "a.b[0]**2", + "a.b[0].read()[1][2].foo().spam()[0].bar ** 2", + "l[start:end] = l2", + "l[::] = l2", + "a = `s`", + "a = `1 + 2 + f(3, 4)`", + "[a, b] = c", + "(a, b) = c", + "[a, (b,c), d] = e", + "a, (b, 
c), d = e", + ] + +# We do not export the following tests because we would have to implement 2.5 +# features in the stable compiler (other than just building the AST). +expressions_inbetweenversions = expressions + [ + "1 if True else 2", + "1 if False else 2", + ] + +funccalls = [ + "l = func()", + "l = func(10)", + "l = func(10, 12, a, b=c, *args)", + "l = func(10, 12, a, b=c, **kwargs)", + "l = func(10, 12, a, b=c, *args, **kwargs)", + "l = func(10, 12, a, b=c)", + "e = l.pop(3)", + "e = k.l.pop(3)", + "simplefilter('ignore', category=PendingDeprecationWarning, append=1)", + """methodmap = dict(subdirs=phase4, + same_files=phase3, diff_files=phase3, funny_files=phase3, + common_dirs = phase2, common_files=phase2, common_funny=phase2, + common=phase1, left_only=phase1, right_only=phase1, + left_list=phase0, right_list=phase0)""", + "odata = b2a_qp(data, quotetabs = quotetabs, header = header)", + ] + +listmakers = [ + "l = []", + "l = [1, 2, 3]", + "l = [i for i in range(10)]", + "l = [i for i in range(10) if i%2 == 0]", + "l = [i for i in range(10) if i%2 == 0 or i%2 == 1]", # <-- + "l = [i for i in range(10) if i%2 == 0 and i%2 == 1]", + "l = [i for j in range(10) for i in range(j)]", + "l = [i for j in range(10) for i in range(j) if j%2 == 0]", + "l = [i for j in range(10) for i in range(j) if j%2 == 0 and i%2 == 0]", + "l = [(a, b) for (a,b,c) in l2]", + "l = [{a:b} for (a,b,c) in l2]", + "l = [i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0]", + ] + +genexps = [ + "l = (i for i in j)", + "l = (i for i in j if i%2 == 0)", + "l = (i for j in k for i in j)", + "l = (i for j in k for i in j if j%2==0)", + "l = (i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0)", + "l = (i for i in [ j*2 for j in range(10) ] )", + "l = [i for i in ( j*2 for j in range(10) ) ]", + "l = (i for i in [ j*2 for j in ( k*3 for k in range(10) ) ] )", + "l = [i for j in ( j*2 for j in [ k*3 for k in range(10) ] ) ]", + "l = f(i for i in j)", + ] + + +dictmakers = [ + "l = {a : b, 'c' : 0}", + "l = {}", + ] + +backtrackings = [ + "f = lambda x: x+1", + "f = lambda x,y: x+y", + "f = lambda x,y=1,z=t: x+y", + "f = lambda x,y=1,z=t,*args,**kwargs: x+y", + "f = lambda x,y=1,z=t,*args: x+y", + "f = lambda x,y=1,z=t,**kwargs: x+y", + "f = lambda: 1", + "f = lambda *args: 1", + "f = lambda **kwargs: 1", + ] + +comparisons = [ + "a < b", + "a > b", + "a not in b", + "a is not b", + "a in b", + "a is b", + "3 < x < 5", + "(3 < x) < 5", + "a < b < c < d", + "(a < b) < (c < d)", + "a < (b < c) < d", + ] + +multiexpr = [ + 'a = b; c = d;', + 'a = b = c = d', + ] + +attraccess = [ + 'a.b = 2', + 'x = a.b', + ] + +slices = [ + "l[:]", + "l[::]", + "l[1:2]", + "l[1:]", + "l[:2]", + "l[1::]", + "l[:1:]", + "l[::1]", + "l[1:2:]", + "l[:1:2]", + "l[1::2]", + "l[0:1:2]", + "a.b.l[:]", + "a.b.l[1:2]", + "a.b.l[1:]", + "a.b.l[:2]", + "a.b.l[0:1:2]", + "a[1:2:3, 100]", + "a[:2:3, 100]", + "a[1::3, 100,]", + "a[1:2:, 100]", + "a[1:2, 100]", + "a[1:, 100,]", + "a[:2, 100]", + "a[:, 100]", + "a[100, 1:2:3,]", + "a[100, :2:3]", + "a[100, 1::3]", + "a[100, 1:2:,]", + "a[100, 1:2]", + "a[100, 1:]", + "a[100, :2,]", + "a[100, :]", + ] + +imports = [ + 'import os', + 'import sys, os', + 'import os.path', + 'import os.path, sys', + 'import sys, os.path as osp', + 'import os.path as osp', + 'import os.path as osp, sys as _sys', + 'import a.b.c.d', + 'import a.b.c.d as abcd', + 'from os import path', + 'from os import path, system', + ] + +imports_newstyle = [ + 'from os import path, system', + 'from os import path as P, 
system as S', + 'from os import (path as P, system as S,)', + 'from os import *', + ] + +if_stmts = [ + "if a == 1: a+= 2", + """if a == 1: + a += 2 +elif a == 2: + a += 3 +else: + a += 4 +""", + "if a and not b == c: pass", + "if a and not not not b == c: pass", + "if 0: print 'foo'" + ] + +asserts = [ + 'assert False', + 'assert a == 1', + 'assert a == 1 and b == 2', + 'assert a == 1 and b == 2, "assertion failed"', + ] + +execs = [ + 'exec a', + 'exec "a=b+3"', + 'exec a in f()', + 'exec a in f(), g()', + ] + +prints = [ + 'print', + 'print a', + 'print a,', + 'print a, b', + 'print a, "b", c', + 'print >> err', + 'print >> err, "error"', + 'print >> err, "error",', + 'print >> err, "error", a', + ] + +globs = [ + 'global a', + 'global a,b,c', + ] + +raises_ = [ # NB. 'raises' creates a name conflict with py.test magic + 'raise', + 'raise ValueError', + 'raise ValueError("error")', + 'raise ValueError, "error"', + 'raise ValueError, "error", foo', + ] + +tryexcepts = [ + """try: + a + b +except: + pass +""", + """try: + a + b +except NameError: + pass +""", + """try: + a + b +except NameError, err: + pass +""", + """try: + a + b +except (NameError, ValueError): + pass +""", + """try: + a + b +except (NameError, ValueError), err: + pass +""", + """try: + a +except NameError, err: + pass +except ValueError, err: + pass +""", + """def f(): + try: + a + except NameError, err: + a = 1 + b = 2 + except ValueError, err: + a = 2 + return a +""" + """try: + a +except NameError, err: + a = 1 +except ValueError, err: + a = 2 +else: + a += 3 +""", + """try: + a +finally: + b +""", + """def f(): + try: + return a + finally: + a = 3 + return 1 +""", + + ] + +one_stmt_funcdefs = [ + "def f(): return 1", + "def f(x): return x+1", + "def f(x,y): return x+y", + "def f(x,y=1,z=t): return x+y", + "def f(x,y=1,z=t,*args,**kwargs): return x+y", + "def f(x,y=1,z=t,*args): return x+y", + "def f(x,y=1,z=t,**kwargs): return x+y", + "def f(*args): return 1", + "def f(**kwargs): return 1", + "def f(t=()): pass", + "def f(a, b, (c, d), e): pass", + "def f(a, b, (c, (d, e), f, (g, h))): pass", + "def f(a, b, (c, (d, e), f, (g, h)), i): pass", + "def f((a)): pass", + ] + +one_stmt_classdefs = [ + "class Pdb(bdb.Bdb, cmd.Cmd): pass", + "class A: pass", + ] + +docstrings = [ + '''def foo(): return 1''', + '''class Foo: pass''', + '''class Foo: "foo"''', + '''def foo(): + """foo docstring""" + return 1 +''', + '''def foo(): + """foo docstring""" + a = 1 + """bar""" + return a +''', + '''def foo(): + """doc"""; print 1 + a=1 +''', + '''"""Docstring""";print 1''', + ] + +returns = [ + 'def f(): return', + 'def f(): return 1', + 'def f(): return a.b', + 'def f(): return a', + 'def f(): return a,b,c,d', + #'return (a,b,c,d)', --- this one makes no sense, as far as I can tell + ] + +augassigns = [ + 'a=1;a+=2', + 'a=1;a-=2', + 'a=1;a*=2', + 'a=1;a/=2', + 'a=1;a//=2', + 'a=1;a%=2', + 'a=1;a**=2', + 'a=1;a>>=2', + 'a=1;a<<=2', + 'a=1;a&=2', + 'a=1;a^=2', + 'a=1;a|=2', + + 'a=A();a.x+=2', + 'a=A();a.x-=2', + 'a=A();a.x*=2', + 'a=A();a.x/=2', + 'a=A();a.x//=2', + 'a=A();a.x%=2', + 'a=A();a.x**=2', + 'a=A();a.x>>=2', + 'a=A();a.x<<=2', + 'a=A();a.x&=2', + 'a=A();a.x^=2', + 'a=A();a.x|=2', + + 'a=A();a[0]+=2', + 'a=A();a[0]-=2', + 'a=A();a[0]*=2', + 'a=A();a[0]/=2', + 'a=A();a[0]//=2', + 'a=A();a[0]%=2', + 'a=A();a[0]**=2', + 'a=A();a[0]>>=2', + 'a=A();a[0]<<=2', + 'a=A();a[0]&=2', + 'a=A();a[0]^=2', + 'a=A();a[0]|=2', + + 'a=A();a[0:2]+=2', + 'a=A();a[0:2]-=2', + 'a=A();a[0:2]*=2', + 'a=A();a[0:2]/=2', + 'a=A();a[0:2]//=2', + 
'a=A();a[0:2]%=2', + 'a=A();a[0:2]**=2', + 'a=A();a[0:2]>>=2', + 'a=A();a[0:2]<<=2', + 'a=A();a[0:2]&=2', + 'a=A();a[0:2]^=2', + 'a=A();a[0:2]|=2', + ] + +PY23_TESTS = [ + constants, + expressions, + augassigns, + comparisons, + funccalls, + backtrackings, + listmakers, # ERRORS + dictmakers, + multiexpr, + attraccess, + slices, + imports, + execs, + prints, + globs, + raises_, + + ] + +OPTIONAL_TESTS = [ + # expressions_inbetweenversions, + genexps, + imports_newstyle, + asserts, + ] + +TESTS = PY23_TESTS + OPTIONAL_TESTS + + +## TESTS = [ +## ["l = [i for i in range(10) if i%2 == 0 or i%2 == 1]"], +## ] + +CHANGES_25_INPUTS = [ + ["class A(): pass"], + ["def f(): x = yield 3"] + ] + +EXEC_INPUTS = [ + one_stmt_classdefs, + one_stmt_funcdefs, + if_stmts, + tryexcepts, + docstrings, + returns, + ] + +SINGLE_INPUTS = [ + one_stmt_funcdefs, + ['\t # hello\n', + 'print 6*7', + 'if 1: x\n', + 'x = 5', + 'x = 5 ', + '''"""Docstring""";print 1''', + '''"Docstring"''', + '''"Docstring" "\\x00"''', + ] +] diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_automata.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_automata.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,12 @@ +from pyparser.automata import DFA, DEFAULT + +def test_states(): + d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True]) + assert d.states == "\x01\xff\xff\x00" + assert d.defaults == "\xff\xff" + assert d.max_char == 2 + + d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True]) + assert d.states == "\x01\x00" + assert d.defaults == "\xff\x00" + assert d.max_char == 1 diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_gendfa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_gendfa.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,16 @@ +from pyparser.automata import DFA, DEFAULT +from pyparser.genpytokenize import output + +def test_states(): + states = [{"\x00": 1}, {"\x01": 0}] + d = DFA(states[:], [False, True]) + assert output('test', DFA, d, states) == """\ +accepts = [False, True] +states = [ + # 0 + {'\\x00': 1}, + # 1 + {'\\x01': 0}, + ] +test = automata.pyparser.automata.DFA(states, accepts) +""" diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_metaparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_metaparser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,112 @@ +import py +import os +import glob +import tokenize +import token +import StringIO +from pyparser.metaparser import ParserGenerator, PgenError +from pyparser.pygram import PythonGrammar +from pyparser import parser + + +class MyGrammar(parser.Grammar): + TOKENS = token.__dict__ + OPERATOR_MAP = { + "+" : token.OP, + "-" : token.OP, + } + KEYWORD_TOKEN = token.NAME + + +class TestParserGenerator: + + def gram_for(self, grammar_source): + p = ParserGenerator(grammar_source + "\n") + return p.build_grammar(MyGrammar) + + def test_multiple_rules(self): + g = self.gram_for("foo: NAME bar\nbar: STRING") + assert len(g.dfas) == 2 + assert g.start == g.symbol_ids["foo"] + + def test_simple(self): + g = self.gram_for("eval: NAME\n") + assert len(g.dfas) == 1 + eval_sym = g.symbol_ids["eval"] + assert g.start == eval_sym + states, first = g.dfas[eval_sym - 256] + assert states == [([(1, 1)], False), ([], True)] + assert g.labels[0] == 0 + + def test_load_python_grammars(self): + gram_pat = os.path.join(os.path.dirname(__file__), "..", "data", + "Grammar*") + for gram_file in glob.glob(gram_pat): + fp = open(gram_file, "r") + try: + ParserGenerator(fp.read()).build_grammar(PythonGrammar) + 
finally: + fp.close() + + def test_items(self): + g = self.gram_for("foo: NAME STRING OP '+'") + assert len(g.dfas) == 1 + states = g.dfas[g.symbol_ids["foo"] - 256][0] + last = states[0][0][0][1] + for state in states[1:-1]: + assert last < state[0][0][1] + last = state[0][0][1] + + def test_alternatives(self): + g = self.gram_for("foo: STRING | OP") + assert len(g.dfas) == 1 + + def test_optional(self): + g = self.gram_for("foo: [NAME]") + + def test_grouping(self): + g = self.gram_for("foo: (NAME | STRING) OP") + + def test_keyword(self): + g = self.gram_for("foo: 'some_keyword' 'for'") + assert len(g.keyword_ids) == 2 + assert len(g.token_ids) == 0 + + def test_token(self): + g = self.gram_for("foo: NAME") + assert len(g.token_ids) == 1 + + def test_operator(self): + g = self.gram_for("add: NUMBER '+' NUMBER") + assert len(g.keyword_ids) == 0 + assert len(g.token_ids) == 2 + + exc = py.test.raises(PgenError, self.gram_for, "add: '/'").value + assert str(exc) == "no such operator: '/'" + + def test_symbol(self): + g = self.gram_for("foo: some_other_rule\nsome_other_rule: NAME") + assert len(g.dfas) == 2 + assert len(g.labels) == 3 + + exc = py.test.raises(PgenError, self.gram_for, "foo: no_rule").value + assert str(exc) == "no such rule: 'no_rule'" + + def test_repeaters(self): + g1 = self.gram_for("foo: NAME+") + g2 = self.gram_for("foo: NAME*") + assert g1.dfas != g2.dfas + + g = self.gram_for("foo: (NAME | STRING)*") + g = self.gram_for("foo: (NAME | STRING)+") + + def test_error(self): + exc = py.test.raises(PgenError, self.gram_for, "hi").value + assert str(exc) == "expected token OP but got NEWLINE" + assert exc.location == ((1, 2), (1, 3), "hi\n") + exc = py.test.raises(PgenError, self.gram_for, "hi+").value + assert str(exc) == "expected ':' but got '+'" + assert exc.location == ((1, 2), (1, 3), "hi+\n") + + def test_comments_and_whitespace(self): + self.gram_for("\n\n# comment\nrule: NAME # comment") diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_parser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,293 @@ +# New parser tests. 
+import py +import tokenize +import token +import StringIO +from pyparser import parser, metaparser, pygram +from pyparser.test.test_metaparser import MyGrammar + + +class SimpleParser(parser.Parser): + + def parse(self, input): + self.prepare() + rl = StringIO.StringIO(input + "\n").readline + gen = tokenize.generate_tokens(rl) + for tp, value, begin, end, line in gen: + if self.add_token(tp, value, begin[0], begin[1], line): + py.test.raises(StopIteration, gen.next) + return self.root + + +def tree_from_string(expected, gram): + def count_indent(s): + indent = 0 + for char in s: + if char != " ": + break + indent += 1 + return indent + last_newline_index = 0 + for i, char in enumerate(expected): + if char == "\n": + last_newline_index = i + elif char != " ": + break + if last_newline_index: + expected = expected[last_newline_index + 1:] + base_indent = count_indent(expected) + assert not divmod(base_indent, 4)[1], "not using 4 space indentation" + lines = [line[base_indent:] for line in expected.splitlines()] + last_indent = 0 + node_stack = [] + for line in lines: + if not line.strip(): + continue + data = line.split() + if data[0].isupper(): + tp = getattr(token, data[0]) + if len(data) == 2: + value = data[1].strip("\"") + elif tp == token.NEWLINE: + value = "\n" + else: + value = "" + n = parser.Terminal(tp, value, 0, 0) + else: + tp = gram.symbol_ids[data[0]] + children = [] + n = parser.Nonterminal(tp, children) + new_indent = count_indent(line) + if new_indent >= last_indent: + if new_indent == last_indent and node_stack: + node_stack.pop() + if node_stack: + node_stack[-1].append_child(n) + node_stack.append(n) + else: + diff = last_indent - new_indent + pop_nodes = diff // 4 + 1 + del node_stack[-pop_nodes:] + node_stack[-1].append_child(n) + node_stack.append(n) + last_indent = new_indent + return node_stack[0] + + +class TestParser: + + def parser_for(self, gram, add_endmarker=True): + if add_endmarker: + gram += " NEWLINE ENDMARKER\n" + pgen = metaparser.ParserGenerator(gram) + g = pgen.build_grammar(MyGrammar) + return SimpleParser(g), g + + def test_multiple_rules(self): + gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER +bar: NAME NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + foo + NAME "next_rule" + bar + NAME "a_name" + NUMBER "42" + NAME "end" + NEWLINE + ENDMARKER""" + input = "next_rule a_name 42 end" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_recursive_rule(self): + gram = """foo: NAME bar STRING NEWLINE ENDMARKER +bar: NAME [bar] NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + foo + NAME "hi" + bar + NAME "hello" + bar + NAME "a_name" + NUMBER "32" + NUMBER "42" + STRING "'string'" + NEWLINE + ENDMARKER""" + input = "hi hello a_name 32 42 'string'" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_symbol(self): + gram = """parent: first_child second_child NEWLINE ENDMARKER +first_child: NAME age +second_child: STRING +age: NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + parent + first_child + NAME "harry" + age + NUMBER "13" + second_child + STRING "'fred'" + NEWLINE + ENDMARKER""" + input = "harry 13 'fred'" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_token(self): + p, gram = self.parser_for("foo: NAME") + expected = """ + foo + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi") + py.test.raises(parser.ParseError, p.parse, "567") + p, gram = self.parser_for("foo: 
NUMBER NAME STRING") + expected = """ + foo + NUMBER "42" + NAME "hi" + STRING "'bar'" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'") + + def test_optional(self): + p, gram = self.parser_for("foo: [NAME] 'end'") + expected = """ + foo + NAME "hi" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi end") + expected = """ + foo + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("end") + + def test_grouping(self): + p, gram = self.parser_for( + "foo: ((NUMBER NAME | STRING) | 'second_option')") + expected = """ + foo + NUMBER "42" + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 hi") + expected = """ + foo + STRING "'hi'" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("'hi'") + expected = """ + foo + NAME "second_option" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("second_option") + py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'") + py.test.raises(parser.ParseError, p.parse, "42 second_option") + + def test_alternative(self): + p, gram = self.parser_for("foo: (NAME | NUMBER)") + expected = """ + foo + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi") + expected = """ + foo + NUMBER "42" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42") + py.test.raises(parser.ParseError, p.parse, "hi 23") + py.test.raises(parser.ParseError, p.parse, "23 hi") + py.test.raises(parser.ParseError, p.parse, "'some string'") + + def test_keyword(self): + p, gram = self.parser_for("foo: 'key'") + expected = """ + foo + NAME "key" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("key") + py.test.raises(parser.ParseError, p.parse, "") + p, gram = self.parser_for("foo: NAME 'key'") + expected = """ + foo + NAME "some_name" + NAME "key" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("some_name key") + py.test.raises(parser.ParseError, p.parse, "some_name") + + def test_repeaters(self): + p, gram = self.parser_for("foo: NAME+ 'end'") + expected = """ + foo + NAME "hi" + NAME "bye" + NAME "nothing" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi bye nothing end") + py.test.raises(parser.ParseError, p.parse, "end") + py.test.raises(parser.ParseError, p.parse, "hi bye") + p, gram = self.parser_for("foo: NAME* 'end'") + expected = """ + foo + NAME "hi" + NAME "bye" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi bye end") + py.test.raises(parser.ParseError, p.parse, "hi bye") + expected = """ + foo + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("end") + + p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'") + expected = """ + foo + NAME "a_name" + NAME "name_two" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("a_name name_two end") + expected = """ + foo + NUMBER "42" + NAME "name" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 name end") + py.test.raises(parser.ParseError, p.parse, "end") + p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'") + expected = """ + foo + NAME "hi" + NUMBER 42 + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi 42 end") diff -r 
28e2996df412 -r 051349b537b2 pyparser/test/test_pyparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_pyparse.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +import py +from pyparser import pyparse +from pyparser.pygram import syms, tokens +from pyparser.error import SyntaxError, IndentationError +from pyparser import consts + + +class TestPythonParser: + + def setup_class(self): + self.parser = pyparse.PythonParser() + + def parse(self, source, mode="exec", info=None): + if info is None: + info = pyparse.CompileInfo("", mode) + return self.parser.parse_source(source, info) + + def test_with_and_as(self): + py.test.raises(SyntaxError, self.parse, "with = 23") + py.test.raises(SyntaxError, self.parse, "as = 2") + + def test_dont_imply_dedent(self): + info = pyparse.CompileInfo("", "single", + consts.PyCF_DONT_IMPLY_DEDENT) + self.parse('if 1:\n x\n', info=info) + self.parse('x = 5 ', info=info) + + def test_clear_state(self): + assert self.parser.root is None + tree = self.parse("name = 32") + assert self.parser.root is None + + def test_encoding(self): + info = pyparse.CompileInfo("", "exec") + tree = self.parse("""# coding: latin-1 +stuff = "nothing" +""", info=info) + assert tree.type == syms.file_input + assert info.encoding == "iso-8859-1" + sentence = u"u'Die Männer ärgen sich!'" + input = (u"# coding: utf-7\nstuff = %s" % (sentence,)).encode("utf-7") + tree = self.parse(input, info=info) + assert info.encoding == "utf-7" + input = "# coding: iso-8859-15\nx" + self.parse(input, info=info) + assert info.encoding == "iso-8859-15" + input = "\xEF\xBB\xBF# coding: utf-8\nx" + self.parse(input, info=info) + assert info.encoding == "utf-8" + input = "# coding: utf-8\nx" + info.flags |= consts.PyCF_SOURCE_IS_UTF8 + exc = py.test.raises(SyntaxError, self.parse, input, info=info).value + info.flags &= ~consts.PyCF_SOURCE_IS_UTF8 + assert exc.msg == "coding declaration in unicode string" + input = "\xEF\xBB\xBF# coding: latin-1\nx" + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == "UTF-8 BOM with latin-1 coding cookie" + input = "# coding: not-here" + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == "Unknown encoding: not-here" + input = u"# coding: ascii\n\xe2".encode('utf-8') + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == ("'ascii' codec can't decode byte 0xc3 " + "in position 16: ordinal not in range(128)") + + def test_non_unicode_codec(self): + exc = py.test.raises(SyntaxError, self.parse, """\ +# coding: string-escape +\x70\x72\x69\x6e\x74\x20\x32\x2b\x32\x0a +""").value + assert exc.msg == "codec did not return a unicode object" + + def test_syntax_error(self): + parse = self.parse + exc = py.test.raises(SyntaxError, parse, "name another for").value + assert exc.msg == "invalid syntax" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.text.startswith("name another for") + exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value + assert exc.msg == "EOL while scanning string literal" + assert exc.lineno == 1 + assert exc.offset == 5 + exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value + assert exc.msg == "EOF while scanning triple-quoted string literal" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.lastlineno == 3 + for input in ("())", "(()", "((", "))"): + py.test.raises(SyntaxError, parse, input) + exc = py.test.raises(SyntaxError, parse, "x = (\n\n(),\n(),").value + assert exc.msg == 
"parenthesis is never closed" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.lastlineno == 5 + exc = py.test.raises(SyntaxError, parse, "abc)").value + assert exc.msg == "unmatched ')'" + assert exc.lineno == 1 + assert exc.offset == 4 + + def test_is(self): + self.parse("x is y") + self.parse("x is not y") + + def test_indentation_error(self): + parse = self.parse + input = """ +def f(): +pass""" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "expected an indented block" + assert exc.lineno == 3 + assert exc.text.startswith("pass") + assert exc.offset == 0 + input = "hi\n indented" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "unexpected indent" + input = "def f():\n pass\n next_stmt" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "unindent does not match any outer indentation level" + assert exc.lineno == 3 + + def test_mac_newline(self): + self.parse("this_is\ra_mac\rfile") + + def test_mode(self): + assert self.parse("x = 43*54").type == syms.file_input + tree = self.parse("43**54", "eval") + assert tree.type == syms.eval_input + py.test.raises(SyntaxError, self.parse, "x = 54", "eval") + tree = self.parse("x = 43", "single") + assert tree.type == syms.single_input + + def test_multiline_string(self): + self.parse("''' \n '''") + self.parse("r''' \n '''") + + def test_bytes_literal(self): + self.parse('b" "') + self.parse('br" "') + self.parse('b""" """') + self.parse("b''' '''") + self.parse("br'\\\n'") + + py.test.raises(SyntaxError, self.parse, "b'a\\n") + + def test_new_octal_literal(self): + self.parse('0777') + self.parse('0o777') + self.parse('0o777L') + py.test.raises(SyntaxError, self.parse, "0o778") + + def test_new_binary_literal(self): + self.parse('0b1101') + self.parse('0b0l') + py.test.raises(SyntaxError, self.parse, "0b112") + + def test_universal_newlines(self): + fmt = 'stuff = """hello%sworld"""' + expected_tree = self.parse(fmt % '\n') + for linefeed in ["\r\n","\r"]: + tree = self.parse(fmt % linefeed) + assert expected_tree == tree diff -r 28e2996df412 -r 051349b537b2 pyparser/test/unittest_samples.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/unittest_samples.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,95 @@ +"""test module for CPython / PyPy nested tuples comparison""" + +import os, os.path as osp +import sys +from pyparser.pythonutil import python_parse, pypy_parse +from pprint import pprint +from pyparser import grammar +grammar.DEBUG = False +from symbol import sym_name + + +def name(elt): + return "%s[%s]"% (sym_name.get(elt,elt),elt) + +def read_samples_dir(): + return [osp.join('samples', fname) for fname in os.listdir('samples') if fname.endswith('.py')] + +def print_sym_tuple(nested, level=0, limit=15, names=False, trace=()): + buf = [] + if level <= limit: + buf.append("%s(" % (" "*level)) + else: + buf.append("(") + for index, elt in enumerate(nested): + # Test if debugging and if on last element of error path + if trace and not trace[1:] and index == trace[0]: + buf.append('\n----> ') + if type(elt) is int: + if names: + buf.append(name(elt)) + else: + buf.append(str(elt)) + buf.append(', ') + elif type(elt) is str: + buf.append(repr(elt)) + else: + if level < limit: + buf.append('\n') + buf.extend(print_sym_tuple(elt, level+1, limit, + names, trace[1:])) + buf.append(')') + return buf + +def assert_tuples_equal(tup1, tup2, curpos = ()): + for index, (elt1, elt2) in enumerate(zip(tup1, tup2)): + if elt1 != 
elt2: + if type(elt1) is tuple and type(elt2) is tuple: + assert_tuples_equal(elt1, elt2, curpos + (index,)) + raise AssertionError('Found difference at %s : %s != %s' % + (curpos, name(elt1), name(elt2) ), curpos) + +from time import time, clock +def test_samples( samples ): + time_reports = {} + for sample in samples: + print "testing", sample + tstart1, cstart1 = time(), clock() + pypy_tuples = pypy_parse(sample) + tstart2, cstart2 = time(), clock() + python_tuples = python_parse(sample) + time_reports[sample] = (time() - tstart2, tstart2-tstart1, clock() - cstart2, cstart2-cstart1 ) + #print "-"*10, "PyPy parse results", "-"*10 + #print ''.join(print_sym_tuple(pypy_tuples, names=True)) + #print "-"*10, "CPython parse results", "-"*10 + #print ''.join(print_sym_tuple(python_tuples, names=True)) + print + try: + assert_tuples_equal(pypy_tuples, python_tuples) + except AssertionError as e: + error_path = e.args[-1] + print "ERROR PATH =", error_path + print "="*80 + print file(sample).read() + print "="*80 + print "-"*10, "PyPy parse results", "-"*10 + print ''.join(print_sym_tuple(pypy_tuples, names=True, trace=error_path)) + print "-"*10, "CPython parse results", "-"*10 + print ''.join(print_sym_tuple(python_tuples, names=True, trace=error_path)) + print "Failed on (%s)" % sample + # raise + pprint(time_reports) + +if __name__=="__main__": + import getopt + opts, args = getopt.getopt( sys.argv[1:], "d:", [] ) + for opt, val in opts: + if opt == "-d": + pass +# set_debug(int(val)) + if args: + samples = args + else: + samples = read_samples_dir() + + test_samples( samples )
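
For reference, the added test_pyparse.py above shows how the new parser entry point is driven: construct a PythonParser, describe the compilation with a CompileInfo, and call parse_source. A minimal usage sketch, assuming the pyparser package introduced by this changeset is on the import path (names and source text below are illustrative only):

    # Minimal sketch of the pyparse entry point exercised by the tests above.
    # Assumes the pyparser package added in this changeset is importable.
    from pyparser import pyparse
    from pyparser.pygram import syms

    parser = pyparse.PythonParser()
    info = pyparse.CompileInfo("<example>", "exec")   # filename, compile mode
    tree = parser.parse_source('answer = 6 * 7\n', info)

    # As in test_mode above, the root node type reflects the compile mode
    # ("exec" -> file_input, "eval" -> eval_input, "single" -> single_input).
    assert tree.type == syms.file_input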