# HG changeset patch # User Paul Boddie # Date 1483903239 -3600 # Node ID 051349b537b21177327177f4c7667675e08b1102 # Parent 28e2996df412498f7cb5256a5d10221e51b48649 Added a modified version of pyparser (from PyPy) that provides parser module functionality without relying on CPython. Added licensing information for the pyparser and compiler packages. diff -r 28e2996df412 -r 051349b537b2 compiler/transformer.py --- a/compiler/transformer.py Sun Jan 08 00:27:02 2017 +0100 +++ b/compiler/transformer.py Sun Jan 08 20:20:39 2017 +0100 @@ -26,8 +26,8 @@ # and replace OWNER, ORGANIZATION, and YEAR as appropriate. from compiler.ast import * -import parser -import symbol +import pyparser.pyparse as parser +from pyparser.pygram import syms as symbol import token class WalkerError(StandardError): diff -r 28e2996df412 -r 051349b537b2 docs/COPYING.txt --- a/docs/COPYING.txt Sun Jan 08 00:27:02 2017 +0100 +++ b/docs/COPYING.txt Sun Jan 08 20:20:39 2017 +0100 @@ -16,3 +16,25 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . + +Licence Details for compiler +---------------------------- + +See LICENCE-Python.txt for the licensing details applying to the compiler +package. + +The compiler package has been modified to only provide essential abstract +syntax tree support for Lichen. The following applies to these modifications: + +Copyright (C) 2014, 2015, 2016 Paul Boddie + +Licence Details for pyparser +---------------------------- + +See LICENSE-PyPy.txt for the licensing details applying to the pyparser +package. + +The pyparser package has been modified to work with the modified compiler +package. The following applies to these modifications: + +Copyright (C) 2016 Paul Boddie diff -r 28e2996df412 -r 051349b537b2 docs/LICENCE-Python.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/LICENCE-Python.txt Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,254 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? 
(1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are retained +in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. 
By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. 
+ + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff -r 28e2996df412 -r 051349b537b2 docs/LICENSE-PyPy.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/LICENSE-PyPy.txt Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,479 @@ +License +======= + +Except when otherwise stated (look for LICENSE files in directories or +information at the beginning of each file) all software and documentation in +the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy', +'py', and '_pytest' directories is licensed as follows: + + The MIT License + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ + +PyPy Copyright holders 2003-2017 +----------------------------------- + +Except when otherwise stated (look for LICENSE files or information at +the beginning of each file) the files in the 'pypy' directory are each +copyrighted by one or more of the following people and organizations: + + Armin Rigo + Maciej Fijalkowski + Carl Friedrich Bolz + Amaury Forgeot d'Arc + Antonio Cuni + Samuele Pedroni + Matti Picus + Alex Gaynor + Philip Jenvey + Ronan Lamy + Brian Kearns + Richard Plangger + Michael Hudson + Manuel Jacob + David Schneider + Holger Krekel + Christian Tismer + Hakan Ardo + Benjamin Peterson + Anders Chrigstrom + Eric van Riet Paap + Wim Lavrijsen + Richard Emslie + Alexander Schremmer + Dan Villiom Podlaski Christiansen + Remi Meier + Lukas Diekmann + Sven Hager + Anders Lehmann + Aurelien Campeas + Niklaus Haldimann + Camillo Bruni + Laura Creighton + Romain Guillebert + Toon Verwaest + Leonardo Santagada + Seo Sanghyeon + Ronny Pfannschmidt + Justin Peel + Raffael Tfirst + David Edelsohn + Anders Hammarquist + Jakub Gustak + Gregor Wegberg + Guido Wesdorp + Lawrence Oluyede + Bartosz Skowron + Daniel Roberts + Niko Matsakis + Adrien Di Mascio + Alexander Hesse + Ludovic Aubry + Jacob Hallen + Jason Creighton + Mark Young + Alex Martelli + Spenser Bauman + Michal Bendowski + stian + Jan de Mooij + Tyler Wade + Vincent Legoll + Michael Foord + Stephan Diehl + Stefan Schwarzer + Valentino Volonghi + Tomek Meka + Stefano Rivera + Patrick Maupin + Devin Jeanpierre + Bob Ippolito + Bruno Gola + David Malcolm + Jean-Paul Calderone + Timo Paulssen + Edd Barrett + Squeaky + Marius Gedminas + Alexandre Fayolle + Simon Burton + Martin Matusiak + Nicolas Truessel + Konstantin Lopuhin + Wenzhu Man + John Witulski + Laurence Tratt + Ivan Sichmann Freitas + Greg Price + Dario Bertini + Mark Pearse + Simon Cross + Jeremy Thurgood + Andreas Stührk + Tobias Pape + Jean-Philippe St. Pierre + Guido van Rossum + Pavel Vinogradov + PaweÅ‚ Piotr Przeradowski + Paul deGrandis + Ilya Osadchiy + marky1991 + Tobias Oberstein + Adrian Kuhn + Boris Feigin + tav + Taavi Burns + Georg Brandl + Bert Freudenberg + Stian Andreassen + Wanja Saatkamp + Gerald Klix + Mike Blume + Oscar Nierstrasz + Stefan H. 
Muller + Rami Chowdhury + Eugene Oden + Henry Mason + Vasily Kuznetsov + Preston Timmons + David Ripton + Jeff Terrace + Tim Felgentreff + Dusty Phillips + Lukas Renggli + Guenter Jantzen + William Leslie + Ned Batchelder + Anton Gulenko + Amit Regmi + Ben Young + Jasper Schulz + Nicolas Chauvat + Andrew Durdin + Andrew Chambers + Sergey Matyunin + Michael Schneider + Nicholas Riley + Jason Chu + Igor Trindade Oliveira + Yichao Yu + Rocco Moretti + Gintautas Miliauskas + Michael Twomey + Lucian Branescu Mihaila + anatoly techtonik + Gabriel Lavoie + Olivier Dormond + Jared Grubb + Karl Bartel + Wouter van Heyst + Brian Dorsey + Victor Stinner + Andrews Medina + Sebastian PawluÅ› + Stuart Williams + Daniel Patrick + Aaron Iles + Toby Watson + Antoine Pitrou + Christian Hudon + Michael Cheng + Justas Sadzevicius + Gasper Zejn + Neil Shepperd + Stanislaw Halik + Mikael Schönenberg + Berkin Ilbeyi + Faye Zhao + Elmo Mäntynen + Jonathan David Riehl + Anders Qvist + Corbin Simpson + Chirag Jadwani + Beatrice During + Alex Perry + Vaibhav Sood + Alan McIntyre + Reuben Cummings + Alexander Sedov + p_zieschang@yahoo.de + Attila Gobi + Christopher Pope + Aaron Gallagher + Florin Papa + Christian Tismer + Marc Abramowitz + Dan Stromberg + Arjun Naik + Valentina Mukhamedzhanova + Stefano Parmesan + touilleMan + Alexis Daboville + Jens-Uwe Mager + Carl Meyer + Karl Ramm + Pieter Zieschang + Gabriel + Lukas Vacek + Kunal Grover + Andrew Dalke + Sylvain Thenault + Jakub Stasiak + Nathan Taylor + Vladimir Kryachko + Omer Katz + Mark Williams + Jacek Generowicz + Alejandro J. Cura + Jacob Oscarson + Travis Francis Athougies + Ryan Gonzalez + Ian Foote + Kristjan Valur Jonsson + David Lievens + Neil Blakey-Milner + Lutz Paelike + Lucio Torre + Lars Wassermann + Philipp Rustemeuer + Henrik Vendelbo + Richard Lancaster + Yasir Suhail + Dan Buch + Miguel de Val Borro + Artur Lisiecki + Sergey Kishchenko + Ignas Mikalajunas + Alecsandru Patrascu + Christoph Gerum + Martin Blais + Lene Wagner + Catalin Gabriel Manciu + Tomo Cocoa + Kim Jin Su + rafalgalczynski@gmail.com + Toni Mattis + Amber Brown + Lucas Stadler + Julian Berman + Markus Holtermann + roberto@goyle + Yury V. 
Zaytsev + Anna Katrina Dominguez + Bobby Impollonia + Vasantha Ganesh K + Andrew Thompson + florinpapa + Yusei Tahara + Aaron Tubbs + Ben Darnell + Roberto De Ioris + Logan Chien + Juan Francisco Cantero Hurtado + Ruochen Huang + Jeong YunWon + Godefroid Chappelle + Joshua Gilbert + Dan Colish + Christopher Armstrong + Michael Hudson-Doyle + Anders Sigfridsson + Nikolay Zinov + Jason Michalski + Floris Bruynooghe + Laurens Van Houtven + Akira Li + Gustavo Niemeyer + Stephan Busemann + RafaÅ‚ GaÅ‚czyÅ„ski + Matt Bogosian + timo + Christian Muirhead + Berker Peksag + James Lan + Volodymyr Vladymyrov + shoma hosaka + Ben Mather + Niclas Olofsson + Matthew Miller + Rodrigo Araújo + halgari + Boglarka Vezer + Chris Pressey + Buck Golemon + Diana Popa + Konrad Delong + Dinu Gherman + Chris Lambacher + coolbutuseless@gmail.com + Daniil Yarancev + Jim Baker + Dan Crosta + Nikolaos-Digenis Karagiannis + James Robert + Armin Ronacher + Brett Cannon + Donald Stufft + yrttyr + aliceinwire + OlivierBlanvillain + Dan Sanders + Zooko Wilcox-O Hearn + Tomer Chachamu + Christopher Groskopf + Asmo Soinio + jiaaro + Mads Kiilerich + Antony Lee + Jason Madden + Daniel Neuhäuser + reubano@gmail.com + Yaroslav Fedevych + Jim Hunziker + Markus Unterwaditzer + Even Wiik Thomassen + jbs + squeaky + soareschen + Jonas Pfannschmidt + Kurt Griffiths + Mike Bayer + Stefan Marr + Flavio Percoco + Kristoffer Kleine + Michael Chermside + Anna Ravencroft + pizi + remarkablerocket + Andrey Churin + Zearin + Eli Stevens + Tobias Diaz + Julien Phalip + Roman Podoliaka + Dan Loewenherz + werat + + Heinrich-Heine University, Germany + Open End AB (formerly AB Strakt), Sweden + merlinux GmbH, Germany + tismerysoft GmbH, Germany + Logilab Paris, France + DFKI GmbH, Germany + Impara, Germany + Change Maker, Sweden + University of California Berkeley, USA + Google Inc. + King's College London + +The PyPy Logo as used by http://speed.pypy.org and others was created +by Samuel Reis and is distributed on terms of Creative Commons Share Alike +License. + +License for 'lib-python/2.7' +============================ + +Except when otherwise stated (look for LICENSE files or copyright/license +information at the beginning of each file) the files in the 'lib-python/2.7' +directory are all copyrighted by the Python Software Foundation and licensed +under the terms that you can find here: https://docs.python.org/2/license.html + +License for 'pypy/module/unicodedata/' +====================================== + +The following files are from the website of The Unicode Consortium +at http://www.unicode.org/. For the terms of use of these files, see +http://www.unicode.org/terms_of_use.html . Or they are derived from +files from the above website, and the same terms of use apply. + + CompositionExclusions-*.txt + EastAsianWidth-*.txt + LineBreak-*.txt + UnicodeData-*.txt + UnihanNumeric-*.txt + +License for 'dotviewer/font/' +============================= + +Copyright (C) 2008 The Android Open Source Project + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Detailed license information is contained in the NOTICE file in the +directory. + + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and +acknowledgements for third-party software incorporated in the PyPy +distribution. + +License for 'Tcl/Tk' +-------------------- + +This copy of PyPy contains library code that may, when used, result in +the Tcl/Tk library to be loaded. PyPy also includes code that may be +regarded as being a copy of some parts of the Tcl/Tk header files. +You may see a copy of the License for Tcl/Tk in the file +`lib_pypy/_tkinter/license.terms` included here. + +License for 'bzip2' +------------------- + +This copy of PyPy may be linked (dynamically or statically) with the +bzip2 library. You may see a copy of the License for bzip2/libbzip2 at + + http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html + +License for 'openssl' +--------------------- + +This copy of PyPy may be linked (dynamically or statically) with the +openssl library. You may see a copy of the License for OpenSSL at + + https://www.openssl.org/source/license.html + +License for 'gdbm' +------------------ + +The gdbm module includes code from gdbm.h, which is distributed under +the terms of the GPL license version 2 or any later version. Thus the +gdbm module, provided in the file lib_pypy/gdbm.py, is redistributed +under the terms of the GPL license as well. + +License for 'rpython/rlib/rvmprof/src' +-------------------------------------- + +The code is based on gperftools. You may see a copy of the License for it at + + https://github.com/gperftools/gperftools/blob/master/COPYING diff -r 28e2996df412 -r 051349b537b2 pyparser/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/__init__.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,1 @@ +# empty diff -r 28e2996df412 -r 051349b537b2 pyparser/automata.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/automata.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,120 @@ +# ______________________________________________________________________ +"""Module automata + +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED +TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists +instead of a unique nested one) + +$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $ +""" +# ______________________________________________________________________ +# Module level definitions + +# PYPY Modification: removed the EMPTY class as it's not needed here + + +# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython +# dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into +# more compact strings. +DEFAULT = object() + +# PYPY Modification : removed all automata functions (any, maybe, +# newArcPair, etc.) 
+ +ERROR_STATE = chr(255) + +class DFA: + # ____________________________________________________________ + def __init__(self, states, accepts, start = 0): + """ NOT_RPYTHON """ + assert len(states) < 255 # no support for huge amounts of states + # construct string for looking up state transitions + string_states = [] * len(states) + # compute maximum + maximum = 0 + for state in states: + for key in state: + if key == DEFAULT: + continue + maximum = max(ord(key), maximum) + self.max_char = maximum + 1 + + defaults = [] + for i, state in enumerate(states): + default = ERROR_STATE + if DEFAULT in state: + default = chr(state[DEFAULT]) + defaults.append(default) + string_state = [default] * self.max_char + for key, value in state.iteritems(): + if key == DEFAULT: + continue + assert len(key) == 1 + assert ord(key) < self.max_char + string_state[ord(key)] = chr(value) + string_states.extend(string_state) + self.states = "".join(string_states) + self.defaults = "".join(defaults) + self.accepts = accepts + self.start = start + + # ____________________________________________________________ + + def _next_state(self, item, crntState): + if ord(item) >= self.max_char: + return self.defaults[crntState] + else: + return self.states[crntState * self.max_char + ord(item)] + + def recognize(self, inVec, pos = 0): + crntState = self.start + lastAccept = False + i = pos + for i in range(pos, len(inVec)): + item = inVec[i] + accept = self.accepts[crntState] + crntState = self._next_state(item, crntState) + if crntState != ERROR_STATE: + pass + elif accept: + return i + elif lastAccept: + # This is now needed b/c of exception cases where there are + # transitions to dead states + return i - 1 + else: + return -1 + crntState = ord(crntState) + lastAccept = accept + # if self.states[crntState][1]: + if self.accepts[crntState]: + return i + 1 + elif lastAccept: + return i + else: + return -1 + +# ______________________________________________________________________ + +class NonGreedyDFA (DFA): + + def recognize(self, inVec, pos = 0): + crntState = self.start + i = pos + for i in range(pos, len(inVec)): + item = inVec[i] + accept = self.accepts[crntState] + if accept: + return i + crntState = self._next_state(item, crntState) + if crntState == ERROR_STATE: + return -1 + crntState = ord(crntState) + i += 1 + if self.accepts[crntState]: + return i + else: + return -1 + +# ______________________________________________________________________ +# End of automata.py diff -r 28e2996df412 -r 051349b537b2 pyparser/consts.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/consts.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,8 @@ +""" +Various flags used during the compilation process. +""" + +PyCF_SOURCE_IS_UTF8 = 0x0100 +PyCF_DONT_IMPLY_DEDENT = 0x0200 +PyCF_ONLY_AST = 0x0400 +PyCF_ACCEPT_NULL_BYTES = 0x10000000 # PyPy only, for compile() diff -r 28e2996df412 -r 051349b537b2 pyparser/data/Grammar2.5 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/data/Grammar2.5 Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,148 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. 
+ +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Commands for Kees Blom's railroad program +#diagram:token NAME +#diagram:token NUMBER +#diagram:token STRING +#diagram:token NEWLINE +#diagram:token ENDMARKER +#diagram:token INDENT +#diagram:output\input python.bla +#diagram:token DEDENT +#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm +#diagram:rules + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +funcdef: [decorators] 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME [('as' | NAME) NAME] +dotted_as_name: dotted_name [('as' | NAME) NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' test [ with_var ] ':' suite +with_var: ('as' | NAME) expr +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [',' test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_gexp] ')' | + '[' [listmaker] ']' | + '{' [dictmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_gexp: test ( gen_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictmaker: test ':' test (',' test ':' test)* [','] + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) +argument: test [gen_for] | test '=' test # Really [keyword '='] test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +gen_iter: gen_for | gen_if +gen_for: 'for' exprlist 'in' or_test [gen_iter] +gen_if: 'if' old_test [gen_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff -r 28e2996df412 -r 051349b537b2 pyparser/data/Grammar2.7 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/data/Grammar2.7 Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,143 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake will probably be listening there. 
+ +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: (NEWLINE | stmt)* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | STRING+) +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( comp_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictmaker: test ':' test (',' test ':' test)* [','] +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | + (test (comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. 
+argument: test [comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +comp_iter: comp_for | comp_if +comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff -r 28e2996df412 -r 051349b537b2 pyparser/error.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/error.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,40 @@ + +class SyntaxError(Exception): + """Base class for exceptions raised by the parser.""" + + def __init__(self, msg, lineno=0, offset=0, text=None, filename=None, + lastlineno=0): + self.msg = msg + self.lineno = lineno + self.offset = offset + self.text = text + self.filename = filename + self.lastlineno = lastlineno + + def __str__(self): + return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, + self.lineno, + self.offset, + self.text) + +class IndentationError(SyntaxError): + pass + +class ASTError(Exception): + def __init__(self, msg, ast_node ): + self.msg = msg + self.ast_node = ast_node + + +class TokenError(SyntaxError): + + def __init__(self, msg, line, lineno, column, tokens, lastlineno=0): + SyntaxError.__init__(self, msg, lineno, column, line, + lastlineno=lastlineno) + self.tokens = tokens + +class TokenIndentationError(IndentationError): + + def __init__(self, msg, line, lineno, column, tokens): + SyntaxError.__init__(self, msg, lineno, column, line) + self.tokens = tokens diff -r 28e2996df412 -r 051349b537b2 pyparser/genpytokenize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/genpytokenize.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,340 @@ +#! /usr/bin/env python +"""Module genPytokenize + +Generates finite state automata for recognizing Python tokens. These are hand +coded versions of the regular expressions originally appearing in Ping's +tokenize module in the Python standard library. + +When run from the command line, this should pretty print the DFA machinery. 
+ +$Id: genPytokenize.py,v 1.1 2003/10/02 17:37:17 jriehl Exp $ +""" + +from pyparser.pylexer import * +from pyparser.automata import NonGreedyDFA, DFA, DEFAULT + +def makePyPseudoDFA (): + import string + states = [] + def makeEOL(): + return group(states, + newArcPair(states, "\n"), + chain(states, + newArcPair(states, "\r"), + maybe(states, newArcPair(states, "\n")))) + # ____________________________________________________________ + def makeLineCont (): + return chain(states, + newArcPair(states, "\\"), + makeEOL()) + # ____________________________________________________________ + # Ignore stuff + def makeWhitespace (): + return any(states, groupStr(states, " \f\t")) + # ____________________________________________________________ + def makeComment (): + return chain(states, + newArcPair(states, "#"), + any(states, notGroupStr(states, "\r\n"))) + # ____________________________________________________________ + #ignore = chain(states, + # makeWhitespace(), + # any(states, chain(states, + # makeLineCont(), + # makeWhitespace())), + # maybe(states, makeComment())) + # ____________________________________________________________ + # Names + name = chain(states, + groupStr(states, string.letters + "_"), + any(states, groupStr(states, + string.letters + string.digits + "_"))) + # ____________________________________________________________ + # Digits + def makeDigits (): + return groupStr(states, "0123456789") + # ____________________________________________________________ + # Integer numbers + hexNumber = chain(states, + newArcPair(states, "0"), + groupStr(states, "xX"), + atleastonce(states, + groupStr(states, "0123456789abcdefABCDEF")), + maybe(states, groupStr(states, "lL"))) + octNumber = chain(states, + newArcPair(states, "0"), + maybe(states, + chain(states, + groupStr(states, "oO"), + groupStr(states, "01234567"))), + any(states, groupStr(states, "01234567")), + maybe(states, groupStr(states, "lL"))) + binNumber = chain(states, + newArcPair(states, "0"), + groupStr(states, "bB"), + atleastonce(states, groupStr(states, "01")), + maybe(states, groupStr(states, "lL"))) + decNumber = chain(states, + groupStr(states, "123456789"), + any(states, makeDigits()), + maybe(states, groupStr(states, "lL"))) + intNumber = group(states, hexNumber, octNumber, binNumber, decNumber) + # ____________________________________________________________ + # Exponents + def makeExp (): + return chain(states, + groupStr(states, "eE"), + maybe(states, groupStr(states, "+-")), + atleastonce(states, makeDigits())) + # ____________________________________________________________ + # Floating point numbers + def makeFloat (): + pointFloat = chain(states, + group(states, + chain(states, + atleastonce(states, makeDigits()), + newArcPair(states, "."), + any(states, makeDigits())), + chain(states, + newArcPair(states, "."), + atleastonce(states, makeDigits()))), + maybe(states, makeExp())) + expFloat = chain(states, + atleastonce(states, makeDigits()), + makeExp()) + return group(states, pointFloat, expFloat) + # ____________________________________________________________ + # Imaginary numbers + imagNumber = group(states, + chain(states, + atleastonce(states, makeDigits()), + groupStr(states, "jJ")), + chain(states, + makeFloat(), + groupStr(states, "jJ"))) + # ____________________________________________________________ + # Any old number. 
+ number = group(states, imagNumber, makeFloat(), intNumber) + # ____________________________________________________________ + # Funny + operator = group(states, + chain(states, + chainStr(states, "**"), + maybe(states, newArcPair(states, "="))), + chain(states, + chainStr(states, ">>"), + maybe(states, newArcPair(states, "="))), + chain(states, + chainStr(states, "<<"), + maybe(states, newArcPair(states, "="))), + chainStr(states, "<>"), + chainStr(states, "!="), + chain(states, + chainStr(states, "//"), + maybe(states, newArcPair(states, "="))), + chain(states, + groupStr(states, "+-*/%&|^=<>"), + maybe(states, newArcPair(states, "="))), + newArcPair(states, "~")) + bracket = groupStr(states, "[](){}") + special = group(states, + makeEOL(), + groupStr(states, "@:;.,`")) + funny = group(states, operator, bracket, special) + # ____________________________________________________________ + def makeStrPrefix (): + return chain(states, + maybe(states, groupStr(states, "uUbB")), + maybe(states, groupStr(states, "rR"))) + # ____________________________________________________________ + contStr = group(states, + chain(states, + makeStrPrefix(), + newArcPair(states, "'"), + any(states, + notGroupStr(states, "\r\n'\\")), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, + notGroupStr(states, "\r\n'\\")))), + group(states, + newArcPair(states, "'"), + makeLineCont())), + chain(states, + makeStrPrefix(), + newArcPair(states, '"'), + any(states, + notGroupStr(states, '\r\n"\\')), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, + notGroupStr(states, '\r\n"\\')))), + group(states, + newArcPair(states, '"'), + makeLineCont()))) + triple = chain(states, + makeStrPrefix(), + group(states, + chainStr(states, "'''"), + chainStr(states, '"""'))) + pseudoExtras = group(states, + makeLineCont(), + makeComment(), + triple) + pseudoToken = chain(states, + makeWhitespace(), + group(states, + newArcPair(states, EMPTY), + pseudoExtras, number, funny, contStr, name)) + dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken) + return DFA(dfaStates, dfaAccepts), dfaStates + +# ______________________________________________________________________ + +def makePyEndDFAMap (): + states = [] + single = chain(states, + any(states, notGroupStr(states, "'\\")), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, notGroupStr(states, "'\\")))), + newArcPair(states, "'")) + states, accepts = nfaToDfa(states, *single) + singleDFA = DFA(states, accepts) + states_singleDFA = states + states = [] + double = chain(states, + any(states, notGroupStr(states, '"\\')), + any(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT), + any(states, notGroupStr(states, '"\\')))), + newArcPair(states, '"')) + states, accepts = nfaToDfa(states, *double) + doubleDFA = DFA(states, accepts) + states_doubleDFA = states + states = [] + single3 = chain(states, + any(states, notGroupStr(states, "'\\")), + any(states, + chain(states, + group(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT)), + chain(states, + newArcPair(states, "'"), + notChainStr(states, "''"))), + any(states, notGroupStr(states, "'\\")))), + chainStr(states, "'''")) + states, accepts = nfaToDfa(states, *single3) + single3DFA = NonGreedyDFA(states, accepts) + states_single3DFA = states + states = [] + double3 = chain(states, + any(states, notGroupStr(states, '"\\')), + 
any(states, + chain(states, + group(states, + chain(states, + newArcPair(states, "\\"), + newArcPair(states, DEFAULT)), + chain(states, + newArcPair(states, '"'), + notChainStr(states, '""'))), + any(states, notGroupStr(states, '"\\')))), + chainStr(states, '"""')) + states, accepts = nfaToDfa(states, *double3) + double3DFA = NonGreedyDFA(states, accepts) + states_double3DFA = states + map = {"'" : (singleDFA, states_singleDFA), + '"' : (doubleDFA, states_doubleDFA), + "r" : None, + "R" : None, + "u" : None, + "U" : None, + "b" : None, + "B" : None} + for uniPrefix in ("", "u", "U", "b", "B", ): + for rawPrefix in ("", "r", "R"): + prefix = uniPrefix + rawPrefix + map[prefix + "'''"] = (single3DFA, states_single3DFA) + map[prefix + '"""'] = (double3DFA, states_double3DFA) + return map + +# ______________________________________________________________________ + +def output(name, dfa_class, dfa, states): + import textwrap + lines = [] + i = 0 + for line in textwrap.wrap(repr(dfa.accepts), width = 50): + if i == 0: + lines.append("accepts = ") + else: + lines.append(" ") + lines.append(line) + lines.append("\n") + i += 1 + import StringIO + lines.append("states = [\n") + for numstate, state in enumerate(states): + lines.append(" # ") + lines.append(str(numstate)) + lines.append('\n') + s = StringIO.StringIO() + i = 0 + for k, v in sorted(state.items()): + i += 1 + if k == DEFAULT: + k = "automata.DEFAULT" + else: + k = repr(k) + s.write(k) + s.write('::') + s.write(repr(v)) + if i < len(state): + s.write(', ') + s.write('},') + i = 0 + if len(state) <= 4: + text = [s.getvalue()] + else: + text = textwrap.wrap(s.getvalue(), width=36) + for line in text: + line = line.replace('::', ': ') + if i == 0: + lines.append(' {') + else: + lines.append(' ') + lines.append(line) + lines.append('\n') + i += 1 + lines.append(" ]\n") + lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class)) + return ''.join(lines) + +def main (): + pseudoDFA, states_pseudoDFA = makePyPseudoDFA() + print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA) + endDFAMap = makePyEndDFAMap() + dfa, states = endDFAMap['"""'] + print output("double3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'''"] + print output("single3DFA", "NonGreedyDFA", dfa, states) + dfa, states = endDFAMap["'"] + print output("singleDFA", "DFA", dfa, states) + dfa, states = endDFAMap["\""] + print output("doubleDFA", "DFA", dfa, states) + +# ______________________________________________________________________ + +if __name__ == "__main__": + main() diff -r 28e2996df412 -r 051349b537b2 pyparser/metaparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/metaparser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,357 @@ +""" +Makes a parser from a grammar source. + +Inspired by Guido van Rossum's pgen2. 
+""" + +import StringIO +import tokenize +import token + +from pyparser import parser + + +class PgenError(Exception): + + def __init__(self, msg, location=None): + Exception.__init__(self, msg) + self.location = location + + +class NFA(object): + + def __init__(self): + self.arcs = [] + + def arc(self, to_state, label=None): + self.arcs.append((label, to_state)) + + def find_unlabeled_states(self, into): + if self in into: + return + into.add(self) + for label, state in self.arcs: + if label is None: + state.find_unlabeled_states(into) + + +class DFA(object): + + def __init__(self, nfa_set, final_state): + self.nfas = nfa_set + self.is_final = final_state in nfa_set + self.arcs = {} + + def arc(self, next, label): + self.arcs[label] = next + + def unify_state(self, old, new): + for label, state in self.arcs.iteritems(): + if state is old: + self.arcs[label] = new + + def __repr__(self): + return "" % self.arcs + + def __eq__(self, other): + if not isinstance(other, DFA): + # This shouldn't really happen. + return NotImplemented + if other.is_final != self.is_final: + return False + if len(self.arcs) != len(other.arcs): + return False + for label, state in self.arcs.iteritems(): + try: + other_state = other.arcs[label] + except KeyError: + return False + else: + if other_state is not state: + return False + return True + + +def nfa_to_dfa(start, end): + """Convert an NFA to a DFA(s) + + Each DFA is initially a set of NFA states without labels. We start with the + DFA for the start NFA. Then we add labeled arcs to it pointing to another + set of NFAs (the next state). Finally, we do the same thing to every DFA + that is found and return the list of states. + """ + base_nfas = set() + start.find_unlabeled_states(base_nfas) + state_stack = [DFA(base_nfas, end)] + for state in state_stack: + arcs = {} + for nfa in state.nfas: + for label, sub_nfa in nfa.arcs: + if label is not None: + sub_nfa.find_unlabeled_states(arcs.setdefault(label, set())) + for label, nfa_set in arcs.iteritems(): + for st in state_stack: + if st.nfas == nfa_set: + break + else: + st = DFA(nfa_set, end) + state_stack.append(st) + state.arc(st, label) + return state_stack + +def simplify_dfa(dfa): + changed = True + while changed: + changed = False + for i, state in enumerate(dfa): + for j in xrange(i + 1, len(dfa)): + other_state = dfa[j] + if state == other_state: + del dfa[j] + for sub_state in dfa: + sub_state.unify_state(other_state, state) + changed = True + break + + +class ParserGenerator(object): + """NOT_RPYTHON""" + + def __init__(self, grammar_source): + self.start_symbol = None + self.dfas = {} + stream = StringIO.StringIO(grammar_source) + self.token_stream = tokenize.generate_tokens(stream.readline) + self.parse() + self.first = {} + self.add_first_sets() + + def build_grammar(self, grammar_cls): + gram = grammar_cls() + gram.start = self.start_symbol + names = self.dfas.keys() + names.sort() + names.remove(self.start_symbol) + names.insert(0, self.start_symbol) + # First, build symbol and id mappings. + for name in names: + i = 256 + len(gram.symbol_ids) + gram.symbol_ids[name] = i + gram.symbol_names[i] = name + # Then, iterate through again and finalize labels. 
+ for name in names: + dfa = self.dfas[name] + states = [] + for state in dfa: + arcs = [] + for label, next in state.arcs.iteritems(): + arcs.append((self.make_label(gram, label), dfa.index(next))) + states.append((arcs, state.is_final)) + gram.dfas.append((states, self.make_first(gram, name))) + assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256 + gram.start = gram.symbol_ids[self.start_symbol] + return gram + + def make_label(self, gram, label): + label_index = len(gram.labels) + if label[0].isalpha(): + # Either a symbol or a token. + if label in gram.symbol_ids: + if label in gram.symbol_to_label: + return gram.symbol_to_label[label] + else: + gram.labels.append(gram.symbol_ids[label]) + gram.symbol_to_label[label] = label_index + return label_index + elif label.isupper(): + token_index = gram.TOKENS[label] + if token_index in gram.token_ids: + return gram.token_ids[token_index] + else: + gram.labels.append(token_index) + gram.token_ids[token_index] = label_index + return label_index + else: + # Probably a rule without a definition. + raise PgenError("no such rule: %r" % (label,)) + else: + # A keyword or operator. + value = label.strip("\"'") + if value[0].isalpha(): + if value in gram.keyword_ids: + return gram.keyword_ids[value] + else: + gram.labels.append(gram.KEYWORD_TOKEN) + gram.keyword_ids[value] = label_index + return label_index + else: + try: + token_index = gram.OPERATOR_MAP[value] + except KeyError: + raise PgenError("no such operator: %r" % (value,)) + if token_index in gram.token_ids: + return gram.token_ids[token_index] + else: + gram.labels.append(token_index) + gram.token_ids[token_index] = label_index + return label_index + + def make_first(self, gram, name): + original_firsts = self.first[name] + firsts = dict() + for label in original_firsts: + firsts[self.make_label(gram, label)] = None + return firsts + + def add_first_sets(self): + for name, dfa in self.dfas.iteritems(): + if name not in self.first: + self.get_first(name, dfa) + + def get_first(self, name, dfa): + self.first[name] = None + state = dfa[0] + all_labels = set() + overlap_check = {} + for label, sub_state in state.arcs.iteritems(): + if label in self.dfas: + if label in self.first: + new_labels = self.first[label] + if new_labels is None: + raise PgenError("recursion in rule: %r" % (name,)) + else: + new_labels = self.get_first(label, self.dfas[label]) + all_labels.update(new_labels) + overlap_check[label] = new_labels + else: + all_labels.add(label) + overlap_check[label] = set((label,)) + inverse = {} + for label, their_first in overlap_check.iteritems(): + for sub_label in their_first: + if sub_label in inverse: + raise PgenError("ambiguous symbol with label %s" + % (label,)) + inverse[sub_label] = label + self.first[name] = all_labels + return all_labels + + def expect(self, token_type, value=None): + if token_type != self.type: + expected = token.tok_name[token_type] + got = token.tok_name[self.type] + raise PgenError("expected token %s but got %s" % (expected, got), + self.location) + current_value = self.value + if value is not None: + if value != current_value: + msg = "expected %r but got %r" % (value, current_value) + raise PgenError(msg,self.location) + self.advance_token() + return current_value + + def test_token(self, token_type, value): + if self.type == token_type and self.value == value: + return True + return False + + def advance_token(self): + data = self.token_stream.next() + # Ignore comments and non-logical newlines. 
+ while data[0] in (tokenize.NL, tokenize.COMMENT): + data = self.token_stream.next() + self.type, self.value = data[:2] + self.location = data[2:] + + def parse(self): + self.advance_token() + while self.type != token.ENDMARKER: + # Skip over whitespace. + while self.type == token.NEWLINE: + self.advance_token() + name, start_state, end_state = self.parse_rule() + dfa = nfa_to_dfa(start_state, end_state) + simplify_dfa(dfa) + self.dfas[name] = dfa + if self.start_symbol is None: + self.start_symbol = name + + def parse_rule(self): + # RULE: NAME ':' ALTERNATIVES + name = self.expect(token.NAME) + self.expect(token.OP, ":") + start_state, end_state = self.parse_alternatives() + self.expect(token.NEWLINE) + return name, start_state, end_state + + def parse_alternatives(self): + # ALTERNATIVES: ITEMS ('|' ITEMS)* + first_state, end_state = self.parse_items() + if self.test_token(token.OP, "|"): + # Link all alternatives into a enclosing set of states. + enclosing_start_state = NFA() + enclosing_end_state = NFA() + enclosing_start_state.arc(first_state) + end_state.arc(enclosing_end_state) + while self.test_token(token.OP, "|"): + self.advance_token() + sub_start_state, sub_end_state = self.parse_items() + enclosing_start_state.arc(sub_start_state) + sub_end_state.arc(enclosing_end_state) + first_state = enclosing_start_state + end_state = enclosing_end_state + return first_state, end_state + + def parse_items(self): + # ITEMS: ITEM+ + first_state, end_state = self.parse_item() + while self.type in (token.STRING, token.NAME) or \ + self.test_token(token.OP, "(") or \ + self.test_token(token.OP, "["): + sub_first_state, new_end_state = self.parse_item() + end_state.arc(sub_first_state) + end_state = new_end_state + return first_state, end_state + + def parse_item(self): + # ITEM: '[' ALTERNATIVES ']' | ATOM ['+' | '*'] + if self.test_token(token.OP, "["): + self.advance_token() + start_state, end_state = self.parse_alternatives() + self.expect(token.OP, "]") + # Bypass the rule if this is optional. + start_state.arc(end_state) + return start_state, end_state + else: + atom_state, next_state = self.parse_atom() + # Check for a repeater. + if self.type == token.OP and self.value in ("+", "*"): + next_state.arc(atom_state) + repeat = self.value + self.advance_token() + if repeat == "*": + # Optionally repeated + return atom_state, atom_state + else: + # Required + return atom_state, next_state + else: + return atom_state, next_state + + def parse_atom(self): + # ATOM: '(' ALTERNATIVES ')' | NAME | STRING + if self.test_token(token.OP, "("): + self.advance_token() + rule = self.parse_alternatives() + self.expect(token.OP, ")") + return rule + elif self.type in (token.NAME, token.STRING): + atom_state = NFA() + next_state = NFA() + atom_state.arc(next_state, self.value) + self.advance_token() + return atom_state, next_state + else: + invalid = token.tok_name[self.type] + raise PgenError("unexpected token: %s" % (invalid,), + self.location) diff -r 28e2996df412 -r 051349b537b2 pyparser/parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/parser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,287 @@ +""" +A CPython inspired RPython parser. +""" + + +class Grammar(object): + """ + Base Grammar object. + + Pass this to ParserGenerator.build_grammar to fill it with useful values for + the Parser. 
+ """ + + def __init__(self): + self.symbol_ids = {} + self.symbol_names = {} + self.symbol_to_label = {} + self.keyword_ids = {} + self.dfas = [] + self.labels = [0] + self.token_ids = {} + self.start = -1 + + def shared_copy(self): + new = self.__class__() + new.symbol_ids = self.symbol_ids + new.symbols_names = self.symbol_names + new.keyword_ids = self.keyword_ids + new.dfas = self.dfas + new.labels = self.labels + new.token_ids = self.token_ids + return new + + def _freeze_(self): + # Remove some attributes not used in parsing. + try: + del self.symbol_to_label + del self.symbol_names + del self.symbol_ids + except AttributeError: + pass + return True + + +class Node(object): + + __slots__ = ("type", ) + + def __init__(self, type): + self.type = type + + def __eq__(self, other): + raise NotImplementedError("abstract base class") + + def __ne__(self, other): + return not self == other + + def get_value(self): + return None + + def get_child(self, i): + raise NotImplementedError("abstract base class") + + def num_children(self): + return 0 + + def append_child(self, child): + raise NotImplementedError("abstract base class") + + def get_lineno(self): + raise NotImplementedError("abstract base class") + + def get_column(self): + raise NotImplementedError("abstract base class") + + +class Terminal(Node): + __slots__ = ("value", "lineno", "column") + def __init__(self, type, value, lineno, column): + Node.__init__(self, type) + self.value = value + self.lineno = lineno + self.column = column + + def __repr__(self): + return "Terminal(type=%s, value=%r)" % (self.type, self.value) + + def __eq__(self, other): + # For tests. + return (type(self) == type(other) and + self.type == other.type and + self.value == other.value) + + def get_value(self): + return self.value + + def get_lineno(self): + return self.lineno + + def get_column(self): + return self.column + + +class AbstractNonterminal(Node): + __slots__ = () + + def get_lineno(self): + return self.get_child(0).get_lineno() + + def get_column(self): + return self.get_child(0).get_column() + + def __eq__(self, other): + # For tests. 
+ # grumble, annoying + if not isinstance(other, AbstractNonterminal): + return False + if self.type != other.type: + return False + if self.num_children() != other.num_children(): + return False + for i in range(self.num_children()): + if self.get_child(i) != other.get_child(i): + return False + return True + + +class Nonterminal(AbstractNonterminal): + __slots__ = ("_children", ) + def __init__(self, type, children): + Node.__init__(self, type) + self._children = children + + def __repr__(self): + return "Nonterminal(type=%s, children=%r)" % (self.type, self._children) + + def get_child(self, i): + return self._children[i] + + def num_children(self): + return len(self._children) + + def append_child(self, child): + self._children.append(child) + + +class Nonterminal1(AbstractNonterminal): + __slots__ = ("_child", ) + def __init__(self, type, child): + Node.__init__(self, type) + self._child = child + + def __repr__(self): + return "Nonterminal(type=%s, children=[%r])" % (self.type, self._child) + + def get_child(self, i): + assert i == 0 or i == -1 + return self._child + + def num_children(self): + return 1 + + def append_child(self, child): + assert 0, "should be unreachable" + + + +class ParseError(Exception): + + def __init__(self, msg, token_type, value, lineno, column, line, + expected=-1): + self.msg = msg + self.token_type = token_type + self.value = value + self.lineno = lineno + self.column = column + self.line = line + self.expected = expected + + def __str__(self): + return "ParserError(%s, %r)" % (self.token_type, self.value) + + +class Parser(object): + + def __init__(self, grammar): + self.grammar = grammar + self.root = None + self.stack = None + + def prepare(self, start=-1): + """Setup the parser for parsing. + + Takes the starting symbol as an argument. + """ + if start == -1: + start = self.grammar.start + self.root = None + current_node = Nonterminal(start, []) + self.stack = [] + self.stack.append((self.grammar.dfas[start - 256], 0, current_node)) + + def add_token(self, token_type, value, lineno, column, line): + label_index = self.classify(token_type, value, lineno, column, line) + sym_id = 0 # for the annotator + while True: + dfa, state_index, node = self.stack[-1] + states, first = dfa + arcs, is_accepting = states[state_index] + for i, next_state in arcs: + sym_id = self.grammar.labels[i] + if label_index == i: + # We matched a non-terminal. + self.shift(next_state, token_type, value, lineno, column) + state = states[next_state] + # While the only possible action is to accept, pop nodes off + # the stack. + while state[1] and not state[0]: + self.pop() + if not self.stack: + # Parsing is done. + return True + dfa, state_index, node = self.stack[-1] + state = dfa[0][state_index] + return False + elif sym_id >= 256: + sub_node_dfa = self.grammar.dfas[sym_id - 256] + # Check if this token can start a child node. + if label_index in sub_node_dfa[1]: + self.push(sub_node_dfa, next_state, sym_id, lineno, + column) + break + else: + # We failed to find any arcs to another state, so unless this + # state is accepting, it's invalid input. + if is_accepting: + self.pop() + if not self.stack: + raise ParseError("too much input", token_type, value, + lineno, column, line) + else: + # If only one possible input would satisfy, attach it to the + # error. 
+ if len(arcs) == 1: + expected = sym_id + else: + expected = -1 + raise ParseError("bad input", token_type, value, lineno, + column, line, expected) + + def classify(self, token_type, value, lineno, column, line): + """Find the label for a token.""" + if token_type == self.grammar.KEYWORD_TOKEN: + label_index = self.grammar.keyword_ids.get(value, -1) + if label_index != -1: + return label_index + label_index = self.grammar.token_ids.get(token_type, -1) + if label_index == -1: + raise ParseError("invalid token", token_type, value, lineno, column, + line) + return label_index + + def shift(self, next_state, token_type, value, lineno, column): + """Shift a non-terminal and prepare for the next state.""" + dfa, state, node = self.stack[-1] + new_node = Terminal(token_type, value, lineno, column) + node.append_child(new_node) + self.stack[-1] = (dfa, next_state, node) + + def push(self, next_dfa, next_state, node_type, lineno, column): + """Push a terminal and adjust the current state.""" + dfa, state, node = self.stack[-1] + new_node = Nonterminal(node_type, []) + self.stack[-1] = (dfa, next_state, node) + self.stack.append((next_dfa, 0, new_node)) + + def pop(self): + """Pop an entry off the stack and make its node a child of the last.""" + dfa, state, node = self.stack.pop() + if self.stack: + # we are now done with node, so we can store it more efficiently if + # it has just one child + if node.num_children() == 1: + node = Nonterminal1(node.type, node.get_child(0)) + self.stack[-1][2].append_child(node) + else: + self.root = node diff -r 28e2996df412 -r 051349b537b2 pyparser/pygram.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pygram.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,43 @@ +import os +from pyparser import parser, pytoken, metaparser + +class PythonGrammar(parser.Grammar): + + KEYWORD_TOKEN = pytoken.python_tokens["NAME"] + TOKENS = pytoken.python_tokens + OPERATOR_MAP = pytoken.python_opmap + +def _get_python_grammar(): + here = os.path.dirname(__file__) + fp = open(os.path.join(here, "data", "Grammar2.7")) + try: + gram_source = fp.read() + finally: + fp.close() + pgen = metaparser.ParserGenerator(gram_source) + return pgen.build_grammar(PythonGrammar) + + +python_grammar = _get_python_grammar() +python_grammar_no_print = python_grammar.shared_copy() +python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() +del python_grammar_no_print.keyword_ids["print"] + +class _Tokens(object): + pass + +for tok_name, idx in pytoken.python_tokens.iteritems(): + setattr(_Tokens, tok_name, idx) +tokens = _Tokens() + +class _Symbols(object): + pass +rev_lookup = {} +for sym_name, idx in python_grammar.symbol_ids.iteritems(): + setattr(_Symbols, sym_name, idx) + rev_lookup[idx] = sym_name +syms = _Symbols() +syms._rev_lookup = rev_lookup # for debugging +syms.sym_name = rev_lookup # for symbol module compatibility + +del _get_python_grammar, _Tokens, tok_name, sym_name, idx diff -r 28e2996df412 -r 051349b537b2 pyparser/pylexer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pylexer.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,240 @@ +# Used by genpytokenize.py to generate the parser in pytokenize.py +from pyparser.automata import DFA, DEFAULT + +class EMPTY: pass + +def newArcPair (states, transitionLabel): + s1Index = len(states) + s2Index = s1Index + 1 + states.append([(transitionLabel, s2Index)]) + states.append([]) + return s1Index, s2Index + +# ______________________________________________________________________ + +def chain 
(states, *stateIndexPairs): + if len(stateIndexPairs) > 1: + start, lastFinish = stateIndexPairs[0] + for nStart, nFinish in stateIndexPairs[1:]: + states[lastFinish].append((EMPTY, nStart)) + lastFinish = nFinish + return start, nFinish + else: + return stateIndexPairs[0] + + +# ______________________________________________________________________ + +def chainStr (states, str): + return chain(states, *map(lambda x : newArcPair(states, x), str)) + +# ______________________________________________________________________ + +def notChainStr (states, str): + """XXX I'm not sure this is how it should be done, but I'm going to + try it anyway. Note that for this case, I require only single character + arcs, since I would have to basically invert all accepting states and + non-accepting states of any sub-NFA's. + """ + assert len(str) > 0 + arcs = map(lambda x : newArcPair(states, x), str) + finish = len(states) + states.append([]) + start, lastFinish = arcs[0] + states[start].append((EMPTY, finish)) + for crntStart, crntFinish in arcs[1:]: + states[lastFinish].append((EMPTY, crntStart)) + states[crntStart].append((EMPTY, finish)) + return start, finish + +# ______________________________________________________________________ + +def group (states, *stateIndexPairs): + if len(stateIndexPairs) > 1: + start = len(states) + finish = start + 1 + startList = [] + states.append(startList) + states.append([]) + for eStart, eFinish in stateIndexPairs: + startList.append((EMPTY, eStart)) + states[eFinish].append((EMPTY, finish)) + return start, finish + else: + return stateIndexPairs[0] + +# ______________________________________________________________________ + +def groupStr (states, str): + return group(states, *map(lambda x : newArcPair(states, x), str)) + +# ______________________________________________________________________ + +def notGroup (states, *stateIndexPairs): + """Like group, but will add a DEFAULT transition to a new end state, + causing anything in the group to not match by going to a dead state. + XXX I think this is right... 
+ """ + start, dead = group(states, *stateIndexPairs) + finish = len(states) + states.append([]) + states[start].append((DEFAULT, finish)) + return start, finish + +# ______________________________________________________________________ + +def notGroupStr (states, str): + return notGroup(states, *map(lambda x : newArcPair(states, x), str)) +# ______________________________________________________________________ + +def any (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[finish].append((EMPTY, start)) + return start, start + +# ______________________________________________________________________ + +def maybe (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[start].append((EMPTY, finish)) + return start, finish + +# ______________________________________________________________________ + +def atleastonce (states, *stateIndexPairs): + start, finish = group(states, *stateIndexPairs) + states[finish].append((EMPTY, start)) + return start, finish + +# ______________________________________________________________________ + +def closure (states, start, result = 0L): + if None == result: + result = 0L + if 0 == (result & (1L << start)): + result |= (1L << start) + for label, arrow in states[start]: + if label == EMPTY: + result |= closure(states, arrow, result) + return result + +# ______________________________________________________________________ + +def nfaToDfa (states, start, finish): + tempStates = [] + startClosure = closure(states, start) + crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))] + tempStates.append(crntTempState) + index = 0 + while index < len(tempStates): + crntTempState = tempStates[index] + crntClosure, crntArcs, crntAccept = crntTempState + for index2 in range(0, len(states)): + if 0 != (crntClosure & (1L << index2)): + for label, nfaArrow in states[index2]: + if label == EMPTY: + continue + foundTempArc = False + for tempArc in crntArcs: + if tempArc[0] == label: + foundTempArc = True + break + if not foundTempArc: + tempArc = [label, -1, 0L] + crntArcs.append(tempArc) + tempArc[2] = closure(states, nfaArrow, tempArc[2]) + for arcIndex in range(0, len(crntArcs)): + label, arrow, targetStates = crntArcs[arcIndex] + targetFound = False + arrow = 0 + for destTempState in tempStates: + if destTempState[0] == targetStates: + targetFound = True + break + arrow += 1 + if not targetFound: + assert arrow == len(tempStates) + newState = [targetStates, [], 0 != (targetStates & + (1L << finish))] + tempStates.append(newState) + crntArcs[arcIndex][1] = arrow + index += 1 + tempStates = simplifyTempDfa(tempStates) + states = finalizeTempDfa(tempStates) + return states + +# ______________________________________________________________________ + +def sameState (s1, s2): + """sameState(s1, s2) + Note: + state := [ nfaclosure : Long, [ arc ], accept : Boolean ] + arc := [ label, arrow : Int, nfaClosure : Long ] + """ + if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]): + return False + for arcIndex in range(0, len(s1[1])): + arc1 = s1[1][arcIndex] + arc2 = s2[1][arcIndex] + if arc1[:-1] != arc2[:-1]: + return False + return True + +# ______________________________________________________________________ + +def simplifyTempDfa (tempStates): + """simplifyTempDfa (tempStates) + """ + changes = True + deletedStates = [] + while changes: + changes = False + for i in range(1, len(tempStates)): + if i in deletedStates: + continue + for j in range(0, i): + if j in deletedStates: + continue + if 
sameState(tempStates[i], tempStates[j]): + deletedStates.append(i) + for k in range(0, len(tempStates)): + if k in deletedStates: + continue + for arc in tempStates[k][1]: + if arc[1] == i: + arc[1] = j + changes = True + break + for stateIndex in deletedStates: + tempStates[stateIndex] = None + return tempStates +# ______________________________________________________________________ + +def finalizeTempDfa (tempStates): + """finalizeTempDfa (tempStates) + + Input domain: + tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ] + tempArc := [ label, arrow, nfaClosure ] + + Output domain: + state := [ arcMap, accept : Boolean ] + """ + states = [] + accepts = [] + stateMap = {} + tempIndex = 0 + for tempIndex in range(0, len(tempStates)): + tempState = tempStates[tempIndex] + if None != tempState: + stateMap[tempIndex] = len(states) + states.append({}) + accepts.append(tempState[2]) + for tempIndex in stateMap.keys(): + stateBitset, tempArcs, accepting = tempStates[tempIndex] + newIndex = stateMap[tempIndex] + arcMap = states[newIndex] + for tempArc in tempArcs: + arcMap[tempArc[0]] = stateMap[tempArc[1]] + return states, accepts + diff -r 28e2996df412 -r 051349b537b2 pyparser/pyparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pyparse.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,200 @@ +from pyparser import parser, pytokenizer, pygram, error +from pyparser import consts + +def recode_to_utf8(bytes, encoding): + text = bytes.decode(encoding) + if not isinstance(text, unicode): + raise error.SyntaxError("codec did not return a unicode object") + recoded = text.encode("utf-8") + return recoded + +def _normalize_encoding(encoding): + """returns normalized name for + + see dist/src/Parser/tokenizer.c 'get_normal_name()' + for implementation details / reference + + NOTE: for now, parser.suite() raises a MemoryError when + a bad encoding is used. (SF bug #979739) + """ + if encoding is None: + return None + # lower() + '_' / '-' conversion + encoding = encoding.replace('_', '-').lower() + if encoding == 'utf-8' or encoding.startswith('utf-8-'): + return 'utf-8' + for variant in ['latin-1', 'iso-latin-1', 'iso-8859-1']: + if (encoding == variant or + encoding.startswith(variant + '-')): + return 'iso-8859-1' + return encoding + +def _check_for_encoding(s): + eol = s.find('\n') + if eol < 0: + return _check_line_for_encoding(s)[0] + enc, again = _check_line_for_encoding(s[:eol]) + if enc or not again: + return enc + eol2 = s.find('\n', eol + 1) + if eol2 < 0: + return _check_line_for_encoding(s[eol + 1:])[0] + return _check_line_for_encoding(s[eol + 1:eol2])[0] + + +def _check_line_for_encoding(line): + """returns the declared encoding or None""" + i = 0 + for i in range(len(line)): + if line[i] == '#': + break + if line[i] not in ' \t\014': + return None, False # Not a comment, don't read the second line. + return pytokenizer.match_encoding_declaration(line[i:]), True + + +class CompileInfo(object): + """Stores information about the source being compiled. + + * filename: The filename of the source. + * mode: The parse mode to use. ('exec', 'eval', or 'single') + * flags: Parser and compiler flags. + * encoding: The source encoding. 
+ """ + + def __init__(self, filename, mode="exec", flags=0): + self.filename = filename + self.mode = mode + self.encoding = None + self.flags = flags + + +_targets = { +'eval' : pygram.syms.eval_input, +'single' : pygram.syms.single_input, +'exec' : pygram.syms.file_input, +} + +class PythonParser(parser.Parser): + + def __init__(self, grammar=pygram.python_grammar): + parser.Parser.__init__(self, grammar) + + def parse_source(self, textsrc, compile_info): + """Main entry point for parsing Python source. + + Everything from decoding the source to tokenizing to building the parse + tree is handled here. + """ + # Detect source encoding. + enc = None + if textsrc.startswith("\xEF\xBB\xBF"): + textsrc = textsrc[3:] + enc = 'utf-8' + # If an encoding is explicitly given check that it is utf-8. + decl_enc = _check_for_encoding(textsrc) + if decl_enc and decl_enc != "utf-8": + raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc, + filename=compile_info.filename) + elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8: + enc = 'utf-8' + if _check_for_encoding(textsrc) is not None: + raise error.SyntaxError("coding declaration in unicode string", + filename=compile_info.filename) + else: + enc = _normalize_encoding(_check_for_encoding(textsrc)) + if enc is not None and enc not in ('utf-8', 'iso-8859-1'): + try: + textsrc = recode_to_utf8(textsrc, enc) + except LookupError as e: + # if the codec is not found, LookupError is raised. + raise error.SyntaxError("Unknown encoding: %s" % enc, + filename=compile_info.filename) + # Transform unicode errors into SyntaxError + except UnicodeDecodeError as e: + message = str(e) + raise error.SyntaxError(message) + + flags = compile_info.flags + + # The tokenizer is very picky about how it wants its input. + source_lines = textsrc.splitlines(True) + if source_lines and not source_lines[-1].endswith("\n"): + source_lines[-1] += '\n' + if textsrc and textsrc[-1] == "\n": + flags &= ~consts.PyCF_DONT_IMPLY_DEDENT + + self.prepare(_targets[compile_info.mode]) + tp = 0 + try: + try: + # Note: we no longer pass the CO_FUTURE_* to the tokenizer, + # which is expected to work independently of them. It's + # certainly the case for all futures in Python <= 2.7. + tokens = pytokenizer.generate_tokens(source_lines, flags) + + self.grammar = pygram.python_grammar + + for tp, value, lineno, column, line in tokens: + if self.add_token(tp, value, lineno, column, line): + break + except error.TokenError as e: + e.filename = compile_info.filename + raise + except parser.ParseError as e: + # Catch parse errors, pretty them up and reraise them as a + # SyntaxError. + new_err = error.IndentationError + if tp == pygram.tokens.INDENT: + msg = "unexpected indent" + elif e.expected == pygram.tokens.INDENT: + msg = "expected an indented block" + else: + new_err = error.SyntaxError + msg = "invalid syntax" + raise new_err(msg, e.lineno, e.column, e.line, + compile_info.filename) + else: + tree = self.root + finally: + # Avoid hanging onto the tree. 
+ self.root = None + if enc is not None: + compile_info.encoding = enc + return tree + +def parse(filename): + """returns the parsed contents of """ + info = CompileInfo(filename) + f = open(filename) + try: + return PythonParser().parse_source(f.read(), info) + finally: + f.close() + +def suite(text): + """returns the parsed form of the given program """ + info = CompileInfo("") + return PythonParser().parse_source(text, info) + +def expr(text): + """returns the parsed form of the given expression """ + info = CompileInfo("", "single") + return PythonParser().parse_source(text, info) + +def st2tuple(tree, line_info=True, col_info=False): + """returns in tuple form for the compiler package""" + if isinstance(tree, parser.AbstractNonterminal): + l = [tree.type] + for i in range(0, tree.num_children()): + l.append(st2tuple(tree.get_child(i))) + return tuple(l) + elif isinstance(tree, parser.Terminal): + l = [tree.type, tree.value] + if line_info: + l.append(tree.get_lineno()) + if col_info: + l.append(tree.get_column()) + return tuple(l) + else: + raise TypeError, tree diff -r 28e2996df412 -r 051349b537b2 pyparser/pytoken.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytoken.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,71 @@ +"""Python token definitions.""" + +python_tokens = {} +python_opmap = {} + +def _add_tok(name, *values): + index = len(python_tokens) + assert index < 256 + python_tokens[name] = index + for value in values: + python_opmap[value] = index + +_add_tok('ENDMARKER') +_add_tok('NAME') +_add_tok('NUMBER') +_add_tok('STRING') +_add_tok('NEWLINE') +_add_tok('INDENT') +_add_tok('DEDENT') +_add_tok('LPAR', "(") +_add_tok('RPAR', ")") +_add_tok('LSQB', "[") +_add_tok('RSQB', "]") +_add_tok('COLON', ":") +_add_tok('COMMA', "," ) +_add_tok('SEMI', ";" ) +_add_tok('PLUS', "+" ) +_add_tok('MINUS', "-" ) +_add_tok('STAR', "*" ) +_add_tok('SLASH', "/" ) +_add_tok('VBAR', "|" ) +_add_tok('AMPER', "&" ) +_add_tok('LESS', "<" ) +_add_tok('GREATER', ">" ) +_add_tok('EQUAL', "=" ) +_add_tok('DOT', "." ) +_add_tok('PERCENT', "%" ) +_add_tok('BACKQUOTE', "`" ) +_add_tok('LBRACE', "{" ) +_add_tok('RBRACE', "}" ) +_add_tok('EQEQUAL', "==" ) +_add_tok('NOTEQUAL', "!=", "<>" ) +_add_tok('LESSEQUAL', "<=" ) +_add_tok('GREATEREQUAL', ">=" ) +_add_tok('TILDE', "~" ) +_add_tok('CIRCUMFLEX', "^" ) +_add_tok('LEFTSHIFT', "<<" ) +_add_tok('RIGHTSHIFT', ">>" ) +_add_tok('DOUBLESTAR', "**" ) +_add_tok('PLUSEQUAL', "+=" ) +_add_tok('MINEQUAL', "-=" ) +_add_tok('STAREQUAL', "*=" ) +_add_tok('SLASHEQUAL', "/=" ) +_add_tok('PERCENTEQUAL', "%=" ) +_add_tok('AMPEREQUAL', "&=" ) +_add_tok('VBAREQUAL', "|=" ) +_add_tok('CIRCUMFLEXEQUAL', "^=" ) +_add_tok('LEFTSHIFTEQUAL', "<<=" ) +_add_tok('RIGHTSHIFTEQUAL', ">>=" ) +_add_tok('DOUBLESTAREQUAL', "**=" ) +_add_tok('DOUBLESLASH', "//" ) +_add_tok('DOUBLESLASHEQUAL',"//=" ) +_add_tok('AT', "@" ) +_add_tok('OP') +_add_tok('ERRORTOKEN') + +# extra PyPy-specific tokens +_add_tok("COMMENT") +_add_tok("NL") + +del _add_tok diff -r 28e2996df412 -r 051349b537b2 pyparser/pytokenize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytokenize.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,375 @@ +# ______________________________________________________________________ +"""Module pytokenize + +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED +TO BE ANNOTABLE (Mainly made lists homogeneous) + +This is a modified version of Ka-Ping Yee's tokenize module found in the +Python standard library. 
+ +The primary modification is the removal of the tokenizer's dependence on the +standard Python regular expression module, which is written in C. The regular +expressions have been replaced with hand built DFA's using the +basil.util.automata module. + +$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ +""" +# ______________________________________________________________________ + +from pyparser import automata + +__all__ = [ "tokenize" ] + +# ______________________________________________________________________ +# Automatically generated DFA's + +accepts = [True, True, True, True, True, True, True, True, + True, True, False, True, True, True, True, False, + False, False, True, False, False, True, False, + False, True, False, True, False, True, False, + False, True, False, False, True, True, True, + False, False, True, False, False, False, True] +states = [ + # 0 + {'\t': 0, '\n': 13, '\x0c': 0, + '\r': 14, ' ': 0, '!': 10, '"': 16, + '#': 18, '%': 12, '&': 12, "'": 15, + '(': 13, ')': 13, '*': 7, '+': 12, + ',': 13, '-': 12, '.': 6, '/': 11, + '0': 4, '1': 5, '2': 5, '3': 5, + '4': 5, '5': 5, '6': 5, '7': 5, + '8': 5, '9': 5, ':': 13, ';': 13, + '<': 9, '=': 12, '>': 8, '@': 13, + 'A': 1, 'B': 2, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, + 'U': 2, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '[': 13, '\\': 17, + ']': 13, '^': 12, '_': 1, '`': 13, + 'a': 1, 'b': 2, 'c': 1, 'd': 1, + 'e': 1, 'f': 1, 'g': 1, 'h': 1, + 'i': 1, 'j': 1, 'k': 1, 'l': 1, + 'm': 1, 'n': 1, 'o': 1, 'p': 1, + 'q': 1, 'r': 3, 's': 1, 't': 1, + 'u': 2, 'v': 1, 'w': 1, 'x': 1, + 'y': 1, 'z': 1, '{': 13, '|': 12, + '}': 13, '~': 13}, + # 1 + {'0': 1, '1': 1, '2': 1, '3': 1, + '4': 1, '5': 1, '6': 1, '7': 1, + '8': 1, '9': 1, 'A': 1, 'B': 1, + 'C': 1, 'D': 1, 'E': 1, 'F': 1, + 'G': 1, 'H': 1, 'I': 1, 'J': 1, + 'K': 1, 'L': 1, 'M': 1, 'N': 1, + 'O': 1, 'P': 1, 'Q': 1, 'R': 1, + 'S': 1, 'T': 1, 'U': 1, 'V': 1, + 'W': 1, 'X': 1, 'Y': 1, 'Z': 1, + '_': 1, 'a': 1, 'b': 1, 'c': 1, + 'd': 1, 'e': 1, 'f': 1, 'g': 1, + 'h': 1, 'i': 1, 'j': 1, 'k': 1, + 'l': 1, 'm': 1, 'n': 1, 'o': 1, + 'p': 1, 'q': 1, 'r': 1, 's': 1, + 't': 1, 'u': 1, 'v': 1, 'w': 1, + 'x': 1, 'y': 1, 'z': 1}, + # 2 + {'"': 16, "'": 15, '0': 1, '1': 1, + '2': 1, '3': 1, '4': 1, '5': 1, + '6': 1, '7': 1, '8': 1, '9': 1, + 'A': 1, 'B': 1, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, + 'U': 1, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, + 'b': 1, 'c': 1, 'd': 1, 'e': 1, + 'f': 1, 'g': 1, 'h': 1, 'i': 1, + 'j': 1, 'k': 1, 'l': 1, 'm': 1, + 'n': 1, 'o': 1, 'p': 1, 'q': 1, + 'r': 3, 's': 1, 't': 1, 'u': 1, + 'v': 1, 'w': 1, 'x': 1, 'y': 1, + 'z': 1}, + # 3 + {'"': 16, "'": 15, '0': 1, '1': 1, + '2': 1, '3': 1, '4': 1, '5': 1, + '6': 1, '7': 1, '8': 1, '9': 1, + 'A': 1, 'B': 1, 'C': 1, 'D': 1, + 'E': 1, 'F': 1, 'G': 1, 'H': 1, + 'I': 1, 'J': 1, 'K': 1, 'L': 1, + 'M': 1, 'N': 1, 'O': 1, 'P': 1, + 'Q': 1, 'R': 1, 'S': 1, 'T': 1, + 'U': 1, 'V': 1, 'W': 1, 'X': 1, + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, + 'b': 1, 'c': 1, 'd': 1, 'e': 1, + 'f': 1, 'g': 1, 'h': 1, 'i': 1, + 'j': 1, 'k': 1, 'l': 1, 'm': 1, + 'n': 1, 'o': 1, 'p': 1, 'q': 1, + 'r': 1, 's': 1, 't': 1, 'u': 1, + 'v': 1, 'w': 1, 'x': 1, 'y': 1, + 'z': 1}, + # 4 + {'.': 24, '0': 21, '1': 21, '2': 21, + '3': 21, '4': 21, '5': 21, '6': 21, + '7': 21, '8': 23, '9': 23, 'B': 22, + 'E': 25, 
'J': 13, 'L': 13, 'O': 20, + 'X': 19, 'b': 22, 'e': 25, 'j': 13, + 'l': 13, 'o': 20, 'x': 19}, + # 5 + {'.': 24, '0': 5, '1': 5, '2': 5, + '3': 5, '4': 5, '5': 5, '6': 5, + '7': 5, '8': 5, '9': 5, 'E': 25, + 'J': 13, 'L': 13, 'e': 25, 'j': 13, + 'l': 13}, + # 6 + {'0': 26, '1': 26, '2': 26, '3': 26, + '4': 26, '5': 26, '6': 26, '7': 26, + '8': 26, '9': 26}, + # 7 + {'*': 12, '=': 13}, + # 8 + {'=': 13, '>': 12}, + # 9 + {'<': 12, '=': 13, '>': 13}, + # 10 + {'=': 13}, + # 11 + {'/': 12, '=': 13}, + # 12 + {'=': 13}, + # 13 + {}, + # 14 + {'\n': 13}, + # 15 + {automata.DEFAULT: 30, '\n': 27, + '\r': 27, "'": 28, '\\': 29}, + # 16 + {automata.DEFAULT: 33, '\n': 27, + '\r': 27, '"': 31, '\\': 32}, + # 17 + {'\n': 13, '\r': 14}, + # 18 + {automata.DEFAULT: 18, '\n': 27, '\r': 27}, + # 19 + {'0': 34, '1': 34, '2': 34, '3': 34, + '4': 34, '5': 34, '6': 34, '7': 34, + '8': 34, '9': 34, 'A': 34, 'B': 34, + 'C': 34, 'D': 34, 'E': 34, 'F': 34, + 'a': 34, 'b': 34, 'c': 34, 'd': 34, + 'e': 34, 'f': 34}, + # 20 + {'0': 35, '1': 35, '2': 35, '3': 35, + '4': 35, '5': 35, '6': 35, '7': 35}, + # 21 + {'.': 24, '0': 21, '1': 21, '2': 21, + '3': 21, '4': 21, '5': 21, '6': 21, + '7': 21, '8': 23, '9': 23, 'E': 25, + 'J': 13, 'L': 13, 'e': 25, 'j': 13, + 'l': 13}, + # 22 + {'0': 36, '1': 36}, + # 23 + {'.': 24, '0': 23, '1': 23, '2': 23, + '3': 23, '4': 23, '5': 23, '6': 23, + '7': 23, '8': 23, '9': 23, 'E': 25, + 'J': 13, 'e': 25, 'j': 13}, + # 24 + {'0': 24, '1': 24, '2': 24, '3': 24, + '4': 24, '5': 24, '6': 24, '7': 24, + '8': 24, '9': 24, 'E': 37, 'J': 13, + 'e': 37, 'j': 13}, + # 25 + {'+': 38, '-': 38, '0': 39, '1': 39, + '2': 39, '3': 39, '4': 39, '5': 39, + '6': 39, '7': 39, '8': 39, '9': 39}, + # 26 + {'0': 26, '1': 26, '2': 26, '3': 26, + '4': 26, '5': 26, '6': 26, '7': 26, + '8': 26, '9': 26, 'E': 37, 'J': 13, + 'e': 37, 'j': 13}, + # 27 + {}, + # 28 + {"'": 13}, + # 29 + {automata.DEFAULT: 40, '\n': 13, '\r': 14}, + # 30 + {automata.DEFAULT: 30, '\n': 27, + '\r': 27, "'": 13, '\\': 29}, + # 31 + {'"': 13}, + # 32 + {automata.DEFAULT: 41, '\n': 13, '\r': 14}, + # 33 + {automata.DEFAULT: 33, '\n': 27, + '\r': 27, '"': 13, '\\': 32}, + # 34 + {'0': 34, '1': 34, '2': 34, '3': 34, + '4': 34, '5': 34, '6': 34, '7': 34, + '8': 34, '9': 34, 'A': 34, 'B': 34, + 'C': 34, 'D': 34, 'E': 34, 'F': 34, + 'L': 13, 'a': 34, 'b': 34, 'c': 34, + 'd': 34, 'e': 34, 'f': 34, 'l': 13}, + # 35 + {'0': 35, '1': 35, '2': 35, '3': 35, + '4': 35, '5': 35, '6': 35, '7': 35, + 'L': 13, 'l': 13}, + # 36 + {'0': 36, '1': 36, 'L': 13, 'l': 13}, + # 37 + {'+': 42, '-': 42, '0': 43, '1': 43, + '2': 43, '3': 43, '4': 43, '5': 43, + '6': 43, '7': 43, '8': 43, '9': 43}, + # 38 + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39}, + # 39 + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39, 'J': 13, 'j': 13}, + # 40 + {automata.DEFAULT: 40, '\n': 27, + '\r': 27, "'": 13, '\\': 29}, + # 41 + {automata.DEFAULT: 41, '\n': 27, + '\r': 27, '"': 13, '\\': 32}, + # 42 + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '8': 43, '9': 43}, + # 43 + {'0': 43, '1': 43, '2': 43, '3': 43, + '4': 43, '5': 43, '6': 43, '7': 43, + '8': 43, '9': 43, 'J': 13, 'j': 13}, + ] +pseudoDFA = automata.DFA(states, accepts) + +accepts = [False, False, False, False, False, True] +states = [ + # 0 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, + # 1 + {automata.DEFAULT: 4, '"': 3, '\\': 2}, + # 2 + {automata.DEFAULT: 4}, + # 3 + {automata.DEFAULT: 4, '"': 
5, '\\': 2}, + # 4 + {automata.DEFAULT: 4, '"': 1, '\\': 2}, + # 5 + {automata.DEFAULT: 4, '"': 5, '\\': 2}, + ] +double3DFA = automata.NonGreedyDFA(states, accepts) + +accepts = [False, False, False, False, False, True] +states = [ + # 0 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, + # 1 + {automata.DEFAULT: 4, "'": 3, '\\': 2}, + # 2 + {automata.DEFAULT: 4}, + # 3 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, + # 4 + {automata.DEFAULT: 4, "'": 1, '\\': 2}, + # 5 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, + ] +single3DFA = automata.NonGreedyDFA(states, accepts) + +accepts = [False, True, False, False] +states = [ + # 0 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, + # 1 + {}, + # 2 + {automata.DEFAULT: 3}, + # 3 + {automata.DEFAULT: 3, "'": 1, '\\': 2}, + ] +singleDFA = automata.DFA(states, accepts) + +accepts = [False, True, False, False] +states = [ + # 0 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, + # 1 + {}, + # 2 + {automata.DEFAULT: 3}, + # 3 + {automata.DEFAULT: 3, '"': 1, '\\': 2}, + ] +doubleDFA = automata.DFA(states, accepts) + +#_______________________________________________________________________ +# End of automatically generated DFA's + +endDFAs = {"'" : singleDFA, + '"' : doubleDFA, + 'r' : None, + 'R' : None, + 'u' : None, + 'U' : None, + 'b' : None, + 'B' : None} + +for uniPrefix in ("", "u", "U", "b", "B"): + for rawPrefix in ("", "r", "R"): + prefix = uniPrefix + rawPrefix + endDFAs[prefix + "'''"] = single3DFA + endDFAs[prefix + '"""'] = double3DFA + +whiteSpaceStatesAccepts = [True] +whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}] +whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts) + +# ______________________________________________________________________ +# COPIED: + +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "b'''", 'b"""', "B'''", 'B"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""', + "br'''", 'br"""', "Br'''", 'Br"""', + "bR'''", 'bR"""', "BR'''", 'BR"""'): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "b'", 'b"', "B'", 'B"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"', + "br'", 'br"', "Br'", 'Br"', + "bR'", 'bR"', "BR'", 'BR"'): + single_quoted[t] = t + +tabsize = 8 + +# PYPY MODIFICATION: removed TokenError class as it's not needed here + +# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here + +# PYPY MODIFICATION: removed printtoken() as it's not needed here + +# PYPY MODIFICATION: removed tokenize() as it's not needed here + +# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here + +# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified +# in pythonlexer.py + +# PYPY MODIFICATION: removed main() as it's not needed here + +# ______________________________________________________________________ +# End of pytokenize.py + diff -r 28e2996df412 -r 051349b537b2 pyparser/pytokenizer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/pytokenizer.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,273 @@ +from pyparser import automata +from pyparser.pygram import tokens +from pyparser.pytoken import python_opmap +from pyparser.error import TokenError, TokenIndentationError +from pyparser.pytokenize import tabsize, whiteSpaceDFA, \ + triple_quoted, endDFAs, single_quoted, pseudoDFA +from pyparser import consts + +NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' +NUMCHARS = '0123456789' 
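
The quote-prefix tables defined in pytokenize.py above, and imported at the top of this module, are what let the tokenizer scan string literals without the re module: every legal opening quote, with any combination of u/b/r prefixes, appears as a key in triple_quoted or single_quoted, and endDFAs maps it to the automaton that locates the matching closing quote. A rough usage sketch, relying only on the recognize() behaviour that the tokenizer loop further down in this module depends on (the end offset of a match, or a negative value when there is none):

    from pyparser.pytokenize import endDFAs, triple_quoted, single_quoted

    # Every prefixed opening quote is registered.
    assert "r'''" in triple_quoted
    assert "b'" in single_quoted

    dfa = endDFAs["'''"]                  # non-greedy DFA for a closing '''
    text = "body of the literal''' trailing code"
    end = dfa.recognize(text)
    if end >= 0:
        literal_tail = text[:end]         # includes the closing quotes
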
+ALNUMCHARS = NAMECHARS + NUMCHARS +EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.' +WHITESPACES = ' \t\n\r\v\f' + +def match_encoding_declaration(comment): + """returns the declared encoding or None + + This function is a replacement for : + >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)") + >>> py_encoding.search(comment) + """ + index = comment.find('coding') + if index < 0: + return None + next_char = comment[index + 6] + if next_char not in ':=': + return None + end_of_decl = comment[index + 7:] + index = 0 + for char in end_of_decl: + if char not in WHITESPACES: + break + index += 1 + else: + return None + encoding = '' + for char in end_of_decl[index:]: + if char in EXTENDED_ALNUMCHARS: + encoding += char + else: + break + if encoding != '': + return encoding + return None + + +DUMMY_DFA = automata.DFA([], []) + +def generate_tokens(lines, flags): + """ + This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since + the original function is not RPYTHON (uses yield) + It was also slightly modified to generate Token instances instead + of the original 5-tuples -- it's now a 4-tuple of + + * the Token instance + * the whole line as a string + * the line number (the real one, counting continuation lines) + * the position on the line of the end of the token. + + Original docstring :: + + The generate_tokens() generator requires one argment, readline, which + must be a callable object which provides the same interface as the + readline() method of built-in file objects. Each call to the function + should return one line of input as a string. + + The generator produces 5-tuples with these members: the token type; the + token string; a 2-tuple (srow, scol) of ints specifying the row and + column where the token begins in the source; a 2-tuple (erow, ecol) of + ints specifying the row and column where the token ends in the source; + and the line on which the token was found. The line passed is the + logical line; continuation lines are included. 
+ """ + token_list = [] + lnum = parenlev = continued = 0 + namechars = NAMECHARS + numchars = NUMCHARS + contstr, needcont = '', 0 + contline = None + indents = [0] + last_comment = '' + parenlevstart = (0, 0, "") + + # make the annotator happy + endDFA = DUMMY_DFA + # make the annotator happy + line = '' + pos = 0 + lines.append("") + strstart = (0, 0, "") + for line in lines: + lnum = lnum + 1 + line = universal_newline(line) + pos, max = 0, len(line) + + if contstr: + if not line: + raise TokenError( + "EOF while scanning triple-quoted string literal", + strstart[2], strstart[0], strstart[1]+1, + token_list, lnum-1) + endmatch = endDFA.recognize(line) + if endmatch >= 0: + pos = end = endmatch + tok = (tokens.STRING, contstr + line[:end], strstart[0], + strstart[1], line) + token_list.append(tok) + last_comment = '' + contstr, needcont = '', 0 + contline = None + elif (needcont and not line.endswith('\\\n') and + not line.endswith('\\\r\n')): + tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], + strstart[1], line) + token_list.append(tok) + last_comment = '' + contstr = '' + contline = None + continue + else: + contstr = contstr + line + contline = contline + line + continue + + elif parenlev == 0 and not continued: # new statement + if not line: break + column = 0 + while pos < max: # measure leading whitespace + if line[pos] == ' ': column = column + 1 + elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize + elif line[pos] == '\f': column = 0 + else: break + pos = pos + 1 + if pos == max: break + + if line[pos] in '#\r\n': + # skip comments or blank lines + continue + + if column > indents[-1]: # count indents or dedents + indents.append(column) + token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) + last_comment = '' + while column < indents[-1]: + indents = indents[:-1] + token_list.append((tokens.DEDENT, '', lnum, pos, line)) + last_comment = '' + if column != indents[-1]: + err = "unindent does not match any outer indentation level" + raise TokenIndentationError(err, line, lnum, 0, token_list) + + else: # continued statement + if not line: + if parenlev > 0: + lnum1, start1, line1 = parenlevstart + raise TokenError("parenthesis is never closed", line1, + lnum1, start1 + 1, token_list, lnum) + raise TokenError("EOF in multi-line statement", line, + lnum, 0, token_list) + continued = 0 + + while pos < max: + pseudomatch = pseudoDFA.recognize(line, pos) + if pseudomatch >= 0: # scan for tokens + # JDR: Modified + start = whiteSpaceDFA.recognize(line, pos) + if start < 0: + start = pos + end = pseudomatch + + if start == end: + raise TokenError("Unknown character", line, + lnum, start + 1, token_list) + + pos = end + token, initial = line[start:end], line[start] + if initial in numchars or \ + (initial == '.' 
and token != '.'): # ordinary number + token_list.append((tokens.NUMBER, token, lnum, start, line)) + last_comment = '' + elif initial in '\r\n': + if parenlev <= 0: + tok = (tokens.NEWLINE, last_comment, lnum, start, line) + token_list.append(tok) + last_comment = '' + elif initial == '#': + # skip comment + last_comment = token + elif token in triple_quoted: + endDFA = endDFAs[token] + endmatch = endDFA.recognize(line, pos) + if endmatch >= 0: # all on one line + pos = endmatch + token = line[start:pos] + tok = (tokens.STRING, token, lnum, start, line) + token_list.append(tok) + last_comment = '' + else: + strstart = (lnum, start, line) + contstr = line[start:] + contline = line + break + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] == '\n': # continued string + strstart = (lnum, start, line) + endDFA = (endDFAs[initial] or endDFAs[token[1]] or + endDFAs[token[2]]) + contstr, needcont = line[start:], 1 + contline = line + break + else: # ordinary string + tok = (tokens.STRING, token, lnum, start, line) + token_list.append(tok) + last_comment = '' + elif initial in namechars: # ordinary name + token_list.append((tokens.NAME, token, lnum, start, line)) + last_comment = '' + elif initial == '\\': # continued stmt + continued = 1 + else: + if initial in '([{': + if parenlev == 0: + parenlevstart = (lnum, start, line) + parenlev = parenlev + 1 + elif initial in ')]}': + parenlev = parenlev - 1 + if parenlev < 0: + raise TokenError("unmatched '%s'" % initial, line, + lnum, start + 1, token_list) + if token in python_opmap: + punct = python_opmap[token] + else: + punct = tokens.OP + token_list.append((punct, token, lnum, start, line)) + last_comment = '' + else: + start = whiteSpaceDFA.recognize(line, pos) + if start < 0: + start = pos + if start= 0 and line[-2] == '\r' and line[-1] == '\n': + return line[:line_len_m2] + '\n' + line_len_m1 = len(line) - 1 + if line_len_m1 >= 0 and line[-1] == '\r': + return line[:line_len_m1] + '\n' + return line diff -r 28e2996df412 -r 051349b537b2 pyparser/test/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/__init__.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,1 @@ + diff -r 28e2996df412 -r 051349b537b2 pyparser/test/expressions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/expressions.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,510 @@ +""" +list of tested expressions / suites (used by test_parser and test_astbuilder) +""" + +constants = [ + "0", + "7", + "-3", + "053", + "0x18", + "14L", + "1.0", + "3.9", + "-3.6", + "1.8e19", + "90000000000000", + "90000000000000.", + "3j" + ] + +expressions = [ + "x = a + 1", + "x = 1 - a", + "x = a * b", + "x = a ** 2", + "x = a / b", + "x = a & b", + "x = a | b", + "x = a ^ b", + "x = a // b", + "x = a * b + 1", + "x = a + 1 * b", + "x = a * b / c", + "x = a * (1 + c)", + "x, y, z = 1, 2, 3", + "x = 'a' 'b' 'c'", + "del foo", + "del foo[bar]", + "del foo.bar", + "l[0]", + "k[v,]", + "m[a,b]", + "a.b.c[d]", + "file('some.txt').read()", + "a[0].read()", + "a[1:1].read()", + "f('foo')('bar')('spam')", + "f('foo')('bar')('spam').read()[0]", + "a.b[0][0]", + "a.b[0][:]", + "a.b[0][::]", + "a.b[0][0].pop()[0].push('bar')('baz').spam", + "a.b[0].read()[1][2].foo().spam()[0].bar", + "a**2", + "a**2**2", + "a.b[0]**2", + "a.b[0].read()[1][2].foo().spam()[0].bar ** 2", + "l[start:end] = l2", + "l[::] = l2", + "a = `s`", + "a = `1 + 2 + f(3, 4)`", + "[a, b] = c", + "(a, b) = c", + "[a, (b,c), d] = e", + "a, (b, 
c), d = e", + ] + +# We do not export the following tests because we would have to implement 2.5 +# features in the stable compiler (other than just building the AST). +expressions_inbetweenversions = expressions + [ + "1 if True else 2", + "1 if False else 2", + ] + +funccalls = [ + "l = func()", + "l = func(10)", + "l = func(10, 12, a, b=c, *args)", + "l = func(10, 12, a, b=c, **kwargs)", + "l = func(10, 12, a, b=c, *args, **kwargs)", + "l = func(10, 12, a, b=c)", + "e = l.pop(3)", + "e = k.l.pop(3)", + "simplefilter('ignore', category=PendingDeprecationWarning, append=1)", + """methodmap = dict(subdirs=phase4, + same_files=phase3, diff_files=phase3, funny_files=phase3, + common_dirs = phase2, common_files=phase2, common_funny=phase2, + common=phase1, left_only=phase1, right_only=phase1, + left_list=phase0, right_list=phase0)""", + "odata = b2a_qp(data, quotetabs = quotetabs, header = header)", + ] + +listmakers = [ + "l = []", + "l = [1, 2, 3]", + "l = [i for i in range(10)]", + "l = [i for i in range(10) if i%2 == 0]", + "l = [i for i in range(10) if i%2 == 0 or i%2 == 1]", # <-- + "l = [i for i in range(10) if i%2 == 0 and i%2 == 1]", + "l = [i for j in range(10) for i in range(j)]", + "l = [i for j in range(10) for i in range(j) if j%2 == 0]", + "l = [i for j in range(10) for i in range(j) if j%2 == 0 and i%2 == 0]", + "l = [(a, b) for (a,b,c) in l2]", + "l = [{a:b} for (a,b,c) in l2]", + "l = [i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0]", + ] + +genexps = [ + "l = (i for i in j)", + "l = (i for i in j if i%2 == 0)", + "l = (i for j in k for i in j)", + "l = (i for j in k for i in j if j%2==0)", + "l = (i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0)", + "l = (i for i in [ j*2 for j in range(10) ] )", + "l = [i for i in ( j*2 for j in range(10) ) ]", + "l = (i for i in [ j*2 for j in ( k*3 for k in range(10) ) ] )", + "l = [i for j in ( j*2 for j in [ k*3 for k in range(10) ] ) ]", + "l = f(i for i in j)", + ] + + +dictmakers = [ + "l = {a : b, 'c' : 0}", + "l = {}", + ] + +backtrackings = [ + "f = lambda x: x+1", + "f = lambda x,y: x+y", + "f = lambda x,y=1,z=t: x+y", + "f = lambda x,y=1,z=t,*args,**kwargs: x+y", + "f = lambda x,y=1,z=t,*args: x+y", + "f = lambda x,y=1,z=t,**kwargs: x+y", + "f = lambda: 1", + "f = lambda *args: 1", + "f = lambda **kwargs: 1", + ] + +comparisons = [ + "a < b", + "a > b", + "a not in b", + "a is not b", + "a in b", + "a is b", + "3 < x < 5", + "(3 < x) < 5", + "a < b < c < d", + "(a < b) < (c < d)", + "a < (b < c) < d", + ] + +multiexpr = [ + 'a = b; c = d;', + 'a = b = c = d', + ] + +attraccess = [ + 'a.b = 2', + 'x = a.b', + ] + +slices = [ + "l[:]", + "l[::]", + "l[1:2]", + "l[1:]", + "l[:2]", + "l[1::]", + "l[:1:]", + "l[::1]", + "l[1:2:]", + "l[:1:2]", + "l[1::2]", + "l[0:1:2]", + "a.b.l[:]", + "a.b.l[1:2]", + "a.b.l[1:]", + "a.b.l[:2]", + "a.b.l[0:1:2]", + "a[1:2:3, 100]", + "a[:2:3, 100]", + "a[1::3, 100,]", + "a[1:2:, 100]", + "a[1:2, 100]", + "a[1:, 100,]", + "a[:2, 100]", + "a[:, 100]", + "a[100, 1:2:3,]", + "a[100, :2:3]", + "a[100, 1::3]", + "a[100, 1:2:,]", + "a[100, 1:2]", + "a[100, 1:]", + "a[100, :2,]", + "a[100, :]", + ] + +imports = [ + 'import os', + 'import sys, os', + 'import os.path', + 'import os.path, sys', + 'import sys, os.path as osp', + 'import os.path as osp', + 'import os.path as osp, sys as _sys', + 'import a.b.c.d', + 'import a.b.c.d as abcd', + 'from os import path', + 'from os import path, system', + ] + +imports_newstyle = [ + 'from os import path, system', + 'from os import path as P, 
system as S', + 'from os import (path as P, system as S,)', + 'from os import *', + ] + +if_stmts = [ + "if a == 1: a+= 2", + """if a == 1: + a += 2 +elif a == 2: + a += 3 +else: + a += 4 +""", + "if a and not b == c: pass", + "if a and not not not b == c: pass", + "if 0: print 'foo'" + ] + +asserts = [ + 'assert False', + 'assert a == 1', + 'assert a == 1 and b == 2', + 'assert a == 1 and b == 2, "assertion failed"', + ] + +execs = [ + 'exec a', + 'exec "a=b+3"', + 'exec a in f()', + 'exec a in f(), g()', + ] + +prints = [ + 'print', + 'print a', + 'print a,', + 'print a, b', + 'print a, "b", c', + 'print >> err', + 'print >> err, "error"', + 'print >> err, "error",', + 'print >> err, "error", a', + ] + +globs = [ + 'global a', + 'global a,b,c', + ] + +raises_ = [ # NB. 'raises' creates a name conflict with py.test magic + 'raise', + 'raise ValueError', + 'raise ValueError("error")', + 'raise ValueError, "error"', + 'raise ValueError, "error", foo', + ] + +tryexcepts = [ + """try: + a + b +except: + pass +""", + """try: + a + b +except NameError: + pass +""", + """try: + a + b +except NameError, err: + pass +""", + """try: + a + b +except (NameError, ValueError): + pass +""", + """try: + a + b +except (NameError, ValueError), err: + pass +""", + """try: + a +except NameError, err: + pass +except ValueError, err: + pass +""", + """def f(): + try: + a + except NameError, err: + a = 1 + b = 2 + except ValueError, err: + a = 2 + return a +""" + """try: + a +except NameError, err: + a = 1 +except ValueError, err: + a = 2 +else: + a += 3 +""", + """try: + a +finally: + b +""", + """def f(): + try: + return a + finally: + a = 3 + return 1 +""", + + ] + +one_stmt_funcdefs = [ + "def f(): return 1", + "def f(x): return x+1", + "def f(x,y): return x+y", + "def f(x,y=1,z=t): return x+y", + "def f(x,y=1,z=t,*args,**kwargs): return x+y", + "def f(x,y=1,z=t,*args): return x+y", + "def f(x,y=1,z=t,**kwargs): return x+y", + "def f(*args): return 1", + "def f(**kwargs): return 1", + "def f(t=()): pass", + "def f(a, b, (c, d), e): pass", + "def f(a, b, (c, (d, e), f, (g, h))): pass", + "def f(a, b, (c, (d, e), f, (g, h)), i): pass", + "def f((a)): pass", + ] + +one_stmt_classdefs = [ + "class Pdb(bdb.Bdb, cmd.Cmd): pass", + "class A: pass", + ] + +docstrings = [ + '''def foo(): return 1''', + '''class Foo: pass''', + '''class Foo: "foo"''', + '''def foo(): + """foo docstring""" + return 1 +''', + '''def foo(): + """foo docstring""" + a = 1 + """bar""" + return a +''', + '''def foo(): + """doc"""; print 1 + a=1 +''', + '''"""Docstring""";print 1''', + ] + +returns = [ + 'def f(): return', + 'def f(): return 1', + 'def f(): return a.b', + 'def f(): return a', + 'def f(): return a,b,c,d', + #'return (a,b,c,d)', --- this one makes no sense, as far as I can tell + ] + +augassigns = [ + 'a=1;a+=2', + 'a=1;a-=2', + 'a=1;a*=2', + 'a=1;a/=2', + 'a=1;a//=2', + 'a=1;a%=2', + 'a=1;a**=2', + 'a=1;a>>=2', + 'a=1;a<<=2', + 'a=1;a&=2', + 'a=1;a^=2', + 'a=1;a|=2', + + 'a=A();a.x+=2', + 'a=A();a.x-=2', + 'a=A();a.x*=2', + 'a=A();a.x/=2', + 'a=A();a.x//=2', + 'a=A();a.x%=2', + 'a=A();a.x**=2', + 'a=A();a.x>>=2', + 'a=A();a.x<<=2', + 'a=A();a.x&=2', + 'a=A();a.x^=2', + 'a=A();a.x|=2', + + 'a=A();a[0]+=2', + 'a=A();a[0]-=2', + 'a=A();a[0]*=2', + 'a=A();a[0]/=2', + 'a=A();a[0]//=2', + 'a=A();a[0]%=2', + 'a=A();a[0]**=2', + 'a=A();a[0]>>=2', + 'a=A();a[0]<<=2', + 'a=A();a[0]&=2', + 'a=A();a[0]^=2', + 'a=A();a[0]|=2', + + 'a=A();a[0:2]+=2', + 'a=A();a[0:2]-=2', + 'a=A();a[0:2]*=2', + 'a=A();a[0:2]/=2', + 'a=A();a[0:2]//=2', + 
'a=A();a[0:2]%=2', + 'a=A();a[0:2]**=2', + 'a=A();a[0:2]>>=2', + 'a=A();a[0:2]<<=2', + 'a=A();a[0:2]&=2', + 'a=A();a[0:2]^=2', + 'a=A();a[0:2]|=2', + ] + +PY23_TESTS = [ + constants, + expressions, + augassigns, + comparisons, + funccalls, + backtrackings, + listmakers, # ERRORS + dictmakers, + multiexpr, + attraccess, + slices, + imports, + execs, + prints, + globs, + raises_, + + ] + +OPTIONAL_TESTS = [ + # expressions_inbetweenversions, + genexps, + imports_newstyle, + asserts, + ] + +TESTS = PY23_TESTS + OPTIONAL_TESTS + + +## TESTS = [ +## ["l = [i for i in range(10) if i%2 == 0 or i%2 == 1]"], +## ] + +CHANGES_25_INPUTS = [ + ["class A(): pass"], + ["def f(): x = yield 3"] + ] + +EXEC_INPUTS = [ + one_stmt_classdefs, + one_stmt_funcdefs, + if_stmts, + tryexcepts, + docstrings, + returns, + ] + +SINGLE_INPUTS = [ + one_stmt_funcdefs, + ['\t # hello\n', + 'print 6*7', + 'if 1: x\n', + 'x = 5', + 'x = 5 ', + '''"""Docstring""";print 1''', + '''"Docstring"''', + '''"Docstring" "\\x00"''', + ] +] diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_automata.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_automata.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,12 @@ +from pyparser.automata import DFA, DEFAULT + +def test_states(): + d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True]) + assert d.states == "\x01\xff\xff\x00" + assert d.defaults == "\xff\xff" + assert d.max_char == 2 + + d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True]) + assert d.states == "\x01\x00" + assert d.defaults == "\xff\x00" + assert d.max_char == 1 diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_gendfa.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_gendfa.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,16 @@ +from pyparser.automata import DFA, DEFAULT +from pyparser.genpytokenize import output + +def test_states(): + states = [{"\x00": 1}, {"\x01": 0}] + d = DFA(states[:], [False, True]) + assert output('test', DFA, d, states) == """\ +accepts = [False, True] +states = [ + # 0 + {'\\x00': 1}, + # 1 + {'\\x01': 0}, + ] +test = automata.pyparser.automata.DFA(states, accepts) +""" diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_metaparser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_metaparser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,112 @@ +import py +import os +import glob +import tokenize +import token +import StringIO +from pyparser.metaparser import ParserGenerator, PgenError +from pyparser.pygram import PythonGrammar +from pyparser import parser + + +class MyGrammar(parser.Grammar): + TOKENS = token.__dict__ + OPERATOR_MAP = { + "+" : token.OP, + "-" : token.OP, + } + KEYWORD_TOKEN = token.NAME + + +class TestParserGenerator: + + def gram_for(self, grammar_source): + p = ParserGenerator(grammar_source + "\n") + return p.build_grammar(MyGrammar) + + def test_multiple_rules(self): + g = self.gram_for("foo: NAME bar\nbar: STRING") + assert len(g.dfas) == 2 + assert g.start == g.symbol_ids["foo"] + + def test_simple(self): + g = self.gram_for("eval: NAME\n") + assert len(g.dfas) == 1 + eval_sym = g.symbol_ids["eval"] + assert g.start == eval_sym + states, first = g.dfas[eval_sym - 256] + assert states == [([(1, 1)], False), ([], True)] + assert g.labels[0] == 0 + + def test_load_python_grammars(self): + gram_pat = os.path.join(os.path.dirname(__file__), "..", "data", + "Grammar*") + for gram_file in glob.glob(gram_pat): + fp = open(gram_file, "r") + try: + ParserGenerator(fp.read()).build_grammar(PythonGrammar) + 
finally: + fp.close() + + def test_items(self): + g = self.gram_for("foo: NAME STRING OP '+'") + assert len(g.dfas) == 1 + states = g.dfas[g.symbol_ids["foo"] - 256][0] + last = states[0][0][0][1] + for state in states[1:-1]: + assert last < state[0][0][1] + last = state[0][0][1] + + def test_alternatives(self): + g = self.gram_for("foo: STRING | OP") + assert len(g.dfas) == 1 + + def test_optional(self): + g = self.gram_for("foo: [NAME]") + + def test_grouping(self): + g = self.gram_for("foo: (NAME | STRING) OP") + + def test_keyword(self): + g = self.gram_for("foo: 'some_keyword' 'for'") + assert len(g.keyword_ids) == 2 + assert len(g.token_ids) == 0 + + def test_token(self): + g = self.gram_for("foo: NAME") + assert len(g.token_ids) == 1 + + def test_operator(self): + g = self.gram_for("add: NUMBER '+' NUMBER") + assert len(g.keyword_ids) == 0 + assert len(g.token_ids) == 2 + + exc = py.test.raises(PgenError, self.gram_for, "add: '/'").value + assert str(exc) == "no such operator: '/'" + + def test_symbol(self): + g = self.gram_for("foo: some_other_rule\nsome_other_rule: NAME") + assert len(g.dfas) == 2 + assert len(g.labels) == 3 + + exc = py.test.raises(PgenError, self.gram_for, "foo: no_rule").value + assert str(exc) == "no such rule: 'no_rule'" + + def test_repeaters(self): + g1 = self.gram_for("foo: NAME+") + g2 = self.gram_for("foo: NAME*") + assert g1.dfas != g2.dfas + + g = self.gram_for("foo: (NAME | STRING)*") + g = self.gram_for("foo: (NAME | STRING)+") + + def test_error(self): + exc = py.test.raises(PgenError, self.gram_for, "hi").value + assert str(exc) == "expected token OP but got NEWLINE" + assert exc.location == ((1, 2), (1, 3), "hi\n") + exc = py.test.raises(PgenError, self.gram_for, "hi+").value + assert str(exc) == "expected ':' but got '+'" + assert exc.location == ((1, 2), (1, 3), "hi+\n") + + def test_comments_and_whitespace(self): + self.gram_for("\n\n# comment\nrule: NAME # comment") diff -r 28e2996df412 -r 051349b537b2 pyparser/test/test_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_parser.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,293 @@ +# New parser tests. 
+import py +import tokenize +import token +import StringIO +from pyparser import parser, metaparser, pygram +from pyparser.test.test_metaparser import MyGrammar + + +class SimpleParser(parser.Parser): + + def parse(self, input): + self.prepare() + rl = StringIO.StringIO(input + "\n").readline + gen = tokenize.generate_tokens(rl) + for tp, value, begin, end, line in gen: + if self.add_token(tp, value, begin[0], begin[1], line): + py.test.raises(StopIteration, gen.next) + return self.root + + +def tree_from_string(expected, gram): + def count_indent(s): + indent = 0 + for char in s: + if char != " ": + break + indent += 1 + return indent + last_newline_index = 0 + for i, char in enumerate(expected): + if char == "\n": + last_newline_index = i + elif char != " ": + break + if last_newline_index: + expected = expected[last_newline_index + 1:] + base_indent = count_indent(expected) + assert not divmod(base_indent, 4)[1], "not using 4 space indentation" + lines = [line[base_indent:] for line in expected.splitlines()] + last_indent = 0 + node_stack = [] + for line in lines: + if not line.strip(): + continue + data = line.split() + if data[0].isupper(): + tp = getattr(token, data[0]) + if len(data) == 2: + value = data[1].strip("\"") + elif tp == token.NEWLINE: + value = "\n" + else: + value = "" + n = parser.Terminal(tp, value, 0, 0) + else: + tp = gram.symbol_ids[data[0]] + children = [] + n = parser.Nonterminal(tp, children) + new_indent = count_indent(line) + if new_indent >= last_indent: + if new_indent == last_indent and node_stack: + node_stack.pop() + if node_stack: + node_stack[-1].append_child(n) + node_stack.append(n) + else: + diff = last_indent - new_indent + pop_nodes = diff // 4 + 1 + del node_stack[-pop_nodes:] + node_stack[-1].append_child(n) + node_stack.append(n) + last_indent = new_indent + return node_stack[0] + + +class TestParser: + + def parser_for(self, gram, add_endmarker=True): + if add_endmarker: + gram += " NEWLINE ENDMARKER\n" + pgen = metaparser.ParserGenerator(gram) + g = pgen.build_grammar(MyGrammar) + return SimpleParser(g), g + + def test_multiple_rules(self): + gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER +bar: NAME NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + foo + NAME "next_rule" + bar + NAME "a_name" + NUMBER "42" + NAME "end" + NEWLINE + ENDMARKER""" + input = "next_rule a_name 42 end" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_recursive_rule(self): + gram = """foo: NAME bar STRING NEWLINE ENDMARKER +bar: NAME [bar] NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + foo + NAME "hi" + bar + NAME "hello" + bar + NAME "a_name" + NUMBER "32" + NUMBER "42" + STRING "'string'" + NEWLINE + ENDMARKER""" + input = "hi hello a_name 32 42 'string'" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_symbol(self): + gram = """parent: first_child second_child NEWLINE ENDMARKER +first_child: NAME age +second_child: STRING +age: NUMBER\n""" + p, gram = self.parser_for(gram, False) + expected = """ + parent + first_child + NAME "harry" + age + NUMBER "13" + second_child + STRING "'fred'" + NEWLINE + ENDMARKER""" + input = "harry 13 'fred'" + assert tree_from_string(expected, gram) == p.parse(input) + + def test_token(self): + p, gram = self.parser_for("foo: NAME") + expected = """ + foo + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi") + py.test.raises(parser.ParseError, p.parse, "567") + p, gram = self.parser_for("foo: 
NUMBER NAME STRING") + expected = """ + foo + NUMBER "42" + NAME "hi" + STRING "'bar'" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'") + + def test_optional(self): + p, gram = self.parser_for("foo: [NAME] 'end'") + expected = """ + foo + NAME "hi" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi end") + expected = """ + foo + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("end") + + def test_grouping(self): + p, gram = self.parser_for( + "foo: ((NUMBER NAME | STRING) | 'second_option')") + expected = """ + foo + NUMBER "42" + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 hi") + expected = """ + foo + STRING "'hi'" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("'hi'") + expected = """ + foo + NAME "second_option" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("second_option") + py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'") + py.test.raises(parser.ParseError, p.parse, "42 second_option") + + def test_alternative(self): + p, gram = self.parser_for("foo: (NAME | NUMBER)") + expected = """ + foo + NAME "hi" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi") + expected = """ + foo + NUMBER "42" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42") + py.test.raises(parser.ParseError, p.parse, "hi 23") + py.test.raises(parser.ParseError, p.parse, "23 hi") + py.test.raises(parser.ParseError, p.parse, "'some string'") + + def test_keyword(self): + p, gram = self.parser_for("foo: 'key'") + expected = """ + foo + NAME "key" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("key") + py.test.raises(parser.ParseError, p.parse, "") + p, gram = self.parser_for("foo: NAME 'key'") + expected = """ + foo + NAME "some_name" + NAME "key" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("some_name key") + py.test.raises(parser.ParseError, p.parse, "some_name") + + def test_repeaters(self): + p, gram = self.parser_for("foo: NAME+ 'end'") + expected = """ + foo + NAME "hi" + NAME "bye" + NAME "nothing" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi bye nothing end") + py.test.raises(parser.ParseError, p.parse, "end") + py.test.raises(parser.ParseError, p.parse, "hi bye") + p, gram = self.parser_for("foo: NAME* 'end'") + expected = """ + foo + NAME "hi" + NAME "bye" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi bye end") + py.test.raises(parser.ParseError, p.parse, "hi bye") + expected = """ + foo + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("end") + + p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'") + expected = """ + foo + NAME "a_name" + NAME "name_two" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("a_name name_two end") + expected = """ + foo + NUMBER "42" + NAME "name" + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("42 name end") + py.test.raises(parser.ParseError, p.parse, "end") + p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'") + expected = """ + foo + NAME "hi" + NUMBER 42 + NAME "end" + NEWLINE + ENDMARKER""" + assert tree_from_string(expected, gram) == p.parse("hi 42 end") diff -r 
28e2996df412 -r 051349b537b2 pyparser/test/test_pyparse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/test_pyparse.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +import py +from pyparser import pyparse +from pyparser.pygram import syms, tokens +from pyparser.error import SyntaxError, IndentationError +from pyparser import consts + + +class TestPythonParser: + + def setup_class(self): + self.parser = pyparse.PythonParser() + + def parse(self, source, mode="exec", info=None): + if info is None: + info = pyparse.CompileInfo("", mode) + return self.parser.parse_source(source, info) + + def test_with_and_as(self): + py.test.raises(SyntaxError, self.parse, "with = 23") + py.test.raises(SyntaxError, self.parse, "as = 2") + + def test_dont_imply_dedent(self): + info = pyparse.CompileInfo("", "single", + consts.PyCF_DONT_IMPLY_DEDENT) + self.parse('if 1:\n x\n', info=info) + self.parse('x = 5 ', info=info) + + def test_clear_state(self): + assert self.parser.root is None + tree = self.parse("name = 32") + assert self.parser.root is None + + def test_encoding(self): + info = pyparse.CompileInfo("", "exec") + tree = self.parse("""# coding: latin-1 +stuff = "nothing" +""", info=info) + assert tree.type == syms.file_input + assert info.encoding == "iso-8859-1" + sentence = u"u'Die Männer ärgen sich!'" + input = (u"# coding: utf-7\nstuff = %s" % (sentence,)).encode("utf-7") + tree = self.parse(input, info=info) + assert info.encoding == "utf-7" + input = "# coding: iso-8859-15\nx" + self.parse(input, info=info) + assert info.encoding == "iso-8859-15" + input = "\xEF\xBB\xBF# coding: utf-8\nx" + self.parse(input, info=info) + assert info.encoding == "utf-8" + input = "# coding: utf-8\nx" + info.flags |= consts.PyCF_SOURCE_IS_UTF8 + exc = py.test.raises(SyntaxError, self.parse, input, info=info).value + info.flags &= ~consts.PyCF_SOURCE_IS_UTF8 + assert exc.msg == "coding declaration in unicode string" + input = "\xEF\xBB\xBF# coding: latin-1\nx" + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == "UTF-8 BOM with latin-1 coding cookie" + input = "# coding: not-here" + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == "Unknown encoding: not-here" + input = u"# coding: ascii\n\xe2".encode('utf-8') + exc = py.test.raises(SyntaxError, self.parse, input).value + assert exc.msg == ("'ascii' codec can't decode byte 0xc3 " + "in position 16: ordinal not in range(128)") + + def test_non_unicode_codec(self): + exc = py.test.raises(SyntaxError, self.parse, """\ +# coding: string-escape +\x70\x72\x69\x6e\x74\x20\x32\x2b\x32\x0a +""").value + assert exc.msg == "codec did not return a unicode object" + + def test_syntax_error(self): + parse = self.parse + exc = py.test.raises(SyntaxError, parse, "name another for").value + assert exc.msg == "invalid syntax" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.text.startswith("name another for") + exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value + assert exc.msg == "EOL while scanning string literal" + assert exc.lineno == 1 + assert exc.offset == 5 + exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value + assert exc.msg == "EOF while scanning triple-quoted string literal" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.lastlineno == 3 + for input in ("())", "(()", "((", "))"): + py.test.raises(SyntaxError, parse, input) + exc = py.test.raises(SyntaxError, parse, "x = (\n\n(),\n(),").value + assert exc.msg == 
"parenthesis is never closed" + assert exc.lineno == 1 + assert exc.offset == 5 + assert exc.lastlineno == 5 + exc = py.test.raises(SyntaxError, parse, "abc)").value + assert exc.msg == "unmatched ')'" + assert exc.lineno == 1 + assert exc.offset == 4 + + def test_is(self): + self.parse("x is y") + self.parse("x is not y") + + def test_indentation_error(self): + parse = self.parse + input = """ +def f(): +pass""" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "expected an indented block" + assert exc.lineno == 3 + assert exc.text.startswith("pass") + assert exc.offset == 0 + input = "hi\n indented" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "unexpected indent" + input = "def f():\n pass\n next_stmt" + exc = py.test.raises(IndentationError, parse, input).value + assert exc.msg == "unindent does not match any outer indentation level" + assert exc.lineno == 3 + + def test_mac_newline(self): + self.parse("this_is\ra_mac\rfile") + + def test_mode(self): + assert self.parse("x = 43*54").type == syms.file_input + tree = self.parse("43**54", "eval") + assert tree.type == syms.eval_input + py.test.raises(SyntaxError, self.parse, "x = 54", "eval") + tree = self.parse("x = 43", "single") + assert tree.type == syms.single_input + + def test_multiline_string(self): + self.parse("''' \n '''") + self.parse("r''' \n '''") + + def test_bytes_literal(self): + self.parse('b" "') + self.parse('br" "') + self.parse('b""" """') + self.parse("b''' '''") + self.parse("br'\\\n'") + + py.test.raises(SyntaxError, self.parse, "b'a\\n") + + def test_new_octal_literal(self): + self.parse('0777') + self.parse('0o777') + self.parse('0o777L') + py.test.raises(SyntaxError, self.parse, "0o778") + + def test_new_binary_literal(self): + self.parse('0b1101') + self.parse('0b0l') + py.test.raises(SyntaxError, self.parse, "0b112") + + def test_universal_newlines(self): + fmt = 'stuff = """hello%sworld"""' + expected_tree = self.parse(fmt % '\n') + for linefeed in ["\r\n","\r"]: + tree = self.parse(fmt % linefeed) + assert expected_tree == tree diff -r 28e2996df412 -r 051349b537b2 pyparser/test/unittest_samples.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyparser/test/unittest_samples.py Sun Jan 08 20:20:39 2017 +0100 @@ -0,0 +1,95 @@ +"""test module for CPython / PyPy nested tuples comparison""" + +import os, os.path as osp +import sys +from pyparser.pythonutil import python_parse, pypy_parse +from pprint import pprint +from pyparser import grammar +grammar.DEBUG = False +from symbol import sym_name + + +def name(elt): + return "%s[%s]"% (sym_name.get(elt,elt),elt) + +def read_samples_dir(): + return [osp.join('samples', fname) for fname in os.listdir('samples') if fname.endswith('.py')] + +def print_sym_tuple(nested, level=0, limit=15, names=False, trace=()): + buf = [] + if level <= limit: + buf.append("%s(" % (" "*level)) + else: + buf.append("(") + for index, elt in enumerate(nested): + # Test if debugging and if on last element of error path + if trace and not trace[1:] and index == trace[0]: + buf.append('\n----> ') + if type(elt) is int: + if names: + buf.append(name(elt)) + else: + buf.append(str(elt)) + buf.append(', ') + elif type(elt) is str: + buf.append(repr(elt)) + else: + if level < limit: + buf.append('\n') + buf.extend(print_sym_tuple(elt, level+1, limit, + names, trace[1:])) + buf.append(')') + return buf + +def assert_tuples_equal(tup1, tup2, curpos = ()): + for index, (elt1, elt2) in enumerate(zip(tup1, tup2)): + if elt1 != 
elt2: + if type(elt1) is tuple and type(elt2) is tuple: + assert_tuples_equal(elt1, elt2, curpos + (index,)) + raise AssertionError('Found difference at %s : %s != %s' % + (curpos, name(elt1), name(elt2) ), curpos) + +from time import time, clock +def test_samples( samples ): + time_reports = {} + for sample in samples: + print "testing", sample + tstart1, cstart1 = time(), clock() + pypy_tuples = pypy_parse(sample) + tstart2, cstart2 = time(), clock() + python_tuples = python_parse(sample) + time_reports[sample] = (time() - tstart2, tstart2-tstart1, clock() - cstart2, cstart2-cstart1 ) + #print "-"*10, "PyPy parse results", "-"*10 + #print ''.join(print_sym_tuple(pypy_tuples, names=True)) + #print "-"*10, "CPython parse results", "-"*10 + #print ''.join(print_sym_tuple(python_tuples, names=True)) + print + try: + assert_tuples_equal(pypy_tuples, python_tuples) + except AssertionError as e: + error_path = e.args[-1] + print "ERROR PATH =", error_path + print "="*80 + print file(sample).read() + print "="*80 + print "-"*10, "PyPy parse results", "-"*10 + print ''.join(print_sym_tuple(pypy_tuples, names=True, trace=error_path)) + print "-"*10, "CPython parse results", "-"*10 + print ''.join(print_sym_tuple(python_tuples, names=True, trace=error_path)) + print "Failed on (%s)" % sample + # raise + pprint(time_reports) + +if __name__=="__main__": + import getopt + opts, args = getopt.getopt( sys.argv[1:], "d:", [] ) + for opt, val in opts: + if opt == "-d": + pass +# set_debug(int(val)) + if args: + samples = args + else: + samples = read_samples_dir() + + test_samples( samples )
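
For reference, the added test_pyparse.py above shows how the new parser entry point is driven: construct a PythonParser, describe the compilation with a CompileInfo, and call parse_source. A minimal usage sketch, assuming the pyparser package introduced by this changeset is on the import path (names and source text below are illustrative only):

    # Minimal sketch of the pyparse entry point exercised by the tests above.
    # Assumes the pyparser package added in this changeset is importable.
    from pyparser import pyparse
    from pyparser.pygram import syms

    parser = pyparse.PythonParser()
    info = pyparse.CompileInfo("<example>", "exec")   # filename, compile mode
    tree = parser.parse_source('answer = 6 * 7\n', info)

    # As in test_mode above, the root node type reflects the compile mode
    # ("exec" -> file_input, "eval" -> eval_input, "single" -> single_input).
    assert tree.type == syms.file_input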