diff --git a/ext/ply/ANNOUNCE b/ext/ply/ANNOUNCE
index 0a155cec3f..3e582501d8 100644
--- a/ext/ply/ANNOUNCE
+++ b/ext/ply/ANNOUNCE
@@ -1,13 +1,12 @@
-March 24, 2009
+February 15, 2018
- Announcing : PLY-3.2 (Python Lex-Yacc)
+ Announcing : PLY-3.11 (Python Lex-Yacc)
http://www.dabeaz.com/ply
-I'm pleased to announce a significant new update to PLY---a 100% Python
-implementation of the common parsing tools lex and yacc. PLY-3.2 adds
-compatibility for Python 2.6 and 3.0, provides some new customization
-options, and cleans up a lot of internal implementation details.
+I'm pleased to announce PLY-3.11--a pure Python implementation of the
+common parsing tools lex and yacc. PLY-3.11 is a minor bug fix
+release. It supports both Python 2 and Python 3.
If you are new to PLY, here are a few highlights:
diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES
index 9d8b25d5a9..44050072e9 100644
--- a/ext/ply/CHANGES
+++ b/ext/ply/CHANGES
@@ -1,3 +1,341 @@
+Version 3.11
+---------------------
+02/15/18 beazley
+ Fixed some minor bugs related to re flags and token order.
+ Github pull requests #151 and #153.
+
+02/15/18 beazley
+ Added a set_lexpos() method to grammar symbols. Github issue #148.
+
+
+04/13/17 beazley
+ Mostly minor bug fixes and small code cleanups.
+
+Version 3.10
+---------------------
+01/31/17: beazley
+ Changed grammar signature computation to not involve hashing
+ functions. Parts are just combined into a big string.
+
+10/07/16: beazley
+ Fixed Issue #101: Incorrect shift-reduce conflict resolution with
+ precedence specifier.
+
+ PLY was incorrectly resolving shift-reduce conflicts in certain
+ cases. For example, in the example/calc/calc.py example, you
+ could trigger it doing this:
+
+ calc > -3 - 4
+ 1 (correct answer should be -7)
+ calc >
+
+ Issue and suggested patch contributed by https://github.com/RomaVis
+
+Version 3.9
+---------------------
+08/30/16: beazley
+ Exposed the parser state number as the parser.state attribute
+ in productions and error functions. For example:
+
+ def p_somerule(p):
+ '''
+ rule : A B C
+ '''
+ print('State:', p.parser.state)
+
+ May address issue #65 (publish current state in error callback).
+
+08/30/16: beazley
+ Fixed Issue #88. Python3 compatibility with ply/cpp.
+
+08/30/16: beazley
+ Fixed Issue #93. Ply can crash if SyntaxError is raised inside
+ a production. Not actually sure if the original implementation
+ worked as documented at all. Yacc has been modified to follow
+ the spec as outlined in the CHANGES noted for 11/27/07 below.
+
+08/30/16: beazley
+ Fixed Issue #97. Failure with code validation when the original
+ source files aren't present. Validation step now ignores
+ the missing file.
+
+08/30/16: beazley
+ Minor fixes to version numbers.
+
+Version 3.8
+---------------------
+10/02/15: beazley
+ Fixed issues related to Python 3.5. Patch contributed by Barry Warsaw.
+
+Version 3.7
+---------------------
+08/25/15: beazley
+ Fixed problems when reading table files from pickled data.
+
+05/07/15: beazley
+ Fixed regression in handling of table modules if specified as module
+ objects. See https://github.com/dabeaz/ply/issues/63
+
+Version 3.6
+---------------------
+04/25/15: beazley
+ If PLY is unable to create the 'parser.out' or 'parsetab.py' files due
+ to permission issues, it now just issues a warning message and
+ continues to operate. This could happen if a module using PLY
+ is installed in a funny way where tables have to be regenerated, but
+ for whatever reason, the user doesn't have write permission on
+ the directory where PLY wants to put them.
+
+04/24/15: beazley
+ Fixed some issues related to use of packages and table file
+ modules. Just to emphasize, PLY now generates its special
+ files such as 'parsetab.py' and 'lextab.py' in the *SAME*
+ directory as the source file that uses lex() and yacc().
+
+ If for some reason, you want to change the name of the table
+ module, use the tabmodule and lextab options:
+
+ lexer = lex.lex(lextab='spamlextab')
+ parser = yacc.yacc(tabmodule='spamparsetab')
+
+ If you specify a simple name as shown, the module will still be
+ created in the same directory as the file invoking lex() or yacc().
+ If you want the table files to be placed into a different package,
+ then give a fully qualified package name. For example:
+
+ lexer = lex.lex(lextab='pkgname.files.lextab')
+ parser = yacc.yacc(tabmodule='pkgname.files.parsetab')
+
+ For this to work, 'pkgname.files' must already exist as a valid
+ Python package (i.e., the directories must already exist and be
+ set up with the proper __init__.py files, etc.).
+
+Version 3.5
+---------------------
+04/21/15: beazley
+ Added support for defaulted_states in the parser. A
+ defaulted_state is a state where the only legal action is a
+ reduction of a single grammar rule across all valid input
+ tokens. For such states, the rule is reduced and the
+ reading of the next lookahead token is delayed until it is
+ actually needed at a later point in time.
+
+ This delay in consuming the next lookahead token is a
+ potentially important feature in advanced parsing
+ applications that require tight interaction between the
+ lexer and the parser. For example, a grammar rule can
+ modify the lexer state upon reduction and have such changes
+ take effect before the next input token is read.
+
+ *** POTENTIAL INCOMPATIBILITY ***
+ One potential danger of defaulted_states is that syntax
+ errors might be deferred to a later point of processing
+ than where they were detected in past versions of PLY.
+ Thus, it's possible that your error handling could change
+ slightly on the same inputs. defaulted_states do not change
+ the overall parsing of the input (i.e., the same grammar is
+ accepted).
+
+ If for some reason, you need to disable defaulted states,
+ you can do this:
+
+ parser = yacc.yacc()
+ parser.defaulted_states = {}
+
+04/21/15: beazley
+ Fixed debug logging in the parser. It wasn't properly reporting goto states
+ on grammar rule reductions.
+
+04/20/15: beazley
+ Allowed actions to be defined for character literals (Issue #32). For example:
+
+ literals = [ '{', '}' ]
+
+ def t_lbrace(t):
+ r'\{'
+ # Some action
+ t.type = '{'
+ return t
+
+ def t_rbrace(t):
+ r'\}'
+ # Some action
+ t.type = '}'
+ return t
+
+04/19/15: beazley
+ Import of the 'parsetab.py' file is now constrained to only consider the
+ directory specified by the outputdir argument to yacc(). If not supplied,
+ the import will only consider the directory in which the grammar is defined.
+ This should greatly reduce problems with the wrong parsetab.py file being
+ imported by mistake. For example, if it's found somewhere else on the path
+ by accident.
+
+ *** POTENTIAL INCOMPATIBILITY *** It's possible that this might break some
+ packaging/deployment setup if PLY was instructed to place its parsetab.py
+ in a different location. You'll have to specify a proper outputdir= argument
+ to yacc() to fix this if needed.
+
+04/19/15: beazley
+ Changed default output directory to be the same as that in which the
+ yacc grammar is defined. If your grammar is in a file 'calc.py',
+ then the parsetab.py and parser.out files should be generated in the
+ same directory as that file. The destination directory can be changed
+ using the outputdir= argument to yacc().
+
+04/19/15: beazley
+ Changed the parsetab.py file signature slightly so that the parsetab won't
+ regenerate if created on a different major version of Python (ie., a
+ parsetab created on Python 2 will work with Python 3).
+
+04/16/15: beazley
+ Fixed Issue #44 call_errorfunc() should return the result of errorfunc()
+
+04/16/15: beazley
+ Support for versions of Python <2.7 is officially dropped. PLY may work, but
+ the unit tests require Python 2.7 or newer.
+
+04/16/15: beazley
+ Fixed bug related to calling yacc(start=...). PLY wasn't regenerating the
+ table file correctly for this case.
+
+04/16/15: beazley
+ Added skipped tests for PyPy and Java. Related to use of Python's -O option.
+
+05/29/13: beazley
+ Added filter to make unit tests pass under 'python -3'.
+ Reported by Neil Muller.
+
+05/29/13: beazley
+ Fixed CPP_INTEGER regex in ply/cpp.py (Issue 21).
+ Reported by @vbraun.
+
+05/29/13: beazley
+ Fixed yacc validation bugs when from __future__ import unicode_literals
+ is being used. Reported by Kenn Knowles.
+
+05/29/13: beazley
+ Added support for Travis-CI. Contributed by Kenn Knowles.
+
+05/29/13: beazley
+ Added a .gitignore file. Suggested by Kenn Knowles.
+
+05/29/13: beazley
+ Fixed validation problems for source files that include a
+ different source code encoding specifier. Fix relies on
+ the inspect module. Should work on Python 2.6 and newer.
+ Not sure about older versions of Python.
+ Contributed by Michael Droettboom
+
+05/21/13: beazley
+ Fixed unit tests for yacc to eliminate random failures due to dict hash value
+ randomization in Python 3.3
+ Reported by Arfrever
+
+10/15/12: beazley
+ Fixed comment whitespace processing bugs in ply/cpp.py.
+ Reported by Alexei Pososin.
+
+10/15/12: beazley
+ Fixed token names in ply/ctokens.py to match rule names.
+ Reported by Alexei Pososin.
+
+04/26/12: beazley
+ Changes to functions available in panic mode error recovery. In previous versions
+ of PLY, the following global functions were available for use in the p_error() rule:
+
+ yacc.errok() # Reset error state
+ yacc.token() # Get the next token
+ yacc.restart() # Reset the parsing stack
+
+ The use of global variables was problematic for code involving multiple parsers
+ and frankly was a poor design overall. These functions have been moved to methods
+ of the parser instance created by the yacc() function. You should write code like
+ this:
+
+ def p_error(p):
+ ...
+ parser.errok()
+
+ parser = yacc.yacc()
+
+ *** POTENTIAL INCOMPATIBILITY *** The original global functions now issue a
+ DeprecationWarning.
+
+04/19/12: beazley
+ Fixed some problems with line and position tracking and the use of error
+ symbols. If you have a grammar rule involving an error rule like this:
+
+ def p_assignment_bad(p):
+ '''assignment : location EQUALS error SEMI'''
+ ...
+
+ You can now do line and position tracking on the error token. For example:
+
+ def p_assignment_bad(p):
+ '''assignment : location EQUALS error SEMI'''
+ start_line = p.lineno(3)
+ start_pos = p.lexpos(3)
+
+ If the tracking=True option is supplied to parse(), you can additionally get
+ spans:
+
+ def p_assignment_bad(p):
+ '''assignment : location EQUALS error SEMI'''
+ start_line, end_line = p.linespan(3)
+ start_pos, end_pos = p.lexspan(3)
+
+ Note that error handling is still a hairy thing in PLY. This won't work
+ unless your lexer is providing accurate information. Please report bugs.
+ Suggested by a bug reported by Davis Herring.
+
+04/18/12: beazley
+ Change to doc string handling in lex module. Regex patterns are now first
+ pulled from a function's .regex attribute. If that doesn't exist, then
+ .doc is checked as a fallback. The @TOKEN decorator now sets the .regex
+ attribute of a function instead of its doc string.
+ Change suggested by Kristoffer Ellersgaard Koch.
+
+04/18/12: beazley
+ Fixed issue #1: Fixed _tabversion. It should use __tabversion__ instead of __version__
+ Reported by Daniele Tricoli
+
+04/18/12: beazley
+ Fixed issue #8: Literals empty list causes IndexError
+ Reported by Walter Nissen.
+
+04/18/12: beazley
+ Fixed issue #12: Typo in code snippet in documentation
+ Reported by florianschanda.
+
+04/18/12: beazley
+ Fixed issue #10: Correctly escape t_XOREQUAL pattern.
+ Reported by Andy Kittner.
+
+Version 3.4
+---------------------
+02/17/11: beazley
+ Minor patch to make cpp.py compatible with Python 3. Note: This
+ is an experimental file not currently used by the rest of PLY.
+
+02/17/11: beazley
+ Fixed setup.py trove classifiers to properly list PLY as
+ Python 3 compatible.
+
+01/02/11: beazley
+ Migration of repository to github.
+
+Version 3.3
+-----------------------------
+08/25/09: beazley
+ Fixed issue 15 related to the set_lineno() method in yacc. Reported by
+ mdsherry.
+
+08/25/09: beazley
+ Fixed a bug related to regular expression compilation flags not being
+ properly stored in lextab.py files created by the lexer when running
+ in optimize mode. Reported by Bruce Frederiksen.
+
Version 3.2
-----------------------------
diff --git a/ext/ply/MANIFEST.in b/ext/ply/MANIFEST.in
new file mode 100644
index 0000000000..0d37431b0b
--- /dev/null
+++ b/ext/ply/MANIFEST.in
@@ -0,0 +1,8 @@
+recursive-include example *
+recursive-include doc *
+recursive-include test *
+include ANNOUNCE
+include README.md
+include CHANGES
+include TODO
+global-exclude *.pyc
diff --git a/ext/ply/PKG-INFO b/ext/ply/PKG-INFO
new file mode 100644
index 0000000000..f2d8c8ae08
--- /dev/null
+++ b/ext/ply/PKG-INFO
@@ -0,0 +1,23 @@
+Metadata-Version: 1.1
+Name: ply
+Version: 3.11
+Summary: Python Lex & Yacc
+Home-page: http://www.dabeaz.com/ply/
+Author: David Beazley
+Author-email: dave@dabeaz.com
+License: BSD
+Description-Content-Type: UNKNOWN
+Description:
+ PLY is yet another implementation of lex and yacc for Python. Some notable
+ features include the fact that it's implemented entirely in Python and it
+ uses LALR(1) parsing which is efficient and well suited for larger grammars.
+
+ PLY provides most of the standard lex/yacc features including support for empty
+ productions, precedence rules, error recovery, and support for ambiguous grammars.
+
+ PLY is extremely easy to use and provides very extensive error checking.
+ It is compatible with both Python 2 and Python 3.
+
+Platform: UNKNOWN
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 2
diff --git a/ext/ply/README b/ext/ply/README.md
similarity index 64%
rename from ext/ply/README
rename to ext/ply/README.md
index d3b785fa22..05df32a5b9 100644
--- a/ext/ply/README
+++ b/ext/ply/README.md
@@ -1,6 +1,8 @@
-PLY (Python Lex-Yacc) Version 3.2
+# PLY (Python Lex-Yacc) Version 3.11
-Copyright (C) 2001-2009,
+[![Build Status](https://travis-ci.org/dabeaz/ply.svg?branch=master)](https://travis-ci.org/dabeaz/ply)
+
+Copyright (C) 2001-2018
David M. Beazley (Dabeaz LLC)
All rights reserved.
@@ -96,7 +98,7 @@ A simple example is found at the end of this document
Requirements
============
-PLY requires the use of Python 2.2 or greater. However, you should
+PLY requires the use of Python 2.6 or greater. However, you should
use the latest Python release if possible. It should work on just
about any platform. PLY has been tested with both CPython and Jython.
It also seems to work with IronPython.
@@ -112,7 +114,11 @@ book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown
may also be useful.
-A Google group for PLY can be found at
+The GitHub page for PLY can be found at:
+
+ https://github.com/dabeaz/ply
+
+An old and relatively inactive discussion group for PLY is found at:
http://groups.google.com/group/ply-hack
@@ -130,7 +136,7 @@ and testing a revised LALR(1) implementation for PLY-2.0.
Special Note for PLY-3.0
========================
PLY-3.0 the first PLY release to support Python 3. However, backwards
-compatibility with Python 2.2 is still preserved. PLY provides dual
+compatibility with Python 2.6 is still preserved. PLY provides dual
Python 2/3 compatibility by restricting its implementation to a common
subset of basic language features. You should not convert PLY using
2to3--it is not necessary and may in fact break the implementation.
@@ -141,109 +147,109 @@ Example
Here is a simple example showing a PLY implementation of a calculator
with variables.
-# -----------------------------------------------------------------------------
-# calc.py
-#
-# A simple calculator with variables.
-# -----------------------------------------------------------------------------
+ # -----------------------------------------------------------------------------
+ # calc.py
+ #
+ # A simple calculator with variables.
+ # -----------------------------------------------------------------------------
-tokens = (
- 'NAME','NUMBER',
- 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
- 'LPAREN','RPAREN',
- )
+ tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
-# Tokens
+ # Tokens
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_TIMES = r'\*'
-t_DIVIDE = r'/'
-t_EQUALS = r'='
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+ t_PLUS = r'\+'
+ t_MINUS = r'-'
+ t_TIMES = r'\*'
+ t_DIVIDE = r'/'
+ t_EQUALS = r'='
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
-def t_NUMBER(t):
- r'\d+'
- t.value = int(t.value)
- return t
+ def t_NUMBER(t):
+ r'\d+'
+ t.value = int(t.value)
+ return t
-# Ignored characters
-t_ignore = " \t"
+ # Ignored characters
+ t_ignore = " \t"
-def t_newline(t):
- r'\n+'
- t.lexer.lineno += t.value.count("\n")
-
-def t_error(t):
- print "Illegal character '%s'" % t.value[0]
- t.lexer.skip(1)
-
-# Build the lexer
-import ply.lex as lex
-lex.lex()
+ def t_newline(t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
-# Precedence rules for the arithmetic operators
-precedence = (
- ('left','PLUS','MINUS'),
- ('left','TIMES','DIVIDE'),
- ('right','UMINUS'),
- )
+ def t_error(t):
+ print("Illegal character '%s'" % t.value[0])
+ t.lexer.skip(1)
-# dictionary of names (for storing variables)
-names = { }
+ # Build the lexer
+ import ply.lex as lex
+ lex.lex()
-def p_statement_assign(p):
- 'statement : NAME EQUALS expression'
- names[p[1]] = p[3]
+ # Precedence rules for the arithmetic operators
+ precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
-def p_statement_expr(p):
- 'statement : expression'
- print p[1]
+ # dictionary of names (for storing variables)
+ names = { }
-def p_expression_binop(p):
- '''expression : expression PLUS expression
- | expression MINUS expression
- | expression TIMES expression
- | expression DIVIDE expression'''
- if p[2] == '+' : p[0] = p[1] + p[3]
- elif p[2] == '-': p[0] = p[1] - p[3]
- elif p[2] == '*': p[0] = p[1] * p[3]
- elif p[2] == '/': p[0] = p[1] / p[3]
+ def p_statement_assign(p):
+ 'statement : NAME EQUALS expression'
+ names[p[1]] = p[3]
-def p_expression_uminus(p):
- 'expression : MINUS expression %prec UMINUS'
- p[0] = -p[2]
+ def p_statement_expr(p):
+ 'statement : expression'
+ print(p[1])
-def p_expression_group(p):
- 'expression : LPAREN expression RPAREN'
- p[0] = p[2]
+ def p_expression_binop(p):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if p[2] == '+' : p[0] = p[1] + p[3]
+ elif p[2] == '-': p[0] = p[1] - p[3]
+ elif p[2] == '*': p[0] = p[1] * p[3]
+ elif p[2] == '/': p[0] = p[1] / p[3]
-def p_expression_number(p):
- 'expression : NUMBER'
- p[0] = p[1]
+ def p_expression_uminus(p):
+ 'expression : MINUS expression %prec UMINUS'
+ p[0] = -p[2]
-def p_expression_name(p):
- 'expression : NAME'
- try:
- p[0] = names[p[1]]
- except LookupError:
- print "Undefined name '%s'" % p[1]
- p[0] = 0
+ def p_expression_group(p):
+ 'expression : LPAREN expression RPAREN'
+ p[0] = p[2]
-def p_error(p):
- print "Syntax error at '%s'" % p.value
+ def p_expression_number(p):
+ 'expression : NUMBER'
+ p[0] = p[1]
-import ply.yacc as yacc
-yacc.yacc()
+ def p_expression_name(p):
+ 'expression : NAME'
+ try:
+ p[0] = names[p[1]]
+ except LookupError:
+ print("Undefined name '%s'" % p[1])
+ p[0] = 0
-while 1:
- try:
- s = raw_input('calc > ')
- except EOFError:
- break
- yacc.parse(s)
+ def p_error(p):
+ print("Syntax error at '%s'" % p.value)
+
+ import ply.yacc as yacc
+ yacc.yacc()
+
+ while True:
+ try:
+ s = raw_input('calc > ') # use input() on Python 3
+ except EOFError:
+ break
+ yacc.parse(s)
Bug Reports and Patches
@@ -252,12 +258,10 @@ My goal with PLY is to simply have a decent lex/yacc implementation
for Python. As a general rule, I don't spend huge amounts of time
working on it unless I receive very specific bug reports and/or
patches to fix problems. I also try to incorporate submitted feature
-requests and enhancements into each new version. To contact me about
-bugs and/or new features, please send email to dave@dabeaz.com.
-
-In addition there is a Google group for discussing PLY related issues at
-
- http://groups.google.com/group/ply-hack
+requests and enhancements into each new version. Please visit the PLY
+github page at https://github.com/dabeaz/ply to submit issues and pull
+requests. To contact me about bugs and/or new features, please send
+email to dave@dabeaz.com.
-- Dave
diff --git a/ext/ply/doc/internal.html b/ext/ply/doc/internal.html
index 3fabfe28c0..57e87dfc7e 100644
--- a/ext/ply/doc/internal.html
+++ b/ext/ply/doc/internal.html
@@ -12,7 +12,7 @@ dave@dabeaz.com
-PLY Version: 3.0 +PLY Version: 3.11
diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html
index 3345e79294..b35ba44611 100644
--- a/ext/ply/doc/ply.html
+++ b/ext/ply/doc/ply.html
@@ -12,13 +12,13 @@ dave@dabeaz.com
-PLY Version: 3.0 +PLY Version: 3.11
@@ -90,12 +96,8 @@ into a big development project with PLY.
-PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that -Python 3 support is new and has not been extensively tested (although -all of the examples and unit tests pass under Python 3.0). If you are -using Python 2, you should try to use Python 2.4 or newer. Although PLY -works with versions as far back as Python 2.2, some of its optional features -require more modern library modules. +PLY-3.5 is compatible with both Python 2 and Python 3. If you are using +Python 2, you have to use Python 2.6 or newer.
Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. Since 2001, PLY has -continued to be improved as feedback has been received from users. -PLY-3.0 represents a major refactoring of the original implementation -with an eye towards future enhancements. - -
+Compilers Course I taught in 2001 at the University of Chicago. Since PLY was primarily developed as an instructional tool, you will find it to be fairly picky about token and grammar rule specification. In part, this @@ -137,10 +127,10 @@ to be a parsing framework. Instead, you will find a bare-bones, yet fully capable lex/yacc implementation written entirely in Python.
-The rest of this document assumes that you are somewhat familar with +The rest of this document assumes that you are somewhat familiar with parsing theory, syntax directed translation, and the use of compiler construction tools such as lex and yacc in other programming -languages. If you are unfamilar with these topics, you will probably +languages. If you are unfamiliar with these topics, you will probably want to consult an introductory text such as "Compilers: Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. O'Reilly's "Lex and Yacc" by John Levine may also be handy. In fact, the O'Reilly book can be @@ -149,13 +139,14 @@ used as a reference for PLY as the concepts are virtually identical.
PLY consists of two separate modules; lex.py and yacc.py, both of which are found in a Python package called ply. The lex.py module is used to break input text into a collection of tokens specified by a collection of regular expression rules. yacc.py is used to recognize language syntax that has -been specified in the form of a context free grammar. yacc.py uses LR parsing and generates its parsing tables -using either the LALR(1) (the default) or SLR table generation algorithms. +been specified in the form of a context free grammar. +
The two tools are meant to work together. Specifically, @@ -171,7 +162,7 @@ simple one-pass compilers. Like its Unix counterpart, yacc.py provides most of the features you expect including extensive error checking, grammar validation, support for empty productions, error tokens, and ambiguity -resolution via precedence rules. In fact, everything that is possible in traditional yacc +resolution via precedence rules. In fact, almost everything that is possible in traditional yacc should be supported in PLY.
@@ -282,7 +273,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer @@ -310,8 +301,9 @@ lexer.input(data) # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok + if not tok: + break # No more input + print(tok) @@ -338,7 +330,7 @@ Lexers also support the iteration protocol. So, you can write the above loop
@@ -353,8 +345,9 @@ accessing these attributes: # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok.type, tok.value, tok.line, tok.lexpos + if not tok: + break # No more input + print(tok.type, tok.value, tok.lineno, tok.lexpos) @@ -367,10 +360,12 @@ token relative to the start of the input text.for tok in lexer: - print tok + print(tok)
All lexers must provide a list tokens that defines all of the possible token names that can be produced by the lexer. This list is always required and is used to perform a variety of validation checks. The tokens list is also used by the yacc.py module to identify terminals. +
In the example, the following code specified the token names: @@ -392,7 +387,7 @@ tokens = (
-Internally, lex.py uses the re module to do its patten matching. When building the master regular expression, +Internally, lex.py uses the re module to do its pattern matching. Patterns are compiled +using the re.VERBOSE flag which can be used to help readability. However, be aware that unescaped +whitespace is ignored and comments are allowed in this mode. If your pattern involves whitespace, make sure you +use \s. If you need to match the # character, use [#]. +
+ ++When building the master regular expression, rules are added in the following order: +
+
-lex.py does not perform and kind of automatic column tracking. However, it does record positional +lex.py does not perform any kind of automatic column tracking. However, it does record positional information related to each token in the lexpos attribute. Using this, it is usually possible to compute column information as a separate step. For instance, just count backwards until you reach a newline.
-# Compute column.
+# Compute column.
# input is the input text string
# token is a token instance
-def find_column(input,token):
- last_cr = input.rfind('\n',0,token.lexpos)
- if last_cr < 0:
- last_cr = 0
- column = (token.lexpos - last_cr) + 1
- return column
+def find_column(input, token):
+ line_start = input.rfind('\n', 0, token.lexpos) + 1
+ return (token.lexpos - line_start) + 1
@@ -580,6 +581,15 @@ Although it is possible to define a regular expression rule for whitespace in a
similar to t_newline(), the use of t_ignore provides substantially better
lexing performance because it is handled as a special case and is checked in a much
more efficient manner than the normal regular expression rules.
+
+
++The characters given in t_ignore are not ignored when such characters are part of +other regular expression patterns. For example, if you had a rule to capture quoted text, +that pattern can include the ignored characters (which will be captured in the normal way). The +main purpose of t_ignore is to ignore whitespace and other padding between the +tokens that you actually want to parse. +
When a literal token is returned, both its type and value attributes are set to the character itself. For example, '+'. +
+ ++It's possible to write token functions that perform additional actions +when literals are matched. However, you'll need to set the token type +appropriately. For example: +
+ +
+
+literals = [ '{', '}' ]
+
+def t_lbrace(t):
+ r'\{'
+ t.type = '{' # Set token type to the expected literal
+ return t
+
+def t_rbrace(t):
+ r'\}'
+ t.type = '}' # Set token type to the expected literal
+ return t
+
+
-Finally, the t_error() +The t_error() function is used to handle lexing errors that occur when illegal characters are detected. In this case, the t.value attribute contains the rest of the input string that has not been tokenized. In the example, the error function @@ -621,49 +655,67 @@ was defined as follows:
# Error handling rule
def t_error(t):
- print "Illegal character '%s'" % t.value[0]
+ print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
In this case, we simply print the offending character and skip ahead one character by calling t.lexer.skip(1).
--To build the lexer, the function lex.lex() is used. This function -uses Python reflection (or introspection) to read the the regular expression rules +The t_eof() function is used to handle an end-of-file (EOF) condition in the input. As input, it +receives a token type 'eof' with the lineno and lexpos attributes set appropriately. +The main use of this function is to provide more input to the lexer so that it can continue to parse. Here is an +example of how this works: +
+ +
+
+# EOF handling rule
+def t_eof(t):
+ # Get more input (Example)
+ more = raw_input('... ')
+ if more:
+ t.lexer.input(more)
+ return t.lexer.token()
+ return None
+
+
+
++The EOF function should return the next available token (by calling t.lexer.token()) or None to +indicate no more data. Be aware that setting more input with the t.lexer.input() method does +NOT reset the lexer state or the lineno attribute used for position tracking. The lexpos +attribute is reset so be aware of that if you're using it in error reporting. +
+ ++To build the lexer, the function lex.lex() is used. For example:
+ +++ ++lexer = lex.lex() ++
This function +uses Python reflection (or introspection) to read the regular expression rules out of the calling context and build the lexer. Once the lexer has been built, two methods can be used to control the lexer. - +
-- --lex.lex() -lex.input(sometext) -while 1: - tok = lex.token() - if not tok: break - print tok --
-In this example, the module-level functions lex.input() and lex.token() are bound to the input() -and token() methods of the last lexer created by the lex module. This interface may go away at some point so -it's probably best not to use it. - -
+This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. +
--- -NOTE: Use of @TOKEN requires Python-2.4 or newer. If you're concerned about backwards compatibility with older -versions of Python, use the alternative approach of setting the docstring directly. - --def t_ID(t): - ... - -t_ID.__doc__ = identifier --
-To change the name of the lexer-generated file, use the lextab keyword argument. For example: +To change the name of the lexer-generated module, use the lextab keyword argument. For example: +
@@ -860,7 +903,7 @@ The module option can also be used to define lexers from instances of a@@ -747,7 +790,7 @@ lexer = lex.lex(optimize=1,lextab="footab") When running in optimized mode, it is important to note that lex disables most error checking. Thus, this is really only recommended if you're sure everything is working correctly and you're ready to start releasing production code. -4.13 Debugging
+4.14 Debugging
For the purpose of debugging, you can run lex() in a debugging mode as follows: @@ -779,7 +822,7 @@ if __name__ == '__main__': Please refer to the "Debugging" section near the end for some more advanced details of debugging. -4.14 Alternative specification of lexers
+4.15 Alternative specification of lexers
As shown in the example, lexers are specified all within one Python module. If you want to @@ -830,7 +873,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1)
import ply.lex as lex
-class MyLexer:
+class MyLexer(object):
# List of token names. This is always required
tokens = (
'NUMBER',
@@ -897,7 +940,7 @@ class MyLexer:
# Error handling rule
def t_error(self,t):
- print "Illegal character '%s'" % t.value[0]
+ print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer
@@ -908,9 +951,10 @@ class MyLexer:
def test(self,data):
self.lexer.input(data)
while True:
- tok = lexer.token()
- if not tok: break
- print tok
+ tok = self.lexer.token()
+ if not tok:
+ break
+ print(tok)
# Build the lexer and try it out
m = MyLexer()
@@ -928,7 +972,7 @@ PLY only works properly if the lexer actions are defined by bound-methods.
When using the module option to lex(), PLY collects symbols
from the underlying object using the dir() function. There is no
direct access to the __dict__ attribute of the object supplied as a
-module value.
+module value.
Finally, if you want to keep things nicely encapsulated, but don't want to use a
@@ -974,7 +1018,7 @@ def MyLexer():
# Error handling rule
def t_error(t):
- print "Illegal character '%s'" % t.value[0]
+ print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)
# Build the lexer from my environment and return it
@@ -982,8 +1026,13 @@ def MyLexer():
++Important note: If you are defining a lexer using a class or closure, be aware that PLY still requires you to only +define a single lexer per module (source file). There are extensive validation/error checking parts of the PLY that +may falsely report error messages if you don't follow this rule. +
-@@ -1080,7 +1129,7 @@ def MyLexer(): -
@@ -1105,7 +1154,7 @@ cloned lexers could be used to handle different input files.
Creating a clone is different than calling lex.lex() in that -PLY doesn't regenerate any of the internal tables or regular expressions. So, +PLY doesn't regenerate any of the internal tables or regular expressions.
Special considerations need to be made when cloning lexers that also @@ -1129,7 +1178,7 @@ important to emphasize that clone() is only meant to create a new lexer that reuses the regular expressions and environment of another lexer. If you need to make a totally new copy of a lexer, then call lex() again. -
To define a new lexing state, it must first be declared. This is done by including a "states" declaration in your @@ -1244,8 +1293,8 @@ t_INITIAL_NUMBER = r'\d+'
-States are also associated with the special t_ignore and t_error() declarations. For example, if a state treats -these differently, you can declare: +States are also associated with the special t_ignore, t_error(), and t_eof() declarations. For example, if a state treats +these differently, you can declare:
@@ -1677,15 +1733,25 @@ calc > +@@ -1336,7 +1385,7 @@ def t_ccode_rbrace(t): # C or C++ comment (ignore) def t_ccode_comment(t): - r'(/\*(.|\n)*?*/)|(//.*)' + r'(/\*(.|\n)*?\*/)|(//.*)' pass # C string @@ -1366,13 +1415,16 @@ However, if the closing right brace is encountered, the rule t_ccode_rbrace< position), stores it, and returns a token 'CCODE' containing all of that text. When returning the token, the lexing state is restored back to its initial state. -4.19 Miscellaneous Issues
+4.20 Miscellaneous Issues
- The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data -such as open files or sockets. This limitation is primarily a side-effect of using the re module. +such as open files or sockets. This limitation is primarily a side-effect of using the re module. You might be +able to work around this by implementing an appropriate def t_eof() end-of-file handling rule. The main complication +here is that you'll probably need to ensure that data is fed to the lexer in a way so that it doesn't split in the middle +of a token.
- The lexer should work properly with both Unicode strings given as token and pattern matching rules as @@ -1383,10 +1435,13 @@ well as for input text.
+Note: by default, reflags is set to re.VERBOSE. If you provide +your own flags, you may need to include this for PLY to preserve its normal behavior. +-lex.lex(reflags=re.UNICODE) +lex.lex(reflags=re.UNICODE | re.VERBOSE)
- Since the lexer is written entirely in Python, its performance is largely determined by that of the Python re module. Although @@ -1403,7 +1458,8 @@ it only needs to conform to the following requirements:
- It must provide a token() method that returns the next token or None if no more tokens are available. -
- The token() method must return an object tok that has type and value attributes. +
- The token() method must return an object tok that has type and value attributes. If +line number tracking is being used, then the token should also define a lineno attribute.
5. Parsing basics
@@ -1595,7 +1651,7 @@ def p_factor_expr(p): # Error rule for syntax errors def p_error(p): - print "Syntax error in input!" + print("Syntax error in input!") # Build the parser parser = yacc.yacc() @@ -1607,7 +1663,7 @@ while True: break if not s: continue result = parser.parse(s) - print result + print(result)
Since table construction is relatively expensive (especially for large -grammars), the resulting parsing table is written to the current -directory in a file called parsetab.py. In addition, a +grammars), the resulting parsing table is written to +a file called parsetab.py. In addition, a debugging file called parser.out is created. On subsequent executions, yacc will reload the table from parsetab.py unless it has detected a change in the underlying grammar (in which case the tables and parsetab.py file are -regenerated). Note: The names of parser output files can be changed -if necessary. See the PLY Reference for details. +regenerated). Both of these files are written to the same directory +as the module in which the parser is specified. +The name of the parsetab module can be changed using the +tabmodule keyword argument to yacc(). For example: +
+ +++parser = yacc.yacc(tabmodule='fooparsetab') ++
If any errors are detected in your grammar specification, yacc.py will produce @@ -1824,7 +1890,7 @@ literals = ['+','-','*','/' ] Character literals are limited to a single character. Thus, it is not legal to specify literals such as '<=' or '=='. For this, use the normal lexing rules (e.g., define a rule such as t_EQ = r'=='). -
@@ -2060,7 +2126,7 @@ of UMINUS in the precedence specifier.-yacc.yacc(start='foo') +parser = yacc.yacc(start='foo')
At first, the use of UMINUS in this example may appear very confusing. -UMINUS is not an input token or a grammer rule. Instead, you should +UMINUS is not an input token or a grammar rule. Instead, you should think of it as the name of a special marker in the precedence table. When you use the %prec qualifier, you're simply telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence. @@ -2123,7 +2189,7 @@ the rule assignment : ID EQUALS expression.
It should be noted that reduce/reduce conflicts are notoriously -difficult to spot simply looking at the input grammer. When a +difficult to spot simply looking at the input grammar. When a reduce/reduce conflict occurs, yacc() will try to help by printing a warning message such as this: @@ -2142,7 +2208,7 @@ the contents of the parser.out debugging file with an appropriately high level of caffeination. -
def p_statement_print_error(p):
'statement : PRINT error SEMI'
- print "Syntax error in print statement. Bad expression"
+ print("Syntax error in print statement. Bad expression")
@@ -2519,7 +2586,7 @@ on the right in an error rule. For example:
def p_statement_print_error(p):
'statement : PRINT error'
- print "Syntax error in print statement. Bad expression"
+ print("Syntax error in print statement. Bad expression")
@@ -2541,12 +2608,17 @@ parser in its initial state.
def p_error(p):
- print "Whoa. You are seriously hosed."
+ print("Whoa. You are seriously hosed.")
+ if not p:
+ print("End of File!")
+ return
+
# Read ahead looking for a closing '}'
- while 1:
- tok = yacc.token() # Get the next token
- if not tok or tok.type == 'RBRACE': break
- yacc.restart()
+ while True:
+ tok = parser.token() # Get the next token
+ if not tok or tok.type == 'RBRACE':
+ break
+ parser.restart()
@@ -2556,32 +2628,33 @@ This function simply discards the bad token and tells the parser that the error
def p_error(p):
- print "Syntax error at token", p.type
- # Just discard the token and tell the parser it's okay.
- yacc.errok()
+ if p:
+ print("Syntax error at token", p.type)
+ # Just discard the token and tell the parser it's okay.
+ parser.errok()
+ else:
+ print("Syntax error at EOF")
-Within the p_error() function, three functions are available to control the behavior -of the parser: +More information on these methods is as follows: +
+
-
-
To supply the next lookahead token to the parser, p_error() can return a token. This might be useful if trying to synchronize on special characters. For example: @@ -2590,17 +2663,24 @@ useful if trying to synchronize on special characters. For example:
def p_error(p):
# Read ahead looking for a terminating ";"
- while 1:
- tok = yacc.token() # Get the next token
+ while True:
+ tok = parser.token() # Get the next token
if not tok or tok.type == 'SEMI': break
- yacc.errok()
+ parser.errok()
# Return SEMI to the parser as the next lookahead token
return tok
-+Keep in mind that in the above error handling functions, +parser is an instance of the parser created by +yacc(). You'll need to save this instance someplace in your +code so that you can refer to it during error handling. +
+ +Note: This feature of PLY is meant to mimic the behavior of the YYERROR macro in yacc. +
+In most cases, yacc will handle errors as soon as a bad input token is +detected on the input. However, be aware that yacc may choose to +delay error handling until after it has reduced one or more grammar +rules first. This behavior might be unexpected, but it's related to +special states in the underlying parsing table known as "defaulted +states." A defaulted state is a parsing condition where the same +grammar rule will be reduced regardless of what valid token +comes next on the input. For such states, yacc chooses to go ahead +and reduce the grammar rule without reading the next input +token. If the next token is bad, yacc will eventually get around to reading it and +report a syntax error. It's just a little unusual in that you might +see some of your grammar rules firing immediately prior to the syntax +error. +
+ ++Usually, the delayed error reporting with defaulted states is harmless +(and there are other reasons for wanting PLY to behave in this way). +However, if you need to turn this behavior off for some reason, you +can clear the defaulted states table like this: +
+ +
+
+parser = yacc.yacc()
+parser.defaulted_states = {}
+
+
+
++Disabling defaulted states is not recommended if your grammar makes use +of embedded actions as described in Section 6.11.
+ +
def p_foo(p):
"foo : A B C D"
- print "Parsed a foo", p[1],p[2],p[3],p[4]
+ print("Parsed a foo", p[1],p[2],p[3],p[4])
@@ -2860,12 +2976,12 @@ been parsed. To do this, write an empty rule like this:
def p_foo(p):
"foo : A seen_A B C D"
- print "Parsed a foo", p[1],p[3],p[4],p[5]
- print "seen_A returned", p[2]
+ print("Parsed a foo", p[1],p[3],p[4],p[5])
+ print("seen_A returned", p[2])
def p_seen_A(p):
"seen_A :"
- print "Saw an A = ", p[-1] # Access grammar symbol to left
+ print("Saw an A = ", p[-1]) # Access grammar symbol to left
p[0] = some_value # Assign value to seen_A
@@ -2956,25 +3072,13 @@ might undo the operations performed in the embedded action
--Note: LALR table generation takes approximately twice as long as SLR table generation. There is no -difference in actual parsing performance---the same code is used in both cases. LALR is preferred when working -with more complicated grammars since it is more powerful. - --yacc.yacc(method="SLR") --
in this case, x must be a Lexer object that minimally has a x.token() method for retrieving the next @@ -2986,7 +3090,7 @@ To disable this, use-yacc.parse(lexer=x) +parser = yacc.parse(lexer=x)
@@ -2995,23 +3099,36 @@ yacc.yacc(debug=0)-yacc.yacc(debug=0) +parser = yacc.yacc(debug=False)
+-yacc.yacc(tabmodule="foo") +parser = yacc.yacc(tabmodule="foo")
+Normally, the parsetab.py file is placed into the same directory as +the module where the parser is defined. If you want it to go somewhere else, you can +give an absolute package name for tabmodule instead. In that case, the +tables will be written there. +
+
+-yacc.yacc(tabmodule="foo",outputdir="somedirectory") +parser = yacc.yacc(tabmodule="foo",outputdir="somedirectory")
+Note: Be aware that unless the directory specified is also on Python's path (sys.path), subsequent +imports of the table file will fail. As a general rule, it's better to specify a destination using the +tabmodule argument instead of directly specifying a directory using the outputdir argument. +
+
@@ -3023,24 +3140,10 @@ each time it runs (which may take awhile depending on how large your grammar is)-yacc.yacc(write_tables=0) +parser = yacc.yacc(write_tables=False)
--yacc.parse(debug=1) +parser.parse(input_text, debug=True)
-
-- -Note: The function yacc.parse() is bound to the last parser that was generated. --p = yacc.yacc() -... -p.parse() --
It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules -and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow -machine. Please be patient. +and several hundred states.
+
+
++ ++from functools import wraps +from nodes import Collection + + +def strict(*types): + def decorate(func): + @wraps(func) + def wrapper(p): + func(p) + if not isinstance(p[0], types): + raise TypeError + + wrapper.co_firstlineno = func.__code__.co_firstlineno + return wrapper + + return decorate + +@strict(Collection) +def p_collection(p): + """ + collection : sequence + | map + """ + p[0] = p[1] ++
Debugging a compiler is typically not an easy task. PLY provides some -advanced diagonistic capabilities through the use of Python's +advanced diagnostic capabilities through the use of Python's logging module. The next two sections describe this:
+If you are distributing a package that makes use of PLY, you should +spend a few moments thinking about how you want to handle the files +that are automatically generated. For example, the parsetab.py +file generated by the yacc() function.
+ ++Starting in PLY-3.6, the table files are created in the same directory +as the file where a parser is defined. This means that the +parsetab.py file will live side-by-side with your parser +specification. In terms of packaging, this is probably the easiest and +most sane approach to manage. You don't need to give yacc() +any extra arguments and it should just "work."
+ ++One concern is the management of the parsetab.py file itself. +For example, should you have this file checked into version control (e.g., GitHub), +should it be included in a package distribution as a normal file, or should you +just let PLY generate it automatically for the user when they install your package? +
+ ++As of PLY-3.6, the parsetab.py file should be compatible across all versions +of Python including Python 2 and 3. Thus, a table file generated in Python 2 should +work fine if it's used on Python 3. Because of this, it should be relatively harmless +to distribute the parsetab.py file yourself if you need to. However, be aware +that older/newer versions of PLY may try to regenerate the file if there are future +enhancements or changes to its format. +
+ ++To make the generation of table files easier for the purposes of installation, you might +want to make your parser files executable using the -m option or similar. For +example: +
+ +++ ++# calc.py +... +... +def make_parser(): + parser = yacc.yacc() + return parser + +if __name__ == '__main__': + make_parser() ++
+You can then use a command such as python -m calc to generate the tables. Alternatively, +a setup.py script can import the module and use make_parser() to create the +parsing tables. +
+ ++If you're willing to sacrifice a little startup time, you can also instruct PLY to never write the +tables using yacc.yacc(write_tables=False, debug=False). In this mode, PLY will regenerate +the parsing tables from scratch each time. For a small grammar, you probably won't notice. For a +large grammar, you should probably reconsider--the parsing tables are meant to dramatically speed up this process. +
+ ++During operation, it is normal for PLY to produce diagnostic error +messages (usually printed to standard error). These are generated +entirely using the logging module. If you want to redirect +these messages or silence them, you can provide your own logging +object to yacc(). For example: +
+ +
+
+import logging
+log = logging.getLogger('ply')
+...
+parser = yacc.yacc(errorlog=log)
+
+
+
+['"]).*?(?P=quote)''' -t_NUMBER = r'\d+' +t_NUMBER = r'\d+' + def t_SECTION(t): r'%%' - if getattr(t.lexer,"lastsection",0): - t.value = t.lexer.lexdata[t.lexpos+2:] - t.lexer.lexpos = len(t.lexer.lexdata) + if getattr(t.lexer, "lastsection", 0): + t.value = t.lexer.lexdata[t.lexpos + 2:] + t.lexer.lexpos = len(t.lexer.lexdata) else: - t.lexer.lastsection = 0 + t.lexer.lastsection = 0 return t # Comments + + def t_ccomment(t): r'/\*(.|\n)*?\*/' t.lexer.lineno += t.value.count('\n') t_ignore_cppcomment = r'//.*' + def t_LITERAL(t): - r'%\{(.|\n)*?%\}' - t.lexer.lineno += t.value.count("\n") - return t + r'%\{(.|\n)*?%\}' + t.lexer.lineno += t.value.count("\n") + return t + def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 + r'\n' + t.lexer.lineno += 1 + def t_code(t): - r'\{' - t.lexer.codestart = t.lexpos - t.lexer.level = 1 - t.lexer.begin('code') + r'\{' + t.lexer.codestart = t.lexpos + t.lexer.level = 1 + t.lexer.begin('code') + def t_code_ignore_string(t): r'\"([^\\\n]|(\\.))*?\"' + def t_code_ignore_char(t): r'\'([^\\\n]|(\\.))*?\'' + def t_code_ignore_comment(t): - r'/\*(.|\n)*?\*/' + r'/\*(.|\n)*?\*/' + def t_code_ignore_cppcom(t): - r'//.*' + r'//.*' + def t_code_lbrace(t): r'\{' t.lexer.level += 1 + def t_code_rbrace(t): r'\}' t.lexer.level -= 1 if t.lexer.level == 0: - t.type = 'CODE' - t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] - t.lexer.begin('INITIAL') - t.lexer.lineno += t.value.count('\n') - return t + t.type = 'CODE' + t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1] + t.lexer.begin('INITIAL') + t.lexer.lineno += t.value.count('\n') + return t -t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' +t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' t_code_ignore_whitespace = r'\s+' t_code_ignore = "" + def t_code_error(t): raise RuntimeError + def t_error(t): - print "%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]) - print t.value + print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0])) + print(t.value) t.lexer.skip(1) 
lex.lex() if __name__ == '__main__': lex.runmain() - - - - - - - diff --git a/ext/ply/example/yply/yparse.py b/ext/ply/example/yply/yparse.py index ab5b884514..1f2e8d0922 100644 --- a/ext/ply/example/yply/yparse.py +++ b/ext/ply/example/yply/yparse.py @@ -9,53 +9,61 @@ tokens = ylex.tokens from ply import * tokenlist = [] -preclist = [] +preclist = [] emit_code = 1 + def p_yacc(p): '''yacc : defsection rulesection''' + def p_defsection(p): '''defsection : definitions SECTION | SECTION''' p.lexer.lastsection = 1 - print "tokens = ", repr(tokenlist) - print - print "precedence = ", repr(preclist) - print - print "# -------------- RULES ----------------" - print + print("tokens = ", repr(tokenlist)) + print() + print("precedence = ", repr(preclist)) + print() + print("# -------------- RULES ----------------") + print() + def p_rulesection(p): '''rulesection : rules SECTION''' - print "# -------------- RULES END ----------------" - print_code(p[2],0) + print("# -------------- RULES END ----------------") + print_code(p[2], 0) + def p_definitions(p): '''definitions : definitions definition | definition''' + def p_definition_literal(p): '''definition : LITERAL''' - print_code(p[1],0) + print_code(p[1], 0) + def p_definition_start(p): '''definition : START ID''' - print "start = '%s'" % p[2] + print("start = '%s'" % p[2]) + def p_definition_token(p): '''definition : toktype opttype idlist optsemi ''' for i in p[3]: - if i[0] not in "'\"": - tokenlist.append(i) + if i[0] not in "'\"": + tokenlist.append(i) if p[1] == '%left': preclist.append(('left',) + tuple(p[3])) elif p[1] == '%right': preclist.append(('right',) + tuple(p[3])) elif p[1] == '%nonassoc': - preclist.append(('nonassoc',)+ tuple(p[3])) + preclist.append(('nonassoc',) + tuple(p[3])) + def p_toktype(p): '''toktype : TOKEN @@ -64,10 +72,12 @@ def p_toktype(p): | NONASSOC''' p[0] = p[1] + def p_opttype(p): '''opttype : '<' ID '>' | empty''' + def p_idlist(p): '''idlist : idlist optcomma tokenid | tokenid''' @@ 
-77,141 +87,158 @@ def p_idlist(p): p[0] = p[1] p[1].append(p[3]) + def p_tokenid(p): '''tokenid : ID | ID NUMBER | QLITERAL | QLITERAL NUMBER''' p[0] = p[1] - + + def p_optsemi(p): '''optsemi : ';' | empty''' + def p_optcomma(p): '''optcomma : ',' | empty''' + def p_definition_type(p): '''definition : TYPE '<' ID '>' namelist optsemi''' # type declarations are ignored + def p_namelist(p): '''namelist : namelist optcomma ID | ID''' + def p_definition_union(p): '''definition : UNION CODE optsemi''' # Union declarations are ignored + def p_rules(p): '''rules : rules rule | rule''' if len(p) == 2: - rule = p[1] + rule = p[1] else: - rule = p[2] + rule = p[2] # Print out a Python equivalent of this rule - embedded = [ ] # Embedded actions (a mess) + embedded = [] # Embedded actions (a mess) embed_count = 0 rulename = rule[0] rulecount = 1 for r in rule[1]: # r contains one of the rule possibilities - print "def p_%s_%d(p):" % (rulename,rulecount) + print("def p_%s_%d(p):" % (rulename, rulecount)) prod = [] prodcode = "" for i in range(len(r)): - item = r[i] - if item[0] == '{': # A code block - if i == len(r) - 1: - prodcode = item - break - else: - # an embedded action - embed_name = "_embed%d_%s" % (embed_count,rulename) - prod.append(embed_name) - embedded.append((embed_name,item)) - embed_count += 1 - else: - prod.append(item) - print " '''%s : %s'''" % (rulename, " ".join(prod)) + item = r[i] + if item[0] == '{': # A code block + if i == len(r) - 1: + prodcode = item + break + else: + # an embedded action + embed_name = "_embed%d_%s" % (embed_count, rulename) + prod.append(embed_name) + embedded.append((embed_name, item)) + embed_count += 1 + else: + prod.append(item) + print(" '''%s : %s'''" % (rulename, " ".join(prod))) # Emit code - print_code(prodcode,4) - print + print_code(prodcode, 4) + print() rulecount += 1 - for e,code in embedded: - print "def p_%s(p):" % e - print " '''%s : '''" % e - print_code(code,4) - print + for e, code in embedded: + print("def 
p_%s(p):" % e) + print(" '''%s : '''" % e) + print_code(code, 4) + print() + def p_rule(p): - '''rule : ID ':' rulelist ';' ''' - p[0] = (p[1],[p[3]]) + '''rule : ID ':' rulelist ';' ''' + p[0] = (p[1], [p[3]]) + def p_rule2(p): - '''rule : ID ':' rulelist morerules ';' ''' - p[4].insert(0,p[3]) - p[0] = (p[1],p[4]) + '''rule : ID ':' rulelist morerules ';' ''' + p[4].insert(0, p[3]) + p[0] = (p[1], p[4]) + def p_rule_empty(p): - '''rule : ID ':' ';' ''' - p[0] = (p[1],[[]]) + '''rule : ID ':' ';' ''' + p[0] = (p[1], [[]]) + def p_rule_empty2(p): - '''rule : ID ':' morerules ';' ''' - - p[3].insert(0,[]) - p[0] = (p[1],p[3]) + '''rule : ID ':' morerules ';' ''' + + p[3].insert(0, []) + p[0] = (p[1], p[3]) + def p_morerules(p): - '''morerules : morerules '|' rulelist - | '|' rulelist - | '|' ''' - - if len(p) == 2: - p[0] = [[]] - elif len(p) == 3: - p[0] = [p[2]] - else: - p[0] = p[1] - p[0].append(p[3]) + '''morerules : morerules '|' rulelist + | '|' rulelist + | '|' ''' + + if len(p) == 2: + p[0] = [[]] + elif len(p) == 3: + p[0] = [p[2]] + else: + p[0] = p[1] + p[0].append(p[3]) + +# print("morerules", len(p), p[0]) -# print "morerules", len(p), p[0] def p_rulelist(p): - '''rulelist : rulelist ruleitem - | ruleitem''' + '''rulelist : rulelist ruleitem + | ruleitem''' - if len(p) == 2: + if len(p) == 2: p[0] = [p[1]] - else: + else: p[0] = p[1] p[1].append(p[2]) + def p_ruleitem(p): - '''ruleitem : ID - | QLITERAL - | CODE - | PREC''' - p[0] = p[1] + '''ruleitem : ID + | QLITERAL + | CODE + | PREC''' + p[0] = p[1] + def p_empty(p): '''empty : ''' + def p_error(p): pass yacc.yacc(debug=0) -def print_code(code,indent): - if not emit_code: return + +def print_code(code, indent): + if not emit_code: + return codelines = code.splitlines() for c in codelines: - print "%s# %s" % (" "*indent,c) - + print("%s# %s" % (" " * indent, c)) diff --git a/ext/ply/example/yply/yply.py b/ext/ply/example/yply/yply.py index a4398171ea..e24616c831 100755 --- 
a/ext/ply/example/yply/yply.py +++ b/ext/ply/example/yply/yply.py @@ -21,7 +21,7 @@ # import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") import ylex import yparse @@ -29,25 +29,23 @@ import yparse from ply import * if len(sys.argv) == 1: - print "usage : yply.py [-nocode] inputfile" + print("usage : yply.py [-nocode] inputfile") raise SystemExit if len(sys.argv) == 3: if sys.argv[1] == '-nocode': - yparse.emit_code = 0 + yparse.emit_code = 0 else: - print "Unknown option '%s'" % sys.argv[1] - raise SystemExit + print("Unknown option '%s'" % sys.argv[1]) + raise SystemExit filename = sys.argv[2] else: filename = sys.argv[1] yacc.parse(open(filename).read()) -print """ +print(""" if __name__ == '__main__': from ply import * yacc.yacc() -""" - - +""") diff --git a/ext/ply/ply.egg-info/PKG-INFO b/ext/ply/ply.egg-info/PKG-INFO new file mode 100644 index 0000000000..f2d8c8ae08 --- /dev/null +++ b/ext/ply/ply.egg-info/PKG-INFO @@ -0,0 +1,23 @@ +Metadata-Version: 1.1 +Name: ply +Version: 3.11 +Summary: Python Lex & Yacc +Home-page: http://www.dabeaz.com/ply/ +Author: David Beazley +Author-email: dave@dabeaz.com +License: BSD +Description-Content-Type: UNKNOWN +Description: + PLY is yet another implementation of lex and yacc for Python. Some notable + features include the fact that its implemented entirely in Python and it + uses LALR(1) parsing which is efficient and well suited for larger grammars. + + PLY provides most of the standard lex/yacc features including support for empty + productions, precedence rules, error recovery, and support for ambiguous grammars. + + PLY is extremely easy to use and provides very extensive error checking. + It is compatible with both Python 2 and Python 3. 
+ +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2 diff --git a/ext/ply/ply.egg-info/SOURCES.txt b/ext/ply/ply.egg-info/SOURCES.txt new file mode 100644 index 0000000000..f96bab6cdf --- /dev/null +++ b/ext/ply/ply.egg-info/SOURCES.txt @@ -0,0 +1,190 @@ +ANNOUNCE +CHANGES +MANIFEST.in +README.md +TODO +setup.cfg +setup.py +doc/internal.html +doc/makedoc.py +doc/ply.html +example/README +example/cleanup.sh +example/BASIC/README +example/BASIC/basic.py +example/BASIC/basiclex.py +example/BASIC/basiclog.py +example/BASIC/basinterp.py +example/BASIC/basparse.py +example/BASIC/dim.bas +example/BASIC/func.bas +example/BASIC/gcd.bas +example/BASIC/gosub.bas +example/BASIC/hello.bas +example/BASIC/linear.bas +example/BASIC/maxsin.bas +example/BASIC/powers.bas +example/BASIC/rand.bas +example/BASIC/sales.bas +example/BASIC/sears.bas +example/BASIC/sqrt1.bas +example/BASIC/sqrt2.bas +example/GardenSnake/GardenSnake.py +example/GardenSnake/README +example/ansic/README +example/ansic/clex.py +example/ansic/cparse.py +example/calc/calc.py +example/calcdebug/calc.py +example/calceof/calc.py +example/classcalc/calc.py +example/closurecalc/calc.py +example/hedit/hedit.py +example/newclasscalc/calc.py +example/optcalc/README +example/optcalc/calc.py +example/unicalc/calc.py +example/yply/README +example/yply/ylex.py +example/yply/yparse.py +example/yply/yply.py +ply/__init__.py +ply/cpp.py +ply/ctokens.py +ply/lex.py +ply/yacc.py +ply/ygen.py +ply.egg-info/PKG-INFO +ply.egg-info/SOURCES.txt +ply.egg-info/dependency_links.txt +ply.egg-info/top_level.txt +test/README +test/calclex.py +test/cleanup.sh +test/lex_closure.py +test/lex_doc1.py +test/lex_dup1.py +test/lex_dup2.py +test/lex_dup3.py +test/lex_empty.py +test/lex_error1.py +test/lex_error2.py +test/lex_error3.py +test/lex_error4.py +test/lex_hedit.py +test/lex_ignore.py +test/lex_ignore2.py +test/lex_literal1.py +test/lex_literal2.py +test/lex_literal3.py 
+test/lex_many_tokens.py +test/lex_module.py +test/lex_module_import.py +test/lex_object.py +test/lex_opt_alias.py +test/lex_optimize.py +test/lex_optimize2.py +test/lex_optimize3.py +test/lex_optimize4.py +test/lex_re1.py +test/lex_re2.py +test/lex_re3.py +test/lex_rule1.py +test/lex_rule2.py +test/lex_rule3.py +test/lex_state1.py +test/lex_state2.py +test/lex_state3.py +test/lex_state4.py +test/lex_state5.py +test/lex_state_noerror.py +test/lex_state_norule.py +test/lex_state_try.py +test/lex_token1.py +test/lex_token2.py +test/lex_token3.py +test/lex_token4.py +test/lex_token5.py +test/lex_token_dup.py +test/parser.out +test/testcpp.py +test/testlex.py +test/testyacc.py +test/yacc_badargs.py +test/yacc_badid.py +test/yacc_badprec.py +test/yacc_badprec2.py +test/yacc_badprec3.py +test/yacc_badrule.py +test/yacc_badtok.py +test/yacc_dup.py +test/yacc_error1.py +test/yacc_error2.py +test/yacc_error3.py +test/yacc_error4.py +test/yacc_error5.py +test/yacc_error6.py +test/yacc_error7.py +test/yacc_inf.py +test/yacc_literal.py +test/yacc_misplaced.py +test/yacc_missing1.py +test/yacc_nested.py +test/yacc_nodoc.py +test/yacc_noerror.py +test/yacc_nop.py +test/yacc_notfunc.py +test/yacc_notok.py +test/yacc_prec1.py +test/yacc_rr.py +test/yacc_rr_unused.py +test/yacc_simple.py +test/yacc_sr.py +test/yacc_term1.py +test/yacc_unicode_literals.py +test/yacc_unused.py +test/yacc_unused_rule.py +test/yacc_uprec.py +test/yacc_uprec2.py +test/pkg_test1/__init__.py +test/pkg_test1/parsing/__init__.py +test/pkg_test1/parsing/calclex.py +test/pkg_test1/parsing/calcparse.py +test/pkg_test1/parsing/lextab.py +test/pkg_test1/parsing/parser.out +test/pkg_test1/parsing/parsetab.py +test/pkg_test2/__init__.py +test/pkg_test2/parsing/__init__.py +test/pkg_test2/parsing/calclex.py +test/pkg_test2/parsing/calclextab.py +test/pkg_test2/parsing/calcparse.py +test/pkg_test2/parsing/calcparsetab.py +test/pkg_test2/parsing/parser.out +test/pkg_test3/__init__.py 
+test/pkg_test3/generated/__init__.py +test/pkg_test3/generated/lextab.py +test/pkg_test3/generated/parser.out +test/pkg_test3/generated/parsetab.py +test/pkg_test3/parsing/__init__.py +test/pkg_test3/parsing/calclex.py +test/pkg_test3/parsing/calcparse.py +test/pkg_test4/__init__.py +test/pkg_test4/parsing/__init__.py +test/pkg_test4/parsing/calclex.py +test/pkg_test4/parsing/calcparse.py +test/pkg_test5/__init__.py +test/pkg_test5/parsing/__init__.py +test/pkg_test5/parsing/calclex.py +test/pkg_test5/parsing/calcparse.py +test/pkg_test5/parsing/lextab.py +test/pkg_test5/parsing/parser.out +test/pkg_test5/parsing/parsetab.py +test/pkg_test6/__init__.py +test/pkg_test6/parsing/__init__.py +test/pkg_test6/parsing/calclex.py +test/pkg_test6/parsing/calcparse.py +test/pkg_test6/parsing/expression.py +test/pkg_test6/parsing/lextab.py +test/pkg_test6/parsing/parser.out +test/pkg_test6/parsing/parsetab.py +test/pkg_test6/parsing/statement.py \ No newline at end of file diff --git a/ext/ply/ply.egg-info/dependency_links.txt b/ext/ply/ply.egg-info/dependency_links.txt new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/ext/ply/ply.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/ext/ply/ply.egg-info/top_level.txt b/ext/ply/ply.egg-info/top_level.txt new file mode 100644 index 0000000000..90412f0683 --- /dev/null +++ b/ext/ply/ply.egg-info/top_level.txt @@ -0,0 +1 @@ +ply diff --git a/ext/ply/ply/__init__.py b/ext/ply/ply/__init__.py index 853a985542..23707c6354 100644 --- a/ext/ply/ply/__init__.py +++ b/ext/ply/ply/__init__.py @@ -1,4 +1,5 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) +__version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/ext/ply/ply/cpp.py b/ext/ply/ply/cpp.py index 39f9d47f33..2422916c9f 100644 --- a/ext/ply/ply/cpp.py +++ b/ext/ply/ply/cpp.py @@ -5,17 +5,26 @@ # Copyright (C) 2007 # All rights reserved # -# This module implements an ANSI-C style lexical preprocessor for PLY. 
+# This module implements an ANSI-C style lexical preprocessor for PLY. # ----------------------------------------------------------------------------- from __future__ import generators +import sys + +# Some Python 3 compatibility shims +if sys.version_info.major < 3: + STRING_TYPES = (str, unicode) +else: + STRING_TYPES = str + xrange = range + # ----------------------------------------------------------------------------- # Default preprocessor lexer definitions. These tokens are enough to get # a basic preprocessor working. Other modules may import these if they want # ----------------------------------------------------------------------------- tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND' + 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND' ) literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" @@ -34,7 +43,7 @@ t_CPP_ID = r'[A-Za-z_][\w_]*' # Integer literal def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)' + r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' return t t_CPP_INTEGER = CPP_INTEGER @@ -55,11 +64,21 @@ def t_CPP_CHAR(t): return t # Comment -def t_CPP_COMMENT(t): - r'(/\*(.|\n)*?\*/)|(//.*?\n)' - t.lexer.lineno += t.value.count("\n") +def t_CPP_COMMENT1(t): + r'(/\*(.|\n)*?\*/)' + ncr = t.value.count("\n") + t.lexer.lineno += ncr + # replace with one space or a number of '\n' + t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' return t - + +# Line comment +def t_CPP_COMMENT2(t): + r'(//.*?(\n|$))' + # replace with '/n' + t.type = 'CPP_WS'; t.value = '\n' + return t + def t_error(t): t.type = t.value[0] t.value = t.value[0] @@ -73,8 +92,8 @@ import os.path # ----------------------------------------------------------------------------- # trigraph() -# -# Given an input string, this function replaces all trigraph sequences. 
+# +# Given an input string, this function replaces all trigraph sequences. # The following mapping is used: # # ??= # @@ -176,7 +195,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- def error(self,file,line,msg): - print >>sys.stderr,"%s:%d %s" % (file,line,msg) + print("%s:%d %s" % (file,line,msg)) # ---------------------------------------------------------------------- # lexprobe() @@ -193,7 +212,7 @@ class Preprocessor(object): self.lexer.input("identifier") tok = self.lexer.token() if not tok or tok.value != "identifier": - print "Couldn't determine identifier type" + print("Couldn't determine identifier type") else: self.t_ID = tok.type @@ -201,7 +220,7 @@ class Preprocessor(object): self.lexer.input("12345") tok = self.lexer.token() if not tok or int(tok.value) != 12345: - print "Couldn't determine integer type" + print("Couldn't determine integer type") else: self.t_INTEGER = tok.type self.t_INTEGER_TYPE = type(tok.value) @@ -210,7 +229,7 @@ class Preprocessor(object): self.lexer.input("\"filename\"") tok = self.lexer.token() if not tok or tok.value != "\"filename\"": - print "Couldn't determine string type" + print("Couldn't determine string type") else: self.t_STRING = tok.type @@ -227,7 +246,7 @@ class Preprocessor(object): tok = self.lexer.token() if not tok or tok.value != "\n": self.t_NEWLINE = None - print "Couldn't determine token for newlines" + print("Couldn't determine token for newlines") else: self.t_NEWLINE = tok.type @@ -239,12 +258,12 @@ class Preprocessor(object): self.lexer.input(c) tok = self.lexer.token() if not tok or tok.value != c: - print "Unable to lex '%s' required for preprocessor" % c + print("Unable to lex '%s' required for preprocessor" % c) # ---------------------------------------------------------------------- # add_path() # - # Adds a search path to the preprocessor. + # Adds a search path to the preprocessor. 
# ---------------------------------------------------------------------- def add_path(self,path): @@ -288,7 +307,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- # tokenstrip() - # + # # Remove leading/trailing whitespace tokens from a token list # ---------------------------------------------------------------------- @@ -314,7 +333,7 @@ class Preprocessor(object): # argument. Each argument is represented by a list of tokens. # # When collecting arguments, leading and trailing whitespace is removed - # from each argument. + # from each argument. # # This function properly handles nested parenthesis and commas---these do not # define new arguments. @@ -326,7 +345,7 @@ class Preprocessor(object): current_arg = [] nesting = 1 tokenlen = len(tokenlist) - + # Search for the opening '('. i = 0 while (i < tokenlen) and (tokenlist[i].type in self.t_WS): @@ -360,7 +379,7 @@ class Preprocessor(object): else: current_arg.append(t) i += 1 - + # Missing end argument self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") return 0, [],[] @@ -372,9 +391,9 @@ class Preprocessor(object): # This is used to speed up macro expansion later on---we'll know # right away where to apply patches to the value to form the expansion # ---------------------------------------------------------------------- - + def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments + macro.patch = [] # Standard macro arguments macro.str_patch = [] # String conversion expansion macro.var_comma_patch = [] # Variadic macro comma patch i = 0 @@ -392,10 +411,11 @@ class Preprocessor(object): elif (i > 0 and macro.value[i-1].value == '##'): macro.patch.append(('c',argnum,i-1)) del macro.value[i-1] + i -= 1 continue elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): macro.patch.append(('c',argnum,i)) - i += 1 + del macro.value[i + 1] continue # Standard expansion else: @@ -421,7 +441,7 @@ class 
Preprocessor(object): rep = [copy.copy(_x) for _x in macro.value] # Make string expansion patches. These do not alter the length of the replacement sequence - + str_expansion = {} for argnum, i in macro.str_patch: if argnum not in str_expansion: @@ -439,7 +459,7 @@ class Preprocessor(object): # Make all other patches. The order of these matters. It is assumed that the patch list # has been sorted in reverse order of patch location since replacements will cause the # size of the replacement sequence to expand from the patch point. - + expanded = { } for ptype, argnum, i in macro.patch: # Concatenation. Argument is left unexpanded @@ -476,7 +496,7 @@ class Preprocessor(object): if t.value in self.macros and t.value not in expanded: # Yes, we found a macro match expanded[t.value] = True - + m = self.macros[t.value] if not m.arglist: # A simple macro @@ -490,7 +510,7 @@ class Preprocessor(object): j = i + 1 while j < len(tokens) and tokens[j].type in self.t_WS: j += 1 - if tokens[j].value == '(': + if j < len(tokens) and tokens[j].value == '(': tokcount,args,positions = self.collect_args(tokens[j:]) if not m.variadic and len(args) != len(m.arglist): self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) @@ -508,7 +528,7 @@ class Preprocessor(object): else: args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] del args[len(m.arglist):] - + # Get macro replacement text rep = self.macro_expand_args(m,args) rep = self.expand_macros(rep,expanded) @@ -516,18 +536,24 @@ class Preprocessor(object): r.lineno = t.lineno tokens[i:j+tokcount] = rep i += len(rep) + else: + # This is not a macro. It is just a word which + # equals to name of the macro. Hence, go to the + # next token. 
+ i += 1 + del expanded[t.value] continue elif t.value == '__LINE__': t.type = self.t_INTEGER t.value = self.t_INTEGER_TYPE(t.lineno) - + i += 1 return tokens - # ---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # evalexpr() - # + # # Evaluate an expression token sequence for the purposes of evaluating # integral expressions. # ---------------------------------------------------------------------- @@ -574,14 +600,14 @@ class Preprocessor(object): tokens[i].value = str(tokens[i].value) while tokens[i].value[-1] not in "0123456789abcdefABCDEF": tokens[i].value = tokens[i].value[:-1] - + expr = "".join([str(x.value) for x in tokens]) expr = expr.replace("&&"," and ") expr = expr.replace("||"," or ") expr = expr.replace("!"," not ") try: result = eval(expr) - except StandardError: + except Exception: self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") result = 0 return result @@ -599,7 +625,7 @@ class Preprocessor(object): if not source: source = "" - + self.define("__FILE__ \"%s\"" % source) self.source = source @@ -614,10 +640,11 @@ class Preprocessor(object): if tok.value == '#': # Preprocessor directive + # insert necessary whitespace instead of eaten tokens for tok in x: - if tok in self.t_WS and '\n' in tok.value: + if tok.type in self.t_WS and '\n' in tok.value: chunk.append(tok) - + dirtokens = self.tokenstrip(x[i+1:]) if dirtokens: name = dirtokens[0].value @@ -625,7 +652,7 @@ class Preprocessor(object): else: name = "" args = [] - + if name == 'define': if enable: for tok in self.expand_macros(chunk): @@ -685,7 +712,7 @@ class Preprocessor(object): iftrigger = True else: self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - + elif name == 'else': if ifstack: if ifstack[-1][0]: @@ -737,7 +764,7 @@ class Preprocessor(object): break i += 1 else: - print "Malformed #include <...>" + print("Malformed #include <...>") return filename = 
"".join([x.value for x in tokens[1:i]]) path = self.path + [""] + self.temp_path @@ -745,7 +772,7 @@ class Preprocessor(object): filename = tokens[0].value[1:-1] path = self.temp_path + [""] + self.path else: - print "Malformed #include statement" + print("Malformed #include statement") return for p in path: iname = os.path.join(p,filename) @@ -759,10 +786,10 @@ class Preprocessor(object): if dname: del self.temp_path[0] break - except IOError,e: + except IOError: pass else: - print "Couldn't find '%s'" % filename + print("Couldn't find '%s'" % filename) # ---------------------------------------------------------------------- # define() @@ -771,7 +798,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- def define(self,tokens): - if isinstance(tokens,(str,unicode)): + if isinstance(tokens,STRING_TYPES): tokens = self.tokenize(tokens) linetok = tokens @@ -794,7 +821,7 @@ class Preprocessor(object): variadic = False for a in args: if variadic: - print "No more arguments may follow a variadic argument" + print("No more arguments may follow a variadic argument") break astr = "".join([str(_i.value) for _i in a]) if astr == "...": @@ -813,7 +840,7 @@ class Preprocessor(object): a[0].value = a[0].value[:-3] continue if len(a) > 1 or a[0].type != self.t_ID: - print "Invalid macro argument" + print("Invalid macro argument") break else: mvalue = self.tokenstrip(linetok[1+tokcount:]) @@ -830,9 +857,9 @@ class Preprocessor(object): self.macro_prescan(m) self.macros[name.value] = m else: - print "Bad macro definition" + print("Bad macro definition") except LookupError: - print "Bad macro definition" + print("Bad macro definition") # ---------------------------------------------------------------------- # undef() @@ -855,7 +882,7 @@ class Preprocessor(object): def parse(self,input,source=None,ignore={}): self.ignore = ignore self.parser = self.parsegen(input,source) - + # 
---------------------------------------------------------------------- # token() # @@ -864,7 +891,7 @@ class Preprocessor(object): def token(self): try: while True: - tok = self.parser.next() + tok = next(self.parser) if tok.type not in self.ignore: return tok except StopIteration: self.parser = None @@ -884,15 +911,4 @@ if __name__ == '__main__': while True: tok = p.token() if not tok: break - print p.source, tok - - - - - - - - - - - + print(p.source, tok) diff --git a/ext/ply/ply/ctokens.py b/ext/ply/ply/ctokens.py index dd5f102dc8..b265e59ff8 100644 --- a/ext/ply/ply/ctokens.py +++ b/ext/ply/ply/ctokens.py @@ -9,27 +9,27 @@ tokens = [ # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', 'LOR', 'LAND', 'LNOT', 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', + 'INCREMENT', 'DECREMENT', # Structure dereference (->) 'ARROW', # Ternary operator (?) 'TERNARY', - + # Delimeters ( ) [ ] { } , . ; : 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', @@ -39,7 +39,7 @@ tokens = [ # Ellipsis (...) 
'ELLIPSIS', ] - + # Operators t_PLUS = r'\+' t_MINUS = r'-' @@ -74,7 +74,7 @@ t_LSHIFTEQUAL = r'<<=' t_RSHIFTEQUAL = r'>>=' t_ANDEQUAL = r'&=' t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' +t_XOREQUAL = r'\^=' # Increment/decrement t_INCREMENT = r'\+\+' @@ -125,9 +125,3 @@ def t_CPPCOMMENT(t): r'//.*\n' t.lexer.lineno += 1 return t - - - - - - diff --git a/ext/ply/ply/lex.py b/ext/ply/ply/lex.py index 4759d1b7a6..f95bcdbf1b 100644 --- a/ext/ply/ply/lex.py +++ b/ext/ply/ply/lex.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,10 +31,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ----------------------------------------------------------------------------- -__version__ = "3.2" -__tabversion__ = "3.2" # Version of table file used +__version__ = '3.11' +__tabversion__ = '3.10' -import re, sys, types, copy, os +import re +import sys +import types +import copy +import os +import inspect # This tuple contains known string types try: @@ -44,59 +49,55 @@ except AttributeError: # Python 3.0 StringTypes = (str, bytes) -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s + # Token class. This class is used to represent the tokens produced. class LexToken(object): def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + def __repr__(self): return str(self) -# This object is a stand-in for a logging object created by the -# logging module. + +# This object is a stand-in for a logging object created by the +# logging module. 
class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical + # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self + # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +115,33 @@ class NullLogger(object): class Lexer: def __init__(self): self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled + # tuples (re, findex) where re is a compiled # regular expression and findex is a list # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode + self.lexoptimize = False # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +149,110 @@ class Lexer: # the lexstatere and lexstateerrorf tables. 
if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ # writetab() - Write lexer information to a table file # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). 
Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens)))) + tf.write('_lexreflags = %s\n' % repr(int(self.lexreflags))) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) # ------------------------------------------------------------ # readtab() - Read lexer information from a tab file # 
------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): lextab = tabfile else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') self.lextokens = lextab._lextokens self.lexreflags = lextab._lexreflags self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) self.lexstateinfo = lextab._lexstateinfo self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + 
self.begin('INITIAL') # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): + def input(self, s): # Pull off the first character to see if s looks like a string c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +260,20 @@ class Lexer: # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,7 +292,7 @@ class Lexer: # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ @@ -315,9 +316,10 @@ class Lexer: continue # Look for a regular expression match - for 
lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +328,16 @@ class Lexer: tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -355,10 +357,10 @@ class Lexer: # Verify type of the token. If not in the token map, raise an error if not self.lexoptimize: - if not newtok.type in self.lextokens: + if newtok.type not in self.lextokens_all: raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) return newtok else: @@ -377,7 +379,7 @@ class Lexer: tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos @@ -386,15 +388,27 @@ class Lexer: # Error method didn't change text position at all. This is an error. raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface @@ -416,6 +430,15 @@ class Lexer: # and build a Lexer object from it. # ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + # ----------------------------------------------------------------------------- # get_caller_module_dict() # @@ -423,21 +446,12 @@ class Lexer: # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. 
# ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # _funcs_to_names() @@ -445,14 +459,13 @@ def get_caller_module_dict(levels): # Given a list of regular expression functions, this converts it to a list # suitable for output to a table file # ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): +def _funcs_to_names(funclist, namelist): result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) return result # ----------------------------------------------------------------------------- @@ -461,15 +474,14 @@ def _funcs_to_names(funclist,namelist): # Given a list of regular expression function names, this converts it back to # functions. 
# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +490,37 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: m = 
int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +530,22 @@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +555,15 @@ def _statetoken(s,names): # user's input file. 
# ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +571,7 @@ class LexerReflect(object): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +581,20 @@ class LexerReflect(object): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +604,274 @@ class LexerReflect(object): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error("Bad token name '%s'", n) + self.error = True if n in terminals: self.log.warning("Token '%s' multiply defined", n) terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = 
self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 
'error': for s in states: self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # 
Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. 
Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. 
%s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state '%s'", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. + # match on each line in the source code of the given module. 
# ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,16 +881,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' 
+ lextab + # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() if not optimize: if linfo.validate_all(): @@ -895,7 +910,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now if optimize and lextab: try: - lexobj.readtab(lextab,ldict) + lexobj.readtab(lextab, ldict) token = lexobj.token input = lexobj.input lexer = lexobj @@ -906,92 +921,97 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state 
'%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + 
lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and input() functions token = lexobj.token @@ -1000,7 +1020,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # If in optimize mode, we write the lextab if lextab and optimize: - lexobj.writetab(lextab,outputdir) + if outputdir is None: + # If no output directory is set, the location of the 
output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + if lextab in sys.modules: + del sys.modules[lextab] + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) return lexobj @@ -1010,7 +1051,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] @@ -1018,7 +1059,7 @@ def runmain(lexer=None,data=None): data = f.read() f.close() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1031,10 +1072,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1044,14 +1086,13 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - 
f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc + return set_regex # Alternative spelling of the TOKEN decorator Token = TOKEN - diff --git a/ext/ply/ply/yacc.py b/ext/ply/ply/yacc.py index d4bb8822db..88188a1e8e 100644 --- a/ext/ply/ply/yacc.py +++ b/ext/ply/ply/yacc.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +32,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. 
The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +59,15 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.2" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import os.path +import inspect +import warnings + +__version__ = '3.11' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,7 +75,7 @@ __tabversion__ = "3.2" # Table version # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 0 # Debugging mode. If set, yacc generates a +yaccdebug = True # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file @@ -77,86 +84,117 @@ default_lr = 'LALR' # Default LR table generation method error_count = 3 # Number of symbols that must be shifted to leave recovery mode -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized +yaccdevel = False # Set to True if developing yacc. This turns off optimized # implementations of certain functions. resultlimit = 40 # Size limit of results when running in debug mode. 
pickle_protocol = 0 # Protocol to use when writing pickle files -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 +# String type-checking compatibility if sys.version_info[0] < 3: - def func_code(f): - return f.func_code + string_types = basestring else: - def func_code(f): - return f.__code__ + string_types = str -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize +MAXINT = sys.maxsize -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. 
class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... 
+ + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +214,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +230,53 @@ class YaccSymbol: # representing the range of positional information for a symbol. 
class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None - def __setitem__(self,n,v): + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): - self.slice[n].lineno = n + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos def error(self): - raise SyntaxError - + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == @@ -237,14 +285,16 @@ class YaccProduction: # 
----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,24 +304,42 @@ class LRParser: self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). 
+ # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): if debug or yaccdevel: - if isinstance(debug,int): + if isinstance(debug, int): debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) + return self.parseopt(input, lexer, debug, tracking, tokenfunc) else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parsedebug(). # # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: # # #--! DEBUG # statements @@ -279,22 +347,24 @@ class LRParser: # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) 
- goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -306,16 +376,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -325,52 +398,59 @@ class LRParser: statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG + #--! DEBUG debug.debug('') debug.debug('State : %s', state) - # --! DEBUG + #--! DEBUG - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' - # --! DEBUG + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG + + #--! DEBUG debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! 
DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,72 +464,77 @@ class LRParser: sym.type = pname # Production name sym.value = None - # --! DEBUG + #--! DEBUG if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -457,41 +542,43 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! 
DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG return result - if t == None: + if t is None: - # --! DEBUG + #--! DEBUG debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -505,20 +592,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead - if errtoken.type == "$end": + if errtoken.type == '$end': errtoken = None # End of file! 
if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -528,14 +610,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -545,7 +629,7 @@ class LRParser: # entire parse has been rolled back and we're completely hosed. The token is # discarded and we just keep going. - if len(statestack) <= 1 and lookahead.type != "$end": + if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None state = 0 @@ -557,7 +641,7 @@ class LRParser: # at the end of the file. nuke the top entry and generate an error token # Start nuking entries on the stack - if lookahead.type == "$end": + if lookahead.type == '$end': # Whoa. We're really hosed here. Bail out return @@ -566,48 +650,67 @@ class LRParser: if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! 
TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt(). # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) 
+ prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -617,16 +720,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -639,23 +745,28 @@ class LRParser: sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. 
Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if t is not None: if t > 0: @@ -663,11 +774,13 @@ class LRParser: statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -681,61 +794,64 @@ class LRParser: sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. 
pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -743,28 +859,32 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -778,20 +898,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. 
The @@ -801,14 +916,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -839,47 +956,67 @@ class LRParser: if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt_notrack(). 
# - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -889,16 +1026,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -911,23 +1051,28 @@ class LRParser: sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if t is not None: if t > 0: @@ -935,11 +1080,13 @@ class LRParser: statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -953,44 
+1100,50 @@ class LRParser: sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - targ = [ sym ] + + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -998,28 +1151,32 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +1190,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. 
The @@ -1056,14 +1208,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1096,32 +1250,37 @@ class LRParser: # symbol and continue lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. 
# ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +1290,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') # This class stores the raw information about a single production or grammar rule. # A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +1310,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +1321,11 @@ class Production(object): self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +1336,15 @@ class Production(object): # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s ->" % self.name + self.str = '%s -> ' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,28 +1352,27 @@ class Production(object): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # Precompute 
the list of productions immediately following. Hack. Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - + # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1223,7 +1381,7 @@ class Production(object): # actually used by the LR parsing engine, plus some additional # debugging information. class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): + def __init__(self, str, name, len, func, file, line): self.name = name self.len = len self.func = func @@ -1231,13 +1389,15 @@ class MiniProduction(object): self.file = file self.line = line self.str = str + def __str__(self): return self.str + def __repr__(self): - return "MiniProduction(%s)" % self.str + return 'MiniProduction(%s)' % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1246,9 +1406,9 @@ class MiniProduction(object): # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. 
Here # basic attributes: @@ -1267,26 +1427,26 @@ class MiniProduction(object): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> " % self.name + s = '%s -> ' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,21 +1469,22 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate + self.Prodmap = {} # A dictionary that is only used to detect duplicate # productions. 
- self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: @@ -1331,17 +1492,17 @@ class Grammar(object): self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. # This is only used to provide error checking and to generate # a warning about unused precedence rules. 
@@ -1351,7 +1512,7 @@ class Grammar(object): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- @@ -1362,14 +1523,14 @@ class Grammar(object): # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # @@ -1387,72 +1548,74 @@ class Grammar(object): # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. 
error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. 
Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +1623,21 @@ class Grammar(object): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. 
# ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +1649,20 @@ class Grammar(object): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. + if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +1676,20 @@ class Grammar(object): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. 
for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +1697,19 @@ class Grammar(object): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. - p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +1717,9 @@ class Grammar(object): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1728,22 @@ class Grammar(object): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. 
# ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1754,7 @@ class Grammar(object): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1769,7 @@ class Grammar(object): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1781,15 @@ class Grammar(object): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1800,20 @@ class Grammar(object): # During execution of compute_first1, the result may be incomplete. 
# Afterward (e.g., when called from compute_follow()), it will be complete. # ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non- symbols of First[x] to the result. for f in self.First[x]: if f == ' ': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1852,17 @@ class Grammar(object): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1872,7 @@ class Grammar(object): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. 
# --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1883,36 @@ class Grammar(object): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != ' ' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == ' ': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1936,15 @@ class Grammar(object): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,7 +1952,8 @@ class Grammar(object): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 @@ -1804,12 +1962,13 @@ class Grammar(object): # ----------------------------------------------------------------------------- # == Class LRTable == # -# This basic 
class represents a basic table of LR parsing information. +# This basic class represents a basic table of LR parsing information. # Methods for generating the tables are not defined here. They are defined # in the derived class LRGeneratedTable. # ----------------------------------------------------------------------------- -class VersionError(YaccError): pass +class VersionError(YaccError): + pass class LRTable(object): def __init__(self): @@ -1818,19 +1977,15 @@ class LRTable(object): self.lr_productions = None self.lr_method = None - def read_table(self,module): - if isinstance(module,types.ModuleType): + def read_table(self, module): + if isinstance(module, types.ModuleType): parsetab = module else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] + exec('import %s' % module) + parsetab = sys.modules[module] if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_action = parsetab._lr_action self.lr_goto = parsetab._lr_goto @@ -1842,17 +1997,20 @@ class LRTable(object): self.lr_method = parsetab._lr_method return parsetab._lr_signature - def read_pickle(self,filename): + def read_pickle(self, filename): try: import cPickle as pickle except ImportError: import pickle - in_f = open(filename,"rb") + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') tabversion = pickle.load(in_f) if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_method = pickle.load(in_f) signature = pickle.load(in_f) self.lr_action = pickle.load(in_f) @@ -1867,14 +2025,15 @@ class LRTable(object): return signature # Bind all production function names to callable objects in pdict - def 
bind_callables(self,pdict): + def bind_callables(self, pdict): for p in self.lr_productions: p.bind(pdict) - + + # ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. # ----------------------------------------------------------------------------- @@ -1895,17 +2054,18 @@ class LRTable(object): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,20 +2074,22 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() -class LALRError(YaccError): pass +class LALRError(YaccError): + pass # ----------------------------------------------------------------------------- # == LRGeneratedTable == @@ -1937,9 +2099,9 @@ class LALRError(YaccError): pass # ----------------------------------------------------------------------------- 
class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) self.grammar = grammar self.lr_method = method @@ -1974,21 +2136,22 @@ class LRGeneratedTable(LRTable): # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +2162,43 @@ class LRGeneratedTable(LRTable): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. 
- def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +2211,15 @@ class LRGeneratedTable(LRTable): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +2254,21 @@ class LRGeneratedTable(LRTable): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] 
= 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +2282,16 @@ class LRGeneratedTable(LRTable): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +2302,21 @@ class LRGeneratedTable(LRTable): # Returns a list of terminals. 
# ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +2326,18 @@ class LRGeneratedTable(LRTable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +2369,7 @@ class LRGeneratedTable(LRTable): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +2379,12 @@ class LRGeneratedTable(LRTable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if 
p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . on the right hand side @@ -2227,44 +2392,50 @@ class LRGeneratedTable(LRTable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] + lr_index = lr_index + 1 + t = p.prod[lr_index] - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." 
- while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +2449,10 @@ class LRGeneratedTable(LRTable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +2471,11 @@ class LRGeneratedTable(LRTable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ 
-2318,15 +2489,16 @@ class LRGeneratedTable(LRTable): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +2507,7 @@ class LRGeneratedTable(LRTable): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +2515,16 @@ class LRGeneratedTable(LRTable): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,9 +2538,9 @@ class 
LRGeneratedTable(LRTable): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states @@ -2382,23 +2554,23 @@ class LRGeneratedTable(LRTable): st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: if p.len == p.lr_index + 1: if p.name == "S'": # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p + st_action['$end'] = 0 + st_actionp['$end'] = p else: # We are at the end of a production. Reduce! if self.lr_method == 'LALR': @@ -2406,31 +2578,36 @@ class LRGeneratedTable(LRTable): else: laheads = self.grammar.Follow[p.name] for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) if r is not None: # Whoa. Have a shift/reduce or reduce/reduce conflict if r > 0: # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. 
- sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number st_actionp[a] = p if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) Productions[p.number].reduced += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the shift if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif r < 0: # Reduce/reduce conflict. In this case, we favor the rule # that was defined first in the grammar file @@ -2439,15 +2616,16 @@ class LRGeneratedTable(LRTable): if oldp.line > pp.line: st_action[a] = -p.number st_actionp[a] = p - chosenp,rejectp = pp,oldp + chosenp, rejectp = pp, oldp Productions[p.number].reduced += 1 Productions[oldp.number].reduced -= 1 else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! 
reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = -p.number st_actionp[a] = p @@ -2456,205 +2634,211 @@ class LRGeneratedTable(LRTable): i = p.lr_index a = p.prod[i+1] # Get symbol right after the "." if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) if r is not None: # Whoa have a shift/reduce or shift/shift conflict if r > 0: if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) + raise LALRError('Shift/shift conflict in state %d' % st) elif r < 0: # Do a precedence check. # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 st_action[a] = j st_actionp[a] = p if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. 
Guess we'll keep the reduce if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = j st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! 
%-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - # ----------------------------------------------------------------------------- # write() # # This function writes the LR parsing tables to a file # ----------------------------------------------------------------------------- - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") - f.write(""" + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' + try: + f = open(filename, 'w') + + f.write(''' # %s # This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R _tabversion = %r _lr_method = %r _lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) # Change smaller to 0 to go back to original tables smaller = 1 # Factor out names to try and make smaller if smaller: - items = { } + items = {} - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_action = { } + f.write(''' +_lr_action = {} for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } + if not _x in _lr_action: _lr_action[_x] = {} _lr_action[_x][_k] = _y del _lr_action_items -""") +''') else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') if smaller: # Factor out names to try and make smaller - items = { } + items = {} - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], 
[]) + items[name] = i + i[0].append(s) + i[1].append(v) - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_goto = { } + f.write(''' +_lr_goto = {} for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} _lr_goto[_x][_k] = _y del _lr_goto_items -""") +''') else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') # Write production table - f.write("_lr_productions = [\n") + f.write('_lr_productions = [\n') for p in self.lr_productions: if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') f.close() - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return + except IOError as e: + raise # ----------------------------------------------------------------------------- @@ -2663,26 +2847,25 @@ del _lr_goto_items # This function pickles the LR parsing tables to a supplied file object # ----------------------------------------------------------------------------- - def 
pickle_table(self,filename,signature=""): + def pickle_table(self, filename, signature=''): try: import cPickle as pickle except ImportError: import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) # ----------------------------------------------------------------------------- # === INTROSPECTION === @@ -2700,26 +2883,18 @@ del _lr_goto_items # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it 
into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +2903,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +2918,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +2936,14 @@ def parse_grammar(doc,file,line): # etc. 
# ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2957,7 @@ class ParserReflect(object): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2965,28 @@ class ParserReflect(object): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2996,29 @@ class ParserReflect(object): # to try and detect duplicates. 
# ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +3027,7 @@ class ParserReflect(object): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, string_types): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +3037,173 @@ class ParserReflect(object): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = 
self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - self.tokens = tokens + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. 
Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) - # Sort all of the actions by line number - p_functions.sort() + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of 
the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. - self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. 
- for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,14 +3213,17 @@ class ParserReflect(object): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): - global parse # Reference to the parsing method of the last built parser + if tabmodule is 
None: + tabmodule = tab_module + + # Reference to the parsing method of the last built parser + global parse # If pickling is enabled, table files are not created - if picklefile: write_tables = 0 @@ -3049,17 +3232,54 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + else: + if '.' not in tabmodule: + srcfile = pdict['__file__'] + else: + parts = tabmodule.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = pdict.get('__package__') + if pkg and isinstance(tabmodule, str): + if '.' not in tabmodule: + tabmodule = pkg + '.' 
+ tabmodule + + + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) + pinfo = ParserReflect(pdict, log=errorlog) pinfo.get_all() if pinfo.error: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Check signature against table files (if any) signature = pinfo.signature() @@ -3074,35 +3294,36 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star if optimize or (read_signature == signature): try: lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() + except Exception as e: + errorlog.warning('There was a problem loading the table file: %r', e) + except VersionError as e: errorlog.warning(str(e)) - except Exception: + except ImportError: pass if debuglog is None: if debug: - debuglog = PlyLogger(open(debugfile,"w")) + try: + debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) + debuglog = NullLogger() else: debuglog = NullLogger() - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) - - errors = 0 + errors = False # Validate the parser information if pinfo.validate_all(): - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") + errorlog.warning('no p_error() function is defined') # Create a grammar object grammar = Grammar(pinfo.tokens) @@ -3110,20 +3331,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Set precedence level for terminals for term, assoc, level in pinfo.preclist: try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) # Add productions to the grammar for funcname, gram in pinfo.grammar: file, line, prodname, syms = gram try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True # Set the grammar start symbols try: @@ -3131,146 +3350,153 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] + except GrammarError as e: errorlog.error(str(e)) - errors = 1 + errors = True if errors: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Verify the grammar structure undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: - 
errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) # Print out all productions to the debug log if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals unused_rules = grammar.unused_rules() for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") + errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) + errorlog.warning('There are %d unused tokens', len(unused_terminals)) if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") + errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) + errorlog.warning('There are %d unused rules', len(unused_rules)) if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") + 
debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') terms = list(grammar.Terminals) terms.sort() for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') nonterms = list(grammar.Nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) + errorlog.warning('Symbol %r is unreachable', u) infinite = grammar.infinite_cycles() for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True if errors: - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + # Run the LRGeneratedTable on the grammar if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) + errorlog.debug('Generating %s tables', method) + + lr = LRGeneratedTable(grammar, method, debuglog) if debug: num_sr = len(lr.sr_conflicts) # Report shift/reduce and reduce/reduce conflicts if 
num_sr == 1: - errorlog.warning("1 shift/reduce conflict") + errorlog.warning('1 shift/reduce conflict') elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) + errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") + errorlog.warning('1 reduce/reduce conflict') elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) + errorlog.warning('%d reduce/reduce conflicts', num_rr) # Write out conflicts to the output file if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: + if (state, id(rule), id(rejected)) in already_reported: continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state %d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) 
+ warned_never = [] for state, rule, rejected in lr.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) # Write the table file if requested if write_tables: - lr.write_table(tabmodule,outputdir,signature) + try: + lr.write_table(tabmodule, outputdir, signature) + if tabmodule in sys.modules: + del sys.modules[tabmodule] + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) # Write a pickled version of the tables if picklefile: - lr.pickle_table(picklefile,signature) + try: + lr.pickle_table(picklefile, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) # Build the parser lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser diff --git a/ext/ply/ply/ygen.py b/ext/ply/ply/ygen.py new file mode 100644 index 0000000000..03b93180a7 --- /dev/null +++ b/ext/ply/ply/ygen.py @@ -0,0 +1,69 @@ +# ply: ygen.py +# +# This is a support program that auto-generates different versions of the YACC parsing +# function with different features removed for the purposes of performance. +# +# Users should edit the method LRParser.parsedebug() in yacc.py. The source code +# for that method is then used to create the other methods. See the comments in +# yacc.py for further details. + +import os.path +import shutil + +def get_source_range(lines, tag): + srclines = enumerate(lines) + start_tag = '#--! %s-start' % tag + end_tag = '#--! 
%s-end' % tag + + for start_index, line in srclines: + if line.strip().startswith(start_tag): + break + + for end_index, line in srclines: + if line.strip().endswith(end_tag): + break + + return (start_index + 1, end_index) + +def filter_section(lines, tag): + filtered_lines = [] + include = True + tag_text = '#--! %s' % tag + for line in lines: + if line.strip().startswith(tag_text): + include = not include + elif include: + filtered_lines.append(line) + return filtered_lines + +def main(): + dirname = os.path.dirname(__file__) + shutil.copy2(os.path.join(dirname, 'yacc.py'), os.path.join(dirname, 'yacc.py.bak')) + with open(os.path.join(dirname, 'yacc.py'), 'r') as f: + lines = f.readlines() + + parse_start, parse_end = get_source_range(lines, 'parsedebug') + parseopt_start, parseopt_end = get_source_range(lines, 'parseopt') + parseopt_notrack_start, parseopt_notrack_end = get_source_range(lines, 'parseopt-notrack') + + # Get the original source + orig_lines = lines[parse_start:parse_end] + + # Filter the DEBUG sections out + parseopt_lines = filter_section(orig_lines, 'DEBUG') + + # Filter the TRACKING sections out + parseopt_notrack_lines = filter_section(parseopt_lines, 'TRACKING') + + # Replace the parser source sections with updated versions + lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines + lines[parseopt_start:parseopt_end] = parseopt_lines + + lines = [line.rstrip()+'\n' for line in lines] + with open(os.path.join(dirname, 'yacc.py'), 'w') as f: + f.writelines(lines) + + print('Updated yacc.py') + +if __name__ == '__main__': + main() diff --git a/ext/ply/setup.cfg b/ext/ply/setup.cfg new file mode 100644 index 0000000000..819449e4c5 --- /dev/null +++ b/ext/ply/setup.cfg @@ -0,0 +1,10 @@ +[bdist_wheel] +universal = 1 + +[metadata] +description-file = README.md + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/ext/ply/setup.py b/ext/ply/setup.py index 606b29cde4..46bc6b34c4 100644 --- a/ext/ply/setup.py +++ 
b/ext/ply/setup.py @@ -14,13 +14,18 @@ PLY provides most of the standard lex/yacc features including support for empty productions, precedence rules, error recovery, and support for ambiguous grammars. PLY is extremely easy to use and provides very extensive error checking. +It is compatible with both Python 2 and Python 3. """, license="""BSD""", - version = "3.2", + version = "3.11", author = "David Beazley", author_email = "dave@dabeaz.com", maintainer = "David Beazley", maintainer_email = "dave@dabeaz.com", url = "http://www.dabeaz.com/ply/", packages = ['ply'], + classifiers = [ + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2', + ] ) diff --git a/ext/ply/test/README b/ext/ply/test/README index aac12b058b..03b167ce24 100644 --- a/ext/ply/test/README +++ b/ext/ply/test/README @@ -1,11 +1,8 @@ This directory mostly contains tests for various types of error conditions. To run: - $ python testlex.py . - $ python testyacc.py . - -The tests can also be run using the Python unittest module. - - $ python rununit.py + $ python testlex.py + $ python testyacc.py + $ python testcpp.py The script 'cleanup.sh' cleans up this directory to its original state. 
diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py index 67d245f19e..030a9863dd 100644 --- a/ext/ply/test/calclex.py +++ b/ext/ply/test/calclex.py @@ -36,14 +36,14 @@ t_ignore = " \t" def t_newline(t): r'\n+' - t.lineno += t.value.count("\n") + t.lexer.lineno += t.value.count("\n") def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer -lex.lex() +lexer = lex.lex() diff --git a/ext/ply/test/cleanup.sh b/ext/ply/test/cleanup.sh index 9db936837e..9374f2c60b 100755 --- a/ext/ply/test/cleanup.sh +++ b/ext/ply/test/cleanup.sh @@ -1,4 +1,4 @@ #!/bin/sh -rm -f *~ *.pyc *.pyo *.dif *.out +rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ diff --git a/ext/ply/test/lex_literal3.py b/ext/ply/test/lex_literal3.py new file mode 100644 index 0000000000..91ab980c84 --- /dev/null +++ b/ext/ply/test/lex_literal3.py @@ -0,0 +1,26 @@ +# lex_literal3.py +# +# An empty literal specification given as a list +# Issue 8 : Literals empty list causes IndexError + +import sys +if ".." 
not in sys.path: sys.path.insert(0,"..") + +import ply.lex as lex + +tokens = [ + "NUMBER", + ] + +literals = [] + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/ext/ply/test/lex_optimize3.py b/ext/ply/test/lex_optimize3.py index c6c8cce652..b8df5aab2c 100644 --- a/ext/ply/test/lex_optimize3.py +++ b/ext/ply/test/lex_optimize3.py @@ -45,7 +45,7 @@ def t_error(t): t.lexer.skip(1) # Build the lexer -lex.lex(optimize=1,lextab="lexdir.sub.calctab",outputdir="lexdir/sub") +lex.lex(optimize=1,lextab="lexdir.sub.calctab" ,outputdir="lexdir/sub") lex.runmain(data="3+4") diff --git a/ext/ply/test/lex_optimize4.py b/ext/ply/test/lex_optimize4.py new file mode 100644 index 0000000000..cc6e2a9d20 --- /dev/null +++ b/ext/ply/test/lex_optimize4.py @@ -0,0 +1,26 @@ +# ----------------------------------------------------------------------------- +# lex_optimize4.py +# ----------------------------------------------------------------------------- +import re +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+?' +t_MINUS = r'-' +t_NUMBER = r'(\d+)' + +def t_error(t): + pass + + +# Build the lexer +lex.lex(optimize=True, lextab="opt4tab", reflags=re.UNICODE) +lex.runmain(data="3+4") diff --git a/ext/ply/test/pkg_test1/__init__.py b/ext/ply/test/pkg_test1/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test1/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' 
not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test1/parsing/__init__.py b/ext/ply/test/pkg_test1/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test1/parsing/calclex.py b/ext/ply/test/pkg_test1/parsing/calclex.py new file mode 100644 index 0000000000..b3c1a4d6bb --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True) + + + diff --git a/ext/ply/test/pkg_test1/parsing/calcparse.py b/ext/ply/test/pkg_test1/parsing/calcparse.py new file mode 100644 index 0000000000..c058e9f77d --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def 
p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() + + + + + diff --git a/ext/ply/test/pkg_test1/parsing/lextab.py b/ext/ply/test/pkg_test1/parsing/lextab.py new file mode 100644 index 0000000000..52376b2aaa --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P \\d+)|(?P \\n+)|(?P [a-zA-Z_][a-zA-Z0-9_]*)|(?P \\+)|(?P \\()|(?P \\*)|(?P \\))|(?P =)|(?P /)|(?P -)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test2/__init__.py b/ext/ply/test/pkg_test2/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test2/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test2/parsing/__init__.py b/ext/ply/test/pkg_test2/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test2/parsing/calclex.py b/ext/ply/test/pkg_test2/parsing/calclex.py new file mode 100644 index 0000000000..789e13f864 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + 
print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True, lextab='calclextab') + + + diff --git a/ext/ply/test/pkg_test2/parsing/calclextab.py b/ext/ply/test/pkg_test2/parsing/calclextab.py new file mode 100644 index 0000000000..a616c397c6 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calclextab.py @@ -0,0 +1,10 @@ +# calclextab.py. This file automatically created by PLY (version 3.11). Don't edit! +_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P \\d+)|(?P \\n+)|(?P [a-zA-Z_][a-zA-Z0-9_]*)|(?P \\+)|(?P \\()|(?P \\*)|(?P \\))|(?P =)|(?P /)|(?P -)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test2/parsing/calcparse.py b/ext/ply/test/pkg_test2/parsing/calcparse.py new file mode 100644 index 0000000000..f5193389b0 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : 
NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc(tabmodule='calcparsetab') + + + + + diff --git a/ext/ply/test/pkg_test2/parsing/calcparsetab.py b/ext/ply/test/pkg_test2/parsing/calcparsetab.py new file mode 100644 index 0000000000..23e3208308 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calcparsetab.py @@ -0,0 +1,40 @@ + +# calcparsetab.py +# This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'leftPLUSMINUSleftTIMESDIVIDErightUMINUSDIVIDE EQUALS LPAREN MINUS NAME NUMBER PLUS RPAREN TIMESstatement : NAME EQUALS expressionstatement : expressionexpression : expression PLUS expression\n | expression MINUS expression\n | expression TIMES expression\n | expression DIVIDE expressionexpression : MINUS expression %prec UMINUSexpression : LPAREN expression RPARENexpression : NUMBERexpression : NAME' + +_lr_action_items = {'PLUS':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,11,-10,-7,11,-8,11,-3,-4,-6,-5,]),'MINUS':([0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,],[1,1,-9,1,-10,12,-10,-7,12,1,1,1,1,1,-8,12,-3,-4,-6,-5,]),'EQUALS':([4,],[10,]),'NUMBER':([0,1,3,10,11,12,13,14,],[2,2,2,2,2,2,2,2,]),'LPAREN':([0,1,3,10,11,12,13,14,],[3,3,3,3,3,3,3,3,]),'NAME':([0,1,3,10,11,12,13,14,],[4,7,7,7,7,7,7,7,]),'TIMES':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,14,-10,-7,14,-8,14,14,14,-6,-5,]),'$end':([2,4,5,6,7,8,15,16,17,18,19,20,],[-9,-10,0,-2,-10,-7,-8,-1,-3,-4,-6,-5,]),'RPAREN':([2,7,8,9,15,17,18,19,20,],[-9,-10,-7,15,-8,-3,-4,-6,-5,]),'DIVIDE':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,13,-10,-7,13,-8,13,13,13,-6,-5,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'statement':([0,],[5,]),'expression':([0,1,3,10,11,12,13,14,],[6,8,9,16,17,18,19,20,]),} + +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> statement","S'",1,None,None,None), + ('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','calcparse.py',21), + ('statement -> expression','statement',1,'p_statement_expr','calcparse.py',25), + ('expression -> expression PLUS 
expression','expression',3,'p_expression_binop','calcparse.py',29), + ('expression -> expression MINUS expression','expression',3,'p_expression_binop','calcparse.py',30), + ('expression -> expression TIMES expression','expression',3,'p_expression_binop','calcparse.py',31), + ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','calcparse.py',32), + ('expression -> MINUS expression','expression',2,'p_expression_uminus','calcparse.py',39), + ('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','calcparse.py',43), + ('expression -> NUMBER','expression',1,'p_expression_number','calcparse.py',47), + ('expression -> NAME','expression',1,'p_expression_name','calcparse.py',51), +] diff --git a/ext/ply/test/pkg_test3/__init__.py b/ext/ply/test/pkg_test3/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test3/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test3/generated/__init__.py b/ext/ply/test/pkg_test3/generated/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test3/generated/lextab.py b/ext/ply/test/pkg_test3/generated/lextab.py new file mode 100644 index 0000000000..52376b2aaa --- /dev/null +++ b/ext/ply/test/pkg_test3/generated/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P \\d+)|(?P \\n+)|(?P [a-zA-Z_][a-zA-Z0-9_]*)|(?P \\+)|(?P \\()|(?P \\*)|(?P \\))|(?P =)|(?P /)|(?P -)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test3/parsing/__init__.py b/ext/ply/test/pkg_test3/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test3/parsing/calclex.py b/ext/ply/test/pkg_test3/parsing/calclex.py new file mode 100644 index 0000000000..6ca2c4f3c1 --- /dev/null +++ b/ext/ply/test/pkg_test3/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True, lextab='pkg_test3.generated.lextab') + + + diff --git a/ext/ply/test/pkg_test3/parsing/calcparse.py 
b/ext/ply/test/pkg_test3/parsing/calcparse.py new file mode 100644 index 0000000000..2dcb52b3c4 --- /dev/null +++ b/ext/ply/test/pkg_test3/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc(tabmodule='pkg_test3.generated.parsetab') + + + + + diff --git a/ext/ply/test/pkg_test4/__init__.py b/ext/ply/test/pkg_test4/__init__.py new file mode 100644 index 0000000000..ba9ddacf6a --- /dev/null +++ b/ext/ply/test/pkg_test4/__init__.py @@ -0,0 +1,25 @@ +# Tests proper handling of lextab and parsetab files in package structures +# Check of warning messages when files aren't writable + +# Here for testing purposes 
+import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +import ply.lex +import ply.yacc + +def patched_open(filename, mode): + if 'w' in mode: + raise IOError("Permission denied %r" % filename) + return open(filename, mode) + +ply.lex.open = patched_open +ply.yacc.open = patched_open +try: + from .parsing.calcparse import parser +finally: + del ply.lex.open + del ply.yacc.open + + diff --git a/ext/ply/test/pkg_test4/parsing/__init__.py b/ext/ply/test/pkg_test4/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test4/parsing/calclex.py b/ext/ply/test/pkg_test4/parsing/calclex.py new file mode 100644 index 0000000000..b3c1a4d6bb --- /dev/null +++ b/ext/ply/test/pkg_test4/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True) + + + diff --git a/ext/ply/test/pkg_test4/parsing/calcparse.py b/ext/ply/test/pkg_test4/parsing/calcparse.py new file mode 100644 index 0000000000..c058e9f77d --- /dev/null +++ b/ext/ply/test/pkg_test4/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly 
specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() + + + + + diff --git a/ext/ply/test/pkg_test5/__init__.py b/ext/ply/test/pkg_test5/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test5/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' 
not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test5/parsing/__init__.py b/ext/ply/test/pkg_test5/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test5/parsing/calclex.py b/ext/ply/test/pkg_test5/parsing/calclex.py new file mode 100644 index 0000000000..e8759b6f01 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/calclex.py @@ -0,0 +1,48 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import os.path +lexer = lex.lex(optimize=True, outputdir=os.path.dirname(__file__)) + + + diff --git a/ext/ply/test/pkg_test5/parsing/calcparse.py b/ext/ply/test/pkg_test5/parsing/calcparse.py new file mode 100644 index 0000000000..2a1ddfe190 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/calcparse.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + 
('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +import os.path +parser = yacc.yacc(outputdir=os.path.dirname(__file__)) + + + + + diff --git a/ext/ply/test/pkg_test5/parsing/lextab.py b/ext/ply/test/pkg_test5/parsing/lextab.py new file mode 100644 index 0000000000..8cab2985d1 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P \\d+)|(?P \\n+)|(?P [a-zA-Z_][a-zA-Z0-9_]*)|(?P \\()|(?P \\+)|(?P \\*)|(?P \\))|(?P =)|(?P /)|(?P -)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'LPAREN'), (None, 'PLUS'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test6/__init__.py b/ext/ply/test/pkg_test6/__init__.py new file mode 100644 index 0000000000..5dbe0cbd1d --- /dev/null +++ b/ext/ply/test/pkg_test6/__init__.py @@ -0,0 +1,9 @@ +# Tests proper sorting of modules in yacc.ParserReflect.get_pfunctions + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test6/parsing/__init__.py b/ext/ply/test/pkg_test6/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test6/parsing/calclex.py b/ext/ply/test/pkg_test6/parsing/calclex.py new file mode 100644 index 0000000000..e8759b6f01 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/calclex.py @@ -0,0 +1,48 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + 
print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import os.path +lexer = lex.lex(optimize=True, outputdir=os.path.dirname(__file__)) + + + diff --git a/ext/ply/test/pkg_test6/parsing/calcparse.py b/ext/ply/test/pkg_test6/parsing/calcparse.py new file mode 100644 index 0000000000..6defaf9748 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/calcparse.py @@ -0,0 +1,33 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +from .statement import * + +from .expression import * + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +import os.path +parser = yacc.yacc(outputdir=os.path.dirname(__file__)) + + + + + diff --git a/ext/ply/test/pkg_test6/parsing/expression.py b/ext/ply/test/pkg_test6/parsing/expression.py new file mode 100644 index 0000000000..028f662724 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/expression.py @@ -0,0 +1,31 @@ +# This file contains definitions of expression grammar + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN 
expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 diff --git a/ext/ply/test/pkg_test6/parsing/lextab.py b/ext/ply/test/pkg_test6/parsing/lextab.py new file mode 100644 index 0000000000..8cab2985d1 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! +_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P \\d+)|(?P \\n+)|(?P [a-zA-Z_][a-zA-Z0-9_]*)|(?P \\()|(?P \\+)|(?P \\*)|(?P \\))|(?P =)|(?P /)|(?P -)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'LPAREN'), (None, 'PLUS'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test6/parsing/statement.py b/ext/ply/test/pkg_test6/parsing/statement.py new file mode 100644 index 0000000000..ef7dc55e3f --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/statement.py @@ -0,0 +1,9 @@ +# This file contains definitions of statement grammar + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] diff --git a/ext/ply/test/testcpp.py b/ext/ply/test/testcpp.py new file mode 100644 index 0000000000..2e98edd899 --- /dev/null +++ b/ext/ply/test/testcpp.py @@ -0,0 +1,101 @@ +from unittest import TestCase, main + +from multiprocessing import Process, Queue +from six.moves.queue import Empty + +import sys + +if ".." 
not in sys.path: + sys.path.insert(0, "..") + +from ply.lex import lex +from ply.cpp import * + + +def preprocessing(in_, out_queue): + out = None + + try: + p = Preprocessor(lex()) + p.parse(in_) + tokens = [t.value for t in p.parser] + out = "".join(tokens) + finally: + out_queue.put(out) + +class CPPTests(TestCase): + "Tests related to ANSI-C style lexical preprocessor." + + def __test_preprocessing(self, in_, expected, time_limit = 1.0): + out_queue = Queue() + + preprocessor = Process( + name = "PLY`s C preprocessor", + target = preprocessing, + args = (in_, out_queue) + ) + + preprocessor.start() + + try: + out = out_queue.get(timeout = time_limit) + except Empty: + preprocessor.terminate() + raise RuntimeError("Time limit exceeded!") + else: + self.assertMultiLineEqual(out, expected) + + def test_concatenation(self): + self.__test_preprocessing("""\ +#define a(x) x##_ +#define b(x) _##x +#define c(x) _##x##_ +#define d(x,y) _##x##y##_ + +a(i) +b(j) +c(k) +d(q,s)""" + , """\ + + + + + +i_ +_j +_k_ +_qs_""" + ) + + def test_deadloop_macro(self): + # If there is a word which equals to name of a parametrized macro, then + # attempt to expand such word as a macro manages the parser to fall + # into an infinite loop. + + self.__test_preprocessing("""\ +#define a(x) x + +a;""" + , """\ + + +a;""" + ) + + def test_index_error(self): + # If there are no tokens after a word ("a") which equals to name of + # a parameterized macro, then attempt to expand this word leads to + # IndexError. 
+ + self.__test_preprocessing("""\ +#define a(x) x + +a""" + , """\ + + +a""" + ) + +main() diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py index 606387d1d8..83070a7ab4 100755 --- a/ext/ply/test/testlex.py +++ b/ext/ply/test/testlex.py @@ -7,12 +7,57 @@ except ImportError: import io as StringIO import sys +import os +import warnings +import platform + sys.path.insert(0,"..") sys.tracebacklimit = 0 import ply.lex -def check_expected(result,expected): +try: + from importlib.util import cache_from_source +except ImportError: + # Python 2.7, but we don't care. + cache_from_source = None + + +def make_pymodule_path(filename, optimization=None): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if sys.hexversion >= 0x3050000: + fullpath = cache_from_source(filename, optimization=optimization) + elif sys.hexversion >= 0x3040000: + fullpath = cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename, optimization=None): + return os.path.exists(make_pymodule_path(filename, + optimization=optimization)) + +def pymodule_out_remove(filename, optimization=None): + os.remove(make_pymodule_path(filename, optimization=optimization)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +test_pyo = (implementation() == 'CPython') + +def check_expected(result, expected, contains=False): if sys.version_info[0] >= 3: if isinstance(result,str): result = result.encode('ascii') @@ -21,13 +66,16 @@ def check_expected(result,expected): resultlines = result.splitlines() expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): return False for rline,eline in 
zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False + if contains: + if eline not in rline: + return False + else: + if not rline.endswith(eline): + return False return True def run_import(module): @@ -40,6 +88,9 @@ class LexErrorWarningTests(unittest.TestCase): def setUp(self): sys.stderr = StringIO.StringIO() sys.stdout = StringIO.StringIO() + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore',category=ResourceWarning) + def tearDown(self): sys.stderr = sys.__stderr__ sys.stdout = sys.__stdout__ @@ -114,8 +165,13 @@ class LexErrorWarningTests(unittest.TestCase): def test_lex_re1(self): self.assertRaises(SyntaxError,run_import,"lex_re1") result = sys.stderr.getvalue() + if sys.hexversion < 0x3050000: + msg = "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n" + else: + msg = "Invalid regular expression for rule 't_NUMBER'. missing ), unterminated subpattern at position 0" self.assert_(check_expected(result, - "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n")) + msg, + contains=True)) def test_lex_re2(self): self.assertRaises(SyntaxError,run_import,"lex_re2") @@ -126,9 +182,19 @@ class LexErrorWarningTests(unittest.TestCase): def test_lex_re3(self): self.assertRaises(SyntaxError,run_import,"lex_re3") result = sys.stderr.getvalue() +# self.assert_(check_expected(result, +# "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" +# "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) + + if sys.hexversion < 0x3050000: + msg = ("Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" + "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n") + else: + msg = ("Invalid regular expression for rule 't_POUND'. missing ), unterminated subpattern at position 0\n" + "ERROR: Make sure '#' in rule 't_POUND' is escaped with '\#'") self.assert_(check_expected(result, - "Invalid regular expression for rule 't_POUND'. 
unbalanced parenthesis\n" - "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) + msg, + contains=True), result) def test_lex_rule1(self): self.assertRaises(SyntaxError,run_import,"lex_rule1") @@ -294,6 +360,7 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) + def test_lex_optimize(self): try: os.remove("lextab.py") @@ -316,7 +383,6 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,4,1,2)\n")) self.assert_(os.path.exists("lextab.py")) - p = subprocess.Popen([sys.executable,'-O','lex_optimize.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -325,9 +391,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("lextab.pyo", 1)) + pymodule_out_remove("lextab.pyo", 1) - os.remove("lextab.pyo") p = subprocess.Popen([sys.executable,'-OO','lex_optimize.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -335,17 +402,19 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.pyo")) + + if test_pyo: + self.assert_(pymodule_out_exists("lextab.pyo", 2)) try: os.remove("lextab.py") except OSError: pass try: - os.remove("lextab.pyc") + pymodule_out_remove("lextab.pyc") except OSError: pass try: - os.remove("lextab.pyo") + pymodule_out_remove("lextab.pyo", 2) except OSError: pass @@ -377,8 +446,9 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.pyo")) - os.remove("opt2tab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("opt2tab.pyo", 1)) + pymodule_out_remove("opt2tab.pyo", 1) p = subprocess.Popen([sys.executable,'-OO','lex_optimize2.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -386,17 +456,18 @@ class 
LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("opt2tab.pyo", 2)) try: os.remove("opt2tab.py") except OSError: pass try: - os.remove("opt2tab.pyc") + pymodule_out_remove("opt2tab.pyc") except OSError: pass try: - os.remove("opt2tab.pyo") + pymodule_out_remove("opt2tab.pyo", 2) except OSError: pass @@ -425,8 +496,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.pyo")) - os.remove("lexdir/sub/calctab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo", 1)) + pymodule_out_remove("lexdir/sub/calctab.pyo", 1) + p = subprocess.Popen([sys.executable,'-OO','lex_optimize3.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -434,12 +507,33 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo", 2)) try: shutil.rmtree("lexdir") except OSError: pass + def test_lex_optimize4(self): + + # Regression test to make sure that reflags works correctly + # on Python 3. 
+ + for extension in ['py', 'pyc']: + try: + os.remove("opt4tab.{0}".format(extension)) + except OSError: + pass + + run_import("lex_optimize4") + run_import("lex_optimize4") + + for extension in ['py', 'pyc']: + try: + os.remove("opt4tab.{0}".format(extension)) + except OSError: + pass + def test_lex_opt_alias(self): try: os.remove("aliastab.py") @@ -468,8 +562,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(+,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.pyo")) - os.remove("aliastab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("aliastab.pyo", 1)) + pymodule_out_remove("aliastab.pyo", 1) + p = subprocess.Popen([sys.executable,'-OO','lex_opt_alias.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -477,17 +573,19 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(+,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.pyo")) + + if test_pyo: + self.assert_(pymodule_out_exists("aliastab.pyo", 2)) try: os.remove("aliastab.py") except OSError: pass try: - os.remove("aliastab.pyc") + pymodule_out_remove("aliastab.pyc") except OSError: pass try: - os.remove("aliastab.pyo") + pymodule_out_remove("aliastab.pyo", 2) except OSError: pass @@ -518,21 +616,22 @@ class LexBuildOptionTests(unittest.TestCase): self.assert_(os.path.exists("manytab.py")) - p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) + if implementation() == 'CPython': + p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], + stdout=subprocess.PIPE) + result = p.stdout.read() + self.assert_(check_expected(result, + "(TOK34,'TOK34:',1,0)\n" + "(TOK143,'TOK143:',1,7)\n" + 
"(TOK269,'TOK269:',1,15)\n" + "(TOK372,'TOK372:',1,23)\n" + "(TOK452,'TOK452:',1,31)\n" + "(TOK561,'TOK561:',1,39)\n" + "(TOK999,'TOK999:',1,47)\n" + )) - self.assert_(os.path.exists("manytab.pyo")) - os.remove("manytab.pyo") + self.assert_(pymodule_out_exists("manytab.pyo", 1)) + pymodule_out_remove("manytab.pyo", 1) try: os.remove("manytab.py") except OSError: diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py index cc53b6d8f1..7e69f099d4 100644 --- a/ext/ply/test/testyacc.py +++ b/ext/ply/test/testyacc.py @@ -8,28 +8,68 @@ except ImportError: import sys import os +import warnings +import re +import platform sys.path.insert(0,"..") sys.tracebacklimit = 0 import ply.yacc -def check_expected(result,expected): - resultlines = [] +def make_pymodule_path(filename): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if sys.hexversion >= 0x3040000: + import importlib.util + fullpath = importlib.util.cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename): + return os.path.exists(make_pymodule_path(filename)) + +def pymodule_out_remove(filename): + os.remove(make_pymodule_path(filename)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +# Check the output to see if it contains all of a set of expected output lines. 
+# This alternate implementation looks weird, but is needed to properly handle +# some variations in error message order that occurs due to dict hash table +# randomization that was introduced in Python 3.3 +def check_expected(result, expected): + # Normalize 'state n' text to account for randomization effects in Python 3.3 + expected = re.sub(r' state \d+', 'state ', expected) + result = re.sub(r' state \d+', 'state ', result) + + resultlines = set() for line in result.splitlines(): if line.startswith("WARNING: "): line = line[9:] elif line.startswith("ERROR: "): line = line[7:] - resultlines.append(line) + resultlines.add(line) - expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): - return False - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True + # Selectively remove expected lines from the output + for eline in expected.splitlines(): + resultlines = set(line for line in resultlines if not line.endswith(eline)) + + # Return True if no result lines remain + return not bool(resultlines) def run_import(module): code = "import "+module @@ -43,10 +83,14 @@ class YaccErrorWarningTests(unittest.TestCase): sys.stdout = StringIO.StringIO() try: os.remove("parsetab.py") - os.remove("parsetab.pyc") + pymodule_out_remove("parsetab.pyc") except OSError: pass + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore', category=ResourceWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + def tearDown(self): sys.stderr = sys.__stderr__ sys.stdout = sys.__stdout__ @@ -148,7 +192,38 @@ class YaccErrorWarningTests(unittest.TestCase): self.assert_(check_expected(result, "yacc_error4.py:62: Illegal rule name 'error'. 
Already defined as a token\n" )) - + + + def test_yacc_error5(self): + run_import("yacc_error5") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "Group at 3:10 to 3:12\n" + "Undefined name 'a'\n" + "Syntax error at 'b'\n" + "Syntax error at 4:18 to 4:22\n" + "Assignment Error at 2:5 to 5:27\n" + "13\n" + )) + + def test_yacc_error6(self): + run_import("yacc_error6") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + + def test_yacc_error7(self): + run_import("yacc_error7") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + def test_yacc_inf(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") result = sys.stderr.getvalue() @@ -261,6 +336,7 @@ class YaccErrorWarningTests(unittest.TestCase): self.assert_(check_expected(result, "Generating LALR tables\n" )) + def test_yacc_sr(self): run_import("yacc_sr") result = sys.stderr.getvalue() @@ -276,6 +352,13 @@ class YaccErrorWarningTests(unittest.TestCase): "yacc_term1.py:24: Illegal rule name 'NUMBER'. 
Already defined as a token\n" )) + def test_yacc_unicode_literals(self): + run_import("yacc_unicode_literals") + result = sys.stderr.getvalue() + self.assert_(check_expected(result, + "Generating LALR tables\n" + )) + def test_yacc_unused(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") result = sys.stderr.getvalue() @@ -297,7 +380,6 @@ class YaccErrorWarningTests(unittest.TestCase): def test_yacc_uprec(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") result = sys.stderr.getvalue() - print repr(result) self.assert_(check_expected(result, "yacc_uprec.py:37: Nothing known about the precedence of 'UMINUS'\n" )) @@ -319,6 +401,52 @@ class YaccErrorWarningTests(unittest.TestCase): "Precedence rule 'left' defined for unknown symbol '/'\n" )) + def test_pkg_test1(self): + from pkg_test1 import parser + self.assertTrue(os.path.exists('pkg_test1/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test1/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test1/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test2(self): + from pkg_test2 import parser + self.assertTrue(os.path.exists('pkg_test2/parsing/calcparsetab.py')) + self.assertTrue(os.path.exists('pkg_test2/parsing/calclextab.py')) + self.assertTrue(os.path.exists('pkg_test2/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test3(self): + from pkg_test3 import parser + self.assertTrue(os.path.exists('pkg_test3/generated/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test3/generated/lextab.py')) + self.assertTrue(os.path.exists('pkg_test3/generated/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test4(self): + from pkg_test4 import parser + self.assertFalse(os.path.exists('pkg_test4/parsing/parsetab.py')) + self.assertFalse(os.path.exists('pkg_test4/parsing/lextab.py')) + self.assertFalse(os.path.exists('pkg_test4/parsing/parser.out')) 
+ r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test5(self): + from pkg_test5 import parser + self.assertTrue(os.path.exists('pkg_test5/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test5/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test5/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test6(self): + from pkg_test6 import parser + self.assertTrue(os.path.exists('pkg_test6/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test6/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test6/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) - unittest.main() diff --git a/ext/ply/test/yacc_error5.py b/ext/ply/test/yacc_error5.py new file mode 100644 index 0000000000..9eb0f8574a --- /dev/null +++ b/ext/ply/test/yacc_error5.py @@ -0,0 +1,94 @@ +# ----------------------------------------------------------------------------- +# yacc_error5.py +# +# Lineno and position tracking with error tokens +# ----------------------------------------------------------------------------- +import sys + +if ".." 
not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_error(t): + 'statement : NAME EQUALS error' + line_start, line_end = t.linespan(3) + pos_start, pos_end = t.lexspan(3) + print("Assignment Error at %d:%d to %d:%d" % (line_start,pos_start,line_end,pos_end)) + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Group at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = t[2] + +def p_expression_group_error(t): + 'expression : LPAREN error RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Syntax error at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = 0 + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 +parser.parse(""" +a = 3 + +(4*5) + +(a b c) + ++ 6 + 7 +""", tracking=True) + + + + + + diff --git 
a/ext/ply/test/yacc_error6.py b/ext/ply/test/yacc_error6.py new file mode 100644 index 0000000000..8d0ec85bea --- /dev/null +++ b/ext/ply/test/yacc_error6.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error6.py +# +# Panic mode recovery test +# ----------------------------------------------------------------------------- +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a name token + while True: + tok = parser.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + parser.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/ext/ply/test/yacc_error7.py b/ext/ply/test/yacc_error7.py new file mode 100644 index 0000000000..fb131beaba --- /dev/null +++ 
b/ext/ply/test/yacc_error7.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error7.py +# +# Panic mode recovery test using deprecated functionality +# ----------------------------------------------------------------------------- +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a name token + while True: + tok = yacc.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + yacc.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/ext/ply/test/yacc_prec1.py b/ext/ply/test/yacc_prec1.py index 2ca6afc0b0..99fcd903bd 100644 --- a/ext/ply/test/yacc_prec1.py +++ b/ext/ply/test/yacc_prec1.py @@ -12,8 +12,8 @@ from calclex import tokens # Parsing rules precedence = ( - 
('left','+','-'), - ('left','*','/'), + ('left', '+', '-'), + ('left', '*', '/'), ('right','UMINUS'), ) diff --git a/ext/ply/test/yacc_unicode_literals.py b/ext/ply/test/yacc_unicode_literals.py new file mode 100644 index 0000000000..5ae4f5b8a7 --- /dev/null +++ b/ext/ply/test/yacc_unicode_literals.py @@ -0,0 +1,70 @@ +# ----------------------------------------------------------------------------- +# yacc_unicode_literals +# +# Test for unicode literals on Python 2.x +# ----------------------------------------------------------------------------- +from __future__ import unicode_literals + +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + +