diff --git a/ext/ply/ANNOUNCE b/ext/ply/ANNOUNCE index 0a155cec3f..3e582501d8 100644 --- a/ext/ply/ANNOUNCE +++ b/ext/ply/ANNOUNCE @@ -1,13 +1,12 @@ -March 24, 2009 +February 15, 2018 - Announcing : PLY-3.2 (Python Lex-Yacc) + Announcing : PLY-3.11 (Python Lex-Yacc) http://www.dabeaz.com/ply -I'm pleased to announce a significant new update to PLY---a 100% Python -implementation of the common parsing tools lex and yacc. PLY-3.2 adds -compatibility for Python 2.6 and 3.0, provides some new customization -options, and cleans up a lot of internal implementation details. +I'm pleased to announce PLY-3.11--a pure Python implementation of the +common parsing tools lex and yacc. PLY-3.11 is a minor bug fix +release. It supports both Python 2 and Python 3. If you are new to PLY, here are a few highlights: diff --git a/ext/ply/CHANGES b/ext/ply/CHANGES index 9d8b25d5a9..44050072e9 100644 --- a/ext/ply/CHANGES +++ b/ext/ply/CHANGES @@ -1,3 +1,341 @@ +Version 3.11 +--------------------- +02/15/18 beazley + Fixed some minor bugs related to re flags and token order. + Github pull requests #151 and #153. + +02/15/18 beazley + Added a set_lexpos() method to grammar symbols. Github issue #148. + + +04/13/17 beazley + Mostly minor bug fixes and small code cleanups. + +Version 3.10 +--------------------- +01/31/17: beazley + Changed grammar signature computation to not involve hashing + functions. Parts are just combined into a big string. + +10/07/16: beazley + Fixed Issue #101: Incorrect shift-reduce conflict resolution with + precedence specifier. + + PLY was incorrectly resolving shift-reduce conflicts in certain + cases. 
For example, in the example/calc/calc.py example, you + could trigger it doing this: + + calc > -3 - 4 + 1 (correct answer should be -7) + calc > + + Issue and suggested patch contributed by https://github.com/RomaVis + +Version 3.9 +--------------------- +08/30/16: beazley + Exposed the parser state number as the parser.state attribute + in productions and error functions. For example: + + def p_somerule(p): + ''' + rule : A B C + ''' + print('State:', p.parser.state) + + May address issue #65 (publish current state in error callback). + +08/30/16: beazley + Fixed Issue #88. Python3 compatibility with ply/cpp. + +08/30/16: beazley + Fixed Issue #93. Ply can crash if SyntaxError is raised inside + a production. Not actually sure if the original implementation + worked as documented at all. Yacc has been modified to follow + the spec as outlined in the CHANGES noted for 11/27/07 below. + +08/30/16: beazley + Fixed Issue #97. Failure with code validation when the original + source files aren't present. Validation step now ignores + the missing file. + +08/30/16: beazley + Minor fixes to version numbers. + +Version 3.8 +--------------------- +10/02/15: beazley + Fixed issues related to Python 3.5. Patch contributed by Barry Warsaw. + +Version 3.7 +--------------------- +08/25/15: beazley + Fixed problems when reading table files from pickled data. + +05/07/15: beazley + Fixed regression in handling of table modules if specified as module + objects. See https://github.com/dabeaz/ply/issues/63 + +Version 3.6 +--------------------- +04/25/15: beazley + If PLY is unable to create the 'parser.out' or 'parsetab.py' files due + to permission issues, it now just issues a warning message and + continues to operate. This could happen if a module using PLY + is installed in a funny way where tables have to be regenerated, but + for whatever reason, the user doesn't have write permission on + the directory where PLY wants to put them. 
+ +04/24/15: beazley + Fixed some issues related to use of packages and table file + modules. Just to emphasize, PLY now generates its special + files such as 'parsetab.py' and 'lextab.py' in the *SAME* + directory as the source file that uses lex() and yacc(). + + If for some reason, you want to change the name of the table + module, use the tabmodule and lextab options: + + lexer = lex.lex(lextab='spamlextab') + parser = yacc.yacc(tabmodule='spamparsetab') + + If you specify a simple name as shown, the module will still be + created in the same directory as the file invoking lex() or yacc(). + If you want the table files to be placed into a different package, + then give a fully qualified package name. For example: + + lexer = lex.lex(lextab='pkgname.files.lextab') + parser = yacc.yacc(tabmodule='pkgname.files.parsetab') + + For this to work, 'pkgname.files' must already exist as a valid + Python package (i.e., the directories must already exist and be + set up with the proper __init__.py files, etc.). + +Version 3.5 +--------------------- +04/21/15: beazley + Added support for defaulted_states in the parser. A + defaulted_state is a state where the only legal action is a + reduction of a single grammar rule across all valid input + tokens. For such states, the rule is reduced and the + reading of the next lookahead token is delayed until it is + actually needed at a later point in time. + + This delay in consuming the next lookahead token is a + potentially important feature in advanced parsing + applications that require tight interaction between the + lexer and the parser. For example, a grammar rule can + modify the lexer state upon reduction and have such changes + take effect before the next input token is read. + + *** POTENTIAL INCOMPATIBILITY *** + One potential danger of defaulted_states is that syntax + errors might be deferred to a later point of processing + than where they were detected in past versions of PLY. 
+ Thus, it's possible that your error handling could change + slightly on the same inputs. defaulted_states do not change + the overall parsing of the input (i.e., the same grammar is + accepted). + + If for some reason, you need to disable defaulted states, + you can do this: + + parser = yacc.yacc() + parser.defaulted_states = {} + +04/21/15: beazley + Fixed debug logging in the parser. It wasn't properly reporting goto states + on grammar rule reductions. + +04/20/15: beazley + Added actions to be defined to character literals (Issue #32). For example: + + literals = [ '{', '}' ] + + def t_lbrace(t): + r'\{' + # Some action + t.type = '{' + return t + + def t_rbrace(t): + r'\}' + # Some action + t.type = '}' + return t + +04/19/15: beazley + Import of the 'parsetab.py' file is now constrained to only consider the + directory specified by the outputdir argument to yacc(). If not supplied, + the import will only consider the directory in which the grammar is defined. + This should greatly reduce problems with the wrong parsetab.py file being + imported by mistake. For example, if it's found somewhere else on the path + by accident. + + *** POTENTIAL INCOMPATIBILITY *** It's possible that this might break some + packaging/deployment setup if PLY was instructed to place its parsetab.py + in a different location. You'll have to specify a proper outputdir= argument + to yacc() to fix this if needed. + +04/19/15: beazley + Changed default output directory to be the same as that in which the + yacc grammar is defined. If your grammar is in a file 'calc.py', + then the parsetab.py and parser.out files should be generated in the + same directory as that file. The destination directory can be changed + using the outputdir= argument to yacc(). + +04/19/15: beazley + Changed the parsetab.py file signature slightly so that the parsetab won't + regenerate if created on a different major version of Python (ie., a + parsetab created on Python 2 will work with Python 3). 
+ +04/16/15: beazley + Fixed Issue #44 call_errorfunc() should return the result of errorfunc() + +04/16/15: beazley + Support for versions of Python <2.7 is officially dropped. PLY may work, but + the unit tests require Python 2.7 or newer. + +04/16/15: beazley + Fixed bug related to calling yacc(start=...). PLY wasn't regenerating the + table file correctly for this case. + +04/16/15: beazley + Added skipped tests for PyPy and Java. Related to use of Python's -O option. + +05/29/13: beazley + Added filter to make unit tests pass under 'python -3'. + Reported by Neil Muller. + +05/29/13: beazley + Fixed CPP_INTEGER regex in ply/cpp.py (Issue 21). + Reported by @vbraun. + +05/29/13: beazley + Fixed yacc validation bugs when from __future__ import unicode_literals + is being used. Reported by Kenn Knowles. + +05/29/13: beazley + Added support for Travis-CI. Contributed by Kenn Knowles. + +05/29/13: beazley + Added a .gitignore file. Suggested by Kenn Knowles. + +05/29/13: beazley + Fixed validation problems for source files that include a + different source code encoding specifier. Fix relies on + the inspect module. Should work on Python 2.6 and newer. + Not sure about older versions of Python. + Contributed by Michael Droettboom + +05/21/13: beazley + Fixed unit tests for yacc to eliminate random failures due to dict hash value + randomization in Python 3.3 + Reported by Arfrever + +10/15/12: beazley + Fixed comment whitespace processing bugs in ply/cpp.py. + Reported by Alexei Pososin. + +10/15/12: beazley + Fixed token names in ply/ctokens.py to match rule names. + Reported by Alexei Pososin. + +04/26/12: beazley + Changes to functions available in panic mode error recovery. 
In previous versions + of PLY, the following global functions were available for use in the p_error() rule: + + yacc.errok() # Reset error state + yacc.token() # Get the next token + yacc.restart() # Reset the parsing stack + + The use of global variables was problematic for code involving multiple parsers + and frankly was a poor design overall. These functions have been moved to methods + of the parser instance created by the yacc() function. You should write code like + this: + + def p_error(p): + ... + parser.errok() + + parser = yacc.yacc() + + *** POTENTIAL INCOMPATIBILITY *** The original global functions now issue a + DeprecationWarning. + +04/19/12: beazley + Fixed some problems with line and position tracking and the use of error + symbols. If you have a grammar rule involving an error rule like this: + + def p_assignment_bad(p): + '''assignment : location EQUALS error SEMI''' + ... + + You can now do line and position tracking on the error token. For example: + + def p_assignment_bad(p): + '''assignment : location EQUALS error SEMI''' + start_line = p.lineno(3) + start_pos = p.lexpos(3) + + If the tracking=True option is supplied to parse(), you can additionally get + spans: + + def p_assignment_bad(p): + '''assignment : location EQUALS error SEMI''' + start_line, end_line = p.linespan(3) + start_pos, end_pos = p.lexspan(3) + + Note that error handling is still a hairy thing in PLY. This won't work + unless your lexer is providing accurate information. Please report bugs. + Suggested by a bug reported by Davis Herring. + +04/18/12: beazley + Change to doc string handling in lex module. Regex patterns are now first + pulled from a function's .regex attribute. If that doesn't exist, then + .doc is checked as a fallback. The @TOKEN decorator now sets the .regex + attribute of a function instead of its doc string. + Change suggested by Kristoffer Ellersgaard Koch. + +04/18/12: beazley + Fixed issue #1: Fixed _tabversion. 
It should use __tabversion__ instead of __version__ + Reported by Daniele Tricoli + +04/18/12: beazley + Fixed issue #8: Literals empty list causes IndexError + Reported by Walter Nissen. + +04/18/12: beazley + Fixed issue #12: Typo in code snippet in documentation + Reported by florianschanda. + +04/18/12: beazley + Fixed issue #10: Correctly escape t_XOREQUAL pattern. + Reported by Andy Kittner. + +Version 3.4 +--------------------- +02/17/11: beazley + Minor patch to make cpp.py compatible with Python 3. Note: This + is an experimental file not currently used by the rest of PLY. + +02/17/11: beazley + Fixed setup.py trove classifiers to properly list PLY as + Python 3 compatible. + +01/02/11: beazley + Migration of repository to github. + +Version 3.3 +----------------------------- +08/25/09: beazley + Fixed issue 15 related to the set_lineno() method in yacc. Reported by + mdsherry. + +08/25/09: beazley + Fixed a bug related to regular expression compilation flags not being + properly stored in lextab.py files created by the lexer when running + in optimize mode. Reported by Bruce Frederiksen. + Version 3.2 ----------------------------- diff --git a/ext/ply/MANIFEST.in b/ext/ply/MANIFEST.in new file mode 100644 index 0000000000..0d37431b0b --- /dev/null +++ b/ext/ply/MANIFEST.in @@ -0,0 +1,8 @@ +recursive-include example * +recursive-include doc * +recursive-include test * +include ANNOUNCE +include README.md +include CHANGES +include TODO +global-exclude *.pyc diff --git a/ext/ply/PKG-INFO b/ext/ply/PKG-INFO new file mode 100644 index 0000000000..f2d8c8ae08 --- /dev/null +++ b/ext/ply/PKG-INFO @@ -0,0 +1,23 @@ +Metadata-Version: 1.1 +Name: ply +Version: 3.11 +Summary: Python Lex & Yacc +Home-page: http://www.dabeaz.com/ply/ +Author: David Beazley +Author-email: dave@dabeaz.com +License: BSD +Description-Content-Type: UNKNOWN +Description: + PLY is yet another implementation of lex and yacc for Python. 
Some notable + features include the fact that its implemented entirely in Python and it + uses LALR(1) parsing which is efficient and well suited for larger grammars. + + PLY provides most of the standard lex/yacc features including support for empty + productions, precedence rules, error recovery, and support for ambiguous grammars. + + PLY is extremely easy to use and provides very extensive error checking. + It is compatible with both Python 2 and Python 3. + +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2 diff --git a/ext/ply/README b/ext/ply/README.md similarity index 64% rename from ext/ply/README rename to ext/ply/README.md index d3b785fa22..05df32a5b9 100644 --- a/ext/ply/README +++ b/ext/ply/README.md @@ -1,6 +1,8 @@ -PLY (Python Lex-Yacc) Version 3.2 +# PLY (Python Lex-Yacc) Version 3.11 -Copyright (C) 2001-2009, +[![Build Status](https://travis-ci.org/dabeaz/ply.svg?branch=master)](https://travis-ci.org/dabeaz/ply) + +Copyright (C) 2001-2018 David M. Beazley (Dabeaz LLC) All rights reserved. @@ -96,7 +98,7 @@ A simple example is found at the end of this document Requirements ============ -PLY requires the use of Python 2.2 or greater. However, you should +PLY requires the use of Python 2.6 or greater. However, you should use the latest Python release if possible. It should work on just about any platform. PLY has been tested with both CPython and Jython. It also seems to work with IronPython. @@ -112,7 +114,11 @@ book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown may also be useful. -A Google group for PLY can be found at +The GitHub page for PLY can be found at: + + https://github.com/dabeaz/ply + +An old and relatively inactive discussion group for PLY is found at: http://groups.google.com/group/ply-hack @@ -130,7 +136,7 @@ and testing a revised LALR(1) implementation for PLY-2.0. 
Special Note for PLY-3.0 ======================== PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual +compatibility with Python 2.6 is still preserved. PLY provides dual Python 2/3 compatibility by restricting its implementation to a common subset of basic language features. You should not convert PLY using 2to3--it is not necessary and may in fact break the implementation. @@ -141,109 +147,109 @@ Example Here is a simple example showing a PLY implementation of a calculator with variables. -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. -# ----------------------------------------------------------------------------- + # ----------------------------------------------------------------------------- + # calc.py + # + # A simple calculator with variables. + # ----------------------------------------------------------------------------- -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) -# Tokens + # Tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t + def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t -# Ignored characters -t_ignore = " \t" + # Ignored characters + t_ignore = " \t" -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() + def t_newline(t): + 
r'\n+' + t.lexer.lineno += t.value.count("\n") -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) -# dictionary of names (for storing variables) -names = { } + # Build the lexer + import ply.lex as lex + lex.lex() -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] + # Precedence rules for the arithmetic operators + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) -def p_statement_expr(p): - 'statement : expression' - print p[1] + # dictionary of names (for storing variables) + names = { } -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] + def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] + def p_statement_expr(p): + 'statement : expression' + print(p[1]) -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] + def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] + def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except 
LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 + def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] -def p_error(p): - print "Syntax error at '%s'" % p.value + def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] -import ply.yacc as yacc -yacc.yacc() + def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - yacc.parse(s) + def p_error(p): + print("Syntax error at '%s'" % p.value) + + import ply.yacc as yacc + yacc.yacc() + + while True: + try: + s = raw_input('calc > ') # use input() on Python 3 + except EOFError: + break + yacc.parse(s) Bug Reports and Patches @@ -252,12 +258,10 @@ My goal with PLY is to simply have a decent lex/yacc implementation for Python. As a general rule, I don't spend huge amounts of time working on it unless I receive very specific bug reports and/or patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack +requests and enhancements into each new version. Please visit the PLY +github page at https://github.com/dabeaz/ply to submit issues and pull +requests. To contact me about bugs and/or new features, please send +email to dave@dabeaz.com. -- Dave diff --git a/ext/ply/doc/internal.html b/ext/ply/doc/internal.html index 3fabfe28c0..57e87dfc7e 100644 --- a/ext/ply/doc/internal.html +++ b/ext/ply/doc/internal.html @@ -12,7 +12,7 @@ dave@dabeaz.com

-PLY Version: 3.0 +PLY Version: 3.11

diff --git a/ext/ply/doc/ply.html b/ext/ply/doc/ply.html index 3345e79294..b35ba44611 100644 --- a/ext/ply/doc/ply.html +++ b/ext/ply/doc/ply.html @@ -12,13 +12,13 @@ dave@dabeaz.com

-PLY Version: 3.0 +PLY Version: 3.11

  • Parsing basics
  • Yacc @@ -49,29 +50,31 @@ dave@dabeaz.com
  • An example
  • Combining Grammar Rule Functions
  • Character Literals -
  • Empty Productions +
  • Empty Productions
  • Changing the starting symbol
  • Dealing With Ambiguous Grammars -
  • The parser.out file +
  • The parser.out file
  • Syntax Error Handling
  • Line Number and Position Tracking
  • AST Construction -
  • Embedded Actions +
  • Embedded Actions
  • Miscellaneous Yacc Notes
  • Multiple Parsers and Lexers -
  • Using Python's Optimized Mode +
  • Using Python's Optimized Mode
  • Advanced Debugging +
  • Packaging Advice
  • Where to go from here?
  • @@ -79,7 +82,10 @@ dave@dabeaz.com
    -

    1. Preface and Requirements

    + + + +

    1. Preface and Requirements

    @@ -90,12 +96,8 @@ into a big development project with PLY.

    -PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that -Python 3 support is new and has not been extensively tested (although -all of the examples and unit tests pass under Python 3.0). If you are -using Python 2, you should try to use Python 2.4 or newer. Although PLY -works with versions as far back as Python 2.2, some of its optional features -require more modern library modules. +PLY-3.11 is compatible with both Python 2 and Python 3. If you are using +Python 2, you have to use Python 2.6 or newer.

    2. Introduction

    @@ -111,19 +113,7 @@ relatively straightforward to use PLY.

    Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. Since 2001, PLY has -continued to be improved as feedback has been received from users. -PLY-3.0 represents a major refactoring of the original implementation -with an eye towards future enhancements. - -

    +Compilers Course I taught in 2001 at the University of Chicago. Since PLY was primarily developed as an instructional tool, you will find it to be fairly picky about token and grammar rule specification. In part, this @@ -137,10 +127,10 @@ to be a parsing framework. Instead, you will find a bare-bones, yet fully capable lex/yacc implementation written entirely in Python.

    -The rest of this document assumes that you are somewhat familar with +The rest of this document assumes that you are somewhat familiar with parsing theory, syntax directed translation, and the use of compiler construction tools such as lex and yacc in other programming -languages. If you are unfamilar with these topics, you will probably +languages. If you are unfamiliar with these topics, you will probably want to consult an introductory text such as "Compilers: Principles, Techniques, and Tools", by Aho, Sethi, and Ullman. O'Reilly's "Lex and Yacc" by John Levine may also be handy. In fact, the O'Reilly book can be @@ -149,13 +139,14 @@ used as a reference for PLY as the concepts are virtually identical.

    3. PLY Overview

    +

    PLY consists of two separate modules; lex.py and yacc.py, both of which are found in a Python package called ply. The lex.py module is used to break input text into a collection of tokens specified by a collection of regular expression rules. yacc.py is used to recognize language syntax that has -been specified in the form of a context free grammar. yacc.py uses LR parsing and generates its parsing tables -using either the LALR(1) (the default) or SLR table generation algorithms. +been specified in the form of a context free grammar. +

    The two tools are meant to work together. Specifically, @@ -171,7 +162,7 @@ simple one-pass compilers. Like its Unix counterpart, yacc.py provides most of the features you expect including extensive error checking, grammar validation, support for empty productions, error tokens, and ambiguity -resolution via precedence rules. In fact, everything that is possible in traditional yacc +resolution via precedence rules. In fact, almost everything that is possible in traditional yacc should be supported in PLY.

    @@ -282,7 +273,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer @@ -310,8 +301,9 @@ lexer.input(data) # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok + if not tok: + break # No more input + print(tok) @@ -338,7 +330,7 @@ Lexers also support the iteration protocol. So, you can write the above loop

     for tok in lexer:
    -    print tok
    +    print(tok)
     
    @@ -353,8 +345,9 @@ accessing these attributes: # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok.type, tok.value, tok.line, tok.lexpos + if not tok: + break # No more input + print(tok.type, tok.value, tok.lineno, tok.lexpos) @@ -367,10 +360,12 @@ token relative to the start of the input text.

    4.2 The tokens list

    +

    All lexers must provide a list tokens that defines all of the possible token names that can be produced by the lexer. This list is always required and is used to perform a variety of validation checks. The tokens list is also used by the yacc.py module to identify terminals. +

    In the example, the following code specified the token names: @@ -392,7 +387,7 @@ tokens = (

    4.3 Specification of tokens

    -Each token is specified by writing a regular expression rule. Each of these rules are +Each token is specified by writing a regular expression rule compatible with Python's re module. Each of these rules are defined by making declarations with a special prefix t_ to indicate that it defines a token. For simple tokens, the regular expression can be specified as strings such as this (note: Python raw strings are used since they are the @@ -429,8 +424,17 @@ when it is done, the resulting token should be returned. If no value is returne function, the token is simply discarded and the next token read.

    -Internally, lex.py uses the re module to do its patten matching. When building the master regular expression, +Internally, lex.py uses the re module to do its pattern matching. Patterns are compiled +using the re.VERBOSE flag which can be used to help readability. However, be aware that unescaped +whitespace is ignored and comments are allowed in this mode. If your pattern involves whitespace, make sure you +use \s. If you need to match the # character, use [#]. +

    + +

    +When building the master regular expression, rules are added in the following order: +

    +

    1. All tokens defined by functions are added in the same order as they appear in the lexer file. @@ -548,21 +552,18 @@ Within the rule, the lineno attribute of the underlying lexer t.lex After the line number is updated, the token is simply discarded since nothing is returned.

      -lex.py does not perform and kind of automatic column tracking. However, it does record positional +lex.py does not perform any kind of automatic column tracking. However, it does record positional information related to each token in the lexpos attribute. Using this, it is usually possible to compute column information as a separate step. For instance, just count backwards until you reach a newline.

      -# Compute column. 
      +# Compute column.
       #     input is the input text string
       #     token is a token instance
      -def find_column(input,token):
      -    last_cr = input.rfind('\n',0,token.lexpos)
      -    if last_cr < 0:
      -	last_cr = 0
      -    column = (token.lexpos - last_cr) + 1
      -    return column
      +def find_column(input, token):
      +    line_start = input.rfind('\n', 0, token.lexpos) + 1
      +    return (token.lexpos - line_start) + 1
       
      @@ -580,6 +581,15 @@ Although it is possible to define a regular expression rule for whitespace in a similar to t_newline(), the use of t_ignore provides substantially better lexing performance because it is handled as a special case and is checked in a much more efficient manner than the normal regular expression rules. +

      + +

      +The characters given in t_ignore are not ignored when such characters are part of +other regular expression patterns. For example, if you had a rule to capture quoted text, +that pattern can include the ignored characters (which will be captured in the normal way). The +main purpose of t_ignore is to ignore whitespace and other padding between the +tokens that you actually want to parse. +

      4.8 Literal characters

      @@ -604,14 +614,38 @@ literals = "+-*/" A literal character is simply a single character that is returned "as is" when encountered by the lexer. Literals are checked after all of the defined regular expression rules. Thus, if a rule starts with one of the literal characters, it will always take precedence. +

      When a literal token is returned, both its type and value attributes are set to the character itself. For example, '+'. +

      + +

      +It's possible to write token functions that perform additional actions +when literals are matched. However, you'll need to set the token type +appropriately. For example: +

      + +
      +
      +literals = [ '{', '}' ]
      +
      +def t_lbrace(t):
      +    r'\{'
      +    t.type = '{'      # Set token type to the expected literal
      +    return t
      +
      +def t_rbrace(t):
      +    r'\}'
      +    t.type = '}'      # Set token type to the expected literal
      +    return t
      +
      +

      4.9 Error handling

      -Finally, the t_error() +The t_error() function is used to handle lexing errors that occur when illegal characters are detected. In this case, the t.value attribute contains the rest of the input string that has not been tokenized. In the example, the error function @@ -621,49 +655,67 @@ was defined as follows:

       # Error handling rule
       def t_error(t):
      -    print "Illegal character '%s'" % t.value[0]
      +    print("Illegal character '%s'" % t.value[0])
           t.lexer.skip(1)
       
      In this case, we simply print the offending character and skip ahead one character by calling t.lexer.skip(1). -

      4.10 Building and using the lexer

      +

      4.10 EOF Handling

      -To build the lexer, the function lex.lex() is used. This function -uses Python reflection (or introspection) to read the the regular expression rules +The t_eof() function is used to handle an end-of-file (EOF) condition in the input. As input, it +receives a token type 'eof' with the lineno and lexpos attributes set appropriately. +The main use of this function is to provide more input to the lexer so that it can continue to parse. Here is an +example of how this works: +

      + +
      +
      +# EOF handling rule
      +def t_eof(t):
      +    # Get more input (Example)
      +    more = raw_input('... ')
      +    if more:
      +        self.lexer.input(more)
      +        return self.lexer.token()
      +    return None
      +
      +
      + +

      +The EOF function should return the next available token (by calling self.lexer.token()) or None to +indicate no more data. Be aware that setting more input with the self.lexer.input() method does +NOT reset the lexer state or the lineno attribute used for position tracking. The lexpos +attribute is reset so be aware of that if you're using it in error reporting. +

      + +

      4.11 Building and using the lexer

      + + +

      +To build the lexer, the function lex.lex() is used. For example:

      + +
      +
      +lexer = lex.lex()
      +
      +
      + +

      This function +uses Python reflection (or introspection) to read the regular expression rules out of the calling context and build the lexer. Once the lexer has been built, two methods can be used to control the lexer. - +

      • lexer.input(data). Reset the lexer and store a new input string.
      • lexer.token(). Return the next token. Returns a special LexToken instance on success or None if the end of the input text has been reached.
      -The preferred way to use PLY is to invoke the above methods directly on the lexer object returned by the -lex() function. The legacy interface to PLY involves module-level functions lex.input() and lex.token(). -For example: - -
      -
      -lex.lex()
      -lex.input(sometext)
      -while 1:
      -    tok = lex.token()
      -    if not tok: break
      -    print tok
      -
      -
      - -

      -In this example, the module-level functions lex.input() and lex.token() are bound to the input() -and token() methods of the last lexer created by the lex module. This interface may go away at some point so -it's probably best not to use it. - -

      4.11 The @TOKEN decorator

      +

      4.12 The @TOKEN decorator

      In some applications, you may want to define build tokens from as a series of @@ -695,22 +747,11 @@ def t_ID(t): -This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. An alternative -approach this problem is to set the docstring directly like this: +

      +This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. +

      -
      -
      -def t_ID(t):
      -    ...
      -
      -t_ID.__doc__ = identifier
      -
      -
      - -NOTE: Use of @TOKEN requires Python-2.4 or newer. If you're concerned about backwards compatibility with older -versions of Python, use the alternative approach of setting the docstring directly. - -

      4.12 Optimized mode

      +

      4.13 Optimized mode

      For improved performance, it may be desirable to use Python's @@ -727,8 +768,9 @@ lexer = lex.lex(optimize=1) Next, run Python in its normal operating mode. When you do -this, lex.py will write a file called lextab.py to -the current directory. This file contains all of the regular +this, lex.py will write a file called lextab.py in +the same directory as the module containing the lexer specification. +This file contains all of the regular expression rules and tables used during lexing. On subsequent executions, lextab.py will simply be imported to build the lexer. This @@ -736,7 +778,8 @@ approach substantially improves the startup time of the lexer and it works in Python's optimized mode.

      -To change the name of the lexer-generated file, use the lextab keyword argument. For example: +To change the name of the lexer-generated module, use the lextab keyword argument. For example: +

      @@ -747,7 +790,7 @@ lexer = lex.lex(optimize=1,lextab="footab")
       When running in optimized mode, it is important to note that lex disables most error checking.  Thus, this is really only recommended
       if you're sure everything is working correctly and you're ready to start releasing production code.
       
      -

      4.13 Debugging

      +

      4.14 Debugging

      For the purpose of debugging, you can run lex() in a debugging mode as follows: @@ -779,7 +822,7 @@ if __name__ == '__main__': Please refer to the "Debugging" section near the end for some more advanced details of debugging. -

      4.14 Alternative specification of lexers

      +

      4.15 Alternative specification of lexers

      As shown in the example, lexers are specified all within one Python module. If you want to @@ -830,7 +873,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1)
      @@ -860,7 +903,7 @@ The module option can also be used to define lexers from instances of a
       import ply.lex as lex
       
      -class MyLexer:
      +class MyLexer(object):
           # List of token names.   This is always required
           tokens = (
              'NUMBER',
      @@ -897,7 +940,7 @@ class MyLexer:
       
           # Error handling rule
           def t_error(self,t):
      -        print "Illegal character '%s'" % t.value[0]
      +        print("Illegal character '%s'" % t.value[0])
               t.lexer.skip(1)
       
           # Build the lexer
      @@ -908,9 +951,10 @@ class MyLexer:
           def test(self,data):
               self.lexer.input(data)
               while True:
      -             tok = lexer.token()
      -             if not tok: break
      -             print tok
      +             tok = self.lexer.token()
      +             if not tok: 
      +                 break
      +             print(tok)
       
       # Build the lexer and try it out
       m = MyLexer()
      @@ -928,7 +972,7 @@ PLY only works properly if the lexer actions are defined by bound-methods.
       When using the module option to lex(), PLY collects symbols
       from the underlying object using the dir() function. There is no
       direct access to the __dict__ attribute of the object supplied as a 
      -module value.
      +module value. 

      Finally, if you want to keep things nicely encapsulated, but don't want to use a @@ -974,7 +1018,7 @@ def MyLexer(): # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer from my environment and return it @@ -982,8 +1026,13 @@ def MyLexer():

      +

      +Important note: If you are defining a lexer using a class or closure, be aware that PLY still requires you to only +define a single lexer per module (source file). There are extensive validation/error checking parts of the PLY that +may falsely report error messages if you don't follow this rule. +

      -

      4.15 Maintaining state

      +

      4.16 Maintaining state

      In your lexer, you may want to maintain a variety of state @@ -1033,7 +1082,7 @@ Just to put your mind at some ease, all internal attributes of the lexer (with the exception of lineno) have names that are prefixed by lex (e.g., lexdata,lexpos, etc.). Thus, it is perfectly safe to store attributes in the lexer that -don't have names starting with that prefix or a name that conlicts with one of the +don't have names starting with that prefix or a name that conflicts with one of the predefined methods (e.g., input(), token(), etc.).

      @@ -1080,7 +1129,7 @@ def MyLexer(): -

      4.16 Lexer cloning

      +

      4.17 Lexer cloning

      @@ -1105,7 +1154,7 @@ cloned lexers could be used to handle different input files.

      Creating a clone is different than calling lex.lex() in that -PLY doesn't regenerate any of the internal tables or regular expressions. So, +PLY doesn't regenerate any of the internal tables or regular expressions.

      Special considerations need to be made when cloning lexers that also @@ -1129,7 +1178,7 @@ important to emphasize that clone() is only meant to create a new lexer that reuses the regular expressions and environment of another lexer. If you need to make a totally new copy of a lexer, then call lex() again. -

      4.17 Internal lexer state

      +

      4.18 Internal lexer state

      A Lexer object lexer has a number of internal attributes that may be useful in certain @@ -1167,7 +1216,7 @@ current token. If you have written a regular expression that contains named gro Note: This attribute is only updated when tokens are defined and processed by functions. -

      4.18 Conditional lexing and start conditions

      +

      4.19 Conditional lexing and start conditions

      In advanced parsing applications, it may be useful to have different @@ -1178,7 +1227,7 @@ a series of different states. Each state can have its own tokens, lexing rules, and so forth. The implementation is based largely on the "start condition" feature of GNU flex. Details of this can be found at http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html.. +href="http://flex.sourceforge.net/manual/Start-Conditions.html">http://flex.sourceforge.net/manual/Start-Conditions.html.

      To define a new lexing state, it must first be declared. This is done by including a "states" declaration in your @@ -1244,8 +1293,8 @@ t_INITIAL_NUMBER = r'\d+'

      -States are also associated with the special t_ignore and t_error() declarations. For example, if a state treats -these differently, you can declare: +States are also associated with the special t_ignore, t_error(), and t_eof() declarations. For example, if a state treats +these differently, you can declare:

      @@ -1336,7 +1385,7 @@ def t_ccode_rbrace(t):
       
       # C or C++ comment (ignore)    
       def t_ccode_comment(t):
      -    r'(/\*(.|\n)*?*/)|(//.*)'
      +    r'(/\*(.|\n)*?\*/)|(//.*)'
           pass
       
       # C string
      @@ -1366,13 +1415,16 @@ However, if the closing right brace is encountered, the rule t_ccode_rbrace<
       position), stores it, and returns a token 'CCODE' containing all of that text.  When returning the token, the lexing state is restored back to its
       initial state.
       
      -

      4.19 Miscellaneous Issues

      +

      4.20 Miscellaneous Issues

    2. The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data -such as open files or sockets. This limitation is primarily a side-effect of using the re module. +such as open files or sockets. This limitation is primarily a side-effect of using the re module. You might be +able to work around this by implementing an appropriate def t_eof() end-of-file handling rule. The main complication +here is that you'll probably need to ensure that data is fed to the lexer in a way so that it doesn't split in the middle +of a token.

    3. The lexer should work properly with both Unicode strings given as token and pattern matching rules as @@ -1383,10 +1435,13 @@ well as for input text.
      -lex.lex(reflags=re.UNICODE)
      +lex.lex(reflags=re.UNICODE | re.VERBOSE)
       
      +Note: by default, reflags is set to re.VERBOSE. If you provide +your own flags, you may need to include this for PLY to preserve its normal behavior. +

    4. Since the lexer is written entirely in Python, its performance is largely determined by that of the Python re module. Although @@ -1403,7 +1458,8 @@ it only needs to conform to the following requirements:
      • It must provide a token() method that returns the next token or None if no more tokens are available. -
      • The token() method must return an object tok that has type and value attributes. +
      • The token() method must return an object tok that has type and value attributes. If +line number tracking is being used, then the token should also define a lineno attribute.

      5. Parsing basics

      @@ -1595,7 +1651,7 @@ def p_factor_expr(p): # Error rule for syntax errors def p_error(p): - print "Syntax error in input!" + print("Syntax error in input!") # Build the parser parser = yacc.yacc() @@ -1607,7 +1663,7 @@ while True: break if not s: continue result = parser.parse(s) - print result + print(result)
    5. @@ -1677,15 +1733,25 @@ calc > +

      Since table construction is relatively expensive (especially for large -grammars), the resulting parsing table is written to the current -directory in a file called parsetab.py. In addition, a +grammars), the resulting parsing table is written to +a file called parsetab.py. In addition, a debugging file called parser.out is created. On subsequent executions, yacc will reload the table from parsetab.py unless it has detected a change in the underlying grammar (in which case the tables and parsetab.py file are -regenerated). Note: The names of parser output files can be changed -if necessary. See the PLY Reference for details. +regenerated). Both of these files are written to the same directory +as the module in which the parser is specified. +The name of the parsetab module can be changed using the +tabmodule keyword argument to yacc(). For example: +

      + +
      +
      +parser = yacc.yacc(tabmodule='fooparsetab')
      +
      +

      If any errors are detected in your grammar specification, yacc.py will produce @@ -1824,7 +1890,7 @@ literals = ['+','-','*','/' ] Character literals are limited to a single character. Thus, it is not legal to specify literals such as '<=' or '=='. For this, use the normal lexing rules (e.g., define a rule such as t_EQ = r'=='). -

      6.4 Empty Productions

      +

      6.4 Empty Productions

      yacc.py can handle empty productions by defining a rule like this: @@ -1880,7 +1946,7 @@ an argument to yacc(). For example:
      -yacc.yacc(start='foo')
      +parser = yacc.yacc(start='foo')
       
      @@ -2060,7 +2126,7 @@ of UMINUS in the precedence specifier.

      At first, the use of UMINUS in this example may appear very confusing. -UMINUS is not an input token or a grammer rule. Instead, you should +UMINUS is not an input token or a grammar rule. Instead, you should think of it as the name of a special marker in the precedence table. When you use the %prec qualifier, you're simply telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence. @@ -2123,7 +2189,7 @@ the rule assignment : ID EQUALS expression.

      It should be noted that reduce/reduce conflicts are notoriously -difficult to spot simply looking at the input grammer. When a +difficult to spot simply looking at the input grammar. When a reduce/reduce conflict occurs, yacc() will try to help by printing a warning message such as this: @@ -2142,7 +2208,7 @@ the contents of the parser.out debugging file with an appropriately high level of caffeination. -

      6.7 The parser.out file

      +

      6.7 The parser.out file

      Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR @@ -2448,8 +2514,9 @@ When a syntax error occurs, yacc.py performs the following steps:
      1. On the first occurrence of an error, the user-defined p_error() function -is called with the offending token as an argument. However, if the syntax error is due to -reaching the end-of-file, p_error() is called with an argument of None. +is called with the offending token as an argument. However, if the syntax error is due to +reaching the end-of-file, p_error() is called with an + argument of None. Afterwards, the parser enters an "error-recovery" mode in which it will not make future calls to p_error() until it has successfully shifted at least 3 tokens onto the parsing stack. @@ -2495,7 +2562,7 @@ To account for the possibility of a bad expression, you might write an additiona
         def p_statement_print_error(p):
              'statement : PRINT error SEMI'
        -     print "Syntax error in print statement. Bad expression"
        +     print("Syntax error in print statement. Bad expression")
         
         
        @@ -2519,7 +2586,7 @@ on the right in an error rule. For example:
         def p_statement_print_error(p):
             'statement : PRINT error'
        -    print "Syntax error in print statement. Bad expression"
        +    print("Syntax error in print statement. Bad expression")
         
        @@ -2541,12 +2608,17 @@ parser in its initial state.
         def p_error(p):
        -    print "Whoa. You are seriously hosed."
        +    print("Whoa. You are seriously hosed.")
        +    if not p:
        +        print("End of File!")
        +        return
        +
             # Read ahead looking for a closing '}'
        -    while 1:
        -        tok = yacc.token()             # Get the next token
        -        if not tok or tok.type == 'RBRACE': break
        -    yacc.restart()
        +    while True:
        +        tok = parser.token()             # Get the next token
        +        if not tok or tok.type == 'RBRACE': 
        +            break
        +    parser.restart()
         
        @@ -2556,32 +2628,33 @@ This function simply discards the bad token and tells the parser that the error
         def p_error(p):
        -    print "Syntax error at token", p.type
        -    # Just discard the token and tell the parser it's okay.
        -    yacc.errok()
        +    if p:
        +         print("Syntax error at token", p.type)
        +         # Just discard the token and tell the parser it's okay.
        +         parser.errok()
        +    else:
        +         print("Syntax error at EOF")
         

        -Within the p_error() function, three functions are available to control the behavior -of the parser: +More information on these methods is as follows: +

        +

          -
        • yacc.errok(). This resets the parser state so it doesn't think it's in error-recovery +
        • parser.errok(). This resets the parser state so it doesn't think it's in error-recovery mode. This will prevent an error token from being generated and will reset the internal error counters so that the next syntax error will call p_error() again.

          -

        • yacc.token(). This returns the next token on the input stream. +
        • parser.token(). This returns the next token on the input stream.

          -

        • yacc.restart(). This discards the entire parsing stack and resets the parser +
        • parser.restart(). This discards the entire parsing stack and resets the parser to its initial state.
        -Note: these functions are only available when invoking p_error() and are not available -at any other time. -

        To supply the next lookahead token to the parser, p_error() can return a token. This might be useful if trying to synchronize on special characters. For example: @@ -2590,17 +2663,24 @@ useful if trying to synchronize on special characters. For example:

         def p_error(p):
             # Read ahead looking for a terminating ";"
        -    while 1:
        -        tok = yacc.token()             # Get the next token
        +    while True:
        +        tok = parser.token()             # Get the next token
                 if not tok or tok.type == 'SEMI': break
        -    yacc.errok()
        +    parser.errok()
         
             # Return SEMI to the parser as the next lookahead token
             return tok  
         
        -

        6.8.3 Signaling an error from a production

        +

        +Keep in mind that in the above error handling functions, +parser is an instance of the parser created by +yacc(). You'll need to save this instance someplace in your +code so that you can refer to it during error handling. +

        + +

        6.8.3 Signalling an error from a production

        If necessary, a production rule can manually force the parser to enter error recovery. This @@ -2629,8 +2709,44 @@ raises SyntaxError.

        Note: This feature of PLY is meant to mimic the behavior of the YYERROR macro in yacc. +

        6.8.4 When Do Syntax Errors Get Reported

        -

        6.8.4 General comments on error handling

        + +

        +In most cases, yacc will handle errors as soon as a bad input token is +detected on the input. However, be aware that yacc may choose to +delay error handling until after it has reduced one or more grammar +rules first. This behavior might be unexpected, but it's related to +special states in the underlying parsing table known as "defaulted +states." A defaulted state is a parsing condition where the same +grammar rule will be reduced regardless of what valid token +comes next on the input. For such states, yacc chooses to go ahead +and reduce the grammar rule without reading the next input +token. If the next token is bad, yacc will eventually get around to reading it and +report a syntax error. It's just a little unusual in that you might +see some of your grammar rules firing immediately prior to the syntax +error. +

        + +

        +Usually, the delayed error reporting with defaulted states is harmless +(and there are other reasons for wanting PLY to behave in this way). +However, if you need to turn this behavior off for some reason, you +can clear the defaulted states table like this: +

        + +
        +
        +parser = yacc.yacc()
        +parser.defaulted_states = {}
        +
        +
        + +

        +Disabling defaulted states is not recommended if your grammar makes use +of embedded actions as described in Section 6.11.

        + +

        6.8.5 General comments on error handling

        For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable @@ -2713,7 +2829,7 @@ example: def p_bad_func(p): 'funccall : fname LPAREN error RPAREN' # Line number reported from LPAREN token - print "Bad function call at line", p.lineno(2) + print("Bad function call at line", p.lineno(2)) @@ -2834,7 +2950,7 @@ def p_expression_binop(p): -

        6.11 Embedded Actions

        +

        6.11 Embedded Actions

        The parsing technique used by yacc only allows actions to be executed at the end of a rule. For example, @@ -2844,7 +2960,7 @@ suppose you have a rule like this:
         def p_foo(p):
             "foo : A B C D"
        -    print "Parsed a foo", p[1],p[2],p[3],p[4]
        +    print("Parsed a foo", p[1],p[2],p[3],p[4])
         
        @@ -2860,12 +2976,12 @@ been parsed. To do this, write an empty rule like this:
         def p_foo(p):
             "foo : A seen_A B C D"
        -    print "Parsed a foo", p[1],p[3],p[4],p[5]
        -    print "seen_A returned", p[2]
        +    print("Parsed a foo", p[1],p[3],p[4],p[5])
        +    print("seen_A returned", p[2])
         
         def p_seen_A(p):
             "seen_A :"
        -    print "Saw an A = ", p[-1]   # Access grammar symbol to left
        +    print("Saw an A = ", p[-1])   # Access grammar symbol to left
             p[0] = some_value            # Assign value to seen_A
         
         
        @@ -2956,25 +3072,13 @@ might undo the operations performed in the embedded action
          -
        • The default parsing method is LALR. To use SLR instead, run yacc() as follows: - -
          -
          -yacc.yacc(method="SLR")
          -
          -
          -Note: LALR table generation takes approximately twice as long as SLR table generation. There is no -difference in actual parsing performance---the same code is used in both cases. LALR is preferred when working -with more complicated grammars since it is more powerful. - -

        • By default, yacc.py relies on lex.py for tokenizing. However, an alternative tokenizer can be supplied as follows:
          -yacc.parse(lexer=x)
          +parser = yacc.parse(lexer=x)
           
          in this case, x must be a Lexer object that minimally has a x.token() method for retrieving the next @@ -2986,7 +3090,7 @@ To disable this, use
          -yacc.yacc(debug=0)
          +parser = yacc.yacc(debug=False)
           
          @@ -2995,23 +3099,36 @@ yacc.yacc(debug=0)
          -yacc.yacc(tabmodule="foo")
          +parser = yacc.yacc(tabmodule="foo")
           
          +

          +Normally, the parsetab.py file is placed into the same directory as +the module where the parser is defined. If you want it to go somewhere else, you can +give an absolute package name for tabmodule instead. In that case, the +tables will be written there. +

          +

        • To change the directory in which the parsetab.py file (and other output files) are written, use:
          -yacc.yacc(tabmodule="foo",outputdir="somedirectory")
          +parser = yacc.yacc(tabmodule="foo",outputdir="somedirectory")
           
          +

          +Note: Be aware that unless the directory specified is also on Python's path (sys.path), subsequent +imports of the table file will fail. As a general rule, it's better to specify a destination using the +tabmodule argument instead of directly specifying a directory using the outputdir argument. +

          +

        • To prevent yacc from generating any kind of parser table file, use:
          -yacc.yacc(write_tables=0)
          +parser = yacc.yacc(write_tables=False)
           
          @@ -3023,24 +3140,10 @@ each time it runs (which may take awhile depending on how large your grammar is)
          -yacc.parse(debug=1)     
          +parser.parse(input_text, debug=True)     
           
          -

          -

        • The yacc.yacc() function really returns a parser object. If you want to support multiple -parsers in the same application, do this: - -
          -
          -p = yacc.yacc()
          -...
          -p.parse()
          -
          -
          - -Note: The function yacc.parse() is bound to the last parser that was generated. -

        • Since the generation of the LALR tables is relatively expensive, previously generated tables are cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5 @@ -3048,13 +3151,62 @@ checksum of all grammar rules and precedence rules. Only in the event of a mism

          It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules -and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow -machine. Please be patient. +and several hundred states.

        • +

        • Since LR parsing is driven by tables, the performance of the parser is largely independent of the size of the grammar. The biggest bottlenecks will be the lexer and the complexity of the code in your grammar rules. +
        • +

          + +

          +

        • yacc() also allows parsers to be defined as classes and as closures (see the section on alternative specification of +lexers). However, be aware that only one parser may be defined in a single module (source file). There are various +error checks and validation steps that may issue confusing error messages if you try to define multiple parsers +in the same source file. +
        • +

          + +

          +

        • Decorators of production rules have to update the wrapped function's line number. wrapper.co_firstlineno = func.__code__.co_firstlineno: + +
          +
          +from functools import wraps
          +from nodes import Collection
          +
          +
          +def strict(*types):
          +    def decorate(func):
          +        @wraps(func)
          +        def wrapper(p):
          +            func(p)
          +            if not isinstance(p[0], types):
          +                raise TypeError
          +
          +        wrapper.co_firstlineno = func.__code__.co_firstlineno
          +        return wrapper
          +
          +    return decorate
          +
          +@strict(Collection)
          +def p_collection(p):
          +    """
          +    collection  : sequence
          +                | map
          +    """
          +    p[0] = p[1]
          +
          +
          + +
        • +

          + +
        +

        +

        7. Multiple Parsers and Lexers

        @@ -3097,7 +3249,7 @@ the lexer object that triggered the rule. For example: def t_NUMBER(t): r'\d+' ... - print t.lexer # Show lexer object + print(t.lexer) # Show lexer object @@ -3109,8 +3261,8 @@ and parser objects respectively. def p_expr_plus(p): 'expr : expr PLUS expr' ... - print p.parser # Show parser object - print p.lexer # Show lexer object + print(p.parser) # Show parser object + print(p.lexer) # Show lexer object @@ -3118,7 +3270,7 @@ If necessary, arbitrary attributes can be attached to the lexer or parser object For example, if you wanted to have different parsing modes, you could attach a mode attribute to the parser object and look at it later. -

        8. Using Python's Optimized Mode

        +

        8. Using Python's Optimized Mode

        Because PLY uses information from doc-strings, parsing and lexing @@ -3151,7 +3303,7 @@ specified and works).

        Debugging a compiler is typically not an easy task. PLY provides some -advanced diagonistic capabilities through the use of Python's +advanced diagnostic capabilities through the use of Python's logging module. The next two sections describe this:

        9.1 Debugging the lex() and yacc() commands

        @@ -3244,7 +3396,90 @@ For very complicated problems, you should pass in a logging object that redirects to a file where you can more easily inspect the output after execution. -

        10. Where to go from here?

        +

        10. Packaging Advice

        + + +

        +If you are distributing a package that makes use of PLY, you should +spend a few moments thinking about how you want to handle the files +that are automatically generated. For example, the parsetab.py +file generated by the yacc() function.

        + +

        +Starting in PLY-3.6, the table files are created in the same directory +as the file where a parser is defined. This means that the +parsetab.py file will live side-by-side with your parser +specification. In terms of packaging, this is probably the easiest and +most sane approach to manage. You don't need to give yacc() +any extra arguments and it should just "work."

        + +

        +One concern is the management of the parsetab.py file itself. +For example, should you have this file checked into version control (e.g., GitHub), +should it be included in a package distribution as a normal file, or should you +just let PLY generate it automatically for the user when they install your package? +

        + +

        +As of PLY-3.6, the parsetab.py file should be compatible across all versions +of Python including Python 2 and 3. Thus, a table file generated in Python 2 should +work fine if it's used on Python 3. Because of this, it should be relatively harmless +to distribute the parsetab.py file yourself if you need to. However, be aware +that older/newer versions of PLY may try to regenerate the file if there are future +enhancements or changes to its format. +

        + +

        +To make the generation of table files easier for the purposes of installation, you might +want to make your parser files executable using the -m option or similar. For +example: +

        + +
        +
        +# calc.py
        +...
        +...
        +def make_parser():
        +    parser = yacc.yacc()
        +    return parser
        +
        +if __name__ == '__main__':
        +    make_parser()
        +
        +
        + +

        +You can then use a command such as python -m calc to generate the tables. Alternatively, +a setup.py script can import the module and use make_parser() to create the +parsing tables. +

        + +

        +If you're willing to sacrifice a little startup time, you can also instruct PLY to never write the +tables using yacc.yacc(write_tables=False, debug=False). In this mode, PLY will regenerate +the parsing tables from scratch each time. For a small grammar, you probably won't notice. For a +large grammar, you should probably reconsider--the parsing tables are meant to dramatically speed up this process. +

        + +

        +During operation, it is normal for PLY to produce diagnostic error +messages (usually printed to standard error). These are generated +entirely using the logging module. If you want to redirect +these messages or silence them, you can provide your own logging +object to yacc(). For example: +

        + +
        +
        +import logging
        +log = logging.getLogger('ply')
        +...
        +parser = yacc.yacc(errorlog=log)
        +
        +
        + +

        11. Where to go from here?

        The examples directory of the PLY distribution contains several simple examples. Please consult a diff --git a/ext/ply/example/BASIC/basic.py b/ext/ply/example/BASIC/basic.py index b14483d2da..70ac9e7c74 100644 --- a/ext/ply/example/BASIC/basic.py +++ b/ext/ply/example/BASIC/basic.py @@ -2,7 +2,7 @@ # import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input @@ -17,7 +17,8 @@ import basinterp if len(sys.argv) == 2: data = open(sys.argv[1]).read() prog = basparse.parse(data) - if not prog: raise SystemExit + if not prog: + raise SystemExit b = basinterp.BasicInterpreter(prog) try: b.run() @@ -39,33 +40,26 @@ while 1: line = raw_input("[BASIC] ") except EOFError: raise SystemExit - if not line: continue + if not line: + continue line += "\n" prog = basparse.parse(line) - if not prog: continue + if not prog: + continue keys = list(prog) if keys[0] > 0: - b.add_statements(prog) + b.add_statements(prog) else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - + stat = prog[keys[0]] + if stat[0] == 'RUN': + try: + b.run() + except RuntimeError: + pass + elif stat[0] == 'LIST': + b.list() + elif stat[0] == 'BLANK': + b.del_line(stat[1]) + elif stat[0] == 'NEW': + b.new() diff --git a/ext/ply/example/BASIC/basiclex.py b/ext/ply/example/BASIC/basiclex.py index 3d27cdeeb4..4151f4c34f 100644 --- a/ext/ply/example/BASIC/basiclex.py +++ b/ext/ply/example/BASIC/basiclex.py @@ -3,72 +3,59 @@ from ply import * keywords = ( - 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP', - 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW', + 'LET', 'READ', 'DATA', 'PRINT', 'GOTO', 'IF', 'THEN', 'FOR', 'NEXT', 'TO', 'STEP', + 'END', 'STOP', 'DEF', 'GOSUB', 'DIM', 'REM', 'RETURN', 'RUN', 'LIST', 'NEW', ) tokens = 
keywords + ( - 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER', - 'LPAREN','RPAREN','LT','LE','GT','GE','NE', - 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING', - 'ID','NEWLINE' + 'EQUALS', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POWER', + 'LPAREN', 'RPAREN', 'LT', 'LE', 'GT', 'GE', 'NE', + 'COMMA', 'SEMI', 'INTEGER', 'FLOAT', 'STRING', + 'ID', 'NEWLINE' ) t_ignore = ' \t' + def t_REM(t): r'REM .*' return t + def t_ID(t): r'[A-Z][A-Z0-9]*' if t.value in keywords: t.type = t.value return t - -t_EQUALS = r'=' -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_POWER = r'\^' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LT = r'<' -t_LE = r'<=' -t_GT = r'>' -t_GE = r'>=' -t_NE = r'<>' -t_COMMA = r'\,' -t_SEMI = r';' -t_INTEGER = r'\d+' -t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' -t_STRING = r'\".*?\"' + +t_EQUALS = r'=' +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_POWER = r'\^' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LT = r'<' +t_LE = r'<=' +t_GT = r'>' +t_GE = r'>=' +t_NE = r'<>' +t_COMMA = r'\,' +t_SEMI = r';' +t_INTEGER = r'\d+' +t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' +t_STRING = r'\".*?\"' + def t_NEWLINE(t): r'\n' t.lexer.lineno += 1 return t + def t_error(t): print("Illegal character %s" % t.value[0]) t.lexer.skip(1) lex.lex(debug=0) - - - - - - - - - - - - - - - - - diff --git a/ext/ply/example/BASIC/basiclog.py b/ext/ply/example/BASIC/basiclog.py index ccfd7b9671..9dcc7feda6 100644 --- a/ext/ply/example/BASIC/basiclog.py +++ b/ext/ply/example/BASIC/basiclog.py @@ -2,16 +2,16 @@ # import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input import logging logging.basicConfig( - level = logging.INFO, - filename = "parselog.txt", - filemode = "w" + level=logging.INFO, + filename="parselog.txt", + filemode="w" ) log = logging.getLogger() @@ -24,8 +24,9 @@ import basinterp # interactive mode below if len(sys.argv) == 2: data = 
open(sys.argv[1]).read() - prog = basparse.parse(data,debug=log) - if not prog: raise SystemExit + prog = basparse.parse(data, debug=log) + if not prog: + raise SystemExit b = basinterp.BasicInterpreter(prog) try: b.run() @@ -47,33 +48,26 @@ while 1: line = raw_input("[BASIC] ") except EOFError: raise SystemExit - if not line: continue + if not line: + continue line += "\n" - prog = basparse.parse(line,debug=log) - if not prog: continue + prog = basparse.parse(line, debug=log) + if not prog: + continue keys = list(prog) if keys[0] > 0: - b.add_statements(prog) + b.add_statements(prog) else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - + stat = prog[keys[0]] + if stat[0] == 'RUN': + try: + b.run() + except RuntimeError: + pass + elif stat[0] == 'LIST': + b.list() + elif stat[0] == 'BLANK': + b.del_line(stat[1]) + elif stat[0] == 'NEW': + b.new() diff --git a/ext/ply/example/BASIC/basinterp.py b/ext/ply/example/BASIC/basinterp.py index 3e8a7774a2..67762c797b 100644 --- a/ext/ply/example/BASIC/basinterp.py +++ b/ext/ply/example/BASIC/basinterp.py @@ -5,141 +5,167 @@ import sys import math import random + class BasicInterpreter: # Initialize the interpreter. 
prog is a dictionary # containing (line,statement) mappings - def __init__(self,prog): - self.prog = prog + def __init__(self, prog): + self.prog = prog - self.functions = { # Built-in function table - 'SIN' : lambda z: math.sin(self.eval(z)), - 'COS' : lambda z: math.cos(self.eval(z)), - 'TAN' : lambda z: math.tan(self.eval(z)), - 'ATN' : lambda z: math.atan(self.eval(z)), - 'EXP' : lambda z: math.exp(self.eval(z)), - 'ABS' : lambda z: abs(self.eval(z)), - 'LOG' : lambda z: math.log(self.eval(z)), - 'SQR' : lambda z: math.sqrt(self.eval(z)), - 'INT' : lambda z: int(self.eval(z)), - 'RND' : lambda z: random.random() - } + self.functions = { # Built-in function table + 'SIN': lambda z: math.sin(self.eval(z)), + 'COS': lambda z: math.cos(self.eval(z)), + 'TAN': lambda z: math.tan(self.eval(z)), + 'ATN': lambda z: math.atan(self.eval(z)), + 'EXP': lambda z: math.exp(self.eval(z)), + 'ABS': lambda z: abs(self.eval(z)), + 'LOG': lambda z: math.log(self.eval(z)), + 'SQR': lambda z: math.sqrt(self.eval(z)), + 'INT': lambda z: int(self.eval(z)), + 'RND': lambda z: random.random() + } # Collect all data statements def collect_data(self): - self.data = [] - for lineno in self.stat: - if self.prog[lineno][0] == 'DATA': - self.data = self.data + self.prog[lineno][1] - self.dc = 0 # Initialize the data counter + self.data = [] + for lineno in self.stat: + if self.prog[lineno][0] == 'DATA': + self.data = self.data + self.prog[lineno][1] + self.dc = 0 # Initialize the data counter # Check for end statements def check_end(self): - has_end = 0 - for lineno in self.stat: - if self.prog[lineno][0] == 'END' and not has_end: - has_end = lineno - if not has_end: - print("NO END INSTRUCTION") - self.error = 1 - return - if has_end != lineno: - print("END IS NOT LAST") - self.error = 1 + has_end = 0 + for lineno in self.stat: + if self.prog[lineno][0] == 'END' and not has_end: + has_end = lineno + if not has_end: + print("NO END INSTRUCTION") + self.error = 1 + return + if has_end != 
lineno: + print("END IS NOT LAST") + self.error = 1 # Check loops def check_loops(self): - for pc in range(len(self.stat)): - lineno = self.stat[pc] - if self.prog[lineno][0] == 'FOR': - forinst = self.prog[lineno] - loopvar = forinst[1] - for i in range(pc+1,len(self.stat)): - if self.prog[self.stat[i]][0] == 'NEXT': - nextvar = self.prog[self.stat[i]][1] - if nextvar != loopvar: continue - self.loopend[pc] = i - break - else: - print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc]) - self.error = 1 - - # Evaluate an expression - def eval(self,expr): - etype = expr[0] - if etype == 'NUM': return expr[1] - elif etype == 'GROUP': return self.eval(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return -self.eval(expr[2]) - elif etype == 'BINOP': - if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3]) - elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3]) - elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3]) - elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3]) - elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3]) - elif etype == 'VAR': - var,dim1,dim2 = expr[1] - if not dim1 and not dim2: - if var in self.vars: - return self.vars[var] - else: - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError - # May be a list lookup or a function evaluation - if dim1 and not dim2: - if var in self.functions: - # A function - return self.functions[var](dim1) + for pc in range(len(self.stat)): + lineno = self.stat[pc] + if self.prog[lineno][0] == 'FOR': + forinst = self.prog[lineno] + loopvar = forinst[1] + for i in range(pc + 1, len(self.stat)): + if self.prog[self.stat[i]][0] == 'NEXT': + nextvar = self.prog[self.stat[i]][1] + if nextvar != loopvar: + continue + self.loopend[pc] = i + break else: - # A list evaluation - if var in self.lists: - dim1val = self.eval(dim1) - if dim1val < 1 or dim1val > len(self.lists[var]): - print("LIST INDEX OUT OF BOUNDS AT 
LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.lists[var][dim1val-1] - if dim1 and dim2: - if var in self.tables: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): - print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - return self.tables[var][dim1val-1][dim2val-1] - print("UNDEFINED VARIABLE %s AT LINE %s" % (var, self.stat[self.pc])) - raise RuntimeError + print("FOR WITHOUT NEXT AT LINE %s" % self.stat[pc]) + self.error = 1 + + # Evaluate an expression + def eval(self, expr): + etype = expr[0] + if etype == 'NUM': + return expr[1] + elif etype == 'GROUP': + return self.eval(expr[1]) + elif etype == 'UNARY': + if expr[1] == '-': + return -self.eval(expr[2]) + elif etype == 'BINOP': + if expr[1] == '+': + return self.eval(expr[2]) + self.eval(expr[3]) + elif expr[1] == '-': + return self.eval(expr[2]) - self.eval(expr[3]) + elif expr[1] == '*': + return self.eval(expr[2]) * self.eval(expr[3]) + elif expr[1] == '/': + return float(self.eval(expr[2])) / self.eval(expr[3]) + elif expr[1] == '^': + return abs(self.eval(expr[2]))**self.eval(expr[3]) + elif etype == 'VAR': + var, dim1, dim2 = expr[1] + if not dim1 and not dim2: + if var in self.vars: + return self.vars[var] + else: + print("UNDEFINED VARIABLE %s AT LINE %s" % + (var, self.stat[self.pc])) + raise RuntimeError + # May be a list lookup or a function evaluation + if dim1 and not dim2: + if var in self.functions: + # A function + return self.functions[var](dim1) + else: + # A list evaluation + if var in self.lists: + dim1val = self.eval(dim1) + if dim1val < 1 or dim1val > len(self.lists[var]): + print("LIST INDEX OUT OF BOUNDS AT LINE %s" % + self.stat[self.pc]) + raise RuntimeError + return self.lists[var][dim1val - 1] + if dim1 and dim2: + if var in self.tables: + dim1val = self.eval(dim1) + dim2val = self.eval(dim2) + if dim1val < 1 or 
dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): + print("TABLE INDEX OUT OUT BOUNDS AT LINE %s" % + self.stat[self.pc]) + raise RuntimeError + return self.tables[var][dim1val - 1][dim2val - 1] + print("UNDEFINED VARIABLE %s AT LINE %s" % + (var, self.stat[self.pc])) + raise RuntimeError # Evaluate a relational expression - def releval(self,expr): - etype = expr[1] - lhs = self.eval(expr[2]) - rhs = self.eval(expr[3]) - if etype == '<': - if lhs < rhs: return 1 - else: return 0 + def releval(self, expr): + etype = expr[1] + lhs = self.eval(expr[2]) + rhs = self.eval(expr[3]) + if etype == '<': + if lhs < rhs: + return 1 + else: + return 0 - elif etype == '<=': - if lhs <= rhs: return 1 - else: return 0 + elif etype == '<=': + if lhs <= rhs: + return 1 + else: + return 0 - elif etype == '>': - if lhs > rhs: return 1 - else: return 0 + elif etype == '>': + if lhs > rhs: + return 1 + else: + return 0 - elif etype == '>=': - if lhs >= rhs: return 1 - else: return 0 + elif etype == '>=': + if lhs >= rhs: + return 1 + else: + return 0 - elif etype == '=': - if lhs == rhs: return 1 - else: return 0 + elif etype == '=': + if lhs == rhs: + return 1 + else: + return 0 - elif etype == '<>': - if lhs != rhs: return 1 - else: return 0 + elif etype == '<>': + if lhs != rhs: + return 1 + else: + return 0 # Assignment - def assign(self,target,value): + def assign(self, target, value): var, dim1, dim2 = target if not dim1 and not dim2: self.vars[var] = self.eval(value) @@ -147,42 +173,44 @@ class BasicInterpreter: # List assignment dim1val = self.eval(dim1) if not var in self.lists: - self.lists[var] = [0]*10 + self.lists[var] = [0] * 10 if dim1val > len(self.lists[var]): - print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.lists[var][dim1val-1] = self.eval(value) + print ("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) + raise RuntimeError + self.lists[var][dim1val - 1] = self.eval(value) elif dim1 
and dim2: dim1val = self.eval(dim1) dim2val = self.eval(dim2) if not var in self.tables: - temp = [0]*10 - v = [] - for i in range(10): v.append(temp[:]) - self.tables[var] = v + temp = [0] * 10 + v = [] + for i in range(10): + v.append(temp[:]) + self.tables[var] = v # Variable already exists if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): - print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) - raise RuntimeError - self.tables[var][dim1val-1][dim2val-1] = self.eval(value) + print("DIMENSION TOO LARGE AT LINE %s" % self.stat[self.pc]) + raise RuntimeError + self.tables[var][dim1val - 1][dim2val - 1] = self.eval(value) # Change the current line number - def goto(self,linenum): - if not linenum in self.prog: - print("UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc])) - raise RuntimeError - self.pc = self.stat.index(linenum) + def goto(self, linenum): + if not linenum in self.prog: + print("UNDEFINED LINE NUMBER %d AT LINE %d" % + (linenum, self.stat[self.pc])) + raise RuntimeError + self.pc = self.stat.index(linenum) # Run it def run(self): - self.vars = { } # All variables - self.lists = { } # List variables - self.tables = { } # Tables - self.loops = [ ] # Currently active loops - self.loopend= { } # Mapping saying where loops end - self.gosub = None # Gosub return point (if any) - self.error = 0 # Indicates program error + self.vars = {} # All variables + self.lists = {} # List variables + self.tables = {} # Tables + self.loops = [] # Currently active loops + self.loopend = {} # Mapping saying where loops end + self.gosub = None # Gosub return point (if any) + self.error = 0 # Indicates program error self.stat = list(self.prog) # Ordered list of all line numbers self.stat.sort() @@ -194,248 +222,275 @@ class BasicInterpreter: self.check_end() self.check_loops() - if self.error: raise RuntimeError + if self.error: + raise RuntimeError while 1: - line = self.stat[self.pc] + line = self.stat[self.pc] instr = 
self.prog[line] - + op = instr[0] # END and STOP statements if op == 'END' or op == 'STOP': - break # We're done + break # We're done # GOTO statement elif op == 'GOTO': - newline = instr[1] - self.goto(newline) - continue + newline = instr[1] + self.goto(newline) + continue # PRINT statement elif op == 'PRINT': - plist = instr[1] - out = "" - for label,val in plist: - if out: - out += ' '*(15 - (len(out) % 15)) - out += label - if val: - if label: out += " " - eval = self.eval(val) - out += str(eval) - sys.stdout.write(out) - end = instr[2] - if not (end == ',' or end == ';'): - sys.stdout.write("\n") - if end == ',': sys.stdout.write(" "*(15-(len(out) % 15))) - if end == ';': sys.stdout.write(" "*(3-(len(out) % 3))) - + plist = instr[1] + out = "" + for label, val in plist: + if out: + out += ' ' * (15 - (len(out) % 15)) + out += label + if val: + if label: + out += " " + eval = self.eval(val) + out += str(eval) + sys.stdout.write(out) + end = instr[2] + if not (end == ',' or end == ';'): + sys.stdout.write("\n") + if end == ',': + sys.stdout.write(" " * (15 - (len(out) % 15))) + if end == ';': + sys.stdout.write(" " * (3 - (len(out) % 3))) + # LET statement elif op == 'LET': - target = instr[1] - value = instr[2] - self.assign(target,value) + target = instr[1] + value = instr[2] + self.assign(target, value) # READ statement elif op == 'READ': - for target in instr[1]: - if self.dc < len(self.data): - value = ('NUM',self.data[self.dc]) - self.assign(target,value) - self.dc += 1 - else: - # No more data. Program ends - return + for target in instr[1]: + if self.dc < len(self.data): + value = ('NUM', self.data[self.dc]) + self.assign(target, value) + self.dc += 1 + else: + # No more data. 
Program ends + return elif op == 'IF': - relop = instr[1] - newline = instr[2] - if (self.releval(relop)): - self.goto(newline) - continue + relop = instr[1] + newline = instr[2] + if (self.releval(relop)): + self.goto(newline) + continue elif op == 'FOR': - loopvar = instr[1] - initval = instr[2] - finval = instr[3] - stepval = instr[4] - - # Check to see if this is a new loop - if not self.loops or self.loops[-1][0] != self.pc: - # Looks like a new loop. Make the initial assignment - newvalue = initval - self.assign((loopvar,None,None),initval) - if not stepval: stepval = ('NUM',1) - stepval = self.eval(stepval) # Evaluate step here - self.loops.append((self.pc,stepval)) - else: - # It's a repeat of the previous loop - # Update the value of the loop variable according to the step - stepval = ('NUM',self.loops[-1][1]) - newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval) + loopvar = instr[1] + initval = instr[2] + finval = instr[3] + stepval = instr[4] - if self.loops[-1][1] < 0: relop = '>=' - else: relop = '<=' - if not self.releval(('RELOP',relop,newvalue,finval)): - # Loop is done. Jump to the NEXT - self.pc = self.loopend[self.pc] - self.loops.pop() - else: - self.assign((loopvar,None,None),newvalue) + # Check to see if this is a new loop + if not self.loops or self.loops[-1][0] != self.pc: + # Looks like a new loop. Make the initial assignment + newvalue = initval + self.assign((loopvar, None, None), initval) + if not stepval: + stepval = ('NUM', 1) + stepval = self.eval(stepval) # Evaluate step here + self.loops.append((self.pc, stepval)) + else: + # It's a repeat of the previous loop + # Update the value of the loop variable according to the + # step + stepval = ('NUM', self.loops[-1][1]) + newvalue = ( + 'BINOP', '+', ('VAR', (loopvar, None, None)), stepval) + + if self.loops[-1][1] < 0: + relop = '>=' + else: + relop = '<=' + if not self.releval(('RELOP', relop, newvalue, finval)): + # Loop is done. 
Jump to the NEXT + self.pc = self.loopend[self.pc] + self.loops.pop() + else: + self.assign((loopvar, None, None), newvalue) elif op == 'NEXT': - if not self.loops: - print("NEXT WITHOUT FOR AT LINE %s" % line) - return - - nextvar = instr[1] - self.pc = self.loops[-1][0] - loopinst = self.prog[self.stat[self.pc]] - forvar = loopinst[1] - if nextvar != forvar: - print("NEXT DOESN'T MATCH FOR AT LINE %s" % line) - return - continue + if not self.loops: + print("NEXT WITHOUT FOR AT LINE %s" % line) + return + + nextvar = instr[1] + self.pc = self.loops[-1][0] + loopinst = self.prog[self.stat[self.pc]] + forvar = loopinst[1] + if nextvar != forvar: + print("NEXT DOESN'T MATCH FOR AT LINE %s" % line) + return + continue elif op == 'GOSUB': - newline = instr[1] - if self.gosub: - print("ALREADY IN A SUBROUTINE AT LINE %s" % line) - return - self.gosub = self.stat[self.pc] - self.goto(newline) - continue + newline = instr[1] + if self.gosub: + print("ALREADY IN A SUBROUTINE AT LINE %s" % line) + return + self.gosub = self.stat[self.pc] + self.goto(newline) + continue elif op == 'RETURN': - if not self.gosub: - print("RETURN WITHOUT A GOSUB AT LINE %s" % line) - return - self.goto(self.gosub) - self.gosub = None + if not self.gosub: + print("RETURN WITHOUT A GOSUB AT LINE %s" % line) + return + self.goto(self.gosub) + self.gosub = None elif op == 'FUNC': - fname = instr[1] - pname = instr[2] - expr = instr[3] - def eval_func(pvalue,name=pname,self=self,expr=expr): - self.assign((pname,None,None),pvalue) - return self.eval(expr) - self.functions[fname] = eval_func + fname = instr[1] + pname = instr[2] + expr = instr[3] + + def eval_func(pvalue, name=pname, self=self, expr=expr): + self.assign((pname, None, None), pvalue) + return self.eval(expr) + self.functions[fname] = eval_func elif op == 'DIM': - for vname,x,y in instr[1]: - if y == 0: - # Single dimension variable - self.lists[vname] = [0]*x - else: - # Double dimension variable - temp = [0]*y - v = [] - for i in 
range(x): - v.append(temp[:]) - self.tables[vname] = v + for vname, x, y in instr[1]: + if y == 0: + # Single dimension variable + self.lists[vname] = [0] * x + else: + # Double dimension variable + temp = [0] * y + v = [] + for i in range(x): + v.append(temp[:]) + self.tables[vname] = v - self.pc += 1 + self.pc += 1 # Utility functions for program listing - def expr_str(self,expr): + def expr_str(self, expr): etype = expr[0] - if etype == 'NUM': return str(expr[1]) - elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1]) + if etype == 'NUM': + return str(expr[1]) + elif etype == 'GROUP': + return "(%s)" % self.expr_str(expr[1]) elif etype == 'UNARY': - if expr[1] == '-': return "-"+str(expr[2]) + if expr[1] == '-': + return "-" + str(expr[2]) elif etype == 'BINOP': - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) + return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3])) elif etype == 'VAR': - return self.var_str(expr[1]) + return self.var_str(expr[1]) - def relexpr_str(self,expr): - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) + def relexpr_str(self, expr): + return "%s %s %s" % (self.expr_str(expr[2]), expr[1], self.expr_str(expr[3])) - def var_str(self,var): - varname,dim1,dim2 = var - if not dim1 and not dim2: return varname - if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1)) - return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2)) + def var_str(self, var): + varname, dim1, dim2 = var + if not dim1 and not dim2: + return varname + if dim1 and not dim2: + return "%s(%s)" % (varname, self.expr_str(dim1)) + return "%s(%s,%s)" % (varname, self.expr_str(dim1), self.expr_str(dim2)) # Create a program listing def list(self): - stat = list(self.prog) # Ordered list of all line numbers - stat.sort() - for line in stat: - instr = self.prog[line] - op = instr[0] - if op in ['END','STOP','RETURN']: - print("%s %s" % (line, op)) - continue - elif op 
== 'REM': - print("%s %s" % (line, instr[1])) - elif op == 'PRINT': - _out = "%s %s " % (line, op) - first = 1 - for p in instr[1]: - if not first: _out += ", " - if p[0] and p[1]: _out += '"%s"%s' % (p[0],self.expr_str(p[1])) - elif p[1]: _out += self.expr_str(p[1]) - else: _out += '"%s"' % (p[0],) - first = 0 - if instr[2]: _out += instr[2] - print(_out) - elif op == 'LET': - print("%s LET %s = %s" % (line,self.var_str(instr[1]),self.expr_str(instr[2]))) - elif op == 'READ': - _out = "%s READ " % line - first = 1 - for r in instr[1]: - if not first: _out += "," - _out += self.var_str(r) - first = 0 - print(_out) - elif op == 'IF': - print("%s IF %s THEN %d" % (line,self.relexpr_str(instr[1]),instr[2])) - elif op == 'GOTO' or op == 'GOSUB': - print("%s %s %s" % (line, op, instr[1])) - elif op == 'FOR': - _out = "%s FOR %s = %s TO %s" % (line,instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])) - if instr[4]: _out += " STEP %s" % (self.expr_str(instr[4])) - print(_out) - elif op == 'NEXT': - print("%s NEXT %s" % (line, instr[1])) - elif op == 'FUNC': - print("%s DEF %s(%s) = %s" % (line,instr[1],instr[2],self.expr_str(instr[3]))) - elif op == 'DIM': - _out = "%s DIM " % line - first = 1 - for vname,x,y in instr[1]: - if not first: _out += "," - first = 0 - if y == 0: - _out += "%s(%d)" % (vname,x) - else: - _out += "%s(%d,%d)" % (vname,x,y) - - print(_out) - elif op == 'DATA': - _out = "%s DATA " % line - first = 1 - for v in instr[1]: - if not first: _out += "," - first = 0 - _out += v - print(_out) + stat = list(self.prog) # Ordered list of all line numbers + stat.sort() + for line in stat: + instr = self.prog[line] + op = instr[0] + if op in ['END', 'STOP', 'RETURN']: + print("%s %s" % (line, op)) + continue + elif op == 'REM': + print("%s %s" % (line, instr[1])) + elif op == 'PRINT': + _out = "%s %s " % (line, op) + first = 1 + for p in instr[1]: + if not first: + _out += ", " + if p[0] and p[1]: + _out += '"%s"%s' % (p[0], self.expr_str(p[1])) + elif 
p[1]: + _out += self.expr_str(p[1]) + else: + _out += '"%s"' % (p[0],) + first = 0 + if instr[2]: + _out += instr[2] + print(_out) + elif op == 'LET': + print("%s LET %s = %s" % + (line, self.var_str(instr[1]), self.expr_str(instr[2]))) + elif op == 'READ': + _out = "%s READ " % line + first = 1 + for r in instr[1]: + if not first: + _out += "," + _out += self.var_str(r) + first = 0 + print(_out) + elif op == 'IF': + print("%s IF %s THEN %d" % + (line, self.relexpr_str(instr[1]), instr[2])) + elif op == 'GOTO' or op == 'GOSUB': + print("%s %s %s" % (line, op, instr[1])) + elif op == 'FOR': + _out = "%s FOR %s = %s TO %s" % ( + line, instr[1], self.expr_str(instr[2]), self.expr_str(instr[3])) + if instr[4]: + _out += " STEP %s" % (self.expr_str(instr[4])) + print(_out) + elif op == 'NEXT': + print("%s NEXT %s" % (line, instr[1])) + elif op == 'FUNC': + print("%s DEF %s(%s) = %s" % + (line, instr[1], instr[2], self.expr_str(instr[3]))) + elif op == 'DIM': + _out = "%s DIM " % line + first = 1 + for vname, x, y in instr[1]: + if not first: + _out += "," + first = 0 + if y == 0: + _out += "%s(%d)" % (vname, x) + else: + _out += "%s(%d,%d)" % (vname, x, y) + + print(_out) + elif op == 'DATA': + _out = "%s DATA " % line + first = 1 + for v in instr[1]: + if not first: + _out += "," + first = 0 + _out += v + print(_out) # Erase the current program def new(self): - self.prog = {} - + self.prog = {} + # Insert statements - def add_statements(self,prog): - for line,stat in prog.items(): - self.prog[line] = stat + def add_statements(self, prog): + for line, stat in prog.items(): + self.prog[line] = stat # Delete a statement - def del_line(self,lineno): - try: - del self.prog[lineno] - except KeyError: - pass - + def del_line(self, lineno): + try: + del self.prog[lineno] + except KeyError: + pass diff --git a/ext/ply/example/BASIC/basparse.py b/ext/ply/example/BASIC/basparse.py index ccdeb16b6e..d610c7d909 100644 --- a/ext/ply/example/BASIC/basparse.py +++ 
b/ext/ply/example/BASIC/basparse.py @@ -7,64 +7,72 @@ import basiclex tokens = basiclex.tokens precedence = ( - ('left', 'PLUS','MINUS'), - ('left', 'TIMES','DIVIDE'), - ('left', 'POWER'), - ('right','UMINUS') + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('left', 'POWER'), + ('right', 'UMINUS') ) -#### A BASIC program is a series of statements. We represent the program as a -#### dictionary of tuples indexed by line number. +# A BASIC program is a series of statements. We represent the program as a +# dictionary of tuples indexed by line number. + def p_program(p): '''program : program statement | statement''' if len(p) == 2 and p[1]: - p[0] = { } - line,stat = p[1] - p[0][line] = stat - elif len(p) ==3: - p[0] = p[1] - if not p[0]: p[0] = { } - if p[2]: - line,stat = p[2] - p[0][line] = stat + p[0] = {} + line, stat = p[1] + p[0][line] = stat + elif len(p) == 3: + p[0] = p[1] + if not p[0]: + p[0] = {} + if p[2]: + line, stat = p[2] + p[0][line] = stat + +# This catch-all rule is used for any catastrophic errors. In this case, +# we simply return nothing -#### This catch-all rule is used for any catastrophic errors. In this case, -#### we simply return nothing def p_program_error(p): '''program : error''' p[0] = None p.parser.error = 1 -#### Format of all BASIC statements. +# Format of all BASIC statements. + def p_statement(p): '''statement : INTEGER command NEWLINE''' - if isinstance(p[2],str): - print("%s %s %s" % (p[2],"AT LINE", p[1])) + if isinstance(p[2], str): + print("%s %s %s" % (p[2], "AT LINE", p[1])) p[0] = None p.parser.error = 1 else: lineno = int(p[1]) - p[0] = (lineno,p[2]) + p[0] = (lineno, p[2]) + +# Interactive statements. -#### Interactive statements. 
def p_statement_interactive(p): '''statement : RUN NEWLINE | LIST NEWLINE | NEW NEWLINE''' - p[0] = (0, (p[1],0)) + p[0] = (0, (p[1], 0)) + +# Blank line number + -#### Blank line number def p_statement_blank(p): '''statement : INTEGER NEWLINE''' - p[0] = (0,('BLANK',int(p[1]))) + p[0] = (0, ('BLANK', int(p[1]))) + +# Error handling for malformed statements -#### Error handling for malformed statements def p_statement_bad(p): '''statement : INTEGER error NEWLINE''' @@ -72,191 +80,226 @@ def p_statement_bad(p): p[0] = None p.parser.error = 1 -#### Blank line +# Blank line + def p_statement_newline(p): '''statement : NEWLINE''' p[0] = None -#### LET statement +# LET statement + def p_command_let(p): '''command : LET variable EQUALS expr''' - p[0] = ('LET',p[2],p[4]) + p[0] = ('LET', p[2], p[4]) + def p_command_let_bad(p): '''command : LET variable EQUALS error''' p[0] = "BAD EXPRESSION IN LET" -#### READ statement +# READ statement + def p_command_read(p): '''command : READ varlist''' - p[0] = ('READ',p[2]) + p[0] = ('READ', p[2]) + def p_command_read_bad(p): '''command : READ error''' p[0] = "MALFORMED VARIABLE LIST IN READ" -#### DATA statement +# DATA statement + def p_command_data(p): '''command : DATA numlist''' - p[0] = ('DATA',p[2]) + p[0] = ('DATA', p[2]) + def p_command_data_bad(p): '''command : DATA error''' p[0] = "MALFORMED NUMBER LIST IN DATA" -#### PRINT statement +# PRINT statement + def p_command_print(p): '''command : PRINT plist optend''' - p[0] = ('PRINT',p[2],p[3]) + p[0] = ('PRINT', p[2], p[3]) + def p_command_print_bad(p): '''command : PRINT error''' p[0] = "MALFORMED PRINT STATEMENT" -#### Optional ending on PRINT. Either a comma (,) or semicolon (;) +# Optional ending on PRINT. 
Either a comma (,) or semicolon (;) + def p_optend(p): '''optend : COMMA | SEMI |''' - if len(p) == 2: - p[0] = p[1] + if len(p) == 2: + p[0] = p[1] else: - p[0] = None + p[0] = None + +# PRINT statement with no arguments -#### PRINT statement with no arguments def p_command_print_empty(p): '''command : PRINT''' - p[0] = ('PRINT',[],None) + p[0] = ('PRINT', [], None) + +# GOTO statement -#### GOTO statement def p_command_goto(p): '''command : GOTO INTEGER''' - p[0] = ('GOTO',int(p[2])) + p[0] = ('GOTO', int(p[2])) + def p_command_goto_bad(p): '''command : GOTO error''' p[0] = "INVALID LINE NUMBER IN GOTO" -#### IF-THEN statement +# IF-THEN statement + def p_command_if(p): '''command : IF relexpr THEN INTEGER''' - p[0] = ('IF',p[2],int(p[4])) + p[0] = ('IF', p[2], int(p[4])) + def p_command_if_bad(p): '''command : IF error THEN INTEGER''' p[0] = "BAD RELATIONAL EXPRESSION" + def p_command_if_bad2(p): '''command : IF relexpr THEN error''' p[0] = "INVALID LINE NUMBER IN THEN" -#### FOR statement +# FOR statement + def p_command_for(p): '''command : FOR ID EQUALS expr TO expr optstep''' - p[0] = ('FOR',p[2],p[4],p[6],p[7]) + p[0] = ('FOR', p[2], p[4], p[6], p[7]) + def p_command_for_bad_initial(p): '''command : FOR ID EQUALS error TO expr optstep''' p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" + def p_command_for_bad_final(p): '''command : FOR ID EQUALS expr TO error optstep''' p[0] = "BAD FINAL VALUE IN FOR STATEMENT" + def p_command_for_bad_step(p): '''command : FOR ID EQUALS expr TO expr STEP error''' p[0] = "MALFORMED STEP IN FOR STATEMENT" -#### Optional STEP qualifier on FOR statement +# Optional STEP qualifier on FOR statement + def p_optstep(p): '''optstep : STEP expr | empty''' if len(p) == 3: - p[0] = p[2] + p[0] = p[2] else: - p[0] = None + p[0] = None + +# NEXT statement + -#### NEXT statement - def p_command_next(p): '''command : NEXT ID''' - p[0] = ('NEXT',p[2]) + p[0] = ('NEXT', p[2]) + def p_command_next_bad(p): '''command : NEXT error''' p[0] = 
"MALFORMED NEXT" -#### END statement +# END statement + def p_command_end(p): '''command : END''' p[0] = ('END',) -#### REM statement +# REM statement + def p_command_rem(p): '''command : REM''' - p[0] = ('REM',p[1]) + p[0] = ('REM', p[1]) + +# STOP statement -#### STOP statement def p_command_stop(p): '''command : STOP''' p[0] = ('STOP',) -#### DEF statement +# DEF statement + def p_command_def(p): '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' - p[0] = ('FUNC',p[2],p[4],p[7]) + p[0] = ('FUNC', p[2], p[4], p[7]) + def p_command_def_bad_rhs(p): '''command : DEF ID LPAREN ID RPAREN EQUALS error''' p[0] = "BAD EXPRESSION IN DEF STATEMENT" + def p_command_def_bad_arg(p): '''command : DEF ID LPAREN error RPAREN EQUALS expr''' p[0] = "BAD ARGUMENT IN DEF STATEMENT" -#### GOSUB statement +# GOSUB statement + def p_command_gosub(p): '''command : GOSUB INTEGER''' - p[0] = ('GOSUB',int(p[2])) + p[0] = ('GOSUB', int(p[2])) + def p_command_gosub_bad(p): '''command : GOSUB error''' p[0] = "INVALID LINE NUMBER IN GOSUB" -#### RETURN statement +# RETURN statement + def p_command_return(p): '''command : RETURN''' p[0] = ('RETURN',) -#### DIM statement +# DIM statement + def p_command_dim(p): '''command : DIM dimlist''' - p[0] = ('DIM',p[2]) + p[0] = ('DIM', p[2]) + def p_command_dim_bad(p): '''command : DIM error''' p[0] = "MALFORMED VARIABLE LIST IN DIM" -#### List of variables supplied to DIM statement +# List of variables supplied to DIM statement + def p_dimlist(p): '''dimlist : dimlist COMMA dimitem @@ -267,17 +310,20 @@ def p_dimlist(p): else: p[0] = [p[1]] -#### DIM items +# DIM items + def p_dimitem_single(p): '''dimitem : ID LPAREN INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),0) + p[0] = (p[1], eval(p[3]), 0) + def p_dimitem_double(p): '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),eval(p[5])) + p[0] = (p[1], eval(p[3]), eval(p[5])) + +# Arithmetic expressions -#### Arithmetic expressions def p_expr_binary(p): '''expr : expr PLUS 
expr @@ -286,26 +332,31 @@ def p_expr_binary(p): | expr DIVIDE expr | expr POWER expr''' - p[0] = ('BINOP',p[2],p[1],p[3]) + p[0] = ('BINOP', p[2], p[1], p[3]) + def p_expr_number(p): '''expr : INTEGER | FLOAT''' - p[0] = ('NUM',eval(p[1])) + p[0] = ('NUM', eval(p[1])) + def p_expr_variable(p): '''expr : variable''' - p[0] = ('VAR',p[1]) + p[0] = ('VAR', p[1]) + def p_expr_group(p): '''expr : LPAREN expr RPAREN''' - p[0] = ('GROUP',p[2]) + p[0] = ('GROUP', p[2]) + def p_expr_unary(p): '''expr : MINUS expr %prec UMINUS''' - p[0] = ('UNARY','-',p[2]) + p[0] = ('UNARY', '-', p[2]) + +# Relational expressions -#### Relational expressions def p_relexpr(p): '''relexpr : expr LT expr @@ -314,111 +365,110 @@ def p_relexpr(p): | expr GE expr | expr EQUALS expr | expr NE expr''' - p[0] = ('RELOP',p[2],p[1],p[3]) + p[0] = ('RELOP', p[2], p[1], p[3]) + +# Variables -#### Variables def p_variable(p): '''variable : ID | ID LPAREN expr RPAREN | ID LPAREN expr COMMA expr RPAREN''' if len(p) == 2: - p[0] = (p[1],None,None) + p[0] = (p[1], None, None) elif len(p) == 5: - p[0] = (p[1],p[3],None) + p[0] = (p[1], p[3], None) else: - p[0] = (p[1],p[3],p[5]) + p[0] = (p[1], p[3], p[5]) + +# Builds a list of variable targets as a Python list -#### Builds a list of variable targets as a Python list def p_varlist(p): '''varlist : varlist COMMA variable | variable''' if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) + p[0] = p[1] + p[0].append(p[3]) else: - p[0] = [p[1]] + p[0] = [p[1]] -#### Builds a list of numbers as a Python list +# Builds a list of numbers as a Python list def p_numlist(p): '''numlist : numlist COMMA number | number''' if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) + p[0] = p[1] + p[0].append(p[3]) else: - p[0] = [p[1]] + p[0] = [p[1]] + +# A number. May be an integer or a float -#### A number. May be an integer or a float def p_number(p): '''number : INTEGER | FLOAT''' p[0] = eval(p[1]) -#### A signed number. +# A signed number. 
+ def p_number_signed(p): '''number : MINUS INTEGER | MINUS FLOAT''' - p[0] = eval("-"+p[2]) + p[0] = eval("-" + p[2]) + +# List of targets for a print statement +# Returns a list of tuples (label,expr) -#### List of targets for a print statement -#### Returns a list of tuples (label,expr) def p_plist(p): '''plist : plist COMMA pitem | pitem''' if len(p) > 3: - p[0] = p[1] - p[0].append(p[3]) + p[0] = p[1] + p[0].append(p[3]) else: - p[0] = [p[1]] + p[0] = [p[1]] + def p_item_string(p): '''pitem : STRING''' - p[0] = (p[1][1:-1],None) + p[0] = (p[1][1:-1], None) + def p_item_string_expr(p): '''pitem : STRING expr''' - p[0] = (p[1][1:-1],p[2]) + p[0] = (p[1][1:-1], p[2]) + def p_item_expr(p): '''pitem : expr''' - p[0] = ("",p[1]) + p[0] = ("", p[1]) + +# Empty + -#### Empty - def p_empty(p): '''empty : ''' -#### Catastrophic error handler +# Catastrophic error handler + + def p_error(p): if not p: print("SYNTAX ERROR AT EOF") bparser = yacc.yacc() -def parse(data,debug=0): + +def parse(data, debug=0): bparser.error = 0 - p = bparser.parse(data,debug=debug) - if bparser.error: return None + p = bparser.parse(data, debug=debug) + if bparser.error: + return None return p - - - - - - - - - - - - - - diff --git a/ext/ply/example/GardenSnake/GardenSnake.py b/ext/ply/example/GardenSnake/GardenSnake.py index 2a7f45eb19..8b493b40dc 100644 --- a/ext/ply/example/GardenSnake/GardenSnake.py +++ b/ext/ply/example/GardenSnake/GardenSnake.py @@ -37,7 +37,7 @@ # Modifications for inclusion in PLY distribution import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") from ply import * ##### Lexer ###### @@ -69,18 +69,21 @@ tokens = ( 'INDENT', 'DEDENT', 'ENDMARKER', - ) +) #t_NUMBER = r'\d+' # taken from decmial.py but without the leading sign + + def t_NUMBER(t): r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" t.value = decimal.Decimal(t.value) return t + def t_STRING(t): r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... 
- t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun + t.value = t.value[1:-1].decode("string-escape") # .swapcase() # for fun return t t_COLON = r':' @@ -98,10 +101,11 @@ t_SEMICOLON = r';' # Ply nicely documented how to do this. RESERVED = { - "def": "DEF", - "if": "IF", - "return": "RETURN", - } + "def": "DEF", + "if": "IF", + "return": "RETURN", +} + def t_NAME(t): r'[a-zA-Z_][a-zA-Z0-9_]*' @@ -111,6 +115,8 @@ def t_NAME(t): # Putting this before t_WS let it consume lines with only comments in # them so the latter code never sees the WS part. Not consuming the # newline. Needed for "if 1: #comment" + + def t_comment(t): r"[ ]*\043[^\n]*" # \043 is '#' pass @@ -125,6 +131,8 @@ def t_WS(t): # Don't generate newline tokens when inside of parenthesis, eg # a = (1, # 2, 3) + + def t_newline(t): r'\n+' t.lexer.lineno += len(t.value) @@ -132,11 +140,13 @@ def t_newline(t): if t.lexer.paren_count == 0: return t + def t_LPAR(t): r'\(' t.lexer.paren_count += 1 return t + def t_RPAR(t): r'\)' # check for underflow? should be the job of the parser @@ -149,7 +159,7 @@ def t_error(t): print "Skipping", repr(t.value[0]) t.lexer.skip(1) -## I implemented INDENT / DEDENT generation as a post-processing filter +# I implemented INDENT / DEDENT generation as a post-processing filter # The original lex token stream contains WS and NEWLINE characters. # WS will only occur before any other tokens on a line. 
@@ -169,6 +179,8 @@ MAY_INDENT = 1 MUST_INDENT = 2 # only care about whitespace at the start of a line + + def track_tokens_filter(lexer, tokens): lexer.at_line_start = at_line_start = True indent = NO_INDENT @@ -180,7 +192,7 @@ def track_tokens_filter(lexer, tokens): at_line_start = False indent = MAY_INDENT token.must_indent = False - + elif token.type == "NEWLINE": at_line_start = True if indent == MAY_INDENT: @@ -204,6 +216,7 @@ def track_tokens_filter(lexer, tokens): yield token lexer.at_line_start = at_line_start + def _new_token(type, lineno): tok = lex.LexToken() tok.type = type @@ -212,10 +225,14 @@ def _new_token(type, lineno): return tok # Synthesize a DEDENT tag + + def DEDENT(lineno): return _new_token("DEDENT", lineno) # Synthesize an INDENT tag + + def INDENT(lineno): return _new_token("INDENT", lineno) @@ -228,14 +245,14 @@ def indentation_filter(tokens): depth = 0 prev_was_ws = False for token in tokens: -## if 1: -## print "Process", token, -## if token.at_line_start: -## print "at_line_start", -## if token.must_indent: -## print "must_indent", -## print - + # if 1: + # print "Process", token, + # if token.at_line_start: + # print "at_line_start", + # if token.must_indent: + # print "must_indent", + # print + # WS only occurs at the start of the line # There may be WS followed by NEWLINE so # only track the depth here. 
Don't indent/dedent @@ -274,14 +291,15 @@ def indentation_filter(tokens): # At the same level pass elif depth > levels[-1]: - raise IndentationError("indentation increase but not in new block") + raise IndentationError( + "indentation increase but not in new block") else: # Back up; but only if it matches a previous level try: i = levels.index(depth) except ValueError: raise IndentationError("inconsistent indentation") - for _ in range(i+1, len(levels)): + for _ in range(i + 1, len(levels)): yield DEDENT(token.lineno) levels.pop() @@ -294,11 +312,11 @@ def indentation_filter(tokens): assert token is not None for _ in range(1, len(levels)): yield DEDENT(token.lineno) - + # The top-level filter adds an ENDMARKER, if requested. # Python's grammar uses it. -def filter(lexer, add_endmarker = True): +def filter(lexer, add_endmarker=True): token = None tokens = iter(lexer.token, None) tokens = track_tokens_filter(lexer, tokens) @@ -313,14 +331,19 @@ def filter(lexer, add_endmarker = True): # Combine Ply and my filters into a new lexer + class IndentLexer(object): + def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): - self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags) + self.lexer = lex.lex(debug=debug, optimize=optimize, + lextab=lextab, reflags=reflags) self.token_stream = None + def input(self, s, add_endmarker=True): self.lexer.paren_count = 0 self.lexer.input(s) self.token_stream = filter(self.lexer, add_endmarker) + def token(self): try: return self.token_stream.next() @@ -336,6 +359,8 @@ class IndentLexer(object): from compiler import ast # Helper function + + def Assign(left, right): names = [] if isinstance(left, ast.Name): @@ -356,35 +381,39 @@ def Assign(left, right): # The grammar comments come from Python's Grammar/Grammar file -## NB: compound_stmt in single_input is followed by extra NEWLINE! +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
# file_input: (NEWLINE | stmt)* ENDMARKER def p_file_input_end(p): """file_input_end : file_input ENDMARKER""" p[0] = ast.Stmt(p[1]) + + def p_file_input(p): """file_input : file_input NEWLINE | file_input stmt | NEWLINE | stmt""" - if isinstance(p[len(p)-1], basestring): + if isinstance(p[len(p) - 1], basestring): if len(p) == 3: p[0] = p[1] else: - p[0] = [] # p == 2 --> only a blank line + p[0] = [] # p == 2 --> only a blank line else: if len(p) == 3: p[0] = p[1] + p[2] else: p[0] = p[1] - + # funcdef: [decorators] 'def' NAME parameters ':' suite # ignoring decorators def p_funcdef(p): "funcdef : DEF NAME parameters COLON suite" p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) - + # parameters: '(' [varargslist] ')' + + def p_parameters(p): """parameters : LPAR RPAR | LPAR varargslist RPAR""" @@ -392,9 +421,9 @@ def p_parameters(p): p[0] = [] else: p[0] = p[2] - -# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | + +# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | # highly simplified def p_varargslist(p): """varargslist : varargslist COMMA NAME @@ -405,21 +434,27 @@ def p_varargslist(p): p[0] = [p[1]] # stmt: simple_stmt | compound_stmt + + def p_stmt_simple(p): """stmt : simple_stmt""" # simple_stmt is a list p[0] = p[1] - + + def p_stmt_compound(p): """stmt : compound_stmt""" p[0] = [p[1]] # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + + def p_simple_stmt(p): """simple_stmt : small_stmts NEWLINE | small_stmts SEMICOLON NEWLINE""" p[0] = p[1] + def p_small_stmts(p): """small_stmts : small_stmts SEMICOLON small_stmt | small_stmt""" @@ -430,6 +465,8 @@ def p_small_stmts(p): # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | # import_stmt | global_stmt | exec_stmt | assert_stmt + + def p_small_stmt(p): """small_stmt : flow_stmt | expr_stmt""" @@ -439,6 +476,8 @@ def p_small_stmt(p): # ('=' (yield_expr|testlist))*) # augassign: ('+=' | '-=' | '*=' | '/=' | 
'%=' | '&=' | '|=' | '^=' | # '<<=' | '>>=' | '**=' | '//=') + + def p_expr_stmt(p): """expr_stmt : testlist ASSIGN testlist | testlist """ @@ -448,11 +487,14 @@ def p_expr_stmt(p): else: p[0] = Assign(p[1], p[3]) + def p_flow_stmt(p): "flow_stmt : return_stmt" p[0] = p[1] # return_stmt: 'return' [testlist] + + def p_return_stmt(p): "return_stmt : RETURN testlist" p[0] = ast.Return(p[2]) @@ -463,10 +505,12 @@ def p_compound_stmt(p): | funcdef""" p[0] = p[1] + def p_if_stmt(p): 'if_stmt : IF test COLON suite' p[0] = ast.If([(p[2], p[4])], None) + def p_suite(p): """suite : simple_stmt | NEWLINE INDENT stmts DEDENT""" @@ -474,7 +518,7 @@ def p_suite(p): p[0] = ast.Stmt(p[1]) else: p[0] = ast.Stmt(p[3]) - + def p_stmts(p): """stmts : stmts stmt @@ -484,7 +528,7 @@ def p_stmts(p): else: p[0] = p[1] -## No using Python's approach because Ply supports precedence +# No using Python's approach because Ply supports precedence # comparison: expr (comp_op expr)* # arith_expr: term (('+'|'-') term)* @@ -492,12 +536,17 @@ def p_stmts(p): # factor: ('+'|'-'|'~') factor | power # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + def make_lt_compare((left, right)): - return ast.Compare(left, [('<', right),]) + return ast.Compare(left, [('<', right), ]) + + def make_gt_compare((left, right)): - return ast.Compare(left, [('>', right),]) + return ast.Compare(left, [('>', right), ]) + + def make_eq_compare((left, right)): - return ast.Compare(left, [('==', right),]) + return ast.Compare(left, [('==', right), ]) binary_ops = { @@ -512,12 +561,13 @@ binary_ops = { unary_ops = { "+": ast.UnaryAdd, "-": ast.UnarySub, - } +} precedence = ( ("left", "EQ", "GT", "LT"), ("left", "PLUS", "MINUS"), ("left", "MULT", "DIV"), - ) +) + def p_comparison(p): """comparison : comparison PLUS comparison @@ -536,10 +586,12 @@ def p_comparison(p): p[0] = unary_ops[p[1]](p[2]) else: p[0] = p[1] - + # power: atom trailer* ['**' factor] # trailers enables function calls. 
I only allow one level of calls # so this is 'trailer' + + def p_power(p): """power : atom | atom trailer""" @@ -551,26 +603,33 @@ def p_power(p): else: raise AssertionError("not implemented") + def p_atom_name(p): """atom : NAME""" p[0] = ast.Name(p[1]) + def p_atom_number(p): """atom : NUMBER | STRING""" p[0] = ast.Const(p[1]) + def p_atom_tuple(p): """atom : LPAR testlist RPAR""" p[0] = p[2] # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + + def p_trailer(p): "trailer : LPAR arglist RPAR" p[0] = ("CALL", p[2]) # testlist: test (',' test)* [','] # Contains shift/reduce error + + def p_testlist(p): """testlist : testlist_multi COMMA | testlist_multi """ @@ -586,6 +645,7 @@ def p_testlist(p): if isinstance(p[0], list): p[0] = ast.Tuple(p[0]) + def p_testlist_multi(p): """testlist_multi : testlist_multi COMMA test | test""" @@ -605,7 +665,6 @@ def p_testlist_multi(p): def p_test(p): "test : comparison" p[0] = p[1] - # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) @@ -619,17 +678,21 @@ def p_arglist(p): p[0] = [p[1]] # argument: test [gen_for] | test '=' test # Really [keyword '='] test + + def p_argument(p): "argument : test" p[0] = p[1] + def p_error(p): - #print "Error!", repr(p) + # print "Error!", repr(p) raise SyntaxError(p) class GardenSnakeParser(object): - def __init__(self, lexer = None): + + def __init__(self, lexer=None): if lexer is None: lexer = IndentLexer() self.lexer = lexer @@ -637,20 +700,23 @@ class GardenSnakeParser(object): def parse(self, code): self.lexer.input(code) - result = self.parser.parse(lexer = self.lexer) + result = self.parser.parse(lexer=self.lexer) return ast.Module(None, result) ###### Code generation ###### - + from compiler import misc, syntax, pycodegen + class GardenSnakeCompiler(object): + def __init__(self): self.parser = GardenSnakeParser() + def compile(self, code, filename=""): tree = self.parser.parse(code) - #print tree + # print tree misc.set_filename(filename, tree) 
syntax.check(tree) gen = pycodegen.ModuleCodeGenerator(tree) @@ -658,7 +724,7 @@ class GardenSnakeCompiler(object): return code ####### Test code ####### - + compile = GardenSnakeCompiler().compile code = r""" @@ -698,8 +764,10 @@ print('BIG DECIMAL', 1.234567891234567e12345) """ # Set up the GardenSnake run-time environment + + def print_(*args): - print "-->", " ".join(map(str,args)) + print "-->", " ".join(map(str, args)) globals()["print"] = print_ diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py index 37fdd8e661..4bde1d730b 100644 --- a/ext/ply/example/ansic/clex.py +++ b/ext/ply/example/ansic/clex.py @@ -5,7 +5,7 @@ # ---------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") import ply.lex as lex @@ -15,10 +15,11 @@ reserved = ( 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE', - ) +) tokens = reserved + ( - # Literals (identifier, integer constant, float constant, string constant, char const) + # Literals (identifier, integer constant, float constant, string constant, + # char const) 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) @@ -26,10 +27,10 @@ tokens = reserved + ( 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', 'LOR', 'LAND', 'LNOT', 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + 'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', # Increment/decrement (++,--) 'PLUSPLUS', 'MINUSMINUS', @@ -39,7 +40,7 @@ tokens = reserved + ( # Conditional operator (?) 'CONDOP', - + # Delimeters ( ) [ ] { } , . 
; : 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', @@ -48,84 +49,87 @@ tokens = reserved + ( # Ellipsis (...) 'ELLIPSIS', - ) +) # Completely ignored characters -t_ignore = ' \t\x0c' +t_ignore = ' \t\x0c' # Newlines + + def t_NEWLINE(t): r'\n+' t.lexer.lineno += t.value.count("\n") - + # Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MOD = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MOD = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'\^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' +t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' # Assignment operators -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'\^=' # Increment/decrement -t_PLUSPLUS = r'\+\+' -t_MINUSMINUS = r'--' +t_PLUSPLUS = r'\+\+' +t_MINUSMINUS = r'--' # -> -t_ARROW = r'->' +t_ARROW = r'->' # ? -t_CONDOP = r'\?' +t_CONDOP = r'\?' # Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' 
# Identifiers and reserved words -reserved_map = { } +reserved_map = {} for r in reserved: reserved_map[r.lower()] = r + def t_ID(t): r'[A-Za-z_][\w_]*' - t.type = reserved_map.get(t.value,"ID") + t.type = reserved_map.get(t.value, "ID") return t # Integer literal @@ -141,24 +145,24 @@ t_SCONST = r'\"([^\\\n]|(\\.))*?\"' t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' # Comments + + def t_comment(t): r'/\*(.|\n)*?\*/' t.lexer.lineno += t.value.count('\n') # Preprocessor directive (ignored) + + def t_preprocessor(t): r'\#(.)*?\n' t.lexer.lineno += 1 - + + def t_error(t): print("Illegal character %s" % repr(t.value[0])) t.lexer.skip(1) - -lexer = lex.lex(optimize=1) + +lexer = lex.lex() if __name__ == "__main__": lex.runmain(lexer) - - - - - diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py index c9b9164555..5fe9bce042 100644 --- a/ext/ply/example/ansic/cparse.py +++ b/ext/ply/example/ansic/cparse.py @@ -13,88 +13,109 @@ tokens = clex.tokens # translation-unit: + def p_translation_unit_1(t): 'translation_unit : external_declaration' pass + def p_translation_unit_2(t): 'translation_unit : translation_unit external_declaration' pass # external-declaration: + def p_external_declaration_1(t): 'external_declaration : function_definition' pass + def p_external_declaration_2(t): 'external_declaration : declaration' pass # function-definition: + def p_function_definition_1(t): 'function_definition : declaration_specifiers declarator declaration_list compound_statement' pass + def p_function_definition_2(t): 'function_definition : declarator declaration_list compound_statement' pass + def p_function_definition_3(t): 'function_definition : declarator compound_statement' pass + def p_function_definition_4(t): 'function_definition : declaration_specifiers declarator compound_statement' pass # declaration: + def p_declaration_1(t): 'declaration : declaration_specifiers init_declarator_list SEMI' pass + def p_declaration_2(t): 'declaration : 
declaration_specifiers SEMI' pass # declaration-list: + def p_declaration_list_1(t): 'declaration_list : declaration' pass + def p_declaration_list_2(t): 'declaration_list : declaration_list declaration ' pass # declaration-specifiers + + def p_declaration_specifiers_1(t): 'declaration_specifiers : storage_class_specifier declaration_specifiers' pass + def p_declaration_specifiers_2(t): 'declaration_specifiers : type_specifier declaration_specifiers' pass + def p_declaration_specifiers_3(t): 'declaration_specifiers : type_qualifier declaration_specifiers' pass + def p_declaration_specifiers_4(t): 'declaration_specifiers : storage_class_specifier' pass + def p_declaration_specifiers_5(t): 'declaration_specifiers : type_specifier' pass + def p_declaration_specifiers_6(t): 'declaration_specifiers : type_qualifier' pass # storage-class-specifier + + def p_storage_class_specifier(t): '''storage_class_specifier : AUTO | REGISTER @@ -105,6 +126,8 @@ def p_storage_class_specifier(t): pass # type-specifier: + + def p_type_specifier(t): '''type_specifier : VOID | CHAR @@ -122,6 +145,8 @@ def p_type_specifier(t): pass # type-qualifier: + + def p_type_qualifier(t): '''type_qualifier : CONST | VOLATILE''' @@ -129,19 +154,24 @@ def p_type_qualifier(t): # struct-or-union-specifier + def p_struct_or_union_specifier_1(t): 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' pass + def p_struct_or_union_specifier_2(t): 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' pass + def p_struct_or_union_specifier_3(t): 'struct_or_union_specifier : struct_or_union ID' pass # struct-or-union: + + def p_struct_or_union(t): '''struct_or_union : STRUCT | UNION @@ -150,221 +180,273 @@ def p_struct_or_union(t): # struct-declaration-list: + def p_struct_declaration_list_1(t): 'struct_declaration_list : struct_declaration' pass + def p_struct_declaration_list_2(t): 'struct_declaration_list : struct_declaration_list 
struct_declaration' pass # init-declarator-list: + def p_init_declarator_list_1(t): 'init_declarator_list : init_declarator' pass + def p_init_declarator_list_2(t): 'init_declarator_list : init_declarator_list COMMA init_declarator' pass # init-declarator + def p_init_declarator_1(t): 'init_declarator : declarator' pass + def p_init_declarator_2(t): 'init_declarator : declarator EQUALS initializer' pass # struct-declaration: + def p_struct_declaration(t): 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' pass # specifier-qualifier-list: + def p_specifier_qualifier_list_1(t): 'specifier_qualifier_list : type_specifier specifier_qualifier_list' pass + def p_specifier_qualifier_list_2(t): 'specifier_qualifier_list : type_specifier' pass + def p_specifier_qualifier_list_3(t): 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' pass + def p_specifier_qualifier_list_4(t): 'specifier_qualifier_list : type_qualifier' pass # struct-declarator-list: + def p_struct_declarator_list_1(t): 'struct_declarator_list : struct_declarator' pass + def p_struct_declarator_list_2(t): 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' pass # struct-declarator: + def p_struct_declarator_1(t): 'struct_declarator : declarator' pass + def p_struct_declarator_2(t): 'struct_declarator : declarator COLON constant_expression' pass + def p_struct_declarator_3(t): 'struct_declarator : COLON constant_expression' pass # enum-specifier: + def p_enum_specifier_1(t): 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' pass + def p_enum_specifier_2(t): 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' pass + def p_enum_specifier_3(t): 'enum_specifier : ENUM ID' pass # enumerator_list: + + def p_enumerator_list_1(t): 'enumerator_list : enumerator' pass + def p_enumerator_list_2(t): 'enumerator_list : enumerator_list COMMA enumerator' pass # enumerator: + + def p_enumerator_1(t): 'enumerator : ID' pass + def p_enumerator_2(t): 
'enumerator : ID EQUALS constant_expression' pass # declarator: + def p_declarator_1(t): 'declarator : pointer direct_declarator' pass + def p_declarator_2(t): 'declarator : direct_declarator' pass # direct-declarator: + def p_direct_declarator_1(t): 'direct_declarator : ID' pass + def p_direct_declarator_2(t): 'direct_declarator : LPAREN declarator RPAREN' pass + def p_direct_declarator_3(t): 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' pass + def p_direct_declarator_4(t): 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' pass + def p_direct_declarator_5(t): 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' pass + def p_direct_declarator_6(t): 'direct_declarator : direct_declarator LPAREN RPAREN ' pass # pointer: + + def p_pointer_1(t): 'pointer : TIMES type_qualifier_list' pass + def p_pointer_2(t): 'pointer : TIMES' pass + def p_pointer_3(t): 'pointer : TIMES type_qualifier_list pointer' pass + def p_pointer_4(t): 'pointer : TIMES pointer' pass # type-qualifier-list: + def p_type_qualifier_list_1(t): 'type_qualifier_list : type_qualifier' pass + def p_type_qualifier_list_2(t): 'type_qualifier_list : type_qualifier_list type_qualifier' pass # parameter-type-list: + def p_parameter_type_list_1(t): 'parameter_type_list : parameter_list' pass + def p_parameter_type_list_2(t): 'parameter_type_list : parameter_list COMMA ELLIPSIS' pass # parameter-list: + def p_parameter_list_1(t): 'parameter_list : parameter_declaration' pass + def p_parameter_list_2(t): 'parameter_list : parameter_list COMMA parameter_declaration' pass # parameter-declaration: + + def p_parameter_declaration_1(t): 'parameter_declaration : declaration_specifiers declarator' pass + def p_parameter_declaration_2(t): 'parameter_declaration : declaration_specifiers abstract_declarator_opt' pass # identifier-list: + + def p_identifier_list_1(t): 'identifier_list : ID' pass + def p_identifier_list_2(t): 'identifier_list : 
identifier_list COMMA ID' pass # initializer: + def p_initializer_1(t): 'initializer : assignment_expression' pass + def p_initializer_2(t): '''initializer : LBRACE initializer_list RBRACE | LBRACE initializer_list COMMA RBRACE''' @@ -372,84 +454,102 @@ def p_initializer_2(t): # initializer-list: + def p_initializer_list_1(t): 'initializer_list : initializer' pass + def p_initializer_list_2(t): 'initializer_list : initializer_list COMMA initializer' pass # type-name: + def p_type_name(t): 'type_name : specifier_qualifier_list abstract_declarator_opt' pass + def p_abstract_declarator_opt_1(t): 'abstract_declarator_opt : empty' pass + def p_abstract_declarator_opt_2(t): 'abstract_declarator_opt : abstract_declarator' pass # abstract-declarator: + def p_abstract_declarator_1(t): 'abstract_declarator : pointer ' pass + def p_abstract_declarator_2(t): 'abstract_declarator : pointer direct_abstract_declarator' pass + def p_abstract_declarator_3(t): 'abstract_declarator : direct_abstract_declarator' pass # direct-abstract-declarator: + def p_direct_abstract_declarator_1(t): 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' pass + def p_direct_abstract_declarator_2(t): 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' pass + def p_direct_abstract_declarator_3(t): 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' pass + def p_direct_abstract_declarator_4(t): 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' pass + def p_direct_abstract_declarator_5(t): 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' pass # Optional fields in abstract declarators + def p_constant_expression_opt_1(t): 'constant_expression_opt : empty' pass + def p_constant_expression_opt_2(t): 'constant_expression_opt : constant_expression' pass + def p_parameter_type_list_opt_1(t): 'parameter_type_list_opt : empty' pass + def 
p_parameter_type_list_opt_2(t): 'parameter_type_list_opt : parameter_type_list' pass # statement: + def p_statement(t): ''' statement : labeled_statement @@ -463,124 +563,155 @@ def p_statement(t): # labeled-statement: + def p_labeled_statement_1(t): 'labeled_statement : ID COLON statement' pass + def p_labeled_statement_2(t): 'labeled_statement : CASE constant_expression COLON statement' pass + def p_labeled_statement_3(t): 'labeled_statement : DEFAULT COLON statement' pass # expression-statement: + + def p_expression_statement(t): 'expression_statement : expression_opt SEMI' pass # compound-statement: + def p_compound_statement_1(t): 'compound_statement : LBRACE declaration_list statement_list RBRACE' pass + def p_compound_statement_2(t): 'compound_statement : LBRACE statement_list RBRACE' pass + def p_compound_statement_3(t): 'compound_statement : LBRACE declaration_list RBRACE' pass + def p_compound_statement_4(t): 'compound_statement : LBRACE RBRACE' pass # statement-list: + def p_statement_list_1(t): 'statement_list : statement' pass + def p_statement_list_2(t): 'statement_list : statement_list statement' pass # selection-statement + def p_selection_statement_1(t): 'selection_statement : IF LPAREN expression RPAREN statement' pass + def p_selection_statement_2(t): 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' pass + def p_selection_statement_3(t): 'selection_statement : SWITCH LPAREN expression RPAREN statement ' pass # iteration_statement: + def p_iteration_statement_1(t): 'iteration_statement : WHILE LPAREN expression RPAREN statement' pass + def p_iteration_statement_2(t): 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' pass + def p_iteration_statement_3(t): 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' pass # jump_statement: + def p_jump_statement_1(t): 'jump_statement : GOTO ID SEMI' pass + def p_jump_statement_2(t): 'jump_statement : 
CONTINUE SEMI' pass + def p_jump_statement_3(t): 'jump_statement : BREAK SEMI' pass + def p_jump_statement_4(t): 'jump_statement : RETURN expression_opt SEMI' pass + def p_expression_opt_1(t): 'expression_opt : empty' pass + def p_expression_opt_2(t): 'expression_opt : expression' pass # expression: + + def p_expression_1(t): 'expression : assignment_expression' pass + def p_expression_2(t): 'expression : expression COMMA assignment_expression' pass # assigment_expression: + + def p_assignment_expression_1(t): 'assignment_expression : conditional_expression' pass + def p_assignment_expression_2(t): 'assignment_expression : unary_expression assignment_operator assignment_expression' pass # assignment_operator: + + def p_assignment_operator(t): ''' assignment_operator : EQUALS @@ -598,66 +729,80 @@ def p_assignment_operator(t): pass # conditional-expression + + def p_conditional_expression_1(t): 'conditional_expression : logical_or_expression' pass + def p_conditional_expression_2(t): 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' pass # constant-expression + def p_constant_expression(t): 'constant_expression : conditional_expression' pass # logical-or-expression + def p_logical_or_expression_1(t): 'logical_or_expression : logical_and_expression' pass + def p_logical_or_expression_2(t): 'logical_or_expression : logical_or_expression LOR logical_and_expression' pass # logical-and-expression + def p_logical_and_expression_1(t): 'logical_and_expression : inclusive_or_expression' pass + def p_logical_and_expression_2(t): 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' pass # inclusive-or-expression: + def p_inclusive_or_expression_1(t): 'inclusive_or_expression : exclusive_or_expression' pass + def p_inclusive_or_expression_2(t): 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' pass # exclusive-or-expression: + def p_exclusive_or_expression_1(t): 
'exclusive_or_expression : and_expression' pass + def p_exclusive_or_expression_2(t): 'exclusive_or_expression : exclusive_or_expression XOR and_expression' pass # AND-expression + def p_and_expression_1(t): 'and_expression : equality_expression' pass + def p_and_expression_2(t): 'and_expression : and_expression AND equality_expression' pass @@ -668,10 +813,12 @@ def p_equality_expression_1(t): 'equality_expression : relational_expression' pass + def p_equality_expression_2(t): 'equality_expression : equality_expression EQ relational_expression' pass + def p_equality_expression_3(t): 'equality_expression : equality_expression NE relational_expression' pass @@ -682,104 +829,129 @@ def p_relational_expression_1(t): 'relational_expression : shift_expression' pass + def p_relational_expression_2(t): 'relational_expression : relational_expression LT shift_expression' pass + def p_relational_expression_3(t): 'relational_expression : relational_expression GT shift_expression' pass + def p_relational_expression_4(t): 'relational_expression : relational_expression LE shift_expression' pass + def p_relational_expression_5(t): 'relational_expression : relational_expression GE shift_expression' pass # shift-expression + def p_shift_expression_1(t): 'shift_expression : additive_expression' pass + def p_shift_expression_2(t): 'shift_expression : shift_expression LSHIFT additive_expression' pass + def p_shift_expression_3(t): 'shift_expression : shift_expression RSHIFT additive_expression' pass # additive-expression + def p_additive_expression_1(t): 'additive_expression : multiplicative_expression' pass + def p_additive_expression_2(t): 'additive_expression : additive_expression PLUS multiplicative_expression' pass + def p_additive_expression_3(t): 'additive_expression : additive_expression MINUS multiplicative_expression' pass # multiplicative-expression + def p_multiplicative_expression_1(t): 'multiplicative_expression : cast_expression' pass + def 
p_multiplicative_expression_2(t): 'multiplicative_expression : multiplicative_expression TIMES cast_expression' pass + def p_multiplicative_expression_3(t): 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' pass + def p_multiplicative_expression_4(t): 'multiplicative_expression : multiplicative_expression MOD cast_expression' pass # cast-expression: + def p_cast_expression_1(t): 'cast_expression : unary_expression' pass + def p_cast_expression_2(t): 'cast_expression : LPAREN type_name RPAREN cast_expression' pass # unary-expression: + + def p_unary_expression_1(t): 'unary_expression : postfix_expression' pass + def p_unary_expression_2(t): 'unary_expression : PLUSPLUS unary_expression' pass + def p_unary_expression_3(t): 'unary_expression : MINUSMINUS unary_expression' pass + def p_unary_expression_4(t): 'unary_expression : unary_operator cast_expression' pass + def p_unary_expression_5(t): 'unary_expression : SIZEOF unary_expression' pass + def p_unary_expression_6(t): 'unary_expression : SIZEOF LPAREN type_name RPAREN' pass - -#unary-operator + +# unary-operator + + def p_unary_operator(t): '''unary_operator : AND | TIMES @@ -790,39 +962,50 @@ def p_unary_operator(t): pass # postfix-expression: + + def p_postfix_expression_1(t): 'postfix_expression : primary_expression' pass + def p_postfix_expression_2(t): 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' pass + def p_postfix_expression_3(t): 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' pass + def p_postfix_expression_4(t): 'postfix_expression : postfix_expression LPAREN RPAREN' pass + def p_postfix_expression_5(t): 'postfix_expression : postfix_expression PERIOD ID' pass + def p_postfix_expression_6(t): 'postfix_expression : postfix_expression ARROW ID' pass + def p_postfix_expression_7(t): 'postfix_expression : postfix_expression PLUSPLUS' pass + def p_postfix_expression_8(t): 'postfix_expression : postfix_expression 
MINUSMINUS' pass # primary-expression: + + def p_primary_expression(t): '''primary_expression : ID | constant @@ -831,33 +1014,35 @@ def p_primary_expression(t): pass # argument-expression-list: + + def p_argument_expression_list(t): '''argument_expression_list : assignment_expression | argument_expression_list COMMA assignment_expression''' pass # constant: -def p_constant(t): - '''constant : ICONST - | FCONST - | CCONST''' - pass + + +def p_constant(t): + '''constant : ICONST + | FCONST + | CCONST''' + pass def p_empty(t): 'empty : ' pass + def p_error(t): print("Whoa. We're hosed") import profile # Build the grammar -yacc.yacc(method='LALR') +yacc.yacc() +#yacc.yacc(method='LALR',write_tables=False,debug=False) #profile.run("yacc.yacc(method='LALR')") - - - - diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py index b92378043a..824c3d7d0a 100644 --- a/ext/ply/example/calc/calc.py +++ b/ext/ply/example/calc/calc.py @@ -6,20 +6,21 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input tokens = ( - 'NAME','NUMBER', - ) + 'NAME', 'NUMBER', +) -literals = ['=','+','-','*','/', '(',')'] +literals = ['=', '+', '-', '*', '/', '(', ')'] # Tokens -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + def t_NUMBER(t): r'\d+' @@ -28,14 +29,16 @@ def t_NUMBER(t): t_ignore = " \t" + def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") - + + def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) - + # Build the lexer import ply.lex as lex lex.lex() @@ -43,44 +46,55 @@ lex.lex() # Parsing rules precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) # dictionary of names -names = { } +names = {} + def p_statement_assign(p): 'statement : NAME "=" expression' names[p[1]] = 
p[3] + def p_statement_expr(p): 'statement : expression' print(p[1]) + def p_expression_binop(p): '''expression : expression '+' expression | expression '-' expression | expression '*' expression | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + def p_expression_uminus(p): "expression : '-' expression %prec UMINUS" p[0] = -p[2] + def p_expression_group(p): "expression : '(' expression ')'" p[0] = p[2] + def p_expression_number(p): "expression : NUMBER" p[0] = p[1] + def p_expression_name(p): "expression : NAME" try: @@ -89,6 +103,7 @@ def p_expression_name(p): print("Undefined name '%s'" % p[1]) p[0] = 0 + def p_error(p): if p: print("Syntax error at '%s'" % p.value) @@ -103,5 +118,6 @@ while 1: s = raw_input('calc > ') except EOFError: break - if not s: continue + if not s: + continue yacc.parse(s) diff --git a/ext/ply/example/calcdebug/calc.py b/ext/ply/example/calcdebug/calc.py index 6732f9f329..06831e2ca5 100644 --- a/ext/ply/example/calcdebug/calc.py +++ b/ext/ply/example/calcdebug/calc.py @@ -6,20 +6,21 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input tokens = ( - 'NAME','NUMBER', - ) + 'NAME', 'NUMBER', +) -literals = ['=','+','-','*','/', '(',')'] +literals = ['=', '+', '-', '*', '/', '(', ')'] # Tokens -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + def t_NUMBER(t): r'\d+' @@ -28,14 +29,16 @@ def t_NUMBER(t): t_ignore = " \t" + def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") - + + def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) - + # Build the 
lexer import ply.lex as lex lex.lex() @@ -43,44 +46,55 @@ lex.lex() # Parsing rules precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) # dictionary of names -names = { } +names = {} + def p_statement_assign(p): 'statement : NAME "=" expression' names[p[1]] = p[3] + def p_statement_expr(p): 'statement : expression' print(p[1]) + def p_expression_binop(p): '''expression : expression '+' expression | expression '-' expression | expression '*' expression | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + def p_expression_uminus(p): "expression : '-' expression %prec UMINUS" p[0] = -p[2] + def p_expression_group(p): "expression : '(' expression ')'" p[0] = p[2] + def p_expression_number(p): "expression : NUMBER" p[0] = p[1] + def p_expression_name(p): "expression : NAME" try: @@ -89,6 +103,7 @@ def p_expression_name(p): print("Undefined name '%s'" % p[1]) p[0] = 0 + def p_error(p): if p: print("Syntax error at '%s'" % p.value) @@ -109,5 +124,6 @@ while 1: s = raw_input('calc > ') except EOFError: break - if not s: continue - yacc.parse(s,debug=logging.getLogger()) + if not s: + continue + yacc.parse(s, debug=logging.getLogger()) diff --git a/ext/ply/example/calceof/calc.py b/ext/ply/example/calceof/calc.py new file mode 100644 index 0000000000..22b39a41a8 --- /dev/null +++ b/ext/ply/example/calceof/calc.py @@ -0,0 +1,132 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. Asks the user for more input and +# demonstrates the use of the t_eof() rule. 
+# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0, "../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME', 'NUMBER', +) + +literals = ['=', '+', '-', '*', '/', '(', ')'] + +# Tokens + +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + +def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + +t_ignore = " \t" + + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + +def t_eof(t): + more = raw_input('... ') + if more: + t.lexer.input(more + '\n') + return t.lexer.token() + else: + return None + + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), +) + +# dictionary of names +names = {} + + +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] + + +def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + + +def p_expression_name(p): + "expression : NAME" + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + +def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + 
break + if not s: + continue + yacc.parse(s + '\n') diff --git a/ext/ply/example/classcalc/calc.py b/ext/ply/example/classcalc/calc.py index b0712fc452..ada4afd426 100755 --- a/ext/ply/example/classcalc/calc.py +++ b/ext/ply/example/classcalc/calc.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # ----------------------------------------------------------------------------- # calc.py @@ -10,7 +10,7 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input @@ -19,6 +19,7 @@ import ply.lex as lex import ply.yacc as yacc import os + class Parser: """ Base class for a lexer/parser that has the rules defined as methods @@ -28,14 +29,15 @@ class Parser: def __init__(self, **kw): self.debug = kw.get('debug', 0) - self.names = { } + self.names = {} try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ + modname = os.path.split(os.path.splitext(__file__)[0])[ + 1] + "_" + self.__class__.__name__ except: - modname = "parser"+"_"+self.__class__.__name__ + modname = "parser" + "_" + self.__class__.__name__ self.debugfile = modname + ".dbg" self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule + # print self.debugfile, self.tabmodule # Build the lexer and parser lex.lex(module=self, debug=self.debug) @@ -50,29 +52,30 @@ class Parser: s = raw_input('calc > ') except EOFError: break - if not s: continue + if not s: + continue yacc.parse(s) - + class Calc(Parser): tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', + ) # Tokens - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' 
+ t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' def t_NUMBER(self, t): r'\d+' @@ -81,7 +84,7 @@ class Calc(Parser): except ValueError: print("Integer value too large %s" % t.value) t.value = 0 - #print "parsed number %s" % repr(t.value) + # print "parsed number %s" % repr(t.value) return t t_ignore = " \t" @@ -89,7 +92,7 @@ class Calc(Parser): def t_newline(self, t): r'\n+' t.lexer.lineno += t.value.count("\n") - + def t_error(self, t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) @@ -97,11 +100,11 @@ class Calc(Parser): # Parsing rules precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), ('left', 'EXP'), - ('right','UMINUS'), - ) + ('right', 'UMINUS'), + ) def p_statement_assign(self, p): 'statement : NAME EQUALS expression' @@ -119,12 +122,17 @@ class Calc(Parser): | expression DIVIDE expression | expression EXP expression """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] + # print [repr(p[i]) for i in range(0,4)] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + elif p[2] == '**': + p[0] = p[1] ** p[3] def p_expression_uminus(self, p): 'expression : MINUS expression %prec UMINUS' diff --git a/ext/ply/example/closurecalc/calc.py b/ext/ply/example/closurecalc/calc.py index 6598f5844f..6031b05813 100644 --- a/ext/ply/example/closurecalc/calc.py +++ b/ext/ply/example/closurecalc/calc.py @@ -2,37 +2,38 @@ # calc.py # # A calculator parser that makes use of closures. 
The function make_calculator() -# returns a function that accepts an input string and returns a result. All +# returns a function that accepts an input string and returns a result. All # lexing rules, parsing rules, and internal state are held inside the function. # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input # Make a calculator function + def make_calculator(): import ply.lex as lex import ply.yacc as yacc # ------- Internal calculator state - variables = { } # Dictionary of stored variables + variables = {} # Dictionary of stored variables # ------- Calculator tokenizing rules tokens = ( - 'NAME','NUMBER', + 'NAME', 'NUMBER', ) - literals = ['=','+','-','*','/', '(',')'] + literals = ['=', '+', '-', '*', '/', '(', ')'] t_ignore = " \t" - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' def t_NUMBER(t): r'\d+' @@ -42,20 +43,20 @@ def make_calculator(): def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") - + def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) - + # Build the lexer lexer = lex.lex() # ------- Calculator parsing rules precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), + ('left', '+', '-'), + ('left', '*', '/'), + ('right', 'UMINUS'), ) def p_statement_assign(p): @@ -72,10 +73,14 @@ def make_calculator(): | expression '-' expression | expression '*' expression | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] def p_expression_uminus(p): "expression : '-' expression %prec UMINUS" @@ -103,14 +108,13 @@ def make_calculator(): else: 
print("Syntax error at EOF") - # Build the parser parser = yacc.yacc() - # ------- Input function - + # ------- Input function + def input(text): - result = parser.parse(text,lexer=lexer) + result = parser.parse(text, lexer=lexer) return result return input @@ -126,5 +130,3 @@ while True: r = calc(s) if r: print(r) - - diff --git a/ext/ply/example/hedit/hedit.py b/ext/ply/example/hedit/hedit.py index 2e80675f99..32da745677 100644 --- a/ext/ply/example/hedit/hedit.py +++ b/ext/ply/example/hedit/hedit.py @@ -15,34 +15,34 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") tokens = ( 'H_EDIT_DESCRIPTOR', - ) +) # Tokens t_ignore = " \t\n" + def t_H_EDIT_DESCRIPTOR(t): r"\d+H.*" # This grabs all of the remaining text i = t.value.index('H') n = eval(t.value[:i]) - + # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - - t.value = t.value[i+1:i+1+n] - return t - + t.lexer.lexpos -= len(t.value) - (i + 1 + n) + + t.value = t.value[i + 1:i + 1 + n] + return t + + def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) - + # Build the lexer import ply.lex as lex lex.lex() lex.runmain() - - diff --git a/ext/ply/example/newclasscalc/calc.py b/ext/ply/example/newclasscalc/calc.py index 5a8db84fc1..43c9506a8a 100755 --- a/ext/ply/example/newclasscalc/calc.py +++ b/ext/ply/example/newclasscalc/calc.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # ----------------------------------------------------------------------------- # calc.py @@ -12,7 +12,7 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input @@ -21,6 +21,7 @@ import ply.lex as lex import ply.yacc as yacc import os + class Parser(object): """ Base class for a lexer/parser that has the rules defined as methods @@ 
-28,17 +29,17 @@ class Parser(object): tokens = () precedence = () - def __init__(self, **kw): self.debug = kw.get('debug', 0) - self.names = { } + self.names = {} try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ + modname = os.path.split(os.path.splitext(__file__)[0])[ + 1] + "_" + self.__class__.__name__ except: - modname = "parser"+"_"+self.__class__.__name__ + modname = "parser" + "_" + self.__class__.__name__ self.debugfile = modname + ".dbg" self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule + # print self.debugfile, self.tabmodule # Build the lexer and parser lex.lex(module=self, debug=self.debug) @@ -53,29 +54,30 @@ class Parser(object): s = raw_input('calc > ') except EOFError: break - if not s: continue + if not s: + continue yacc.parse(s) - + class Calc(Parser): tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'EXP', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', + ) # Tokens - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' def t_NUMBER(self, t): r'\d+' @@ -84,7 +86,7 @@ class Calc(Parser): except ValueError: print("Integer value too large %s" % t.value) t.value = 0 - #print "parsed number %s" % repr(t.value) + # print "parsed number %s" % repr(t.value) return t t_ignore = " \t" @@ -92,7 +94,7 @@ class Calc(Parser): def t_newline(self, t): r'\n+' t.lexer.lineno += t.value.count("\n") - + def t_error(self, t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) @@ -100,11 +102,11 @@ class Calc(Parser): # Parsing rules precedence = ( - ('left','PLUS','MINUS'), - 
('left','TIMES','DIVIDE'), + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), ('left', 'EXP'), - ('right','UMINUS'), - ) + ('right', 'UMINUS'), + ) def p_statement_assign(self, p): 'statement : NAME EQUALS expression' @@ -122,12 +124,17 @@ class Calc(Parser): | expression DIVIDE expression | expression EXP expression """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] + # print [repr(p[i]) for i in range(0,4)] + if p[2] == '+': + p[0] = p[1] + p[3] + elif p[2] == '-': + p[0] = p[1] - p[3] + elif p[2] == '*': + p[0] = p[1] * p[3] + elif p[2] == '/': + p[0] = p[1] / p[3] + elif p[2] == '**': + p[0] = p[1] ** p[3] def p_expression_uminus(self, p): 'expression : MINUS expression %prec UMINUS' diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py index dd83351a0c..0c223e5994 100644 --- a/ext/ply/example/optcalc/calc.py +++ b/ext/ply/example/optcalc/calc.py @@ -6,27 +6,28 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") if sys.version_info[0] >= 3: raw_input = input tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', +) # Tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + def t_NUMBER(t): r'\d+' @@ -39,14 +40,16 @@ def t_NUMBER(t): t_ignore = " \t" + def t_newline(t): r'\n+' t.lexer.lineno += t.value.count("\n") - + + def t_error(t): print("Illegal 
character '%s'" % t.value[0]) t.lexer.skip(1) - + # Build the lexer import ply.lex as lex lex.lex(optimize=1) @@ -54,45 +57,57 @@ lex.lex(optimize=1) # Parsing rules precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), +) # dictionary of names -names = { } +names = {} + def p_statement_assign(t): 'statement : NAME EQUALS expression' names[t[1]] = t[3] + def p_statement_expr(t): 'statement : expression' print(t[1]) + def p_expression_binop(t): '''expression : expression PLUS expression | expression MINUS expression | expression TIMES expression | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - elif t[2] == '<': t[0] = t[1] < t[3] + if t[2] == '+': + t[0] = t[1] + t[3] + elif t[2] == '-': + t[0] = t[1] - t[3] + elif t[2] == '*': + t[0] = t[1] * t[3] + elif t[2] == '/': + t[0] = t[1] / t[3] + elif t[2] == '<': + t[0] = t[1] < t[3] + def p_expression_uminus(t): 'expression : MINUS expression %prec UMINUS' t[0] = -t[2] + def p_expression_group(t): 'expression : LPAREN expression RPAREN' t[0] = t[2] + def p_expression_number(t): 'expression : NUMBER' t[0] = t[1] + def p_expression_name(t): 'expression : NAME' try: @@ -101,6 +116,7 @@ def p_expression_name(t): print("Undefined name '%s'" % t[1]) t[0] = 0 + def p_error(t): if t: print("Syntax error at '%s'" % t.value) @@ -116,4 +132,3 @@ while 1: except EOFError: break yacc.parse(s) - diff --git a/ext/ply/example/unicalc/calc.py b/ext/ply/example/unicalc/calc.py index 55fb48df45..901c4b9d76 100644 --- a/ext/ply/example/unicalc/calc.py +++ b/ext/ply/example/unicalc/calc.py @@ -8,24 +8,25 @@ # ----------------------------------------------------------------------------- import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") tokens = ( - 
'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) + 'NAME', 'NUMBER', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'EQUALS', + 'LPAREN', 'RPAREN', +) # Tokens -t_PLUS = ur'\+' -t_MINUS = ur'-' -t_TIMES = ur'\*' -t_DIVIDE = ur'/' -t_EQUALS = ur'=' -t_LPAREN = ur'\(' -t_RPAREN = ur'\)' -t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' +t_PLUS = ur'\+' +t_MINUS = ur'-' +t_TIMES = ur'\*' +t_DIVIDE = ur'/' +t_EQUALS = ur'=' +t_LPAREN = ur'\(' +t_RPAREN = ur'\)' +t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' + def t_NUMBER(t): ur'\d+' @@ -38,14 +39,16 @@ def t_NUMBER(t): t_ignore = u" \t" + def t_newline(t): ur'\n+' t.lexer.lineno += t.value.count("\n") - + + def t_error(t): print "Illegal character '%s'" % t.value[0] t.lexer.skip(1) - + # Build the lexer import ply.lex as lex lex.lex() @@ -53,44 +56,55 @@ lex.lex() # Parsing rules precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) + ('left', 'PLUS', 'MINUS'), + ('left', 'TIMES', 'DIVIDE'), + ('right', 'UMINUS'), +) # dictionary of names -names = { } +names = {} + def p_statement_assign(p): 'statement : NAME EQUALS expression' names[p[1]] = p[3] + def p_statement_expr(p): 'statement : expression' print p[1] + def p_expression_binop(p): '''expression : expression PLUS expression | expression MINUS expression | expression TIMES expression | expression DIVIDE expression''' - if p[2] == u'+' : p[0] = p[1] + p[3] - elif p[2] == u'-': p[0] = p[1] - p[3] - elif p[2] == u'*': p[0] = p[1] * p[3] - elif p[2] == u'/': p[0] = p[1] / p[3] + if p[2] == u'+': + p[0] = p[1] + p[3] + elif p[2] == u'-': + p[0] = p[1] - p[3] + elif p[2] == u'*': + p[0] = p[1] * p[3] + elif p[2] == u'/': + p[0] = p[1] / p[3] + def p_expression_uminus(p): 'expression : MINUS expression %prec UMINUS' p[0] = -p[2] + def p_expression_group(p): 'expression : LPAREN expression RPAREN' p[0] = p[2] + def p_expression_number(p): 'expression : NUMBER' p[0] = p[1] + def p_expression_name(p): 'expression : NAME' try: 
@@ -99,6 +113,7 @@ def p_expression_name(p): print "Undefined name '%s'" % p[1] p[0] = 0 + def p_error(p): if p: print "Syntax error at '%s'" % p.value @@ -113,5 +128,6 @@ while 1: s = raw_input('calc > ') except EOFError: break - if not s: continue + if not s: + continue yacc.parse(unicode(s)) diff --git a/ext/ply/example/yply/ylex.py b/ext/ply/example/yply/ylex.py index 84f2f7a738..16410e250e 100644 --- a/ext/ply/example/yply/ylex.py +++ b/ext/ply/example/yply/ylex.py @@ -9,104 +9,111 @@ sys.path.append("../..") from ply import * tokens = ( - 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE', - 'ID','QLITERAL','NUMBER', + 'LITERAL', 'SECTION', 'TOKEN', 'LEFT', 'RIGHT', 'PREC', 'START', 'TYPE', 'NONASSOC', 'UNION', 'CODE', + 'ID', 'QLITERAL', 'NUMBER', ) -states = (('code','exclusive'),) +states = (('code', 'exclusive'),) -literals = [ ';', ',', '<', '>', '|',':' ] +literals = [';', ',', '<', '>', '|', ':'] t_ignore = ' \t' -t_TOKEN = r'%token' -t_LEFT = r'%left' -t_RIGHT = r'%right' -t_NONASSOC = r'%nonassoc' -t_PREC = r'%prec' -t_START = r'%start' -t_TYPE = r'%type' -t_UNION = r'%union' -t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' +t_TOKEN = r'%token' +t_LEFT = r'%left' +t_RIGHT = r'%right' +t_NONASSOC = r'%nonassoc' +t_PREC = r'%prec' +t_START = r'%start' +t_TYPE = r'%type' +t_UNION = r'%union' +t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' t_QLITERAL = r'''(?P['"]).*?(?P=quote)''' -t_NUMBER = r'\d+' +t_NUMBER = r'\d+' + def t_SECTION(t): r'%%' - if getattr(t.lexer,"lastsection",0): - t.value = t.lexer.lexdata[t.lexpos+2:] - t.lexer.lexpos = len(t.lexer.lexdata) + if getattr(t.lexer, "lastsection", 0): + t.value = t.lexer.lexdata[t.lexpos + 2:] + t.lexer.lexpos = len(t.lexer.lexdata) else: - t.lexer.lastsection = 0 + t.lexer.lastsection = 0 return t # Comments + + def t_ccomment(t): r'/\*(.|\n)*?\*/' t.lexer.lineno += t.value.count('\n') t_ignore_cppcomment = r'//.*' + def t_LITERAL(t): - r'%\{(.|\n)*?%\}' - t.lexer.lineno += 
t.value.count("\n") - return t + r'%\{(.|\n)*?%\}' + t.lexer.lineno += t.value.count("\n") + return t + def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 + r'\n' + t.lexer.lineno += 1 + def t_code(t): - r'\{' - t.lexer.codestart = t.lexpos - t.lexer.level = 1 - t.lexer.begin('code') + r'\{' + t.lexer.codestart = t.lexpos + t.lexer.level = 1 + t.lexer.begin('code') + def t_code_ignore_string(t): r'\"([^\\\n]|(\\.))*?\"' + def t_code_ignore_char(t): r'\'([^\\\n]|(\\.))*?\'' + def t_code_ignore_comment(t): - r'/\*(.|\n)*?\*/' + r'/\*(.|\n)*?\*/' + def t_code_ignore_cppcom(t): - r'//.*' + r'//.*' + def t_code_lbrace(t): r'\{' t.lexer.level += 1 + def t_code_rbrace(t): r'\}' t.lexer.level -= 1 if t.lexer.level == 0: - t.type = 'CODE' - t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] - t.lexer.begin('INITIAL') - t.lexer.lineno += t.value.count('\n') - return t + t.type = 'CODE' + t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos + 1] + t.lexer.begin('INITIAL') + t.lexer.lineno += t.value.count('\n') + return t -t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' +t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' t_code_ignore_whitespace = r'\s+' t_code_ignore = "" + def t_code_error(t): raise RuntimeError + def t_error(t): - print "%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0]) - print t.value + print("%d: Illegal character '%s'" % (t.lexer.lineno, t.value[0])) + print(t.value) t.lexer.skip(1) lex.lex() if __name__ == '__main__': lex.runmain() - - - - - - - diff --git a/ext/ply/example/yply/yparse.py b/ext/ply/example/yply/yparse.py index ab5b884514..1f2e8d0922 100644 --- a/ext/ply/example/yply/yparse.py +++ b/ext/ply/example/yply/yparse.py @@ -9,53 +9,61 @@ tokens = ylex.tokens from ply import * tokenlist = [] -preclist = [] +preclist = [] emit_code = 1 + def p_yacc(p): '''yacc : defsection rulesection''' + def p_defsection(p): '''defsection : definitions SECTION | SECTION''' p.lexer.lastsection = 1 - print "tokens = ", repr(tokenlist) - print - print "precedence = 
", repr(preclist) - print - print "# -------------- RULES ----------------" - print + print("tokens = ", repr(tokenlist)) + print() + print("precedence = ", repr(preclist)) + print() + print("# -------------- RULES ----------------") + print() + def p_rulesection(p): '''rulesection : rules SECTION''' - print "# -------------- RULES END ----------------" - print_code(p[2],0) + print("# -------------- RULES END ----------------") + print_code(p[2], 0) + def p_definitions(p): '''definitions : definitions definition | definition''' + def p_definition_literal(p): '''definition : LITERAL''' - print_code(p[1],0) + print_code(p[1], 0) + def p_definition_start(p): '''definition : START ID''' - print "start = '%s'" % p[2] + print("start = '%s'" % p[2]) + def p_definition_token(p): '''definition : toktype opttype idlist optsemi ''' for i in p[3]: - if i[0] not in "'\"": - tokenlist.append(i) + if i[0] not in "'\"": + tokenlist.append(i) if p[1] == '%left': preclist.append(('left',) + tuple(p[3])) elif p[1] == '%right': preclist.append(('right',) + tuple(p[3])) elif p[1] == '%nonassoc': - preclist.append(('nonassoc',)+ tuple(p[3])) + preclist.append(('nonassoc',) + tuple(p[3])) + def p_toktype(p): '''toktype : TOKEN @@ -64,10 +72,12 @@ def p_toktype(p): | NONASSOC''' p[0] = p[1] + def p_opttype(p): '''opttype : '<' ID '>' | empty''' + def p_idlist(p): '''idlist : idlist optcomma tokenid | tokenid''' @@ -77,141 +87,158 @@ def p_idlist(p): p[0] = p[1] p[1].append(p[3]) + def p_tokenid(p): '''tokenid : ID | ID NUMBER | QLITERAL | QLITERAL NUMBER''' p[0] = p[1] - + + def p_optsemi(p): '''optsemi : ';' | empty''' + def p_optcomma(p): '''optcomma : ',' | empty''' + def p_definition_type(p): '''definition : TYPE '<' ID '>' namelist optsemi''' # type declarations are ignored + def p_namelist(p): '''namelist : namelist optcomma ID | ID''' + def p_definition_union(p): '''definition : UNION CODE optsemi''' # Union declarations are ignored + def p_rules(p): '''rules : rules rule | rule''' 
if len(p) == 2: - rule = p[1] + rule = p[1] else: - rule = p[2] + rule = p[2] # Print out a Python equivalent of this rule - embedded = [ ] # Embedded actions (a mess) + embedded = [] # Embedded actions (a mess) embed_count = 0 rulename = rule[0] rulecount = 1 for r in rule[1]: # r contains one of the rule possibilities - print "def p_%s_%d(p):" % (rulename,rulecount) + print("def p_%s_%d(p):" % (rulename, rulecount)) prod = [] prodcode = "" for i in range(len(r)): - item = r[i] - if item[0] == '{': # A code block - if i == len(r) - 1: - prodcode = item - break - else: - # an embedded action - embed_name = "_embed%d_%s" % (embed_count,rulename) - prod.append(embed_name) - embedded.append((embed_name,item)) - embed_count += 1 - else: - prod.append(item) - print " '''%s : %s'''" % (rulename, " ".join(prod)) + item = r[i] + if item[0] == '{': # A code block + if i == len(r) - 1: + prodcode = item + break + else: + # an embedded action + embed_name = "_embed%d_%s" % (embed_count, rulename) + prod.append(embed_name) + embedded.append((embed_name, item)) + embed_count += 1 + else: + prod.append(item) + print(" '''%s : %s'''" % (rulename, " ".join(prod))) # Emit code - print_code(prodcode,4) - print + print_code(prodcode, 4) + print() rulecount += 1 - for e,code in embedded: - print "def p_%s(p):" % e - print " '''%s : '''" % e - print_code(code,4) - print + for e, code in embedded: + print("def p_%s(p):" % e) + print(" '''%s : '''" % e) + print_code(code, 4) + print() + def p_rule(p): - '''rule : ID ':' rulelist ';' ''' - p[0] = (p[1],[p[3]]) + '''rule : ID ':' rulelist ';' ''' + p[0] = (p[1], [p[3]]) + def p_rule2(p): - '''rule : ID ':' rulelist morerules ';' ''' - p[4].insert(0,p[3]) - p[0] = (p[1],p[4]) + '''rule : ID ':' rulelist morerules ';' ''' + p[4].insert(0, p[3]) + p[0] = (p[1], p[4]) + def p_rule_empty(p): - '''rule : ID ':' ';' ''' - p[0] = (p[1],[[]]) + '''rule : ID ':' ';' ''' + p[0] = (p[1], [[]]) + def p_rule_empty2(p): - '''rule : ID ':' morerules ';' 
''' - - p[3].insert(0,[]) - p[0] = (p[1],p[3]) + '''rule : ID ':' morerules ';' ''' + + p[3].insert(0, []) + p[0] = (p[1], p[3]) + def p_morerules(p): - '''morerules : morerules '|' rulelist - | '|' rulelist - | '|' ''' - - if len(p) == 2: - p[0] = [[]] - elif len(p) == 3: - p[0] = [p[2]] - else: - p[0] = p[1] - p[0].append(p[3]) + '''morerules : morerules '|' rulelist + | '|' rulelist + | '|' ''' + + if len(p) == 2: + p[0] = [[]] + elif len(p) == 3: + p[0] = [p[2]] + else: + p[0] = p[1] + p[0].append(p[3]) + +# print("morerules", len(p), p[0]) -# print "morerules", len(p), p[0] def p_rulelist(p): - '''rulelist : rulelist ruleitem - | ruleitem''' + '''rulelist : rulelist ruleitem + | ruleitem''' - if len(p) == 2: + if len(p) == 2: p[0] = [p[1]] - else: + else: p[0] = p[1] p[1].append(p[2]) + def p_ruleitem(p): - '''ruleitem : ID - | QLITERAL - | CODE - | PREC''' - p[0] = p[1] + '''ruleitem : ID + | QLITERAL + | CODE + | PREC''' + p[0] = p[1] + def p_empty(p): '''empty : ''' + def p_error(p): pass yacc.yacc(debug=0) -def print_code(code,indent): - if not emit_code: return + +def print_code(code, indent): + if not emit_code: + return codelines = code.splitlines() for c in codelines: - print "%s# %s" % (" "*indent,c) - + print("%s# %s" % (" " * indent, c)) diff --git a/ext/ply/example/yply/yply.py b/ext/ply/example/yply/yply.py index a4398171ea..e24616c831 100755 --- a/ext/ply/example/yply/yply.py +++ b/ext/ply/example/yply/yply.py @@ -21,7 +21,7 @@ # import sys -sys.path.insert(0,"../..") +sys.path.insert(0, "../..") import ylex import yparse @@ -29,25 +29,23 @@ import yparse from ply import * if len(sys.argv) == 1: - print "usage : yply.py [-nocode] inputfile" + print("usage : yply.py [-nocode] inputfile") raise SystemExit if len(sys.argv) == 3: if sys.argv[1] == '-nocode': - yparse.emit_code = 0 + yparse.emit_code = 0 else: - print "Unknown option '%s'" % sys.argv[1] - raise SystemExit + print("Unknown option '%s'" % sys.argv[1]) + raise SystemExit filename = 
sys.argv[2] else: filename = sys.argv[1] yacc.parse(open(filename).read()) -print """ +print(""" if __name__ == '__main__': from ply import * yacc.yacc() -""" - - +""") diff --git a/ext/ply/ply.egg-info/PKG-INFO b/ext/ply/ply.egg-info/PKG-INFO new file mode 100644 index 0000000000..f2d8c8ae08 --- /dev/null +++ b/ext/ply/ply.egg-info/PKG-INFO @@ -0,0 +1,23 @@ +Metadata-Version: 1.1 +Name: ply +Version: 3.11 +Summary: Python Lex & Yacc +Home-page: http://www.dabeaz.com/ply/ +Author: David Beazley +Author-email: dave@dabeaz.com +License: BSD +Description-Content-Type: UNKNOWN +Description: + PLY is yet another implementation of lex and yacc for Python. Some notable + features include the fact that its implemented entirely in Python and it + uses LALR(1) parsing which is efficient and well suited for larger grammars. + + PLY provides most of the standard lex/yacc features including support for empty + productions, precedence rules, error recovery, and support for ambiguous grammars. + + PLY is extremely easy to use and provides very extensive error checking. + It is compatible with both Python 2 and Python 3. 
+ +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 2 diff --git a/ext/ply/ply.egg-info/SOURCES.txt b/ext/ply/ply.egg-info/SOURCES.txt new file mode 100644 index 0000000000..f96bab6cdf --- /dev/null +++ b/ext/ply/ply.egg-info/SOURCES.txt @@ -0,0 +1,190 @@ +ANNOUNCE +CHANGES +MANIFEST.in +README.md +TODO +setup.cfg +setup.py +doc/internal.html +doc/makedoc.py +doc/ply.html +example/README +example/cleanup.sh +example/BASIC/README +example/BASIC/basic.py +example/BASIC/basiclex.py +example/BASIC/basiclog.py +example/BASIC/basinterp.py +example/BASIC/basparse.py +example/BASIC/dim.bas +example/BASIC/func.bas +example/BASIC/gcd.bas +example/BASIC/gosub.bas +example/BASIC/hello.bas +example/BASIC/linear.bas +example/BASIC/maxsin.bas +example/BASIC/powers.bas +example/BASIC/rand.bas +example/BASIC/sales.bas +example/BASIC/sears.bas +example/BASIC/sqrt1.bas +example/BASIC/sqrt2.bas +example/GardenSnake/GardenSnake.py +example/GardenSnake/README +example/ansic/README +example/ansic/clex.py +example/ansic/cparse.py +example/calc/calc.py +example/calcdebug/calc.py +example/calceof/calc.py +example/classcalc/calc.py +example/closurecalc/calc.py +example/hedit/hedit.py +example/newclasscalc/calc.py +example/optcalc/README +example/optcalc/calc.py +example/unicalc/calc.py +example/yply/README +example/yply/ylex.py +example/yply/yparse.py +example/yply/yply.py +ply/__init__.py +ply/cpp.py +ply/ctokens.py +ply/lex.py +ply/yacc.py +ply/ygen.py +ply.egg-info/PKG-INFO +ply.egg-info/SOURCES.txt +ply.egg-info/dependency_links.txt +ply.egg-info/top_level.txt +test/README +test/calclex.py +test/cleanup.sh +test/lex_closure.py +test/lex_doc1.py +test/lex_dup1.py +test/lex_dup2.py +test/lex_dup3.py +test/lex_empty.py +test/lex_error1.py +test/lex_error2.py +test/lex_error3.py +test/lex_error4.py +test/lex_hedit.py +test/lex_ignore.py +test/lex_ignore2.py +test/lex_literal1.py +test/lex_literal2.py +test/lex_literal3.py 
+test/lex_many_tokens.py +test/lex_module.py +test/lex_module_import.py +test/lex_object.py +test/lex_opt_alias.py +test/lex_optimize.py +test/lex_optimize2.py +test/lex_optimize3.py +test/lex_optimize4.py +test/lex_re1.py +test/lex_re2.py +test/lex_re3.py +test/lex_rule1.py +test/lex_rule2.py +test/lex_rule3.py +test/lex_state1.py +test/lex_state2.py +test/lex_state3.py +test/lex_state4.py +test/lex_state5.py +test/lex_state_noerror.py +test/lex_state_norule.py +test/lex_state_try.py +test/lex_token1.py +test/lex_token2.py +test/lex_token3.py +test/lex_token4.py +test/lex_token5.py +test/lex_token_dup.py +test/parser.out +test/testcpp.py +test/testlex.py +test/testyacc.py +test/yacc_badargs.py +test/yacc_badid.py +test/yacc_badprec.py +test/yacc_badprec2.py +test/yacc_badprec3.py +test/yacc_badrule.py +test/yacc_badtok.py +test/yacc_dup.py +test/yacc_error1.py +test/yacc_error2.py +test/yacc_error3.py +test/yacc_error4.py +test/yacc_error5.py +test/yacc_error6.py +test/yacc_error7.py +test/yacc_inf.py +test/yacc_literal.py +test/yacc_misplaced.py +test/yacc_missing1.py +test/yacc_nested.py +test/yacc_nodoc.py +test/yacc_noerror.py +test/yacc_nop.py +test/yacc_notfunc.py +test/yacc_notok.py +test/yacc_prec1.py +test/yacc_rr.py +test/yacc_rr_unused.py +test/yacc_simple.py +test/yacc_sr.py +test/yacc_term1.py +test/yacc_unicode_literals.py +test/yacc_unused.py +test/yacc_unused_rule.py +test/yacc_uprec.py +test/yacc_uprec2.py +test/pkg_test1/__init__.py +test/pkg_test1/parsing/__init__.py +test/pkg_test1/parsing/calclex.py +test/pkg_test1/parsing/calcparse.py +test/pkg_test1/parsing/lextab.py +test/pkg_test1/parsing/parser.out +test/pkg_test1/parsing/parsetab.py +test/pkg_test2/__init__.py +test/pkg_test2/parsing/__init__.py +test/pkg_test2/parsing/calclex.py +test/pkg_test2/parsing/calclextab.py +test/pkg_test2/parsing/calcparse.py +test/pkg_test2/parsing/calcparsetab.py +test/pkg_test2/parsing/parser.out +test/pkg_test3/__init__.py 
+test/pkg_test3/generated/__init__.py +test/pkg_test3/generated/lextab.py +test/pkg_test3/generated/parser.out +test/pkg_test3/generated/parsetab.py +test/pkg_test3/parsing/__init__.py +test/pkg_test3/parsing/calclex.py +test/pkg_test3/parsing/calcparse.py +test/pkg_test4/__init__.py +test/pkg_test4/parsing/__init__.py +test/pkg_test4/parsing/calclex.py +test/pkg_test4/parsing/calcparse.py +test/pkg_test5/__init__.py +test/pkg_test5/parsing/__init__.py +test/pkg_test5/parsing/calclex.py +test/pkg_test5/parsing/calcparse.py +test/pkg_test5/parsing/lextab.py +test/pkg_test5/parsing/parser.out +test/pkg_test5/parsing/parsetab.py +test/pkg_test6/__init__.py +test/pkg_test6/parsing/__init__.py +test/pkg_test6/parsing/calclex.py +test/pkg_test6/parsing/calcparse.py +test/pkg_test6/parsing/expression.py +test/pkg_test6/parsing/lextab.py +test/pkg_test6/parsing/parser.out +test/pkg_test6/parsing/parsetab.py +test/pkg_test6/parsing/statement.py \ No newline at end of file diff --git a/ext/ply/ply.egg-info/dependency_links.txt b/ext/ply/ply.egg-info/dependency_links.txt new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/ext/ply/ply.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/ext/ply/ply.egg-info/top_level.txt b/ext/ply/ply.egg-info/top_level.txt new file mode 100644 index 0000000000..90412f0683 --- /dev/null +++ b/ext/ply/ply.egg-info/top_level.txt @@ -0,0 +1 @@ +ply diff --git a/ext/ply/ply/__init__.py b/ext/ply/ply/__init__.py index 853a985542..23707c6354 100644 --- a/ext/ply/ply/__init__.py +++ b/ext/ply/ply/__init__.py @@ -1,4 +1,5 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) +__version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/ext/ply/ply/cpp.py b/ext/ply/ply/cpp.py index 39f9d47f33..2422916c9f 100644 --- a/ext/ply/ply/cpp.py +++ b/ext/ply/ply/cpp.py @@ -5,17 +5,26 @@ # Copyright (C) 2007 # All rights reserved # -# This module implements an ANSI-C style lexical preprocessor for PLY. 
+# This module implements an ANSI-C style lexical preprocessor for PLY. # ----------------------------------------------------------------------------- from __future__ import generators +import sys + +# Some Python 3 compatibility shims +if sys.version_info.major < 3: + STRING_TYPES = (str, unicode) +else: + STRING_TYPES = str + xrange = range + # ----------------------------------------------------------------------------- # Default preprocessor lexer definitions. These tokens are enough to get # a basic preprocessor working. Other modules may import these if they want # ----------------------------------------------------------------------------- tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND' + 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND' ) literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" @@ -34,7 +43,7 @@ t_CPP_ID = r'[A-Za-z_][\w_]*' # Integer literal def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)' + r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' return t t_CPP_INTEGER = CPP_INTEGER @@ -55,11 +64,21 @@ def t_CPP_CHAR(t): return t # Comment -def t_CPP_COMMENT(t): - r'(/\*(.|\n)*?\*/)|(//.*?\n)' - t.lexer.lineno += t.value.count("\n") +def t_CPP_COMMENT1(t): + r'(/\*(.|\n)*?\*/)' + ncr = t.value.count("\n") + t.lexer.lineno += ncr + # replace with one space or a number of '\n' + t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' return t - + +# Line comment +def t_CPP_COMMENT2(t): + r'(//.*?(\n|$))' + # replace with '/n' + t.type = 'CPP_WS'; t.value = '\n' + return t + def t_error(t): t.type = t.value[0] t.value = t.value[0] @@ -73,8 +92,8 @@ import os.path # ----------------------------------------------------------------------------- # trigraph() -# -# Given an input string, this function replaces all trigraph sequences. 
+# +# Given an input string, this function replaces all trigraph sequences. # The following mapping is used: # # ??= # @@ -176,7 +195,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- def error(self,file,line,msg): - print >>sys.stderr,"%s:%d %s" % (file,line,msg) + print("%s:%d %s" % (file,line,msg)) # ---------------------------------------------------------------------- # lexprobe() @@ -193,7 +212,7 @@ class Preprocessor(object): self.lexer.input("identifier") tok = self.lexer.token() if not tok or tok.value != "identifier": - print "Couldn't determine identifier type" + print("Couldn't determine identifier type") else: self.t_ID = tok.type @@ -201,7 +220,7 @@ class Preprocessor(object): self.lexer.input("12345") tok = self.lexer.token() if not tok or int(tok.value) != 12345: - print "Couldn't determine integer type" + print("Couldn't determine integer type") else: self.t_INTEGER = tok.type self.t_INTEGER_TYPE = type(tok.value) @@ -210,7 +229,7 @@ class Preprocessor(object): self.lexer.input("\"filename\"") tok = self.lexer.token() if not tok or tok.value != "\"filename\"": - print "Couldn't determine string type" + print("Couldn't determine string type") else: self.t_STRING = tok.type @@ -227,7 +246,7 @@ class Preprocessor(object): tok = self.lexer.token() if not tok or tok.value != "\n": self.t_NEWLINE = None - print "Couldn't determine token for newlines" + print("Couldn't determine token for newlines") else: self.t_NEWLINE = tok.type @@ -239,12 +258,12 @@ class Preprocessor(object): self.lexer.input(c) tok = self.lexer.token() if not tok or tok.value != c: - print "Unable to lex '%s' required for preprocessor" % c + print("Unable to lex '%s' required for preprocessor" % c) # ---------------------------------------------------------------------- # add_path() # - # Adds a search path to the preprocessor. + # Adds a search path to the preprocessor. 
# ---------------------------------------------------------------------- def add_path(self,path): @@ -288,7 +307,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- # tokenstrip() - # + # # Remove leading/trailing whitespace tokens from a token list # ---------------------------------------------------------------------- @@ -314,7 +333,7 @@ class Preprocessor(object): # argument. Each argument is represented by a list of tokens. # # When collecting arguments, leading and trailing whitespace is removed - # from each argument. + # from each argument. # # This function properly handles nested parenthesis and commas---these do not # define new arguments. @@ -326,7 +345,7 @@ class Preprocessor(object): current_arg = [] nesting = 1 tokenlen = len(tokenlist) - + # Search for the opening '('. i = 0 while (i < tokenlen) and (tokenlist[i].type in self.t_WS): @@ -360,7 +379,7 @@ class Preprocessor(object): else: current_arg.append(t) i += 1 - + # Missing end argument self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") return 0, [],[] @@ -372,9 +391,9 @@ class Preprocessor(object): # This is used to speed up macro expansion later on---we'll know # right away where to apply patches to the value to form the expansion # ---------------------------------------------------------------------- - + def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments + macro.patch = [] # Standard macro arguments macro.str_patch = [] # String conversion expansion macro.var_comma_patch = [] # Variadic macro comma patch i = 0 @@ -392,10 +411,11 @@ class Preprocessor(object): elif (i > 0 and macro.value[i-1].value == '##'): macro.patch.append(('c',argnum,i-1)) del macro.value[i-1] + i -= 1 continue elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): macro.patch.append(('c',argnum,i)) - i += 1 + del macro.value[i + 1] continue # Standard expansion else: @@ -421,7 +441,7 @@ class 
Preprocessor(object): rep = [copy.copy(_x) for _x in macro.value] # Make string expansion patches. These do not alter the length of the replacement sequence - + str_expansion = {} for argnum, i in macro.str_patch: if argnum not in str_expansion: @@ -439,7 +459,7 @@ class Preprocessor(object): # Make all other patches. The order of these matters. It is assumed that the patch list # has been sorted in reverse order of patch location since replacements will cause the # size of the replacement sequence to expand from the patch point. - + expanded = { } for ptype, argnum, i in macro.patch: # Concatenation. Argument is left unexpanded @@ -476,7 +496,7 @@ class Preprocessor(object): if t.value in self.macros and t.value not in expanded: # Yes, we found a macro match expanded[t.value] = True - + m = self.macros[t.value] if not m.arglist: # A simple macro @@ -490,7 +510,7 @@ class Preprocessor(object): j = i + 1 while j < len(tokens) and tokens[j].type in self.t_WS: j += 1 - if tokens[j].value == '(': + if j < len(tokens) and tokens[j].value == '(': tokcount,args,positions = self.collect_args(tokens[j:]) if not m.variadic and len(args) != len(m.arglist): self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) @@ -508,7 +528,7 @@ class Preprocessor(object): else: args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] del args[len(m.arglist):] - + # Get macro replacement text rep = self.macro_expand_args(m,args) rep = self.expand_macros(rep,expanded) @@ -516,18 +536,24 @@ class Preprocessor(object): r.lineno = t.lineno tokens[i:j+tokcount] = rep i += len(rep) + else: + # This is not a macro. It is just a word which + # equals to name of the macro. Hence, go to the + # next token. 
+ i += 1 + del expanded[t.value] continue elif t.value == '__LINE__': t.type = self.t_INTEGER t.value = self.t_INTEGER_TYPE(t.lineno) - + i += 1 return tokens - # ---------------------------------------------------------------------- + # ---------------------------------------------------------------------- # evalexpr() - # + # # Evaluate an expression token sequence for the purposes of evaluating # integral expressions. # ---------------------------------------------------------------------- @@ -574,14 +600,14 @@ class Preprocessor(object): tokens[i].value = str(tokens[i].value) while tokens[i].value[-1] not in "0123456789abcdefABCDEF": tokens[i].value = tokens[i].value[:-1] - + expr = "".join([str(x.value) for x in tokens]) expr = expr.replace("&&"," and ") expr = expr.replace("||"," or ") expr = expr.replace("!"," not ") try: result = eval(expr) - except StandardError: + except Exception: self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") result = 0 return result @@ -599,7 +625,7 @@ class Preprocessor(object): if not source: source = "" - + self.define("__FILE__ \"%s\"" % source) self.source = source @@ -614,10 +640,11 @@ class Preprocessor(object): if tok.value == '#': # Preprocessor directive + # insert necessary whitespace instead of eaten tokens for tok in x: - if tok in self.t_WS and '\n' in tok.value: + if tok.type in self.t_WS and '\n' in tok.value: chunk.append(tok) - + dirtokens = self.tokenstrip(x[i+1:]) if dirtokens: name = dirtokens[0].value @@ -625,7 +652,7 @@ class Preprocessor(object): else: name = "" args = [] - + if name == 'define': if enable: for tok in self.expand_macros(chunk): @@ -685,7 +712,7 @@ class Preprocessor(object): iftrigger = True else: self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - + elif name == 'else': if ifstack: if ifstack[-1][0]: @@ -737,7 +764,7 @@ class Preprocessor(object): break i += 1 else: - print "Malformed #include <...>" + print("Malformed #include <...>") return filename = 
"".join([x.value for x in tokens[1:i]]) path = self.path + [""] + self.temp_path @@ -745,7 +772,7 @@ class Preprocessor(object): filename = tokens[0].value[1:-1] path = self.temp_path + [""] + self.path else: - print "Malformed #include statement" + print("Malformed #include statement") return for p in path: iname = os.path.join(p,filename) @@ -759,10 +786,10 @@ class Preprocessor(object): if dname: del self.temp_path[0] break - except IOError,e: + except IOError: pass else: - print "Couldn't find '%s'" % filename + print("Couldn't find '%s'" % filename) # ---------------------------------------------------------------------- # define() @@ -771,7 +798,7 @@ class Preprocessor(object): # ---------------------------------------------------------------------- def define(self,tokens): - if isinstance(tokens,(str,unicode)): + if isinstance(tokens,STRING_TYPES): tokens = self.tokenize(tokens) linetok = tokens @@ -794,7 +821,7 @@ class Preprocessor(object): variadic = False for a in args: if variadic: - print "No more arguments may follow a variadic argument" + print("No more arguments may follow a variadic argument") break astr = "".join([str(_i.value) for _i in a]) if astr == "...": @@ -813,7 +840,7 @@ class Preprocessor(object): a[0].value = a[0].value[:-3] continue if len(a) > 1 or a[0].type != self.t_ID: - print "Invalid macro argument" + print("Invalid macro argument") break else: mvalue = self.tokenstrip(linetok[1+tokcount:]) @@ -830,9 +857,9 @@ class Preprocessor(object): self.macro_prescan(m) self.macros[name.value] = m else: - print "Bad macro definition" + print("Bad macro definition") except LookupError: - print "Bad macro definition" + print("Bad macro definition") # ---------------------------------------------------------------------- # undef() @@ -855,7 +882,7 @@ class Preprocessor(object): def parse(self,input,source=None,ignore={}): self.ignore = ignore self.parser = self.parsegen(input,source) - + # 
---------------------------------------------------------------------- # token() # @@ -864,7 +891,7 @@ class Preprocessor(object): def token(self): try: while True: - tok = self.parser.next() + tok = next(self.parser) if tok.type not in self.ignore: return tok except StopIteration: self.parser = None @@ -884,15 +911,4 @@ if __name__ == '__main__': while True: tok = p.token() if not tok: break - print p.source, tok - - - - - - - - - - - + print(p.source, tok) diff --git a/ext/ply/ply/ctokens.py b/ext/ply/ply/ctokens.py index dd5f102dc8..b265e59ff8 100644 --- a/ext/ply/ply/ctokens.py +++ b/ext/ply/ply/ctokens.py @@ -9,27 +9,27 @@ tokens = [ # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST', + 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', 'LOR', 'LAND', 'LNOT', 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', # Increment/decrement (++,--) - 'PLUSPLUS', 'MINUSMINUS', + 'INCREMENT', 'DECREMENT', # Structure dereference (->) 'ARROW', # Ternary operator (?) 'TERNARY', - + # Delimeters ( ) [ ] { } , . ; : 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', @@ -39,7 +39,7 @@ tokens = [ # Ellipsis (...) 
'ELLIPSIS', ] - + # Operators t_PLUS = r'\+' t_MINUS = r'-' @@ -74,7 +74,7 @@ t_LSHIFTEQUAL = r'<<=' t_RSHIFTEQUAL = r'>>=' t_ANDEQUAL = r'&=' t_OREQUAL = r'\|=' -t_XOREQUAL = r'^=' +t_XOREQUAL = r'\^=' # Increment/decrement t_INCREMENT = r'\+\+' @@ -125,9 +125,3 @@ def t_CPPCOMMENT(t): r'//.*\n' t.lexer.lineno += 1 return t - - - - - - diff --git a/ext/ply/ply/lex.py b/ext/ply/ply/lex.py index 4759d1b7a6..f95bcdbf1b 100644 --- a/ext/ply/ply/lex.py +++ b/ext/ply/ply/lex.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,10 +31,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ----------------------------------------------------------------------------- -__version__ = "3.2" -__tabversion__ = "3.2" # Version of table file used +__version__ = '3.11' +__tabversion__ = '3.10' -import re, sys, types, copy, os +import re +import sys +import types +import copy +import os +import inspect # This tuple contains known string types try: @@ -44,59 +49,55 @@ except AttributeError: # Python 3.0 StringTypes = (str, bytes) -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s + # Token class. This class is used to represent the tokens produced. class LexToken(object): def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + def __repr__(self): return str(self) -# This object is a stand-in for a logging object created by the -# logging module. + +# This object is a stand-in for a logging object created by the +# logging module. 
class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical + # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self + # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +115,33 @@ class NullLogger(object): class Lexer: def __init__(self): self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled + # tuples (re, findex) where re is a compiled # regular expression and findex is a list # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode + self.lexoptimize = False # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +149,110 @@ class Lexer: # the lexstatere and lexstateerrorf tables. 
if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ # writetab() - Write lexer information to a table file # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). 
Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens)))) + tf.write('_lexreflags = %s\n' % repr(int(self.lexreflags))) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) # ------------------------------------------------------------ # readtab() - Read lexer information from a tab file # 
------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): lextab = tabfile else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') self.lextokens = lextab._lextokens self.lexreflags = lextab._lexreflags self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) self.lexstateinfo = lextab._lexstateinfo self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + 
self.begin('INITIAL') # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): + def input(self, s): # Pull off the first character to see if s looks like a string c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +260,20 @@ class Lexer: # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,7 +292,7 @@ class Lexer: # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ @@ -315,9 +316,10 @@ class Lexer: continue # Look for a regular expression match - for 
lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +328,16 @@ class Lexer: tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -355,10 +357,10 @@ class Lexer: # Verify type of the token. If not in the token map, raise an error if not self.lexoptimize: - if not newtok.type in self.lextokens: + if newtok.type not in self.lextokens_all: raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) return newtok else: @@ -377,7 +379,7 @@ class Lexer: tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos @@ -386,15 +388,27 @@ class Lexer: # Error method didn't change text position at all. This is an error. raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface @@ -416,6 +430,15 @@ class Lexer: # and build a Lexer object from it. # ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + # ----------------------------------------------------------------------------- # get_caller_module_dict() # @@ -423,21 +446,12 @@ class Lexer: # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. 
# ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # _funcs_to_names() @@ -445,14 +459,13 @@ def get_caller_module_dict(levels): # Given a list of regular expression functions, this converts it to a list # suitable for output to a table file # ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): +def _funcs_to_names(funclist, namelist): result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) return result # ----------------------------------------------------------------------------- @@ -461,15 +474,14 @@ def _funcs_to_names(funclist,namelist): # Given a list of regular expression function names, this converts it back to # functions. 
# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +490,37 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: m = 
int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +530,22 @@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +555,15 @@ def _statetoken(s,names): # user's input file. 
# ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +571,7 @@ class LexerReflect(object): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +581,20 @@ class LexerReflect(object): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +604,274 @@ class LexerReflect(object): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error("Bad token name '%s'", n) + self.error = True if n in terminals: self.log.warning("Token '%s' multiply defined", n) terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = 
self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 
'error': for s in states: self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # 
Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. 
Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. 
%s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state '%s'", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. + # match on each line in the source code of the given module. 
# ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,16 +881,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' 
+ lextab + # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() if not optimize: if linfo.validate_all(): @@ -895,7 +910,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now if optimize and lextab: try: - lexobj.readtab(lextab,ldict) + lexobj.readtab(lextab, ldict) token = lexobj.token input = lexobj.input lexer = lexobj @@ -906,92 +921,97 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state 
'%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + 
lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and input() functions token = lexobj.token @@ -1000,7 +1020,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # If in optimize mode, we write the lextab if lextab and optimize: - lexobj.writetab(lextab,outputdir) + if outputdir is None: + # If no output directory is set, the location of the 
output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + if lextab in sys.modules: + del sys.modules[lextab] + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) return lexobj @@ -1010,7 +1051,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] @@ -1018,7 +1059,7 @@ def runmain(lexer=None,data=None): data = f.read() f.close() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1031,10 +1072,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1044,14 +1086,13 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - 
f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc + return set_regex # Alternative spelling of the TOKEN decorator Token = TOKEN - diff --git a/ext/ply/ply/yacc.py b/ext/ply/ply/yacc.py index d4bb8822db..88188a1e8e 100644 --- a/ext/ply/ply/yacc.py +++ b/ext/ply/ply/yacc.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +32,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. 
The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +59,15 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.2" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import os.path +import inspect +import warnings + +__version__ = '3.11' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,7 +75,7 @@ __tabversion__ = "3.2" # Table version # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 0 # Debugging mode. If set, yacc generates a +yaccdebug = True # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file @@ -77,86 +84,117 @@ default_lr = 'LALR' # Default LR table generation method error_count = 3 # Number of symbols that must be shifted to leave recovery mode -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized +yaccdevel = False # Set to True if developing yacc. This turns off optimized # implementations of certain functions. resultlimit = 40 # Size limit of results when running in debug mode. 
pickle_protocol = 0 # Protocol to use when writing pickle files -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 +# String type-checking compatibility if sys.version_info[0] < 3: - def func_code(f): - return f.func_code + string_types = basestring else: - def func_code(f): - return f.__code__ + string_types = str -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize +MAXINT = sys.maxsize -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. 
class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... 
+ + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +214,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +230,53 @@ class YaccSymbol: # representing the range of positional information for a symbol. 
class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None - def __setitem__(self,n,v): + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): - self.slice[n].lineno = n + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos def error(self): - raise SyntaxError - + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == @@ -237,14 +285,16 @@ class YaccProduction: # 
----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,24 +304,42 @@ class LRParser: self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). 
+ # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): if debug or yaccdevel: - if isinstance(debug,int): + if isinstance(debug, int): debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) + return self.parseopt(input, lexer, debug, tracking, tokenfunc) else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parsedebug(). # # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: # # #--! DEBUG # statements @@ -279,22 +347,24 @@ class LRParser: # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) 
- goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -306,16 +376,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -325,52 +398,59 @@ class LRParser: statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG + #--! DEBUG debug.debug('') debug.debug('State : %s', state) - # --! DEBUG + #--! DEBUG - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' - # --! DEBUG + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG + + #--! DEBUG debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! 
DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,72 +464,77 @@ class LRParser: sym.type = pname # Production name sym.value = None - # --! DEBUG + #--! DEBUG if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -457,41 +542,43 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! 
DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG return result - if t == None: + if t is None: - # --! DEBUG + #--! DEBUG debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -505,20 +592,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead - if errtoken.type == "$end": + if errtoken.type == '$end': errtoken = None # End of file! 
if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -528,14 +610,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -545,7 +629,7 @@ class LRParser: # entire parse has been rolled back and we're completely hosed. The token is # discarded and we just keep going. - if len(statestack) <= 1 and lookahead.type != "$end": + if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None state = 0 @@ -557,7 +641,7 @@ class LRParser: # at the end of the file. nuke the top entry and generate an error token # Start nuking entries on the stack - if lookahead.type == "$end": + if lookahead.type == '$end': # Whoa. We're really hosed here. Bail out return @@ -566,48 +650,67 @@ class LRParser: if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! 
TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt(). # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) 
+ prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -617,16 +720,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -639,23 +745,28 @@ class LRParser: sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. 
Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if t is not None: if t > 0: @@ -663,11 +774,13 @@ class LRParser: statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -681,61 +794,64 @@ class LRParser: sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. 
pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -743,28 +859,32 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -778,20 +898,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. 
The @@ -801,14 +916,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -839,47 +956,67 @@ class LRParser: if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt_notrack(). 
# - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -889,16 +1026,19 @@ class LRParser: lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -911,23 +1051,28 @@ class LRParser: sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if t is not None: if t > 0: @@ -935,11 +1080,13 @@ class LRParser: statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -953,44 
+1100,50 @@ class LRParser: sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - targ = [ sym ] + + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -998,28 +1151,32 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. 
Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +1190,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. 
The @@ -1056,14 +1208,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1096,32 +1250,37 @@ class LRParser: # symbol and continue lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. 
# ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +1290,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') # This class stores the raw information about a single production or grammar rule. # A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +1310,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +1321,11 @@ class Production(object): self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +1336,15 @@ class Production(object): # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s -> " % self.name + self.str = '%s -> ' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,28 +1352,27 @@ class Production(object): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # 
Precompute the list of productions immediately following. Hack. Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - + # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1223,7 +1381,7 @@ class Production(object): # actually used by the LR parsing engine, plus some additional # debugging information. class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): + def __init__(self, str, name, len, func, file, line): self.name = name self.len = len self.func = func @@ -1231,13 +1389,15 @@ class MiniProduction(object): self.file = file self.line = line self.str = str + def __str__(self): return self.str + def __repr__(self): - return "MiniProduction(%s)" % self.str + return 'MiniProduction(%s)' % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1246,9 +1406,9 @@ class MiniProduction(object): # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. 
Here # basic attributes: @@ -1267,26 +1427,26 @@ class MiniProduction(object): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> " % self.name + s = '%s -> ' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,21 +1469,22 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate + self.Prodmap = {} # A dictionary that is only used to detect duplicate # productions. 
- self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: @@ -1331,17 +1492,17 @@ class Grammar(object): self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. # This is only used to provide error checking and to generate # a warning about unused precedence rules. 
@@ -1351,7 +1512,7 @@ class Grammar(object): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- @@ -1362,14 +1523,14 @@ class Grammar(object): # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # @@ -1387,72 +1548,74 @@ class Grammar(object): # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. 
error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. 
Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +1623,21 @@ class Grammar(object): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. 
# ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +1649,20 @@ class Grammar(object): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. + if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +1676,20 @@ class Grammar(object): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. 
for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +1697,19 @@ class Grammar(object): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. - p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +1717,9 @@ class Grammar(object): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1728,22 @@ class Grammar(object): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. 
# ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1754,7 @@ class Grammar(object): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1769,7 @@ class Grammar(object): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1781,15 @@ class Grammar(object): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1800,20 @@ class Grammar(object): # During execution of compute_first1, the result may be incomplete. 
# Afterward (e.g., when called from compute_follow()), it will be complete. # ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non- symbols of First[x] to the result. for f in self.First[x]: if f == '': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1852,17 @@ class Grammar(object): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1872,7 @@ class Grammar(object): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. 
# --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1883,36 @@ class Grammar(object): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != '' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == '': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1936,15 @@ class Grammar(object): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,7 +1952,8 @@ class Grammar(object): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 @@ -1804,12 +1962,13 @@ class Grammar(object): # ----------------------------------------------------------------------------- # == Class LRTable == # -# This basic 
class represents a basic table of LR parsing information. +# This basic class represents a basic table of LR parsing information. # Methods for generating the tables are not defined here. They are defined # in the derived class LRGeneratedTable. # ----------------------------------------------------------------------------- -class VersionError(YaccError): pass +class VersionError(YaccError): + pass class LRTable(object): def __init__(self): @@ -1818,19 +1977,15 @@ class LRTable(object): self.lr_productions = None self.lr_method = None - def read_table(self,module): - if isinstance(module,types.ModuleType): + def read_table(self, module): + if isinstance(module, types.ModuleType): parsetab = module else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] + exec('import %s' % module) + parsetab = sys.modules[module] if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_action = parsetab._lr_action self.lr_goto = parsetab._lr_goto @@ -1842,17 +1997,20 @@ class LRTable(object): self.lr_method = parsetab._lr_method return parsetab._lr_signature - def read_pickle(self,filename): + def read_pickle(self, filename): try: import cPickle as pickle except ImportError: import pickle - in_f = open(filename,"rb") + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') tabversion = pickle.load(in_f) if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_method = pickle.load(in_f) signature = pickle.load(in_f) self.lr_action = pickle.load(in_f) @@ -1867,14 +2025,15 @@ class LRTable(object): return signature # Bind all production function names to callable objects in pdict - def 
bind_callables(self,pdict): + def bind_callables(self, pdict): for p in self.lr_productions: p.bind(pdict) - + + # ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. # ----------------------------------------------------------------------------- @@ -1895,17 +2054,18 @@ class LRTable(object): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,20 +2074,22 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() -class LALRError(YaccError): pass +class LALRError(YaccError): + pass # ----------------------------------------------------------------------------- # == LRGeneratedTable == @@ -1937,9 +2099,9 @@ class LALRError(YaccError): pass # ----------------------------------------------------------------------------- 
class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) self.grammar = grammar self.lr_method = method @@ -1974,21 +2136,22 @@ class LRGeneratedTable(LRTable): # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +2162,43 @@ class LRGeneratedTable(LRTable): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. 
- def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +2211,15 @@ class LRGeneratedTable(LRTable): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +2254,21 @@ class LRGeneratedTable(LRTable): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] 
= 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +2282,16 @@ class LRGeneratedTable(LRTable): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +2302,21 @@ class LRGeneratedTable(LRTable): # Returns a list of terminals. 
# ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +2326,18 @@ class LRGeneratedTable(LRTable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +2369,7 @@ class LRGeneratedTable(LRTable): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +2379,12 @@ class LRGeneratedTable(LRTable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if 
p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . on the right hand side @@ -2227,44 +2392,50 @@ class LRGeneratedTable(LRTable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] + lr_index = lr_index + 1 + t = p.prod[lr_index] - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." 
- while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +2449,10 @@ class LRGeneratedTable(LRTable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +2471,11 @@ class LRGeneratedTable(LRTable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ 
-2318,15 +2489,16 @@ class LRGeneratedTable(LRTable): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +2507,7 @@ class LRGeneratedTable(LRTable): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +2515,16 @@ class LRGeneratedTable(LRTable): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,9 +2538,9 @@ class 
LRGeneratedTable(LRTable): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states @@ -2382,23 +2554,23 @@ class LRGeneratedTable(LRTable): st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: if p.len == p.lr_index + 1: if p.name == "S'": # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p + st_action['$end'] = 0 + st_actionp['$end'] = p else: # We are at the end of a production. Reduce! if self.lr_method == 'LALR': @@ -2406,31 +2578,36 @@ class LRGeneratedTable(LRTable): else: laheads = self.grammar.Follow[p.name] for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) if r is not None: # Whoa. Have a shift/reduce or reduce/reduce conflict if r > 0: # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. 
- sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number st_actionp[a] = p if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) Productions[p.number].reduced += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the shift if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif r < 0: # Reduce/reduce conflict. In this case, we favor the rule # that was defined first in the grammar file @@ -2439,15 +2616,16 @@ class LRGeneratedTable(LRTable): if oldp.line > pp.line: st_action[a] = -p.number st_actionp[a] = p - chosenp,rejectp = pp,oldp + chosenp, rejectp = pp, oldp Productions[p.number].reduced += 1 Productions[oldp.number].reduced -= 1 else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! 
reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = -p.number st_actionp[a] = p @@ -2456,205 +2634,211 @@ class LRGeneratedTable(LRTable): i = p.lr_index a = p.prod[i+1] # Get symbol right after the "." if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) if r is not None: # Whoa have a shift/reduce or shift/shift conflict if r > 0: if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) + raise LALRError('Shift/shift conflict in state %d' % st) elif r < 0: # Do a precedence check. # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 st_action[a] = j st_actionp[a] = p if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. 
Guess we'll keep the reduce if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = j st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! 
%-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - # ----------------------------------------------------------------------------- # write() # # This function writes the LR parsing tables to a file # ----------------------------------------------------------------------------- - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") - f.write(""" + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' + try: + f = open(filename, 'w') + + f.write(''' # %s # This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R _tabversion = %r _lr_method = %r _lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) # Change smaller to 0 to go back to original tables smaller = 1 # Factor out names to try and make smaller if smaller: - items = { } + items = {} - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_action = { } + f.write(''' +_lr_action = {} for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } + if not _x in _lr_action: _lr_action[_x] = {} _lr_action[_x][_k] = _y del _lr_action_items -""") +''') else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') if smaller: # Factor out names to try and make smaller - items = { } + items = {} - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], 
[]) + items[name] = i + i[0].append(s) + i[1].append(v) - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_goto = { } + f.write(''' +_lr_goto = {} for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} _lr_goto[_x][_k] = _y del _lr_goto_items -""") +''') else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') # Write production table - f.write("_lr_productions = [\n") + f.write('_lr_productions = [\n') for p in self.lr_productions: if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') f.close() - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return + except IOError as e: + raise # ----------------------------------------------------------------------------- @@ -2663,26 +2847,25 @@ del _lr_goto_items # This function pickles the LR parsing tables to a supplied file object # ----------------------------------------------------------------------------- - def 
pickle_table(self,filename,signature=""): + def pickle_table(self, filename, signature=''): try: import cPickle as pickle except ImportError: import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) # ----------------------------------------------------------------------------- # === INTROSPECTION === @@ -2700,26 +2883,18 @@ del _lr_goto_items # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it 
into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +2903,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +2918,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +2936,14 @@ def parse_grammar(doc,file,line): # etc. 
# ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2957,7 @@ class ParserReflect(object): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2965,28 @@ class ParserReflect(object): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2996,29 @@ class ParserReflect(object): # to try and detect duplicates. 
# ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +3027,7 @@ class ParserReflect(object): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, string_types): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +3037,173 @@ class ParserReflect(object): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = 
self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 - return - - if not tokens: - self.log.error("tokens is empty") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - self.tokens = tokens + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. 
Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) - # Sort all of the actions by line number - p_functions.sort() + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of 
the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. - self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. 
- for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,14 +3213,17 @@ class ParserReflect(object): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): - global parse # Reference to the parsing method of the last built parser + if tabmodule is 
None: + tabmodule = tab_module + + # Reference to the parsing method of the last built parser + global parse # If pickling is enabled, table files are not created - if picklefile: write_tables = 0 @@ -3049,17 +3232,54 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + else: + if '.' not in tabmodule: + srcfile = pdict['__file__'] + else: + parts = tabmodule.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = pdict.get('__package__') + if pkg and isinstance(tabmodule, str): + if '.' not in tabmodule: + tabmodule = pkg + '.' 
+ tabmodule + + + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) + pinfo = ParserReflect(pdict, log=errorlog) pinfo.get_all() if pinfo.error: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Check signature against table files (if any) signature = pinfo.signature() @@ -3074,35 +3294,36 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star if optimize or (read_signature == signature): try: lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() + except Exception as e: + errorlog.warning('There was a problem loading the table file: %r', e) + except VersionError as e: errorlog.warning(str(e)) - except Exception: + except ImportError: pass if debuglog is None: if debug: - debuglog = PlyLogger(open(debugfile,"w")) + try: + debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) + debuglog = NullLogger() else: debuglog = NullLogger() - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) - - errors = 0 + errors = False # Validate the parser information if pinfo.validate_all(): - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") + errorlog.warning('no p_error() function is defined') # Create a grammar object grammar = Grammar(pinfo.tokens) @@ -3110,20 +3331,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Set precedence level for terminals for term, assoc, level in pinfo.preclist: try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) # Add productions to the grammar for funcname, gram in pinfo.grammar: file, line, prodname, syms = gram try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True # Set the grammar start symbols try: @@ -3131,146 +3350,153 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] + except GrammarError as e: errorlog.error(str(e)) - errors = 1 + errors = True if errors: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Verify the grammar structure undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: - 
errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) # Print out all productions to the debug log if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals unused_rules = grammar.unused_rules() for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") + errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) + errorlog.warning('There are %d unused tokens', len(unused_terminals)) if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") + errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) + errorlog.warning('There are %d unused rules', len(unused_rules)) if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") + 
debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') terms = list(grammar.Terminals) terms.sort() for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') nonterms = list(grammar.Nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) + errorlog.warning('Symbol %r is unreachable', u) infinite = grammar.infinite_cycles() for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True if errors: - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + # Run the LRGeneratedTable on the grammar if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) + errorlog.debug('Generating %s tables', method) + + lr = LRGeneratedTable(grammar, method, debuglog) if debug: num_sr = len(lr.sr_conflicts) # Report shift/reduce and reduce/reduce conflicts if 
num_sr == 1: - errorlog.warning("1 shift/reduce conflict") + errorlog.warning('1 shift/reduce conflict') elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) + errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") + errorlog.warning('1 reduce/reduce conflict') elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) + errorlog.warning('%d reduce/reduce conflicts', num_rr) # Write out conflicts to the output file if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: + if (state, id(rule), id(rejected)) in already_reported: continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state %d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) 
+ warned_never = [] for state, rule, rejected in lr.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) # Write the table file if requested if write_tables: - lr.write_table(tabmodule,outputdir,signature) + try: + lr.write_table(tabmodule, outputdir, signature) + if tabmodule in sys.modules: + del sys.modules[tabmodule] + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) # Write a pickled version of the tables if picklefile: - lr.pickle_table(picklefile,signature) + try: + lr.pickle_table(picklefile, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) # Build the parser lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser diff --git a/ext/ply/ply/ygen.py b/ext/ply/ply/ygen.py new file mode 100644 index 0000000000..03b93180a7 --- /dev/null +++ b/ext/ply/ply/ygen.py @@ -0,0 +1,69 @@ +# ply: ygen.py +# +# This is a support program that auto-generates different versions of the YACC parsing +# function with different features removed for the purposes of performance. +# +# Users should edit the method LRParser.parsedebug() in yacc.py. The source code +# for that method is then used to create the other methods. See the comments in +# yacc.py for further details. + +import os.path +import shutil + +def get_source_range(lines, tag): + srclines = enumerate(lines) + start_tag = '#--! %s-start' % tag + end_tag = '#--! 
%s-end' % tag + + for start_index, line in srclines: + if line.strip().startswith(start_tag): + break + + for end_index, line in srclines: + if line.strip().endswith(end_tag): + break + + return (start_index + 1, end_index) + +def filter_section(lines, tag): + filtered_lines = [] + include = True + tag_text = '#--! %s' % tag + for line in lines: + if line.strip().startswith(tag_text): + include = not include + elif include: + filtered_lines.append(line) + return filtered_lines + +def main(): + dirname = os.path.dirname(__file__) + shutil.copy2(os.path.join(dirname, 'yacc.py'), os.path.join(dirname, 'yacc.py.bak')) + with open(os.path.join(dirname, 'yacc.py'), 'r') as f: + lines = f.readlines() + + parse_start, parse_end = get_source_range(lines, 'parsedebug') + parseopt_start, parseopt_end = get_source_range(lines, 'parseopt') + parseopt_notrack_start, parseopt_notrack_end = get_source_range(lines, 'parseopt-notrack') + + # Get the original source + orig_lines = lines[parse_start:parse_end] + + # Filter the DEBUG sections out + parseopt_lines = filter_section(orig_lines, 'DEBUG') + + # Filter the TRACKING sections out + parseopt_notrack_lines = filter_section(parseopt_lines, 'TRACKING') + + # Replace the parser source sections with updated versions + lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines + lines[parseopt_start:parseopt_end] = parseopt_lines + + lines = [line.rstrip()+'\n' for line in lines] + with open(os.path.join(dirname, 'yacc.py'), 'w') as f: + f.writelines(lines) + + print('Updated yacc.py') + +if __name__ == '__main__': + main() diff --git a/ext/ply/setup.cfg b/ext/ply/setup.cfg new file mode 100644 index 0000000000..819449e4c5 --- /dev/null +++ b/ext/ply/setup.cfg @@ -0,0 +1,10 @@ +[bdist_wheel] +universal = 1 + +[metadata] +description-file = README.md + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/ext/ply/setup.py b/ext/ply/setup.py index 606b29cde4..46bc6b34c4 100644 --- a/ext/ply/setup.py +++ 
b/ext/ply/setup.py @@ -14,13 +14,18 @@ PLY provides most of the standard lex/yacc features including support for empty productions, precedence rules, error recovery, and support for ambiguous grammars. PLY is extremely easy to use and provides very extensive error checking. +It is compatible with both Python 2 and Python 3. """, license="""BSD""", - version = "3.2", + version = "3.11", author = "David Beazley", author_email = "dave@dabeaz.com", maintainer = "David Beazley", maintainer_email = "dave@dabeaz.com", url = "http://www.dabeaz.com/ply/", packages = ['ply'], + classifiers = [ + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2', + ] ) diff --git a/ext/ply/test/README b/ext/ply/test/README index aac12b058b..03b167ce24 100644 --- a/ext/ply/test/README +++ b/ext/ply/test/README @@ -1,11 +1,8 @@ This directory mostly contains tests for various types of error conditions. To run: - $ python testlex.py . - $ python testyacc.py . - -The tests can also be run using the Python unittest module. - - $ python rununit.py + $ python testlex.py + $ python testyacc.py + $ python testcpp.py The script 'cleanup.sh' cleans up this directory to its original state. 
diff --git a/ext/ply/test/calclex.py b/ext/ply/test/calclex.py index 67d245f19e..030a9863dd 100644 --- a/ext/ply/test/calclex.py +++ b/ext/ply/test/calclex.py @@ -36,14 +36,14 @@ t_ignore = " \t" def t_newline(t): r'\n+' - t.lineno += t.value.count("\n") + t.lexer.lineno += t.value.count("\n") def t_error(t): print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer -lex.lex() +lexer = lex.lex() diff --git a/ext/ply/test/cleanup.sh b/ext/ply/test/cleanup.sh index 9db936837e..9374f2c60b 100755 --- a/ext/ply/test/cleanup.sh +++ b/ext/ply/test/cleanup.sh @@ -1,4 +1,4 @@ #!/bin/sh -rm -f *~ *.pyc *.pyo *.dif *.out +rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ diff --git a/ext/ply/test/lex_literal3.py b/ext/ply/test/lex_literal3.py new file mode 100644 index 0000000000..91ab980c84 --- /dev/null +++ b/ext/ply/test/lex_literal3.py @@ -0,0 +1,26 @@ +# lex_literal3.py +# +# An empty literal specification given as a list +# Issue 8 : Literals empty list causes IndexError + +import sys +if ".." 
not in sys.path: sys.path.insert(0,"..") + +import ply.lex as lex + +tokens = [ + "NUMBER", + ] + +literals = [] + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/ext/ply/test/lex_optimize3.py b/ext/ply/test/lex_optimize3.py index c6c8cce652..b8df5aab2c 100644 --- a/ext/ply/test/lex_optimize3.py +++ b/ext/ply/test/lex_optimize3.py @@ -45,7 +45,7 @@ def t_error(t): t.lexer.skip(1) # Build the lexer -lex.lex(optimize=1,lextab="lexdir.sub.calctab",outputdir="lexdir/sub") +lex.lex(optimize=1,lextab="lexdir.sub.calctab" ,outputdir="lexdir/sub") lex.runmain(data="3+4") diff --git a/ext/ply/test/lex_optimize4.py b/ext/ply/test/lex_optimize4.py new file mode 100644 index 0000000000..cc6e2a9d20 --- /dev/null +++ b/ext/ply/test/lex_optimize4.py @@ -0,0 +1,26 @@ +# ----------------------------------------------------------------------------- +# lex_optimize4.py +# ----------------------------------------------------------------------------- +import re +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+?' +t_MINUS = r'-' +t_NUMBER = r'(\d+)' + +def t_error(t): + pass + + +# Build the lexer +lex.lex(optimize=True, lextab="opt4tab", reflags=re.UNICODE) +lex.runmain(data="3+4") diff --git a/ext/ply/test/pkg_test1/__init__.py b/ext/ply/test/pkg_test1/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test1/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' 
not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test1/parsing/__init__.py b/ext/ply/test/pkg_test1/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test1/parsing/calclex.py b/ext/ply/test/pkg_test1/parsing/calclex.py new file mode 100644 index 0000000000..b3c1a4d6bb --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True) + + + diff --git a/ext/ply/test/pkg_test1/parsing/calcparse.py b/ext/ply/test/pkg_test1/parsing/calcparse.py new file mode 100644 index 0000000000..c058e9f77d --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def 
p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() + + + + + diff --git a/ext/ply/test/pkg_test1/parsing/lextab.py b/ext/ply/test/pkg_test1/parsing/lextab.py new file mode 100644 index 0000000000..52376b2aaa --- /dev/null +++ b/ext/ply/test/pkg_test1/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P<t_NUMBER>\\d+)|(?P<t_newline>\\n+)|(?P<t_NAME>[a-zA-Z_][a-zA-Z0-9_]*)|(?P<t_PLUS>\\+)|(?P<t_LPAREN>\\()|(?P<t_TIMES>\\*)|(?P<t_RPAREN>\\))|(?P<t_EQUALS>=)|(?P<t_DIVIDE>/)|(?P<t_MINUS>-)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test2/__init__.py b/ext/ply/test/pkg_test2/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test2/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test2/parsing/__init__.py b/ext/ply/test/pkg_test2/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test2/parsing/calclex.py b/ext/ply/test/pkg_test2/parsing/calclex.py new file mode 100644 index 0000000000..789e13f864 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: +
print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True, lextab='calclextab') + + + diff --git a/ext/ply/test/pkg_test2/parsing/calclextab.py b/ext/ply/test/pkg_test2/parsing/calclextab.py new file mode 100644 index 0000000000..a616c397c6 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calclextab.py @@ -0,0 +1,10 @@ +# calclextab.py. This file automatically created by PLY (version 3.11). Don't edit! +_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P<t_NUMBER>\\d+)|(?P<t_newline>\\n+)|(?P<t_NAME>[a-zA-Z_][a-zA-Z0-9_]*)|(?P<t_PLUS>\\+)|(?P<t_LPAREN>\\()|(?P<t_TIMES>\\*)|(?P<t_RPAREN>\\))|(?P<t_EQUALS>=)|(?P<t_DIVIDE>/)|(?P<t_MINUS>-)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test2/parsing/calcparse.py b/ext/ply/test/pkg_test2/parsing/calcparse.py new file mode 100644 index 0000000000..f5193389b0 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS
expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc(tabmodule='calcparsetab') + + + + + diff --git a/ext/ply/test/pkg_test2/parsing/calcparsetab.py b/ext/ply/test/pkg_test2/parsing/calcparsetab.py new file mode 100644 index 0000000000..23e3208308 --- /dev/null +++ b/ext/ply/test/pkg_test2/parsing/calcparsetab.py @@ -0,0 +1,40 @@ + +# calcparsetab.py +# This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'leftPLUSMINUSleftTIMESDIVIDErightUMINUSDIVIDE EQUALS LPAREN MINUS NAME NUMBER PLUS RPAREN TIMESstatement : NAME EQUALS expressionstatement : expressionexpression : expression PLUS expression\n | expression MINUS expression\n | expression TIMES expression\n | expression DIVIDE expressionexpression : MINUS expression %prec UMINUSexpression : LPAREN expression RPARENexpression : NUMBERexpression : NAME' + +_lr_action_items = {'PLUS':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,11,-10,-7,11,-8,11,-3,-4,-6,-5,]),'MINUS':([0,1,2,3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,],[1,1,-9,1,-10,12,-10,-7,12,1,1,1,1,1,-8,12,-3,-4,-6,-5,]),'EQUALS':([4,],[10,]),'NUMBER':([0,1,3,10,11,12,13,14,],[2,2,2,2,2,2,2,2,]),'LPAREN':([0,1,3,10,11,12,13,14,],[3,3,3,3,3,3,3,3,]),'NAME':([0,1,3,10,11,12,13,14,],[4,7,7,7,7,7,7,7,]),'TIMES':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,14,-10,-7,14,-8,14,14,14,-6,-5,]),'$end':([2,4,5,6,7,8,15,16,17,18,19,20,],[-9,-10,0,-2,-10,-7,-8,-1,-3,-4,-6,-5,]),'RPAREN':([2,7,8,9,15,17,18,19,20,],[-9,-10,-7,15,-8,-3,-4,-6,-5,]),'DIVIDE':([2,4,6,7,8,9,15,16,17,18,19,20,],[-9,-10,13,-10,-7,13,-8,13,13,13,-6,-5,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'statement':([0,],[5,]),'expression':([0,1,3,10,11,12,13,14,],[6,8,9,16,17,18,19,20,]),} + +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> statement","S'",1,None,None,None), + ('statement -> NAME EQUALS expression','statement',3,'p_statement_assign','calcparse.py',21), + ('statement -> expression','statement',1,'p_statement_expr','calcparse.py',25), + ('expression -> expression PLUS 
expression','expression',3,'p_expression_binop','calcparse.py',29), + ('expression -> expression MINUS expression','expression',3,'p_expression_binop','calcparse.py',30), + ('expression -> expression TIMES expression','expression',3,'p_expression_binop','calcparse.py',31), + ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','calcparse.py',32), + ('expression -> MINUS expression','expression',2,'p_expression_uminus','calcparse.py',39), + ('expression -> LPAREN expression RPAREN','expression',3,'p_expression_group','calcparse.py',43), + ('expression -> NUMBER','expression',1,'p_expression_number','calcparse.py',47), + ('expression -> NAME','expression',1,'p_expression_name','calcparse.py',51), +] diff --git a/ext/ply/test/pkg_test3/__init__.py b/ext/ply/test/pkg_test3/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test3/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test3/generated/__init__.py b/ext/ply/test/pkg_test3/generated/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test3/generated/lextab.py b/ext/ply/test/pkg_test3/generated/lextab.py new file mode 100644 index 0000000000..52376b2aaa --- /dev/null +++ b/ext/ply/test/pkg_test3/generated/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P\\d+)|(?P\\n+)|(?P[a-zA-Z_][a-zA-Z0-9_]*)|(?P\\+)|(?P\\()|(?P\\*)|(?P\\))|(?P=)|(?P/)|(?P-)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'PLUS'), (None, 'LPAREN'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test3/parsing/__init__.py b/ext/ply/test/pkg_test3/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test3/parsing/calclex.py b/ext/ply/test/pkg_test3/parsing/calclex.py new file mode 100644 index 0000000000..6ca2c4f3c1 --- /dev/null +++ b/ext/ply/test/pkg_test3/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True, lextab='pkg_test3.generated.lextab') + + + diff --git a/ext/ply/test/pkg_test3/parsing/calcparse.py 
b/ext/ply/test/pkg_test3/parsing/calcparse.py new file mode 100644 index 0000000000..2dcb52b3c4 --- /dev/null +++ b/ext/ply/test/pkg_test3/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc(tabmodule='pkg_test3.generated.parsetab') + + + + + diff --git a/ext/ply/test/pkg_test4/__init__.py b/ext/ply/test/pkg_test4/__init__.py new file mode 100644 index 0000000000..ba9ddacf6a --- /dev/null +++ b/ext/ply/test/pkg_test4/__init__.py @@ -0,0 +1,25 @@ +# Tests proper handling of lextab and parsetab files in package structures +# Check of warning messages when files aren't writable + +# Here for testing purposes 
+import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +import ply.lex +import ply.yacc + +def patched_open(filename, mode): + if 'w' in mode: + raise IOError("Permission denied %r" % filename) + return open(filename, mode) + +ply.lex.open = patched_open +ply.yacc.open = patched_open +try: + from .parsing.calcparse import parser +finally: + del ply.lex.open + del ply.yacc.open + + diff --git a/ext/ply/test/pkg_test4/parsing/__init__.py b/ext/ply/test/pkg_test4/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test4/parsing/calclex.py b/ext/ply/test/pkg_test4/parsing/calclex.py new file mode 100644 index 0000000000..b3c1a4d6bb --- /dev/null +++ b/ext/ply/test/pkg_test4/parsing/calclex.py @@ -0,0 +1,47 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex(optimize=True) + + + diff --git a/ext/ply/test/pkg_test4/parsing/calcparse.py b/ext/ply/test/pkg_test4/parsing/calcparse.py new file mode 100644 index 0000000000..c058e9f77d --- /dev/null +++ b/ext/ply/test/pkg_test4/parsing/calcparse.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly 
specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() + + + + + diff --git a/ext/ply/test/pkg_test5/__init__.py b/ext/ply/test/pkg_test5/__init__.py new file mode 100644 index 0000000000..0e195589eb --- /dev/null +++ b/ext/ply/test/pkg_test5/__init__.py @@ -0,0 +1,9 @@ +# Tests proper handling of lextab and parsetab files in package structures + +# Here for testing purposes +import sys +if '..' 
not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test5/parsing/__init__.py b/ext/ply/test/pkg_test5/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test5/parsing/calclex.py b/ext/ply/test/pkg_test5/parsing/calclex.py new file mode 100644 index 0000000000..e8759b6f01 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/calclex.py @@ -0,0 +1,48 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import os.path +lexer = lex.lex(optimize=True, outputdir=os.path.dirname(__file__)) + + + diff --git a/ext/ply/test/pkg_test5/parsing/calcparse.py b/ext/ply/test/pkg_test5/parsing/calcparse.py new file mode 100644 index 0000000000..2a1ddfe190 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/calcparse.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + 
('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +import os.path +parser = yacc.yacc(outputdir=os.path.dirname(__file__)) + + + + + diff --git a/ext/ply/test/pkg_test5/parsing/lextab.py b/ext/ply/test/pkg_test5/parsing/lextab.py new file mode 100644 index 0000000000..8cab2985d1 --- /dev/null +++ b/ext/ply/test/pkg_test5/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! 
+_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P\\d+)|(?P\\n+)|(?P[a-zA-Z_][a-zA-Z0-9_]*)|(?P\\()|(?P\\+)|(?P\\*)|(?P\\))|(?P=)|(?P/)|(?P-)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'LPAREN'), (None, 'PLUS'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test6/__init__.py b/ext/ply/test/pkg_test6/__init__.py new file mode 100644 index 0000000000..5dbe0cbd1d --- /dev/null +++ b/ext/ply/test/pkg_test6/__init__.py @@ -0,0 +1,9 @@ +# Tests proper sorting of modules in yacc.ParserReflect.get_pfunctions + +# Here for testing purposes +import sys +if '..' not in sys.path: + sys.path.insert(0, '..') + +from .parsing.calcparse import parser + diff --git a/ext/ply/test/pkg_test6/parsing/__init__.py b/ext/ply/test/pkg_test6/parsing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ext/ply/test/pkg_test6/parsing/calclex.py b/ext/ply/test/pkg_test6/parsing/calclex.py new file mode 100644 index 0000000000..e8759b6f01 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/calclex.py @@ -0,0 +1,48 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer 
value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import os.path +lexer = lex.lex(optimize=True, outputdir=os.path.dirname(__file__)) + + + diff --git a/ext/ply/test/pkg_test6/parsing/calcparse.py b/ext/ply/test/pkg_test6/parsing/calcparse.py new file mode 100644 index 0000000000..6defaf9748 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/calcparse.py @@ -0,0 +1,33 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specifier grammar +# ----------------------------------------------------------------------------- + +from .calclex import tokens +from ply import yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +from .statement import * + +from .expression import * + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +import os.path +parser = yacc.yacc(outputdir=os.path.dirname(__file__)) + + + + + diff --git a/ext/ply/test/pkg_test6/parsing/expression.py b/ext/ply/test/pkg_test6/parsing/expression.py new file mode 100644 index 0000000000..028f662724 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/expression.py @@ -0,0 +1,31 @@ +# This file contains definitions of expression grammar + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + 
t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 diff --git a/ext/ply/test/pkg_test6/parsing/lextab.py b/ext/ply/test/pkg_test6/parsing/lextab.py new file mode 100644 index 0000000000..8cab2985d1 --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/lextab.py @@ -0,0 +1,10 @@ +# lextab.py. This file automatically created by PLY (version 3.11). Don't edit! +_tabversion = '3.10' +_lextokens = set(('DIVIDE', 'EQUALS', 'LPAREN', 'MINUS', 'NAME', 'NUMBER', 'PLUS', 'RPAREN', 'TIMES')) +_lexreflags = 64 +_lexliterals = '' +_lexstateinfo = {'INITIAL': 'inclusive'} +_lexstatere = {'INITIAL': [('(?P\\d+)|(?P\\n+)|(?P[a-zA-Z_][a-zA-Z0-9_]*)|(?P\\()|(?P\\+)|(?P\\*)|(?P\\))|(?P=)|(?P/)|(?P-)', [None, ('t_NUMBER', 'NUMBER'), ('t_newline', 'newline'), (None, 'NAME'), (None, 'LPAREN'), (None, 'PLUS'), (None, 'TIMES'), (None, 'RPAREN'), (None, 'EQUALS'), (None, 'DIVIDE'), (None, 'MINUS')])]} +_lexstateignore = {'INITIAL': ' \t'} +_lexstateerrorf = {'INITIAL': 't_error'} +_lexstateeoff = {} diff --git a/ext/ply/test/pkg_test6/parsing/statement.py b/ext/ply/test/pkg_test6/parsing/statement.py new file mode 100644 index 0000000000..ef7dc55e3f --- /dev/null +++ b/ext/ply/test/pkg_test6/parsing/statement.py @@ -0,0 +1,9 @@ +# This file contains definitions of statement grammar + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + t[0] = t[1] diff --git a/ext/ply/test/testcpp.py b/ext/ply/test/testcpp.py new file mode 100644 index 0000000000..2e98edd899 --- /dev/null +++ b/ext/ply/test/testcpp.py @@ -0,0 +1,101 @@ +from unittest import TestCase, main + +from multiprocessing import Process, Queue +from six.moves.queue import Empty + +import sys + +if ".." 
not in sys.path: + sys.path.insert(0, "..") + +from ply.lex import lex +from ply.cpp import * + + +def preprocessing(in_, out_queue): + out = None + + try: + p = Preprocessor(lex()) + p.parse(in_) + tokens = [t.value for t in p.parser] + out = "".join(tokens) + finally: + out_queue.put(out) + +class CPPTests(TestCase): + "Tests related to ANSI-C style lexical preprocessor." + + def __test_preprocessing(self, in_, expected, time_limit = 1.0): + out_queue = Queue() + + preprocessor = Process( + name = "PLY`s C preprocessor", + target = preprocessing, + args = (in_, out_queue) + ) + + preprocessor.start() + + try: + out = out_queue.get(timeout = time_limit) + except Empty: + preprocessor.terminate() + raise RuntimeError("Time limit exceeded!") + else: + self.assertMultiLineEqual(out, expected) + + def test_concatenation(self): + self.__test_preprocessing("""\ +#define a(x) x##_ +#define b(x) _##x +#define c(x) _##x##_ +#define d(x,y) _##x##y##_ + +a(i) +b(j) +c(k) +d(q,s)""" + , """\ + + + + + +i_ +_j +_k_ +_qs_""" + ) + + def test_deadloop_macro(self): + # If a word equals the name of a parameterized macro, then + # attempting to expand that word as a macro makes the parser fall + # into an infinite loop. + + self.__test_preprocessing("""\ +#define a(x) x + +a;""" + , """\ + + +a;""" + ) + + def test_index_error(self): + # If there are no tokens after a word ("a") which equals the name of + # a parameterized macro, then attempting to expand this word leads to + # IndexError.
+ + self.__test_preprocessing("""\ +#define a(x) x + +a""" + , """\ + + +a""" + ) + +main() diff --git a/ext/ply/test/testlex.py b/ext/ply/test/testlex.py index 606387d1d8..83070a7ab4 100755 --- a/ext/ply/test/testlex.py +++ b/ext/ply/test/testlex.py @@ -7,12 +7,57 @@ except ImportError: import io as StringIO import sys +import os +import warnings +import platform + sys.path.insert(0,"..") sys.tracebacklimit = 0 import ply.lex -def check_expected(result,expected): +try: + from importlib.util import cache_from_source +except ImportError: + # Python 2.7, but we don't care. + cache_from_source = None + + +def make_pymodule_path(filename, optimization=None): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if sys.hexversion >= 0x3050000: + fullpath = cache_from_source(filename, optimization=optimization) + elif sys.hexversion >= 0x3040000: + fullpath = cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename, optimization=None): + return os.path.exists(make_pymodule_path(filename, + optimization=optimization)) + +def pymodule_out_remove(filename, optimization=None): + os.remove(make_pymodule_path(filename, optimization=optimization)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +test_pyo = (implementation() == 'CPython') + +def check_expected(result, expected, contains=False): if sys.version_info[0] >= 3: if isinstance(result,str): result = result.encode('ascii') @@ -21,13 +66,16 @@ def check_expected(result,expected): resultlines = result.splitlines() expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): return False for rline,eline in 
zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False + if contains: + if eline not in rline: + return False + else: + if not rline.endswith(eline): + return False return True def run_import(module): @@ -40,6 +88,9 @@ class LexErrorWarningTests(unittest.TestCase): def setUp(self): sys.stderr = StringIO.StringIO() sys.stdout = StringIO.StringIO() + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore',category=ResourceWarning) + def tearDown(self): sys.stderr = sys.__stderr__ sys.stdout = sys.__stdout__ @@ -114,8 +165,13 @@ class LexErrorWarningTests(unittest.TestCase): def test_lex_re1(self): self.assertRaises(SyntaxError,run_import,"lex_re1") result = sys.stderr.getvalue() + if sys.hexversion < 0x3050000: + msg = "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n" + else: + msg = "Invalid regular expression for rule 't_NUMBER'. missing ), unterminated subpattern at position 0" self.assert_(check_expected(result, - "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n")) + msg, + contains=True)) def test_lex_re2(self): self.assertRaises(SyntaxError,run_import,"lex_re2") @@ -126,9 +182,19 @@ class LexErrorWarningTests(unittest.TestCase): def test_lex_re3(self): self.assertRaises(SyntaxError,run_import,"lex_re3") result = sys.stderr.getvalue() +# self.assert_(check_expected(result, +# "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" +# "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) + + if sys.hexversion < 0x3050000: + msg = ("Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" + "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n") + else: + msg = ("Invalid regular expression for rule 't_POUND'. missing ), unterminated subpattern at position 0\n" + "ERROR: Make sure '#' in rule 't_POUND' is escaped with '\\#'") self.assert_(check_expected(result, - "Invalid regular expression for rule 't_POUND'.
unbalanced parenthesis\n" - "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) + msg, + contains=True), result) def test_lex_rule1(self): self.assertRaises(SyntaxError,run_import,"lex_rule1") @@ -294,6 +360,7 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) + def test_lex_optimize(self): try: os.remove("lextab.py") @@ -316,7 +383,6 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,4,1,2)\n")) self.assert_(os.path.exists("lextab.py")) - p = subprocess.Popen([sys.executable,'-O','lex_optimize.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -325,9 +391,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("lextab.pyo", 1)) + pymodule_out_remove("lextab.pyo", 1) - os.remove("lextab.pyo") p = subprocess.Popen([sys.executable,'-OO','lex_optimize.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -335,17 +402,19 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lextab.pyo")) + + if test_pyo: + self.assert_(pymodule_out_exists("lextab.pyo", 2)) try: os.remove("lextab.py") except OSError: pass try: - os.remove("lextab.pyc") + pymodule_out_remove("lextab.pyc") except OSError: pass try: - os.remove("lextab.pyo") + pymodule_out_remove("lextab.pyo", 2) except OSError: pass @@ -377,8 +446,9 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.pyo")) - os.remove("opt2tab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("opt2tab.pyo", 1)) + pymodule_out_remove("opt2tab.pyo", 1) p = subprocess.Popen([sys.executable,'-OO','lex_optimize2.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -386,17 +456,18 @@ class 
LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("opt2tab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("opt2tab.pyo", 2)) try: os.remove("opt2tab.py") except OSError: pass try: - os.remove("opt2tab.pyc") + pymodule_out_remove("opt2tab.pyc") except OSError: pass try: - os.remove("opt2tab.pyo") + pymodule_out_remove("opt2tab.pyo", 2) except OSError: pass @@ -425,8 +496,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.pyo")) - os.remove("lexdir/sub/calctab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo", 1)) + pymodule_out_remove("lexdir/sub/calctab.pyo", 1) + p = subprocess.Popen([sys.executable,'-OO','lex_optimize3.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -434,12 +507,33 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(PLUS,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("lexdir/sub/calctab.pyo")) + if test_pyo: + self.assert_(pymodule_out_exists("lexdir/sub/calctab.pyo", 2)) try: shutil.rmtree("lexdir") except OSError: pass + def test_lex_optimize4(self): + + # Regression test to make sure that reflags works correctly + # on Python 3. 
+ + for extension in ['py', 'pyc']: + try: + os.remove("opt4tab.{0}".format(extension)) + except OSError: + pass + + run_import("lex_optimize4") + run_import("lex_optimize4") + + for extension in ['py', 'pyc']: + try: + os.remove("opt4tab.{0}".format(extension)) + except OSError: + pass + def test_lex_opt_alias(self): try: os.remove("aliastab.py") @@ -468,8 +562,10 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(+,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.pyo")) - os.remove("aliastab.pyo") + if test_pyo: + self.assert_(pymodule_out_exists("aliastab.pyo", 1)) + pymodule_out_remove("aliastab.pyo", 1) + p = subprocess.Popen([sys.executable,'-OO','lex_opt_alias.py'], stdout=subprocess.PIPE) result = p.stdout.read() @@ -477,17 +573,19 @@ class LexBuildOptionTests(unittest.TestCase): "(NUMBER,3,1,0)\n" "(+,'+',1,1)\n" "(NUMBER,4,1,2)\n")) - self.assert_(os.path.exists("aliastab.pyo")) + + if test_pyo: + self.assert_(pymodule_out_exists("aliastab.pyo", 2)) try: os.remove("aliastab.py") except OSError: pass try: - os.remove("aliastab.pyc") + pymodule_out_remove("aliastab.pyc") except OSError: pass try: - os.remove("aliastab.pyo") + pymodule_out_remove("aliastab.pyo", 2) except OSError: pass @@ -518,21 +616,22 @@ class LexBuildOptionTests(unittest.TestCase): self.assert_(os.path.exists("manytab.py")) - p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], - stdout=subprocess.PIPE) - result = p.stdout.read() - self.assert_(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) + if implementation() == 'CPython': + p = subprocess.Popen([sys.executable,'-O','lex_many_tokens.py'], + stdout=subprocess.PIPE) + result = p.stdout.read() + self.assert_(check_expected(result, + "(TOK34,'TOK34:',1,0)\n" + "(TOK143,'TOK143:',1,7)\n" + 
"(TOK269,'TOK269:',1,15)\n" + "(TOK372,'TOK372:',1,23)\n" + "(TOK452,'TOK452:',1,31)\n" + "(TOK561,'TOK561:',1,39)\n" + "(TOK999,'TOK999:',1,47)\n" + )) - self.assert_(os.path.exists("manytab.pyo")) - os.remove("manytab.pyo") + self.assert_(pymodule_out_exists("manytab.pyo", 1)) + pymodule_out_remove("manytab.pyo", 1) try: os.remove("manytab.py") except OSError: diff --git a/ext/ply/test/testyacc.py b/ext/ply/test/testyacc.py index cc53b6d8f1..7e69f099d4 100644 --- a/ext/ply/test/testyacc.py +++ b/ext/ply/test/testyacc.py @@ -8,28 +8,68 @@ except ImportError: import sys import os +import warnings +import re +import platform sys.path.insert(0,"..") sys.tracebacklimit = 0 import ply.yacc -def check_expected(result,expected): - resultlines = [] +def make_pymodule_path(filename): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if sys.hexversion >= 0x3040000: + import importlib.util + fullpath = importlib.util.cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename): + return os.path.exists(make_pymodule_path(filename)) + +def pymodule_out_remove(filename): + os.remove(make_pymodule_path(filename)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +# Check the output to see if it contains all of a set of expected output lines. 
+# This alternate implementation looks weird, but is needed to properly handle +# some variations in error message order that occur due to dict hash table +# randomization that was introduced in Python 3.3 +def check_expected(result, expected): + # Normalize 'state n' text to account for randomization effects in Python 3.3 + expected = re.sub(r' state \d+', 'state ', expected) + result = re.sub(r' state \d+', 'state ', result) + + resultlines = set() for line in result.splitlines(): if line.startswith("WARNING: "): line = line[9:] elif line.startswith("ERROR: "): line = line[7:] - resultlines.append(line) + resultlines.add(line) - expectedlines = expected.splitlines() - if len(resultlines) != len(expectedlines): - return False - for rline,eline in zip(resultlines,expectedlines): - if not rline.endswith(eline): - return False - return True + # Selectively remove expected lines from the output + for eline in expected.splitlines(): + resultlines = set(line for line in resultlines if not line.endswith(eline)) + + # Return True if no result lines remain + return not bool(resultlines) def run_import(module): code = "import "+module @@ -43,10 +83,14 @@ class YaccErrorWarningTests(unittest.TestCase): sys.stdout = StringIO.StringIO() try: os.remove("parsetab.py") - os.remove("parsetab.pyc") + pymodule_out_remove("parsetab.pyc") except OSError: pass + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore', category=ResourceWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + def tearDown(self): sys.stderr = sys.__stderr__ sys.stdout = sys.__stdout__ @@ -148,7 +192,38 @@ class YaccErrorWarningTests(unittest.TestCase): self.assert_(check_expected(result, "yacc_error4.py:62: Illegal rule name 'error'. 
Already defined as a token\n" )) - + + + def test_yacc_error5(self): + run_import("yacc_error5") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "Group at 3:10 to 3:12\n" + "Undefined name 'a'\n" + "Syntax error at 'b'\n" + "Syntax error at 4:18 to 4:22\n" + "Assignment Error at 2:5 to 5:27\n" + "13\n" + )) + + def test_yacc_error6(self): + run_import("yacc_error6") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + + def test_yacc_error7(self): + run_import("yacc_error7") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + def test_yacc_inf(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") result = sys.stderr.getvalue() @@ -261,6 +336,7 @@ class YaccErrorWarningTests(unittest.TestCase): self.assert_(check_expected(result, "Generating LALR tables\n" )) + def test_yacc_sr(self): run_import("yacc_sr") result = sys.stderr.getvalue() @@ -276,6 +352,13 @@ class YaccErrorWarningTests(unittest.TestCase): "yacc_term1.py:24: Illegal rule name 'NUMBER'. 
Already defined as a token\n" )) + def test_yacc_unicode_literals(self): + run_import("yacc_unicode_literals") + result = sys.stderr.getvalue() + self.assert_(check_expected(result, + "Generating LALR tables\n" + )) + def test_yacc_unused(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") result = sys.stderr.getvalue() @@ -297,7 +380,6 @@ class YaccErrorWarningTests(unittest.TestCase): def test_yacc_uprec(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") result = sys.stderr.getvalue() - print repr(result) self.assert_(check_expected(result, "yacc_uprec.py:37: Nothing known about the precedence of 'UMINUS'\n" )) @@ -319,6 +401,52 @@ class YaccErrorWarningTests(unittest.TestCase): "Precedence rule 'left' defined for unknown symbol '/'\n" )) + def test_pkg_test1(self): + from pkg_test1 import parser + self.assertTrue(os.path.exists('pkg_test1/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test1/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test1/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test2(self): + from pkg_test2 import parser + self.assertTrue(os.path.exists('pkg_test2/parsing/calcparsetab.py')) + self.assertTrue(os.path.exists('pkg_test2/parsing/calclextab.py')) + self.assertTrue(os.path.exists('pkg_test2/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test3(self): + from pkg_test3 import parser + self.assertTrue(os.path.exists('pkg_test3/generated/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test3/generated/lextab.py')) + self.assertTrue(os.path.exists('pkg_test3/generated/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test4(self): + from pkg_test4 import parser + self.assertFalse(os.path.exists('pkg_test4/parsing/parsetab.py')) + self.assertFalse(os.path.exists('pkg_test4/parsing/lextab.py')) + self.assertFalse(os.path.exists('pkg_test4/parsing/parser.out')) 
+ r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test5(self): + from pkg_test5 import parser + self.assertTrue(os.path.exists('pkg_test5/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test5/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test5/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) + + def test_pkg_test6(self): + from pkg_test6 import parser + self.assertTrue(os.path.exists('pkg_test6/parsing/parsetab.py')) + self.assertTrue(os.path.exists('pkg_test6/parsing/lextab.py')) + self.assertTrue(os.path.exists('pkg_test6/parsing/parser.out')) + r = parser.parse('3+4+5') + self.assertEqual(r, 12) - unittest.main() diff --git a/ext/ply/test/yacc_error5.py b/ext/ply/test/yacc_error5.py new file mode 100644 index 0000000000..9eb0f8574a --- /dev/null +++ b/ext/ply/test/yacc_error5.py @@ -0,0 +1,94 @@ +# ----------------------------------------------------------------------------- +# yacc_error5.py +# +# Lineno and position tracking with error tokens +# ----------------------------------------------------------------------------- +import sys + +if ".." 
not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_error(t): + 'statement : NAME EQUALS error' + line_start, line_end = t.linespan(3) + pos_start, pos_end = t.lexspan(3) + print("Assignment Error at %d:%d to %d:%d" % (line_start,pos_start,line_end,pos_end)) + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Group at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = t[2] + +def p_expression_group_error(t): + 'expression : LPAREN error RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Syntax error at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = 0 + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 +parser.parse(""" +a = 3 + +(4*5) + +(a b c) + ++ 6 + 7 +""", tracking=True) + + + + + + diff --git 
a/ext/ply/test/yacc_error6.py b/ext/ply/test/yacc_error6.py new file mode 100644 index 0000000000..8d0ec85bea --- /dev/null +++ b/ext/ply/test/yacc_error6.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error6.py +# +# Panic mode recovery test +# ----------------------------------------------------------------------------- +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a closing RPAREN token + while True: + tok = parser.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + parser.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/ext/ply/test/yacc_error7.py b/ext/ply/test/yacc_error7.py new file mode 100644 index 0000000000..fb131beaba --- /dev/null +++ 
b/ext/ply/test/yacc_error7.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error7.py +# +# Panic mode recovery test using deprecated functionality +# ----------------------------------------------------------------------------- +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a closing RPAREN token + while True: + tok = yacc.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + yacc.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/ext/ply/test/yacc_prec1.py b/ext/ply/test/yacc_prec1.py index 2ca6afc0b0..99fcd903bd 100644 --- a/ext/ply/test/yacc_prec1.py +++ b/ext/ply/test/yacc_prec1.py @@ -12,8 +12,8 @@ from calclex import tokens # Parsing rules precedence = ( - 
('left','+','-'), - ('left','*','/'), + ('left', '+', '-'), + ('left', '*', '/'), ('right','UMINUS'), ) diff --git a/ext/ply/test/yacc_unicode_literals.py b/ext/ply/test/yacc_unicode_literals.py new file mode 100644 index 0000000000..5ae4f5b8a7 --- /dev/null +++ b/ext/ply/test/yacc_unicode_literals.py @@ -0,0 +1,70 @@ +# ----------------------------------------------------------------------------- +# yacc_unicode_literals +# +# Test for unicode literals on Python 2.x +# ----------------------------------------------------------------------------- +from __future__ import unicode_literals + +import sys + +if ".." not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + +