Source for file Parser.php
Documentation is available at Parser.php
/* Driver template for the PHP_ParserGenerator parser generator. (PHP port of LEMON)
* This can be used to store both the string representation of
* a token, and any useful meta-data associated with the token.
* meta-data should be stored as an array
$value->metadata : $value;
$this->metadata[$offset] = $value->metadata;
/** The following structure represents a single element of the
* parser's stack. Information stored includes:
* + The state number for the parser at this level of the stack.
* + The value of the token stored at this level of the stack.
* (In other words, the "major" token.)
* + The semantic value stored at this level of the stack. This is
* the information used by the action routines in the grammar.
* It is sometimes called the "minor" token.
public $major; /* The major token value. This is the code
** number for the token at this stack level */
public $minor; /* The user-supplied minor token value. This
** is the value of the token */
// code external to the class is included here
* PHP_LexerGenerator, a php 5 lexer generator.
* This lexer generator takes a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_LexerGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @since File available since Release 0.1.0
* For regular expression validation
require_once 'PHP/LexerGenerator/Regex/Lexer.php';
require_once 'PHP/LexerGenerator/Regex/Parser.php';
require_once 'PHP/LexerGenerator/Exception.php';
* Token parser for plex files.
* This parser converts tokens pulled from {@link PHP_LexerGenerator_Lexer}
* into abstract patterns and rules, then creates the output file
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version @package_version@
* @since Class available since Release 0.1.0
// declare_class is output here
/* First off, code is included which follows the "include_class" declaration
private $_patternIndex = 0;
$this->out = fopen($outfile, 'wb');
throw new Exception('unable to open lexer output file "' . $outfile . '"');
foreach ($rules as $rule) {
$ruleMap[$i++ ] = $actualindex;
$tokenindex[$actualindex] = $rule['subpatterns'];
$actualindex += $rule['subpatterns'] + 1;
$patterns[] = '^(' . $rule['pattern'] . ')';
$tokenindex = var_export($tokenindex, true);
$tokenindex = explode("\n", $tokenindex);
$tokenindex = implode("\n ", $tokenindex);
$pattern .= implode('|', $patterns);
function yylex' . $ruleindex . '()
$tokenMap = ' . $tokenindex . ';
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
fwrite($this->out, '$yy_global_pattern = "' .
if (preg_match($yy_global_pattern, substr(' . $this->input . ', ' .
$yysubmatches = $yymatches;
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
if (!count($yymatches)) {
throw new Exception(\'Error: lexing failed because a rule matched\' .
\'an empty string. Input "\' . substr(' . $this->input . ',
' . $this->counter . ', 5) . \'... state ' . $statename . '\');
next($yymatches); // skip global match
' . $this->token . ' = key($yymatches); // token number
if ($tokenMap[' . $this->token . ']) {
// extract sub-patterns for passing to lex function
$yysubmatches = array_slice($yysubmatches, ' . $this->token . ' + 1,
$tokenMap[' . $this->token . ']);
' . $this->value . ' = current($yymatches); // token value
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
' . $this->counter . ' += strlen($this->value);
' . $this->line . ' += substr_count("\n", ' . $this->value . ');
// process this token in the new state
} elseif ($r === false) {
' . $this->counter . ' += strlen($this->value);
' . $this->line . ' += substr_count("\n", ' . $this->value . ');
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
fwrite($this->out, ' $yy_yymore_patterns = array(' . "\n");
for($i = 0; count($patterns); $i++ ) {
fwrite($this->out, ' ' . $ruleMap[$i] . ' => "' .
implode('|', $patterns) . "\",\n");
fwrite($this->out, ' );' . "\n");
if (!strlen($yy_yymore_patterns[' . $this->token . '])) {
throw new Exception(\'cannot do yymore for the last token\');
if (preg_match($yy_yymore_patterns[' . $this->token . '],
substr(' . $this->input . ', ' . $this->counter . '), $yymatches)) {
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
next($yymatches); // skip global match
' . $this->token . ' = key($yymatches); // token number
' . $this->value . ' = current($yymatches); // token value
' . $this->line . ' = substr_count("\n", ' . $this->value . ');
} while ($this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}() !== null);
' . $this->counter . ' += strlen($this->value);
' . $this->line . ' += substr_count("\n", ' . $this->value . ');
throw new Exception(\'Unexpected input at line\' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
const ' . $statename . ' = ' . $ruleindex . ';
foreach ($rules as $i => $rule) {
fwrite($this->out, ' function yy_r' . $ruleindex . '_' . $ruleMap[$i] . '($yy_subpatterns)
$ruleindex++ ; // for next set of rules
echo 'Error on line ' . $this->lex->line . ': ' , $msg;
$this->_regexLexer->reset($pattern);
$this->_regexParser->reset($this->_patternIndex, $update);
while ($this->_regexLexer->yylex()) {
$this->_regexLexer->token, $this->_regexLexer->value);
$this->_regexParser->doParse(0, 0);
$this->error($e->getMessage());
return $this->_regexParser->result;
/* Next are all token values, as class constants
** These constants (all generated automatically by the parser generator)
** specify the various kinds of tokens (terminals) that the parser
** understands.
** Each symbol here is a terminal symbol in the grammar.
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
** functions that take a state number and lookahead value and return an
** action integer.
** Suppose the action integer is N. Then the action is determined as
** follows:
** 0 <= N < self::YYNSTATE Shift N. That is,
** push the lookahead token onto the stack and go to state N.
** self::YYNSTATE <= N < self::YYNSTATE+self::YYNRULE Reduce by rule N-YYNSTATE.
** N == self::YYNSTATE+self::YYNRULE A syntax error has occurred.
** N == self::YYNSTATE+self::YYNRULE+1 The parser accepts its
** input. (and concludes parsing)
** N == self::YYNSTATE+self::YYNRULE+2 No such action. Denotes unused
** slots in the yy_action[] table.
** The action table is constructed as a single large static array $yy_action.
** Given state S and lookahead X, the action is computed as
** self::$yy_action[self::$yy_shift_ofst[S] + X ]
** If the index value self::$yy_shift_ofst[S]+X is out of range or if the value
** self::$yy_lookahead[self::$yy_shift_ofst[S]+X] is not equal to X or if
** self::$yy_shift_ofst[S] is equal to self::YY_SHIFT_USE_DFLT, it means that
** the action is not in the table and that self::$yy_default[S] should be used instead.
** The formula above is for computing the action when the lookahead is
** a terminal symbol. If the lookahead is a non-terminal (as occurs after
** a reduce action) then the static $yy_reduce_ofst array is used in place of
** the static $yy_shift_ofst array and self::YY_REDUCE_USE_DFLT is used in place of
** self::YY_SHIFT_USE_DFLT.
** The following are the tables generated in this section:
** self::$yy_action A single table containing all actions.
** self::$yy_lookahead A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** self::$yy_shift_ofst For each state, the offset into self::$yy_action for
** shifting terminals.
** self::$yy_reduce_ofst For each state, the offset into self::$yy_action for
** shifting non-terminals after a reduce.
** self::$yy_default Default action for each state.
static public $yy_action = array(
/* 0 */ 35, 24, 50, 50, 48, 51, 51, 54, 47, 43,
/* 10 */ 53, 54, 45, 31, 53, 32, 30, 50, 50, 1,
/* 20 */ 51, 51, 34, 50, 17, 8, 51, 90, 52, 6,
/* 30 */ 3, 29, 50, 50, 25, 51, 51, 11, 38, 18,
/* 40 */ 1, 41, 42, 39, 10, 36, 18, 12, 37, 18,
/* 50 */ 20, 7, 2, 16, 13, 15, 18, 27, 9, 2,
/* 60 */ 5, 28, 14, 1, 44, 40, 33, 49, 56, 46,
/* 70 */ 26, 19, 1, 55, 2, 21, 4, 23, 22, 8,
static public $yy_lookahead = array(
/* 0 */ 3, 3, 5, 5, 1, 8, 8, 5, 6, 2,
/* 10 */ 8, 5, 6, 13, 8, 3, 3, 5, 5, 19,
/* 20 */ 8, 8, 4, 5, 1, 2, 8, 10, 11, 12,
/* 30 */ 5, 4, 5, 5, 13, 8, 8, 18, 5, 20,
/* 40 */ 19, 8, 5, 6, 18, 5, 20, 18, 8, 20,
/* 50 */ 4, 1, 2, 7, 18, 7, 20, 13, 1, 2,
/* 60 */ 5, 14, 15, 19, 5, 6, 13, 1, 1, 1,
/* 70 */ 16, 20, 19, 3, 2, 17, 12, 4, 17, 2,
static public $yy_shift_ofst = array(
/* 0 */ 23, 27, 18, 28, 50, 28, 57, 72, 73, 72,
/* 10 */ 13, 12, - 3, - 2, 46, 40, 40, 77, 2, 6,
/* 20 */ 59, 33, 33, 37, 3, 7, 48, 7, 70, 55,
/* 30 */ 68, 7, 67, 7, 25, 66,
|