Sample parser file format PHP code (PHP_LexerGenerator's parser)
<?php
/* Driver template for the PHP_ParserGenerator parser generator. (PHP port of LEMON)
*/
/**
* This can be used to store both the string representation of
* a token, and any useful meta-data associated with the token.
*
* meta-data should be stored as an array
*/
class PHP_LexerGenerator_ParseryyToken implements ArrayAccess
{
    /**
     * String representation of the token.
     *
     * @var string
     */
    public $string = '';

    /**
     * Meta-data associated with the token.
     *
     * @var array
     */
    public $metadata = array();

    /**
     * Build a token from text plus optional meta-data, or copy another token.
     *
     * @param string|PHP_LexerGenerator_ParseryyToken $s token text, or a token
     *        whose string and meta-data are both copied
     * @param array|PHP_LexerGenerator_ParseryyToken  $m meta-data array, or a
     *        token whose meta-data is copied; ignored when $s is a token
     */
    public function __construct($s, $m = array())
    {
        if ($s instanceof PHP_LexerGenerator_ParseryyToken) {
            $this->string = $s->string;
            $this->metadata = $s->metadata;
            return;
        }
        $this->string = (string) $s;
        if ($m instanceof PHP_LexerGenerator_ParseryyToken) {
            $this->metadata = $m->metadata;
        } elseif (is_array($m)) {
            // Any other type leaves the default empty array in place.
            $this->metadata = $m;
        }
    }

    /**
     * @return string the token's string representation
     */
    public function __toString()
    {
        return (string) $this->string;
    }

    /**
     * @param mixed $offset meta-data key
     * @return bool whether the key is set (and not null) in the meta-data
     */
    // The attribute below is an ordinary "#" comment before PHP 8; on 8.1+ it
    // silences the return-type deprecation for these untyped ArrayAccess methods.
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->metadata[$offset]);
    }

    /**
     * @param mixed $offset meta-data key
     * @return mixed the stored meta-data value
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->metadata[$offset];
    }

    /**
     * Store meta-data.
     *
     * With a null offset ($token[] = $value):
     *  - a list (array, or token whose meta-data has index 0) is merged into
     *    the existing meta-data;
     *  - anything else is appended at index count($this->metadata).
     *
     * Null values, tokens with empty meta-data and other falsy values are
     * never stored. When $value is a token, its meta-data is stored rather
     * than the token itself.
     *
     * @param mixed $offset meta-data key, or null to merge/append
     * @param mixed $value  value or token to store
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if ($offset === null) {
            if (isset($value[0])) {
                $x = ($value instanceof PHP_LexerGenerator_ParseryyToken) ?
                    $value->metadata : $value;
                // isset($value[0]) is also true for non-empty strings; only
                // real arrays can be merged, the rest is appended below.
                if (is_array($x)) {
                    $this->metadata = array_merge($this->metadata, $x);
                    return;
                }
            }
            $offset = count($this->metadata);
        }
        if ($value === null) {
            return;
        }
        if ($value instanceof PHP_LexerGenerator_ParseryyToken) {
            if ($value->metadata) {
                $this->metadata[$offset] = $value->metadata;
            }
        } elseif ($value) {
            $this->metadata[$offset] = $value;
        }
    }

    /**
     * @param mixed $offset meta-data key to remove
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->metadata[$offset]);
    }
}
// code external to the class is included here
#line 3 "LexerGenerator\Parser.y"
/* ?><?php {//*/
/**
* PHP_LexerGenerator, a php 5 lexer generator.
*
* This lexer generator takes a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
*
* PHP version 5
*
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
*
* @category php
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version CVS: $Id$
* @since File available since Release 0.1.0
*/
/**
* Token parser for plex files.
*
* This parser converts tokens pulled from {@link PHP_LexerGenerator_Lexer}
* into abstract patterns and rules, then creates the output file
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version @package_version@
* @since Class available since Release 0.1.0
*/
#line 115 "LexerGenerator\Parser.php"
/** The following structure represents a single element of the
* parser's stack. Information stored includes:
*
* + The state number for the parser at this level of the stack.
*
* + The value of the token stored at this level of the stack.
* (In other words, the "major" token.)
*
* + The semantic value stored at this level of the stack. This is
* the information used by the action routines in the grammar.
* It is sometimes called the "minor" token.
*/
class PHP_LexerGenerator_ParseryyStackEntry
{
    /**
     * The state-number.
     */
    public $stateno;

    /**
     * The major token value: the code number for the token at this
     * stack level.
     */
    public $major;

    /**
     * The user-supplied minor token value: the value of the token.
     */
    public $minor;
}
// any extra class_declaration (extends/implements) are defined here
/**
* The state of the parser is completely contained in an instance of
* the following structure
*/
#line 2 "LexerGenerator\Parser.y"
class PHP_LexerGenerator_Parser#line 145 "LexerGenerator\Parser.php"
{
/* First off, code is included which follows the "include_class" declaration
** in the input file. */
#line 52 "LexerGenerator\Parser.y"
// Not referenced in the visible part of this class.
private $patterns;
// Handle of the output file opened by the constructor; all generated lexer
// code is written to it with fwrite().
private $out;
// The lexer passed to the constructor; error() reads its "line" property.
private $lex;
// The five properties below are concatenated verbatim into the generated
// lexer source by outputRules(), so each one holds a fragment of PHP code
// (presumably an expression such as a property access -- they are assigned
// outside the visible part of this file, TODO confirm):
//   $input   - code for the string being lexed
//   $counter - code for the current offset into the input
//   $token   - code that receives the matched token number
//   $value   - code that receives the matched text
//   $line    - code for the running line counter
private $input;
private $counter;
private $token;
private $value;
private $line;
// Maps the integers 1-8 onto this parser's terminal constants. Note that the
// mapping is not the identity (e.g. 4 => QUOTE, whose value is 8).
// NOTE(review): the keys look like the lexer's own token numbers -- confirm
// against PHP_LexerGenerator_Lexer.
public $transTable = array(
1 => self::PHPCODE,
2 => self::COMMENTSTART,
3 => self::COMMENTEND,
4 => self::QUOTE,
5 => self::PATTERN,
6 => self::CODE,
7 => self::SUBPATTERN,
8 => self::PI,
);
/**
 * Open the lexer output file for writing and remember the lexer.
 *
 * @param string $outfile path of the file to create; opened in 'wb' mode
 * @param mixed  $lex     lexer object, stored for later use (error() reads
 *                        its "line" property)
 * @throws Exception if the output file cannot be opened
 */
function __construct($outfile, $lex)
{
    $handle = fopen($outfile, 'wb');
    if (false === $handle) {
        throw new Exception('unable to open lexer output file "' . $outfile . '"');
    }
    $this->out = $handle;
    $this->lex = $lex;
}
/**
 * Write the PHP source of one lexing method, plus one handler method per
 * rule, to the output file.
 *
 * Emits, in order:
 *  - an optional class constant named $statename whose value is the current
 *    rule-set index;
 *  - a method yylexN() that matches all rule patterns as one alternation and
 *    dispatches to the handler of the sub-pattern that matched;
 *  - one method yy_rN_M() per rule, whose body is the rule's code.
 *
 * N is a counter shared by all calls (static local), starting at 1.
 *
 * NOTE(review): the "yymore" patterns are written without the "/" delimiters
 * that the global pattern gets, and the yymore branch assigns (rather than
 * adds to) the line counter. Both are kept as found -- confirm intent.
 *
 * @param array  $rules     rules to emit; each element is read at the keys
 *                          'pattern' and 'code'. Array keys are used (plus
 *                          one) as the handler number, so a 0-based list is
 *                          expected.
 * @param string $statename name of the state constant to emit; nothing is
 *                          emitted when this is empty
 * @return void
 */
function outputRules($rules, $statename)
{
    // Shared across calls so every rule set gets unique method names.
    static $ruleindex = 1;
    $patterns = array();
    foreach ($rules as $rule) {
        // Each alternative is anchored and wrapped in its own capturing
        // group; the group number identifies the rule at run time.
        $patterns[] = '^(' . $rule['pattern'] . ')';
    }
    $pattern = '/' . implode('|', $patterns) . '/';
    if ($statename) {
        fwrite($this->out, '
    const ' . $statename . ' = ' . $ruleindex . ';
');
    }
    fwrite($this->out, '
    function yylex' . $ruleindex . '()
    {
        if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
            return false; // end of input
        }
');
    fwrite($this->out, '        $yy_global_pattern = "' .
        $pattern . '";' . "\n");
    // In the generated code below, substr_count() takes the haystack (the
    // token value) first and the needle ("\n") second.
    fwrite($this->out, '
        do {
            if (preg_match($yy_global_pattern, substr(' . $this->input . ', ' .
        $this->counter .
        '), $yymatches)) {
                $yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
                if (!count($yymatches)) {
                    throw new Exception(\'Error: lexing failed because a rule matched\' .
                        \' an empty string\');
                }
                next($yymatches); // skip global match
                ' . $this->token . ' = key($yymatches); // token number
                ' . $this->value . ' = current($yymatches); // token value
                $r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}();
                if ($r === null) {
                    ' . $this->counter . ' += strlen(' . $this->value . ');
                    ' . $this->line . ' += substr_count(' . $this->value . ', "\n");
                    // accept this token
                    return true;
                } elseif ($r === true) {
                    // we have changed state
                    // process this token in the new state
                    return $this->yylex();
                } elseif ($r === false) {
                    ' . $this->counter . ' += strlen(' . $this->value . ');
                    ' . $this->line . ' += substr_count(' . $this->value . ', "\n");
                    if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
                        return false; // end of input
                    }
                    // skip this token
                    continue;
                } else {');
    fwrite($this->out, '                    $yy_yymore_patterns = array(' . "\n");
    // Entry N holds the alternation of every pattern that comes after rule N;
    // the last entry is therefore an empty string.
    $total = count($patterns);
    for ($i = 0; $i < $total; $i++) {
        unset($patterns[$i]);
        fwrite($this->out, '                        ' . ($i + 1) . ' => "' .
            implode('|', $patterns) . "\",\n");
    }
    fwrite($this->out, '                    );' . "\n");
    fwrite($this->out, '
                    // yymore is needed
                    do {
                        if (!strlen($yy_yymore_patterns[' . $this->token . '])) {
                            throw new Exception(\'cannot do yymore for the last token\');
                        }
                        if (preg_match($yy_yymore_patterns[' . $this->token . '],
                              substr(' . $this->input . ', ' . $this->counter . '), $yymatches)) {
                            $yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
                            next($yymatches); // skip global match
                            ' . $this->token . ' = key($yymatches); // token number
                            ' . $this->value . ' = current($yymatches); // token value
                            ' . $this->line . ' = substr_count(' . $this->value . ', "\n");
                        }
                    } while ($this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}() !== null);
                    // accept
                    ' . $this->counter . ' += strlen(' . $this->value . ');
                    ' . $this->line . ' += substr_count(' . $this->value . ', "\n");
                    return true;
                }
            } else {
                throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
                    \': \' . ' . $this->input . '[' . $this->counter . ']);
            }
            break;
        } while (true);
    } // end function
');
    foreach ($rules as $i => $rule) {
        fwrite($this->out, '    function yy_r' . $ruleindex . '_' . ($i + 1) . '()
    {
' . $rule['code'] .
'    }
');
    }
    $ruleindex++; // for next set of rules
}
/**
 * Print a diagnostic prefixed with the lexer's current line number.
 *
 * The text is echoed as-is; no trailing newline is added.
 *
 * @param string $msg text printed after the "Error on line N: " prefix
 * @return void
 */
function error($msg)
{
    $lineNumber = $this->lex->line;
    echo "Error on line {$lineNumber}: {$msg}";
}
/**
 * Report pattern constructs that would break the generated lexer.
 *
 * Three problems are reported through error(); none of them stops processing:
 *  - the pattern begins with "^";
 *  - the pattern ends with "$";
 *  - the pattern contains a "(" that opens anything other than a "(?:"
 *    non-capturing group. Escaped parentheses ("\(") are ignored.
 *
 * @param string $pattern regular-expression fragment of one rule
 * @return void
 */
function _validatePattern($pattern)
{
    // substr() rather than string offsets: an empty pattern then simply
    // fails both comparisons instead of raising an offset warning.
    if (substr($pattern, 0, 1) === '^') {
        $this->error('Pattern "' . $pattern .
            '" should not begin with ^, lexer may fail');
    }
    if (substr($pattern, -1) === '$') {
        $this->error('Pattern "' . $pattern .
            '" should not end with $, lexer may fail');
    }
    // Remove escaped backslashes first and escaped "(" second, so that a
    // literal "\(" is not mistaken for a group while "\\(" (a literal
    // backslash followed by a real group) still is.
    $unescaped = str_replace(array('\\\\', '\\('), '', $pattern);
    // match ( but not (?:
    if (preg_match('/\((?!\?:)/', $unescaped)) {
        $this->error('Pattern "' . $pattern .
            '" must not contain sub-patterns (like this), generated lexer will fail');
    }
}
#line 307 "LexerGenerator\Parser.php"
/* Next is all token values, in a form suitable for use by makeheaders.
** This section will be null unless lemon is run with the -m switch.
*/
/*
** These constants (all generated automatically by the parser generator)
** specify the various kinds of tokens (terminals) that the parser
** understands.
**
** Each symbol here is a terminal symbol in the grammar.
*/
// Terminal symbol codes, numbered 1-8.
const PHPCODE = 1;
const COMMENTSTART = 2;
const COMMENTEND = 3;
const PI = 4;
const SUBPATTERN = 5;
const CODE = 6;
const PATTERN = 7;
const QUOTE = 8;
// Special action integers. They are consecutive (92, 93, 94), matching the
// scheme described in the action-table comment that follows: syntax error at
// YYNSTATE+YYNRULE, accept at +1, unused slot at +2. YYNSTATE and YYNRULE
// themselves are not defined in the visible part of this file.
const YY_NO_ACTION = 94;
const YY_ACCEPT_ACTION = 93;
const YY_ERROR_ACTION = 92;
/* Next are the tables used to determine what action to take based on the
** current state and lookahead token. These tables are used to implement
** functions that take a state number and lookahead value and return an
** action integer.
**
** Suppose the action integer is N. Then the action is determined as
** follows
**
** 0 <= N < YYNSTATE Shift N. That is, push the lookahead
** token onto the stack and goto state N.
**
** YYNSTATE <= N < YYNSTATE+YYNRULE Reduce by rule N-YYNSTATE.
**
** N == YYNSTATE+YYNRULE A syntax error has occurred.
**
** N == YYNSTATE+YYNRULE+1 The parser accepts its input.
**
** N == YYNSTATE+YYNRULE+2 No such action. Denotes unused
** slots in the yy_action[] table.
**
** The action table is constructed as a single large table named yy_action[].
** Given state S and lookahead X, the action is computed as
**
** yy_action[ yy_shift_ofst[S] + X ]
**
** If the index value yy_shift_ofst[S]+X is out of range or if the value
** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X or if yy_shift_ofst[S]
** is equal to YY_SHIFT_USE_DFLT, it means that the action is not in the table
** and that yy_default[S] should be used instead.
**
** The formula above is for computing the action when the lookahead is
** a terminal symbol. If the lookahead is a non-terminal (as occurs after
** a reduce action) then the yy_reduce_ofst[] array is used in place of
** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of
** YY_SHIFT_USE_DFLT.
**
** The following are the tables generated in this section:
**
** yy_action[] A single table containing all actions.
** yy_lookahead[] A table containing the lookahead for each entry in
** yy_action. Used to detect hash collisions.
** yy_shift_ofst[] For each state, the offset into yy_action for
** shifting terminals.
** yy_reduce_ofst[] For each state, the offset into yy_action for
** shifting non-terminals after a reduce.
** yy_default[] Default action for each state.
*/
// Size of the action table: $yy_action and $yy_lookahead below each list
// exactly 87 values (indices 0-86).
const YY_SZ_ACTTAB = 87;
// Action integers, indexed by (offset for the state) + (lookahead symbol).
// The leading comment on each row gives the index of its first value.
static public $yy_action = array(
/* 0 */ 33, 31, 58, 58, 3, 50, 50, 57, 44, 39,
/* 10 */ 42, 58, 57, 55, 50, 42, 51, 36, 58, 58,
/* 20 */ 59, 50, 50, 38, 58, 46, 45, 50, 35, 58,
/* 30 */ 17, 2, 50, 93, 52, 16, 18, 6, 24, 19,
/* 40 */ 2, 12, 41, 53, 48, 40, 30, 60, 1, 4,
/* 50 */ 34, 10, 20, 43, 49, 32, 14, 58, 7, 20,
/* 60 */ 50, 8, 20, 9, 20, 37, 47, 11, 20, 56,
/* 70 */ 15, 5, 22, 54, 28, 23, 53, 21, 29, 25,
/* 80 */ 2, 27, 6, 13, 53, 53, 26,
);
// Parallel to $yy_action: the symbol each slot was generated for. A slot is
// only valid for lookahead X when the value stored here equals X.
static public $yy_lookahead = array(
/* 0 */ 3, 3, 5, 5, 5, 8, 8, 5, 6, 3,
/* 10 */ 8, 5, 5, 6, 8, 8, 3, 3, 5, 5,
/* 20 */ 3, 8, 8, 4, 5, 5, 6, 8, 4, 5,
/* 30 */ 1, 2, 8, 10, 11, 12, 1, 2, 4, 1,
/* 40 */ 2, 12, 41, 53, 48, 40, 30, 60, 1, 4,
/* 50 */ 34, 10, 20, 43, 49, 32, 14, 58, 7, 20,
/* 60 */ 50, 8, 20, 9, 20, 37, 47, 11, 20, 56,
/* 70 */ 15, 5, 22, 54, 28, 23, 53, 21, 29, 25,
/* 80 */ 2, 27, 6, 13, 53, 53, 26,
);
// Sentinel offset: a state whose shift offset equals this value has no
// table entries and falls back to its default action.
const YY_SHIFT_USE_DFLT = -4;
// Highest index present in $yy_shift_ofst (the array has entries 0-39).
const YY_SHIFT_MAX = 39;
// Per-state offset into $yy_action used when the lookahead is a terminal.
static public $yy_shift_ofst = array(
/* 0 */ 35, 24, 19, 52, 52, 52, 74, 13, -2, -3,
/* 10 */ 14, 6, 37, 38, 34, 37, 29, 78, 80, 78,
/* 20 */ 7, 2, 46, 46, 48, 46, 46, 39, 39, 20,
/* 30 */ 63, 42, 17, 72, 60, -1, 68, 69, 44, 65,
);
// Sentinel offset for the reduce table, used like YY_SHIFT_USE_DFLT.
const YY_REDUCE_USE_DFLT = -1;
// Highest index present in $yy_reduce_ofst (the array has entries 0-19).
const YY_REDUCE_MAX = 19;
// Per-state offset into $yy_action used when the lookahead is a
// non-terminal, i.e. after a reduce.
static public $yy_reduce_ofst = array(
/* 0 */ 23, 49, 45, 33, 43, 40, 41, 58, 58, 58,
/* 10 */ 58, 58, 64, 59, 30, 57, 62, 73, 71, 66,
);
// One list of token codes per parser state, indexed by state number (0-60).
// States 40-60 have empty lists.
// NOTE(review): presumably the terminals that are acceptable in each state;
// the code that reads this table is outside the visible part of the file.
static public $yyExpectedTokens = array(
/* 0 */ array(1, 2, ),
/* 1 */ array(4, 5, 8, ),
/* 2 */ array(4, 5, 8, ),
/* 3 */ array(5, 8, ),
/* 4 */ array(5, 8, ),
/* 5 */ array(5, 8, ),
/* 6 */ array(4, ),
/* 7 */ array(3, 5, 8, ),
/* 8 */ array(3, 5, 8, ),
/* 9 */ array(3, 5, 8, ),
/* 10 */ array(3, 5, 8, ),
/* 11 */ array(3, 5, 8, ),
/* 12 */ array(5, 8, ),
/* 13 */ array(1, 2, ),
/* 14 */ array(4, 7, ),
/* 15 */ array(5, 8, ),
/* 16 */ array(1, 2, ),
/* 17 */ array(2, ),
/* 18 */ array(2, ),
/* 19 */ array(2, ),
/* 20 */ array(5, 6, 8, ),
/* 21 */ array(5, 6, 8, ),
/* 22 */ array(2, 4, ),
/* 23 */ array(2, 4, ),
/* 24 */ array(5, 6, ),
/* 25 */ array(2, 4, ),
/* 26 */ array(2, 4, ),
/* 27 */ array(5, 8, ),
/* 28 */ array(5, 8, ),
/* 29 */ array(5, 6, ),
/* 30 */ array(7, ),
/* 31 */ array(1, ),
/* 32 */ array(3, ),
/* 33 */ array(1, ),
/* 34 */ array(5, ),
/* 35 */ array(5, ),
/* 36 */ array(1, ),
/* 37 */ array(2, ),
/* 38 */ array(5, ),
/* 39 */ array(1, ),
/* 40 */ array(),
/* 41 */ array(),
/* 42 */ array(),
/* 43 */ array(),
/* 44 */ array(),
/* 45 */ array(),
/* 46 */ array(),
/* 47 */ array(),
/* 48 */ array(),
/* 49 */ array(),
/* 50 */ array(),
/* 51 */ array(),
/* 52 */ array(),
/* 53 */ array(),
/* 54 */ array(),
/* 55 */ array(),
/* 56 */ array(),
/* 57 */ array(),
/* 58 */ array(),
/* 59 */ array(),
/* 60 */ array(),
);
static public $yy_default = array(
/* 0 */ &n
|
|