Source for file Parser.php
Documentation is available at Parser.php
* PHP_ParserGenerator, a php 5 parser generator.
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @since File available since Release 0.1.0
* The grammar parser for lemon grammar files.
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @since Class available since Release 0.1.0
// Parser state identifiers. Exactly one of these values is held in
// $this->state while tokens are being processed.
// NOTE(review): the values visible here are not contiguous (2-6, 13-19);
// other states referenced by this class (INITIALIZE, IN_RHS, LHS_ALIAS_1..3,
// RHS_ALIAS_1..2) are presumably declared alongside these - confirm.

// Between constructs: the next token starts a declaration, a rule, a code
// fragment ('{') or a precedence mark ('[').
const WAITING_FOR_DECL_OR_RULE = 2;
// A declaration has begun; the next token is matched against the known
// keywords ('include', 'left', 'type', 'fallback', ...).
const WAITING_FOR_DECL_KEYWORD = 3;
// A declaration keyword (or its symbol) has been accepted; its argument is next.
const WAITING_FOR_DECL_ARG = 4;
// Entered after the 'left', 'right' and 'nonassoc' keywords.
const WAITING_FOR_PRECEDENCE_SYMBOL = 5;
// A rule's left-hand side has been read; "::=" or a '(' opening an LHS alias
// is expected.
const WAITING_FOR_ARROW = 6;
// Entered on '[': the precedence symbol itself is expected next.
const PRECEDENCE_MARK_1 = 13;
// The precedence symbol has been read; the closing "]" is expected.
const PRECEDENCE_MARK_2 = 14;
// Error recovery after a malformed rule.
const RESYNC_AFTER_RULE_ERROR = 15;
// Error recovery after a malformed declaration.
const RESYNC_AFTER_DECL_ERROR = 16;
// Entered after the 'destructor' keyword: a symbol name is expected.
const WAITING_FOR_DESTRUCTOR_SYMBOL = 17;
// Entered after the 'type' keyword: a symbol name is expected.
const WAITING_FOR_DATATYPE_SYMBOL = 18;
// Entered after the 'fallback' keyword: token names are expected.
const WAITING_FOR_FALLBACK_ID = 19;
* Linenumber at which current token starts
* Number of parsing errors so far
* Index of current token within the input string
* @var PHP_ParserGenerator_Data
* Parser state (one of the class constants for this class)
* - PHP_ParserGenerator_Parser::INITIALIZE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_OR_RULE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_KEYWORD,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_ARG,
* - PHP_ParserGenerator_Parser::WAITING_FOR_PRECEDENCE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_ARROW,
* - PHP_ParserGenerator_Parser::IN_RHS,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_3,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_1,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_2,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_RULE_ERROR,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_DECL_ERROR,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DESTRUCTOR_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DATATYPE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_FALLBACK_ID
* @var PHP_ParserGenerator_Symbol
* Left-hand side of the current rule
* @var PHP_ParserGenerator_Symbol
* Number of right-hand side symbols seen
* Right-hand side symbols
* @var array array of {@link PHP_ParserGenerator_Symbol} objects
* Aliases for each RHS symbol name (or NULL)
* @var array array of strings
* @var PHP_ParserGenerator_Rule
* Keyword of a declaration
* This is one of the %keyword keywords in the grammar file
* Where the declaration argument should be put
* This is assigned as a reference to an internal variable
* Where the declaration linenumber is put
* This is assigned as a reference to an internal variable
public $declassoc; /* Assign this association to decl arguments */
public $preccounter; /* Assign this precedence to decl arguments */
* @var PHP_ParserGenerator_Rule
public $firstrule; /* Pointer to first rule in the grammar */
* @var PHP_ParserGenerator_Rule
public $lastrule; /* Pointer to the most recently parsed rule */
* @var PHP_ParserGenerator
* Run the preprocessor over the input file text. The Lemon variable
* $azDefine contains the names of all defined
* macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
* comments them out. Text in between is also commented out as appropriate.
private function preprocess_input(&$z)
for ($i= 0; $i < strlen($z); $i++ ) {
if ($z[$i] != '%' || ($i > 0 && $z[$i- 1] != "\n")) {
if (substr($z, $i, 6) === "%endif" && trim($z[$i+ 6]) === '') {
for ($j = $start; $j < $i; $j++ ) {
if ($z[$j] != "\n") $z[$j] = ' ';
for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++ ) {
} elseif (substr($z, $i, 6) === "%ifdef" && trim($z[$i+ 6]) === '' ||
substr($z, $i, 7) === "%ifndef" && trim($z[$i+ 7]) === '') {
if (isset ($this->lemon->azDefine[$n])) {
// this is a rather obtuse way of checking whether this is %ifndef
//for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++) $z[$j] = ' ';
$z = substr($z, 0, $i); // remove instead of adding ' '
$z = substr($z, 0, $i) . substr($z, $i + $j); // remove instead of adding ' '
throw new Exception("unterminated %ifdef starting on line $start_lineno\n");
* In spite of its name, this function is really a scanner.
* It reads in the entire input file (all at once) then tokenizes it.
* Each token is passed to the function "parseonetoken" which builds all
* the appropriate data structures in the global state vector "gp".
* @param PHP_ParserGenerator_Data
function Parse(PHP_ParserGenerator_Data $gp)
$this->state = self::INITIALIZE;
/* Begin by reading the input file */
ErrorMsg($this->filename, 0, "Can't read in all %d bytes of this file.",
/* Make an initial pass through the file to handle %ifdef and %ifndef */
$this->preprocess_input($filebuf);
/* Now scan the text of the input file */
for ($cp = 0, $c = $filebuf[0]; $cp < strlen($filebuf); $cp++ ) {
if ($c == "\n") $lineno++ ; /* Keep track of the line number */
} /* Skip all white space */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1] == '/') {
/* Skip C++ style comments */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1] == '*') {
/* Skip C style comments */
$this->tokenstart = $cp; /* Mark the beginning of the token */
$this->tokenlineno = $lineno; /* Linenumber on which token begins */
if ($filebuf[$cp] == '"') { /* String literals */
"String starting on this line is not terminated before the end of the file.");
$nextcp = $cp = strlen($filebuf);
} elseif ($filebuf[$cp] == '{') { /* A block of C code */
for ($level = 1; $cp < strlen($filebuf) && ($level > 1 || $filebuf[$cp] != '}'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
} elseif ($filebuf[$cp] == '{') {
} elseif ($filebuf[$cp] == '}') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1] == '*') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1] == '/') {
/* Skip C++ style comments too */
} elseif ($filebuf[$cp] == "'" || $filebuf[$cp] == '"') {
/* String and character literals */
$startchar = $filebuf[$cp];
for ($cp++ ; $cp < strlen($filebuf) && ($filebuf[$cp] != $startchar || $prevc === '\\'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
if ($cp >= strlen($filebuf)) {
"PHP code starting on this line is not terminated before the end of the file.");
} elseif (preg_match('/[a-zA-Z0-9]/', $filebuf[$cp])) {
$cp += strlen($preg_results[0]);
} elseif ($filebuf[$cp] == ':' && $filebuf[$cp + 1] == ':' &&
$filebuf[$cp + 2] == '=') {
} elseif (($filebuf[$cp] == '/' || $filebuf[$cp] == '|') &&
$cp += strlen($preg_results[0]);
/* All other (one character) operators */
$this->a = 0; // for referencing in WAITING_FOR_DECL_KEYWORD
printf("%s:%d: Token=[%s] state=%d\n",
/* Fall thru to next case */
case self::WAITING_FOR_DECL_OR_RULE:
$this->state = self::WAITING_FOR_DECL_KEYWORD;
$this->state = self::WAITING_FOR_ARROW;
} elseif ($x[0] == '{') {
"There is no prior rule opon which to attach the code
fragment which begins on this line.");
"Code fragment beginning on this line is not the first \
to follow the previous rule.");
} elseif ($x[0] == '[') {
$this->state = self::PRECEDENCE_MARK_1;
"Token \"%s\" should be either \"%%\" or a nonterminal name.",
case self::PRECEDENCE_MARK_1:
"The precedence symbol must be a terminal.");
"There is no prior rule to assign precedence \"[%s]\".", $x);
"Precedence mark on this line is not the first to follow the previous rule.");
$this->state = self::PRECEDENCE_MARK_2;
case self::PRECEDENCE_MARK_2:
"Missing \"]\" on precedence mark.");
$this->state = self::WAITING_FOR_DECL_OR_RULE;
case self::WAITING_FOR_ARROW:
if ($x[0] == ':' && $x[1] == ':' && $x[2] == '=') {
$this->state = self::IN_RHS;
} elseif ($x[0] == '(') {
$this->state = self::LHS_ALIAS_1;
"Expected to see a \":\" following the LHS symbol \"%s\".",
$this->state = self::RESYNC_AFTER_RULE_ERROR;
$this->state = self::LHS_ALIAS_2;
"\"%s\" is not a valid alias for the LHS \"%s\"\n",
$this->state = self::RESYNC_AFTER_RULE_ERROR;
$this->state = self::LHS_ALIAS_3;
"Missing \")\" following LHS alias name \"%s\".",$this->lhsalias);
$this->state = self::RESYNC_AFTER_RULE_ERROR;
$this->state = self::IN_RHS;
"Missing \"->\" following: \"%s(%s)\".",
$this->state = self::RESYNC_AFTER_RULE_ERROR;
for ($i = 0; $i < $this->nrhs; $i++ ) {
$rp->rhs[$i] = $this->rhs[$i];
$rp->rhsalias[$i] = $this->alias[$i];
$rp->nextlhs = $rp->lhs->rule;
$this->state = self::WAITING_FOR_DECL_OR_RULE;
"Too many symbols on RHS or rule beginning at \"%s\".",
$this->state = self::RESYNC_AFTER_RULE_ERROR;
if (isset ($this->rhs[$this->nrhs - 1])) {
$msp = $this->rhs[$this->nrhs - 1];
array($this, '_printmulti'), '');
'WARNING: symbol ' . $x . ' will not' .
' be part of previous multiterminal %s',
} elseif (($x[0] == '|' || $x[0] == '/') && $this->nrhs > 0) {
$msp = $this->rhs[$this->nrhs - 1];
$msp->subsym = array($origsp);
$msp->name = $origsp->name;
$this->rhs[$this->nrhs - 1] = $msp;
preg_match('/[a-z]/', $msp->subsym[0]->name[0])) {
"Cannot form a compound containing a non-terminal");
} elseif ($x[0] == '(' && $this->nrhs > 0) {
$this->state = self::RHS_ALIAS_1;
"Illegal character on RHS of rule: \"%s\".", $x);
$this->state = self::RESYNC_AFTER_RULE_ERROR;
$this->state = self::RHS_ALIAS_2;
"\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
$x, $this->rhs[$this->nrhs - 1]->name);
$this->state = self::RESYNC_AFTER_RULE_ERROR;
$this->state = self::IN_RHS;
"Missing \")\" following LHS alias name \"%s\".", $this->lhsalias);
$this->state = self::RESYNC_AFTER_RULE_ERROR;
case self::WAITING_FOR_DECL_KEYWORD:
$this->state = self::WAITING_FOR_DECL_ARG;
} elseif ('include' == $x) {
} elseif ('include_class' == $x) {
} elseif ('declare_class' == $x) {
} elseif ('code' == $x) {
} elseif ('token_destructor' == $x) {
} elseif ('default_destructor' == $x) {
} elseif ('token_prefix' == $x) {
} elseif ('syntax_error' == $x) {
} elseif ('parse_accept' == $x) {
} elseif ('parse_failure' == $x) {
} elseif ('stack_overflow' == $x) {
} elseif ('token_type' == $x) {
} elseif ('default_type' == $x) {
} elseif ('stack_size' == $x) {
} elseif ('start_symbol' == $x) {
} elseif ('left' == $x) {
$this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('right' == $x) {
$this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('nonassoc' == $x) {
$this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
} elseif ('destructor' == $x) {
$this->state = self::WAITING_FOR_DESTRUCTOR_SYMBOL;
} elseif ('type' == $x) {
$this->state = self::WAITING_FOR_DATATYPE_SYMBOL;
} elseif ('fallback' == $x) {
$this->state = self::WAITING_FOR_FALLBACK_ID;
"Unknown declaration keyword: \"%%%s\".", $x);
$this->state = self::RESYNC_AFTER_DECL_ERROR;
"Illegal declaration keyword: \"%s\".", $x);
$this->state = self::RESYNC_AFTER_DECL_ERROR;
case self::WAITING_FOR_DESTRUCTOR_SYMBOL:
"Symbol name missing after %destructor keyword");
$this->state = self::RESYNC_AFTER_DECL_ERROR;
$this->state = self::WAITING_FOR_DECL_ARG;
case self::WAITING_FOR_DATATYPE_SYMBOL:
"Symbol name missing after %destructor keyword");
$this->state = self::RESYNC_AFTER_DECL_ERROR;
$this->state = self::WAITING_FOR_DECL_ARG;
case self::WAITING_FOR_PRECEDENCE_SYMBOL:
$this->state = self::WAITING_FOR_DECL_OR_RULE;
"Symbol \"%s\" has already been given a precedence.", $x);
"Can't assign a precedence to \"%s\".", $x);
case self::WAITING_FOR_DECL_ARG:
"The argument \"%s\" to declaration \"%%%s\" is not the first.",
$this->state = self::RESYNC_AFTER_DECL_ERROR;
$this->state = self::WAITING_FOR_DECL_OR_RULE;
"Illegal argument to %%%s: %s",$this->declkeyword, $x);
$this->state = self::RESYNC_AFTER_DECL_ERROR;
case self::WAITING_FOR_FALLBACK_ID:
$this->state = self::WAITING_FOR_DECL_OR_RULE;
"%%fallback argument \"%s\" should be a token", $x);
"More than one fallback assigned to token %s", $x);
case self::RESYNC_AFTER_RULE_ERROR:
/* if ($x[0] == '.') $this->state = self::WAITING_FOR_DECL_OR_RULE;
case self::RESYNC_AFTER_DECL_ERROR:
$this->state = self::WAITING_FOR_DECL_OR_RULE;
$this->state = self::WAITING_FOR_DECL_KEYWORD;
* return a descriptive string for a multi-terminal token.
private function _printmulti($a, $b)
|