Source for file Parser.php
Documentation is available at Parser.php
* PHP_ParserGenerator, a php 5 parser generator.
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @since File available since Release 0.1.0
* The grammar parser for Lemon grammar files.
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @since Class available since Release 0.1.0
const WAITING_FOR_DECL_OR_RULE = 2;
const WAITING_FOR_DECL_KEYWORD = 3;
const WAITING_FOR_DECL_ARG = 4;
const WAITING_FOR_PRECEDENCE_SYMBOL = 5;
const WAITING_FOR_ARROW = 6;
const PRECEDENCE_MARK_1 = 13;
const PRECEDENCE_MARK_2 = 14;
const RESYNC_AFTER_RULE_ERROR = 15;
const RESYNC_AFTER_DECL_ERROR = 16;
const WAITING_FOR_DESTRUCTOR_SYMBOL = 17;
const WAITING_FOR_DATATYPE_SYMBOL = 18;
const WAITING_FOR_FALLBACK_ID = 19;
* Line number at which the current token starts
* Number of parsing errors so far
* Index of current token within the input string
* @var PHP_ParserGenerator_Data
* Parser state (one of the class constants for this class)
* - PHP_ParserGenerator_Parser::INITIALIZE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_OR_RULE,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_KEYWORD,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DECL_ARG,
* - PHP_ParserGenerator_Parser::WAITING_FOR_PRECEDENCE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_ARROW,
* - PHP_ParserGenerator_Parser::IN_RHS,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::LHS_ALIAS_3,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_1,
* - PHP_ParserGenerator_Parser::RHS_ALIAS_2,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_1,
* - PHP_ParserGenerator_Parser::PRECEDENCE_MARK_2,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_RULE_ERROR,
* - PHP_ParserGenerator_Parser::RESYNC_AFTER_DECL_ERROR,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DESTRUCTOR_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_DATATYPE_SYMBOL,
* - PHP_ParserGenerator_Parser::WAITING_FOR_FALLBACK_ID
* @var PHP_ParserGenerator_Symbol
* Left-hand side of the current rule
* @var PHP_ParserGenerator_Symbol
* Number of right-hand side symbols seen
* Right-hand side symbols
* @var array array of {@link PHP_ParserGenerator_Symbol} objects
* Aliases for each RHS symbol name (or NULL)
* @var array array of strings
* @var PHP_ParserGenerator_Rule
* Keyword of a declaration
* This is one of the %keyword keywords in the grammar file
* Where the declaration argument should be put
* This is assigned as a reference to an internal variable
* Where the declaration line number is put
* This is assigned as a reference to an internal variable
public $declassoc; /* Assign this association to decl arguments */
public $preccounter; /* Assign this precedence to decl arguments */
* @var PHP_ParserGenerator_Rule
public $firstrule; /* Pointer to first rule in the grammar */
* @var PHP_ParserGenerator_Rule
public $lastrule; /* Pointer to the most recently parsed rule */
* @var PHP_ParserGenerator
* Run the preprocessor over the input file text. The Lemon variable
* $azDefine contains the names of all defined
* macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and
* comments them out. Text in between is also commented out as appropriate.
private function preprocess_input(&$z)
for ($i= 0; $i < strlen($z); $i++ ) {
if ($z[$i] != '%' || ($i > 0 && $z[$i- 1] != "\n")) {
if (substr($z, $i, 6) === "%endif" && trim($z[$i+ 6]) === '') {
for ($j = $start; $j < $i; $j++ ) {
if ($z[$j] != "\n") $z[$j] = ' ';
for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++ ) {
} elseif (substr($z, $i, 6) === "%ifdef" && trim($z[$i+ 6]) === '' ||
substr($z, $i, 7) === "%ifndef" && trim($z[$i+ 7]) === '') {
if (isset ($this->lemon->azDefine[$n])) {
// this is a rather roundabout way of checking whether this is %ifndef
//for ($j = $i; $j < strlen($z) && $z[$j] != "\n"; $j++) $z[$j] = ' ';
$z = substr($z, 0, $i); // remove instead of adding ' '
$z = substr($z, 0, $i) . substr($z, $i + $j); // remove instead of adding ' '
throw new Exception("unterminated %ifdef starting on line $start_lineno\n");
* In spite of its name, this function is really a scanner.
* It reads in the entire input file (all at once) then tokenizes it.
* Each token is passed to the function "parseonetoken" which builds all
* the appropriate data structures in the global state vector "gp".
* @param PHP_ParserGenerator_Data
function Parse(PHP_ParserGenerator_Data $gp)
$this->state = self::INITIALIZE;
/* Begin by reading the input file */
ErrorMsg($this->filename, 0, "Can't read in all %d bytes of this file.",
/* Make an initial pass through the file to handle %ifdef and %ifndef */
$this->preprocess_input($filebuf);
/* Now scan the text of the input file */
for ($cp = 0, $c = $filebuf[0]; $cp < strlen($filebuf); $cp++ ) {
if ($c == "\n") $lineno++ ; /* Keep track of the line number */
} /* Skip all white space */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1] == '/') {
/* Skip C++ style comments */
if ($filebuf[$cp] == '/' && ($cp + 1 < strlen($filebuf)) && $filebuf[$cp + 1] == '*') {
/* Skip C style comments */
$this->tokenstart = $cp; /* Mark the beginning of the token */
$this->tokenlineno = $lineno; /* Linenumber on which token begins */
if ($filebuf[$cp] == '"') { /* String literals */
"String starting on this line is not terminated before the end of the file.");
$nextcp = $cp = strlen($filebuf);
} elseif ($filebuf[$cp] == '{') { /* A block of C code */
for ($level = 1; $cp < strlen($filebuf) && ($level > 1 || $filebuf[$cp] != '}'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
} elseif ($filebuf[$cp] == '{') {
} elseif ($filebuf[$cp] == '}') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1] == '*') {
} elseif ($filebuf[$cp] == '/' && $filebuf[$cp + 1] == '/') {
/* Skip C++ style comments too */
} elseif ($filebuf[$cp] == "'" || $filebuf[$cp] == '"') {
/* String and character literals */
$startchar = $filebuf[$cp];
for ($cp++ ; $cp < strlen($filebuf) && ($filebuf[$cp] != $startchar || $prevc === '\\'); $cp++ ) {
if ($filebuf[$cp] == "\n") {
if ($cp >= strlen($filebuf)) {
"PHP code starting on this line is not terminated before the end of the file.");
} elseif (preg_match('/[a-zA-Z0-9]/', $filebuf[$cp])) {
$cp += strlen($preg_results[0]);
} elseif ($filebuf[$cp] == ':' && $filebuf[$cp + 1] == ':' &&
$filebuf[$cp + 2] == '=') {
} elseif (($filebuf[$cp] == '/' || $filebuf[$cp] == '|') &&
$cp += strlen($preg_results[0]);
/* All other (one character) operators */
$this->a = 0; // for referencing in WAITING_FOR_DECL_KEYWORD
printf("%s:%d: Token=[%s] state=%d\n",
/* Fall thru to next case */
case self::WAITING_FOR_DECL_OR_RULE:
$this->state = self::WAITING_FOR_DECL_KEYWORD;
$this->state = self::WAITING_FOR_ARROW;
} elseif ($x[0] == '{') {
|