Source for file ParserGenerator.php
Documentation is available at ParserGenerator.php
* PHP_ParserGenerator, a php 5 parser generator.
* This is a direct port of the Lemon parser generator, found at
* {@link http://www.hwaci.com/sw/lemon/}
* There are a few PHP-specific changes to the lemon parser generator.
* - %extra_argument is removed, as class constructor can be used to
* pass in extra information
* - %token_type and company are irrelevant in PHP, and so are removed
* - %declare_class is added to define the parser class name and any
* implements/extends information
* - %include_class is added to allow insertion of extra class information
* such as constants, a class constructor, etc.
* Other changes make the parser more robust, and also make reporting
* syntax errors simpler. Detection of expected tokens eliminates some
* problematic edge cases where an unexpected token could cause the parser
* to simply accept input.
* Otherwise, the file format is identical to the Lemon parser generator
* Copyright (c) 2006, Gregory Beaver <cellog@php.net>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* * Neither the name of the PHP_ParserGenerator nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @since File available since Release 0.1.0
* Basic components of the parser generator
require_once 'PHP/ParserGenerator/Action.php';
require_once 'PHP/ParserGenerator/ActionTable.php';
require_once 'PHP/ParserGenerator/Config.php';
require_once 'PHP/ParserGenerator/Data.php';
require_once 'PHP/ParserGenerator/Symbol.php';
require_once 'PHP/ParserGenerator/Rule.php';
require_once 'PHP/ParserGenerator/Parser.php';
require_once 'PHP/ParserGenerator/PropagationLink.php';
require_once 'PHP/ParserGenerator/State.php';
* The basic home class for the parser generator
* @package PHP_ParserGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.opensource.org/licenses/bsd-license.php New BSD License
* @version @package_version@
* @since Class available since Release 0.1.0
* @example examples/Parser.y Sample parser file format (PHP_LexerGenerator's parser)
* @example examples/Parser.php Sample parser file format PHP code (PHP_LexerGenerator's parser)
* Set this to 1 to turn on debugging of Lemon's parsing of
const OPT_FLAG = 1, OPT_INT = 2, OPT_DBL = 3, OPT_STR = 4,
OPT_FFLAG = 5, OPT_FINT = 6, OPT_FDBL = 7, OPT_FSTR = 8;
private static $options = array(
'type' => self::OPT_FLAG,
'message' => 'Print only the basis in report.'
'type' => self::OPT_FLAG,
'message' => 'Don\'t compress the action table.'
'type' => self::OPT_FSTR,
'arg' => 'handle_D_option',
'message' => 'Define an %ifdef macro.'
'type' => self::OPT_FLAG,
'message' => 'Print grammar without actions.'
'type' => self::OPT_FLAG,
'message' => 'Output a makeheaders compatible file'
'type' => self::OPT_FLAG,
'message' => '(Quiet) Don\'t print the report file.'
'type' => self::OPT_FLAG,
'message' => 'Print parser stats to standard output.'
'type' => self::OPT_FLAG,
'message' => 'Print the version number.'
* Process a flag command line argument.
if (!isset ($argv[1]) || !isset (self::$options[$argv[$i][1]])) {
throw new Exception('Command line syntax error: undefined option "' . $argv[$i] . '"');
$v = self::$options[$argv[$i][1]] == '-';
if (self::$options[$argv[$i][1]]['type'] == self::OPT_FLAG) {
$this->{self::$options[$argv[$i][1]]['arg']} = (int) $v;
} elseif (self::$options[$argv[$i][1]]['type'] == self::OPT_FFLAG) {
$this->{self::$options[$argv[$i][1]]['arg']}($v);
} elseif (self::$options[$argv[$i][1]]['type'] == self::OPT_FSTR) {
$this->{self::$options[$argv[$i][1]]['arg']}(substr($v, 2));
throw new Exception('Command line syntax error: missing argument on switch: "' . $argv[$i] . '"');
* Process a command line switch which has an argument.
throw new Exception('INTERNAL ERROR: handleswitch passed bad argument, no "=" in arg');
if (!isset (self::$options[$argv[$i]])) {
throw new Exception('Command line syntax error: undefined option "' . $argv[$i] .
switch (self::$options[$argv[$i]]['type']) {
throw new Exception('Command line syntax error: option requires an argument "' .
$argv[$i] . '=' . $cp . '"');
switch(self::$options[$argv[$i]]['type']) {
$this->$ {self::$options[$argv[$i]]['arg']} = $dv;
$this->$ {self::$options[$argv[$i]]['arg']}($dv);
$this->$ {self::$options[$argv[$i]]['arg']} = $lv;
$this->$ {self::$options[$argv[$i]]['arg']}($lv);
$this->$ {self::$options[$argv[$i]]['arg']} = $sv;
$this->$ {self::$options[$argv[$i]]['arg']}($sv);
* @param array valid options
for($i = 1; $i < count($argv); $i++ ) {
if ($argv[$i][0] == '+' || $argv[$i][0] == '-') {
} elseif (strstr($argv[$i],'=')) {
* Return the index of the N-th non-switch argument. Return -1
private function argindex($n, $a)
for ($i= 1; $i < count($a); $i++ ) {
if ($dashdash || !($a[$i][0] == '-' || $a[$i][0] == '+' ||
if ($_SERVER['argv'][$i] == '--') {
* Return the value of the non-option argument as indexed by $i
* @param array the value of $argv
private function OptArg($i, $a)
if (- 1 == ($ind = $this->argindex($i, $a))) {
* @return int number of arguments
for($i = 1; $i < count($a); $i++ ) {
if ($dashdash || !($a[$i][0] == '-' || $a[$i][0] == '+' ||
* Print out command-line options
foreach (self::$options as $label => $info) {
$len = strlen($label) + 1;
$len += 9; /* length of "<integer>" */
$len += 6; /* length of "<real>" */
$len += 8; /* length of "<string>" */
foreach (self::$options as $label => $info) {
printf(" -%-*s %s\n", $max, $label, $info['message']);
printf(" %s=<integer>%*s %s\n", $label, $max - strlen($label) - 9,
printf(" %s=<real>%*s %s\n", $label, $max - strlen($label) - 6,
printf(" %s=<string>%*s %s\n", $label, $max - strlen($label) - 8,
* This routine is called with the argument to each -D command-line option.
* Add the macro defined to the azDefine array.
private function handle_D_option($z)
$z = substr($a, 1); // strip first =
/**************** From the file "main.c" ************************************/
** Main program file for the LEMON parser generator.
/* The main program. Parse the command line and do it... */
echo "Lemon version 1.0/PHP_ParserGenerator port version @package_version@\n";
if ($this->OptNArgs($_SERVER['argv']) != 1) {
echo "Exactly one filename argument is required.\n";
/* Initialize the machine */
$lem->argv0 = $_SERVER['argv'][0];
$lem->filename = $this->OptArg(0, $_SERVER['argv']);
if (isset ($a['extension'])) {
$ext = '.' . $a['extension'];
$lem->filenosuffix = $lem->filename;
$lem->basisflag = $this->basisflag;
$lem->name = $lem->include_code = $lem->include_classcode = $lem->arg =
$lem->tokentype = $lem->start = 0;
$lem->error = $lem->overflow = $lem->failure = $lem->accept = $lem->tokendest =
$lem->tokenprefix = $lem->outname = $lem->extracode = 0;
/* Parse the input file */
/* Count and index the symbols of the grammar */
for ($i = 0; $i <= $lem->nsymbol; $i++ ) {
$lem->symbols[$i]->index = $i;
usort($lem->symbols, array('PHP_ParserGenerator_Symbol', 'sortSymbols'));
for ($i = 0; $i <= $lem->nsymbol; $i++ ) {
$lem->symbols[$i]->index = $i;
// find the first lower-case symbol
for($i = 1; ord($lem->symbols[$i]->name[0]) < ord ('Z'); $i++ );
/* Generate a reprint of the grammar, if requested on the command line */
/* Initialize the size for all follow and first sets */
/* Find the precedence for every production rule (that has one) */
$lem->FindRulePrecedences();
/* Compute the lambda-nonterminals and the first-sets for every
/* Compute all LR(0) states. Also record follow-set propagation
** links so that the follow-set can be computed later */
/* Tie up loose ends on the propagation links */
/* Compute the follow set of every reducible configuration */
/* Compute the action tables */
/* Compress the action tables */
if ($this->compress=== 0) {
/* Reorder and renumber the states so that states with fewer choices
/* Generate a report of the parser generated. (the "y.output" file) */
/* Generate the source code for the parser */
$lem->ReportTable($this->mhflag);
/* Produce a header file for use by the scanner. (This step is
** omitted if the "-m" option is used because makeheaders will
** generate the file for us.) */
// $this->ReportHeader();
printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
$lem->nterminal, $lem->nsymbol - $lem->nterminal, $lem->nrule);
printf(" %d states, %d parser table entries, %d conflicts\n",
$lem->nstate, $lem->tablesize, $lem->nconflict);
printf("%d parsing conflicts.\n", $lem->nconflict);
exit($lem->errorcnt + $lem->nconflict);
return ($lem->errorcnt + $lem->nconflict);
* Merge in a merge sort for a linked list
* - a: A sorted, null-terminated linked list. (May be null).
* - b: A sorted, null-terminated linked list. (May be null).
* - cmp: A pointer to the comparison function.
* - offset: Offset in the structure to the "next" field.
* A pointer to the head of a sorted list containing the elements
* The "next" pointers for elements in the lists a and b are
static function merge($a, $b, $cmp, $offset)
** list: Pointer to a singly-linked list of structures.
** next: Pointer to pointer to the second element of the list.
** cmp: A comparison function.
** A pointer to the head of a sorted list containing the elements
** The "next" pointers for elements in list are changed.
static function msort($list, $next, $cmp)
if ($list->$next === 0) {
for ($i = 0; $i < 29 && $set[$i] !== 0; $i++ ) {
$ep = self::merge($ep, $set[$i], $cmp, $next);
for ($i = 0; $i < 30; $i++ ) {
$ep = self::merge($ep, $set[$i], $cmp, $next);
/* Find a good place to break "msg" so that its length is at least "min"
** but no more than "max". Make the point as close to max as possible.
for ($i = $spot = $min; $i <= $max && $i < strlen($msg); $i++ ) {
if ($c == '-' && $i < $max - 1) {
static function ErrorMsg($filename, $lineno, $format)
/* Prepare a prefix to be prepended to every output line */
$prefix = sprintf("%20s:%d: ", $filename, $lineno);
$prefix = sprintf("%20s: ", $filename);
$prefixsize = strlen($prefix);
$availablewidth = 79 - $prefixsize;
/* Generate the error message */
/* Remove trailing "\n"s from the error message. */
while ($linewidth > 0 && in_array($errmsg[$linewidth- 1], array("\n", "\r"), true)) {
/* Print the error message */
$errmsg = str_replace(array("\r", "\n", "\t"), array(' ', ' ', ' '), $errmsg);
$end = $restart = self::findbreak($errmsg, 0, $availablewidth);
if (strlen($errmsg) <= 79 && $end < strlen($errmsg) && $end <= 79) {
$end = $restart = strlen($errmsg);
while (isset ($errmsg[$restart]) && $errmsg[$restart] == ' ') {
printf("%s%.${end}s\n", $prefix, $errmsg);
$errmsg = substr($errmsg, $restart);
* Duplicate the input file without comments and without actions
printf("// Reprint of input file \"%s\".\n// Symbols:\n", $this->filename);
for ($i = 0; $i < $this->nsymbol; $i++ ) {
$sp = $this->symbols[$i];
$ncolumns = 76 / ($maxlen + 5);
$skip = ($this->nsymbol + $ncolumns - 1) / $ncolumns;
for ($i = 0; $i < $skip; $i++ ) {
for ($j = $i; $j < $this->nsymbol; $j += $skip) {
$sp = $this->symbols[$j];
//assert( sp->index==j );
printf(" %3d %-${maxlen}.${maxlen}s", $j, $sp->name);
for ($rp = $this->rule; $rp; $rp = $rp->next) {
printf("(%s)", $rp->lhsalias);
for ($i = 0; $i < $rp->nrhs; $i++ ) {
for ($j = 1; $j < $sp->nsubsym; $j++ ) {
printf("|%s", $sp->subsym[$j]->name);
/* if ($rp->rhsalias[$i]) {
printf("(%s)", $rp->rhsalias[$i]);
printf(" [%s]", $rp->precsym->name);
print "\n " . $rp->code);
//$a = new PHP_ParserGenerator;
//$_SERVER['argv'] = array('lemon', '-s', '/development/lemon/PHP_Parser.y');
//$_SERVER['argv'] = array('lemon', '-s', '/development/File_ChessPGN/ChessPGN/Parser.y');
|