Source for file Lexer.php
Documentation is available at Lexer.php
* PHP_LexerGenerator, a php 5 lexer generator.
* This lexer generator takes a file in a format similar to
* re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
* LICENSE: This source file is subject to version 3.01 of the PHP license
* that is available through the world-wide-web at the following URI:
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
* the PHP License and are unable to obtain it through the web, please
* send a note to license@php.net so we can mail you a copy immediately.
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @since File available since Release 0.1.0
require_once 'PHP/LexerGenerator/Parser.php';
* Token scanner for plex files.
* This scanner detects comments beginning with "/*!lex2php" and
* then returns their components (processing instructions, patterns, strings,
* action code, and regexes)
* @package PHP_LexerGenerator
* @author Gregory Beaver <cellog@php.net>
* @copyright 2006 Gregory Beaver
* @license http://www.php.net/license/3_01.txt PHP License 3.01
* @version @package_version@
* @since Class available since Release 0.1.0
* Current line number in input
* Number of scanning errors detected
* integer identifier of the current token
* string content of current token
// Token type identifiers. Each one is an alias of the PHP_LexerGenerator_Parser
// constant with the same name; the lex* methods store one of them in
// $this->token alongside the matched text in $this->value.
const PHPCODE = PHP_LexerGenerator_Parser::PHPCODE;
const COMMENTSTART = PHP_LexerGenerator_Parser::COMMENTSTART;
const COMMENTEND = PHP_LexerGenerator_Parser::COMMENTEND;
const QUOTE = PHP_LexerGenerator_Parser::QUOTE;
const PATTERN = PHP_LexerGenerator_Parser::PATTERN;
const CODE = PHP_LexerGenerator_Parser::CODE;
const SUBPATTERN = PHP_LexerGenerator_Parser::SUBPATTERN;
const PI = PHP_LexerGenerator_Parser::PI;
* @param string the input
* Output an error message
// Reports a scanning problem on standard output.
// $msg is the human-readable description; it is echoed after the prefix
// "Error on line <current line>: ", where the line comes from $this->line.
private function error($msg)
echo 'Error on line ' . $this->line . ': ' . $msg;
* Initial scanning state lexer
// Scans top-level input, looking for the next lex2php comment opener.
// NOTE(review): branch conditions, returns and closing braces appear to be
// missing from this listing; the comments below describe only the visible
// statements, not the full control flow.
private function lexStart()
// Guard: the read offset $this->N has reached or passed the end of the input.
if ($this->N >= strlen($this->data)) {
// Byte offset of the next opener at or after $this->N; strpos() yields false
// when there is none.
$a = strpos($this->data, '/*!lex2php' . "\n", $this->N);
// Move the read offset to the end of the input and tag the token as PHP code.
$this->N = strlen($this->data);
$this->token = self::PHPCODE;
// The text from the current offset up to (not including) the opener is
// returned as a PHPCODE token.
$this->value = substr($this->data, $this->N, $a - $this->N);
$this->token = self::PHPCODE;
// The opener itself becomes a COMMENTSTART token and the offset skips past it.
$this->value = '/*!lex2php' . "\n";
$this->N += 11; // strlen("/*!lex2php\n")
$this->token = self::COMMENTSTART;
// Continue in the 'Declare' state (handled by lexDeclare).
$this->state = 'Declare';
* lexer for top-level scanning state after the initial declaration comment
// Scans top-level input after the declaration comment, looking for the next
// lex2php comment opener. The visible statements mirror lexStart except that
// no 'Declare' state assignment is visible here.
// NOTE(review): branch conditions, returns and closing braces appear to be
// missing from this listing; the comments below describe only the visible
// statements, not the full control flow.
private function lexStartNonDeclare()
// Guard: the read offset $this->N has reached or passed the end of the input.
if ($this->N >= strlen($this->data)) {
// Byte offset of the next opener at or after $this->N; strpos() yields false
// when there is none.
$a = strpos($this->data, '/*!lex2php' . "\n", $this->N);
// Move the read offset to the end of the input and tag the token as PHP code.
$this->N = strlen($this->data);
$this->token = self::PHPCODE;
// The text from the current offset up to (not including) the opener is
// returned as a PHPCODE token.
$this->value = substr($this->data, $this->N, $a - $this->N);
$this->token = self::PHPCODE;
// The opener itself becomes a COMMENTSTART token and the offset skips past it.
$this->value = '/*!lex2php' . "\n";
$this->N += 11; // strlen("/*!lex2php\n")
$this->token = self::COMMENTSTART;
* lexer for declaration comment state
// Scans one item inside the declaration comment.
// NOTE(review): the statements that assign $token, and the conditions that
// select between the branches below, are not visible in this listing;
// presumably $token holds regex match groups -- confirm against the full file.
private function lexDeclare()
// An asterisk followed by a slash closes the comment: emit COMMENTEND and go
// back to top-level scanning.
if ($this->data[$this->N] == '*' && $this->data[$this->N + 1] == '/') {
$this->state = 'StartNonDeclare';
$this->token = self::COMMENTEND;
// Branch using $token[1]: the offset advances by its length plus one extra
// byte, then scanning continues in 'DeclarePI' (handled by lexDeclarePI).
$this->value = $token[1];
$this->N += strlen($token[1]) + 1;
$this->state = 'DeclarePI';
// Branch using $token[0]: emitted as a PATTERN token; the '=' sign is
// expected next (state 'DeclareEquals').
$this->value = $token[0];
$this->token = self::PATTERN;
$this->N += strlen($token[0]);
$this->state = 'DeclareEquals';
// Error report for input that is not a sub-pattern declaration.
$this->error('expecting declaration of sub-patterns');
* lexer for processing instructions within declaration comment
// Scans the remainder of a processing-instruction line inside the
// declaration comment.
// NOTE(review): the assignment of $token and several branch bodies are not
// visible in this listing; the comments describe only what is shown.
private function lexDeclarePI()
// Advance past spaces and tabs, stopping at the end of the input.
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' ||
$this->data[$this->N] == "\t")) {
$this->N++ ; // skip whitespace
// A newline ends the instruction: switch back to 'Declare' and immediately
// hand over to lexDeclare() for the next item.
if ($this->data[$this->N] == "\n") {
$this->state = 'Declare';
return $this->lexDeclare();
// An opening brace is checked for here; the body of this branch is not
// visible in this listing.
if ($this->data[$this->N] == '{') {
$this->error('Unexpected end of file');
// The matched text is emitted as a SUBPATTERN token.
$this->value = $token[0];
$this->token = self::SUBPATTERN;
* lexer for processing instructions inside rule comments
// Scans the remainder of a processing-instruction line inside a rule comment.
// NOTE(review): the assignment of $token and the bodies of the newline and
// brace branches are not visible in this listing; the comments describe only
// what is shown.
private function lexDeclarePIRule()
// Advance past spaces and tabs, stopping at the end of the input.
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' ||
$this->data[$this->N] == "\t")) {
$this->N++ ; // skip whitespace
// A newline is checked for here (end of the instruction line).
if ($this->data[$this->N] == "\n") {
// An opening brace is checked for here.
if ($this->data[$this->N] == '{') {
$this->error('Unexpected end of file');
// The matched text is emitted as a SUBPATTERN token.
$this->value = $token[0];
$this->token = self::SUBPATTERN;
* lexer for the state representing scanning between a pattern and the "=" sign
// Consumes the "=" that separates a sub-pattern name from its definition,
// then delegates to lexDeclareRightside() for the definition itself.
// NOTE(review): returns, closing braces and the statement that steps over the
// "=" are not visible in this listing.
private function lexDeclareEquals()
// Advance past spaces and tabs that precede the "=".
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' || $this->data[$this->N] == "\t")) {
$this->N++ ; // skip whitespace
// The input ended before any "=" was found.
if ($this->N >= strlen($this->data)) {
$this->error('unexpected end of input, expecting "=" for sub-pattern declaration');
// The next non-blank character must be "=".
if ($this->data[$this->N] != '=') {
$this->error('expecting "=" for sub-pattern declaration');
$this->state = 'DeclareRightside';
// Advance past spaces and tabs that follow the "=".
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' || $this->data[$this->N] == "\t")) {
$this->N++ ; // skip whitespace
// The input ended before the right-hand side started.
if ($this->N >= strlen($this->data)) {
$this->error('unexpected end of file, expecting right side of sub-pattern declaration');
// The right-hand side is scanned in the same call rather than on the next one.
return $this->lexDeclareRightside();
* lexer for the right side of a pattern, detects quotes or regexes
// Scans the right-hand side of a sub-pattern declaration: either a
// double-quoted literal (delegated to lexQuote) or a delimited regex, which is
// emitted as a SUBPATTERN token.
// NOTE(review): the loop opener, the initial assignment of $token and some
// branch conditions are not visible in this listing.
private function lexDeclareRightside()
// A newline ends the right-hand side; continue with the next declaration.
if ($this->data[$this->N] == "\n") {
// NOTE(review): every other state assigned in the visible code omits the
// "lex" prefix (e.g. 'Declare' in lexDeclarePI); confirm that 'lexDeclare'
// is a valid state name for whatever dispatches on $this->state.
$this->state = 'lexDeclare';
return $this->lexDeclare();
// A double quote starts a quoted literal.
if ($this->data[$this->N] == '"') {
return $this->lexQuote();
// Advance past spaces and tabs.
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' ||
$this->data[$this->N] == "\t")) {
$this->N++ ; // skip all whitespace
// The first non-blank character serves as the regex delimiter.
$test = $this->data[$this->N];
// Look for the next occurrence of the delimiter; the search repeats while the
// hit is preceded by exactly one backslash, i.e. while it is escaped.
$token = strpos($this->data, $test, $token);
} while ($token !== false && ($this->data[$token - 1] == '\\'
&& $this->data[$token - 2] != '\\'));
// NOTE(review): the message opens a parenthesis that it never closes.
$this->error('Unterminated regex pattern (started with "' . $test . '"');
// substr_count() is non-zero when a newline lies between the opening and the
// closing delimiter.
if (substr_count($this->data, "\n", $this->N, $token - $this->N)) {
$this->error('Regex pattern extends over multiple lines');
// The value is the text strictly between the two delimiters.
$this->value = substr($this->data, $this->N + 1, $token - $this->N - 1);
// unescape the regex marker
// we will re-escape when creating the final regex
$this->token = self::SUBPATTERN;
* lexer for quoted literals
// Scans a double-quoted literal that starts at $this->N and emits it as a
// QUOTE token.
// NOTE(review): the loop opener, the initial assignment of $token, the
// condition guarding the first error and the offset update are not visible
// in this listing.
private function lexQuote()
// Look for the next double quote; the search repeats while the hit is
// preceded by exactly one backslash, i.e. while it is escaped.
$token = strpos($this->data, '"', $token);
} while ($token !== false && $token < strlen($this->data) &&
($this->data[$token - 1] == '\\' && $this->data[$token - 2] != '\\'));
$this->error('unterminated quote');
// substr_count() is non-zero when a newline lies between the opening and the
// closing quote.
if (substr_count($this->data, "\n", $this->N, $token - $this->N)) {
$this->error('quote extends over multiple lines');
// The value is the text strictly between the two quotes.
$this->value = substr($this->data, $this->N + 1, $token - $this->N - 1);
$this->token = self::QUOTE;
// Scans one item inside a rule comment: the end of the comment, a processing
// instruction, a code block opener, a quoted literal or a sub-pattern name.
// NOTE(review): the assignment of $token, several branch conditions and
// bodies, and the returns are not visible in this listing.
private function lexRule()
// Advance past spaces, tabs and newlines.
while ($this->N < strlen($this->data) &&
($this->data[$this->N] == ' ' ||
$this->data[$this->N] == "\t" ||
$this->data[$this->N] == "\n")) {
// Newlines get their own branch; its body is not visible here
// (presumably line counting -- confirm).
if ($this->data[$this->N] == "\n") {
$this->N++ ; // skip all whitespace
// The input ended before any rule was found.
if ($this->N >= strlen($this->data)) {
$this->error('unexpected end of input, expecting rule declaration');
// An asterisk followed by a slash closes the comment: emit COMMENTEND and go
// back to top-level scanning.
if ($this->data[$this->N] == '*' && $this->data[$this->N + 1] == '/') {
$this->state = 'StartNonDeclare';
$this->token = self::COMMENTEND;
// Branch using $token[1]: the offset advances by its length plus one extra
// byte, then scanning continues in 'DeclarePIRule'.
$this->value = $token[1];
$this->N += strlen($token[1]) + 1;
$this->state = 'DeclarePIRule';
// An opening brace is checked for here; the body of this branch is not
// visible in this listing.
if ($this->data[$this->N] == "{") {
// A double quote starts a quoted literal.
if ($this->data[$this->N] == '"') {
return $this->lexQuote();
// Otherwise the matched text is emitted as a SUBPATTERN token.
$this->value = $token[0];
$this->N += strlen($token[0]);
$this->token = self::SUBPATTERN;
// Error report for input that is neither a quote nor a sub-pattern.
$this->error('expecting token rule (quotes or sub-patterns)');
* lexer for php code blocks
private function lexCode()
for ($level = 1; $cp < strlen($this->data) && ($level > 1 || $this->data[$cp] != '}'); $cp++ ) {
if ($this->data[$cp] == '{') {
} elseif ($this->data[$cp] == '}') {
} elseif ($this->data[$cp] == '/' && $this->data[$cp + 1] == '/') {
/* Skip C++ style comments */
$z = strpos($this->data, "\n", $cp);
} elseif ($this->data[$cp] == "'" || $this->data[$cp] == '"') {
/* String and character literals */
$startchar = $this->data[$cp];
for ($cp++ ; $cp < strlen($this->data) && ($this->data[$cp] != $startchar || $prevc === '\\'); $cp++ ) {
|