Source for file RdqlParser.php
Documentation is available at RdqlParser.php
// ----------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------
* This class contains methods for parsing an Rdql query string into PHP variables.
* The output of the RdqlParser is an array with variables and constraints
* of each query clause (Select, From, Where, And, Using).
* To perform an RDQL query this array has to be passed to the RdqlEngine.
* @version $Id: fsource_rdql__rdqlRdqlParser.php.html,v 1.10 2006/06/26 12:34:14 tgauss Exp $
* @author Radoslaw Oldakowski <radol@gmx.de>
* Parsed query variables and constraints.
* Entries shown in { } are used only inside the parser class and are not part of the returned parsed query.
* ( [] stands for an integer index - 0..N )
* @var array ['selectVars'][] = ?VARNAME
* ['sources'][]{['value']} = URI | QName
* {['is_qname'] = boolean}
* ['patterns'][]['subject']['value'] = VARorURI
* {['is_qname'] = boolean}
* ['predicate']['value'] = VARorURI
* {['is_qname'] = boolean}
* ['object']['value'] = VARorURIorLiteral
* {['is_qname'] = boolean}
* ['is_literal'] = boolean
* {['l_dtype_is_qname'] = boolean}
* ['filters'][]['string'] = string
* ['evalFilterStr'] = string
* ['regexEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (~~ | =~ | !~)
* ['regex'] = string
* ['strEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (eq | ne)
* ['value_type'] = ('variable' | 'URI' | 'QName' | 'Literal')
* ['value_lang'] = string
* ['value_dtype'] = string
* {['value_dtype_is_qname'] = boolean}
* ['numExprVars'][] = ?VARNAME
* {['ns'][PREFIX] = NAMESPACE}
* Query string divided into a sequence of tokens.
* A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
* or a string containing any characters other than those listed above.
* Parse the given RDQL query string and return an array with query variables and constraints.
* @param string $queryString
* @return array $this->parsedQuery
* Remove comments from the passed query string.
for ($i=
0; $i<=
$last; $i++
) {
// don't search for comments inside a 'literal'@lang^^dtype or "literal"@lang^^dtype
if ($query{$i} ==
"'" ||
$query{$i} ==
'"') {
while($i <
$last &&
$query{$i} !=
$quotMark);
if ($query{$i+
1} ==
'@') {
if ($query{$i+
1} ==
'^' &&
$query{$i+
2} ==
'^')
}while ($i <
$last &&
$query{$i} !=
' ' &&
$query{$i} !=
"\t"
&&
$query{$i} !=
"\n" &&
$query{$i} !=
"\r");
if ($query{$i+
1} ==
'^' &&
$query{$i+
2} ==
'^') {
while ($i <
$last &&
$query{$i} !=
' ' &&
$query{$i} !=
"\t"
&&
$query{$i} !=
"\n" &&
$query{$i} !=
"\r" );
// don't search for comments inside an <URI> either
}elseif ($query{$i} ==
'<') {
}while($i <
$last &&
$query{$i} !=
'>');
}elseif ($query{$i} ==
'/') {
if ($i <
$last &&
$query{$i+
1} ==
'/') {
while($i <
$last &&
$query{$i} !=
"\n" &&
$query{$i} !=
"\r")
}elseif ($i <
$last-
2 &&
$query{$i+
1} ==
'*') {
while($i <
$last &&
($query{$i} !=
'*' ||
$query{$i+
1} !=
'/'))
if ($i >=
$last &&
($query{$last-
1} !=
'*' ||
$query{$last} !=
'/'))
trigger_error(RDQL_SYN_ERR .
": unterminated comment - '*/' missing", E_USER_ERROR);
* Divide the query string into tokens.
* A token is either: ' ' or "\n" or "\r" or "\t" or ',' or '(' or ')'
* or a string containing any characters other than those listed above.
* @param string $queryString
$queryString =
trim($queryString, " \r\n\t");
$specialChars =
array (" ", "\t", "\r", "\n", ",", "(", ")");
for ($i=
0; $i<
$len; ++
$i) {
if (!in_array($queryString{$i}, $specialChars))
$this->tokens[$n] .=
$queryString{$i};
$this->tokens[$n] =
$queryString{$i};
* Start parsing of the tokenized query string.
* Parse the SELECT clause of an Rdql query.
* When the parsing of the SELECT clause is finished, this method will call
* a suitable method to parse the subsequent clause.
// Check if the queryString contains a "SELECT" token
.
"' - SELECT keyword expected", E_USER_ERROR);
.
"' - SOURCE or WHERE clause expected", E_USER_ERROR);
// Parse SELECT ?Var (, ?Var)*
case ',':
if (!$commaExpected)
trigger_error(RDQL_SEL_ERR .
" ',' - unexpected comma", E_USER_ERROR);
case ')':
trigger_error(RDQL_SEL_ERR .
" '$token' - illegal input", E_USER_ERROR);
trigger_error(RDQL_SEL_ERR .
" ',' - unexpected comma", E_USER_ERROR);
}elseif (!strcasecmp('WHERE', $token) &&
!$comma) {
trigger_error(RDQL_SEL_ERR .
" ',' - unexpected comma", E_USER_ERROR);
trigger_error(RDQL_SEL_ERR .
" '$token' - '?' missing", E_USER_ERROR);
trigger_error(RDQL_SYN_ERR .
': WHERE clause missing', E_USER_ERROR);
* Parse the FROM/SOURCES clause of an Rdql query
* When the parsing of this clause is finished, parseWhere() will be called.
trigger_error(RDQL_SEL_ERR .
" ',' - unexpected comma", E_USER_ERROR);
trigger_error(RDQL_SRC_ERR .
"',' - unecpected comma", E_USER_ERROR);
$this->parsedQuery['sources'][++
$i]['value'] =
$this->_validateURI($token, RDQL_SRC_ERR);
trigger_error(RDQL_SYN_ERR .
': WHERE clause missing', E_USER_ERROR);
* Parse the WHERE clause of an Rdql query.
* When the parsing of the WHERE clause is finished, this method will call
* a suitable method to parse the subsequent clause if provided.
trigger_error(RDQL_WHR_ERR .
" ',' - unexpected comma", E_USER_ERROR);
trigger_error(RDQL_WHR_ERR .
" ',' - unexpected comma", E_USER_ERROR);
.
"' - '(' expected", E_USER_ERROR);
trigger_error(RDQL_WHR_ERR .
" ',' - unexpected comma", E_USER_ERROR);
* Parse the AND clause of an Rdql query
}elseif ($token ==
',') {
* Parse the USING clause of an Rdql query
trigger_error(RDQL_USG_ERR .
" keyword: 'FOR' missing in the namespace declaration: '", E_USER_ERROR);
trigger_error(RDQL_WHR_ERR .
" ',' - unexpected comma", E_USER_ERROR);
* Check if a filter from the AND clause contains an equal number of '(' and ')'
* and parse filter expressions.
trigger_error(RDQL_AND_ERR .
" ',' - unexpected comma", E_USER_ERROR);
* Parse expressions inside the passed filter:
* 1) regex equality expressions: ?var [~~ | =~ | !~ ] REG_EX
* 2a) string equality expressions: ?var [eq | ne] "literal"@lang^^dtype.
* 2b) string equality expressions: ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
* 3) numerical expressions: e.g. (?var1 - ?var2)*4 >= 20
* In cases 1-2 parse each expression of the given filter into an array of variables.
* For each parsed expression put a place holder (e.g. ##RegEx_1##) into the filterStr.
* The RdqlEngine will then replace each place holder with the resulting boolean value
* of the corresponding expression.
* The remaining filterStr contains only numerical expressions and place holders.
* @param string $filterStr
* @return array ['string'] = string
* ['evalFilterStr'] = string
* ['regexEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (~~ | =~ | !~)
* ['regex'] = string
* ['strEqExprs'][]['var'] = ?VARNAME
* ['operator'] = (eq | ne)
* ['value_type'] = ('variable' | 'URI' | 'QName'| 'Literal')
* ['value_lang'] = string
* ['value_dtype'] = string
* ['value_dtype_is_qname'] = boolean
* ['numExprVars'][] = ?VARNAME
$parsedFilter['string'] =
$filterStr;
$parsedFilter['regexEqExprs'] =
array();
$parsedFilter['strEqExprs'] =
array();
$parsedFilter['numExprVars'] =
array();
// parse regex string equality expressions, e.g. ?x ~~ !//foo.com/r!i
$reg_ex =
"/(\?[a-zA-Z0-9_]+)\s+([~!=]~)\s+(['|\"])?([^\s'\"]+)(['|\"])?/";
foreach ($eqExprs[0] as $i =>
$eqExpr) {
$parsedFilter['regexEqExprs'][$i]['var'] =
$this->_isDefined($eqExprs[1][$i]);
$parsedFilter['regexEqExprs'][$i]['operator'] =
$eqExprs[2][$i];
$parsedFilter['regexEqExprs'][$i]['regex'] =
$eqExprs[4][$i];
$filterStr =
str_replace($eqExpr, " ##RegEx_$i## ", $filterStr);
// parse ?var [eq | ne] "literal"@lang^^dtype
$reg_ex =
"/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\'[^\']*\'|\"[^\"]*\")";
$reg_ex .=
"(@[a-zA-Z]+)?(\^{2}\S+:?\S+)?/i";
foreach ($eqExprs[0] as $i =>
$eqExpr) {
$parsedFilter['strEqExprs'][$i]['var'] =
$this->_isDefined($eqExprs[1][$i]);#
$parsedFilter['strEqExprs'][$i]['operator'] =
strtolower($eqExprs[2][$i]);
$parsedFilter['strEqExprs'][$i]['value'] =
trim($eqExprs[3][$i],"'\"");
$parsedFilter['strEqExprs'][$i]['value_type'] =
'Literal';
$parsedFilter['strEqExprs'][$i]['value_lang'] =
substr($eqExprs[4][$i], 1);
$dtype =
substr($eqExprs[5][$i], 2);
$parsedFilter['strEqExprs'][$i]['value_dtype'] =
$this->_validateUri($dtype, RDQL_AND_ERR);
$parsedFilter['strEqExprs'][$i]['value_dtype_is_qname'] =
TRUE;
$parsedFilter['strEqExprs'][$i]['value_dtype'] =
'';
$filterStr =
str_replace($eqExprs[0][$i], " ##strEqExpr_$i## ", $filterStr);
// parse ?var [eq | ne] ?var
$ii =
count($parsedFilter['strEqExprs']);
$reg_ex =
"/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+(\?[a-zA-Z0-9_]+)/i";
foreach ($eqExprs[0] as $i =>
$eqExpr) {
$parsedFilter['strEqExprs'][$ii]['var'] =
$this->_isDefined($eqExprs[1][$i]);
$parsedFilter['strEqExprs'][$ii]['operator'] =
strtolower($eqExprs[2][$i]);
$parsedFilter['strEqExprs'][$ii]['value'] =
$this->_isDefined($eqExprs[3][$i]);
$parsedFilter['strEqExprs'][$ii]['value_type'] =
'variable';
$filterStr =
str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
// parse ?var [eq | ne] <URI> or ?var [eq | ne] prefix:local_name
$reg_ex =
"/(\?[a-zA-Z0-9_]+)\s+(eq|ne)\s+((<\S+>)|(\S+:\S*))/i";
foreach ($eqExprs[0] as $i =>
$eqExpr) {
$parsedFilter['strEqExprs'][$ii]['var'] =
$this->_isDefined($eqExprs[1][$i]);
$parsedFilter['strEqExprs'][$ii]['operator'] =
strtolower($eqExprs[2][$i]);
$parsedFilter['strEqExprs'][$ii]['value'] =
trim($eqExprs[4][$i], "<>");
$parsedFilter['strEqExprs'][$ii]['value_type'] =
'URI';
}else if($eqExprs[5][$i]){
$parsedFilter['strEqExprs'][$ii]['value'] =
$eqExprs[5][$i];
$parsedFilter['strEqExprs'][$ii]['value_type'] =
'QName';
$filterStr =
str_replace($eqExprs[0][$i], " ##strEqExpr_$ii## ", $filterStr);
$parsedFilter['evalFilterStr'] =
$filterStr;
// all that is left are numerical expressions and place holders for the above expressions
foreach ($vars[0] as $var) {
$parsedFilter['numExprVars'][] =
$this->_isDefined($var);
* Find all query variables used in the WHERE clause.
* @return array [] = ?VARNAME
foreach ($pattern as $v) {
if ($v['value'] &&
$v['value']{0} ==
'?') {
trigger_error(RDQL_WHR_ERR .
'pattern contains no variables', E_USER_ERROR);
* Replace all namespace prefixes in the pattern and constraint clause of an rdql query
* with the namespaces declared in the USING clause and default namespaces.
global $default_prefixes;
// add default namespaces
// if in an rdql query a reserved prefix (e.g. rdf: rdfs:) is used
// it will be overridden by the default namespace defined in constants.php
// replace namespace prefixes in the FROM clause
foreach ($this->parsedQuery['sources'] as $n =>
$source) {
if (isset
($source['is_qname']))
$source['value'] =
eregi_replace("$prefix:", $uri, $source['value']);
// replace namespace prefixes in the where clause
foreach ($this->parsedQuery['patterns'] as $n =>
$pattern) {
foreach ($pattern as $key =>
$v)
if ($v['value'] &&
$v['value']{0} !=
'?') {
if (isset
($v['is_qname'])) {
unset
($this->parsedQuery['patterns'][$n][$key]['is_qname']);
} else { // is quoted URI (== <URI>) or Literal
if (isset
($this->parsedQuery['patterns'][$n][$key]['is_literal'])) {
if (isset
($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname'])) {
unset
($this->parsedQuery['patterns'][$n][$key]['l_dtype_is_qname']);
// replace prefixes in the constraint clause
foreach ($this->parsedQuery['filters'] as $n =>
$filter)
foreach ($filter['strEqExprs'] as $i =>
$expr) {
if ($expr['value_type'] ==
'QName') {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_type'] =
'URI';
if ($expr['value_type'] ==
'URI')
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value']);
elseif ($expr['value_type'] ==
'Literal') {
if (isset
($expr['value_dtype_is_qname'])) {
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
unset
($this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype_is_qname']);
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']
$this->parsedQuery['filters'][$n]['strEqExprs'][$i]['value_dtype']);
// =============================================================================
// *************************** helper functions ********************************
// =============================================================================
* Remove whitespace-tokens from the array $this->tokens
* Check if the query string of the given clause contains an undesired ','.
* If a comma was correctly placed then remove it and clear all whitespaces.
* @param string $commaExpected
* @param string $clause_error
trigger_error($clause_error .
"',' - unexpected comma", E_USER_ERROR);
* Check if the given token is either a variable (?var) or the first token of a URI (<URI>).
* In case of a URI this function returns the whole URI string.
* @return array ['value'] = string
$token_res['value'] =
$this->_validateVar($token, RDQL_WHR_ERR);
$token_res['value'] =
$this->_validateUri($token, RDQL_WHR_ERR);
$token_res['is_qname'] =
TRUE;
* Check if the given token is either a variable (?var) or the first token
* of either a URI (<URI>) or a literal ("Literal").
* In case of a literal return an array with literal properties (value, language, datatype).
* In case of a variable or a URI return only ['value'] = string.
* @return array ['value'] = string
* ['is_literal'] = boolean
$statement_object['value'] =
$this->_validateVar($token, RDQL_WHR_ERR);
elseif ($token{0} ==
"'" ||
$token{0} ==
'"')
elseif ($token{0} ==
'<')
$statement_object['value'] =
$this->_validateUri($token, RDQL_WHR_ERR);
elseif (ereg(':', $token)) {
$statement_object['value'] =
$this->_validateUri($token, RDQL_WHR_ERR);
$statement_object['is_qname'] =
TRUE;
trigger_error(RDQL_WHR_ERR .
" '$token' - ?Variable, <URI>, QName, or \"LITERAL\" expected", E_USER_ERROR);
return $statement_object;
* Check if the given token is a valid variable name (?var).
* @param string $clause_error
if (!isset
($match[0]) ||
$match[0] !=
$token)
.
"' - variable name contains illegal characters", E_USER_ERROR);
* Check if $token is the first token of a valid URI (<URI>) and return the whole URI string
* @param string $clause_error
return rtrim($token, ':');
if ($clause_error ==
RDQL_WHR_ERR)
$errmsg .=
"- ?Variable or <URI> or QName expected";
$errmsg .=
"- <URI> or QName expected";
while($token{strlen($token)-
1} !=
'>' &&
$token !=
NULL) {
if ($token ==
'(' ||
$token ==
')' ||
$token ==
',' ||
$token ==
' ' ||
$token ==
"\n" ||
$token ==
"\r") {
.
"' - illegal input: '$token' - '>' missing", E_USER_ERROR);
return trim($token_res, '<>');
* Check if $token is the first token of a valid literal ("LITERAL") and
* return an array with literal properties (value, language, datatype).
* @return array ['value'] = string
* ['is_literal'] = boolean
* ['l_dtype_is_qname'] = boolean
$quotation_mark =
$token{0};
$statement_object =
array ('value' =>
'',
foreach ($this->tokens as $k =>
$token) {
if ($token !=
NULL &&
$token{strlen($token)-
1} ==
$quotation_mark) {
$token =
rtrim($token, $quotation_mark);
// parse @language(^^datatype)?
}elseif (strpos($token, $quotation_mark .
'@') ||
substr($token, 0, 2) ==
$quotation_mark .
'@') {
$lang =
substr($token, strpos($token, $quotation_mark .
'@')+
2);
trigger_error(RDQL_WHR_ERR .
$quotation_mark .
$statement_object['value']
.
$token .
" - datatype expected" ,E_USER_ERROR);
$statement_object['l_dtype'] =
$this->_validateUri($dtype, RDQL_WHR_ERR);
$statement_object['l_dtype_is_qname'] =
TRUE;
trigger_error(RDQL_WHR_ERR .
$quotation_mark .
$statement_object['value']
.
$token .
" - language expected" ,E_USER_ERROR);
$statement_object['l_lang'] =
$lang;
$token =
substr($token, 0, strpos($token, $quotation_mark .
'@'));
}elseif (strpos($token, $quotation_mark .
'^^') ||
substr($token, 0, 3) ==
$quotation_mark .
'^^') {
$dtype =
substr($token, strpos($token, $quotation_mark .
'^^')+
3);
trigger_error(RDQL_WHR_ERR .
$quotation_mark .
$statement_object['value']
.
$token .
" - datatype expected" ,E_USER_ERROR);
$statement_object['l_dtype'] =
$this->_validateUri($dtype, RDQL_WHR_ERR);
$statement_object['l_dtype_is_qname'] =
TRUE;
$token =
substr($token, 0, strpos($token, $quotation_mark .
'^^'));
}elseif (strpos($token, $quotation_mark))
trigger_error(RDQL_WHR_ERR .
"'$token' - illegal input", E_USER_ERROR);
$statement_object['value'] .=
$token;
return $statement_object;
trigger_error(RDQL_WHR_ERR .
"quotation end mark: $quotation_mark missing", E_USER_ERROR);
* Check if the given token is a valid QName.
* @param string $clause_error
trigger_error($clause_error .
"illegal QName: '$token'", E_USER_ERROR);
trigger_error($clause_error .
"illegal prefix in QName: '$token'", E_USER_ERROR);
trigger_error($clause_error .
"illegal local part in QName: '$token'", E_USER_ERROR);
* Check if the given token is a valid NCName.
preg_match("/[a-zA-Z_]+[a-zA-Z_0-9.\-]*/", $token, $match);
if (isset
($match[0]) &&
$match[0] ==
$token)
* Check if the given token is a valid namespace prefix.
.
"' - illegal input, namespace prefix expected", E_USER_ERROR);
* Replace a prefix in a given QName and return a full URI.
* @param string $clause_error
$qName_parts =
explode(':', $qName);
trigger_error($clause_error .
'undefined prefix: \'' .
$qName_parts[0] .
'\' in: \'' .
$qName .
'\'', E_USER_ERROR);
return $this->parsedQuery['ns'][$qName_parts[0]] .
$qName_parts[1];
* Check if all variables from the SELECT clause are defined in the WHERE clause
* Check if the given variable is defined in the WHERE clause.
trigger_error(RDQL_SYN_ERR .
": '$var' - variable must be defined in the WHERE clause", E_USER_ERROR);
* Throw an error if the regular expression from the AND clause is not quoted.
* @param string $filterString
* @param string $lQuotMark
* @param string $rQuotMark
trigger_error(RDQL_AND_ERR .
"'$filterString' - regular expressions must be quoted", E_USER_ERROR);
if ($lQuotMark !=
$rQuotMark)
trigger_error(RDQL_AND_ERR .
"'$filterString' - quotation end mark in the regular expression missing", E_USER_ERROR);
} // end: Class RdqlParser
Documentation generated on Mon, 26 Jun 2006 14:26:03 +0200 by phpDocumentor 1.3.0RC6