зеркало из https://github.com/mozilla/gecko-dev.git
509 строки
14 KiB
C++
509 строки
14 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
|
* The contents of this file are subject to the Mozilla Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is TransforMiiX XSLT processor.
|
|
*
|
|
* The Initial Developer of the Original Code is The MITRE Corporation.
|
|
* Portions created by MITRE are Copyright (C) 1999 The MITRE Corporation.
|
|
*
|
|
* Portions created by Keith Visco as a Non MITRE employee,
|
|
* (C) 1999 Keith Visco. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Keith Visco, kvisco@ziplink.net
|
|
* -- original author.
|
|
* -- fixed bug with '<=' and '>=' reported by Bob Miller
|
|
*
|
|
* Bob Miller, Oblix Inc., kbob@oblix.com
|
|
* -- fixed bug with single quotes inside double quotes
|
|
*
|
|
* Marina Mechtcheriakova, mmarina@mindspring.com
|
|
* -- Fixed bug in parse method so that we make sure we check for
|
|
* axis identifier wild cards, such as ancestor::*
|
|
*
|
|
* Axel Hecht <axel@pike.org>
|
|
* -- big beating, general overhaul
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* Lexical analyzer for XPath expressions
|
|
**/
|
|
|
|
#include "ExprLexer.h"
|
|
#include "XMLUtils.h"
|
|
|
|
//---------------------------/
|
|
//- Implementation of Token -/
|
|
//---------------------------/
|
|
|
|
|
|
/**
|
|
* Default constructor for Token
|
|
**/
|
|
Token::Token()
|
|
{
|
|
this->type =0;
|
|
} //-- Token;
|
|
|
|
/**
|
|
* Constructor for Token
|
|
* @param type, the type of Token being represented
|
|
**/
|
|
Token::Token(short type)
|
|
{
|
|
this->type = type;
|
|
} //-- Token;
|
|
|
|
/**
|
|
* Constructor for Token
|
|
* @param value the value of this Token
|
|
* @param type, the type of Token being represented
|
|
**/
|
|
Token::Token(const String& value, short type)
|
|
{
|
|
this->type = type;
|
|
//-- make copy of value String
|
|
this->value = value;
|
|
} //-- Token
|
|
|
|
/**
 * Creates a Token whose value consists of a single character
 * @param uniChar the character appended to the (initially default) value
 * @param type the type of Token being represented
**/
Token::Token(PRUnichar uniChar, short type)
    : type(type)
{
    value.Append(uniChar);
} //-- Token
|
|
|
|
/**
|
|
* Copy Constructor
|
|
**/
|
|
Token::Token(const Token& token)
|
|
{
|
|
this->type = token.type;
|
|
this->value = token.value;
|
|
} //-- Token
|
|
|
|
/**
|
|
* Destructor for Token
|
|
**/
|
|
Token::~Token()
|
|
{
|
|
//-- currently nothing is needed
|
|
} //-- ~Token
|
|
|
|
|
|
//--------------------------------/
|
|
//- Implementation of ExprLexer -/
|
|
//-------------------------------/
|
|
|
|
/*
|
|
* Complex Tokens
|
|
*/
|
|
//-- Nodetype tokens
|
|
const String ExprLexer::COMMENT(NS_LITERAL_STRING("comment"));
|
|
const String ExprLexer::NODE(NS_LITERAL_STRING("node"));
|
|
const String ExprLexer::PROC_INST(NS_LITERAL_STRING("processing-instruction"));
|
|
const String ExprLexer::TEXT(NS_LITERAL_STRING("text"));
|
|
|
|
//-- boolean
|
|
const String ExprLexer::AND(NS_LITERAL_STRING("and"));
|
|
const String ExprLexer::OR(NS_LITERAL_STRING("or"));
|
|
|
|
//-- multiplicative operators
|
|
const String ExprLexer::MODULUS(NS_LITERAL_STRING("mod"));
|
|
const String ExprLexer::DIVIDE(NS_LITERAL_STRING("div"));
|
|
|
|
/**
|
|
* The set of Lexer error messages
|
|
**/
|
|
const String ExprLexer::error_message[] =
|
|
{
|
|
String(NS_LITERAL_STRING("VariableReference expected")),
|
|
String(NS_LITERAL_STRING("Operator expected")),
|
|
String(NS_LITERAL_STRING("Literal is not closed")),
|
|
String(NS_LITERAL_STRING(": not expected")),
|
|
String(NS_LITERAL_STRING("! not expected, use != or not()")),
|
|
String(NS_LITERAL_STRING("found a unkown character"))
|
|
};
|
|
|
|
//----------------/
//- Constructors -/
//----------------/
|
|
|
|
/**
|
|
* Creates a new ExprLexer using the given String
|
|
**/
|
|
ExprLexer::ExprLexer(const String& pattern)
|
|
{
|
|
firstItem = 0;
|
|
lastItem = 0;
|
|
tokenCount = 0;
|
|
prevToken = 0;
|
|
endToken.type = Token::END;
|
|
parse(pattern);
|
|
currentItem = firstItem;
|
|
} //-- ExprLexer
|
|
|
|
/**
|
|
* Destroys this instance of an ExprLexer
|
|
**/
|
|
ExprLexer::~ExprLexer()
|
|
{
|
|
//-- delete tokens
|
|
currentItem = firstItem;
|
|
while (currentItem) {
|
|
TokenListItem* temp = currentItem->next;
|
|
delete currentItem->token;
|
|
delete currentItem;
|
|
currentItem = temp;
|
|
}
|
|
} //-- ~ExprLexer
|
|
|
|
|
|
/**
 * Returns true as long as the current position refers to a token
**/
MBool ExprLexer::hasMoreTokens()
{
    if (currentItem)
        return MB_TRUE;
    return MB_FALSE;
} //-- hasMoreTokens
|
|
|
|
/**
 * Returns the token at the current position and advances past it.
 * Once the list is exhausted the END token is returned instead and the
 * position is left untouched.
**/
Token* ExprLexer::nextToken()
{
    if (!currentItem)
        return &endToken;

    TokenListItem* item = currentItem;
    currentItem = item->next;
    return item->token;
} //-- nextToken
|
|
|
|
void ExprLexer::pushBack()
|
|
{
|
|
if (!currentItem)
|
|
currentItem = lastItem;
|
|
else
|
|
currentItem = currentItem->previous;
|
|
} //-- pushBack
|
|
|
|
/**
 * Returns the token at the current position without advancing, or the
 * END token when the list is exhausted
**/
Token* ExprLexer::peek()
{
    return currentItem ? currentItem->token : &endToken;
} //-- peek
|
|
|
|
/**
 * Appends the given Token to the end of the token list and records it
 * as the most recently added token (prevToken).
 * The list item created here, and the Token itself, are deleted by the
 * ExprLexer destructor.
 * @param token the Token to append
**/
void ExprLexer::addToken(Token* token)
{
    TokenListItem* tlItem = new TokenListItem;
    tlItem->token = token;
    tlItem->next = 0;
    //-- always set the back link: lastItem is null for the first item,
    //-- so pushBack() from the first token sees a null previous instead
    //-- of an uninitialized pointer
    tlItem->previous = lastItem;
    if (lastItem)
        lastItem->next = tlItem;
    if (!firstItem)
        firstItem = tlItem;
    lastItem = tlItem;
    prevToken = token;
    ++tokenCount;
} //-- addToken
|
|
|
|
/**
|
|
* Returns true if the following Token should be an operator.
|
|
* This is a helper for the first bullet of [XPath 3.7]
|
|
* Lexical Structure
|
|
**/
|
|
MBool ExprLexer::nextIsOperatorToken(Token* token)
|
|
{
|
|
if (!token || token->type == Token::NULL_TOKEN)
|
|
return MB_FALSE;
|
|
/* This relies on the tokens having the right order in ExprLexer.h */
|
|
if (token->type >= Token::COMMA &&
|
|
token->type <= Token::UNION_OP)
|
|
return MB_FALSE;
|
|
return MB_TRUE;
|
|
} //-- nextIsOperatorToken
|
|
|
|
/**
|
|
* Parses the given String into the set of Tokens
|
|
**/
|
|
void ExprLexer::parse(const String& pattern)
|
|
{
|
|
if (pattern.IsEmpty())
|
|
return;
|
|
|
|
String tokenBuffer;
|
|
PRUint32 iter = 0, start;
|
|
PRUint32 size = pattern.Length();
|
|
short defType;
|
|
PRUnichar ch;
|
|
|
|
//-- initialize previous token, this will automatically get
|
|
//-- deleted when it goes out of scope
|
|
Token nullToken('\0', Token::NULL_TOKEN);
|
|
|
|
prevToken = &nullToken;
|
|
|
|
while (iter < size) {
|
|
|
|
ch = pattern.CharAt(iter);
|
|
defType = Token::CNAME;
|
|
|
|
if (ch==DOLLAR_SIGN) {
|
|
if (++iter == size || !XMLUtils::isLetter(ch=pattern.CharAt(iter))) {
|
|
// Error, VariableReference expected
|
|
errorPos = iter;
|
|
errorCode = ERROR_UNRESOLVED_VAR_REFERENCE;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
else
|
|
defType = Token::VAR_REFERENCE;
|
|
}
|
|
// just reuse the QName parsing, which will use defType
|
|
// the token to construct
|
|
|
|
if (XMLUtils::isLetter(ch)) {
|
|
// NCName, can get QName or OperatorName;
|
|
// FunctionName, NodeName, and AxisSpecifier may want whitespace,
|
|
// and are dealt with below
|
|
start = iter;
|
|
while (++iter < size &&
|
|
XMLUtils::isNCNameChar(pattern.CharAt(iter))) /* just go */ ;
|
|
if (iter < size && pattern.CharAt(iter)==COLON) {
|
|
// try QName or wildcard, might need to step back for axis
|
|
if (++iter < size)
|
|
if (XMLUtils::isLetter(pattern.CharAt(iter)))
|
|
while (++iter < size &&
|
|
XMLUtils::isNCNameChar(pattern.CharAt(iter))) /* just go */ ;
|
|
else if (pattern.CharAt(iter)=='*'
|
|
&& defType != Token::VAR_REFERENCE)
|
|
++iter; /* eat wildcard for NameTest, bail for var ref at COLON */
|
|
else
|
|
iter--; // step back
|
|
}
|
|
if (nextIsOperatorToken(prevToken)) {
|
|
if (pattern.subString(start,iter,subStr).Equals(AND))
|
|
defType = Token::AND_OP;
|
|
else if (pattern.subString(start,iter,subStr).Equals(OR))
|
|
defType = Token::OR_OP;
|
|
else if (pattern.subString(start,iter,subStr).Equals(MODULUS))
|
|
defType = Token::MODULUS_OP;
|
|
else if (pattern.subString(start,iter,subStr).Equals(DIVIDE))
|
|
defType = Token::DIVIDE_OP;
|
|
else {
|
|
// Error "operator expected"
|
|
// XXX QUESTION: spec is not too precise
|
|
// badops is sure an error, but is bad:ops, too? We say yes!
|
|
errorPos = iter;
|
|
errorCode = ERROR_OP_EXPECTED;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
}
|
|
addToken(new Token(pattern.subString(start,iter,subStr),defType));
|
|
}
|
|
else if (isXPathDigit(ch)) {
|
|
start = iter;
|
|
while (++iter < size &&
|
|
isXPathDigit(pattern.CharAt(iter))) /* just go */;
|
|
if (iter < size && pattern.CharAt(iter) == '.')
|
|
while (++iter < size &&
|
|
isXPathDigit(pattern.CharAt(iter))) /* just go */;
|
|
addToken(new Token(pattern.subString(start,iter,subStr),Token::NUMBER));
|
|
}
|
|
else {
|
|
switch (ch) {
|
|
//-- ignore whitespace
|
|
case SPACE:
|
|
case TX_TAB:
|
|
case TX_CR:
|
|
case TX_LF:
|
|
++iter;
|
|
break;
|
|
case S_QUOTE :
|
|
case D_QUOTE :
|
|
start=iter;
|
|
iter = pattern.indexOf(ch, (PRInt32)start + 1);
|
|
if ((PRInt32)iter == kNotFound) {
|
|
// XXX Error reporting "unclosed literal"
|
|
errorPos = start;
|
|
errorCode = ERROR_UNCLOSED_LITERAL;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
else {
|
|
addToken(new Token(pattern.subString(start+1,iter,subStr),
|
|
Token::LITERAL));
|
|
++iter;
|
|
}
|
|
break;
|
|
case PERIOD:
|
|
// period can be .., .(DIGITS)+ or ., check next
|
|
if (++iter < size) {
|
|
ch=pattern.CharAt(iter);
|
|
if (isXPathDigit(ch)) {
|
|
start=iter-1;
|
|
while (++iter < size &&
|
|
isXPathDigit(pattern.CharAt(iter))) /* just go */;
|
|
addToken(new Token(pattern.subString(start,iter,subStr),
|
|
Token::NUMBER));
|
|
}
|
|
else if (ch==PERIOD) {
|
|
addToken(new Token(pattern.subString(iter-1,iter++,subStr),
|
|
Token::PARENT_NODE));
|
|
}
|
|
else
|
|
addToken(new Token(PERIOD, Token::SELF_NODE));
|
|
}
|
|
else
|
|
addToken(new Token(ch, Token::SELF_NODE));
|
|
// iter++ is already in the number test
|
|
|
|
break;
|
|
case COLON: // QNames are dealt above, must be axis ident
|
|
if (++iter < size && pattern.CharAt(iter)==COLON &&
|
|
prevToken->type == Token::CNAME) {
|
|
prevToken->type = Token::AXIS_IDENTIFIER;
|
|
++iter;
|
|
}
|
|
else {
|
|
// XXX Error report "colon is neither QName nor axis"
|
|
errorPos = iter;
|
|
errorCode = ERROR_COLON;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
break;
|
|
case FORWARD_SLASH :
|
|
if (++iter < size && pattern.CharAt(iter)==ch) {
|
|
addToken(new Token(pattern.subString(iter-1,++iter,subStr),
|
|
Token::ANCESTOR_OP));
|
|
}
|
|
else {
|
|
addToken(new Token(ch, Token::PARENT_OP));
|
|
}
|
|
break;
|
|
case BANG : // can only be !=
|
|
if (++iter < size && pattern.CharAt(iter)==EQUAL) {
|
|
addToken(new Token(pattern.subString(iter-1,++iter,subStr),
|
|
Token::NOT_EQUAL_OP));
|
|
}
|
|
else {
|
|
// Error ! is not not()
|
|
errorPos = iter;
|
|
errorCode = ERROR_BANG;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
break;
|
|
case EQUAL:
|
|
addToken(new Token(ch,Token::EQUAL_OP));
|
|
++iter;
|
|
break;
|
|
case L_ANGLE:
|
|
if (++iter < size && pattern.CharAt(iter)==EQUAL) {
|
|
addToken(new Token(pattern.subString(iter-1,++iter,subStr),
|
|
Token::LESS_OR_EQUAL_OP));
|
|
}
|
|
else
|
|
addToken(new Token(ch,Token::LESS_THAN_OP));
|
|
break;
|
|
case R_ANGLE:
|
|
if (++iter < size && pattern.CharAt(iter)==EQUAL) {
|
|
addToken(new Token(pattern.subString(iter-1,++iter,subStr),
|
|
Token::GREATER_OR_EQUAL_OP));
|
|
}
|
|
else
|
|
addToken(new Token(ch,Token::GREATER_THAN_OP));
|
|
break;
|
|
case HYPHEN :
|
|
addToken(new Token(ch,Token::SUBTRACTION_OP));
|
|
++iter;
|
|
break;
|
|
case ASTERIX:
|
|
if (nextIsOperatorToken(prevToken))
|
|
addToken(new Token(ch,Token::MULTIPLY_OP));
|
|
else
|
|
addToken(new Token(ch,Token::CNAME));
|
|
++iter;
|
|
break;
|
|
case L_PAREN:
|
|
if (prevToken->type == Token::CNAME) {
|
|
if (prevToken->value.Equals(COMMENT))
|
|
prevToken->type = Token::COMMENT;
|
|
else if (prevToken->value.Equals(NODE))
|
|
prevToken->type = Token::NODE;
|
|
else if (prevToken->value.Equals(PROC_INST))
|
|
prevToken->type = Token::PROC_INST;
|
|
else if (prevToken->value.Equals(TEXT))
|
|
prevToken->type = Token::TEXT;
|
|
else
|
|
prevToken->type = Token::FUNCTION_NAME;
|
|
}
|
|
++iter;
|
|
addToken(new Token(ch,Token::L_PAREN));
|
|
break;
|
|
case R_PAREN:
|
|
++iter;
|
|
addToken(new Token(ch,Token::R_PAREN));
|
|
break;
|
|
case L_BRACKET:
|
|
++iter;
|
|
addToken(new Token(ch,Token::L_BRACKET));
|
|
break;
|
|
case R_BRACKET:
|
|
++iter;
|
|
addToken(new Token(ch,Token::R_BRACKET));
|
|
break;
|
|
case COMMA:
|
|
++iter;
|
|
addToken(new Token(ch,Token::COMMA));
|
|
break;
|
|
case AT_SIGN :
|
|
++iter;
|
|
addToken(new Token(ch,Token::AT_SIGN));
|
|
break;
|
|
case PLUS:
|
|
++iter;
|
|
addToken(new Token(ch,Token::ADDITION_OP));
|
|
break;
|
|
case VERT_BAR:
|
|
++iter;
|
|
addToken(new Token(ch,Token::UNION_OP));
|
|
break;
|
|
default:
|
|
// Error, don't grok character :-(
|
|
errorPos = iter;
|
|
errorCode = ERROR_UNKNOWN_CHAR;
|
|
if (firstItem)
|
|
firstItem->token->type=Token::ERROR;
|
|
else
|
|
addToken(new Token('\0',Token::ERROR));
|
|
iter=size; // bail
|
|
}
|
|
}
|
|
}
|
|
} //-- parse
|
|
|