Added support for CDATA sections.

This commit is contained in:
vidur%netscape.com 1998-11-12 23:54:37 +00:00
Родитель 40cc4672bf
Коммит 422c941109
12 изменённых файлов: 400 добавлений и 14 удалений

Просмотреть файл

@ -236,7 +236,7 @@ void CTokenRecycler::RecycleToken(CToken* aToken) {
/**
*
* @update gess8/4/98
* @update vidur 11/12/98
* @param
* @return
*/
@ -262,6 +262,7 @@ CToken* CTokenRecycler::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag,
case eToken_style: result=new CStyleToken(); break;
case eToken_skippedcontent: result=new CSkippedContentToken(aString); break;
case eToken_instruction:result=new CInstructionToken(); break;
case eToken_cdatasection:result=new CCDATASectionToken(); break;
default:
break;
}

Просмотреть файл

@ -456,6 +456,110 @@ nsresult CTextToken::Consume(PRUnichar aChar, CScanner& aScanner) {
return result;
}
/*
 *  Default constructor. Takes no arguments and initializes the
 *  CHTMLToken base with eHTMLTag_unknown.
 *
 *  @update  vidur 11/12/98
 */
CCDATASectionToken::CCDATASectionToken()
  : CHTMLToken(eHTMLTag_unknown)
{
  // Nothing else to set up; the base class initializer does all the work.
}
/*
 *  String based constructor. Hands the string to the CHTMLToken base
 *  constructor and then marks the token's type id as eHTMLTag_unknown.
 *
 *  @update  vidur 11/12/98
 *  @param   aName -- string forwarded to the CHTMLToken constructor
 */
CCDATASectionToken::CCDATASectionToken(const nsString& aName)
  : CHTMLToken(aName)
{
  // Set after base construction: a CDATA section has no tag of its own.
  this->mTypeID = eHTMLTag_unknown;
}
/*
 *  Report the name of this token class.
 *
 *  @update  vidur 11/12/98
 *  @return  the constant string "cdatasection"
 */
const char* CCDATASectionToken::GetClassName(void) {
  const char* const theClassName = "cdatasection";
  return theClassName;
}
/*
 *  Report the token type for this class.
 *
 *  @update  vidur 11/12/98
 *  @return  eToken_cdatasection
 */
PRInt32 CCDATASectionToken::GetTokenType(void) {
  const PRInt32 theTokenType = eToken_cdatasection;
  return theTokenType;
}
/*
 *  Consume as much marked text from the scanner as possible: everything
 *  up to and including the "]]>" that closes the CDATA section. The
 *  text (without the terminator) is accumulated in mTextValue.
 *
 *  Line ends are normalized on the way: "\r\n" and a lone "\r" each
 *  become "\n", and "\r\r" becomes "\n\n".
 *
 *  Square brackets that are not part of the terminator are kept as
 *  content; in a run of three or more ']' followed by '>' only the
 *  last two brackets belong to the terminator.
 *
 *  @update  vidur 11/12/98
 *  @param   aChar -- last char consumed from stream (reused as scratch)
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result; NS_OK once the terminator has been consumed,
 *           otherwise the last result reported by the scanner
 */
nsresult CCDATASectionToken::Consume(PRUnichar aChar, CScanner& aScanner) {
  static nsAutoString terminals("]\r");
  nsresult result=NS_OK;
  PRBool   done=PR_FALSE;

  while((NS_OK==result) && (!done)) {
    result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE);
    if(NS_OK!=result) {
      break;
    }

    result=aScanner.Peek(aChar);
    if(NS_OK!=result) {
      break; //aChar was not refreshed, so there is nothing reliable to examine
    }

    if(kCR==aChar) {
      result=aScanner.GetChar(aChar); //strip off the \r
      result=aScanner.Peek(aChar);    //then see what's next.
      //Only trust aChar when the peek succeeded; after a failed peek it
      //still holds the \r we just stripped.
      if((NS_OK==result) && (kCR==aChar)) {
        result=aScanner.GetChar(aChar); //strip off the second \r
        mTextValue.Append("\n\n");
      }
      else {
        if((NS_OK==result) && (kNewLine==aChar)) {
          //which means we saw \r\n, which becomes \n
          result=aScanner.GetChar(aChar); //strip off the \n
        }
        mTextValue.Append("\n");
      }
    }
    else if(kRightSquareBracket==aChar) {
      result=aScanner.GetChar(aChar); //strip off the ]
      result=aScanner.Peek(aChar);    //then see what's next.
      if((NS_OK==result) && (kRightSquareBracket==aChar)) {
        result=aScanner.GetChar(aChar); //strip off the second ]
        result=aScanner.Peek(aChar);    //then see what's next.

        //Two brackets are now pending. Every further ] pushes the oldest
        //pending bracket into the content, so "]]]>" yields "]" + end.
        while((NS_OK==result) && (kRightSquareBracket==aChar)) {
          mTextValue.Append("]");
          result=aScanner.GetChar(aChar);
          result=aScanner.Peek(aChar);
        }

        if((NS_OK==result) && (kGreaterThan==aChar)) {
          result=aScanner.GetChar(aChar); //strip off the >
          done=PR_TRUE;
        }
        else {
          // This isn't the end of the CDATA section so go on, keeping
          // BOTH brackets that were stripped from the scanner.
          mTextValue.Append("]]");
        }
      }
      else {
        // This isn't the end of the CDATA section so go on
        mTextValue.Append("]");
      }
    }
    else done=PR_TRUE;
  }
  return result;
}
/*
* default constructor
*

Просмотреть файл

@ -54,6 +54,7 @@ enum eHTMLTokenTypes {
eToken_start=1, eToken_end, eToken_comment, eToken_entity,
eToken_whitespace, eToken_newline, eToken_text, eToken_attribute,
eToken_script, eToken_style, eToken_skippedcontent, eToken_instruction,
eToken_cdatasection,
eToken_last //make sure this stays the last token...
};
@ -79,7 +80,6 @@ public:
protected:
};
/**
* This declares start tokens, which always take the form <xxxx>.
* This class also knows how to consume related attributes.
@ -207,6 +207,23 @@ class CTextToken: public CHTMLToken {
};
/**
 * CDATASection tokens contain raw unescaped text content delimited by
 * "<![CDATA[" and "]]>".
 * XXX Not really a HTML construct - maybe we need a separation
 *
 * @update vidur 11/12/98
 */
class CCDATASectionToken : public CHTMLToken {
public:
// Default constructor; takes no arguments.
CCDATASectionToken();
// String based constructor; aString is handed to the CHTMLToken base.
CCDATASectionToken(const nsString& aString);
// Reads the section's text from aScanner, through the closing "]]>".
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
// Returns the class name string, "cdatasection".
virtual const char* GetClassName(void);
// Returns eToken_cdatasection.
virtual PRInt32 GetTokenType(void);
};
/**
* Attribute tokens are used to contain attribute key/value
* pairs wherever they may occur. Typically, they should

Просмотреть файл

@ -73,7 +73,8 @@ const PRUint32 kRightParen = ')';
const PRUint32 kLeftBrace = '{';
const PRUint32 kRightBrace = '}';
const PRUint32 kQuestionMark = '?';
// Square brackets. kRightSquareBracket is what CCDATASectionToken::Consume
// compares against while scanning for the "]]>" that ends a CDATA section.
const PRUint32 kLeftSquareBracket = '[';
const PRUint32 kRightSquareBracket = ']';
#endif

Просмотреть файл

@ -473,6 +473,74 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,
return result;
}
/*
 *  Consume characters as long as they match the string passed in.
 *  If they don't all match, put back everything that was read, so the
 *  scanner is left where it started.
 *  XXX The scanner should be able to do this.
 *
 *  @update  vidur 11/12/98
 *  @param   aScanner -- input source to read from
 *  @param   aMatchString -- the exact character sequence to look for
 *  @param   aMatch -- out: PR_TRUE if all of aMatchString was consumed,
 *                     PR_FALSE otherwise (including on scanner error)
 *  @return  NS_OK, or the error reported by aScanner.GetChar. On error
 *           the characters read so far are NOT put back.
 */
static nsresult
ConsumeConditional(CScanner& aScanner,
                   const nsString& aMatchString,
                   PRBool& aMatch)
{
  nsresult result=NS_OK;
  PRUnichar matchChar;
  PRInt32 i, count = aMatchString.Length();

  aMatch = PR_FALSE;

  for (i=0; i < count; i++) {
    result = aScanner.GetChar(matchChar);
    if ((NS_OK != result) || (aMatchString.CharAt(i) != matchChar)) {
      break;
    }
  }

  if (NS_OK == result) {
    if (i != count) {
      // The char read at position i is the one that did NOT match, so it
      // has to be returned as matchChar, not as aMatchString.CharAt(i).
      aScanner.PutBack(matchChar);
      // The chars before it did match; return them in reverse order.
      for (i--; i >= 0; i--) {
        aScanner.PutBack(aMatchString.CharAt(i));
      }
    }
    else {
      aMatch = PR_TRUE;
    }
  }

  return result;
}
/**
 *  This method is called when we see a "<!" sequence. It checks whether
 *  "[CDATA[" comes next: if so a CDATASection token is created,
 *  otherwise a comment token is created.
 *  XXX "Escaped Content" is not the right term, but I couldn't think
 *  of a good one.
 *
 *  @update  vidur 11/12/98
 *  @param   aChar: last char read (not used by this method)
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken is the newly created token (comment or cdatasection);
 *           left untouched when the scanner reports an error
 *  @return  error code
 */
NS_IMETHODIMP CWellFormedDTD::ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
  nsAutoString theMarker("[CDATA[");
  PRBool       sawMarker = PR_FALSE;

  nsresult theStatus = ConsumeConditional(aScanner, theMarker, sawMarker);
  if (NS_OK != theStatus) {
    return theStatus;
  }

  if (!sawMarker) {
    // Anything other than "[CDATA[" after "<!" is handled as a comment.
    aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
  }
  else {
    aToken=gTokenRecycler.CreateTokenOfType(eToken_cdatasection,eHTMLTag_unknown,gEmpty);
  }
  return theStatus;
}
/**
* This method is called just after a newline has been consumed.
*
@ -496,7 +564,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,
* and we know we're at the start of some kind of tagged
* element. We don't know yet if it's a tag or a comment.
*
* @update gess 5/12/98
* @update vidur 11/12/98
* @param aChar is the last char read
* @param aScanner represents our input source
* @param aToken is the out arg holding our new token
@ -520,7 +588,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CTok
break;
case kExclamation:
aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
result = ConsumeEscapedContent(aChar, aScanner, aToken);
break;
case kQuestionMark: //it must be an XML processing instruction...
@ -599,7 +667,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeToken(CToken*& aToken){
case kCR: case kLF:
result=ConsumeNewline(theChar,*theScanner,aToken);
break;
case kNotFound:
break;
@ -699,7 +767,7 @@ PRBool CWellFormedDTD::IsContainer(PRInt32 aTag) const{
/**
*
* @update gess 3/25/98
* @update vidur 11/12/98
* @param aToken -- token object to be put into content model
* @return 0 if all is well; non-zero is an error
*/
@ -716,6 +784,7 @@ NS_IMETHODIMP CWellFormedDTD::HandleToken(CToken* aToken) {
case eToken_entity:
case eToken_whitespace:
case eToken_text:
case eToken_cdatasection:
result=mSink->AddLeaf(theNode);
break;

Просмотреть файл

@ -225,6 +225,7 @@ protected:
NS_IMETHODIMP ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken);

Просмотреть файл

@ -236,7 +236,7 @@ void CTokenRecycler::RecycleToken(CToken* aToken) {
/**
*
* @update gess8/4/98
* @update vidur 11/12/98
* @param
* @return
*/
@ -262,6 +262,7 @@ CToken* CTokenRecycler::CreateTokenOfType(eHTMLTokenTypes aType,eHTMLTags aTag,
case eToken_style: result=new CStyleToken(); break;
case eToken_skippedcontent: result=new CSkippedContentToken(aString); break;
case eToken_instruction:result=new CInstructionToken(); break;
case eToken_cdatasection:result=new CCDATASectionToken(); break;
default:
break;
}

Просмотреть файл

@ -456,6 +456,110 @@ nsresult CTextToken::Consume(PRUnichar aChar, CScanner& aScanner) {
return result;
}
/*
 *  Default constructor. Takes no arguments and initializes the
 *  CHTMLToken base with eHTMLTag_unknown.
 *
 *  @update  vidur 11/12/98
 */
CCDATASectionToken::CCDATASectionToken()
  : CHTMLToken(eHTMLTag_unknown)
{
  // Nothing else to set up; the base class initializer does all the work.
}
/*
 *  String based constructor. Hands the string to the CHTMLToken base
 *  constructor and then marks the token's type id as eHTMLTag_unknown.
 *
 *  @update  vidur 11/12/98
 *  @param   aName -- string forwarded to the CHTMLToken constructor
 */
CCDATASectionToken::CCDATASectionToken(const nsString& aName)
  : CHTMLToken(aName)
{
  // Set after base construction: a CDATA section has no tag of its own.
  this->mTypeID = eHTMLTag_unknown;
}
/*
 *  Report the name of this token class.
 *
 *  @update  vidur 11/12/98
 *  @return  the constant string "cdatasection"
 */
const char* CCDATASectionToken::GetClassName(void) {
  const char* const theClassName = "cdatasection";
  return theClassName;
}
/*
 *  Report the token type for this class.
 *
 *  @update  vidur 11/12/98
 *  @return  eToken_cdatasection
 */
PRInt32 CCDATASectionToken::GetTokenType(void) {
  const PRInt32 theTokenType = eToken_cdatasection;
  return theTokenType;
}
/*
 *  Consume as much marked text from the scanner as possible: everything
 *  up to and including the "]]>" that closes the CDATA section. The
 *  text (without the terminator) is accumulated in mTextValue.
 *
 *  Line ends are normalized on the way: "\r\n" and a lone "\r" each
 *  become "\n", and "\r\r" becomes "\n\n".
 *
 *  Square brackets that are not part of the terminator are kept as
 *  content; in a run of three or more ']' followed by '>' only the
 *  last two brackets belong to the terminator.
 *
 *  @update  vidur 11/12/98
 *  @param   aChar -- last char consumed from stream (reused as scratch)
 *  @param   aScanner -- controller of underlying input source
 *  @return  error result; NS_OK once the terminator has been consumed,
 *           otherwise the last result reported by the scanner
 */
nsresult CCDATASectionToken::Consume(PRUnichar aChar, CScanner& aScanner) {
  static nsAutoString terminals("]\r");
  nsresult result=NS_OK;
  PRBool   done=PR_FALSE;

  while((NS_OK==result) && (!done)) {
    result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE);
    if(NS_OK!=result) {
      break;
    }

    result=aScanner.Peek(aChar);
    if(NS_OK!=result) {
      break; //aChar was not refreshed, so there is nothing reliable to examine
    }

    if(kCR==aChar) {
      result=aScanner.GetChar(aChar); //strip off the \r
      result=aScanner.Peek(aChar);    //then see what's next.
      //Only trust aChar when the peek succeeded; after a failed peek it
      //still holds the \r we just stripped.
      if((NS_OK==result) && (kCR==aChar)) {
        result=aScanner.GetChar(aChar); //strip off the second \r
        mTextValue.Append("\n\n");
      }
      else {
        if((NS_OK==result) && (kNewLine==aChar)) {
          //which means we saw \r\n, which becomes \n
          result=aScanner.GetChar(aChar); //strip off the \n
        }
        mTextValue.Append("\n");
      }
    }
    else if(kRightSquareBracket==aChar) {
      result=aScanner.GetChar(aChar); //strip off the ]
      result=aScanner.Peek(aChar);    //then see what's next.
      if((NS_OK==result) && (kRightSquareBracket==aChar)) {
        result=aScanner.GetChar(aChar); //strip off the second ]
        result=aScanner.Peek(aChar);    //then see what's next.

        //Two brackets are now pending. Every further ] pushes the oldest
        //pending bracket into the content, so "]]]>" yields "]" + end.
        while((NS_OK==result) && (kRightSquareBracket==aChar)) {
          mTextValue.Append("]");
          result=aScanner.GetChar(aChar);
          result=aScanner.Peek(aChar);
        }

        if((NS_OK==result) && (kGreaterThan==aChar)) {
          result=aScanner.GetChar(aChar); //strip off the >
          done=PR_TRUE;
        }
        else {
          // This isn't the end of the CDATA section so go on, keeping
          // BOTH brackets that were stripped from the scanner.
          mTextValue.Append("]]");
        }
      }
      else {
        // This isn't the end of the CDATA section so go on
        mTextValue.Append("]");
      }
    }
    else done=PR_TRUE;
  }
  return result;
}
/*
* default constructor
*

Просмотреть файл

@ -54,6 +54,7 @@ enum eHTMLTokenTypes {
eToken_start=1, eToken_end, eToken_comment, eToken_entity,
eToken_whitespace, eToken_newline, eToken_text, eToken_attribute,
eToken_script, eToken_style, eToken_skippedcontent, eToken_instruction,
eToken_cdatasection,
eToken_last //make sure this stays the last token...
};
@ -79,7 +80,6 @@ public:
protected:
};
/**
* This declares start tokens, which always take the form <xxxx>.
* This class also knows how to consume related attributes.
@ -207,6 +207,23 @@ class CTextToken: public CHTMLToken {
};
/**
 * CDATASection tokens contain raw unescaped text content delimited by
 * "<![CDATA[" and "]]>".
 * XXX Not really a HTML construct - maybe we need a separation
 *
 * @update vidur 11/12/98
 */
class CCDATASectionToken : public CHTMLToken {
public:
// Default constructor; takes no arguments.
CCDATASectionToken();
// String based constructor; aString is handed to the CHTMLToken base.
CCDATASectionToken(const nsString& aString);
// Reads the section's text from aScanner, through the closing "]]>".
virtual nsresult Consume(PRUnichar aChar,CScanner& aScanner);
// Returns the class name string, "cdatasection".
virtual const char* GetClassName(void);
// Returns eToken_cdatasection.
virtual PRInt32 GetTokenType(void);
};
/**
* Attribute tokens are used to contain attribute key/value
* pairs wherever they may occur. Typically, they should

Просмотреть файл

@ -73,7 +73,8 @@ const PRUint32 kRightParen = ')';
const PRUint32 kLeftBrace = '{';
const PRUint32 kRightBrace = '}';
const PRUint32 kQuestionMark = '?';
// Square brackets. kRightSquareBracket is what CCDATASectionToken::Consume
// compares against while scanning for the "]]>" that ends a CDATA section.
const PRUint32 kLeftSquareBracket = '[';
const PRUint32 kRightSquareBracket = ']';
#endif

Просмотреть файл

@ -473,6 +473,74 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeComment(PRUnichar aChar,CScanner& aScanner,
return result;
}
/*
 *  Consume characters as long as they match the string passed in.
 *  If they don't all match, put back everything that was read, so the
 *  scanner is left where it started.
 *  XXX The scanner should be able to do this.
 *
 *  @update  vidur 11/12/98
 *  @param   aScanner -- input source to read from
 *  @param   aMatchString -- the exact character sequence to look for
 *  @param   aMatch -- out: PR_TRUE if all of aMatchString was consumed,
 *                     PR_FALSE otherwise (including on scanner error)
 *  @return  NS_OK, or the error reported by aScanner.GetChar. On error
 *           the characters read so far are NOT put back.
 */
static nsresult
ConsumeConditional(CScanner& aScanner,
                   const nsString& aMatchString,
                   PRBool& aMatch)
{
  nsresult result=NS_OK;
  PRUnichar matchChar;
  PRInt32 i, count = aMatchString.Length();

  aMatch = PR_FALSE;

  for (i=0; i < count; i++) {
    result = aScanner.GetChar(matchChar);
    if ((NS_OK != result) || (aMatchString.CharAt(i) != matchChar)) {
      break;
    }
  }

  if (NS_OK == result) {
    if (i != count) {
      // The char read at position i is the one that did NOT match, so it
      // has to be returned as matchChar, not as aMatchString.CharAt(i).
      aScanner.PutBack(matchChar);
      // The chars before it did match; return them in reverse order.
      for (i--; i >= 0; i--) {
        aScanner.PutBack(aMatchString.CharAt(i));
      }
    }
    else {
      aMatch = PR_TRUE;
    }
  }

  return result;
}
/**
 *  This method is called when we see a "<!" sequence. It checks whether
 *  "[CDATA[" comes next: if so a CDATASection token is created,
 *  otherwise a comment token is created.
 *  XXX "Escaped Content" is not the right term, but I couldn't think
 *  of a good one.
 *
 *  @update  vidur 11/12/98
 *  @param   aChar: last char read (not used by this method)
 *  @param   aScanner: see nsScanner.h
 *  @param   aToken is the newly created token (comment or cdatasection);
 *           left untouched when the scanner reports an error
 *  @return  error code
 */
NS_IMETHODIMP CWellFormedDTD::ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken){
  nsAutoString theMarker("[CDATA[");
  PRBool       sawMarker = PR_FALSE;

  nsresult theStatus = ConsumeConditional(aScanner, theMarker, sawMarker);
  if (NS_OK != theStatus) {
    return theStatus;
  }

  if (!sawMarker) {
    // Anything other than "[CDATA[" after "<!" is handled as a comment.
    aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
  }
  else {
    aToken=gTokenRecycler.CreateTokenOfType(eToken_cdatasection,eHTMLTag_unknown,gEmpty);
  }
  return theStatus;
}
/**
* This method is called just after a newline has been consumed.
*
@ -496,7 +564,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeNewline(PRUnichar aChar,CScanner& aScanner,
* and we know we're at the start of some kind of tagged
* element. We don't know yet if it's a tag or a comment.
*
* @update gess 5/12/98
* @update vidur 11/12/98
* @param aChar is the last char read
* @param aScanner represents our input source
* @param aToken is the out arg holding our new token
@ -520,7 +588,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeTag(PRUnichar aChar,CScanner& aScanner,CTok
break;
case kExclamation:
aToken=gTokenRecycler.CreateTokenOfType(eToken_comment,eHTMLTag_comment,gEmpty);
result = ConsumeEscapedContent(aChar, aScanner, aToken);
break;
case kQuestionMark: //it must be an XML processing instruction...
@ -599,7 +667,7 @@ NS_IMETHODIMP CWellFormedDTD::ConsumeToken(CToken*& aToken){
case kCR: case kLF:
result=ConsumeNewline(theChar,*theScanner,aToken);
break;
case kNotFound:
break;
@ -699,7 +767,7 @@ PRBool CWellFormedDTD::IsContainer(PRInt32 aTag) const{
/**
*
* @update gess 3/25/98
* @update vidur 11/12/98
* @param aToken -- token object to be put into content model
* @return 0 if all is well; non-zero is an error
*/
@ -716,6 +784,7 @@ NS_IMETHODIMP CWellFormedDTD::HandleToken(CToken* aToken) {
case eToken_entity:
case eToken_whitespace:
case eToken_text:
case eToken_cdatasection:
result=mSink->AddLeaf(theNode);
break;

Просмотреть файл

@ -225,6 +225,7 @@ protected:
NS_IMETHODIMP ConsumeText(const nsString& aString,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeNewline(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeWhitespace(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeEscapedContent(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeComment(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeEntity(PRUnichar aChar,CScanner& aScanner,CToken*& aToken);
NS_IMETHODIMP ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CStartToken* aToken);