From 755b162d911001f9a49635100e3efedcbdbcdc8b Mon Sep 17 00:00:00 2001 From: "bzbarsky%mit.edu" Date: Sun, 12 Sep 2004 01:50:53 +0000 Subject: [PATCH] Make CTextToken::ConsumeUntil not consume too much text. Bug 258082, patch by Blake Kaplan , r=bzbarsky, sr=jst --- parser/htmlparser/public/nsHTMLTokens.h | 2 +- parser/htmlparser/src/nsHTMLTokenizer.cpp | 38 +++++++++++++++++++---- parser/htmlparser/src/nsHTMLTokens.cpp | 25 ++++++++------- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/parser/htmlparser/public/nsHTMLTokens.h b/parser/htmlparser/public/nsHTMLTokens.h index 73780f5abec..299fa0ed2fc 100644 --- a/parser/htmlparser/public/nsHTMLTokens.h +++ b/parser/htmlparser/public/nsHTMLTokens.h @@ -283,7 +283,7 @@ public: CTextToken(const nsAString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); nsresult ConsumeUntil(PRUnichar aChar, PRBool aIgnoreComments, - nsScanner& aScanner, nsString& aEndTagName, + nsScanner& aScanner, const nsAString& aEndTagName, PRInt32 aFlag, PRBool& aFlushTokens); virtual PRInt32 GetTokenType(void); virtual PRInt32 GetTextLength(void); diff --git a/parser/htmlparser/src/nsHTMLTokenizer.cpp b/parser/htmlparser/src/nsHTMLTokenizer.cpp index 8ea43b99ca1..efba7a9d88b 100644 --- a/parser/htmlparser/src/nsHTMLTokenizer.cpp +++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp @@ -752,12 +752,17 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { if(gHTMLElements[theTag].CanContainType(kCDATA)) { - nsAutoString endTagName; - endTagName.Assign(nsHTMLTags::GetStringValue(theTag)); + nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag)); CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); - result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish 
consuming text... + + //tell new token to finish consuming text... + result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script, + aScanner, + endTagName, + mFlags, + aFlushTokens); // Fix bug 44186 // Support XML like syntax, i.e., @@ -768,10 +773,31 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan // it is. if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && !theStartToken->IsEmpty()) || aFlushTokens) { - theStartToken->SetEmpty(PR_FALSE); // Setting this would make cases like work. - CToken* endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName); + // Setting this would make cases like work. + theStartToken->SetEmpty(PR_FALSE); + // do this up here so we can just add the end token later on AddToken(text,result,&mTokenDeque,theAllocator); - AddToken(endToken,result,&mTokenDeque,theAllocator); + + CToken* endToken=nsnull; + + if (NS_SUCCEEDED(result) && aFlushTokens) { + PRUnichar theChar; + // Get the < + result = aScanner.GetChar(theChar); + NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan, + "CTextToken::ConsumeUntil is broken!"); +#ifdef DEBUG + // Ensure we have a / + PRUnichar tempChar; // Don't change non-debug vars in debug-only code + result = aScanner.Peek(tempChar); + NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kForwardSlash, + "CTextToken::ConsumeUntil is broken!"); +#endif + result = ConsumeEndTag(PRUnichar('/'),endToken,aScanner); + } else if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) { + endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName); + AddToken(endToken,result,&mTokenDeque,theAllocator); + } } else { IF_FREE(text, mTokenAllocator); diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp index 5562aec537f..80352137aec 100644 --- a/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/parser/htmlparser/src/nsHTMLTokens.cpp @@ -489,6 +489,7 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) /* * Consume as 
much clear text from scanner as possible. + * The scanner is left on the < of the perceived end tag. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream @@ -496,7 +497,8 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) * @return error result */ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){ + const nsAString& aEndTagName,PRInt32 aFlag, + PRBool& aFlushTokens){ nsresult result=NS_OK; nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos; PRBool done=PR_FALSE; @@ -584,15 +586,10 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann } } - // Make sure to preserve the end tag's representation if needed - if(aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT)) { - CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName); - } - aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); - aScanner.SetPosition(gtOffset.advance(1)); + aScanner.SetPosition(ltOffset); - // We found ...permit flushing -> Ref: Bug 22485 + // We found or ...permit flushing -> Ref: Bug 22485 aFlushTokens=PR_TRUE; done = PR_TRUE; } @@ -1633,9 +1630,6 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a nsresult result; - //I changed a bit of this method to use aRetain so that we do the right - //thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good. - nsScannerIterator wsstart, wsend; if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { @@ -1750,6 +1744,15 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a if (NS_OK==result) { result=aScanner.Peek(aChar); + + if (mTextValue.Length() == 0 && mTextKey.Length() == 0 && + aChar == kLessThan) { + // This attribute is completely bogus, tell the tokenizer. 
+ // This happens when we have stuff like: + // <script>foo()</script <p>.... + return NS_ERROR_HTMLPARSER_BADATTRIBUTE; + } + #ifdef DEBUG mLastAttribute = (kGreaterThan == aChar || kEOF == result); #endif