From f404ddb2db688de0da4252124527ae82443612a7 Mon Sep 17 00:00:00 2001 From: "mrbkap%gmail.com" Date: Wed, 1 Dec 2004 04:37:36 +0000 Subject: [PATCH] bug 88952: Remove trailing content. This checkin fixes a whole slew of textarea bugs. r=jst sr=rbs --- parser/htmlparser/public/nsHTMLTokens.h | 19 +- parser/htmlparser/public/nsIParser.h | 14 +- parser/htmlparser/public/nsITokenizer.h | 4 +- parser/htmlparser/src/CNavDTD.cpp | 129 +----- parser/htmlparser/src/CParserContext.cpp | 30 +- parser/htmlparser/src/CParserContext.h | 4 +- parser/htmlparser/src/nsElementTable.cpp | 7 +- parser/htmlparser/src/nsExpatDriver.cpp | 6 - parser/htmlparser/src/nsHTMLTokenizer.cpp | 193 ++++----- parser/htmlparser/src/nsHTMLTokenizer.h | 9 +- parser/htmlparser/src/nsHTMLTokens.cpp | 494 ++++++++++++++-------- parser/htmlparser/src/nsLoggingSink.h | 4 +- parser/htmlparser/src/nsParser.cpp | 20 +- 13 files changed, 490 insertions(+), 443 deletions(-) diff --git a/parser/htmlparser/public/nsHTMLTokens.h b/parser/htmlparser/public/nsHTMLTokens.h index 29b4cde26c6..e741352687d 100644 --- a/parser/htmlparser/public/nsHTMLTokens.h +++ b/parser/htmlparser/public/nsHTMLTokens.h @@ -153,7 +153,6 @@ public: } nsString mTextValue; - nsString mTrailingContent; protected: eContainerInfo mContainerInfo; PRPackedBool mEmpty; @@ -282,9 +281,6 @@ public: CTextToken(); CTextToken(const nsAString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - nsresult ConsumeUntil(PRUnichar aChar, PRBool aIgnoreComments, - nsScanner& aScanner, const nsAString& aEndTagName, - PRInt32 aFlag, PRBool& aFlushTokens); virtual PRInt32 GetTokenType(void); virtual PRInt32 GetTextLength(void); virtual void CopyTo(nsAString& aStr); @@ -293,6 +289,21 @@ public: nsScannerIterator& aEnd); virtual void Bind(const nsAString& aStr); + nsresult ConsumeCharacterData(PRUnichar aChar, + PRBool aConservativeConsume, + PRBool aIgnoreComments, + nsScanner& aScanner, + const nsAString& aEndTagName, + 
PRInt32 aFlag, + PRBool& aFlushTokens); + + nsresult ConsumeParsedCharacterData(PRUnichar aChar, + PRBool aConservativeConsume, + nsScanner& aScanner, + const nsAString& aEndTagName, + PRInt32 aFlag, + PRBool& aFound); + protected: nsScannerSubstring mTextValue; }; diff --git a/parser/htmlparser/public/nsIParser.h b/parser/htmlparser/public/nsIParser.h index 9355c134ce9..f889ff88cde 100644 --- a/parser/htmlparser/public/nsIParser.h +++ b/parser/htmlparser/public/nsIParser.h @@ -310,7 +310,8 @@ class nsIParser : public nsISupports { #define NS_ERROR_HTMLPARSER_STOPPARSING NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1015) #define NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1016) #define NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1017) - +#define NS_ERROR_HTMLPARSER_FAKE_ENDTAG NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1018) +#define NS_ERROR_HTMLPARSER_INVALID_COMMENT NS_ERROR_GENERATE_FAILURE(NS_ERROR_MODULE_HTMLPARSER,1019) #define NS_ERROR_HTMLPARSER_CONTINUE NS_OK @@ -325,6 +326,8 @@ const PRUint32 kInvalidParserContext = NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT const PRUint32 kBlocked = NS_ERROR_HTMLPARSER_BLOCK; const PRUint32 kBadStringLiteral = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL; const PRUint32 kHierarchyTooDeep = NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP; +const PRUint32 kFakeEndTag = NS_ERROR_HTMLPARSER_FAKE_ENDTAG; +const PRUint32 kNotAComment = NS_ERROR_HTMLPARSER_INVALID_COMMENT; const PRUnichar kNewLine = '\n'; const PRUnichar kCR = '\r'; @@ -381,9 +384,10 @@ const PRUnichar kNullCh = '\0'; #define NS_IPARSER_FLAG_VIEW_NORMAL 0x00000020 #define NS_IPARSER_FLAG_VIEW_SOURCE 0x00000040 #define NS_IPARSER_FLAG_VIEW_ERRORS 0x00000080 -#define NS_IPARSER_FLAG_PRESERVE_CONTENT 0x00000100 -#define NS_IPARSER_FLAG_PLAIN_TEXT 0x00000200 -#define NS_IPARSER_FLAG_XML 0x00000400 -#define NS_IPARSER_FLAG_HTML 0x00000800 +#define 
NS_IPARSER_FLAG_PLAIN_TEXT 0x00000100 +#define NS_IPARSER_FLAG_XML 0x00000200 +#define NS_IPARSER_FLAG_HTML 0x00000400 +#define NS_IPARSER_FLAG_SCRIPT_ENABLED 0x00000800 +#define NS_IPARSER_FLAG_FRAMES_ENABLED 0x00001000 #endif diff --git a/parser/htmlparser/public/nsITokenizer.h b/parser/htmlparser/public/nsITokenizer.h index 11b24377025..3027b43d925 100644 --- a/parser/htmlparser/public/nsITokenizer.h +++ b/parser/htmlparser/public/nsITokenizer.h @@ -86,7 +86,6 @@ public: NS_IMETHOD_(PRInt32) GetCount(void)=0; NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void)=0; NS_IMETHOD_(void) PrependTokens(nsDeque& aDeque)=0; - NS_IMETHOD CopyState(nsITokenizer* aTokenizer) = 0; }; @@ -101,8 +100,7 @@ public: NS_IMETHOD_(CToken*) GetTokenAt(PRInt32 anIndex);\ NS_IMETHOD_(PRInt32) GetCount(void);\ NS_IMETHOD_(nsTokenAllocator*) GetTokenAllocator(void);\ - NS_IMETHOD_(void) PrependTokens(nsDeque& aDeque);\ - NS_IMETHOD CopyState(nsITokenizer* aTokenizer); + NS_IMETHOD_(void) PrependTokens(nsDeque& aDeque); #endif diff --git a/parser/htmlparser/src/CNavDTD.cpp b/parser/htmlparser/src/CNavDTD.cpp index 1172aff20be..574b3d6234e 100644 --- a/parser/htmlparser/src/CNavDTD.cpp +++ b/parser/htmlparser/src/CNavDTD.cpp @@ -116,12 +116,10 @@ static char gShowCRC; #define NS_DTD_FLAG_HAD_BODY 0x00000010 #define NS_DTD_FLAG_HAD_FRAMESET 0x00000020 #define NS_DTD_FLAG_ENABLE_RESIDUAL_STYLE 0x00000040 -#define NS_DTD_FLAG_SCRIPT_ENABLED 0x00000100 -#define NS_DTD_FLAG_FRAMES_ENABLED 0x00000200 -#define NS_DTD_FLAG_ALTERNATE_CONTENT 0x00000400 // NOFRAMES, NOSCRIPT -#define NS_DTD_FLAG_MISPLACED_CONTENT 0x00000800 -#define NS_DTD_FLAG_IN_MISPLACED_CONTENT 0x00001000 -#define NS_DTD_FLAG_STOP_PARSING 0x00002000 +#define NS_DTD_FLAG_ALTERNATE_CONTENT 0x00000080 // NOFRAMES, NOSCRIPT +#define NS_DTD_FLAG_MISPLACED_CONTENT 0x00000100 +#define NS_DTD_FLAG_IN_MISPLACED_CONTENT 0x00000200 +#define NS_DTD_FLAG_STOP_PARSING 0x00000400 /** * This method gets called as part of our COM-like 
interfaces. @@ -384,15 +382,15 @@ nsresult CNavDTD::WillBuildModel(const CParserContext& aParserContext, #endif if(mSink) { - PRBool enabled; + PRBool enabled = PR_TRUE; mSink->IsEnabled(eHTMLTag_frameset, &enabled); if(enabled) { - mFlags |= NS_DTD_FLAG_FRAMES_ENABLED; + mFlags |= NS_IPARSER_FLAG_FRAMES_ENABLED; } mSink->IsEnabled(eHTMLTag_script, &enabled); if(enabled) { - mFlags |= NS_DTD_FLAG_SCRIPT_ENABLED; + mFlags |= NS_IPARSER_FLAG_SCRIPT_ENABLED; } } @@ -439,7 +437,7 @@ nsresult CNavDTD::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsIToke } // always open a body if frames are disabled.... - if(!(mFlags & NS_DTD_FLAG_FRAMES_ENABLED)) { + if(!(mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) { theToken=NS_STATIC_CAST(CStartToken*,mTokenAllocator->CreateTokenOfType(eToken_start,eHTMLTag_body,NS_LITERAL_STRING("body"))); mTokenizer->PushTokenFront(theToken); } @@ -522,37 +520,6 @@ CNavDTD::BuildNeglectedTarget(eHTMLTags aTarget, NS_ASSERTION(mTokenAllocator, "unable to create tokens without an allocator."); if (!mTokenizer || !mTokenAllocator) return NS_OK; - if (eHTMLTag_unknown != mSkipTarget && eHTMLTag_title == aTarget) { - PRInt32 size = mSkippedContent.GetSize(); - // Note: The first location of the skipped content - // deque contains the opened-skip-target. Do not include - // that when guessing title contents. The term "guessing" - // is used because the document did not contain an end title - // and hence it's almost impossible to know what markup - // should belong in the title. The assumption used here is that - // if the markup is anything other than "text", or "entity" or, - // "whitespace" then it's least likely to belong in the title. 
- PRInt32 index; - for (index = 1; index < size; index++) { - CHTMLToken* token = - NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.ObjectAt(index)); - NS_ASSERTION(token, "there is a null token in the skipped content list!"); - eHTMLTokenTypes type = eHTMLTokenTypes(token->GetTokenType()); - if (eToken_whitespace != type && - eToken_newline != type && - eToken_text != type && - eToken_entity != type && - eToken_attribute != type) { - // Now pop the tokens that do not belong ( just a guess work ) - // in the title and push them into the tokens queue. - while (size != index++) { - token = NS_REINTERPRET_CAST(CHTMLToken*, mSkippedContent.Pop()); - mTokenizer->PushTokenFront(token); - } - break; - } - } - } CHTMLToken* target = NS_STATIC_CAST(CHTMLToken*, mTokenAllocator->CreateTokenOfType(aType, aTarget)); mTokenizer->PushTokenFront(target); @@ -846,30 +813,6 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ return result; } } - else if(mFlags & NS_DTD_FLAG_ALTERNATE_CONTENT) { - if(theTag != mBodyContext->Last() || theType!=eToken_end) { - // attribute source is a part of start token. - if(theType!=eToken_attribute) { - aToken->AppendSourceTo(mScratch); - } - IF_FREE(aToken, mTokenAllocator); - return result; - } - else { - // If you're here then we have either seen a /noscript, - // or /noframes, or /iframe. After handling the text token - // intentionally fall thro' to handle the current end token. 
- CTextToken theTextToken(mScratch); - result=HandleStartToken(&theTextToken); - - if(NS_FAILED(result)) { - return result; - } - - mScratch.Truncate(); - mScratch.SetCapacity(0); - } - } else if(mFlags & NS_DTD_FLAG_MISPLACED_CONTENT) { // Included TD & TH to fix Bug# 20797 static eHTMLTags gLegalElements[]={eHTMLTag_table,eHTMLTag_thead,eHTMLTag_tbody, @@ -949,7 +892,9 @@ nsresult CNavDTD::HandleToken(CToken* aToken,nsIParser* aParser){ } default: if(!gHTMLElements[eHTMLTag_html].SectionContains(theTag,PR_FALSE)) { - if(!(mFlags & (NS_DTD_FLAG_HAD_BODY | NS_DTD_FLAG_HAD_FRAMESET))) { + if(!(mFlags & (NS_DTD_FLAG_HAD_BODY | + NS_DTD_FLAG_HAD_FRAMESET | + NS_DTD_FLAG_ALTERNATE_CONTENT))) { //For bug examples from this code, see bugs: 18928, 20989. @@ -1075,28 +1020,6 @@ nsresult CNavDTD::DidHandleStartTag(nsIParserNode& aNode,eHTMLTags aChildTag){ }//if } break; - - case eHTMLTag_xmp: - //grab the skipped content and dump it out as text... - { - STOP_TIMER() - MOZ_TIMER_DEBUGLOG(("Stop: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this)); - nsAutoString theString; - PRInt32 lineNo = 0; - - result = CollectSkippedContent(aChildTag, theString, lineNo); - NS_ENSURE_SUCCESS(result, result); - - if(0CreateTokenOfType(eToken_text,eHTMLTag_text,theString)); - nsCParserNode theNode(theToken, mTokenAllocator); - result=mSink->AddLeaf(theNode); //when the node get's destructed, so does the new token - } - MOZ_TIMER_DEBUGLOG(("Start: Parse Time: CNavDTD::DidHandleStartTag(), this=%p\n", this)); - START_TIMER() - } - break; - #ifdef DEBUG case eHTMLTag_counter: { @@ -2485,30 +2408,10 @@ CNavDTD::CollectSkippedContent(PRInt32 aTag, nsAString& aContent, PRInt32 &aLine PRInt32 tagCount = mSkippedContent.GetSize(); for (i = 0; i< tagCount; ++i){ CHTMLToken* theNextToken = (CHTMLToken*)mSkippedContent.PopFront(); - if (theNextToken) { - eHTMLTokenTypes theTokenType = (eHTMLTokenTypes)theNextToken->GetTokenType(); - - // Dont worry about attributes here because it's already 
stored in - // the start token as mTrailing content and will get appended in - // start token's GetSource(); - if (eToken_attribute!=theTokenType) { - if ((eToken_entity==theTokenType) && - ((eHTMLTag_textarea == aTag) || (eHTMLTag_title == aTag))) { - mScratch.Truncate(); - ((CEntityToken*)theNextToken)->TranslateToUnicodeStr(mScratch); - if (!mScratch.IsEmpty()){ - aContent.Append(mScratch); - } - else { - // We thought it was an entity but it is not! - bug 79492 - aContent.Append(PRUnichar('&')); - aContent.Append(theNextToken->GetStringValue()); - } - } - else theNextToken->AppendSourceTo(aContent); - } + theNextToken->AppendSourceTo(aContent); } + IF_FREE(theNextToken, mTokenAllocator); } @@ -3433,8 +3336,7 @@ CNavDTD::OpenContainer(const nsCParserNode *aNode, // If the script is disabled noscript should not be // in the content model until the layout can somehow // turn noscript's display property to block <-- bug 67899 - if(mFlags & NS_DTD_FLAG_SCRIPT_ENABLED) { - mScratch.Truncate(); + if(mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED) { mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT; } break; @@ -3442,8 +3344,7 @@ CNavDTD::OpenContainer(const nsCParserNode *aNode, case eHTMLTag_iframe: // Bug 84491 case eHTMLTag_noframes: done=PR_FALSE; - if(mFlags & NS_DTD_FLAG_FRAMES_ENABLED) { - mScratch.Truncate(); + if(mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED) { mFlags |= NS_DTD_FLAG_ALTERNATE_CONTENT; } break; diff --git a/parser/htmlparser/src/CParserContext.cpp b/parser/htmlparser/src/CParserContext.cpp index 36f0b58181a..5197a6488bb 100644 --- a/parser/htmlparser/src/CParserContext.cpp +++ b/parser/htmlparser/src/CParserContext.cpp @@ -40,6 +40,7 @@ #include "CParserContext.h" #include "nsToken.h" #include "prenv.h" +#include "nsIHTMLContentSink.h" #include "nsHTMLTokenizer.h" #include "nsExpatDriver.h" @@ -169,16 +170,33 @@ void CParserContext::SetMimeType(const nsACString& aMimeType){ } nsresult -CParserContext::GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer) { 
+CParserContext::GetTokenizer(PRInt32 aType, + nsIContentSink* aSink, + nsITokenizer*& aTokenizer) { nsresult result = NS_OK; if(!mTokenizer) { if (aType == NS_IPARSER_FLAG_HTML || mParserCommand == eViewSource) { - result = NS_NewHTMLTokenizer(&mTokenizer,mDTDMode,mDocType,mParserCommand); - // Propagate tokenizer state so that information is preserved - // between document.write. This fixes bug 99467 - if (mTokenizer && mPrevContext) - mTokenizer->CopyState(mPrevContext->mTokenizer); + nsCOMPtr theSink = do_QueryInterface(aSink); + PRUint16 theFlags = 0; + + if (theSink) { + // XXX This code is repeated both here and in CNavDTD. Can the two + // callsites be combined? + PRBool enabled; + theSink->IsEnabled(eHTMLTag_frameset, &enabled); + if(enabled) { + theFlags |= NS_IPARSER_FLAG_FRAMES_ENABLED; + } + + theSink->IsEnabled(eHTMLTag_script, &enabled); + if(enabled) { + theFlags |= NS_IPARSER_FLAG_SCRIPT_ENABLED; + } + } + + result = NS_NewHTMLTokenizer(&mTokenizer,mDTDMode,mDocType, + mParserCommand,theFlags); } else if (aType == NS_IPARSER_FLAG_XML) { diff --git a/parser/htmlparser/src/CParserContext.h b/parser/htmlparser/src/CParserContext.h index 695a8603d45..f73531b7ce3 100644 --- a/parser/htmlparser/src/CParserContext.h +++ b/parser/htmlparser/src/CParserContext.h @@ -76,7 +76,9 @@ public: CParserContext( const CParserContext& aContext); ~CParserContext(); - nsresult GetTokenizer(PRInt32 aType, nsITokenizer*& aTokenizer); + nsresult GetTokenizer(PRInt32 aType, + nsIContentSink* aSink, + nsITokenizer*& aTokenizer); void SetMimeType(const nsACString& aMimeType); nsCOMPtr mRequest; // provided by necko to differnciate different input streams diff --git a/parser/htmlparser/src/nsElementTable.cpp b/parser/htmlparser/src/nsElementTable.cpp index d32ffc3e0b4..763e3111349 100644 --- a/parser/htmlparser/src/nsElementTable.cpp +++ b/parser/htmlparser/src/nsElementTable.cpp @@ -1228,9 +1228,9 @@ void InitializeElementTable(void) { /*req-parent excl-parent*/ 
eHTMLTag_unknown,eHTMLTag_unknown, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, kNone, kNone, + /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, kCDATA, kNone, /*special props, prop-range*/ kNone,kDefaultPropRange, - /*special parents,kids,skip*/ 0,0,eHTMLTag_xmp); + /*special parents,kids,skip*/ 0,0,eHTMLTag_unknown); Initialize( /*tag*/ eHTMLTag_text, @@ -2270,7 +2270,8 @@ PRBool nsHTMLElement::CanContain(eHTMLTags aChild,nsDTDMode aMode) const{ } if(nsHTMLElement::IsTextTag(aChild)) { - if(nsHTMLElement::IsInlineParent(mTagID)){ + // Allow to contain text. + if(nsHTMLElement::IsInlineParent(mTagID) || CanContainType(kCDATA)){ return PR_TRUE; } } diff --git a/parser/htmlparser/src/nsExpatDriver.cpp b/parser/htmlparser/src/nsExpatDriver.cpp index a559dca2de6..0c247bbad21 100644 --- a/parser/htmlparser/src/nsExpatDriver.cpp +++ b/parser/htmlparser/src/nsExpatDriver.cpp @@ -1168,12 +1168,6 @@ nsExpatDriver::PrependTokens(nsDeque& aDeque) } -NS_IMETHODIMP -nsExpatDriver::CopyState(nsITokenizer* aTokenizer) -{ - return NS_OK; -} - NS_IMETHODIMP nsExpatDriver::HandleToken(CToken* aToken,nsIParser* aParser) { diff --git a/parser/htmlparser/src/nsHTMLTokenizer.cpp b/parser/htmlparser/src/nsHTMLTokenizer.cpp index fcab7d35c76..157172b826f 100644 --- a/parser/htmlparser/src/nsHTMLTokenizer.cpp +++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp @@ -103,13 +103,14 @@ nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult, PRInt32 aFlag, eParserDocType aDocType, - eParserCommands aCommand) + eParserCommands aCommand, + PRInt32 aFlags) { NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr"); if (nsnull == aInstancePtrResult) { return NS_ERROR_NULL_POINTER; } - nsHTMLTokenizer* it = new nsHTMLTokenizer(aFlag,aDocType,aCommand); + nsHTMLTokenizer* it = new 
nsHTMLTokenizer(aFlag,aDocType,aCommand,aFlags); if (nsnull == it) { return NS_ERROR_OUT_OF_MEMORY; } @@ -128,23 +129,24 @@ NS_IMPL_RELEASE(nsHTMLTokenizer) * @param * @return */ - nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode, - eParserDocType aDocType, - eParserCommands aCommand) : - nsITokenizer(), mTokenDeque(0) +nsHTMLTokenizer::nsHTMLTokenizer(PRInt32 aParseMode, + eParserDocType aDocType, + eParserCommands aCommand, + PRUint16 aFlags) : + nsITokenizer(), mTokenDeque(0), mFlags(aFlags) { if (aParseMode==eDTDMode_full_standards || aParseMode==eDTDMode_almost_standards) { - mFlags = NS_IPARSER_FLAG_STRICT_MODE; + mFlags |= NS_IPARSER_FLAG_STRICT_MODE; } else if (aParseMode==eDTDMode_quirks) { - mFlags = NS_IPARSER_FLAG_QUIRKS_MODE; + mFlags |= NS_IPARSER_FLAG_QUIRKS_MODE; } else if (aParseMode==eDTDMode_autodetect) { - mFlags = NS_IPARSER_FLAG_AUTO_DETECT_MODE; + mFlags |= NS_IPARSER_FLAG_AUTO_DETECT_MODE; } else { - mFlags = NS_IPARSER_FLAG_UNKNOWN_MODE; + mFlags |= NS_IPARSER_FLAG_UNKNOWN_MODE; } if (aDocType==ePlainText) { @@ -167,7 +169,6 @@ NS_IMPL_RELEASE(nsHTMLTokenizer) mTokenAllocator = nsnull; mTokenScanPos = 0; - mPreserveTarget = eHTMLTag_unknown; } @@ -311,19 +312,6 @@ void nsHTMLTokenizer::PrependTokens(nsDeque& aDeque){ } -NS_IMETHODIMP -nsHTMLTokenizer::CopyState(nsITokenizer* aTokenizer) -{ - if (aTokenizer) { - mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; - mPreserveTarget = - NS_STATIC_CAST(nsHTMLTokenizer*, aTokenizer)->mPreserveTarget; - if (mPreserveTarget != eHTMLTag_unknown) - mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; - } - return NS_OK; -} - /** * This is a utilty method for ScanDocStructure, which finds a given * tag in the stack. 
@@ -649,10 +637,11 @@ nsresult nsHTMLTokenizer::ConsumeAttributes(PRUnichar aChar, const nsSubstring& key=theToken->GetKey(); const nsAString& text=theToken->GetValue(); - // support XML like syntax to fix bugs like 44186 if(!key.IsEmpty() && kForwardSlash==key.First() && text.IsEmpty()) { - isUsableAttr = PRBool(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE); // Fix bug 103095 - aToken->SetEmpty(isUsableAttr); + if(!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) { + // We only care about these in view-source. + isUsableAttr = PR_FALSE; + } } if(isUsableAttr) { ++theAttrCount; @@ -721,10 +710,6 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan aToken=theAllocator->CreateTokenOfType(eToken_start,eHTMLTag_unknown); if(aToken) { - // Save the position after '<' for use in recording traling contents. Ref: Bug. 15204. - nsScannerIterator origin; - aScanner.CurrentPosition(origin); - result= aToken->Consume(aChar,aScanner,mFlags); //tell new token to finish consuming text... 
if(NS_SUCCEEDED(result)) { @@ -757,68 +742,81 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan */ if(NS_SUCCEEDED(result) && !(mFlags & NS_IPARSER_FLAG_XML)) { CStartToken* theStartToken = NS_STATIC_CAST(CStartToken*,aToken); - //XXX - Find a better soution to record content - if(!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && - (theTag == eHTMLTag_textarea || - theTag == eHTMLTag_xmp || - theTag == eHTMLTag_noscript || - theTag == eHTMLTag_noframes)) { - NS_ASSERTION(mPreserveTarget == eHTMLTag_unknown, - "mPreserveTarget set but not preserving content?"); - mPreserveTarget = theTag; - mFlags |= NS_IPARSER_FLAG_PRESERVE_CONTENT; + + PRBool isCDATA = gHTMLElements[theTag].CanContainType(kCDATA); + PRBool isPCDATA = eHTMLTag_textarea == theTag || + eHTMLTag_title == theTag; + + if ((eHTMLTag_iframe == theTag && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) || + (eHTMLTag_noframes == theTag && (mFlags & NS_IPARSER_FLAG_FRAMES_ENABLED)) || + (eHTMLTag_noscript == theTag && (mFlags & NS_IPARSER_FLAG_SCRIPT_ENABLED))) { + isCDATA = PR_TRUE; } - - if (mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) - PreserveToken(theStartToken, aScanner, origin); - - //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { - if(gHTMLElements[theTag].CanContainType(kCDATA)) { + + + if (isCDATA || isPCDATA) { + PRBool done = PR_FALSE; nsDependentString endTagName(nsHTMLTags::GetStringValue(theTag)); - CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); - CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); + CToken* text = + theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text); + CTextToken* textToken = NS_STATIC_CAST(CTextToken*,text); - //tell new token to finish consuming text... 
- result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script, - aScanner, - endTagName, - mFlags, - aFlushTokens); - - // Fix bug 44186 - // Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script> - // Note: if aFlushTokens is TRUE then we have seen an </script> - // We do NOT want to output the end token if we didn't see a - // </script> and have a preserve target. If that happens, then we'd - // be messing up the text inside the <textarea> or <xmp> or whatever - // it is. - if((!(mFlags & NS_IPARSER_FLAG_PRESERVE_CONTENT) && - !theStartToken->IsEmpty()) || aFlushTokens) { - // Setting this would make cases like <script/>d.w("text");</script> work. - theStartToken->SetEmpty(PR_FALSE); - // do this up here so we can just add the end token later on - AddToken(text,result,&mTokenDeque,theAllocator); + if (isCDATA) { + // The only tags that consume conservatively are <script> and + // <style>, the rest all consume until the end of the document. + result = textToken->ConsumeCharacterData(0, + theTag==eHTMLTag_script || + theTag==eHTMLTag_style, + theTag!=eHTMLTag_script, + aScanner, + endTagName, + mFlags, + done); + aFlushTokens = done; + } + else if (isPCDATA) { + // Title is consumed conservatively in order to not regress + // bug 42945 + result = textToken->ConsumeParsedCharacterData(0, + theTag==eHTMLTag_title, + aScanner, + endTagName, + mFlags, + done); - CToken* endToken=nsnull; + // Note: we *don't* set aFlushTokens here. + } + + // We want to do this unless result is kEOF, in which case we will + // simply unwind our stack and wait for more data anyway. 
+ if (kEOF != result) { + AddToken(text,NS_OK,&mTokenDeque,theAllocator); + CToken* endToken = nsnull; - if (NS_SUCCEEDED(result) && aFlushTokens) { + if (NS_SUCCEEDED(result) && done) { PRUnichar theChar; // Get the < result = aScanner.GetChar(theChar); NS_ASSERTION(NS_SUCCEEDED(result) && theChar == kLessThan, - "CTextToken::ConsumeUntil is broken!"); + "CTextToken::Consume*Data is broken!"); #ifdef DEBUG // Ensure we have a / PRUnichar tempChar; // Don't change non-debug vars in debug-only code result = aScanner.Peek(tempChar); NS_ASSERTION(NS_SUCCEEDED(result) && tempChar == kForwardSlash, - "CTextToken::ConsumeUntil is broken!"); + "CTextToken::Consume*Data is broken!"); #endif result = ConsumeEndTag(PRUnichar('/'),endToken,aScanner); - } else if (!(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) { + } else if (result == kFakeEndTag && + !(mFlags & NS_IPARSER_FLAG_VIEW_SOURCE)) { + result = NS_OK; endToken=theAllocator->CreateTokenOfType(eToken_end,theTag,endTagName); AddToken(endToken,result,&mTokenDeque,theAllocator); + } else if (result == kFakeEndTag) { + // If we are here, we are both faking having seen the end tag + // and are in view-source. + result = NS_OK; } } else { @@ -826,7 +824,7 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan } } } - + //EEEEECCCCKKKK!!! //This code is confusing, so pay attention. //If you're here, it's because we were in the midst of consuming a start @@ -879,15 +877,6 @@ nsresult nsHTMLTokenizer::ConsumeEndTag(PRUnichar aChar,CToken*& aToken,nsScanne aScanner.GetChar(aChar); } - if (NS_SUCCEEDED(result)) { - eHTMLTags theTag = (eHTMLTags)aToken->GetTypeID(); - if (mPreserveTarget == theTag) { - // Target reached. Stop preserving content. - mPreserveTarget = eHTMLTag_unknown; - mFlags &= ~NS_IPARSER_FLAG_PRESERVE_CONTENT; - } - } - // Do the same thing as we do in ConsumeStartTag. 
Basically, if we've run // out of room in this *section* of the document, pop all of the tokens // we've consumed this round and wait for more data. @@ -984,6 +973,12 @@ nsresult nsHTMLTokenizer::ConsumeComment(PRUnichar aChar,CToken*& aToken,nsScann result=aToken->Consume(aChar,aScanner,mFlags); AddToken(aToken,result,&mTokenDeque,theAllocator); } + + if (kNotAComment == result) { + // AddToken has IF_FREE()'d our token, so... + return ConsumeText(aToken, aScanner); + } + return result; } @@ -1109,33 +1104,3 @@ nsresult nsHTMLTokenizer::ConsumeProcessingInstruction(PRUnichar aChar,CToken*& } return result; } - -/** - * This method keeps a copy of contents within the start token. - * The stored content could later be used in displaying TEXTAREA, - * and also in view source. - * - * @update harishd 11/09/99 - * @param aStartToken: The token whose trailing contents are to be recorded - * @param aScanner: see nsScanner.h - * - */ - -void nsHTMLTokenizer::PreserveToken(CStartToken* aStartToken, - nsScanner& aScanner, - nsScannerIterator aOrigin) { - if(aStartToken) { - nsScannerIterator theCurrentPosition; - aScanner.CurrentPosition(theCurrentPosition); - - nsString& trailingContent = aStartToken->mTrailingContent; - PRUint32 oldLength = trailingContent.Length(); - trailingContent.SetLength(oldLength + Distance(aOrigin, theCurrentPosition)); - - nsWritingIterator<PRUnichar> beginWriting; - trailingContent.BeginWriting(beginWriting); - beginWriting.advance(oldLength); - - copy_string( aOrigin, theCurrentPosition, beginWriting ); - } -} diff --git a/parser/htmlparser/src/nsHTMLTokenizer.h b/parser/htmlparser/src/nsHTMLTokenizer.h index 7cb8a416941..8e5000d7020 100644 --- a/parser/htmlparser/src/nsHTMLTokenizer.h +++ b/parser/htmlparser/src/nsHTMLTokenizer.h @@ -58,7 +58,6 @@ {0xe4238ddd, 0x9eb6, 0x11d2, \ {0xba, 0xa5, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4 }} - /*************************************************************** Notes: 
***************************************************************/ @@ -74,7 +73,8 @@ public: NS_DECL_NSITOKENIZER nsHTMLTokenizer(PRInt32 aParseMode = eDTDMode_quirks, eParserDocType aDocType = eHTML3_Quirks, - eParserCommands aCommand = eViewNormal); + eParserCommands aCommand = eViewNormal, + PRUint16 aPrefs = 0); virtual ~nsHTMLTokenizer(); protected: @@ -93,8 +93,6 @@ protected: nsresult ScanDocStructure(PRBool aIsFinalChunk); - virtual void PreserveToken(CStartToken* aStartToken, nsScanner& aScanner, nsScannerIterator aOrigin); - static void AddToken(CToken*& aToken,nsresult aResult,nsDeque* aDeque,nsTokenAllocator* aTokenAllocator); nsDeque mTokenDeque; @@ -102,12 +100,11 @@ protected: nsTokenAllocator* mTokenAllocator; PRInt32 mTokenScanPos; PRUint32 mFlags; - eHTMLTags mPreserveTarget; // Tag whose content is preserved }; extern nsresult NS_NewHTMLTokenizer(nsITokenizer** aInstancePtrResult, PRInt32 aMode,eParserDocType aDocType, - eParserCommands aCommand); + eParserCommands aCommand, PRInt32 aFlags); #endif diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp index d8bb3046037..935c31db280 100644 --- a/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/parser/htmlparser/src/nsHTMLTokens.cpp @@ -61,7 +61,139 @@ static const PRUnichar kAttributeTerminalChars[] = { PRUnichar('>'), PRUnichar(0) }; - + +static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue); +/* + * @param aScanner -- controller of underlying input source + * @param aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities... 
+ * @return error result + * + */ +static +nsresult ConsumeEntity(nsScannerSharedSubstring& aString, + nsScanner& aScanner, + PRInt32 aFlag) +{ + nsresult result=NS_OK; + + PRUnichar ch; + result=aScanner.Peek(ch, 1); + + if (NS_SUCCEEDED(result)) { + PRUnichar amp=0; + PRInt32 theNCRValue=0; + nsAutoString entity; + + if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + result=CEntityToken::ConsumeEntity(ch,entity,aScanner); + if (NS_SUCCEEDED(result)) { + theNCRValue = nsHTMLEntities::EntityToUnicode(entity); + PRUnichar theTermChar=entity.Last(); + // If an entity value is greater than 255 then: + // Nav 4.x does not treat it as an entity, + // IE treats it as an entity if terminated with a semicolon. + // Resembling IE!! + + nsSubstring &writable = aString.writable(); + if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) { + // Looks like we're not dealing with an entity + writable.Append(kAmpersand); + writable.Append(entity); + } + else { + // A valid entity so reduce it. + writable.Append(PRUnichar(theNCRValue)); + } + } + } + else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + result=CEntityToken::ConsumeEntity(ch,entity,aScanner); + if (NS_SUCCEEDED(result)) { + nsSubstring &writable = aString.writable(); + if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { + // Looked like an entity but it's not + aScanner.GetChar(amp); + writable.Append(amp); + result = NS_OK; // just being safe.. + } + else { + PRInt32 err; + theNCRValue=entity.ToInteger(&err,kAutoDetect); + AppendNCR(writable, theNCRValue); + } + } + } + else { + // What we thought as entity is not really an entity... + aScanner.GetChar(amp); + aString.writable().Append(amp); + }//if + } + + return result; +} + +/* + * This general purpose method is used when you want to + * consume attributed text value. + * Note: It also reduces entities. 
+ * + * @param aNewlineCount -- the newline count to increment when hitting newlines + * @param aScanner -- controller of underlying input source + * @param aTerminalChars -- characters that stop consuming attribute. + * @param aAllowNewlines -- whether to allow newlines in the value. + * XXX it would be nice to roll this info into + * aTerminalChars somehow.... + * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... + * @return error result + */ +static +nsresult ConsumeUntil(nsScannerSharedSubstring& aString, + PRInt32& aNewlineCount, + nsScanner& aScanner, + const nsReadEndCondition& aEndCondition, + PRBool aAllowNewlines, + PRInt32 aFlag) +{ + nsresult result = NS_OK; + PRBool done = PR_FALSE; + + do { + result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE); + if(NS_SUCCEEDED(result)) { + PRUnichar ch; + aScanner.Peek(ch); + if(ch == kAmpersand) { + result = ConsumeEntity(aString,aScanner,aFlag); + } + else if(ch == kCR && aAllowNewlines) { + aScanner.GetChar(ch); + result = aScanner.Peek(ch); + if (NS_SUCCEEDED(result)) { + nsSubstring &writable = aString.writable(); + if(ch == kNewLine) { + writable.AppendLiteral("\r\n"); + aScanner.GetChar(ch); + } + else { + writable.Append(PRUnichar('\r')); + } + ++aNewlineCount; + } + } + else if(ch == kNewLine && aAllowNewlines) { + aScanner.GetChar(ch); + aString.writable().Append(PRUnichar('\n')); + ++aNewlineCount; + } + else { + done = PR_TRUE; + } + } + } while (NS_SUCCEEDED(result) && !done); + + return result; +} /************************************************************** And now for the token classes... 
@@ -244,15 +376,12 @@ void CStartToken::AppendSourceTo(nsAString& anOutputString){ /* * Watch out for Bug 15204 */ - if(!mTrailingContent.IsEmpty()) - anOutputString.Append(mTrailingContent); - else { - if(!mTextValue.IsEmpty()) - anOutputString.Append(mTextValue); - else - anOutputString.Append(GetTagName(mTypeID)); - anOutputString.Append(PRUnichar('>')); - } + if(!mTextValue.IsEmpty()) + anOutputString.Append(mTextValue); + else + anOutputString.Append(GetTagName(mTypeID)); + + anOutputString.Append(PRUnichar('>')); } /* @@ -293,8 +422,8 @@ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) mTypeID = (PRInt32)nsHTMLTags::LookupTag(tagIdent.str()); // Save the original tag string if this is user-defined or if we // are viewing source - if(eHTMLTag_userdefined==mTypeID || - (aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT))) { + if(eHTMLTag_userdefined==mTypeID || + (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { mTextValue = tagIdent.str(); } } @@ -388,6 +517,7 @@ void CEndToken::AppendSourceTo(nsAString& anOutputString){ anOutputString.Append(mTextValue); else anOutputString.Append(GetTagName(mTypeID)); + anOutputString.Append(PRUnichar('>')); } @@ -498,14 +628,24 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) * Consume as much clear text from scanner as possible. * The scanner is left on the < of the perceived end tag. * - * @update gess 3/25/98 * @param aChar -- last char consumed from stream + * @param aConservativeConsume -- controls our handling of content with no + * terminating string. + * @param aIgnoreComments -- whether or not we should take comments into + * account in looking for the end tag. * @param aScanner -- controller of underlying input source + * @param aEndTagname -- the terminal tag name. + * @param aFlag -- dtd modes and such. + * @param aFlushTokens -- PR_TRUE if we found the terminal tag. 
* @return error result */ -nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - const nsAString& aEndTagName,PRInt32 aFlag, - PRBool& aFlushTokens){ +nsresult CTextToken::ConsumeCharacterData(PRUnichar aChar, + PRBool aConservativeConsume, + PRBool aIgnoreComments, + nsScanner& aScanner, + const nsAString& aEndTagName, + PRInt32 aFlag, + PRBool& aFlushTokens) { nsresult result=NS_OK; nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos; PRBool done=PR_FALSE; @@ -532,7 +672,9 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann // 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1. // 5. If the end of the document is reached and if we still don't have the condition in step 4. then // assume that the prematured terminal string is the actual terminal string and goto step 1. This - // will be our last iteration. + // will be our last iteration. If there is no premature terminal string and we're being + // conservative in our consumption (aConservativeConsume), then don't consume anything + // from the scanner. Otherwise, we consume all the way until the end (for <xmp>). NS_NAMED_LITERAL_STRING(ltslash, "</"); const nsString theTerminalString = ltslash + aEndTagName; @@ -609,20 +751,171 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann // a) when the buffer runs out ot data. // b) when the terminal string is not found. if(!aScanner.IsIncremental()) { - if(theAltTermStrPos != endPos) { + if(theAltTermStrPos != endPos && aConservativeConsume) { // If you're here it means..we hit the rock bottom and therefore switch to plan B. theCurrOffset = theAltTermStrPos; theLastIteration = PR_TRUE; } - else { + else if (!aConservativeConsume) { done = PR_TRUE; // Do this to fix Bug. 
35456 + result = kFakeEndTag; + aScanner.BindSubstring(mTextValue, theStartOffset, endPos.advance(-1)); + aScanner.SetPosition(endPos.advance(1)); + } + else { + done = PR_TRUE; + result = kFakeEndTag; + // We need to bind our value to a non-empty string. + aScanner.BindSubstring(mTextValue, theStartOffset, theStartOffset); } } else { - result=kEOF; + result=kEOF; } } } + + return result; +} + +/* + * Consume as much clear text from scanner as possible, reducing entities. + * The scanner is left on the < of the perceived end tag. + * + * @param aChar -- last char consumed from stream + * @param aConservativeConsume -- controls our handling of content with no + * terminating string. + * @param aScanner -- controller of underlying input source + * @param aEndTagName -- the terminal tag name. + * @param aFlag -- dtd modes and such. + * @param aFound -- PR_TRUE if we found the terminal tag. + * @return error result + */ +nsresult CTextToken::ConsumeParsedCharacterData(PRUnichar aChar, + PRBool aConservativeConsume, + nsScanner& aScanner, + const nsAString& aEndTagName, + PRInt32 aFlag, + PRBool& aFound) +{ + // This function is fairly straightforward except if there is no terminating + // string. If there is, we simply loop through all of the entities, reducing + // them as necessary and skipping over non-terminal strings starting with <. + // If there is *no* terminal string, then we examine aConservativeConsume. + // If we want to be conservative, we backtrack to the first place in the + // document that looked like the end of PCDATA (i.e., the first tag). This + // is for compatibility and so we don't regress bug 42945. If we are not + // conservative, then we consume everything, all the way up to the end of + // the document. 
+ + static const PRUnichar terminalChars[] = { + PRUnichar('&'), PRUnichar('<'), + PRUnichar(0) + }; + static const nsReadEndCondition theEndCondition(terminalChars); + + nsScannerIterator currPos,endPos,altEndPos; + PRUint32 truncPos = 0; + aScanner.CurrentPosition(currPos); + aScanner.EndReading(endPos); + + altEndPos = endPos; + + nsScannerSharedSubstring theContent; + PRUnichar ch = 0; + + NS_NAMED_LITERAL_STRING(commentStart, "<!--"); + NS_NAMED_LITERAL_STRING(ltslash, "</"); + const nsString theTerminalString = ltslash + aEndTagName; + PRUint32 termStrLen = theTerminalString.Length(); + PRUint32 commentStartLen = commentStart.Length(); + + nsresult result = NS_OK; + + while (currPos != endPos) { + result = ConsumeUntil(theContent, mNewlineCount, aScanner, + theEndCondition, PR_TRUE, aFlag); + + if (NS_FAILED(result)) { + if (kEOF == result && !aScanner.IsIncremental()) { + aFound = PR_TRUE; // this is as good as it gets. + result = kFakeEndTag; + + if (aConservativeConsume && altEndPos != endPos) { + // We ran out of room looking for a </title>. Go back to the first + // place that looked like a tag and use that as our stopping point. + theContent.writable().Truncate(truncPos); + aScanner.SetPosition(altEndPos); + } + // else we take everything we consumed. + mTextValue.Rebind(theContent.str()); + } + else { + aFound = PR_FALSE; + } + + return result; + } + + aScanner.CurrentPosition(currPos); + aScanner.GetChar(ch); // this character must be '&' or '<' + + if (ch == kLessThan && altEndPos == endPos) { + // Keep this position in case we need it for later. 
+ altEndPos = currPos; + truncPos = theContent.str().Length(); + } + + if (Distance(currPos, endPos) >= termStrLen) { + nsScannerIterator start(currPos), end(currPos); + end.advance(termStrLen); + + if (CaseInsensitiveFindInReadable(theTerminalString,start,end)) { + if (end != endPos && (*end == '>' || *end == ' ' || + *end == '\t' || *end == '\n' || + *end == '\r' || *end == '\b')) { + aFound = PR_TRUE; + mTextValue.Rebind(theContent.str()); + aScanner.SetPosition(currPos); + break; + } + } + } + // IE only consumes <!-- --> as comments in PCDATA. We'll accept a bit + // more in quirks mode, but let's ensure that this really is a comment + // start to maintain the illusion of compatibility. + if (Distance(currPos, endPos) >= commentStartLen) { + nsScannerIterator start(currPos), end(currPos); + end.advance(commentStartLen); + + if (CaseInsensitiveFindInReadable(commentStart,start,end)) { + CCommentToken consumer; // stack allocated. + + // CCommentToken expects us to be on the '-' + aScanner.SetPosition(currPos.advance(2)); + result = consumer.Consume(*currPos, aScanner, aFlag); + if (kEOF == result) { + return kEOF; // this can only happen if we're really out of space. + } + else if (kNotAComment == result) { + // Fall through and consume this as text. + aScanner.CurrentPosition(currPos); + aScanner.SetPosition(currPos.advance(1)); + } + else { + consumer.AppendSourceTo(theContent.writable()); + mNewlineCount += consumer.GetNewlineCount(); + continue; + } + } + } + + result = kEOF; + // We did not find the terminal string yet so + // include the character that stopped consumption. + theContent.writable().Append(ch); + } + return result; } @@ -1036,10 +1329,9 @@ nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner) return kEOF; // not really an nsresult, but... } - // XXX We should return kNotAComment, parse comment open as text, and parse - // the rest of the document normally. Now we ALMOST do that: <! is - // missing from the content model. 
- return NS_OK; + // There was no terminating string, parse this comment as text. + aScanner.SetPosition(lt); + return kNotAComment; } nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner) @@ -1435,140 +1727,6 @@ void CAttributeToken::AppendSourceTo(nsAString& anOutputString){ // anOutputString.AppendLiteral(";"); } -static void AppendNCR(nsSubstring& aString, PRInt32 aNCRValue); -/* - * @param aScanner -- controller of underlying input source - * @param aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities... - * @return error result - * - */ -static -nsresult ConsumeAttributeEntity(nsScannerSharedSubstring& aString, - nsScanner& aScanner, - PRInt32 aFlag) -{ - - nsresult result=NS_OK; - - PRUnichar ch; - result=aScanner.Peek(ch, 1); - - if (NS_SUCCEEDED(result)) { - PRUnichar amp=0; - PRInt32 theNCRValue=0; - nsAutoString entity; - - if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result=CEntityToken::ConsumeEntity(ch,entity,aScanner); - if (NS_SUCCEEDED(result)) { - theNCRValue = nsHTMLEntities::EntityToUnicode(entity); - PRUnichar theTermChar=entity.Last(); - // If an entity value is greater than 255 then: - // Nav 4.x does not treat it as an entity, - // IE treats it as an entity if terminated with a semicolon. - // Resembling IE!! - - nsSubstring &writable = aString.writable(); - if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) { - // Looks like we're not dealing with an entity - writable.Append(kAmpersand); - writable.Append(entity); - } - else { - // A valid entity so reduce it. 
- writable.Append(PRUnichar(theNCRValue)); - } - } - } - else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - result=CEntityToken::ConsumeEntity(ch,entity,aScanner); - if (NS_SUCCEEDED(result)) { - nsSubstring &writable = aString.writable(); - if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { - // Looked like an entity but it's not - aScanner.GetChar(amp); - writable.Append(amp); - result = NS_OK; // just being safe.. - } - else { - PRInt32 err; - theNCRValue=entity.ToInteger(&err,kAutoDetect); - AppendNCR(writable, theNCRValue); - } - } - } - else { - // What we thought as entity is not really an entity... - aScanner.GetChar(amp); - aString.writable().Append(amp); - }//if - } - - return result; -} - -/* - * This general purpose method is used when you want to - * consume attributed text value. - * Note: It also reduces entities within attributes. - * - * @param aNewlineCount -- the newline count to increment when hitting newlines - * @param aScanner -- controller of underlying input source - * @param aTerminalChars -- characters that stop consuming attribute. - * @param aAllowNewlines -- whether to allow newlines in the value. - * XXX it would be nice to roll this info into - * aTerminalChars somehow.... - * @param aFlag - contains information such as |dtd mode|view mode|doctype|etc... 
- * @return error result - */ -static -nsresult ConsumeAttributeValueText(nsScannerSharedSubstring& aString, - PRInt32& aNewlineCount, - nsScanner& aScanner, - const nsReadEndCondition& aEndCondition, - PRBool aAllowNewlines, - PRInt32 aFlag) -{ - nsresult result = NS_OK; - PRBool done = PR_FALSE; - - do { - result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE); - if(NS_SUCCEEDED(result)) { - PRUnichar ch; - aScanner.Peek(ch); - if(ch == kAmpersand) { - result = ConsumeAttributeEntity(aString,aScanner,aFlag); - } - else if(ch == kCR && aAllowNewlines) { - aScanner.GetChar(ch); - result = aScanner.Peek(ch); - if (NS_SUCCEEDED(result)) { - nsSubstring &writable = aString.writable(); - if(ch == kNewLine) { - writable.AppendLiteral("\r\n"); - aScanner.GetChar(ch); - } - else { - writable.Append(PRUnichar('\r')); - } - ++aNewlineCount; - } - } - else if(ch == kNewLine && aAllowNewlines) { - aScanner.GetChar(ch); - aString.writable().Append(PRUnichar('\n')); - ++aNewlineCount; - } - else { - done = PR_TRUE; - } - } - } while (NS_SUCCEEDED(result) && !done); - - return result; -} - /* * This general purpose method is used when you want to * consume a known quoted string. 
@@ -1609,8 +1767,8 @@ nsresult ConsumeQuotedString(PRUnichar aChar, nsScannerIterator theOffset; aScanner.CurrentPosition(theOffset); - result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner, - *terminateCondition,PR_TRUE,aFlag); + result=ConsumeUntil(aString,aNewlineCount,aScanner, + *terminateCondition,PR_TRUE,aFlag); if(NS_SUCCEEDED(result)) { result = aScanner.GetChar(aChar); // aChar should be " or ' @@ -1625,8 +1783,8 @@ nsresult ConsumeQuotedString(PRUnichar aChar, theAttributeTerminator(kAttributeTerminalChars); aString.writable().Truncate(origLen); aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE); - result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner, - theAttributeTerminator,PR_FALSE,aFlag); + result=ConsumeUntil(aString,aNewlineCount,aScanner, + theAttributeTerminator,PR_FALSE,aFlag); if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { // Remember that this string literal was unterminated. result = NS_ERROR_HTMLPARSER_UNTERMINATEDSTRINGLITERAL; @@ -1770,12 +1928,12 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a else { static const nsReadEndCondition theAttributeTerminator(kAttributeTerminalChars); - result=ConsumeAttributeValueText(mTextValue, - mNewlineCount, - aScanner, - theAttributeTerminator, - PR_FALSE, - aFlag); + result=ConsumeUntil(mTextValue, + mNewlineCount, + aScanner, + theAttributeTerminator, + PR_FALSE, + aFlag); } }//if if (NS_OK==result) { diff --git a/parser/htmlparser/src/nsLoggingSink.h b/parser/htmlparser/src/nsLoggingSink.h index db245b2714c..e7cb45ecea4 100644 --- a/parser/htmlparser/src/nsLoggingSink.h +++ b/parser/htmlparser/src/nsLoggingSink.h @@ -90,7 +90,9 @@ public: NS_IMETHOD CloseMap(); NS_IMETHOD OpenFrameset(const nsIParserNode& aNode); NS_IMETHOD CloseFrameset(); - NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) { return NS_OK; } + NS_IMETHOD IsEnabled(PRInt32 aTag, PRBool* aReturn) + /* Take the largest possible feature set. 
*/ + { NS_ENSURE_ARG_POINTER(aReturn); *aReturn = PR_TRUE; return NS_OK; } NS_IMETHOD NotifyTagObservers(nsIParserNode* aNode) { return NS_OK; } NS_IMETHOD_(PRBool) IsFormOnStack() { return PR_FALSE; } diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp index 2d1bcf28914..556ebedb94c 100644 --- a/parser/htmlparser/src/nsParser.cpp +++ b/parser/htmlparser/src/nsParser.cpp @@ -1294,7 +1294,7 @@ nsParser::WillBuildModel(nsString& aFilename) return rv; nsITokenizer* tokenizer; - mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), tokenizer); + mParserContext->GetTokenizer(mParserContext->mDTD->GetType(), mSink, tokenizer); return mParserContext->mDTD->WillBuildModel(*mParserContext, tokenizer, mSink); } @@ -1355,10 +1355,6 @@ CParserContext* nsParser::PopContext() if (mParserContext->mStreamListenerState != eOnStop) { mParserContext->mStreamListenerState = oldContext->mStreamListenerState; } - // Preserve tokenizer state so that information is not lost - // between document.write. This fixes bug 99467 - if (mParserContext->mTokenizer) - mParserContext->mTokenizer->CopyState(oldContext->mTokenizer); } } return oldContext; @@ -1644,7 +1640,6 @@ nsParser::Parse(nsIInputStream* aStream, return result; } - /** * Call this method if all you want to do is parse 1 string full of HTML text. * In particular, this method should be called by the DOM when it has an HTML @@ -1718,7 +1713,8 @@ nsParser::Parse(const nsAString& aSourceBuffer, } } - pc = new CParserContext(theScanner, aKey, mCommand, 0, theDTD, theStatus, aLastCall); + pc = new CParserContext(theScanner, aKey, mCommand, + 0, theDTD, theStatus, aLastCall); NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY); PushContext(*pc); @@ -2010,7 +2006,7 @@ nsresult nsParser::BuildModel() { nsresult result = NS_OK; if (mParserContext) { PRInt32 type = mParserContext->mDTD ? 
mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); + mParserContext->GetTokenizer(type, mSink, theTokenizer); } if (theTokenizer) { @@ -2047,7 +2043,7 @@ nsresult nsParser::GetTokenizer(nsITokenizer*& aTokenizer) { aTokenizer = nsnull; if(mParserContext) { PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - result = mParserContext->GetTokenizer(type, aTokenizer); + result = mParserContext->GetTokenizer(type, mSink, aTokenizer); } return result; } @@ -2658,7 +2654,7 @@ PRBool nsParser::WillTokenize(PRBool aIsFinalChunk){ nsresult result = NS_OK; if (mParserContext) { PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); + mParserContext->GetTokenizer(type, mSink, theTokenizer); } if (theTokenizer) { @@ -2684,7 +2680,7 @@ nsresult nsParser::Tokenize(PRBool aIsFinalChunk){ if (mParserContext) { PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); + mParserContext->GetTokenizer(type, mSink, theTokenizer); } if (theTokenizer) { @@ -2755,7 +2751,7 @@ PRBool nsParser::DidTokenize(PRBool aIsFinalChunk){ nsresult rv = NS_OK; if (mParserContext) { PRInt32 type = mParserContext->mDTD ? mParserContext->mDTD->GetType() : NS_IPARSER_FLAG_HTML; - mParserContext->GetTokenizer(type, theTokenizer); + mParserContext->GetTokenizer(type, mSink, theTokenizer); } if (NS_SUCCEEDED(rv) && theTokenizer) {