From cf71c8e54e531f1cc83393f335947c777413fb4a Mon Sep 17 00:00:00 2001 From: "rickg%netscape.com" Date: Thu, 21 Sep 2000 02:58:12 +0000 Subject: [PATCH] fixed nsbeta3+PDT[n] bugs: 48351, 49278, 50070, 52478. Partial fix (WIP) for 49678 and 52725. R=attinasi, buster, syd. A=buster --- htmlparser/src/CNavDTD.cpp | 26 +++++----- htmlparser/src/CRtfDTD.cpp | 4 +- htmlparser/src/nsElementTable.cpp | 6 +-- htmlparser/src/nsHTMLTokenizer.cpp | 4 +- htmlparser/src/nsHTMLTokens.cpp | 3 +- htmlparser/src/nsHTMLTokens.h | 2 +- htmlparser/src/nsParser.cpp | 63 ++++++++++++++--------- htmlparser/src/nsParserNode.cpp | 6 +-- parser/htmlparser/src/CNavDTD.cpp | 26 +++++----- parser/htmlparser/src/CRtfDTD.cpp | 4 +- parser/htmlparser/src/nsElementTable.cpp | 6 +-- parser/htmlparser/src/nsHTMLTokenizer.cpp | 4 +- parser/htmlparser/src/nsHTMLTokens.cpp | 3 +- parser/htmlparser/src/nsHTMLTokens.h | 2 +- parser/htmlparser/src/nsParser.cpp | 63 ++++++++++++++--------- parser/htmlparser/src/nsParserNode.cpp | 6 +-- 16 files changed, 134 insertions(+), 94 deletions(-) diff --git a/htmlparser/src/CNavDTD.cpp b/htmlparser/src/CNavDTD.cpp index 12c2bd4fa1a4..76e17c50cb6c 100644 --- a/htmlparser/src/CNavDTD.cpp +++ b/htmlparser/src/CNavDTD.cpp @@ -336,15 +336,7 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu } else { if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kHTMLTextContentType)) { - switch(aParserContext.mDTDMode) { - case eDTDMode_strict: - case eDTDMode_transitional: - result=eValidDetect; - break; - default: - result=ePrimaryDetect; - break; - } + result=ePrimaryDetect; } else if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) { result=ePrimaryDetect; @@ -897,6 +889,19 @@ nsresult CNavDTD::DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag){ break; }//switch + //handle tags by generating a close tag... + //added this to fix bug 48351, which contains XHTML and uses empty tags. + if(nsHTMLElement::IsContainer(aChildTag)) { + CStartToken *theToken=NS_STATIC_CAST(CStartToken*,aNode.mToken); + if(theToken->IsEmpty()){ + + CToken *theEndToken=mTokenAllocator->CreateTokenOfType(eToken_end,aChildTag); + if(theEndToken) { + result=HandleEndToken(theEndToken); + } + } + } + return result; } @@ -2257,9 +2262,6 @@ nsresult CNavDTD::CollectSkippedContent(nsCParserNode& aNode,PRInt32 &aCount) { } else theNextToken->AppendSource(*aNode.mSkippedContent); } - else { - theNextToken->AppendSource(*aNode.mSkippedContent); - } IF_FREE(theNextToken); } diff --git a/htmlparser/src/CRtfDTD.cpp b/htmlparser/src/CRtfDTD.cpp index 1f09e2ed506c..38a6915356b4 100644 --- a/htmlparser/src/CRtfDTD.cpp +++ b/htmlparser/src/CRtfDTD.cpp @@ -995,10 +995,10 @@ nsresult nsRTFTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) } if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsISupports*)(this); } else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsITokenizer*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (nsRTFTokenizer*)(this); diff --git a/htmlparser/src/nsElementTable.cpp b/htmlparser/src/nsElementTable.cpp index 99e724c75df8..8612a6175ab4 100644 --- a/htmlparser/src/nsElementTable.cpp +++ b/htmlparser/src/nsElementTable.cpp @@ -949,7 +949,7 @@ void InitializeElementTable(void) { /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPreformatted, (kSelf|kFlowEntity), kNone, //I'm allowing WAY too much in here. Spec says inline. + /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, (kSelf|kFlowEntity), kNone, //I'm allowing WAY too much in here. Spec says inline. /*special props, prop-range*/ 0, kDefaultPropRange, /*special parents,kids,skip*/ 0,&gPreKids,eHTMLTag_unknown); @@ -1238,8 +1238,8 @@ void InitializeElementTable(void) { /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPreformatted, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, + /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, kNone, kNone, + /*special props, prop-range*/ kNone,kDefaultPropRange, /*special parents,kids,skip*/ 0,0,eHTMLTag_xmp); Initialize( diff --git a/htmlparser/src/nsHTMLTokenizer.cpp b/htmlparser/src/nsHTMLTokenizer.cpp index 07353b3c82dd..21f285b167ba 100644 --- a/htmlparser/src/nsHTMLTokenizer.cpp +++ b/htmlparser/src/nsHTMLTokenizer.cpp @@ -59,10 +59,10 @@ nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) } if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsISupports*)(this); } else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsITokenizer*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (nsHTMLTokenizer*)(this); diff --git a/htmlparser/src/nsHTMLTokens.cpp b/htmlparser/src/nsHTMLTokens.cpp index f57db666ecec..5b7d75a5944e 100644 --- a/htmlparser/src/nsHTMLTokens.cpp +++ b/htmlparser/src/nsHTMLTokens.cpp @@ -639,7 +639,8 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann theTermStrPos=kNotFound; tempOffset=theBuffer.FindChar(kGreaterThan,PR_TRUE,tempOffset); if(tempOffset>-1) { - theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,termStrLen+2); + //theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,termStrLen+2); + theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,tempOffset-(theCurrOffset-2)); //bug43513... if(theTermStrPos>-1) break; tempOffset++; } diff --git a/htmlparser/src/nsHTMLTokens.h b/htmlparser/src/nsHTMLTokens.h index 558cee9ce951..228e0c9dcac9 100644 --- a/htmlparser/src/nsHTMLTokens.h +++ b/htmlparser/src/nsHTMLTokens.h @@ -157,7 +157,7 @@ class CStartToken: public CHTMLToken { PRBool mEmpty; eContainerInfo mContainerInfo; - nsCOMPtr mIDAttributeAtom; + nsCOMPtr mIDAttributeAtom; }; diff --git a/htmlparser/src/nsParser.cpp b/htmlparser/src/nsParser.cpp index 0d193d44750f..222b66e337c9 100644 --- a/htmlparser/src/nsParser.cpp +++ b/htmlparser/src/nsParser.cpp @@ -23,10 +23,10 @@ #define DEBUG_XMLENCODING #define XMLENCODING_PEEKBYTES 64 -//#define TEST_DOCTYPES #define DISABLE_TRANSITIONAL_MODE + #include "nsParser.h" #include "nsIContentSink.h" #include "nsString.h" @@ -106,21 +106,16 @@ public: nsIDTD* theDTD; - NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD... - mDTDDeque.Push(theDTD); - - -#if 1 //to fix bug 50070. - const char* theStrictDTDEnabled="true"; -#else const char* theStrictDTDEnabled=PR_GetEnv("ENABLE_STRICT"); //always false (except rickg's machine) -#endif if(theStrictDTDEnabled) { NS_NewOtherHTMLDTD(&mOtherDTD); //do this as the default DTD for strict documents... mDTDDeque.Push(mOtherDTD); } + NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD... + mDTDDeque.Push(theDTD); + mHasViewSourceDTD=PR_FALSE; mHasRTFDTD=mHasXMLDTD=PR_FALSE; } @@ -211,6 +206,9 @@ void nsParser::FreeSharedObjects(void) { } } + + + static PRBool gDumpContent=PR_FALSE; /** @@ -648,13 +646,17 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& aParseMode=eDTDMode_quirks; aDocType=eHTML3Text; + nsDTDMode thePublicID=eDTDMode_quirks; + nsDTDMode theSystemID=eDTDMode_unknown; + + PRBool theMimeTypeIsHTML=aMimeType.EqualsWithConversion(kHTMLTextContentType); //let's eliminate non-HTML as quickly as possible... PRInt32 theIndex=aBuffer.Find("?XML",PR_TRUE,0,128); if(kNotFound!=theIndex) { aParseMode=eDTDMode_strict; - if(aMimeType.EqualsWithConversion(kHTMLTextContentType)) { + if(theMimeTypeIsHTML) { //this is here to prevent a crash if someone gives us an XML document, //but necko tells us it's a text/html mimetype. aDocType=eHTML4Text; @@ -663,9 +665,10 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& else { if(!aMimeType.EqualsWithConversion(kPlainTextContentType)) { aDocType=eXMLText; + aParseMode=eDTDMode_strict; + theSystemID=thePublicID=eDTDMode_strict; } else aDocType=ePlainText; - return; } } else if(aMimeType.EqualsWithConversion(kPlainTextContentType)) { @@ -687,25 +690,28 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& //now let's see if we have HTML or XHTML... - PRInt32 theLTPos=aBuffer.FindChar(kLessThan); - PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan); + PRInt32 theOffset=0; + PRInt32 theDocTypePos=aBuffer.Find("!DOCTYPE",PR_TRUE,0,500); //find doctype + if(kNotFound!=theDocTypePos){ + theOffset=theDocTypePos-2; + } - if((kNotFound!=theGTPos) && (kNotFound!=theLTPos)) { + PRInt32 theLTPos=aBuffer.FindChar(kLessThan,PR_FALSE,theOffset); + PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan,PR_FALSE,theOffset); + + if((kNotFound!=theGTPos) && (kNotFound!=theLTPos)) { const PRUnichar* theBuffer=aBuffer.GetUnicode(); CWordTokenizer theTokenizer(aBuffer,theLTPos,theGTPos); - PRInt32 theOffset=theTokenizer.GetNextWord(); //try to find ?xml, !doctype, etc... + theOffset=theTokenizer.GetNextWord(); //try to find ?xml, !doctype, etc... - if((kNotFound!=theOffset) && - (0==nsCRT::strncasecmp(theBuffer+theOffset,"!DOCTYPE",theTokenizer.mLength))) { + if((kNotFound!=theOffset) && (kNotFound!=theDocTypePos)) { //Ok -- so assume it's (X)HTML; now figure out the flavor... PRInt32 theIter=0; //prevent infinite loops... PRBool done=PR_FALSE; //use this to quit if we find garbage... PRBool readSystemID=PR_FALSE; - nsDTDMode thePublicID=eDTDMode_quirks; - nsDTDMode theSystemID=eDTDMode_unknown; theOffset=theTokenizer.GetNextWord(); @@ -839,6 +845,13 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& } } + if(eXHTMLText==aDocType) { + aParseMode=eDTDMode_strict; + if(theMimeTypeIsHTML){ + aDocType=eHTML4Text; + } + } + #ifdef DISABLE_TRANSITIONAL_MODE /******************************************************************************************** @@ -852,8 +865,8 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& if(eDTDMode_transitional==aParseMode) { if(eHTML4Text==aDocType) aParseMode=eDTDMode_quirks; - else if(eXHTMLText==aDocType) - aParseMode=eDTDMode_strict; +// else if(eXHTMLText==aDocType) +// aParseMode=eDTDMode_strict; } #endif @@ -1217,6 +1230,7 @@ NS_IMETHODIMP nsParser::CreateCompatibleDTD(nsIDTD** aDTD, } +//#define TEST_DOCTYPES #ifdef TEST_DOCTYPES static const char* doctypes[] = { @@ -1330,10 +1344,12 @@ static const char* doctypes[] = { "", "", "", + "", 0 }; #endif +//////////////////////////////////////////////////////////////////////// /** @@ -1351,6 +1367,7 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){ nsresult result=NS_OK; + #ifdef TEST_DOCTYPES static PRBool tested=PR_FALSE; @@ -2067,8 +2084,8 @@ nsParser::OnStatus(nsIChannel* channel, nsISupports* aContext, * @param * @return error code -- 0 if ok, non-zero if error. */ -nsresult nsParser::OnStartRequest(nsIChannel* channel, nsISupports* aContext) -{ +nsresult nsParser::OnStartRequest(nsIChannel* channel, nsISupports* aContext) { + NS_PRECONDITION((eNone==mParserContext->mStreamListenerState),kBadListenerInit); if (nsnull != mObserver) { diff --git a/htmlparser/src/nsParserNode.cpp b/htmlparser/src/nsParserNode.cpp index bf63124c7ede..777ba4b42f2b 100644 --- a/htmlparser/src/nsParserNode.cpp +++ b/htmlparser/src/nsParserNode.cpp @@ -187,7 +187,7 @@ const nsString& nsCParserNode::GetName() const { * @return string ref of text from internal token */ const nsString& nsCParserNode::GetText() const { - return mToken->GetStringValueXXX(); + return (mToken) ? mToken->GetStringValueXXX() : GetEmptyString(); } /** @@ -229,7 +229,7 @@ void nsCParserNode::SetSkippedContent(nsString& aString) { * @return int value that represents tag type */ PRInt32 nsCParserNode::GetNodeType(void) const{ - return mToken->GetTypeID(); + return (mToken) ? mToken->GetTypeID() : 0; } @@ -242,7 +242,7 @@ PRInt32 nsCParserNode::GetNodeType(void) const{ * @return */ PRInt32 nsCParserNode::GetTokenType(void) const{ - return mToken->GetTokenType(); + return (mToken) ? mToken->GetTokenType() : 0; } diff --git a/parser/htmlparser/src/CNavDTD.cpp b/parser/htmlparser/src/CNavDTD.cpp index 12c2bd4fa1a4..76e17c50cb6c 100644 --- a/parser/htmlparser/src/CNavDTD.cpp +++ b/parser/htmlparser/src/CNavDTD.cpp @@ -336,15 +336,7 @@ eAutoDetectResult CNavDTD::CanParse(CParserContext& aParserContext,nsString& aBu } else { if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kHTMLTextContentType)) { - switch(aParserContext.mDTDMode) { - case eDTDMode_strict: - case eDTDMode_transitional: - result=eValidDetect; - break; - default: - result=ePrimaryDetect; - break; - } + result=ePrimaryDetect; } else if(PR_TRUE==aParserContext.mMimeType.EqualsWithConversion(kPlainTextContentType)) { result=ePrimaryDetect; @@ -897,6 +889,19 @@ nsresult CNavDTD::DidHandleStartTag(nsCParserNode& aNode,eHTMLTags aChildTag){ break; }//switch + //handle tags by generating a close tag... + //added this to fix bug 48351, which contains XHTML and uses empty tags. + if(nsHTMLElement::IsContainer(aChildTag)) { + CStartToken *theToken=NS_STATIC_CAST(CStartToken*,aNode.mToken); + if(theToken->IsEmpty()){ + + CToken *theEndToken=mTokenAllocator->CreateTokenOfType(eToken_end,aChildTag); + if(theEndToken) { + result=HandleEndToken(theEndToken); + } + } + } + return result; } @@ -2257,9 +2262,6 @@ nsresult CNavDTD::CollectSkippedContent(nsCParserNode& aNode,PRInt32 &aCount) { } else theNextToken->AppendSource(*aNode.mSkippedContent); } - else { - theNextToken->AppendSource(*aNode.mSkippedContent); - } IF_FREE(theNextToken); } diff --git a/parser/htmlparser/src/CRtfDTD.cpp b/parser/htmlparser/src/CRtfDTD.cpp index 1f09e2ed506c..38a6915356b4 100644 --- a/parser/htmlparser/src/CRtfDTD.cpp +++ b/parser/htmlparser/src/CRtfDTD.cpp @@ -995,10 +995,10 @@ nsresult nsRTFTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) } if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsISupports*)(this); } else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsITokenizer*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (nsRTFTokenizer*)(this); diff --git a/parser/htmlparser/src/nsElementTable.cpp b/parser/htmlparser/src/nsElementTable.cpp index 99e724c75df8..8612a6175ab4 100644 --- a/parser/htmlparser/src/nsElementTable.cpp +++ b/parser/htmlparser/src/nsElementTable.cpp @@ -949,7 +949,7 @@ void InitializeElementTable(void) { /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPreformatted, (kSelf|kFlowEntity), kNone, //I'm allowing WAY too much in here. Spec says inline. + /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, (kSelf|kFlowEntity), kNone, //I'm allowing WAY too much in here. Spec says inline. /*special props, prop-range*/ 0, kDefaultPropRange, /*special parents,kids,skip*/ 0,&gPreKids,eHTMLTag_unknown); @@ -1238,8 +1238,8 @@ void InitializeElementTable(void) { /*req-parent excl-parent*/ eHTMLTag_unknown,eHTMLTag_unknown, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*autoclose starttags and endtags*/ 0,0,0,0, - /*parent,incl,exclgroups*/ kPreformatted, kNone, kNone, - /*special props, prop-range*/ kNonContainer,kDefaultPropRange, + /*parent,incl,exclgroups*/ kInlineEntity|kPreformatted, kNone, kNone, + /*special props, prop-range*/ kNone,kDefaultPropRange, /*special parents,kids,skip*/ 0,0,eHTMLTag_xmp); Initialize( diff --git a/parser/htmlparser/src/nsHTMLTokenizer.cpp b/parser/htmlparser/src/nsHTMLTokenizer.cpp index 07353b3c82dd..21f285b167ba 100644 --- a/parser/htmlparser/src/nsHTMLTokenizer.cpp +++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp @@ -59,10 +59,10 @@ nsresult nsHTMLTokenizer::QueryInterface(const nsIID& aIID, void** aInstancePtr) } if(aIID.Equals(kISupportsIID)) { //do IUnknown... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsISupports*)(this); } else if(aIID.Equals(kITokenizerIID)) { //do IParser base class... - *aInstancePtr = (nsIDTD*)(this); + *aInstancePtr = (nsITokenizer*)(this); } else if(aIID.Equals(kClassIID)) { //do this class... *aInstancePtr = (nsHTMLTokenizer*)(this); diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp index f57db666ecec..5b7d75a5944e 100644 --- a/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/parser/htmlparser/src/nsHTMLTokens.cpp @@ -639,7 +639,8 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann theTermStrPos=kNotFound; tempOffset=theBuffer.FindChar(kGreaterThan,PR_TRUE,tempOffset); if(tempOffset>-1) { - theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,termStrLen+2); + //theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,termStrLen+2); + theTermStrPos=theBuffer.RFind(aTerminalString,PR_TRUE,tempOffset,tempOffset-(theCurrOffset-2)); //bug43513... if(theTermStrPos>-1) break; tempOffset++; } diff --git a/parser/htmlparser/src/nsHTMLTokens.h b/parser/htmlparser/src/nsHTMLTokens.h index 558cee9ce951..228e0c9dcac9 100644 --- a/parser/htmlparser/src/nsHTMLTokens.h +++ b/parser/htmlparser/src/nsHTMLTokens.h @@ -157,7 +157,7 @@ class CStartToken: public CHTMLToken { PRBool mEmpty; eContainerInfo mContainerInfo; - nsCOMPtr mIDAttributeAtom; + nsCOMPtr mIDAttributeAtom; }; diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp index 0d193d44750f..222b66e337c9 100644 --- a/parser/htmlparser/src/nsParser.cpp +++ b/parser/htmlparser/src/nsParser.cpp @@ -23,10 +23,10 @@ #define DEBUG_XMLENCODING #define XMLENCODING_PEEKBYTES 64 -//#define TEST_DOCTYPES #define DISABLE_TRANSITIONAL_MODE + #include "nsParser.h" #include "nsIContentSink.h" #include "nsString.h" @@ -106,21 +106,16 @@ public: nsIDTD* theDTD; - NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD... - mDTDDeque.Push(theDTD); - - -#if 1 //to fix bug 50070. - const char* theStrictDTDEnabled="true"; -#else const char* theStrictDTDEnabled=PR_GetEnv("ENABLE_STRICT"); //always false (except rickg's machine) -#endif if(theStrictDTDEnabled) { NS_NewOtherHTMLDTD(&mOtherDTD); //do this as the default DTD for strict documents... mDTDDeque.Push(mOtherDTD); } + NS_NewNavHTMLDTD(&theDTD); //do this as a default HTML DTD... + mDTDDeque.Push(theDTD); + mHasViewSourceDTD=PR_FALSE; mHasRTFDTD=mHasXMLDTD=PR_FALSE; } @@ -211,6 +206,9 @@ void nsParser::FreeSharedObjects(void) { } } + + + static PRBool gDumpContent=PR_FALSE; /** @@ -648,13 +646,17 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& aParseMode=eDTDMode_quirks; aDocType=eHTML3Text; + nsDTDMode thePublicID=eDTDMode_quirks; + nsDTDMode theSystemID=eDTDMode_unknown; + + PRBool theMimeTypeIsHTML=aMimeType.EqualsWithConversion(kHTMLTextContentType); //let's eliminate non-HTML as quickly as possible... PRInt32 theIndex=aBuffer.Find("?XML",PR_TRUE,0,128); if(kNotFound!=theIndex) { aParseMode=eDTDMode_strict; - if(aMimeType.EqualsWithConversion(kHTMLTextContentType)) { + if(theMimeTypeIsHTML) { //this is here to prevent a crash if someone gives us an XML document, //but necko tells us it's a text/html mimetype. aDocType=eHTML4Text; @@ -663,9 +665,10 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& else { if(!aMimeType.EqualsWithConversion(kPlainTextContentType)) { aDocType=eXMLText; + aParseMode=eDTDMode_strict; + theSystemID=thePublicID=eDTDMode_strict; } else aDocType=ePlainText; - return; } } else if(aMimeType.EqualsWithConversion(kPlainTextContentType)) { @@ -687,25 +690,28 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& //now let's see if we have HTML or XHTML... - PRInt32 theLTPos=aBuffer.FindChar(kLessThan); - PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan); + PRInt32 theOffset=0; + PRInt32 theDocTypePos=aBuffer.Find("!DOCTYPE",PR_TRUE,0,500); //find doctype + if(kNotFound!=theDocTypePos){ + theOffset=theDocTypePos-2; + } - if((kNotFound!=theGTPos) && (kNotFound!=theLTPos)) { + PRInt32 theLTPos=aBuffer.FindChar(kLessThan,PR_FALSE,theOffset); + PRInt32 theGTPos=aBuffer.FindChar(kGreaterThan,PR_FALSE,theOffset); + + if((kNotFound!=theGTPos) && (kNotFound!=theLTPos)) { const PRUnichar* theBuffer=aBuffer.GetUnicode(); CWordTokenizer theTokenizer(aBuffer,theLTPos,theGTPos); - PRInt32 theOffset=theTokenizer.GetNextWord(); //try to find ?xml, !doctype, etc... + theOffset=theTokenizer.GetNextWord(); //try to find ?xml, !doctype, etc... - if((kNotFound!=theOffset) && - (0==nsCRT::strncasecmp(theBuffer+theOffset,"!DOCTYPE",theTokenizer.mLength))) { + if((kNotFound!=theOffset) && (kNotFound!=theDocTypePos)) { //Ok -- so assume it's (X)HTML; now figure out the flavor... PRInt32 theIter=0; //prevent infinite loops... PRBool done=PR_FALSE; //use this to quit if we find garbage... PRBool readSystemID=PR_FALSE; - nsDTDMode thePublicID=eDTDMode_quirks; - nsDTDMode theSystemID=eDTDMode_unknown; theOffset=theTokenizer.GetNextWord(); @@ -839,6 +845,13 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& } } + if(eXHTMLText==aDocType) { + aParseMode=eDTDMode_strict; + if(theMimeTypeIsHTML){ + aDocType=eHTML4Text; + } + } + #ifdef DISABLE_TRANSITIONAL_MODE /******************************************************************************************** @@ -852,8 +865,8 @@ void DetermineParseMode(nsString& aBuffer,nsDTDMode& aParseMode,eParserDocType& if(eDTDMode_transitional==aParseMode) { if(eHTML4Text==aDocType) aParseMode=eDTDMode_quirks; - else if(eXHTMLText==aDocType) - aParseMode=eDTDMode_strict; +// else if(eXHTMLText==aDocType) +// aParseMode=eDTDMode_strict; } #endif @@ -1217,6 +1230,7 @@ NS_IMETHODIMP nsParser::CreateCompatibleDTD(nsIDTD** aDTD, } +//#define TEST_DOCTYPES #ifdef TEST_DOCTYPES static const char* doctypes[] = { @@ -1330,10 +1344,12 @@ static const char* doctypes[] = { "", "", "", + "", 0 }; #endif +//////////////////////////////////////////////////////////////////////// /** @@ -1351,6 +1367,7 @@ nsresult nsParser::WillBuildModel(nsString& aFilename){ nsresult result=NS_OK; + #ifdef TEST_DOCTYPES static PRBool tested=PR_FALSE; @@ -2067,8 +2084,8 @@ nsParser::OnStatus(nsIChannel* channel, nsISupports* aContext, * @param * @return error code -- 0 if ok, non-zero if error. */ -nsresult nsParser::OnStartRequest(nsIChannel* channel, nsISupports* aContext) -{ +nsresult nsParser::OnStartRequest(nsIChannel* channel, nsISupports* aContext) { + NS_PRECONDITION((eNone==mParserContext->mStreamListenerState),kBadListenerInit); if (nsnull != mObserver) { diff --git a/parser/htmlparser/src/nsParserNode.cpp b/parser/htmlparser/src/nsParserNode.cpp index bf63124c7ede..777ba4b42f2b 100644 --- a/parser/htmlparser/src/nsParserNode.cpp +++ b/parser/htmlparser/src/nsParserNode.cpp @@ -187,7 +187,7 @@ const nsString& nsCParserNode::GetName() const { * @return string ref of text from internal token */ const nsString& nsCParserNode::GetText() const { - return mToken->GetStringValueXXX(); + return (mToken) ? mToken->GetStringValueXXX() : GetEmptyString(); } /** @@ -229,7 +229,7 @@ void nsCParserNode::SetSkippedContent(nsString& aString) { * @return int value that represents tag type */ PRInt32 nsCParserNode::GetNodeType(void) const{ - return mToken->GetTypeID(); + return (mToken) ? mToken->GetTypeID() : 0; } @@ -242,7 +242,7 @@ PRInt32 nsCParserNode::GetNodeType(void) const{ * @return */ PRInt32 nsCParserNode::GetTokenType(void) const{ - return mToken->GetTokenType(); + return (mToken) ? mToken->GetTokenType() : 0; }