From df72f84c56b9c8e8708b66afe2473dd0d3cb1647 Mon Sep 17 00:00:00 2001 From: rickg Date: Thu, 28 May 1998 07:12:08 +0000 Subject: [PATCH] modest improvements to push, and various bug fixes --- htmlparser/src/CNavDTD.cpp | 10 +--- htmlparser/src/CNavDelegate.cpp | 36 +++++++------- htmlparser/src/nsHTMLParser.cpp | 54 ++++++++++++++++---- htmlparser/src/nsHTMLParser.h | 1 + htmlparser/src/nsHTMLTokens.cpp | 69 ++++++++++++++++++-------- htmlparser/src/nsHTMLTokens.h | 22 ++++---- htmlparser/src/nsParserTypes.h | 1 + htmlparser/src/nsToken.cpp | 11 ++++ htmlparser/src/nsToken.h | 7 +++ parser/htmlparser/src/CNavDTD.cpp | 10 +--- parser/htmlparser/src/CNavDelegate.cpp | 36 +++++++------- parser/htmlparser/src/nsHTMLParser.cpp | 54 ++++++++++++++++---- parser/htmlparser/src/nsHTMLParser.h | 1 + parser/htmlparser/src/nsHTMLTokens.cpp | 69 ++++++++++++++++++-------- parser/htmlparser/src/nsHTMLTokens.h | 22 ++++---- parser/htmlparser/src/nsParserTypes.h | 1 + parser/htmlparser/src/nsToken.cpp | 11 ++++ parser/htmlparser/src/nsToken.h | 7 +++ 18 files changed, 290 insertions(+), 132 deletions(-) diff --git a/htmlparser/src/CNavDTD.cpp b/htmlparser/src/CNavDTD.cpp index 351d853ebf6..898f46a7703 100644 --- a/htmlparser/src/CNavDTD.cpp +++ b/htmlparser/src/CNavDTD.cpp @@ -400,8 +400,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const { if (eHTMLTag_listitem == aChild) { return PR_FALSE; } - result = PR_TRUE; - break; + result=PRBool(!strchr(gHeadingTags,aChild)); break; case eHTMLTag_listing: result = PR_TRUE; break; @@ -423,8 +422,7 @@ PRBool CNavDTD::CanContain(PRInt32 aParent,PRInt32 aChild) const { case eHTMLTag_ol: case eHTMLTag_ul: // XXX kipp was here - result = PR_TRUE; - break; + result=PRBool(!strchr(gHeadingTags,aChild)); break; case eHTMLTag_noframes: if(eHTMLTag_body==aChild) @@ -786,10 +784,6 @@ PRInt32 CNavDTD::GetDefaultParentTagFor(PRInt32 aTag) const{ case eHTMLTag_col: result=eHTMLTag_colgroup; break; - //These have to do with listings... 
- case eHTMLTag_listitem: - result=eHTMLTag_ul; break; - case eHTMLTag_dd: case eHTMLTag_dt: result=eHTMLTag_dl; break; diff --git a/htmlparser/src/CNavDelegate.cpp b/htmlparser/src/CNavDelegate.cpp index 14c5cf34e03..3befb9ae52c 100644 --- a/htmlparser/src/CNavDelegate.cpp +++ b/htmlparser/src/CNavDelegate.cpp @@ -162,23 +162,7 @@ PRInt32 CNavDelegate::ConsumeAttributes(PRUnichar aChar,CScanner& aScanner,CToke }//if }//while - //ok, this is a bit complicated, so follow closely. - //Since we're incremental (but pessimistic), it is possible that even though - //we've eaten a few delicious attributes, we can't keep them because - //we couldn't eat all of them (up to an including the close > for this tag). - //Therefore, we need to remove the ones we just created from the tokendeque, - //and destroy them. (They'll get reconsumed on the next incremental pass). - //NOTE: This process can be enhanced later on by adding state to the delegate - // telling us that we're in the attribute consumption phase. - // Remember the mantra: Crawl, Walk, Run! - if(kNoError==result) { - aToken->SetAttributeCount(theAttrCount); - } - else { - while(theAttrCount--) { - delete mTokenDeque.PopBack(); - } - } + aToken->SetAttributeCount(theAttrCount); return result; } @@ -214,9 +198,11 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a * @return new token or null */ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) { - aToken=new CStartToken(nsAutoString("")); + PRInt32 theDequeSize=mTokenDeque.GetSize(); PRInt32 result=kNoError; + aToken=new CStartToken(nsAutoString("")); + if(aToken) { result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text... if(kNoError==result) { @@ -251,6 +237,20 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken* } //if } //if } //if + + //EEEEECCCCKKKK!!! + //This code is confusing, so pay attention. 
+ //If you're here, it's because we were in the midst of consuming a start + //tag but ran out of data (not in the stream, but in this *part* of the stream). + //For simplicity, we have to unwind our input. Therefore, we pop and discard + //any new tokens we've queued this round. Later we can get smarter about this. + if(kNoError!=result) { + while(mTokenDeque.GetSize()>theDequeSize) { + delete mTokenDeque.PopBack(); + } + } + + } //if } //if return result; diff --git a/htmlparser/src/nsHTMLParser.cpp b/htmlparser/src/nsHTMLParser.cpp index 73be135c322..4e3f39a7992 100644 --- a/htmlparser/src/nsHTMLParser.cpp +++ b/htmlparser/src/nsHTMLParser.cpp @@ -512,6 +512,7 @@ void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& */ PRInt32 nsHTMLParser::WillBuildModel(void) { mIteration=-1; + mHasSeenOpenTag=PR_FALSE; if(mSink) mSink->WillBuildModel(); return kNoError; @@ -824,7 +825,7 @@ PRInt32 nsHTMLParser::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag if(PR_FALSE==contains){ result=CreateContextStackFor(aChildTag); - if(PR_FALSE==result) { + if(kNoError!=result) { //if you're here, then the new topmost container can't contain aToken. //You must determine what container hierarchy you need to hold aToken, //and create that on the parsestack. @@ -1096,9 +1097,24 @@ PRInt32 nsHTMLParser::HandleAttributeToken(CToken* aToken) { PRInt32 nsHTMLParser::HandleScriptToken(CToken* aToken) { NS_PRECONDITION(0!=aToken,kNullToken); - CScriptToken* st = (CScriptToken*)(aToken); - PRInt32 result=kNoError; - return result; + CScriptToken* st = (CScriptToken*)(aToken); + + eHTMLTokenTypes subtype=eToken_attribute; + nsDeque& deque=mTokenizer->GetDeque(); + nsDequeIterator end=deque.End(); + + if(*mCurrentPos!=end) { + CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos)); + subtype=eHTMLTokenTypes(tkn->GetTokenType()); + if(eToken_skippedcontent==subtype) { + //WE INTENTIONALLY DROP THE TOKEN ON THE FLOOR! + //LATER, we'll pass this onto the javascript system. 
+ return kNoError; + } + else (*mCurrentPos)--; + } + return kInterrupted; + } /** @@ -1531,6 +1547,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){ } } //while } //elseif + else result=kCantPropagate; } //elseif //now, build up the stack according to the tags @@ -1539,8 +1556,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){ nsAutoString empty; int i=0; for(i=pos;iSetHTMLTag((eHTMLTags)theVector[cnt-1-i]); + CStartToken* st=new CStartToken((eHTMLTags)theVector[cnt-1-i]); HandleStartToken(st); } } @@ -1616,19 +1632,39 @@ nsresult nsHTMLParser::OnStartBinding(void){ * * * @update gess 5/12/98 - * @param - * @return + * @param pIStream contains the input chars + * @param length is the number of bytes waiting input + * @return error code (usually 0) */ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){ int len=0; + int offset=0; do { PRInt32 err; len = pIStream->Read(&err, mTransferBuffer, 0, gTransferBufferSize); if(len>0) { + + //Ok -- here's the problem. + //Just because someone throws you some data, doesn't mean that it's + //actually GOOD data. Recently, I encountered a problem where netlib + //was prepending an otherwise valid buffer with a few garbage characters. + //To solve this, I'm adding some debug code here that protects us from + //propagating the bad data upwards. 
+ mTransferBuffer[len]=0; - mTokenizer->Append(mTransferBuffer,len); + if(PR_FALSE==mHasSeenOpenTag) { + for(offset=0;offsetAppend(&mTransferBuffer[offset],len); } } while (len > 0); diff --git a/htmlparser/src/nsHTMLParser.h b/htmlparser/src/nsHTMLParser.h index c3a51b9f75d..776670bb378 100644 --- a/htmlparser/src/nsHTMLParser.h +++ b/htmlparser/src/nsHTMLParser.h @@ -521,6 +521,7 @@ protected: ITokenizerDelegate* mDelegate; PRInt32 mIteration; char* mTransferBuffer; + PRBool mHasSeenOpenTag; }; diff --git a/htmlparser/src/nsHTMLTokens.cpp b/htmlparser/src/nsHTMLTokens.cpp index af6690dda8e..791846a81f8 100644 --- a/htmlparser/src/nsHTMLTokens.cpp +++ b/htmlparser/src/nsHTMLTokens.cpp @@ -37,6 +37,7 @@ static nsString gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU static nsAutoString gDigits("0123456789"); static nsAutoString gWhitespace(" \t\b"); static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:"); +static const char* gUserdefined = "userdefined"; //debug error messages... static const char* kNullScanner = "Error: Scanner is null."; @@ -99,7 +100,6 @@ struct HTMLTagEntry { eHTMLTags fTagID; }; - // KEEP THIS LIST SORTED! // NOTE: This table is sorted in ascii collating order. 
If you // add a new entry, make sure you put it in the right spot otherwise @@ -121,6 +121,7 @@ HTMLTagEntry gHTMLTagTable[] = {"CERTIFICATE", eHTMLTag_certificate}, {"CITE", eHTMLTag_cite}, {"CODE", eHTMLTag_code}, {"COL", eHTMLTag_col}, {"COLGROUP", eHTMLTag_colgroup}, + {"COMMENT", eHTMLTag_comment}, {"DD", eHTMLTag_dd}, {"DEL", eHTMLTag_del}, {"DFN", eHTMLTag_dfn}, {"DIR", eHTMLTag_dir}, @@ -193,7 +194,6 @@ HTMLTagEntry gHTMLTagTable[] = {"VAR", eHTMLTag_var}, {"WBR", eHTMLTag_wbr}, {"WS", eHTMLTag_whitespace}, - }; @@ -262,6 +262,17 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) { mTagType=eHTMLTag_unknown; } +/* + * constructor from tag id + * + * @update gess 3/25/98 + * @param + * @return + */ +CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) { + mTagType=aTag; +} + /* * * @@ -296,6 +307,17 @@ CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) { mAttributed=PR_FALSE; } +/* + * constructor from tag id + * + * @update gess 3/25/98 + * @param + * @return + */ +CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { + mAttributed=PR_FALSE; +} + /* * default destructor * @@ -1335,17 +1357,15 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString) PRInt32 high=cnt-1; PRInt32 middle=kNotFound; - if (0 != cnt) - while(low<=high) - { - middle=(PRInt32)(low+high)/2; - result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE); - if (result==0) - return gHTMLTagTable[middle].fTagID; - if (result<0) - high=middle-1; - else low=middle+1; - } + while(low<=high){ + middle=(PRInt32)(low+high)/2; + result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE); + if (result==0) + return gHTMLTagTable[middle].fTagID; + if (result<0) + high=middle-1; + else low=middle+1; + } return eHTMLTag_userdefined; } @@ -1357,17 +1377,24 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString) * @return */ const char* GetTagName(PRInt32 aTag) { - const char* result=0; - PRInt32 cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry); 
- - int i=0; - for(i=0;i for this tag). - //Therefore, we need to remove the ones we just created from the tokendeque, - //and destroy them. (They'll get reconsumed on the next incremental pass). - //NOTE: This process can be enhanced later on by adding state to the delegate - // telling us that we're in the attribute consumption phase. - // Remember the mantra: Crawl, Walk, Run! - if(kNoError==result) { - aToken->SetAttributeCount(theAttrCount); - } - else { - while(theAttrCount--) { - delete mTokenDeque.PopBack(); - } - } + aToken->SetAttributeCount(theAttrCount); return result; } @@ -214,9 +198,11 @@ PRInt32 CNavDelegate::ConsumeContentToEndTag(const nsString& aString,PRUnichar a * @return new token or null */ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken*& aToken) { - aToken=new CStartToken(nsAutoString("")); + PRInt32 theDequeSize=mTokenDeque.GetSize(); PRInt32 result=kNoError; + aToken=new CStartToken(nsAutoString("")); + if(aToken) { result= aToken->Consume(aChar,aScanner); //tell new token to finish consuming text... if(kNoError==result) { @@ -251,6 +237,20 @@ PRInt32 CNavDelegate::ConsumeStartTag(PRUnichar aChar,CScanner& aScanner,CToken* } //if } //if } //if + + //EEEEECCCCKKKK!!! + //This code is confusing, so pay attention. + //If you're here, it's because we were in the midst of consuming a start + //tag but ran out of data (not in the stream, but in this *part* of the stream). + //For simplicity, we have to unwind our input. Therefore, we pop and discard + //any new tokens we've queued this round. Later we can get smarter about this. 
+ if(kNoError!=result) { + while(mTokenDeque.GetSize()>theDequeSize) { + delete mTokenDeque.PopBack(); + } + } + + } //if } //if return result; diff --git a/parser/htmlparser/src/nsHTMLParser.cpp b/parser/htmlparser/src/nsHTMLParser.cpp index 73be135c322..4e3f39a7992 100644 --- a/parser/htmlparser/src/nsHTMLParser.cpp +++ b/parser/htmlparser/src/nsHTMLParser.cpp @@ -512,6 +512,7 @@ void GetDelegateAndDTD(eParseMode aMode,ITokenizerDelegate*& aDelegate,nsIDTD*& */ PRInt32 nsHTMLParser::WillBuildModel(void) { mIteration=-1; + mHasSeenOpenTag=PR_FALSE; if(mSink) mSink->WillBuildModel(); return kNoError; @@ -824,7 +825,7 @@ PRInt32 nsHTMLParser::HandleDefaultStartToken(CToken* aToken,eHTMLTags aChildTag if(PR_FALSE==contains){ result=CreateContextStackFor(aChildTag); - if(PR_FALSE==result) { + if(kNoError!=result) { //if you're here, then the new topmost container can't contain aToken. //You must determine what container hierarchy you need to hold aToken, //and create that on the parsestack. @@ -1096,9 +1097,24 @@ PRInt32 nsHTMLParser::HandleAttributeToken(CToken* aToken) { PRInt32 nsHTMLParser::HandleScriptToken(CToken* aToken) { NS_PRECONDITION(0!=aToken,kNullToken); - CScriptToken* st = (CScriptToken*)(aToken); - PRInt32 result=kNoError; - return result; + CScriptToken* st = (CScriptToken*)(aToken); + + eHTMLTokenTypes subtype=eToken_attribute; + nsDeque& deque=mTokenizer->GetDeque(); + nsDequeIterator end=deque.End(); + + if(*mCurrentPos!=end) { + CHTMLToken* tkn=(CHTMLToken*)(++(*mCurrentPos)); + subtype=eHTMLTokenTypes(tkn->GetTokenType()); + if(eToken_skippedcontent==subtype) { + //WE INTENTIONALLY DROP THE TOKEN ON THE FLOOR! + //LATER, we'll pass this onto the javascript system. 
+ return kNoError; + } + else (*mCurrentPos)--; + } + return kInterrupted; + } /** @@ -1531,6 +1547,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){ } } //while } //elseif + else result=kCantPropagate; } //elseif //now, build up the stack according to the tags @@ -1539,8 +1556,7 @@ PRInt32 nsHTMLParser::CreateContextStackFor(PRInt32 aChildTag){ nsAutoString empty; int i=0; for(i=pos;iSetHTMLTag((eHTMLTags)theVector[cnt-1-i]); + CStartToken* st=new CStartToken((eHTMLTags)theVector[cnt-1-i]); HandleStartToken(st); } } @@ -1616,19 +1632,39 @@ nsresult nsHTMLParser::OnStartBinding(void){ * * * @update gess 5/12/98 - * @param - * @return + * @param pIStream contains the input chars + * @param length is the number of bytes waiting input + * @return error code (usually 0) */ nsresult nsHTMLParser::OnDataAvailable(nsIInputStream *pIStream, PRInt32 length){ int len=0; + int offset=0; do { PRInt32 err; len = pIStream->Read(&err, mTransferBuffer, 0, gTransferBufferSize); if(len>0) { + + //Ok -- here's the problem. + //Just because someone throws you some data, doesn't mean that it's + //actually GOOD data. Recently, I encountered a problem where netlib + //was prepending an otherwise valid buffer with a few garbage characters. + //To solve this, I'm adding some debug code here that protects us from + //propagating the bad data upwards. 
+ mTransferBuffer[len]=0; - mTokenizer->Append(mTransferBuffer,len); + if(PR_FALSE==mHasSeenOpenTag) { + for(offset=0;offsetAppend(&mTransferBuffer[offset],len); } } while (len > 0); diff --git a/parser/htmlparser/src/nsHTMLParser.h b/parser/htmlparser/src/nsHTMLParser.h index c3a51b9f75d..776670bb378 100644 --- a/parser/htmlparser/src/nsHTMLParser.h +++ b/parser/htmlparser/src/nsHTMLParser.h @@ -521,6 +521,7 @@ protected: ITokenizerDelegate* mDelegate; PRInt32 mIteration; char* mTransferBuffer; + PRBool mHasSeenOpenTag; }; diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp index af6690dda8e..791846a81f8 100644 --- a/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/parser/htmlparser/src/nsHTMLTokens.cpp @@ -37,6 +37,7 @@ static nsString gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTU static nsAutoString gDigits("0123456789"); static nsAutoString gWhitespace(" \t\b"); static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:"); +static const char* gUserdefined = "userdefined"; //debug error messages... static const char* kNullScanner = "Error: Scanner is null."; @@ -99,7 +100,6 @@ struct HTMLTagEntry { eHTMLTags fTagID; }; - // KEEP THIS LIST SORTED! // NOTE: This table is sorted in ascii collating order. 
If you // add a new entry, make sure you put it in the right spot otherwise @@ -121,6 +121,7 @@ HTMLTagEntry gHTMLTagTable[] = {"CERTIFICATE", eHTMLTag_certificate}, {"CITE", eHTMLTag_cite}, {"CODE", eHTMLTag_code}, {"COL", eHTMLTag_col}, {"COLGROUP", eHTMLTag_colgroup}, + {"COMMENT", eHTMLTag_comment}, {"DD", eHTMLTag_dd}, {"DEL", eHTMLTag_del}, {"DFN", eHTMLTag_dfn}, {"DIR", eHTMLTag_dir}, @@ -193,7 +194,6 @@ HTMLTagEntry gHTMLTagTable[] = {"VAR", eHTMLTag_var}, {"WBR", eHTMLTag_wbr}, {"WS", eHTMLTag_whitespace}, - }; @@ -262,6 +262,17 @@ CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) { mTagType=eHTMLTag_unknown; } +/* + * constructor from tag id + * + * @update gess 3/25/98 + * @param + * @return + */ +CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(GetTagName(aTag)) { + mTagType=aTag; +} + /* * * @@ -296,6 +307,17 @@ CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) { mAttributed=PR_FALSE; } +/* + * constructor from tag id + * + * @update gess 3/25/98 + * @param + * @return + */ +CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { + mAttributed=PR_FALSE; +} + /* * default destructor * @@ -1335,17 +1357,15 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString) PRInt32 high=cnt-1; PRInt32 middle=kNotFound; - if (0 != cnt) - while(low<=high) - { - middle=(PRInt32)(low+high)/2; - result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE); - if (result==0) - return gHTMLTagTable[middle].fTagID; - if (result<0) - high=middle-1; - else low=middle+1; - } + while(low<=high){ + middle=(PRInt32)(low+high)/2; + result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE); + if (result==0) + return gHTMLTagTable[middle].fTagID; + if (result<0) + high=middle-1; + else low=middle+1; + } return eHTMLTag_userdefined; } @@ -1357,17 +1377,24 @@ eHTMLTags DetermineHTMLTagType(const nsString& aString) * @return */ const char* GetTagName(PRInt32 aTag) { - const char* result=0; - PRInt32 cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry); 
- - int i=0; - for(i=0;i