diff --git a/content/base/public/nsContentUtils.h b/content/base/public/nsContentUtils.h index 2cc61f5e04d7..0c04d251c859 100644 --- a/content/base/public/nsContentUtils.h +++ b/content/base/public/nsContentUtils.h @@ -78,7 +78,8 @@ public: static PRUint32 CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, - PRUint32 aLength); + PRUint32 aLength, + PRBool& aLastCharCR); static PRUint32 CopyNewlineNormalizedUnicodeTo(nsReadingIterator& aSrcStart, const nsReadingIterator& aSrcEnd, nsAWritableString& aDest); diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp index 9f257e7dfdd6..9db32f866293 100644 --- a/content/base/src/nsContentUtils.cpp +++ b/content/base/src/nsContentUtils.cpp @@ -217,8 +217,8 @@ class CopyNormalizeNewlines typedef typename OutputIterator::value_type value_type; public: - CopyNormalizeNewlines(OutputIterator* aDestination) : - mLastCharCR(PR_FALSE), + CopyNormalizeNewlines(OutputIterator* aDestination,PRBool aLastCharCR=PR_FALSE) : + mLastCharCR(aLastCharCR), mDestination(aDestination), mWritten(0) { } @@ -227,6 +227,10 @@ class CopyNormalizeNewlines return mWritten; } + PRBool IsLastCharCR() { + return mLastCharCR; + } + PRUint32 write(const typename OutputIterator::value_type* aSource, PRUint32 aSourceLength) { const typename OutputIterator::value_type* done_writing = aSource + aSourceLength; @@ -274,14 +278,21 @@ class CopyNormalizeNewlines // static PRUint32 -nsContentUtils::CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength) +nsContentUtils::CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource, + PRUint32 aSrcOffset, + PRUnichar* aDest, + PRUint32 aLength, + PRBool& aLastCharCR) { typedef NormalizeNewlinesCharTraits sink_traits; sink_traits dest_traits(aDest); - CopyNormalizeNewlines normalizer(&dest_traits); + CopyNormalizeNewlines normalizer(&dest_traits,aLastCharCR); nsReadingIterator fromBegin, fromEnd; - copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), normalizer); + copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), + aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), + normalizer); + aLastCharCR = normalizer.IsLastCharCR(); return normalizer.GetCharsWritten(); } diff --git a/content/html/document/src/nsHTMLContentSink.cpp b/content/html/document/src/nsHTMLContentSink.cpp index c1160eb5238d..a8889e058b9a 100644 --- a/content/html/document/src/nsHTMLContentSink.cpp +++ b/content/html/document/src/nsHTMLContentSink.cpp @@ -2043,6 +2043,7 @@ SinkContext::AddText(const nsAReadableString& aText) // Copy data from string into our buffer; flush buffer when it fills up PRInt32 offset = 0; + PRBool isLastCharCR = PR_FALSE; while (0 != addLen) { PRInt32 amount = mTextSize - mTextLength; if (amount > addLen) { @@ -2058,7 +2059,8 @@ SinkContext::AddText(const nsAReadableString& aText) mTextLength += nsContentUtils::CopyNewlineNormalizedUnicodeTo(aText, offset, &mText[mTextLength], - amount); + amount, + isLastCharCR); offset += amount; addLen -= amount; } @@ -4500,18 +4502,22 @@ HTMLContentSink::ProcessMETATag(const nsIParserNode& aNode) // the preference. if(!mInsideNoXXXTag) { - // set any HTTP-EQUIV data into document's header data as well as url - nsAutoString header; - it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::httpEquiv, header); - if (header.Length() > 0) { - nsAutoString result; - it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::content, result); - if (result.Length() > 0) { - header.ToLowerCase(); - nsCOMPtr fieldAtom(dont_AddRef(NS_NewAtom(header))); - rv=ProcessHeaderData(fieldAtom,result,it); - }//if (result.Length() > 0) - }//if (header.Length() > 0) + // Bug 40072: Don't evaluate METAs after FRAMESET. + if (!mFrameset) { + + // set any HTTP-EQUIV data into document's header data as well as url + nsAutoString header; + it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::httpEquiv, header); + if (header.Length() > 0) { + nsAutoString result; + it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::content, result); + if (result.Length() > 0) { + header.ToLowerCase(); + nsCOMPtr fieldAtom(dont_AddRef(NS_NewAtom(header))); + rv=ProcessHeaderData(fieldAtom,result,it); + }//if (result.Length() > 0) + }//if (header.Length() > 0) + }//if (!mFrameset || !mDocument) }//if(!mInsideNoXXXTag) }//if (NS_OK == rv) }//if (nsnull != parent) diff --git a/content/html/document/src/nsHTMLFragmentContentSink.cpp b/content/html/document/src/nsHTMLFragmentContentSink.cpp index 9f64c9e7ef64..d93c017258ae 100644 --- a/content/html/document/src/nsHTMLFragmentContentSink.cpp +++ b/content/html/document/src/nsHTMLFragmentContentSink.cpp @@ -762,6 +762,7 @@ nsHTMLFragmentContentSink::AddText(const nsAReadableString& aString) // Copy data from string into our buffer; flush buffer when it fills up PRInt32 offset = 0; + PRBool isLastCharCR = PR_FALSE; while (0 != addLen) { PRInt32 amount = mTextSize - mTextLength; if (amount > addLen) { @@ -774,9 +775,11 @@ nsHTMLFragmentContentSink::AddText(const nsAReadableString& aString) } } mTextLength += - nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, offset, + nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, + offset, &mText[mTextLength], - amount); + amount, + isLastCharCR); offset += amount; addLen -= amount; } diff --git a/content/xml/document/src/nsXMLContentSink.cpp b/content/xml/document/src/nsXMLContentSink.cpp index bfcd1290f7ff..ef97dd94f130 100644 --- a/content/xml/document/src/nsXMLContentSink.cpp +++ b/content/xml/document/src/nsXMLContentSink.cpp @@ -1446,6 +1446,7 @@ nsXMLContentSink::AddText(const nsAReadableString& aString) // Copy data from string into our buffer; flush buffer when it fills up PRInt32 offset = 0; + PRBool isLastCharCR = PR_FALSE; while (0 != addLen) { PRInt32 amount = mTextSize - mTextLength; if (amount > addLen) { @@ -1467,9 +1468,11 @@ nsXMLContentSink::AddText(const nsAReadableString& aString) } } mTextLength += - nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, offset, + nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, + offset, &mText[mTextLength], - amount); + amount, + isLastCharCR); offset += amount; addLen -= amount; } diff --git a/htmlparser/src/nsHTMLTokenizer.cpp b/htmlparser/src/nsHTMLTokenizer.cpp index b54d09d9dfe6..1ce4fd18b990 100644 --- a/htmlparser/src/nsHTMLTokenizer.cpp +++ b/htmlparser/src/nsHTMLTokenizer.cpp @@ -752,14 +752,12 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { if(gHTMLElements[theTag].CanContainType(kCDATA)) { - nsAutoString endText, endTagName; + nsAutoString endTagName; endTagName.AssignWithConversion(nsHTMLTags::GetStringValue(theTag)); - endText.Assign(endTagName); - endText.InsertWithConversion("CreateTokenOfType(eToken_text,eHTMLTag_text); CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); - result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endText,mFlags,aFlushTokens); //tell new token to finish consuming text... + result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text... // Fix bug 44186 // Support XML like syntax, i.e., @@ -841,35 +839,24 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne nsresult result=aScanner.Peek(theChar, 1); nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - if(NS_OK==result) { - if(nsCRT::IsAsciiAlpha(theChar)) { //handle common enity references &xxx; or �. - // Get the "&" - aScanner.GetChar(theChar); - aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); + if (NS_SUCCEEDED(result)) { + if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) { + aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); + result=aToken->Consume(theChar,aScanner,mFlags); - // Get the first entity character - aScanner.GetChar(theChar); - result = aToken->Consume(theChar,aScanner,mFlags); //tell new token to finish consuming text... - } - else if(kHashsign==theChar) { - // Get the "&" - aScanner.GetChar(theChar); - aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); - - // Get the first numerical entity character - aScanner.GetChar(theChar); - result=aToken->Consume(theChar,aScanner,mFlags); - } - else { - //oops, we're actually looking at plain text... - return ConsumeText(aToken,aScanner); - }//if - if(aToken){ - if(mIsFinalChunk && (kEOF==result)) { - result=NS_OK; //use as much of the entity as you can get. + if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { + IF_FREE(aToken, mTokenAllocator); + } + else { + if (mIsFinalChunk && result == kEOF) { + result=NS_OK; //use as much of the entity as you can get. + } + AddToken(aToken,result,&mTokenDeque,theAllocator); + return result; } - AddToken(aToken,result,&mTokenDeque,theAllocator); } + // oops, we're actually looking at plain text... + result = ConsumeText(aToken,aScanner); }//if return result; } diff --git a/htmlparser/src/nsHTMLTokens.cpp b/htmlparser/src/nsHTMLTokens.cpp index 6e0ad0a38239..abc244e589ec 100644 --- a/htmlparser/src/nsHTMLTokens.cpp +++ b/htmlparser/src/nsHTMLTokens.cpp @@ -599,12 +599,11 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) * @return error result */ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aTerminalString,PRInt32 aFlag,PRBool& aFlushTokens){ + nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){ nsresult result=NS_OK; nsReadingIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos; PRBool done=PR_FALSE; PRBool theLastIteration=PR_FALSE; - PRInt32 termStrLen=aTerminalString.Length(); aScanner.CurrentPosition(theStartOffset); theCurrOffset = theStartOffset; @@ -627,95 +626,82 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann // 5. If the end of the document is reached and if we still don't have the condition in step 4. then // assume that the prematured terminal string is the actual terminal string and goto step 1. This // will be our last iteration. + nsAutoString theTerminalString(aEndTagName); + theTerminalString.InsertWithConversion(" tempOffset = theCurrOffset; - while(1) { - if (FindCharInReadable(PRUnichar(kGreaterThan), tempOffset, endPos)) { - - // Make a copy of the (presumed) end tag and - // do a case-insensitive comparision - nsAutoString str; + PRBool found = PR_FALSE; + nsReadingIterator gtOffset,ltOffset = theCurrOffset; + while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) && + Distance(ltOffset, endPos) >= termStrLen) { + // Make a copy of the (presumed) end tag and + // do a case-insensitive comparision - nsReadingIterator start(tempOffset), end(tempOffset); - start.advance(-termStrLen); + nsReadingIterator start(ltOffset), end(ltOffset); + end.advance(termStrLen); - CopyUnicodeTo(start, end, str); - - if (str.EqualsIgnoreCase(aTerminalString)) { - theTermStrPos = tempOffset; - theTermStrPos.advance(-termStrLen); - break; - } - tempOffset.advance(1); + if (CaseInsensitiveFindInReadable(theTerminalString,start,end) && + end != endPos && (*end == '>' || *end == ' ' || + *end == '\t' || *end == '\n' || + *end == '\r' || *end == '\b')) { + gtOffset = end; + if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) { + found = PR_TRUE; + theTermStrPos = start; } - else { - // Ran out of data and haven't found the terminal string yet. - // Note: If a bogus terminal string is found it would have - // been stored in theAltTermStrPos; Bug: 64576 - theTermStrPos=endPos; - break; // we have reached the end of the document + break; + } + ltOffset.advance(1); + } + + if (found && theTermStrPos != endPos) { + if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) && + !(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) && + !theLastIteration && !aIgnoreComments) { + nsReadingIterator endComment(ltOffset); + endComment.advance(5); + + if ((theStartCommentPos == endPos) && + FindInReadable(NS_LITERAL_STRING(" between "), + theCurrOffset, terminal)) { + // If you're here it means that we have a bogus terminal string. + // Even though it is bogus, the position of the terminal string + // could be helpful in case we hit the rock bottom. + theAltTermStrPos = theTermStrPos; + + // We did not find '-->' so keep searching for terminal string. + theCurrOffset = theTermStrPos; + theCurrOffset.advance(termStrLen); + continue; + } } } - if (theTermStrPos != endPos) { - if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) && - !(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) && - !theLastIteration && !aIgnoreComments) { - nsReadingIterator endComment(theCurrOffset); - endComment.advance(5); - if ((theStartCommentPos == endPos) && - FindInReadable(NS_LITERAL_STRING(" between "), - theCurrOffset, terminal)) { - // If you're here it means that we have a bogus terminal string. - - // Even though it is bogus, the position of the terminal string - // could be helpful in case we hit the rock bottom. - theAltTermStrPos = theTermStrPos; - - // We did not find '-->' so keep searching for terminal string. - theCurrOffset = theTermStrPos; - theCurrOffset.advance(termStrLen); - continue; - } - } - } - - disaster=PR_FALSE; - - aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); - - theTermStrPos.advance(termStrLen+1); - aScanner.SetPosition(theTermStrPos); - - // We found ...permit flushing -> Ref: Bug 22485 - aFlushTokens=PR_TRUE; - done = PR_TRUE; - } - else { - disaster = PR_TRUE; + // Make sure to preserve the end tag's representation in viewsource + if(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName); } + + aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); + aScanner.SetPosition(gtOffset.advance(1)); + + // We found ...permit flushing -> Ref: Bug 22485 + aFlushTokens=PR_TRUE; + done = PR_TRUE; } else { - disaster = PR_TRUE; - } - - - if(disaster) { + // We end up here if: + // a) when the buffer runs out ot data. + // b) when the terminal string is not found. if(!aScanner.IsIncremental()) { if(theAltTermStrPos != endPos) { // If you're here it means..we hit the rock bottom and therefore switch to plan B. @@ -723,15 +709,15 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann theLastIteration = PR_TRUE; } else { - aTerminalString.Cut(0,2); done = PR_TRUE; // Do this to fix Bug. 35456 } } - else + else { result=kEOF; + } } } - return result; + return result; } void CTextToken::CopyTo(nsAWritableString& aStr) @@ -1496,7 +1482,6 @@ nsresult ConsumeAttributeEntity(nsString& aString, nsAutoString entity; if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.GetChar(amp); // Get '&' result=CEntityToken::ConsumeEntity(ch,entity,aScanner); if (NS_SUCCEEDED(result)) { theNCRValue = nsHTMLEntities::EntityToUnicode(entity); @@ -1507,7 +1492,7 @@ nsresult ConsumeAttributeEntity(nsString& aString, // Resembling IE!! if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) { // Looks like we're not dealing with an entity - aString.Append(amp); + aString.Append(kAmpersand); aString.Append(entity); } else { @@ -1517,12 +1502,19 @@ nsresult ConsumeAttributeEntity(nsString& aString, } } else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.GetChar(amp); // Discard '&' - PRInt32 err; result=CEntityToken::ConsumeEntity(ch,entity,aScanner); if (NS_SUCCEEDED(result)) { - theNCRValue=entity.ToInteger(&err,kAutoDetect); - aString.Append(PRUnichar(theNCRValue)); + if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { + // Looked like an entity but it's not + aScanner.GetChar(amp); + aString.Append(amp); + result = NS_OK; // just being safe.. + } + else { + PRInt32 err; + theNCRValue=entity.ToInteger(&err,kAutoDetect); + aString.Append(PRUnichar(theNCRValue)); + } } } else { @@ -1580,7 +1572,7 @@ nsresult ConsumeAttributeValueText(nsString& aString, * @return error result */ static -nsresult ConsumeQuottedString(PRUnichar aChar, +nsresult ConsumeQuotedString(PRUnichar aChar, nsString& aString, nsScanner& aScanner, PRInt32 aFlag) @@ -1639,128 +1631,107 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a result = aScanner.SkipWhitespace(); } - if(NS_OK==result) { - result=aScanner.Peek(aChar); - if(NS_OK==result) { - - nsReadingIterator start, end; - if((kHashsign==aChar) || (nsCRT::IsAsciiDigit(aChar))){ - result=aScanner.ReadNumber(start, end); + if (NS_OK==result) { + static const PRUnichar theTerminalsChars[] = + { PRUnichar(' '), PRUnichar('"'), + PRUnichar('='), PRUnichar('\n'), + PRUnichar('\r'), PRUnichar('\t'), + PRUnichar('>'), PRUnichar('\b'), + PRUnichar(0) }; + + nsReadingIterator start, end; + const nsDependentString theTerminals(theTerminalsChars, + sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1); + result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE); + + if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + aScanner.BindSubstring(mTextKey, start, end); + } + + //now it's time to Consume the (optional) value... + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(start, wsend); + aScanner.BindSubstring(mTextKey, wsstart, wsend); } else { - //If you're here, handle an unquoted key. - static const PRUnichar theTerminalsChars[] = - { PRUnichar('\b'), PRUnichar('\t'), PRUnichar('\n'), PRUnichar('\r'), - PRUnichar(' '), PRUnichar('"'), PRUnichar('='), PRUnichar('>'), - PRUnichar(0) }; - const nsDependentString theTerminals(theTerminalsChars, - sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1); - result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE); - } - if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.BindSubstring(mTextKey, start, end); + result = aScanner.SkipWhitespace(); } - //now it's time to Consume the (optional) value... - if(NS_OK==result) { + if (NS_OK==result) { + result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... + if (NS_OK==result) { + if (kEqual==aChar){ + result=aScanner.GetChar(aChar); //skip the equal sign... + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(mTextValue); + } + else { + result = aScanner.SkipWhitespace(); + } - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(start, wsend); - aScanner.BindSubstring(mTextKey, wsstart, wsend); - } - else { - result = aScanner.SkipWhitespace(); - } - - if(NS_OK==result) { - result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... - if(NS_OK==result) { - if(kEqual==aChar){ - result=aScanner.GetChar(aChar); //skip the equal sign... - if(NS_OK==result) { - - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue); - } - else { - result = aScanner.SkipWhitespace(); - } - - if(NS_OK==result) { - result=aScanner.Peek(aChar); //and grab the next char. - if(NS_OK==result) { - if((kQuote==aChar) || (kApostrophe==aChar)) { - aScanner.GetChar(aChar); - result=ConsumeQuottedString(aChar,mTextValue,aScanner,aFlag); - if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - mTextValue.Insert(aChar,0); - mTextValue.Append(aChar); - } - // According to spec. we ( who? ) should ignore linefeeds. But look, - // even the carriage return was getting stripped ( wonder why! ) - - // Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds, - // bug then what about bug 47535 ? Should we preserve everything then? - // Well, let's make it so! Commenting out the next two lines.. - /*if(!aRetain) - mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds... - */ - } - else if(kGreaterThan==aChar){ - mHasEqualWithoutValue=PR_TRUE; - } - else if(kAmpersand==aChar) { - // XXX - Discard script entity for now....except in - // view-source - aScanner.GetChar(aChar); - PRBool discard=!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE); + if (NS_OK==result) { + result=aScanner.Peek(aChar); //and grab the next char. + if (NS_OK==result) { + if ((kQuote==aChar) || (kApostrophe==aChar)) { + aScanner.GetChar(aChar); + result=ConsumeQuotedString(aChar,mTextValue,aScanner,aFlag); + if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + mTextValue.Insert(aChar,0); mTextValue.Append(aChar); - result=aScanner.GetChar(aChar); - if(NS_OK==result) { - mTextValue.Append(aChar); - result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner); - } - if(discard) mTextValue.Truncate(); - } - else { - aScanner.GetChar(aChar); - mTextValue.Append(aChar); //it's an alphanum attribute... - result=ConsumeAttributeValueText(mTextValue,aScanner,kAttributeTerminalChars,aFlag); - } - }//if - if(NS_OK==result) { - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue); - } - else { - result = aScanner.SkipWhitespace(); } + // According to spec. we ( who? ) should ignore linefeeds. But look, + // even the carriage return was getting stripped ( wonder why! ) - + // Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds, + // bug then what about bug 47535 ? Should we preserve everything then? + // Well, let's make it so! Commenting out the next two lines.. + /*if(!aRetain) + mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds... + */ } + else if (kGreaterThan==aChar){ + mHasEqualWithoutValue=PR_TRUE; + } + else { + result=ConsumeAttributeValueText(mTextValue, + aScanner, + kAttributeTerminalChars, + aFlag); + } }//if + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(mTextValue); + } + else { + result = aScanner.SkipWhitespace(); + } + } }//if }//if - else { - //This is where we have to handle fairly busted content. - //If you're here, it means we saw an attribute name, but couldn't find - //the following equal sign. REALLY ugly. - //My best guess is to grab the next non-ws char. We know it's not '=', - //so let's see what it is. If it's a '"', then assume we're reading - //from the middle of the value. Try stripping the quote and continuing... - - if(kQuote==aChar){ - result=aScanner.SkipOver(aChar); //strip quote. - } - } }//if - } //if - }//if (consume optional value) + else { + //This is where we have to handle fairly busted content. + //If you're here, it means we saw an attribute name, but couldn't find + //the following equal sign. REALLY ugly. + //My best guess is to grab the next non-ws char. We know it's not '=', + //so let's see what it is. If it's a '"', then assume we're reading + //from the middle of the value. Try stripping the quote and continuing... + if (kQuote==aChar){ + result=aScanner.SkipOver(aChar); //strip quote. + } + } + }//if + } //if + }//if (consume optional value) - if(NS_OK==result) { - result=aScanner.Peek(aChar); - mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result)); - } - } //if + if (NS_OK==result) { + result=aScanner.Peek(aChar); + mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result)); + } }//if return result; } @@ -1900,8 +1871,6 @@ CEntityToken::CEntityToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag * @return error result */ nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - if(aChar) - mTextValue.Assign(aChar); nsresult result=ConsumeEntity(aChar,mTextValue,aScanner); return result; } @@ -1939,52 +1908,80 @@ PRInt32 CEntityToken::GetTokenType(void) { * @param aScanner -- controller of underlying input source * @return error result */ -PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){ - PRUnichar theChar=0; - PRInt32 result=aScanner.Peek(theChar); - if(NS_OK==result) { - if(kLeftBrace==aChar) { - //you're consuming a script entity... - PRInt32 rightBraceCount = 0; - PRInt32 leftBraceCount = 1; - while(leftBraceCount!=rightBraceCount) { - result=aScanner.GetChar(aChar); - if(NS_OK!=result) return result; - aString += aChar; - if(aChar==kRightBrace) - rightBraceCount++; - else if(aChar==kLeftBrace) - leftBraceCount++; - } - result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE); - if(NS_OK==result) { - result=aScanner.GetChar(aChar); // This character should be a semicolon - if(NS_OK==result) aString += aChar; - } - } //if - else { - if(kHashsign==aChar) { - if('X'==(toupper((char)theChar))) { - result=aScanner.GetChar(theChar); - aString+=theChar; - } - if(NS_OK==result){ - result=aScanner.ReadNumber(aString); - } - } - else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE. - if(NS_OK==result) { - result=aScanner.Peek(theChar); - if(NS_OK==result) { - if (kSemicolon == theChar) { - // consume semicolon that stopped the scan - aString+=theChar; - result=aScanner.GetChar(theChar); - } - } - }//if - } //else +nsresult +CEntityToken::ConsumeEntity(PRUnichar aChar, + nsString& aString, + nsScanner& aScanner) { + nsresult result=NS_OK; + if(kLeftBrace==aChar) { + //you're consuming a script entity... + aScanner.GetChar(aChar); // Consume & + + PRInt32 rightBraceCount = 0; + PRInt32 leftBraceCount = 0; + + do { + result=aScanner.GetChar(aChar); + NS_ENSURE_SUCCESS(result,result); + + aString.Append(aChar); + if(aChar==kRightBrace) + rightBraceCount++; + else if(aChar==kLeftBrace) + leftBraceCount++; + } while(leftBraceCount!=rightBraceCount); } //if + else { + PRUnichar theChar=0; + if (kHashsign==aChar) { + result = aScanner.Peek(theChar,2); + NS_ENSURE_SUCCESS(result,result); + + if (nsCRT::IsAsciiDigit(theChar)) { + aScanner.GetChar(aChar); // Consume & + aScanner.GetChar(aChar); // Consume # + aString.Assign(aChar); + result=aScanner.ReadNumber(aString,10); + } + else if (theChar == 'x' || theChar == 'X') { + aScanner.GetChar(aChar); // Consume & + aScanner.GetChar(aChar); // Consume # + aScanner.GetChar(theChar); // Consume x + aString.Assign(aChar); + aString.Append(theChar); + result=aScanner.ReadNumber(aString,16); + } + else { + return NS_HTMLTOKENS_NOT_AN_ENTITY; + } + } + else { + result = aScanner.Peek(theChar,1); + NS_ENSURE_SUCCESS(result,result); + + if(nsCRT::IsAsciiAlpha(theChar) || + theChar == '_' || + theChar == ':') { + aScanner.GetChar(aChar); // Consume & + result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE. + } + else { + return NS_HTMLTOKENS_NOT_AN_ENTITY; + } + } + } + + NS_ENSURE_SUCCESS(result,result); + + result=aScanner.Peek(aChar); + NS_ENSURE_SUCCESS(result,result); + + if (aChar == kSemicolon) { + // consume semicolon that stopped the scan + aString.Append(aChar); + result=aScanner.GetChar(aChar); + } + return result; } diff --git a/htmlparser/src/nsHTMLTokens.h b/htmlparser/src/nsHTMLTokens.h index 743208a164ea..31edcbe0759b 100644 --- a/htmlparser/src/nsHTMLTokens.h +++ b/htmlparser/src/nsHTMLTokens.h @@ -232,7 +232,7 @@ class CEntityToken : public CHTMLToken { virtual PRInt32 GetTokenType(void); PRInt32 TranslateToUnicodeStr(nsString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner); + static nsresult ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner); static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString); virtual void DebugDumpSource(nsOutputStream& out); virtual const nsAReadableString& GetStringValue(void); @@ -281,7 +281,7 @@ class CTextToken: public CHTMLToken { CTextToken(const nsAReadableString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); nsresult ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens); + nsString& aEndTagName,PRInt32 aMode,PRBool& aFlushTokens); virtual const char* GetClassName(void); virtual PRInt32 GetTokenType(void); virtual PRInt32 GetTextLength(void); diff --git a/htmlparser/src/nsParser.cpp b/htmlparser/src/nsParser.cpp index 010efdf66c71..e3d5fae1c290 100644 --- a/htmlparser/src/nsParser.cpp +++ b/htmlparser/src/nsParser.cpp @@ -2476,7 +2476,7 @@ nsParser::DetectMetaTag(const char* aBytes, const char* attrEnd; // Find the end of the tag - FindInReadable(NS_LITERAL_CSTRING(">"), tagEnd, end); + FindCharInReadable('>', tagEnd, end); attrEnd = tagEnd.get(); CWordTokenizer tokenizer(attrStart, 0, attrEnd-attrStart); @@ -2517,7 +2517,7 @@ nsParser::DetectMetaTag(const char* aBytes, (nsCRT::strncasecmp(contentStart+offset, kCharsetStr, kCharsetStrLen) == 0)) { // The next word is the charset - if ((offset = contentTokenizer.GetNextWord()) != kNotFound) { + if ((offset = contentTokenizer.GetNextWord(PR_TRUE)) != kNotFound) { aCharset.Assign(NS_ConvertASCIItoUCS2(contentStart+offset, contentTokenizer.GetLength())); } diff --git a/htmlparser/src/nsScanner.cpp b/htmlparser/src/nsScanner.cpp index 65976ce07196..3b93befcb5d0 100644 --- a/htmlparser/src/nsScanner.cpp +++ b/htmlparser/src/nsScanner.cpp @@ -874,43 +874,36 @@ nsresult nsScanner::ReadIdentifier(nsReadingIterator& aStart, } /** - * Consume characters until you find the terminal char + * Consume digits * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param addTerminal tells us whether to append terminal to aString + * @param aString - should contain digits * @return error code */ -nsresult nsScanner::ReadNumber(nsString& aString) { +nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) { if (!mSlidingBuffer) { return kEOF; } + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); + PRUnichar theChar=0; nsresult result=Peek(theChar); nsReadingIterator origin, current, end; - PRBool found=PR_FALSE; origin = mCurrentPosition; current = origin; end = mEndPosition; + PRBool done = PR_FALSE; while(current != end) { - theChar=*current; if(theChar) { - found=PR_FALSE; - if(('a'<=theChar) && (theChar<='f')) - found=PR_TRUE; - else if(('A'<=theChar) && (theChar<='F')) - found=PR_TRUE; - else if(('0'<=theChar) && (theChar<='9')) - found=PR_TRUE; - else if('#'==theChar) - found=PR_TRUE; - - if(!found) { + done = (theChar < '0' || theChar > '9') && + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && + (theChar < 'a' || theChar > 'f') + :PR_TRUE); + if(done) { AppendUnicodeTo(origin, current, aString); break; } @@ -930,36 +923,32 @@ nsresult nsScanner::ReadNumber(nsString& aString) { } nsresult nsScanner::ReadNumber(nsReadingIterator& aStart, - nsReadingIterator& aEnd) { + nsReadingIterator& aEnd, + PRInt32 aBase) { if (!mSlidingBuffer) { return kEOF; } + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); + PRUnichar theChar=0; nsresult result=Peek(theChar); nsReadingIterator origin, current, end; - PRBool found=PR_FALSE; origin = mCurrentPosition; current = origin; end = mEndPosition; + PRBool done = PR_FALSE; while(current != end) { - theChar=*current; if(theChar) { - found=PR_FALSE; - if(('a'<=theChar) && (theChar<='f')) - found=PR_TRUE; - else if(('A'<=theChar) && (theChar<='F')) - found=PR_TRUE; - else if(('0'<=theChar) && (theChar<='9')) - found=PR_TRUE; - else if('#'==theChar) - found=PR_TRUE; - - if(!found) { + done = (theChar < '0' || theChar > '9') && + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && + (theChar < 'a' || theChar > 'f') + :PR_TRUE); + if(done) { aStart = origin; aEnd = current; break; diff --git a/htmlparser/src/nsScanner.h b/htmlparser/src/nsScanner.h index 64f6f174102a..bfb70c7ae025 100644 --- a/htmlparser/src/nsScanner.h +++ b/htmlparser/src/nsScanner.h @@ -184,9 +184,10 @@ class nsScanner { nsresult ReadIdentifier(nsReadingIterator& aStart, nsReadingIterator& aEnd, PRBool allowPunct=PR_FALSE); - nsresult ReadNumber(nsString& aString); + nsresult ReadNumber(nsString& aString,PRInt32 aBase); nsresult ReadNumber(nsReadingIterator& aStart, - nsReadingIterator& aEnd); + nsReadingIterator& aEnd, + PRInt32 aBase); nsresult ReadWhitespace(nsString& aString); nsresult ReadWhitespace(nsReadingIterator& aStart, nsReadingIterator& aEnd); diff --git a/htmlparser/src/nsToken.h b/htmlparser/src/nsToken.h index eb834f983bef..afa249ded9a5 100644 --- a/htmlparser/src/nsToken.h +++ b/htmlparser/src/nsToken.h @@ -59,6 +59,9 @@ #include "nsFileSpec.h" #include "nsFixedSizeAllocator.h" +#define NS_HTMLTOKENS_NOT_AN_ENTITY \ + NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_HTMLPARSER,2000) + class nsScanner; class nsTokenAllocator; diff --git a/parser/htmlparser/src/nsHTMLTokenizer.cpp b/parser/htmlparser/src/nsHTMLTokenizer.cpp index b54d09d9dfe6..1ce4fd18b990 100644 --- a/parser/htmlparser/src/nsHTMLTokenizer.cpp +++ b/parser/htmlparser/src/nsHTMLTokenizer.cpp @@ -752,14 +752,12 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan //if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) { if(gHTMLElements[theTag].CanContainType(kCDATA)) { - nsAutoString endText, endTagName; + nsAutoString endTagName; endTagName.AssignWithConversion(nsHTMLTags::GetStringValue(theTag)); - endText.Assign(endTagName); - endText.InsertWithConversion("CreateTokenOfType(eToken_text,eHTMLTag_text); CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text); - result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endText,mFlags,aFlushTokens); //tell new token to finish consuming text... + result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text... // Fix bug 44186 // Support XML like syntax, i.e., @@ -841,35 +839,24 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne nsresult result=aScanner.Peek(theChar, 1); nsTokenAllocator* theAllocator=this->GetTokenAllocator(); - if(NS_OK==result) { - if(nsCRT::IsAsciiAlpha(theChar)) { //handle common enity references &xxx; or �. - // Get the "&" - aScanner.GetChar(theChar); - aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); + if (NS_SUCCEEDED(result)) { + if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) { + aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); + result=aToken->Consume(theChar,aScanner,mFlags); - // Get the first entity character - aScanner.GetChar(theChar); - result = aToken->Consume(theChar,aScanner,mFlags); //tell new token to finish consuming text... - } - else if(kHashsign==theChar) { - // Get the "&" - aScanner.GetChar(theChar); - aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity); - - // Get the first numerical entity character - aScanner.GetChar(theChar); - result=aToken->Consume(theChar,aScanner,mFlags); - } - else { - //oops, we're actually looking at plain text... - return ConsumeText(aToken,aScanner); - }//if - if(aToken){ - if(mIsFinalChunk && (kEOF==result)) { - result=NS_OK; //use as much of the entity as you can get. + if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { + IF_FREE(aToken, mTokenAllocator); + } + else { + if (mIsFinalChunk && result == kEOF) { + result=NS_OK; //use as much of the entity as you can get. + } + AddToken(aToken,result,&mTokenDeque,theAllocator); + return result; } - AddToken(aToken,result,&mTokenDeque,theAllocator); } + // oops, we're actually looking at plain text... + result = ConsumeText(aToken,aScanner); }//if return result; } diff --git a/parser/htmlparser/src/nsHTMLTokens.cpp b/parser/htmlparser/src/nsHTMLTokens.cpp index 6e0ad0a38239..abc244e589ec 100644 --- a/parser/htmlparser/src/nsHTMLTokens.cpp +++ b/parser/htmlparser/src/nsHTMLTokens.cpp @@ -599,12 +599,11 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) * @return error result */ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aTerminalString,PRInt32 aFlag,PRBool& aFlushTokens){ + nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){ nsresult result=NS_OK; nsReadingIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos; PRBool done=PR_FALSE; PRBool theLastIteration=PR_FALSE; - PRInt32 termStrLen=aTerminalString.Length(); aScanner.CurrentPosition(theStartOffset); theCurrOffset = theStartOffset; @@ -627,95 +626,82 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann // 5. If the end of the document is reached and if we still don't have the condition in step 4. then // assume that the prematured terminal string is the actual terminal string and goto step 1. This // will be our last iteration. + nsAutoString theTerminalString(aEndTagName); + theTerminalString.InsertWithConversion(" tempOffset = theCurrOffset; - while(1) { - if (FindCharInReadable(PRUnichar(kGreaterThan), tempOffset, endPos)) { - - // Make a copy of the (presumed) end tag and - // do a case-insensitive comparision - nsAutoString str; + PRBool found = PR_FALSE; + nsReadingIterator gtOffset,ltOffset = theCurrOffset; + while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) && + Distance(ltOffset, endPos) >= termStrLen) { + // Make a copy of the (presumed) end tag and + // do a case-insensitive comparision - nsReadingIterator start(tempOffset), end(tempOffset); - start.advance(-termStrLen); + nsReadingIterator start(ltOffset), end(ltOffset); + end.advance(termStrLen); - CopyUnicodeTo(start, end, str); - - if (str.EqualsIgnoreCase(aTerminalString)) { - theTermStrPos = tempOffset; - theTermStrPos.advance(-termStrLen); - break; - } - tempOffset.advance(1); + if (CaseInsensitiveFindInReadable(theTerminalString,start,end) && + end != endPos && (*end == '>' || *end == ' ' || + *end == '\t' || *end == '\n' || + *end == '\r' || *end == '\b')) { + gtOffset = end; + if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) { + found = PR_TRUE; + theTermStrPos = start; } - else { - // Ran out of data and haven't found the terminal string yet. - // Note: If a bogus terminal string is found it would have - // been stored in theAltTermStrPos; Bug: 64576 - theTermStrPos=endPos; - break; // we have reached the end of the document + break; + } + ltOffset.advance(1); + } + + if (found && theTermStrPos != endPos) { + if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) && + !(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) && + !theLastIteration && !aIgnoreComments) { + nsReadingIterator endComment(ltOffset); + endComment.advance(5); + + if ((theStartCommentPos == endPos) && + FindInReadable(NS_LITERAL_STRING(" between "), + theCurrOffset, terminal)) { + // If you're here it means that we have a bogus terminal string. + // Even though it is bogus, the position of the terminal string + // could be helpful in case we hit the rock bottom. + theAltTermStrPos = theTermStrPos; + + // We did not find '-->' so keep searching for terminal string. + theCurrOffset = theTermStrPos; + theCurrOffset.advance(termStrLen); + continue; + } } } - if (theTermStrPos != endPos) { - if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) && - !(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) && - !theLastIteration && !aIgnoreComments) { - nsReadingIterator endComment(theCurrOffset); - endComment.advance(5); - if ((theStartCommentPos == endPos) && - FindInReadable(NS_LITERAL_STRING(" between "), - theCurrOffset, terminal)) { - // If you're here it means that we have a bogus terminal string. - - // Even though it is bogus, the position of the terminal string - // could be helpful in case we hit the rock bottom. - theAltTermStrPos = theTermStrPos; - - // We did not find '-->' so keep searching for terminal string. - theCurrOffset = theTermStrPos; - theCurrOffset.advance(termStrLen); - continue; - } - } - } - - disaster=PR_FALSE; - - aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); - - theTermStrPos.advance(termStrLen+1); - aScanner.SetPosition(theTermStrPos); - - // We found ...permit flushing -> Ref: Bug 22485 - aFlushTokens=PR_TRUE; - done = PR_TRUE; - } - else { - disaster = PR_TRUE; + // Make sure to preserve the end tag's representation in viewsource + if(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName); } + + aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos); + aScanner.SetPosition(gtOffset.advance(1)); + + // We found ...permit flushing -> Ref: Bug 22485 + aFlushTokens=PR_TRUE; + done = PR_TRUE; } else { - disaster = PR_TRUE; - } - - - if(disaster) { + // We end up here if: + // a) when the buffer runs out ot data. + // b) when the terminal string is not found. if(!aScanner.IsIncremental()) { if(theAltTermStrPos != endPos) { // If you're here it means..we hit the rock bottom and therefore switch to plan B. @@ -723,15 +709,15 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann theLastIteration = PR_TRUE; } else { - aTerminalString.Cut(0,2); done = PR_TRUE; // Do this to fix Bug. 35456 } } - else + else { result=kEOF; + } } } - return result; + return result; } void CTextToken::CopyTo(nsAWritableString& aStr) @@ -1496,7 +1482,6 @@ nsresult ConsumeAttributeEntity(nsString& aString, nsAutoString entity; if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.GetChar(amp); // Get '&' result=CEntityToken::ConsumeEntity(ch,entity,aScanner); if (NS_SUCCEEDED(result)) { theNCRValue = nsHTMLEntities::EntityToUnicode(entity); @@ -1507,7 +1492,7 @@ nsresult ConsumeAttributeEntity(nsString& aString, // Resembling IE!! if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) { // Looks like we're not dealing with an entity - aString.Append(amp); + aString.Append(kAmpersand); aString.Append(entity); } else { @@ -1517,12 +1502,19 @@ nsresult ConsumeAttributeEntity(nsString& aString, } } else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.GetChar(amp); // Discard '&' - PRInt32 err; result=CEntityToken::ConsumeEntity(ch,entity,aScanner); if (NS_SUCCEEDED(result)) { - theNCRValue=entity.ToInteger(&err,kAutoDetect); - aString.Append(PRUnichar(theNCRValue)); + if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) { + // Looked like an entity but it's not + aScanner.GetChar(amp); + aString.Append(amp); + result = NS_OK; // just being safe.. + } + else { + PRInt32 err; + theNCRValue=entity.ToInteger(&err,kAutoDetect); + aString.Append(PRUnichar(theNCRValue)); + } } } else { @@ -1580,7 +1572,7 @@ nsresult ConsumeAttributeValueText(nsString& aString, * @return error result */ static -nsresult ConsumeQuottedString(PRUnichar aChar, +nsresult ConsumeQuotedString(PRUnichar aChar, nsString& aString, nsScanner& aScanner, PRInt32 aFlag) @@ -1639,128 +1631,107 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a result = aScanner.SkipWhitespace(); } - if(NS_OK==result) { - result=aScanner.Peek(aChar); - if(NS_OK==result) { - - nsReadingIterator start, end; - if((kHashsign==aChar) || (nsCRT::IsAsciiDigit(aChar))){ - result=aScanner.ReadNumber(start, end); + if (NS_OK==result) { + static const PRUnichar theTerminalsChars[] = + { PRUnichar(' '), PRUnichar('"'), + PRUnichar('='), PRUnichar('\n'), + PRUnichar('\r'), PRUnichar('\t'), + PRUnichar('>'), PRUnichar('\b'), + PRUnichar(0) }; + + nsReadingIterator start, end; + const nsDependentString theTerminals(theTerminalsChars, + sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1); + result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE); + + if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + aScanner.BindSubstring(mTextKey, start, end); + } + + //now it's time to Consume the (optional) value... + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(start, wsend); + aScanner.BindSubstring(mTextKey, wsstart, wsend); } else { - //If you're here, handle an unquoted key. - static const PRUnichar theTerminalsChars[] = - { PRUnichar('\b'), PRUnichar('\t'), PRUnichar('\n'), PRUnichar('\r'), - PRUnichar(' '), PRUnichar('"'), PRUnichar('='), PRUnichar('>'), - PRUnichar(0) }; - const nsDependentString theTerminals(theTerminalsChars, - sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1); - result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE); - } - if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - aScanner.BindSubstring(mTextKey, start, end); + result = aScanner.SkipWhitespace(); } - //now it's time to Consume the (optional) value... - if(NS_OK==result) { + if (NS_OK==result) { + result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... + if (NS_OK==result) { + if (kEqual==aChar){ + result=aScanner.GetChar(aChar); //skip the equal sign... + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(mTextValue); + } + else { + result = aScanner.SkipWhitespace(); + } - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(start, wsend); - aScanner.BindSubstring(mTextKey, wsstart, wsend); - } - else { - result = aScanner.SkipWhitespace(); - } - - if(NS_OK==result) { - result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'... - if(NS_OK==result) { - if(kEqual==aChar){ - result=aScanner.GetChar(aChar); //skip the equal sign... - if(NS_OK==result) { - - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue); - } - else { - result = aScanner.SkipWhitespace(); - } - - if(NS_OK==result) { - result=aScanner.Peek(aChar); //and grab the next char. - if(NS_OK==result) { - if((kQuote==aChar) || (kApostrophe==aChar)) { - aScanner.GetChar(aChar); - result=ConsumeQuottedString(aChar,mTextValue,aScanner,aFlag); - if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { - mTextValue.Insert(aChar,0); - mTextValue.Append(aChar); - } - // According to spec. we ( who? ) should ignore linefeeds. But look, - // even the carriage return was getting stripped ( wonder why! ) - - // Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds, - // bug then what about bug 47535 ? Should we preserve everything then? - // Well, let's make it so! Commenting out the next two lines.. - /*if(!aRetain) - mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds... - */ - } - else if(kGreaterThan==aChar){ - mHasEqualWithoutValue=PR_TRUE; - } - else if(kAmpersand==aChar) { - // XXX - Discard script entity for now....except in - // view-source - aScanner.GetChar(aChar); - PRBool discard=!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE); + if (NS_OK==result) { + result=aScanner.Peek(aChar); //and grab the next char. + if (NS_OK==result) { + if ((kQuote==aChar) || (kApostrophe==aChar)) { + aScanner.GetChar(aChar); + result=ConsumeQuotedString(aChar,mTextValue,aScanner,aFlag); + if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) { + mTextValue.Insert(aChar,0); mTextValue.Append(aChar); - result=aScanner.GetChar(aChar); - if(NS_OK==result) { - mTextValue.Append(aChar); - result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner); - } - if(discard) mTextValue.Truncate(); - } - else { - aScanner.GetChar(aChar); - mTextValue.Append(aChar); //it's an alphanum attribute... - result=ConsumeAttributeValueText(mTextValue,aScanner,kAttributeTerminalChars,aFlag); - } - }//if - if(NS_OK==result) { - if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { - result = aScanner.ReadWhitespace(mTextValue); - } - else { - result = aScanner.SkipWhitespace(); } + // According to spec. we ( who? ) should ignore linefeeds. But look, + // even the carriage return was getting stripped ( wonder why! ) - + // Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds, + // bug then what about bug 47535 ? Should we preserve everything then? + // Well, let's make it so! Commenting out the next two lines.. + /*if(!aRetain) + mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds... + */ } + else if (kGreaterThan==aChar){ + mHasEqualWithoutValue=PR_TRUE; + } + else { + result=ConsumeAttributeValueText(mTextValue, + aScanner, + kAttributeTerminalChars, + aFlag); + } }//if + if (NS_OK==result) { + if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) { + result = aScanner.ReadWhitespace(mTextValue); + } + else { + result = aScanner.SkipWhitespace(); + } + } }//if }//if - else { - //This is where we have to handle fairly busted content. - //If you're here, it means we saw an attribute name, but couldn't find - //the following equal sign. REALLY ugly. - //My best guess is to grab the next non-ws char. We know it's not '=', - //so let's see what it is. If it's a '"', then assume we're reading - //from the middle of the value. Try stripping the quote and continuing... - - if(kQuote==aChar){ - result=aScanner.SkipOver(aChar); //strip quote. - } - } }//if - } //if - }//if (consume optional value) + else { + //This is where we have to handle fairly busted content. + //If you're here, it means we saw an attribute name, but couldn't find + //the following equal sign. REALLY ugly. + //My best guess is to grab the next non-ws char. We know it's not '=', + //so let's see what it is. If it's a '"', then assume we're reading + //from the middle of the value. Try stripping the quote and continuing... + if (kQuote==aChar){ + result=aScanner.SkipOver(aChar); //strip quote. + } + } + }//if + } //if + }//if (consume optional value) - if(NS_OK==result) { - result=aScanner.Peek(aChar); - mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result)); - } - } //if + if (NS_OK==result) { + result=aScanner.Peek(aChar); + mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result)); + } }//if return result; } @@ -1900,8 +1871,6 @@ CEntityToken::CEntityToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag * @return error result */ nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) { - if(aChar) - mTextValue.Assign(aChar); nsresult result=ConsumeEntity(aChar,mTextValue,aScanner); return result; } @@ -1939,52 +1908,80 @@ PRInt32 CEntityToken::GetTokenType(void) { * @param aScanner -- controller of underlying input source * @return error result */ -PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){ - PRUnichar theChar=0; - PRInt32 result=aScanner.Peek(theChar); - if(NS_OK==result) { - if(kLeftBrace==aChar) { - //you're consuming a script entity... - PRInt32 rightBraceCount = 0; - PRInt32 leftBraceCount = 1; - while(leftBraceCount!=rightBraceCount) { - result=aScanner.GetChar(aChar); - if(NS_OK!=result) return result; - aString += aChar; - if(aChar==kRightBrace) - rightBraceCount++; - else if(aChar==kLeftBrace) - leftBraceCount++; - } - result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE); - if(NS_OK==result) { - result=aScanner.GetChar(aChar); // This character should be a semicolon - if(NS_OK==result) aString += aChar; - } - } //if - else { - if(kHashsign==aChar) { - if('X'==(toupper((char)theChar))) { - result=aScanner.GetChar(theChar); - aString+=theChar; - } - if(NS_OK==result){ - result=aScanner.ReadNumber(aString); - } - } - else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE. - if(NS_OK==result) { - result=aScanner.Peek(theChar); - if(NS_OK==result) { - if (kSemicolon == theChar) { - // consume semicolon that stopped the scan - aString+=theChar; - result=aScanner.GetChar(theChar); - } - } - }//if - } //else +nsresult +CEntityToken::ConsumeEntity(PRUnichar aChar, + nsString& aString, + nsScanner& aScanner) { + nsresult result=NS_OK; + if(kLeftBrace==aChar) { + //you're consuming a script entity... + aScanner.GetChar(aChar); // Consume & + + PRInt32 rightBraceCount = 0; + PRInt32 leftBraceCount = 0; + + do { + result=aScanner.GetChar(aChar); + NS_ENSURE_SUCCESS(result,result); + + aString.Append(aChar); + if(aChar==kRightBrace) + rightBraceCount++; + else if(aChar==kLeftBrace) + leftBraceCount++; + } while(leftBraceCount!=rightBraceCount); } //if + else { + PRUnichar theChar=0; + if (kHashsign==aChar) { + result = aScanner.Peek(theChar,2); + NS_ENSURE_SUCCESS(result,result); + + if (nsCRT::IsAsciiDigit(theChar)) { + aScanner.GetChar(aChar); // Consume & + aScanner.GetChar(aChar); // Consume # + aString.Assign(aChar); + result=aScanner.ReadNumber(aString,10); + } + else if (theChar == 'x' || theChar == 'X') { + aScanner.GetChar(aChar); // Consume & + aScanner.GetChar(aChar); // Consume # + aScanner.GetChar(theChar); // Consume x + aString.Assign(aChar); + aString.Append(theChar); + result=aScanner.ReadNumber(aString,16); + } + else { + return NS_HTMLTOKENS_NOT_AN_ENTITY; + } + } + else { + result = aScanner.Peek(theChar,1); + NS_ENSURE_SUCCESS(result,result); + + if(nsCRT::IsAsciiAlpha(theChar) || + theChar == '_' || + theChar == ':') { + aScanner.GetChar(aChar); // Consume & + result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE. + } + else { + return NS_HTMLTOKENS_NOT_AN_ENTITY; + } + } + } + + NS_ENSURE_SUCCESS(result,result); + + result=aScanner.Peek(aChar); + NS_ENSURE_SUCCESS(result,result); + + if (aChar == kSemicolon) { + // consume semicolon that stopped the scan + aString.Append(aChar); + result=aScanner.GetChar(aChar); + } + return result; } diff --git a/parser/htmlparser/src/nsHTMLTokens.h b/parser/htmlparser/src/nsHTMLTokens.h index 743208a164ea..31edcbe0759b 100644 --- a/parser/htmlparser/src/nsHTMLTokens.h +++ b/parser/htmlparser/src/nsHTMLTokens.h @@ -232,7 +232,7 @@ class CEntityToken : public CHTMLToken { virtual PRInt32 GetTokenType(void); PRInt32 TranslateToUnicodeStr(nsString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); - static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner); + static nsresult ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner); static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString); virtual void DebugDumpSource(nsOutputStream& out); virtual const nsAReadableString& GetStringValue(void); @@ -281,7 +281,7 @@ class CTextToken: public CHTMLToken { CTextToken(const nsAReadableString& aString); virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode); nsresult ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, - nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens); + nsString& aEndTagName,PRInt32 aMode,PRBool& aFlushTokens); virtual const char* GetClassName(void); virtual PRInt32 GetTokenType(void); virtual PRInt32 GetTextLength(void); diff --git a/parser/htmlparser/src/nsParser.cpp b/parser/htmlparser/src/nsParser.cpp index 010efdf66c71..e3d5fae1c290 100644 --- a/parser/htmlparser/src/nsParser.cpp +++ b/parser/htmlparser/src/nsParser.cpp @@ -2476,7 +2476,7 @@ nsParser::DetectMetaTag(const char* aBytes, const char* attrEnd; // Find the end of the tag - FindInReadable(NS_LITERAL_CSTRING(">"), tagEnd, end); + FindCharInReadable('>', tagEnd, end); attrEnd = tagEnd.get(); CWordTokenizer tokenizer(attrStart, 0, attrEnd-attrStart); @@ -2517,7 +2517,7 @@ nsParser::DetectMetaTag(const char* aBytes, (nsCRT::strncasecmp(contentStart+offset, kCharsetStr, kCharsetStrLen) == 0)) { // The next word is the charset - if ((offset = contentTokenizer.GetNextWord()) != kNotFound) { + if ((offset = contentTokenizer.GetNextWord(PR_TRUE)) != kNotFound) { aCharset.Assign(NS_ConvertASCIItoUCS2(contentStart+offset, contentTokenizer.GetLength())); } diff --git a/parser/htmlparser/src/nsScanner.cpp b/parser/htmlparser/src/nsScanner.cpp index 65976ce07196..3b93befcb5d0 100644 --- a/parser/htmlparser/src/nsScanner.cpp +++ b/parser/htmlparser/src/nsScanner.cpp @@ -874,43 +874,36 @@ nsresult nsScanner::ReadIdentifier(nsReadingIterator& aStart, } /** - * Consume characters until you find the terminal char + * Consume digits * - * @update gess 3/25/98 - * @param aString receives new data from stream - * @param addTerminal tells us whether to append terminal to aString + * @param aString - should contain digits * @return error code */ -nsresult nsScanner::ReadNumber(nsString& aString) { +nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) { if (!mSlidingBuffer) { return kEOF; } + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); + PRUnichar theChar=0; nsresult result=Peek(theChar); nsReadingIterator origin, current, end; - PRBool found=PR_FALSE; origin = mCurrentPosition; current = origin; end = mEndPosition; + PRBool done = PR_FALSE; while(current != end) { - theChar=*current; if(theChar) { - found=PR_FALSE; - if(('a'<=theChar) && (theChar<='f')) - found=PR_TRUE; - else if(('A'<=theChar) && (theChar<='F')) - found=PR_TRUE; - else if(('0'<=theChar) && (theChar<='9')) - found=PR_TRUE; - else if('#'==theChar) - found=PR_TRUE; - - if(!found) { + done = (theChar < '0' || theChar > '9') && + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && + (theChar < 'a' || theChar > 'f') + :PR_TRUE); + if(done) { AppendUnicodeTo(origin, current, aString); break; } @@ -930,36 +923,32 @@ nsresult nsScanner::ReadNumber(nsString& aString) { } nsresult nsScanner::ReadNumber(nsReadingIterator& aStart, - nsReadingIterator& aEnd) { + nsReadingIterator& aEnd, + PRInt32 aBase) { if (!mSlidingBuffer) { return kEOF; } + NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported"); + PRUnichar theChar=0; nsresult result=Peek(theChar); nsReadingIterator origin, current, end; - PRBool found=PR_FALSE; origin = mCurrentPosition; current = origin; end = mEndPosition; + PRBool done = PR_FALSE; while(current != end) { - theChar=*current; if(theChar) { - found=PR_FALSE; - if(('a'<=theChar) && (theChar<='f')) - found=PR_TRUE; - else if(('A'<=theChar) && (theChar<='F')) - found=PR_TRUE; - else if(('0'<=theChar) && (theChar<='9')) - found=PR_TRUE; - else if('#'==theChar) - found=PR_TRUE; - - if(!found) { + done = (theChar < '0' || theChar > '9') && + ((aBase == 16)? (theChar < 'A' || theChar > 'F') && + (theChar < 'a' || theChar > 'f') + :PR_TRUE); + if(done) { aStart = origin; aEnd = current; break; diff --git a/parser/htmlparser/src/nsScanner.h b/parser/htmlparser/src/nsScanner.h index 64f6f174102a..bfb70c7ae025 100644 --- a/parser/htmlparser/src/nsScanner.h +++ b/parser/htmlparser/src/nsScanner.h @@ -184,9 +184,10 @@ class nsScanner { nsresult ReadIdentifier(nsReadingIterator& aStart, nsReadingIterator& aEnd, PRBool allowPunct=PR_FALSE); - nsresult ReadNumber(nsString& aString); + nsresult ReadNumber(nsString& aString,PRInt32 aBase); nsresult ReadNumber(nsReadingIterator& aStart, - nsReadingIterator& aEnd); + nsReadingIterator& aEnd, + PRInt32 aBase); nsresult ReadWhitespace(nsString& aString); nsresult ReadWhitespace(nsReadingIterator& aStart, nsReadingIterator& aEnd); diff --git a/parser/htmlparser/src/nsToken.h b/parser/htmlparser/src/nsToken.h index eb834f983bef..afa249ded9a5 100644 --- a/parser/htmlparser/src/nsToken.h +++ b/parser/htmlparser/src/nsToken.h @@ -59,6 +59,9 @@ #include "nsFileSpec.h" #include "nsFixedSizeAllocator.h" +#define NS_HTMLTOKENS_NOT_AN_ENTITY \ + NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_HTMLPARSER,2000) + class nsScanner; class nsTokenAllocator;