40072 - (patch provided by pollmann) Do not process METAs after FRAMESET. r=harishd sr=vidur

58809 - Handle numeric & hexadecimal entities correctly r=heikki sr=vidur
72249 - Per SGML, an end tag can contain whitespace between the generic identifier and tagc. r=heikki sr=vidur
82971 - If the last character was a CR, make sure not to lose that information. r=heikki sr=vidur
This commit is contained in:
harishd%netscape.com 2001-07-05 22:20:34 +00:00
Родитель 2e5a47dd8f
Коммит 7ef87f6bbd
19 изменённых файлов: 629 добавлений и 651 удалений

Просмотреть файл

@ -78,7 +78,8 @@ public:
static PRUint32 CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource,
PRUint32 aSrcOffset,
PRUnichar* aDest,
PRUint32 aLength);
PRUint32 aLength,
PRBool& aLastCharCR);
static PRUint32 CopyNewlineNormalizedUnicodeTo(nsReadingIterator<PRUnichar>& aSrcStart, const nsReadingIterator<PRUnichar>& aSrcEnd, nsAWritableString& aDest);

Просмотреть файл

@ -217,8 +217,8 @@ class CopyNormalizeNewlines
typedef typename OutputIterator::value_type value_type;
public:
CopyNormalizeNewlines(OutputIterator* aDestination) :
mLastCharCR(PR_FALSE),
CopyNormalizeNewlines(OutputIterator* aDestination,PRBool aLastCharCR=PR_FALSE) :
mLastCharCR(aLastCharCR),
mDestination(aDestination),
mWritten(0)
{ }
@ -227,6 +227,10 @@ class CopyNormalizeNewlines
return mWritten;
}
// Returns the normalizer's current mLastCharCR flag. The flag is seeded from
// the constructor's aLastCharCR argument, and
// nsContentUtils::CopyNewlineNormalizedUnicodeTo reads it back after
// copy_string() so the caller can hand it to the next chunk's normalizer.
// NOTE(review): presumably write() sets this when a chunk ends in '\r' --
// write()'s body is not fully visible here; confirm.
PRBool IsLastCharCR() {
return mLastCharCR;
}
PRUint32 write(const typename OutputIterator::value_type* aSource, PRUint32 aSourceLength) {
const typename OutputIterator::value_type* done_writing = aSource + aSourceLength;
@ -274,14 +278,21 @@ class CopyNormalizeNewlines
// static
PRUint32
nsContentUtils::CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength)
nsContentUtils::CopyNewlineNormalizedUnicodeTo(const nsAReadableString& aSource,
PRUint32 aSrcOffset,
PRUnichar* aDest,
PRUint32 aLength,
PRBool& aLastCharCR)
{
typedef NormalizeNewlinesCharTraits<PRUnichar*> sink_traits;
sink_traits dest_traits(aDest);
CopyNormalizeNewlines<sink_traits> normalizer(&dest_traits);
CopyNormalizeNewlines<sink_traits> normalizer(&dest_traits,aLastCharCR);
nsReadingIterator<PRUnichar> fromBegin, fromEnd;
copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), normalizer);
copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ),
aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ),
normalizer);
aLastCharCR = normalizer.IsLastCharCR();
return normalizer.GetCharsWritten();
}

Просмотреть файл

@ -2043,6 +2043,7 @@ SinkContext::AddText(const nsAReadableString& aText)
// Copy data from string into our buffer; flush buffer when it fills up
PRInt32 offset = 0;
PRBool isLastCharCR = PR_FALSE;
while (0 != addLen) {
PRInt32 amount = mTextSize - mTextLength;
if (amount > addLen) {
@ -2058,7 +2059,8 @@ SinkContext::AddText(const nsAReadableString& aText)
mTextLength += nsContentUtils::CopyNewlineNormalizedUnicodeTo(aText,
offset,
&mText[mTextLength],
amount);
amount,
isLastCharCR);
offset += amount;
addLen -= amount;
}
@ -4500,18 +4502,22 @@ HTMLContentSink::ProcessMETATag(const nsIParserNode& aNode)
// the preference.
if(!mInsideNoXXXTag) {
// set any HTTP-EQUIV data into document's header data as well as url
nsAutoString header;
it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::httpEquiv, header);
if (header.Length() > 0) {
nsAutoString result;
it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::content, result);
if (result.Length() > 0) {
header.ToLowerCase();
nsCOMPtr<nsIAtom> fieldAtom(dont_AddRef(NS_NewAtom(header)));
rv=ProcessHeaderData(fieldAtom,result,it);
}//if (result.Length() > 0)
}//if (header.Length() > 0)
// Bug 40072: Don't evaluate METAs after FRAMESET.
if (!mFrameset) {
// set any HTTP-EQUIV data into document's header data as well as url
nsAutoString header;
it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::httpEquiv, header);
if (header.Length() > 0) {
nsAutoString result;
it->GetAttribute(kNameSpaceID_HTML, nsHTMLAtoms::content, result);
if (result.Length() > 0) {
header.ToLowerCase();
nsCOMPtr<nsIAtom> fieldAtom(dont_AddRef(NS_NewAtom(header)));
rv=ProcessHeaderData(fieldAtom,result,it);
}//if (result.Length() > 0)
}//if (header.Length() > 0)
}//if (!mFrameset)
}//if(!mInsideNoXXXTag)
}//if (NS_OK == rv)
}//if (nsnull != parent)

Просмотреть файл

@ -762,6 +762,7 @@ nsHTMLFragmentContentSink::AddText(const nsAReadableString& aString)
// Copy data from string into our buffer; flush buffer when it fills up
PRInt32 offset = 0;
PRBool isLastCharCR = PR_FALSE;
while (0 != addLen) {
PRInt32 amount = mTextSize - mTextLength;
if (amount > addLen) {
@ -774,9 +775,11 @@ nsHTMLFragmentContentSink::AddText(const nsAReadableString& aString)
}
}
mTextLength +=
nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, offset,
nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString,
offset,
&mText[mTextLength],
amount);
amount,
isLastCharCR);
offset += amount;
addLen -= amount;
}

Просмотреть файл

@ -1446,6 +1446,7 @@ nsXMLContentSink::AddText(const nsAReadableString& aString)
// Copy data from string into our buffer; flush buffer when it fills up
PRInt32 offset = 0;
PRBool isLastCharCR = PR_FALSE;
while (0 != addLen) {
PRInt32 amount = mTextSize - mTextLength;
if (amount > addLen) {
@ -1467,9 +1468,11 @@ nsXMLContentSink::AddText(const nsAReadableString& aString)
}
}
mTextLength +=
nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString, offset,
nsContentUtils::CopyNewlineNormalizedUnicodeTo(aString,
offset,
&mText[mTextLength],
amount);
amount,
isLastCharCR);
offset += amount;
addLen -= amount;
}

Просмотреть файл

@ -752,14 +752,12 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
//if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) {
if(gHTMLElements[theTag].CanContainType(kCDATA)) {
nsAutoString endText, endTagName;
nsAutoString endTagName;
endTagName.AssignWithConversion(nsHTMLTags::GetStringValue(theTag));
endText.Assign(endTagName);
endText.InsertWithConversion("</",0,2);
CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text);
result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endText,mFlags,aFlushTokens); //tell new token to finish consuming text...
result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text...
// Fix bug 44186
// Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script>
@ -841,35 +839,24 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
nsresult result=aScanner.Peek(theChar, 1);
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
if(NS_OK==result) {
if(nsCRT::IsAsciiAlpha(theChar)) { //handle common enity references &xxx; or &#000.
// Get the "&"
aScanner.GetChar(theChar);
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
if (NS_SUCCEEDED(result)) {
if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) {
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
result=aToken->Consume(theChar,aScanner,mFlags);
// Get the first entity character
aScanner.GetChar(theChar);
result = aToken->Consume(theChar,aScanner,mFlags); //tell new token to finish consuming text...
}
else if(kHashsign==theChar) {
// Get the "&"
aScanner.GetChar(theChar);
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
// Get the first numerical entity character
aScanner.GetChar(theChar);
result=aToken->Consume(theChar,aScanner,mFlags);
}
else {
//oops, we're actually looking at plain text...
return ConsumeText(aToken,aScanner);
}//if
if(aToken){
if(mIsFinalChunk && (kEOF==result)) {
result=NS_OK; //use as much of the entity as you can get.
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
IF_FREE(aToken, mTokenAllocator);
}
else {
if (mIsFinalChunk && result == kEOF) {
result=NS_OK; //use as much of the entity as you can get.
}
AddToken(aToken,result,&mTokenDeque,theAllocator);
return result;
}
AddToken(aToken,result,&mTokenDeque,theAllocator);
}
// oops, we're actually looking at plain text...
result = ConsumeText(aToken,aScanner);
}//if
return result;
}

Просмотреть файл

@ -599,12 +599,11 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
* @return error result
*/
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
nsString& aTerminalString,PRInt32 aFlag,PRBool& aFlushTokens){
nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){
nsresult result=NS_OK;
nsReadingIterator<PRUnichar> theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
PRBool done=PR_FALSE;
PRBool theLastIteration=PR_FALSE;
PRInt32 termStrLen=aTerminalString.Length();
aScanner.CurrentPosition(theStartOffset);
theCurrOffset = theStartOffset;
@ -627,95 +626,82 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
// assume that the prematured terminal string is the actual terminal string and goto step 1. This
// will be our last iteration.
nsAutoString theTerminalString(aEndTagName);
theTerminalString.InsertWithConversion("</",0,2);
// When is the disaster enabled?
// a) when the buffer runs out of data.
// b) when the terminal string is not found.
PRBool disaster = PR_FALSE;
PRUint32 termStrLen=theTerminalString.Length();
while((result == NS_OK) && !done) {
if (FindCharInReadable(PRUnichar(kLessThan), theCurrOffset, endPos)) {
nsReadingIterator<PRUnichar> tempOffset = theCurrOffset;
while(1) {
if (FindCharInReadable(PRUnichar(kGreaterThan), tempOffset, endPos)) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparison
nsAutoString str;
PRBool found = PR_FALSE;
nsReadingIterator<PRUnichar> gtOffset,ltOffset = theCurrOffset;
while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
Distance(ltOffset, endPos) >= termStrLen) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparison
nsReadingIterator<PRUnichar> start(tempOffset), end(tempOffset);
start.advance(-termStrLen);
nsReadingIterator<PRUnichar> start(ltOffset), end(ltOffset);
end.advance(termStrLen);
CopyUnicodeTo(start, end, str);
if (str.EqualsIgnoreCase(aTerminalString)) {
theTermStrPos = tempOffset;
theTermStrPos.advance(-termStrLen);
break;
}
tempOffset.advance(1);
if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
end != endPos && (*end == '>' || *end == ' ' ||
*end == '\t' || *end == '\n' ||
*end == '\r' || *end == '\b')) {
gtOffset = end;
if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
found = PR_TRUE;
theTermStrPos = start;
}
else {
// Ran out of data and haven't found the terminal string yet.
// Note: If a bogus terminal string is found it would have
// been stored in theAltTermStrPos; Bug: 64576
theTermStrPos=endPos;
break; // we have reached the end of the document
break;
}
ltOffset.advance(1);
}
if (found && theTermStrPos != endPos) {
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
!(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) &&
!theLastIteration && !aIgnoreComments) {
nsReadingIterator<PRUnichar> endComment(ltOffset);
endComment.advance(5);
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
theAltTermStrPos = theTermStrPos;
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
if (theTermStrPos != endPos) {
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
!(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) &&
!theLastIteration && !aIgnoreComments) {
nsReadingIterator<PRUnichar> endComment(theCurrOffset);
endComment.advance(5);
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
theAltTermStrPos = theTermStrPos;
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
disaster=PR_FALSE;
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
theTermStrPos.advance(termStrLen+1);
aScanner.SetPosition(theTermStrPos);
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
disaster = PR_TRUE;
// Make sure to preserve the end tag's representation in viewsource
if(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName);
}
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
aScanner.SetPosition(gtOffset.advance(1));
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
disaster = PR_TRUE;
}
if(disaster) {
// We end up here if:
// a) when the buffer runs out of data.
// b) when the terminal string is not found.
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos != endPos) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
@ -723,15 +709,15 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
theLastIteration = PR_TRUE;
}
else {
aTerminalString.Cut(0,2);
done = PR_TRUE; // Do this to fix Bug. 35456
}
}
else
else {
result=kEOF;
}
}
}
return result;
return result;
}
void CTextToken::CopyTo(nsAWritableString& aStr)
@ -1496,7 +1482,6 @@ nsresult ConsumeAttributeEntity(nsString& aString,
nsAutoString entity;
if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.GetChar(amp); // Get '&'
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
if (NS_SUCCEEDED(result)) {
theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
@ -1507,7 +1492,7 @@ nsresult ConsumeAttributeEntity(nsString& aString,
// Resembling IE!!
if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
// Looks like we're not dealing with an entity
aString.Append(amp);
aString.Append(kAmpersand);
aString.Append(entity);
}
else {
@ -1517,12 +1502,19 @@ nsresult ConsumeAttributeEntity(nsString& aString,
}
}
else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.GetChar(amp); // Discard '&'
PRInt32 err;
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
if (NS_SUCCEEDED(result)) {
theNCRValue=entity.ToInteger(&err,kAutoDetect);
aString.Append(PRUnichar(theNCRValue));
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
// Looked like an entity but it's not
aScanner.GetChar(amp);
aString.Append(amp);
result = NS_OK; // just being safe..
}
else {
PRInt32 err;
theNCRValue=entity.ToInteger(&err,kAutoDetect);
aString.Append(PRUnichar(theNCRValue));
}
}
}
else {
@ -1580,7 +1572,7 @@ nsresult ConsumeAttributeValueText(nsString& aString,
* @return error result
*/
static
nsresult ConsumeQuottedString(PRUnichar aChar,
nsresult ConsumeQuotedString(PRUnichar aChar,
nsString& aString,
nsScanner& aScanner,
PRInt32 aFlag)
@ -1639,128 +1631,107 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if(NS_OK==result) {
nsReadingIterator<PRUnichar> start, end;
if((kHashsign==aChar) || (nsCRT::IsAsciiDigit(aChar))){
result=aScanner.ReadNumber(start, end);
if (NS_OK==result) {
static const PRUnichar theTerminalsChars[] =
{ PRUnichar(' '), PRUnichar('"'),
PRUnichar('='), PRUnichar('\n'),
PRUnichar('\r'), PRUnichar('\t'),
PRUnichar('>'), PRUnichar('\b'),
PRUnichar(0) };
nsReadingIterator<PRUnichar> start, end;
const nsDependentString theTerminals(theTerminalsChars,
sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1);
result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE);
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.BindSubstring(mTextKey, start, end);
}
//now it's time to Consume the (optional) value...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(start, wsend);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
//If you're here, handle an unquoted key.
static const PRUnichar theTerminalsChars[] =
{ PRUnichar('\b'), PRUnichar('\t'), PRUnichar('\n'), PRUnichar('\r'),
PRUnichar(' '), PRUnichar('"'), PRUnichar('='), PRUnichar('>'),
PRUnichar(0) };
const nsDependentString theTerminals(theTerminalsChars,
sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1);
result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE);
}
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.BindSubstring(mTextKey, start, end);
result = aScanner.SkipWhitespace();
}
//now it's time to Consume the (optional) value...
if(NS_OK==result) {
if (NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if (NS_OK==result) {
if (kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(start, wsend);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if(NS_OK==result) {
if(kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if(NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if(NS_OK==result) {
if((kQuote==aChar) || (kApostrophe==aChar)) {
aScanner.GetChar(aChar);
result=ConsumeQuottedString(aChar,mTextValue,aScanner,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
mTextValue.Insert(aChar,0);
mTextValue.Append(aChar);
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// bug then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetain)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if(kGreaterThan==aChar){
mHasEqualWithoutValue=PR_TRUE;
}
else if(kAmpersand==aChar) {
// XXX - Discard script entity for now....except in
// view-source
aScanner.GetChar(aChar);
PRBool discard=!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE);
if (NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if (NS_OK==result) {
if ((kQuote==aChar) || (kApostrophe==aChar)) {
aScanner.GetChar(aChar);
result=ConsumeQuotedString(aChar,mTextValue,aScanner,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
mTextValue.Insert(aChar,0);
mTextValue.Append(aChar);
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
mTextValue.Append(aChar);
result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner);
}
if(discard) mTextValue.Truncate();
}
else {
aScanner.GetChar(aChar);
mTextValue.Append(aChar); //it's an alphanum attribute...
result=ConsumeAttributeValueText(mTextValue,aScanner,kAttributeTerminalChars,aFlag);
}
}//if
if(NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// bug then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetain)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if (kGreaterThan==aChar){
mHasEqualWithoutValue=PR_TRUE;
}
else {
result=ConsumeAttributeValueText(mTextValue,
aScanner,
kAttributeTerminalChars,
aFlag);
}
}//if
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
}
}//if
}//if
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if(kQuote==aChar){
result=aScanner.SkipOver(aChar); //strip quote.
}
}
}//if
} //if
}//if (consume optional value)
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if (kQuote==aChar){
result=aScanner.SkipOver(aChar); //strip quote.
}
}
}//if
} //if
}//if (consume optional value)
if(NS_OK==result) {
result=aScanner.Peek(aChar);
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
}
} //if
if (NS_OK==result) {
result=aScanner.Peek(aChar);
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
}
}//if
return result;
}
@ -1900,8 +1871,6 @@ CEntityToken::CEntityToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag
* @return error result
*/
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
if(aChar)
mTextValue.Assign(aChar);
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
return result;
}
@ -1939,52 +1908,80 @@ PRInt32 CEntityToken::GetTokenType(void) {
* @param aScanner -- controller of underlying input source
* @return error result
*/
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
PRUnichar theChar=0;
PRInt32 result=aScanner.Peek(theChar);
if(NS_OK==result) {
if(kLeftBrace==aChar) {
//you're consuming a script entity...
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 1;
while(leftBraceCount!=rightBraceCount) {
result=aScanner.GetChar(aChar);
if(NS_OK!=result) return result;
aString += aChar;
if(aChar==kRightBrace)
rightBraceCount++;
else if(aChar==kLeftBrace)
leftBraceCount++;
}
result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE);
if(NS_OK==result) {
result=aScanner.GetChar(aChar); // This character should be a semicolon
if(NS_OK==result) aString += aChar;
}
} //if
else {
if(kHashsign==aChar) {
if('X'==(toupper((char)theChar))) {
result=aScanner.GetChar(theChar);
aString+=theChar;
}
if(NS_OK==result){
result=aScanner.ReadNumber(aString);
}
}
else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
if(NS_OK==result) {
result=aScanner.Peek(theChar);
if(NS_OK==result) {
if (kSemicolon == theChar) {
// consume semicolon that stopped the scan
aString+=theChar;
result=aScanner.GetChar(theChar);
}
}
}//if
} //else
nsresult
CEntityToken::ConsumeEntity(PRUnichar aChar,
nsString& aString,
nsScanner& aScanner) {
nsresult result=NS_OK;
if(kLeftBrace==aChar) {
//you're consuming a script entity...
aScanner.GetChar(aChar); // Consume &
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 0;
do {
result=aScanner.GetChar(aChar);
NS_ENSURE_SUCCESS(result,result);
aString.Append(aChar);
if(aChar==kRightBrace)
rightBraceCount++;
else if(aChar==kLeftBrace)
leftBraceCount++;
} while(leftBraceCount!=rightBraceCount);
} //if
else {
PRUnichar theChar=0;
if (kHashsign==aChar) {
result = aScanner.Peek(theChar,2);
NS_ENSURE_SUCCESS(result,result);
if (nsCRT::IsAsciiDigit(theChar)) {
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
aString.Assign(aChar);
result=aScanner.ReadNumber(aString,10);
}
else if (theChar == 'x' || theChar == 'X') {
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
aScanner.GetChar(theChar); // Consume x
aString.Assign(aChar);
aString.Append(theChar);
result=aScanner.ReadNumber(aString,16);
}
else {
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
else {
result = aScanner.Peek(theChar,1);
NS_ENSURE_SUCCESS(result,result);
if(nsCRT::IsAsciiAlpha(theChar) ||
theChar == '_' ||
theChar == ':') {
aScanner.GetChar(aChar); // Consume &
result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
}
else {
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
}
NS_ENSURE_SUCCESS(result,result);
result=aScanner.Peek(aChar);
NS_ENSURE_SUCCESS(result,result);
if (aChar == kSemicolon) {
// consume semicolon that stopped the scan
aString.Append(aChar);
result=aScanner.GetChar(aChar);
}
return result;
}

Просмотреть файл

@ -232,7 +232,7 @@ class CEntityToken : public CHTMLToken {
virtual PRInt32 GetTokenType(void);
PRInt32 TranslateToUnicodeStr(nsString& aString);
virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
static nsresult ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
virtual void DebugDumpSource(nsOutputStream& out);
virtual const nsAReadableString& GetStringValue(void);
@ -281,7 +281,7 @@ class CTextToken: public CHTMLToken {
CTextToken(const nsAReadableString& aString);
virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
nsresult ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens);
nsString& aEndTagName,PRInt32 aMode,PRBool& aFlushTokens);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
virtual PRInt32 GetTextLength(void);

Просмотреть файл

@ -2476,7 +2476,7 @@ nsParser::DetectMetaTag(const char* aBytes,
const char* attrEnd;
// Find the end of the tag
FindInReadable(NS_LITERAL_CSTRING(">"), tagEnd, end);
FindCharInReadable('>', tagEnd, end);
attrEnd = tagEnd.get();
CWordTokenizer<char> tokenizer(attrStart, 0, attrEnd-attrStart);
@ -2517,7 +2517,7 @@ nsParser::DetectMetaTag(const char* aBytes,
(nsCRT::strncasecmp(contentStart+offset,
kCharsetStr, kCharsetStrLen) == 0)) {
// The next word is the charset
if ((offset = contentTokenizer.GetNextWord()) != kNotFound) {
if ((offset = contentTokenizer.GetNextWord(PR_TRUE)) != kNotFound) {
aCharset.Assign(NS_ConvertASCIItoUCS2(contentStart+offset,
contentTokenizer.GetLength()));
}

Просмотреть файл

@ -874,43 +874,36 @@ nsresult nsScanner::ReadIdentifier(nsReadingIterator<PRUnichar>& aStart,
}
/**
* Consume characters until you find the terminal char
* Consume digits
*
* @update gess 3/25/98
* @param aString receives new data from stream
* @param addTerminal tells us whether to append terminal to aString
* @param aString - should contain digits
* @return error code
*/
nsresult nsScanner::ReadNumber(nsString& aString) {
nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
if (!mSlidingBuffer) {
return kEOF;
}
NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
PRUnichar theChar=0;
nsresult result=Peek(theChar);
nsReadingIterator<PRUnichar> origin, current, end;
PRBool found=PR_FALSE;
origin = mCurrentPosition;
current = origin;
end = mEndPosition;
PRBool done = PR_FALSE;
while(current != end) {
theChar=*current;
if(theChar) {
found=PR_FALSE;
if(('a'<=theChar) && (theChar<='f'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='F'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
if(!found) {
done = (theChar < '0' || theChar > '9') &&
((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
(theChar < 'a' || theChar > 'f')
:PR_TRUE);
if(done) {
AppendUnicodeTo(origin, current, aString);
break;
}
@ -930,36 +923,32 @@ nsresult nsScanner::ReadNumber(nsString& aString) {
}
nsresult nsScanner::ReadNumber(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd) {
nsReadingIterator<PRUnichar>& aEnd,
PRInt32 aBase) {
if (!mSlidingBuffer) {
return kEOF;
}
NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
PRUnichar theChar=0;
nsresult result=Peek(theChar);
nsReadingIterator<PRUnichar> origin, current, end;
PRBool found=PR_FALSE;
origin = mCurrentPosition;
current = origin;
end = mEndPosition;
PRBool done = PR_FALSE;
while(current != end) {
theChar=*current;
if(theChar) {
found=PR_FALSE;
if(('a'<=theChar) && (theChar<='f'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='F'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
if(!found) {
done = (theChar < '0' || theChar > '9') &&
((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
(theChar < 'a' || theChar > 'f')
:PR_TRUE);
if(done) {
aStart = origin;
aEnd = current;
break;

Просмотреть файл

@ -184,9 +184,10 @@ class nsScanner {
nsresult ReadIdentifier(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd,
PRBool allowPunct=PR_FALSE);
nsresult ReadNumber(nsString& aString);
nsresult ReadNumber(nsString& aString,PRInt32 aBase);
nsresult ReadNumber(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd);
nsReadingIterator<PRUnichar>& aEnd,
PRInt32 aBase);
nsresult ReadWhitespace(nsString& aString);
nsresult ReadWhitespace(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd);

Просмотреть файл

@ -59,6 +59,9 @@
#include "nsFileSpec.h"
#include "nsFixedSizeAllocator.h"
// Status code returned by CEntityToken::ConsumeEntity when the text after
// '&' does not form an entity (e.g. "&#" not followed by a digit or 'x'/'X').
// It is built with NS_ERROR_GENERATE_SUCCESS, i.e. as a success-class code in
// the HTML parser module, so callers compare against it explicitly
// (result == NS_HTMLTOKENS_NOT_AN_ENTITY) to fall back to plain-text handling.
#define NS_HTMLTOKENS_NOT_AN_ENTITY \
NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_HTMLPARSER,2000)
class nsScanner;
class nsTokenAllocator;

Просмотреть файл

@ -752,14 +752,12 @@ nsresult nsHTMLTokenizer::ConsumeStartTag(PRUnichar aChar,CToken*& aToken,nsScan
//if((eHTMLTag_style==theTag) || (eHTMLTag_script==theTag)) {
if(gHTMLElements[theTag].CanContainType(kCDATA)) {
nsAutoString endText, endTagName;
nsAutoString endTagName;
endTagName.AssignWithConversion(nsHTMLTags::GetStringValue(theTag));
endText.Assign(endTagName);
endText.InsertWithConversion("</",0,2);
CToken* text=theAllocator->CreateTokenOfType(eToken_text,eHTMLTag_text);
CTextToken* textToken=NS_STATIC_CAST(CTextToken*,text);
result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endText,mFlags,aFlushTokens); //tell new token to finish consuming text...
result=textToken->ConsumeUntil(0,theTag!=eHTMLTag_script,aScanner,endTagName,mFlags,aFlushTokens); //tell new token to finish consuming text...
// Fix bug 44186
// Support XML like syntax, i.e., <script src="external.js"/> == <script src="external.js"></script>
@ -841,35 +839,24 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
nsresult result=aScanner.Peek(theChar, 1);
nsTokenAllocator* theAllocator=this->GetTokenAllocator();
if(NS_OK==result) {
if(nsCRT::IsAsciiAlpha(theChar)) { //handle common enity references &xxx; or &#000.
// Get the "&"
aScanner.GetChar(theChar);
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
if (NS_SUCCEEDED(result)) {
if (nsCRT::IsAsciiAlpha(theChar) || theChar==kHashsign) {
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
result=aToken->Consume(theChar,aScanner,mFlags);
// Get the first entity character
aScanner.GetChar(theChar);
result = aToken->Consume(theChar,aScanner,mFlags); //tell new token to finish consuming text...
}
else if(kHashsign==theChar) {
// Get the "&"
aScanner.GetChar(theChar);
aToken = theAllocator->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
// Get the first numerical entity character
aScanner.GetChar(theChar);
result=aToken->Consume(theChar,aScanner,mFlags);
}
else {
//oops, we're actually looking at plain text...
return ConsumeText(aToken,aScanner);
}//if
if(aToken){
if(mIsFinalChunk && (kEOF==result)) {
result=NS_OK; //use as much of the entity as you can get.
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
IF_FREE(aToken, mTokenAllocator);
}
else {
if (mIsFinalChunk && result == kEOF) {
result=NS_OK; //use as much of the entity as you can get.
}
AddToken(aToken,result,&mTokenDeque,theAllocator);
return result;
}
AddToken(aToken,result,&mTokenDeque,theAllocator);
}
// oops, we're actually looking at plain text...
result = ConsumeText(aToken,aScanner);
}//if
return result;
}

Просмотреть файл

@ -599,12 +599,11 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
* @return error result
*/
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
nsString& aTerminalString,PRInt32 aFlag,PRBool& aFlushTokens){
nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){
nsresult result=NS_OK;
nsReadingIterator<PRUnichar> theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
PRBool done=PR_FALSE;
PRBool theLastIteration=PR_FALSE;
PRInt32 termStrLen=aTerminalString.Length();
aScanner.CurrentPosition(theStartOffset);
theCurrOffset = theStartOffset;
@ -627,95 +626,82 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
// assume that the prematured terminal string is the actual terminal string and goto step 1. This
// will be our last iteration.
nsAutoString theTerminalString(aEndTagName);
theTerminalString.InsertWithConversion("</",0,2);
// When is the disaster enabled?
// a) when the buffer runs out ot data.
// b) when the terminal string is not found.
PRBool disaster = PR_FALSE;
PRUint32 termStrLen=theTerminalString.Length();
while((result == NS_OK) && !done) {
if (FindCharInReadable(PRUnichar(kLessThan), theCurrOffset, endPos)) {
nsReadingIterator<PRUnichar> tempOffset = theCurrOffset;
while(1) {
if (FindCharInReadable(PRUnichar(kGreaterThan), tempOffset, endPos)) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparision
nsAutoString str;
PRBool found = PR_FALSE;
nsReadingIterator<PRUnichar> gtOffset,ltOffset = theCurrOffset;
while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
Distance(ltOffset, endPos) >= termStrLen) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparison
nsReadingIterator<PRUnichar> start(tempOffset), end(tempOffset);
start.advance(-termStrLen);
nsReadingIterator<PRUnichar> start(ltOffset), end(ltOffset);
end.advance(termStrLen);
CopyUnicodeTo(start, end, str);
if (str.EqualsIgnoreCase(aTerminalString)) {
theTermStrPos = tempOffset;
theTermStrPos.advance(-termStrLen);
break;
}
tempOffset.advance(1);
if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
end != endPos && (*end == '>' || *end == ' ' ||
*end == '\t' || *end == '\n' ||
*end == '\r' || *end == '\b')) {
gtOffset = end;
if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
found = PR_TRUE;
theTermStrPos = start;
}
else {
// Ran out of data and haven't found the terminal string yet.
// Note: If a bogus terminal string is found it would have
// been stored in theAltTermStrPos; Bug: 64576
theTermStrPos=endPos;
break; // we have reached the end of the document
break;
}
ltOffset.advance(1);
}
if (found && theTermStrPos != endPos) {
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
!(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) &&
!theLastIteration && !aIgnoreComments) {
nsReadingIterator<PRUnichar> endComment(ltOffset);
endComment.advance(5);
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
theAltTermStrPos = theTermStrPos;
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
if (theTermStrPos != endPos) {
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
!(aFlag & NS_IPARSER_FLAG_TRANSITIONAL_MODE) &&
!theLastIteration && !aIgnoreComments) {
nsReadingIterator<PRUnichar> endComment(theCurrOffset);
endComment.advance(5);
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
theAltTermStrPos = theTermStrPos;
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
disaster=PR_FALSE;
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
theTermStrPos.advance(termStrLen+1);
aScanner.SetPosition(theTermStrPos);
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
disaster = PR_TRUE;
// Make sure to preserve the end tag's representation in viewsource
if(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName);
}
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
aScanner.SetPosition(gtOffset.advance(1));
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
disaster = PR_TRUE;
}
if(disaster) {
// We end up here if:
// a) when the buffer runs out of data.
// b) when the terminal string is not found.
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos != endPos) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
@ -723,15 +709,15 @@ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScann
theLastIteration = PR_TRUE;
}
else {
aTerminalString.Cut(0,2);
done = PR_TRUE; // Do this to fix Bug. 35456
}
}
else
else {
result=kEOF;
}
}
}
return result;
return result;
}
void CTextToken::CopyTo(nsAWritableString& aStr)
@ -1496,7 +1482,6 @@ nsresult ConsumeAttributeEntity(nsString& aString,
nsAutoString entity;
if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.GetChar(amp); // Get '&'
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
if (NS_SUCCEEDED(result)) {
theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
@ -1507,7 +1492,7 @@ nsresult ConsumeAttributeEntity(nsString& aString,
// Resembling IE!!
if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
// Looks like we're not dealing with an entity
aString.Append(amp);
aString.Append(kAmpersand);
aString.Append(entity);
}
else {
@ -1517,12 +1502,19 @@ nsresult ConsumeAttributeEntity(nsString& aString,
}
}
else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.GetChar(amp); // Discard '&'
PRInt32 err;
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
if (NS_SUCCEEDED(result)) {
theNCRValue=entity.ToInteger(&err,kAutoDetect);
aString.Append(PRUnichar(theNCRValue));
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
// Looked like an entity but it's not
aScanner.GetChar(amp);
aString.Append(amp);
result = NS_OK; // just being safe..
}
else {
PRInt32 err;
theNCRValue=entity.ToInteger(&err,kAutoDetect);
aString.Append(PRUnichar(theNCRValue));
}
}
}
else {
@ -1580,7 +1572,7 @@ nsresult ConsumeAttributeValueText(nsString& aString,
* @return error result
*/
static
nsresult ConsumeQuottedString(PRUnichar aChar,
nsresult ConsumeQuotedString(PRUnichar aChar,
nsString& aString,
nsScanner& aScanner,
PRInt32 aFlag)
@ -1639,128 +1631,107 @@ nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 a
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if(NS_OK==result) {
nsReadingIterator<PRUnichar> start, end;
if((kHashsign==aChar) || (nsCRT::IsAsciiDigit(aChar))){
result=aScanner.ReadNumber(start, end);
if (NS_OK==result) {
static const PRUnichar theTerminalsChars[] =
{ PRUnichar(' '), PRUnichar('"'),
PRUnichar('='), PRUnichar('\n'),
PRUnichar('\r'), PRUnichar('\t'),
PRUnichar('>'), PRUnichar('\b'),
PRUnichar(0) };
nsReadingIterator<PRUnichar> start, end;
const nsDependentString theTerminals(theTerminalsChars,
sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1);
result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE);
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.BindSubstring(mTextKey, start, end);
}
//now it's time to Consume the (optional) value...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(start, wsend);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
//If you're here, handle an unquoted key.
static const PRUnichar theTerminalsChars[] =
{ PRUnichar('\b'), PRUnichar('\t'), PRUnichar('\n'), PRUnichar('\r'),
PRUnichar(' '), PRUnichar('"'), PRUnichar('='), PRUnichar('>'),
PRUnichar(0) };
const nsDependentString theTerminals(theTerminalsChars,
sizeof(theTerminalsChars)/sizeof(theTerminalsChars[0]) - 1);
result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE);
}
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
aScanner.BindSubstring(mTextKey, start, end);
result = aScanner.SkipWhitespace();
}
//now it's time to Consume the (optional) value...
if(NS_OK==result) {
if (NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if (NS_OK==result) {
if (kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(start, wsend);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if(NS_OK==result) {
if(kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if(NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if(NS_OK==result) {
if((kQuote==aChar) || (kApostrophe==aChar)) {
aScanner.GetChar(aChar);
result=ConsumeQuottedString(aChar,mTextValue,aScanner,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
mTextValue.Insert(aChar,0);
mTextValue.Append(aChar);
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// bug then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetain)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if(kGreaterThan==aChar){
mHasEqualWithoutValue=PR_TRUE;
}
else if(kAmpersand==aChar) {
// XXX - Discard script entity for now....except in
// view-source
aScanner.GetChar(aChar);
PRBool discard=!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE);
if (NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if (NS_OK==result) {
if ((kQuote==aChar) || (kApostrophe==aChar)) {
aScanner.GetChar(aChar);
result=ConsumeQuotedString(aChar,mTextValue,aScanner,aFlag);
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
mTextValue.Insert(aChar,0);
mTextValue.Append(aChar);
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
mTextValue.Append(aChar);
result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner);
}
if(discard) mTextValue.Truncate();
}
else {
aScanner.GetChar(aChar);
mTextValue.Append(aChar); //it's an alphanum attribute...
result=ConsumeAttributeValueText(mTextValue,aScanner,kAttributeTerminalChars,aFlag);
}
}//if
if(NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// bug then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetain)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if (kGreaterThan==aChar){
mHasEqualWithoutValue=PR_TRUE;
}
else {
result=ConsumeAttributeValueText(mTextValue,
aScanner,
kAttributeTerminalChars,
aFlag);
}
}//if
if (NS_OK==result) {
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
result = aScanner.ReadWhitespace(mTextValue);
}
else {
result = aScanner.SkipWhitespace();
}
}
}//if
}//if
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if(kQuote==aChar){
result=aScanner.SkipOver(aChar); //strip quote.
}
}
}//if
} //if
}//if (consume optional value)
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if (kQuote==aChar){
result=aScanner.SkipOver(aChar); //strip quote.
}
}
}//if
} //if
}//if (consume optional value)
if(NS_OK==result) {
result=aScanner.Peek(aChar);
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
}
} //if
if (NS_OK==result) {
result=aScanner.Peek(aChar);
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
}
}//if
return result;
}
@ -1900,8 +1871,6 @@ CEntityToken::CEntityToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag
* @return error result
*/
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
if(aChar)
mTextValue.Assign(aChar);
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
return result;
}
@ -1939,52 +1908,80 @@ PRInt32 CEntityToken::GetTokenType(void) {
* @param aScanner -- controller of underlying input source
* @return error result
*/
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
PRUnichar theChar=0;
PRInt32 result=aScanner.Peek(theChar);
if(NS_OK==result) {
if(kLeftBrace==aChar) {
//you're consuming a script entity...
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 1;
while(leftBraceCount!=rightBraceCount) {
result=aScanner.GetChar(aChar);
if(NS_OK!=result) return result;
aString += aChar;
if(aChar==kRightBrace)
rightBraceCount++;
else if(aChar==kLeftBrace)
leftBraceCount++;
}
result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE);
if(NS_OK==result) {
result=aScanner.GetChar(aChar); // This character should be a semicolon
if(NS_OK==result) aString += aChar;
}
} //if
else {
if(kHashsign==aChar) {
if('X'==(toupper((char)theChar))) {
result=aScanner.GetChar(theChar);
aString+=theChar;
}
if(NS_OK==result){
result=aScanner.ReadNumber(aString);
}
}
else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
if(NS_OK==result) {
result=aScanner.Peek(theChar);
if(NS_OK==result) {
if (kSemicolon == theChar) {
// consume semicolon that stopped the scan
aString+=theChar;
result=aScanner.GetChar(theChar);
}
}
}//if
} //else
// Consume an entity reference from the scanner into aString.
//
// aChar    -- the character that selects the kind of entity: kLeftBrace for a
//             script entity, kHashsign for a numeric reference, anything else
//             is treated as the start of a named entity.
//             NOTE(review): the scanner appears to still be positioned on the
//             '&' when this is called (see the "Consume &" calls below) --
//             confirm against callers.
// aString  -- receives the entity text. The numeric branches Assign() to it
//             (discarding prior contents); the script-entity branch and the
//             trailing ';' Append().
// aScanner -- input source.
//
// Returns NS_HTMLTOKENS_NOT_AN_ENTITY (a success-class code) when the text
// does not look like an entity; in that case no GetChar() call has been made
// on the scanner. Otherwise returns the result of the last scanner operation;
// NS_ENSURE_SUCCESS bails out early with any failure code.
nsresult
CEntityToken::ConsumeEntity(PRUnichar aChar,
nsString& aString,
nsScanner& aScanner) {
nsresult result=NS_OK;
if(kLeftBrace==aChar) {
//you're consuming a script entity...
aScanner.GetChar(aChar); // Consume &
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 0;
// Both counters start at zero and the loop body runs at least once, so the
// first character read here (presumably the '{' itself) is what opens the
// brace nesting. Reading stops once left and right brace counts are equal.
do {
result=aScanner.GetChar(aChar);
NS_ENSURE_SUCCESS(result,result);
aString.Append(aChar);
if(aChar==kRightBrace)
rightBraceCount++;
else if(aChar==kLeftBrace)
leftBraceCount++;
} while(leftBraceCount!=rightBraceCount);
} //if
else {
PRUnichar theChar=0;
if (kHashsign==aChar) {
// Look at offset 2 (presumably the character right after "&#") to decide
// between a decimal reference, a hexadecimal reference, or plain text,
// before consuming anything.
result = aScanner.Peek(theChar,2);
NS_ENSURE_SUCCESS(result,result);
if (nsCRT::IsAsciiDigit(theChar)) {
// Decimal: aString becomes "#" followed by the base-10 digits.
// NOTE(review): the GetChar() results in this branch are not checked.
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
aString.Assign(aChar);
result=aScanner.ReadNumber(aString,10);
}
else if (theChar == 'x' || theChar == 'X') {
// Hexadecimal: aString becomes "#x" (or "#X") followed by base-16 digits.
// NOTE(review): the GetChar() results in this branch are not checked.
aScanner.GetChar(aChar); // Consume &
aScanner.GetChar(aChar); // Consume #
aScanner.GetChar(theChar); // Consume x
aString.Assign(aChar);
aString.Append(theChar);
result=aScanner.ReadNumber(aString,16);
}
else {
// '#' not followed by a digit or x/X: leave the input untouched.
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
else {
// Named entity: the character at offset 1 must be a letter, '_' or ':'.
result = aScanner.Peek(theChar,1);
NS_ENSURE_SUCCESS(result,result);
if(nsCRT::IsAsciiAlpha(theChar) ||
theChar == '_' ||
theChar == ':') {
aScanner.GetChar(aChar); // Consume &
result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
}
else {
// Not a valid entity-name start: leave the input untouched.
return NS_HTMLTOKENS_NOT_AN_ENTITY;
}
}
}
// Common tail for every successfully-read entity: an optional terminating
// ';' is appended to aString and removed from the input.
NS_ENSURE_SUCCESS(result,result);
result=aScanner.Peek(aChar);
NS_ENSURE_SUCCESS(result,result);
if (aChar == kSemicolon) {
// consume semicolon that stopped the scan
aString.Append(aChar);
result=aScanner.GetChar(aChar);
}
return result;
}

Просмотреть файл

@ -232,7 +232,7 @@ class CEntityToken : public CHTMLToken {
virtual PRInt32 GetTokenType(void);
PRInt32 TranslateToUnicodeStr(nsString& aString);
virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
static PRInt32 ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
static nsresult ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner);
static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString);
virtual void DebugDumpSource(nsOutputStream& out);
virtual const nsAReadableString& GetStringValue(void);
@ -281,7 +281,7 @@ class CTextToken: public CHTMLToken {
CTextToken(const nsAReadableString& aString);
virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
nsresult ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens);
nsString& aEndTagName,PRInt32 aMode,PRBool& aFlushTokens);
virtual const char* GetClassName(void);
virtual PRInt32 GetTokenType(void);
virtual PRInt32 GetTextLength(void);

Просмотреть файл

@ -2476,7 +2476,7 @@ nsParser::DetectMetaTag(const char* aBytes,
const char* attrEnd;
// Find the end of the tag
FindInReadable(NS_LITERAL_CSTRING(">"), tagEnd, end);
FindCharInReadable('>', tagEnd, end);
attrEnd = tagEnd.get();
CWordTokenizer<char> tokenizer(attrStart, 0, attrEnd-attrStart);
@ -2517,7 +2517,7 @@ nsParser::DetectMetaTag(const char* aBytes,
(nsCRT::strncasecmp(contentStart+offset,
kCharsetStr, kCharsetStrLen) == 0)) {
// The next word is the charset
if ((offset = contentTokenizer.GetNextWord()) != kNotFound) {
if ((offset = contentTokenizer.GetNextWord(PR_TRUE)) != kNotFound) {
aCharset.Assign(NS_ConvertASCIItoUCS2(contentStart+offset,
contentTokenizer.GetLength()));
}

Просмотреть файл

@ -874,43 +874,36 @@ nsresult nsScanner::ReadIdentifier(nsReadingIterator<PRUnichar>& aStart,
}
/**
* Consume characters until you find the terminal char
* Consume digits
*
* @update gess 3/25/98
* @param aString receives new data from stream
* @param addTerminal tells us whether to append terminal to aString
* @param aString - should contain digits
* @return error code
*/
nsresult nsScanner::ReadNumber(nsString& aString) {
nsresult nsScanner::ReadNumber(nsString& aString,PRInt32 aBase) {
if (!mSlidingBuffer) {
return kEOF;
}
NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
PRUnichar theChar=0;
nsresult result=Peek(theChar);
nsReadingIterator<PRUnichar> origin, current, end;
PRBool found=PR_FALSE;
origin = mCurrentPosition;
current = origin;
end = mEndPosition;
PRBool done = PR_FALSE;
while(current != end) {
theChar=*current;
if(theChar) {
found=PR_FALSE;
if(('a'<=theChar) && (theChar<='f'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='F'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
if(!found) {
done = (theChar < '0' || theChar > '9') &&
((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
(theChar < 'a' || theChar > 'f')
:PR_TRUE);
if(done) {
AppendUnicodeTo(origin, current, aString);
break;
}
@ -930,36 +923,32 @@ nsresult nsScanner::ReadNumber(nsString& aString) {
}
nsresult nsScanner::ReadNumber(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd) {
nsReadingIterator<PRUnichar>& aEnd,
PRInt32 aBase) {
if (!mSlidingBuffer) {
return kEOF;
}
NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");
PRUnichar theChar=0;
nsresult result=Peek(theChar);
nsReadingIterator<PRUnichar> origin, current, end;
PRBool found=PR_FALSE;
origin = mCurrentPosition;
current = origin;
end = mEndPosition;
PRBool done = PR_FALSE;
while(current != end) {
theChar=*current;
if(theChar) {
found=PR_FALSE;
if(('a'<=theChar) && (theChar<='f'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='F'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
if(!found) {
done = (theChar < '0' || theChar > '9') &&
((aBase == 16)? (theChar < 'A' || theChar > 'F') &&
(theChar < 'a' || theChar > 'f')
:PR_TRUE);
if(done) {
aStart = origin;
aEnd = current;
break;

Просмотреть файл

@ -184,9 +184,10 @@ class nsScanner {
nsresult ReadIdentifier(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd,
PRBool allowPunct=PR_FALSE);
nsresult ReadNumber(nsString& aString);
nsresult ReadNumber(nsString& aString,PRInt32 aBase);
nsresult ReadNumber(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd);
nsReadingIterator<PRUnichar>& aEnd,
PRInt32 aBase);
nsresult ReadWhitespace(nsString& aString);
nsresult ReadWhitespace(nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd);

Просмотреть файл

@ -59,6 +59,9 @@
#include "nsFileSpec.h"
#include "nsFixedSizeAllocator.h"
// Status code returned by CEntityToken::ConsumeEntity when the characters
// following '&' do not begin a character/entity reference.
// It is generated as a *success* code, so it passes NS_SUCCEEDED(); callers
// must compare against it explicitly to distinguish it from NS_OK.
#define NS_HTMLTOKENS_NOT_AN_ENTITY \
NS_ERROR_GENERATE_SUCCESS(NS_ERROR_MODULE_HTMLPARSER,2000)
class nsScanner;
class nsTokenAllocator;