Bug 40477 ( nsbeta2+ ) - Don't rely on ';' to terminate entities.

r=pollmann.
This commit is contained in:
harishd%netscape.com 2000-06-29 23:05:46 +00:00
Родитель 653d06b9ba
Коммит de2c8f3d0a
2 изменённых файлов: 136 добавлений и 26 удалений

Просмотреть файл

@ -504,6 +504,47 @@ HTMLContentSink::SinkTraceNode(PRUint32 aBit,
}
#endif
/**
* Helper to find identifiers that can terminate an entity
*
* harishd 06/23/00
*
* @param aSource - Search for entity terminator in this string
* @param aChar - Holds the terminated character
* @param aStartOffset - Beings search, in aSource, from this offset.
*/
PRInt32
GetEntityTerminator(nsString& aSource,PRUnichar& aChar,PRInt32 aStartOffset=0) {
PRUnichar theChar=aChar=0;
PRInt32 theOffset=aStartOffset;
PRInt32 theLength=aSource.Length();
PRBool found=PR_FALSE;
while(theOffset<theLength) {
theChar=aSource[theOffset++];
if(('a'<=theChar) && (theChar<='z'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='Z'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
else
found=PR_FALSE;
if(!found) {
aChar=theChar;
return theOffset-1;
}
}
return -1;
}
void HTMLContentSink::ReduceEntities(nsString& aString) {
if (mParser) {
@ -525,8 +566,13 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
PRInt32 theLen=aString.Length();
PRInt32 theAmpPos = aString.FindChar('&');
PRInt32 theStartPos=0;
PRInt32 theTermPos=-1;
PRUnichar theTermChar='\0';
const PRUnichar *theBuf=aString.GetUnicode();
nsDTDMode mode;
mHTMLDocument->GetDTDMode(mode);
while(-1!=theAmpPos) {
if(theStartPos<theAmpPos) {
@ -534,19 +580,19 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
theOutString.Append(&theBuf[theStartPos],theAmpPos-theStartPos);
}
PRInt32 theSemiPos=aString.FindChar(';',PR_FALSE,theAmpPos+1);
theTermPos=GetEntityTerminator(aString,theTermChar,theAmpPos+1);
if(-1!=theSemiPos) {
if(-1!=theTermPos) {
//having found a semi, copy chars between amppos and semipos;
aString.Mid(theNCRStr,theAmpPos+1,theSemiPos-theAmpPos-1);
aString.Mid(theNCRStr,theAmpPos+1,theTermPos-theAmpPos-1);
}
else {
aString.Mid(theNCRStr,theAmpPos+1,theLen-theAmpPos-1);
PRInt32 theNewAmpPos=aString.FindChar('&',PR_FALSE,theAmpPos+1);
theSemiPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1;
theTermPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1;
}
theStartPos=theSemiPos+1;
theStartPos=theTermPos+1;
PRUnichar theChar=(theLen>theAmpPos+1) ? aString.CharAt(theAmpPos+1) : '\0';
PRUnichar theEntity=0;
@ -564,24 +610,33 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
default:
if(nsCRT::IsAsciiAlpha(theChar)) {
dtd->ConvertEntityToUnicode(theNCRStr, &theNCRValue);
if (eDTDMode_strict!=mode) {
// XXX - Hack - Nav. does not support entity values > 255
// on the other hand IE supports entity values > 255 with a
// semicolon. I think it's reasonable to emulate IE than Nav.
if(theNCRValue>255 && theTermChar!=';') break;
}
if(-1!=theNCRValue) {
theEntity=PRUnichar(theNCRValue);
}
}
if(!theEntity) {
//what looked like an entity is not really one.
//so let's copy the ncrstring back to the output string
aString.Mid(theNCRStr,theAmpPos,theSemiPos-theAmpPos+1);
theOutString.Append(theNCRStr);
}
break;
} //switch
if(theEntity) {
theOutString.Append(theEntity);
if(theTermChar!='\0' && theTermChar!='&' && theTermChar!=';') {
theOutString.Append(theTermChar);
}
}
theAmpPos = aString.FindChar('&',PR_FALSE,theSemiPos+1);
else {
//what looked like an entity is not really one.
//so let's copy the ncrstring back to the output string
if(theTermChar!='&') { theTermPos++; }
aString.Mid(theNCRStr,theAmpPos,theTermPos-theAmpPos);
theOutString.Append(theNCRStr);
}
theAmpPos = aString.FindChar('&',PR_FALSE,theTermPos);
} //while
if(0<theOutString.Length()) {

Просмотреть файл

@ -504,6 +504,47 @@ HTMLContentSink::SinkTraceNode(PRUint32 aBit,
}
#endif
/**
* Helper to find identifiers that can terminate an entity
*
* harishd 06/23/00
*
* @param aSource - Search for entity terminator in this string
* @param aChar - Holds the terminated character
* @param aStartOffset - Beings search, in aSource, from this offset.
*/
PRInt32
GetEntityTerminator(nsString& aSource,PRUnichar& aChar,PRInt32 aStartOffset=0) {
PRUnichar theChar=aChar=0;
PRInt32 theOffset=aStartOffset;
PRInt32 theLength=aSource.Length();
PRBool found=PR_FALSE;
while(theOffset<theLength) {
theChar=aSource[theOffset++];
if(('a'<=theChar) && (theChar<='z'))
found=PR_TRUE;
else if(('A'<=theChar) && (theChar<='Z'))
found=PR_TRUE;
else if(('0'<=theChar) && (theChar<='9'))
found=PR_TRUE;
else if('#'==theChar)
found=PR_TRUE;
else
found=PR_FALSE;
if(!found) {
aChar=theChar;
return theOffset-1;
}
}
return -1;
}
void HTMLContentSink::ReduceEntities(nsString& aString) {
if (mParser) {
@ -525,8 +566,13 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
PRInt32 theLen=aString.Length();
PRInt32 theAmpPos = aString.FindChar('&');
PRInt32 theStartPos=0;
PRInt32 theTermPos=-1;
PRUnichar theTermChar='\0';
const PRUnichar *theBuf=aString.GetUnicode();
nsDTDMode mode;
mHTMLDocument->GetDTDMode(mode);
while(-1!=theAmpPos) {
if(theStartPos<theAmpPos) {
@ -534,19 +580,19 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
theOutString.Append(&theBuf[theStartPos],theAmpPos-theStartPos);
}
PRInt32 theSemiPos=aString.FindChar(';',PR_FALSE,theAmpPos+1);
theTermPos=GetEntityTerminator(aString,theTermChar,theAmpPos+1);
if(-1!=theSemiPos) {
if(-1!=theTermPos) {
//having found a semi, copy chars between amppos and semipos;
aString.Mid(theNCRStr,theAmpPos+1,theSemiPos-theAmpPos-1);
aString.Mid(theNCRStr,theAmpPos+1,theTermPos-theAmpPos-1);
}
else {
aString.Mid(theNCRStr,theAmpPos+1,theLen-theAmpPos-1);
PRInt32 theNewAmpPos=aString.FindChar('&',PR_FALSE,theAmpPos+1);
theSemiPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1;
theTermPos=(-1==theNewAmpPos) ? theLen+1 : theNewAmpPos-1;
}
theStartPos=theSemiPos+1;
theStartPos=theTermPos+1;
PRUnichar theChar=(theLen>theAmpPos+1) ? aString.CharAt(theAmpPos+1) : '\0';
PRUnichar theEntity=0;
@ -564,24 +610,33 @@ void HTMLContentSink::ReduceEntities(nsString& aString) {
default:
if(nsCRT::IsAsciiAlpha(theChar)) {
dtd->ConvertEntityToUnicode(theNCRStr, &theNCRValue);
if (eDTDMode_strict!=mode) {
// XXX - Hack - Nav. does not support entity values > 255
// on the other hand IE supports entity values > 255 with a
// semicolon. I think it's reasonable to emulate IE than Nav.
if(theNCRValue>255 && theTermChar!=';') break;
}
if(-1!=theNCRValue) {
theEntity=PRUnichar(theNCRValue);
}
}
if(!theEntity) {
//what looked like an entity is not really one.
//so let's copy the ncrstring back to the output string
aString.Mid(theNCRStr,theAmpPos,theSemiPos-theAmpPos+1);
theOutString.Append(theNCRStr);
}
break;
} //switch
if(theEntity) {
theOutString.Append(theEntity);
if(theTermChar!='\0' && theTermChar!='&' && theTermChar!=';') {
theOutString.Append(theTermChar);
}
}
theAmpPos = aString.FindChar('&',PR_FALSE,theSemiPos+1);
else {
//what looked like an entity is not really one.
//so let's copy the ncrstring back to the output string
if(theTermChar!='&') { theTermPos++; }
aString.Mid(theNCRStr,theAmpPos,theTermPos-theAmpPos);
theOutString.Append(theNCRStr);
}
theAmpPos = aString.FindChar('&',PR_FALSE,theTermPos);
} //while
if(0<theOutString.Length()) {