Fixing bug 395651. Make our UTF-8 and UTF-16 comparison code etc more robust when dealing with invalid UTF-16 data. r+sr=jonas@sicking.cc, a=jst@mozilla.org/jonas@sicking.cc

This commit is contained in:
jst@mozilla.org 2007-09-14 16:09:49 -07:00
Родитель bbdfc401cb
Коммит 6132ff04ba
2 изменённых файлов: 69 добавлений и 19 удалений

Просмотреть файл

@ -293,13 +293,18 @@ public:
}
else if (NS_IS_HIGH_SURROGATE(c)) // U+D800 - U+DBFF
{
if (*buffer == end)
if (p == end)
{
NS_ERROR("Unexpected end of buffer after high surrogate");
// Found a high surrogate at the end of the buffer. Flag this
// as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("Unexpected end of buffer after high surrogate");
if (err)
*err = PR_TRUE;
return 0;
*buffer = p;
return 0xFFFD;
}
// D800- DBFF - High Surrogate
@ -319,15 +324,31 @@ public:
}
else
{
NS_ERROR("got a High Surrogate but no low surrogate");
// output nothing.
// Found a high surrogate followed by something other than
// a low surrogate. Flag this as an error and return the
// Unicode replacement character 0xFFFD.
NS_WARNING("got a High Surrogate but no low surrogate");
if (err)
*err = PR_TRUE;
*buffer = p;
return 0xFFFD;
}
}
else // U+DC00 - U+DFFF
{
// DC00- DFFF - Low Surrogate
NS_ERROR("got a low Surrogate but no high surrogate");
// output nothing.
// Found a low surrogate w/o a preceding high surrogate. Flag
// this as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("got a low Surrogate but no high surrogate");
if (err)
*err = PR_TRUE;
*buffer = p;
return 0xFFFD;
}
if (err)
@ -359,10 +380,15 @@ public:
{
if (iter == end)
{
// Found a high surrogate at the end of the buffer. Flag this
// as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("Unexpected end of buffer after high surrogate");
if (err)
*err = PR_TRUE;
return 0;
return 0xFFFD;
}
// D800- DBFF - High Surrogate
@ -381,15 +407,30 @@ public:
}
else
{
NS_ERROR("got a High Surrogate but no low surrogate");
// output nothing.
// Found a high surrogate followed by something other than
// a low surrogate. Flag this as an error and return the
// Unicode replacement character 0xFFFD.
NS_WARNING("got a High Surrogate but no low surrogate");
if (err)
*err = PR_TRUE;
return 0xFFFD;
}
}
else // U+DC00 - U+DFFF
{
// DC00- DFFF - Low Surrogate
NS_ERROR("got a low Surrogate but no high surrogate");
// output nothing.
// Found a low surrogate w/o a preceding high surrogate. Flag
// this as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("got a low Surrogate but no high surrogate");
if (err)
*err = PR_TRUE;
return 0xFFFD;
}
if (err)

Просмотреть файл

@ -1158,11 +1158,20 @@ CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
if (err)
return PR_INT32_MIN;
PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end,
&err);
if (err)
return PR_INT32_MIN;
PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
// The above UTF16CharEnumerator::NextChar() call can
// fail, but if it does for anything other than no data to
// look at (which can't happen here), it returns the
// Unicode replacement character 0xFFFD for the invalid
// data it was fed. Ignore that error and treat invalid
// UTF16 as 0xFFFD.
//
// This matches what our UTF16 to UTF8 conversion code
// does, and thus a UTF8 string that came from an invalid
// UTF16 string will compare equal to the invalid UTF16
// string it came from. Same is true for any other UTF16
// string that differs only in the invalid part of the string.
if (c8_32 != c16_32)
return c8_32 < c16_32 ? -1 : 1;
}