зеркало из https://github.com/mozilla/pjs.git
Fixing bug 395651. Make our UTF-8 and UTF-16 comparison code etc more robust when dealing with invalid UTF-16 data. r+sr=jonas@sicking.cc, a=jst@mozilla.org/jonas@sicking.cc
This commit is contained in:
Родитель
bbdfc401cb
Коммит
6132ff04ba
|
@ -293,13 +293,18 @@ public:
|
|||
}
|
||||
else if (NS_IS_HIGH_SURROGATE(c)) // U+D800 - U+DBFF
|
||||
{
|
||||
if (*buffer == end)
|
||||
if (p == end)
|
||||
{
|
||||
NS_ERROR("Unexpected end of buffer after high surrogate");
|
||||
// Found a high surrogate at the end of the buffer. Flag this
|
||||
// as an error and return the Unicode replacement
|
||||
// character 0xFFFD.
|
||||
|
||||
NS_WARNING("Unexpected end of buffer after high surrogate");
|
||||
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
|
||||
return 0;
|
||||
*buffer = p;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
// D800- DBFF - High Surrogate
|
||||
|
@ -319,15 +324,31 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
NS_ERROR("got a High Surrogate but no low surrogate");
|
||||
// output nothing.
|
||||
// Found a high surrogate followed by something other than
|
||||
// a low surrogate. Flag this as an error and return the
|
||||
// Unicode replacement character 0xFFFD.
|
||||
|
||||
NS_WARNING("got a High Surrogate but no low surrogate");
|
||||
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
*buffer = p;
|
||||
return 0xFFFD;
|
||||
}
|
||||
}
|
||||
else // U+DC00 - U+DFFF
|
||||
{
|
||||
// DC00- DFFF - Low Surrogate
|
||||
NS_ERROR("got a low Surrogate but no high surrogate");
|
||||
// output nothing.
|
||||
|
||||
// Found a low surrogate w/o a preceding high surrogate. Flag
|
||||
// this as an error and return the Unicode replacement
|
||||
// character 0xFFFD.
|
||||
|
||||
NS_WARNING("got a low Surrogate but no high surrogate");
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
*buffer = p;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
if (err)
|
||||
|
@ -359,10 +380,15 @@ public:
|
|||
{
|
||||
if (iter == end)
|
||||
{
|
||||
// Found a high surrogate at the end of the buffer. Flag this
|
||||
// as an error and return the Unicode replacement
|
||||
// character 0xFFFD.
|
||||
|
||||
NS_WARNING("Unexpected end of buffer after high surrogate");
|
||||
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
|
||||
return 0;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
// D800- DBFF - High Surrogate
|
||||
|
@ -381,15 +407,30 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
NS_ERROR("got a High Surrogate but no low surrogate");
|
||||
// output nothing.
|
||||
// Found a high surrogate followed by something other than
|
||||
// a low surrogate. Flag this as an error and return the
|
||||
// Unicode replacement character 0xFFFD.
|
||||
|
||||
NS_WARNING("got a High Surrogate but no low surrogate");
|
||||
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
return 0xFFFD;
|
||||
}
|
||||
}
|
||||
else // U+DC00 - U+DFFF
|
||||
{
|
||||
// DC00- DFFF - Low Surrogate
|
||||
NS_ERROR("got a low Surrogate but no high surrogate");
|
||||
// output nothing.
|
||||
|
||||
// Found a low surrogate w/o a preceding high surrogate. Flag
|
||||
// this as an error and return the Unicode replacement
|
||||
// character 0xFFFD.
|
||||
|
||||
NS_WARNING("got a low Surrogate but no high surrogate");
|
||||
|
||||
if (err)
|
||||
*err = PR_TRUE;
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
if (err)
|
||||
|
|
|
@ -1158,11 +1158,20 @@ CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
|
|||
if (err)
|
||||
return PR_INT32_MIN;
|
||||
|
||||
PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end,
|
||||
&err);
|
||||
if (err)
|
||||
return PR_INT32_MIN;
|
||||
|
||||
PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
|
||||
// The above UTF16CharEnumerator::NextChar() call can
|
||||
// fail, but if it does for anything other than no data to
|
||||
// look at (which can't happen here), it returns the
|
||||
// Unicode replacement character 0xFFFD for the invalid
|
||||
// data it was fed. Ignore that error and treat invalid
|
||||
// UTF16 as 0xFFFD.
|
||||
//
|
||||
// This matches what our UTF16 to UTF8 conversion code
|
||||
// does, and thus a UTF8 string that came from an invalid
|
||||
// UTF16 string will compare equal to the invalid UTF16
|
||||
// string it came from. Same is true for any other UTF16
|
||||
// string that differs only in the invalid part of the string.
|
||||
|
||||
if (c8_32 != c16_32)
|
||||
return c8_32 < c16_32 ? -1 : 1;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче