зеркало из https://github.com/mozilla/gecko-dev.git
Don't split a surrogate pair over two output buffers. Bug 600974, r=emk, a=blocking
This commit is contained in:
Родитель
e83ac0de10
Коммит
3c47aaf992
|
@ -197,8 +197,7 @@ NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
|
|||
*aDest = UCS2_NO_MAPPING;
|
||||
} else {
|
||||
// let's try supplement mapping
|
||||
NS_ASSERTION(( (iDestlen+1) <= (*aDestLength) ), "no enouth output memory");
|
||||
if ( (iDestlen+1) <= (*aDestLength) )
|
||||
if ( (iDestlen+1) < (*aDestLength) )
|
||||
{
|
||||
if(DecodeToSurrogate(aSrc, aDest))
|
||||
{
|
||||
|
@ -209,7 +208,13 @@ NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
|
|||
*aDest = UCS2_NO_MAPPING;
|
||||
}
|
||||
} else {
|
||||
*aDest = UCS2_NO_MAPPING;
|
||||
if (*aDestLength < 2) {
|
||||
NS_ERROR("insufficient space in output buffer");
|
||||
*aDest = UCS2_NO_MAPPING;
|
||||
} else {
|
||||
rv = NS_OK_UDEC_MOREOUTPUT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -42,14 +42,16 @@
|
|||
#include <string.h>
|
||||
#include "prtypes.h"
|
||||
|
||||
#define STATE_NORMAL 0
|
||||
#define STATE_HALF_CODE_POINT 1
|
||||
#define STATE_FIRST_CALL 2
|
||||
#define STATE_FOUND_BOM 3
|
||||
#define STATE_NORMAL 0
|
||||
#define STATE_HALF_CODE_POINT 1
|
||||
#define STATE_FIRST_CALL 2
|
||||
#define STATE_FOUND_BOM 3
|
||||
#define STATE_ODD_SURROGATE_PAIR 4
|
||||
|
||||
static nsresult
|
||||
UTF16ConvertToUnicode(PRUint8& aState, PRUint8& aOddByte,
|
||||
PRUnichar& aOddHighSurrogate, const char * aSrc,
|
||||
PRUnichar& aOddHighSurrogate, PRUnichar& aOddLowSurrogate,
|
||||
const char * aSrc,
|
||||
PRInt32 * aSrcLength, PRUnichar * aDest,
|
||||
PRInt32 * aDestLength,
|
||||
PRBool aSwapBytes)
|
||||
|
@ -59,32 +61,51 @@ UTF16ConvertToUnicode(PRUint8& aState, PRUint8& aOddByte,
|
|||
PRUnichar* dest = aDest;
|
||||
PRUnichar* destEnd = aDest + *aDestLength;
|
||||
|
||||
if(STATE_FOUND_BOM == aState) // caller found a BOM
|
||||
{
|
||||
if (*aSrcLength < 2)
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
src+=2;
|
||||
aState = STATE_NORMAL;
|
||||
} else if(STATE_FIRST_CALL == aState) { // first time called
|
||||
if (*aSrcLength < 2)
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
|
||||
// Eliminate BOM (0xFEFF). Note that different endian case is taken care of
|
||||
// in |Convert| of LE and BE converters. Here, we only have to
|
||||
// deal with the same endian case. That is, 0xFFFE (byte-swapped BOM) is
|
||||
// illegal.
|
||||
if(0xFEFF == *((PRUnichar*)src)) {
|
||||
switch(aState) {
|
||||
case STATE_FOUND_BOM:
|
||||
if (*aSrcLength < 2)
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
src+=2;
|
||||
} else if(0xFFFE == *((PRUnichar*)src)) {
|
||||
*aSrcLength=0;
|
||||
*aDestLength=0;
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
}
|
||||
aState = STATE_NORMAL;
|
||||
aState = STATE_NORMAL;
|
||||
break;
|
||||
|
||||
case STATE_FIRST_CALL: // first time called
|
||||
if (*aSrcLength < 2)
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
|
||||
// Eliminate BOM (0xFEFF). Note that different endian case is taken care
|
||||
// of in |Convert| of LE and BE converters. Here, we only have to
|
||||
// deal with the same endian case. That is, 0xFFFE (byte-swapped BOM) is
|
||||
// illegal.
|
||||
if(0xFEFF == *((PRUnichar*)src)) {
|
||||
src+=2;
|
||||
} else if(0xFFFE == *((PRUnichar*)src)) {
|
||||
*aSrcLength=0;
|
||||
*aDestLength=0;
|
||||
return NS_ERROR_ILLEGAL_INPUT;
|
||||
}
|
||||
aState = STATE_NORMAL;
|
||||
break;
|
||||
|
||||
case STATE_ODD_SURROGATE_PAIR:
|
||||
if (*aDestLength < 2)
|
||||
*dest++ = UCS2_REPLACEMENT_CHAR;
|
||||
else {
|
||||
*dest++ = aOddHighSurrogate;
|
||||
*dest++ = aOddLowSurrogate;
|
||||
aOddHighSurrogate = aOddLowSurrogate = 0;
|
||||
aState = STATE_NORMAL;
|
||||
}
|
||||
break;
|
||||
|
||||
case STATE_NORMAL:
|
||||
case STATE_HALF_CODE_POINT:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (src == srcEnd) {
|
||||
*aDestLength = 0;
|
||||
*aDestLength = dest - aDest;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -140,17 +161,19 @@ have_codepoint:
|
|||
oddHighSurrogate = u;
|
||||
}
|
||||
else /* if (NS_IS_LOW_SURROGATE(u)) */ {
|
||||
if (oddHighSurrogate) {
|
||||
if (dest == destEnd - 1) {
|
||||
*dest++ = UCS2_REPLACEMENT_CHAR;
|
||||
if (oddHighSurrogate && *aDestLength > 1) {
|
||||
if (dest + 1 >= destEnd) {
|
||||
aOddLowSurrogate = u;
|
||||
aOddHighSurrogate = oddHighSurrogate;
|
||||
aState = STATE_ODD_SURROGATE_PAIR;
|
||||
goto error;
|
||||
}
|
||||
*dest++ = oddHighSurrogate;
|
||||
*dest++ = u;
|
||||
oddHighSurrogate = 0;
|
||||
} else {
|
||||
*dest++ = UCS2_REPLACEMENT_CHAR;
|
||||
}
|
||||
oddHighSurrogate = 0;
|
||||
}
|
||||
}
|
||||
if (src != srcEnd) {
|
||||
|
@ -177,6 +200,7 @@ nsUTF16ToUnicodeBase::Reset()
|
|||
mState = STATE_FIRST_CALL;
|
||||
mOddByte = 0;
|
||||
mOddHighSurrogate = 0;
|
||||
mOddLowSurrogate = 0;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -185,9 +209,11 @@ nsUTF16ToUnicodeBase::GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
|
|||
PRInt32 * aDestLength)
|
||||
{
|
||||
// the left-over data of the previous run have to be taken into account.
|
||||
*aDestLength = (aSrcLength +
|
||||
((STATE_HALF_CODE_POINT == mState) ? 1 : 0)) / 2 +
|
||||
((mOddHighSurrogate != 0) ? 1 : 0);
|
||||
*aDestLength = (aSrcLength + ((STATE_HALF_CODE_POINT == mState) ? 1 : 0)) / 2;
|
||||
if (mOddHighSurrogate)
|
||||
(*aDestLength)++;
|
||||
if (mOddLowSurrogate)
|
||||
(*aDestLength)++;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
@ -216,6 +242,7 @@ nsUTF16BEToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLength,
|
|||
#endif
|
||||
|
||||
nsresult rv = UTF16ConvertToUnicode(mState, mOddByte, mOddHighSurrogate,
|
||||
mOddLowSurrogate,
|
||||
aSrc, aSrcLength, aDest, aDestLength,
|
||||
#ifdef IS_LITTLE_ENDIAN
|
||||
PR_TRUE
|
||||
|
@ -250,6 +277,7 @@ nsUTF16LEToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLength,
|
|||
#endif
|
||||
|
||||
nsresult rv = UTF16ConvertToUnicode(mState, mOddByte, mOddHighSurrogate,
|
||||
mOddLowSurrogate,
|
||||
aSrc, aSrcLength, aDest, aDestLength,
|
||||
#ifdef IS_BIG_ENDIAN
|
||||
PR_TRUE
|
||||
|
@ -308,6 +336,7 @@ nsUTF16ToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLength,
|
|||
}
|
||||
|
||||
nsresult rv = UTF16ConvertToUnicode(mState, mOddByte, mOddHighSurrogate,
|
||||
mOddLowSurrogate,
|
||||
aSrc, aSrcLength, aDest, aDestLength,
|
||||
#ifdef IS_BIG_ENDIAN
|
||||
(mEndian == kLittleEndian)
|
||||
|
|
|
@ -62,6 +62,8 @@ protected:
|
|||
PRUint8 mOddByte;
|
||||
// to store an odd high surrogate left over between runs
|
||||
PRUnichar mOddHighSurrogate;
|
||||
// to store an odd low surrogate left over between runs
|
||||
PRUnichar mOddLowSurrogate;
|
||||
};
|
||||
|
||||
// UTF-16 big endian
|
||||
|
|
Загрузка…
Ссылка в новой задаче