зеркало из https://github.com/mozilla/gecko-dev.git
#64235, 25037
Fix mishandling of a 'stand-alone' octet with the MSB set in CJK (multibyte) encodings. r=ftang, sr=erik, p=Jungshik Shin
This commit is contained in:
Родитель
bffde03348
Коммит
1c3ce9176c
|
@ -83,7 +83,8 @@ NS_IMETHODIMP nsGB2312ToUnicodeV2::ConvertNoBuff(const char* aSrc,
|
|||
break;
|
||||
}
|
||||
|
||||
if ( *aSrc & 0x80 )
|
||||
// we need to handle 0xa0 specially even though it is not a legal GB2312 code point
|
||||
if ( (PRUint8)0xA0 < (PRUint8)*aSrc && (PRUint8)*aSrc < (PRUint8)0xFF)
|
||||
{
|
||||
if(i+1 >= iSrcLength)
|
||||
{
|
||||
|
@ -91,22 +92,39 @@ NS_IMETHODIMP nsGB2312ToUnicodeV2::ConvertNoBuff(const char* aSrc,
|
|||
break;
|
||||
}
|
||||
|
||||
// The source is a GBCode
|
||||
|
||||
|
||||
left = pSrcDBCode->leftbyte;
|
||||
right = pSrcDBCode->rightbyte;
|
||||
|
||||
iGBKToUnicodeIndex = (left - 0x0081)*0x00BF + (right - 0x0040);
|
||||
*pDestDBCode = GBKToUnicodeTable[iGBKToUnicodeIndex];
|
||||
// To make sure, the second byte has to be checked as well
|
||||
// The valid 2nd byte range: [0xA1,0xFE]
|
||||
if ( (PRUint8)0xA0 < right && right < (PRUint8)0xFF )
|
||||
{
|
||||
// Valid GB 2312 code point
|
||||
iGBKToUnicodeIndex = (left - 0x0081)*0x00BF + (right - 0x0040);
|
||||
*pDestDBCode = GBKToUnicodeTable[iGBKToUnicodeIndex];
|
||||
aSrc += 2;
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid GB 2312 code point
|
||||
|
||||
aSrc += 2;
|
||||
i++;
|
||||
*pDestDBCode = (PRUnichar)0xfffd;
|
||||
aSrc++;
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// The source is an ASCII
|
||||
*pDestDBCode = (PRUnichar) ( ((char )(*aSrc)) & 0x00ff);
|
||||
aSrc++;
|
||||
if ((PRUint8)*aSrc <= (PRUint8) 0x9f && (PRUint8)*aSrc >= (PRUint8) 0x80)
|
||||
*pDestDBCode = (PRUnichar)0xfffd;
|
||||
else
|
||||
// The source is an ASCII
|
||||
*pDestDBCode = (PRUnichar) ( ((char )(*aSrc)) & 0x00ff);
|
||||
|
||||
aSrc++;
|
||||
|
||||
}
|
||||
|
||||
iDestlen++;
|
||||
|
|
|
@ -70,23 +70,43 @@ NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
|
|||
break;
|
||||
}
|
||||
|
||||
if ( *aSrc & 0x80 )
|
||||
// The valid range for the 1st byte is [0x81,0xFE]
|
||||
if ( (PRUint8) 0x80 < (PRUint8)*aSrc && (PRUint8)*aSrc < (PRUint8)0xff )
|
||||
{
|
||||
if(i+1 >= iSrcLength)
|
||||
{
|
||||
rv = NS_OK_UDEC_MOREINPUT;
|
||||
break;
|
||||
}
|
||||
// The source is a GBCode
|
||||
|
||||
|
||||
left = pSrcDBCode->leftbyte;
|
||||
right = pSrcDBCode->rightbyte;
|
||||
// To make sure, the second byte has to be checked as well.
|
||||
// In GBK, the second byte range is [0x40,0x7E] and [0x80,0xFE]
|
||||
if ( right >= (PRUint8)0x40 && (right & 0x7f) != (PRUint8)0x7F)
|
||||
{
|
||||
// Valid GBK code
|
||||
iGBKToUnicodeIndex = (left - 0x0081)*0x00BF + (right - 0x0040);
|
||||
*pDestDBCode = GBKToUnicodeTable[iGBKToUnicodeIndex];
|
||||
aSrc += 2;
|
||||
i++;
|
||||
}
|
||||
else if ( left == (PRUint8)0xA0 )
|
||||
{
|
||||
// stand-alone (not followed by a valid second byte) 0xA0 !
|
||||
// treat it as valid a la Netscape 4.x
|
||||
*pDestDBCode = (PRUnichar) ( ((char )(*aSrc)) & 0x00ff);
|
||||
aSrc++;
|
||||
}
|
||||
|
||||
iGBKToUnicodeIndex = (left - 0x0081)*0x00BF + (right - 0x0040);
|
||||
*pDestDBCode = GBKToUnicodeTable[iGBKToUnicodeIndex];
|
||||
else
|
||||
{
|
||||
// Invalid GBK code point (second byte should be 0x40 or higher)
|
||||
*pDestDBCode = (PRUnichar)0xfffd;
|
||||
aSrc++;
|
||||
}
|
||||
|
||||
aSrc += 2;
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -100,7 +100,7 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
switch(mState)
|
||||
{
|
||||
case 0:
|
||||
if(*src & 0x80)
|
||||
if(*src & 0x80 && *src != (unsigned char)0xa0)
|
||||
{
|
||||
mData = fbIdx[*src & 0x7F];
|
||||
if(mData < 0xE000 )
|
||||
|
@ -135,6 +135,11 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
*dest++ = 0xFFFD;
|
||||
// if the first byte is valid for SJIS but the second
|
||||
// is not while being a valid US-ASCII(i.e. < 0x40), save it
|
||||
// instead of eating it up !
|
||||
if ( ! (*src & 0xc0) )
|
||||
*dest++ = (PRUnichar) *src;
|
||||
} else {
|
||||
*dest++ = gJis0208map[mData+off];
|
||||
}
|
||||
|
@ -149,6 +154,9 @@ NS_IMETHODIMP nsShiftJISToUnicode::Convert(
|
|||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
*dest++ = 0xFFFD;
|
||||
// see the comment above for mstate=1
|
||||
if ( ! (*src & 0xc0) )
|
||||
*dest++ = (PRUnichar) *src;
|
||||
} else {
|
||||
*dest++ = mData + off;
|
||||
}
|
||||
|
@ -272,7 +280,7 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
switch(mState)
|
||||
{
|
||||
case 0:
|
||||
if(*src & 0x80)
|
||||
if(*src & 0x80 && *src != (unsigned char)0xa0)
|
||||
{
|
||||
mData = fbIdx[*src & 0x7F];
|
||||
if(mData != 0xFFFD )
|
||||
|
@ -304,7 +312,12 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
{
|
||||
PRUint8 off = sbIdx[*src];
|
||||
if(0xFF == off) {
|
||||
*dest++ = 0xFFFD;
|
||||
*dest++ = 0xFFFD;
|
||||
// if the first byte is valid for EUC-JP but the second
|
||||
// is not while being a valid US-ASCII(i.e. < 0x40), save it
|
||||
// instead of eating it up !
|
||||
if ( ! (*src & 0xc0) )
|
||||
*dest++ = (PRUnichar) *src;;
|
||||
} else {
|
||||
*dest++ = gJis0208map[mData+off];
|
||||
}
|
||||
|
@ -319,7 +332,11 @@ NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
|
|||
if((0xA1 <= *src) && (*src <= 0xDF)) {
|
||||
*dest++ = (0xFF61-0x00A1) + *src;
|
||||
} else {
|
||||
*dest++ = 0xFFFD;
|
||||
*dest++ = 0xFFFD;
|
||||
// if 0x8e is not followed by a valid JIS X 0201 byte
|
||||
// but by a valid US-ASCII, save it instead of eating it up.
|
||||
if ( (PRUint8)*src < (PRUint8)0x7f )
|
||||
*dest++ = (PRUnichar) *src;;
|
||||
}
|
||||
if(dest >= destEnd)
|
||||
goto error1;
|
||||
|
|
Загрузка…
Ссылка в новой задаче