Make IsUTF8String more restrictive: do not allow surrogate characters to be encoded as UTF-8

This commit is contained in:
ftang%netscape.com 1998-10-12 13:21:31 +00:00
Родитель 0acf22e248
Коммит 2c0e548402
1 изменённых файлов: 4 добавлений и 0 удалений

Просмотреть файл

@ -540,6 +540,10 @@ PRBool IsUTF8Text(const char* utf8, int32 len)
/* Not enough trail bytes */ /* Not enough trail bytes */
if( (i + clen) > len) if( (i + clen) > len)
return FALSE; return FALSE;
/* a single surrogate should not appear in 3-byte UTF-8; instead, the pair should be interpreted
as one single UCS4 char and encoded in UTF-8 as 4 bytes */
if((0xED == utf8[i] ) && (0xA0 == (utf8[i+1] & 0xA0 ) ))
return FALSE;
/* 0000 0000 - 0000 07FF : should encode in fewer bytes */ /* 0000 0000 - 0000 07FF : should encode in fewer bytes */
if((0 == (utf8[i] & 0x0F )) && (0 == (utf8[i+1] & 0x20 ) )) if((0 == (utf8[i] & 0x0F )) && (0 == (utf8[i+1] & 0x20 ) ))
return FALSE; return FALSE;