Don't detect HZ as UTF-8: bug 119079 r=nhotta sr=mscott a=asa

This commit is contained in:
jgmyers%netscape.com 2002-03-07 21:25:13 +00:00
Родитель 1ced872252
Коммит e897fb906e
1 изменённый файл: 40 добавлений и 5 удалений

Просмотреть файл

@ -854,15 +854,48 @@ static char *intlmime_decode_q(const char *in, unsigned length)
return NULL; return NULL;
} }
static PRBool intl_is_legal_utf8(const char *input, unsigned len) static PRBool intl_is_utf8(const char *input, unsigned len)
{ {
PRInt32 c; PRInt32 c;
/*
* Input which contains legal HZ sequences should not be detected
* as UTF-8.
*/
enum { hz_initial, /* No HZ seen yet */
hz_escaped, /* Inside an HZ ~{ escape sequence */
hz_seen, /* Have seen at least one complete HZ sequence */
hz_notpresent /* Have seen something that is not legal HZ */
} hz_state;
hz_state = hz_initial;
while (len) { while (len) {
c = (unsigned char)*input++; c = (unsigned char)*input++;
len--; len--;
if (c == 0x1B) return PR_FALSE; if (c == 0x1B) return PR_FALSE;
if (c == '~') {
switch (hz_state) {
case hz_initial:
case hz_seen:
if (*input == '{') {
hz_state = hz_escaped;
} else if (*input == '~') {
/* ~~ is the HZ encoding of ~. Skip over second ~ as well */
hz_state = hz_seen;
input++;
len--;
} else {
hz_state = hz_notpresent;
}
break;
case hz_escaped:
if (*input == '}') hz_state = hz_seen;
break;
}
continue;
}
if ((c & 0x80) == 0) continue; if ((c & 0x80) == 0) continue;
hz_state = hz_notpresent;
if ((c & 0xE0) == 0xC0) { if ((c & 0xE0) == 0xC0) {
if (len < 1 || (*input & 0xC0) != 0x80 || if (len < 1 || (*input & 0xC0) != 0x80 ||
((c & 0x1F)<<6) + (*input & 0x3f) < 0x80) { ((c & 0x1F)<<6) + (*input & 0x3f) < 0x80) {
@ -888,6 +921,7 @@ static PRBool intl_is_legal_utf8(const char *input, unsigned len)
return PR_FALSE; return PR_FALSE;
} }
} }
if (hz_state == hz_seen) return PR_FALSE;
return PR_TRUE; return PR_TRUE;
} }
@ -904,7 +938,8 @@ static void intl_copy_uncoded_header(char **output, const char *input,
} }
// Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
while (len && (c = (unsigned char)*input++) != 0x1B && !(c & 0x80)) { // A ~ may indicate it is HZ
while (len && (c = (unsigned char)*input++) != 0x1B && c != '~' && !(c & 0x80)) {
*dest++ = c; *dest++ = c;
len--; len--;
} }
@ -914,9 +949,9 @@ static void intl_copy_uncoded_header(char **output, const char *input,
} }
input--; input--;
// If not legal UTF-8, treat as default charset // If not UTF-8, treat as default charset
nsAutoString tempUnicodeString; nsAutoString tempUnicodeString;
if (!intl_is_legal_utf8(input, len) && if (!intl_is_utf8(input, len) &&
NS_SUCCEEDED(ConvertToUnicode(default_charset, nsCAutoString(input, len).get(), tempUnicodeString))) { NS_SUCCEEDED(ConvertToUnicode(default_charset, nsCAutoString(input, len).get(), tempUnicodeString))) {
NS_ConvertUCS2toUTF8 utf8_text(tempUnicodeString); NS_ConvertUCS2toUTF8 utf8_text(tempUnicodeString);
PRInt32 output_len = utf8_text.Length(); PRInt32 output_len = utf8_text.Length();
@ -1081,7 +1116,7 @@ extern "C" char *MIME_DecodeMimeHeader(const char *header,
// If no MIME encoded then do nothing otherwise decode the input. // If no MIME encoded then do nothing otherwise decode the input.
if (PL_strstr(header, "=?") || if (PL_strstr(header, "=?") ||
(default_charset && !intl_is_legal_utf8(header, strlen(header)))) { (default_charset && !intl_is_utf8(header, strlen(header)))) {
result = intl_decode_mime_part2_str(header, default_charset, override_charset); result = intl_decode_mime_part2_str(header, default_charset, override_charset);
} else if (eatContinuations && } else if (eatContinuations &&
(PL_strchr(header, '\n') || PL_strchr(header, '\r'))) { (PL_strchr(header, '\n') || PL_strchr(header, '\r'))) {