fix MIME encoded-word decoder: bugs 51453 58114 r=ducarroz sr=sspitzer a=asa

This commit is contained in:
jgmyers%netscape.com 2001-02-08 00:12:50 +00:00
Родитель 9e6e35f1f7
Коммит ba976f5df1
3 изменённых файлов: 239 добавлений и 269 удалений

Просмотреть файл

@ -299,12 +299,11 @@ static PRBool stateful_encoding(const char* charset)
static PRBool intlmime_only_ascii_str(const char *s); static PRBool intlmime_only_ascii_str(const char *s);
static char * intlmime_encode_next8bitword(char *src); static char * intlmime_encode_next8bitword(char *src);
/* we should consider replace this base64 decodeing and encoding /* we should consider replace this base64 encoding
function with a better one */ function with a better one */
static int intlmime_decode_base64 (const char *in, char *out); static char * intlmime_decode_q(const char *in, unsigned length);
static char * intlmime_decode_qp(char *in); static char * intlmime_decode_b(const char *in, unsigned length);
static int intlmime_encode_base64 (const char *in, char *out); static int intlmime_encode_base64 (const char *in, char *out);
static char * intlmime_decode_base64_buf(char *subject);
static char * intlmime_encode_base64_buf(char *subject, size_t size); static char * intlmime_encode_base64_buf(char *subject, size_t size);
static char * intlmime_encode_qp_buf(char *subject); static char * intlmime_encode_qp_buf(char *subject);
@ -374,62 +373,95 @@ static char *intlmime_encode_base64_buf(char *subject, size_t size)
return output; return output;
} }
#define XX 127
/*
 * Lookup table for base64 decoding: maps a byte to its 6-bit value, or to
 * XX when the byte is not part of the base64 alphabet ('=' maps to XX too).
 */
static char index_64[256] = {
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, XX,XX,XX,63,
    52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
    XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
    XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
};
#define CHAR64(c)  (index_64[(unsigned char)(c)])

/*
 * Decode the base64 ("B" encoding) text in[0 .. length-1].
 *
 * Bytes outside the base64 alphabet are skipped. '=' is rejected in the
 * first two positions of a 4-character group and accepted as padding in the
 * last two; decoding stops at the first padded group.
 *
 * Returns a PR_Malloc'ed, NUL-terminated buffer holding the decoded bytes,
 * or NULL if the allocation fails or the input is malformed (stray '=',
 * truncated group, or "x=" followed by a non-'=' character).
 */
static char *intlmime_decode_b (const char *in, unsigned length)
{
  /* Decoded data is never longer than the input, so length+1 is enough. */
  char *decoded = (char *)PR_Malloc(length + 1);
  char *wp = decoded;
  int quad[4];
  int b0, b1, b2, b3;
  int slot;

  if (decoded == NULL)
    return NULL;

  for (;;) {
    /* Collect the next four significant characters. */
    for (slot = 0; slot < 4; slot++) {
      for (; length > 0 && CHAR64(*in) == XX; in++, length--) {
        if (*in == '=') {
          if (slot < 2) goto badsyntax;   /* padding cannot start a group */
          break;                          /* keep '=' as this slot's value */
        }
      }
      if (length == 0) {
        if (slot == 0) goto finish;       /* clean end between groups */
        goto badsyntax;                   /* input ended inside a group */
      }
      quad[slot] = *in++;
      length--;
    }

    b0 = CHAR64(quad[0]);
    b1 = CHAR64(quad[1]);
    *wp++ = ((b0 << 2) | ((b1 & 0x30) >> 4));

    if (quad[2] == '=') {
      if (quad[3] != '=') goto badsyntax;
      break; /* End of data */
    }
    b2 = CHAR64(quad[2]);
    *wp++ = (((b1 & 0x0F) << 4) | ((b2 & 0x3C) >> 2));

    if (quad[3] == '=')
      break; /* End of data */

    b3 = CHAR64(quad[3]);
    *wp++ = (((b2 & 0x03) << 6) | b3);
  }

 finish:
  *wp = '\0';
  return decoded;

 badsyntax:
  PR_Free(decoded);
  return NULL;
}
static char *intlmime_encode_qp_buf(char *subject) static char *intlmime_encode_qp_buf(char *subject)
@ -958,218 +990,179 @@ char *utf8_EncodeMimePartIIStr(const char *subject, char *charset, int maxLineLe
return buf; return buf;
} }
/*
 * Lookup table for the hexadecimal digits used by the "Q" encoding: maps
 * '0'-'9', 'A'-'F' and 'a'-'f' to 0..15 and every other byte to XX.
 */
static char index_hex[256] = {
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,XX,XX, XX,XX,XX,XX,
    XX,10,11,12, 13,14,15,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,10,11,12, 13,14,15,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
    XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
};
#define HEXCHAR(c)  (index_hex[(unsigned char)(c)])

/*
 * Decode the "Q" encoded text in[0 .. length-1].
 *
 *   "=hh"  (two hex digits)  -> the byte 0xhh
 *   "_"                      -> a space
 *   any other 7-bit byte     -> copied unchanged
 *
 * Returns a PR_Malloc'ed, NUL-terminated buffer, or NULL if the allocation
 * fails or the input is malformed ('=' not followed by two hex digits
 * within the remaining length, or a byte with the high bit set).
 */
static char *intlmime_decode_q(const char *in, unsigned length)
{
  char *result = (char *)PR_Malloc(length + 1);
  char *cursor = result;

  if (result == NULL)
    return NULL;
  memset(result, 0, length + 1);

  while (length > 0) {
    if (*in == '=') {
      /* The length test comes first so in[1]/in[2] are only read when
         they lie inside the input. */
      if (length < 3)
        goto badsyntax;
      if (HEXCHAR(in[1]) == XX || HEXCHAR(in[2]) == XX)
        goto badsyntax;
      *cursor++ = (HEXCHAR(in[1]) << 4) + HEXCHAR(in[2]);
      in += 3;
      length -= 3;
    }
    else if (*in == '_') {
      *cursor++ = ' ';
      in++;
      length--;
    }
    else {
      if (*in & 0x80)
        goto badsyntax;       /* raw 8-bit data is rejected */
      *cursor++ = *in++;
      length--;
    }
  }
  *cursor++ = '\0';
  return result;

 badsyntax:
  PR_Free(result);
  return NULL;
}
/* static char *especials = "()<>@,;:\\\"/[]?.=";
intlmime_is_mime_part2_header:
*/
static PRBool intlmime_is_mime_part2_header(const char *header)
{
return ((
PL_strstr(header, "=?") &&
(
PL_strstr(header, "?q?") ||
PL_strstr(header, "?Q?") ||
PL_strstr(header, "?b?") ||
PL_strstr(header, "?B?")
)
) ? PR_TRUE : PR_FALSE );
}
static static
char *intl_decode_mime_part2_str(const char *header, char* charset) char *intl_decode_mime_part2_str(const char *header)
{ {
char *work_buf = NULL;
char *output_p = NULL; char *output_p = NULL;
char *retbuff = NULL; char *retbuff = NULL;
char *p, *q, *decoded_text; const char *p, *q, *r;
char *begin; /* tracking pointer for where we are in the work buffer */ char *decoded_text, *utf8_text;
int ret = 0; const char *begin; /* tracking pointer for where we are in the input buffer */
int last_saw_encoded_word = 0;
const char *charset_start, *charset_end;
char charset[80];
// initialize charset name to an empty string // initialize charset name to an empty string
if (charset) charset[0] = '\0';
charset[0] = '\0';
mime_SACopy(&work_buf, header); /* temporary buffer */ /* Assume no more than 2X expansion due to UTF-8 conversion */
mime_SACopy(&retbuff, header); retbuff = (char *)PR_Malloc(2*strlen(header)+1);
if (work_buf == NULL || retbuff == NULL) if (retbuff == NULL)
return NULL; return NULL;
output_p = retbuff; output_p = retbuff;
begin = work_buf; begin = header;
while (*begin != '\0') while ((p = PL_strstr(begin, "=?")) != 0) {
{ if (last_saw_encoded_word) {
char * output_text; /* See if it's all whitespace. */
for (q = begin; q < p; q++) {
if (!PL_strchr(" \t\r\n", *q)) break;
}
}
/* GetCharset(); */ if (!last_saw_encoded_word || q < p) {
p = strstr(begin, "=?"); /* copy the part before the encoded-word */
if (p == NULL) PL_strncpy(output_p, begin, p - begin);
break; /* exit the loop because the rest are not encoded */ output_p += p - begin;
*p = '\0'; begin = p;
/* skip strings don't need conversion */ }
strncpy(output_p, begin, p - begin);
output_p += p - begin;
p += 2; p += 2;
begin = p;
q = strchr(p, '?'); /* Get charset info */ /* Get charset info */
if (q == NULL) charset_start = p;
break; /* exit the loop because there are no charset info */ charset_end = 0;
*q++ = '\0'; for (q = p; *q != '?'; q++) {
if (charset) if (*q <= ' ' || PL_strchr(especials, *q)) {
PL_strcpy(charset, nsCRT::strcasecmp(p, "us-ascii") ? p : "ISO-8859-1"); goto badsyntax;
}
if (*(q+1) == '?' && /* RFC 2231 section 5 */
(*q == 'Q' || *q == 'q' || *q == 'B' || *q == 'b')) if (!charset_end && *q == '*') {
{ charset_end = q;
p = strstr(q+2, "?="); }
if(p != NULL) }
*p = '\0'; if (!charset_end) {
if(*q == 'Q' || *q == 'q') charset_end = q;
decoded_text = intlmime_decode_qp(q+2);
else
decoded_text = intlmime_decode_base64_buf(q+2);
} }
else
break; /* exit the loop because we don't know the encoding method */
begin = (p != NULL) ? p + 2 : (q + nsCRT::strlen(q)); /* Check for too-long charset name */
if ((unsigned)(charset_end - charset_start) >= sizeof(charset)) goto badsyntax;
memcpy(charset, charset_start, charset_end - charset_start);
charset[charset_end - charset_start] = 0;
q++;
if (*q != 'Q' && *q != 'q' && *q != 'B' && *q != 'b')
goto badsyntax;
if (q[1] != '?')
goto badsyntax;
r = q;
for (r = q + 2; *r != '?'; r++) {
if (*r <= ' ') goto badsyntax;
}
if (r == q + 2 || r[1] != '=') goto badsyntax;
if(*q == 'Q' || *q == 'q')
decoded_text = intlmime_decode_q(q+2, r - (q+2));
else
decoded_text = intlmime_decode_b(q+2, r - (q+2));
if (decoded_text == NULL) if (decoded_text == NULL)
break; /* exit the loop because we have problem to decode */ goto badsyntax;
ret = 1; if (MIME_ConvertString(charset, "UTF-8", decoded_text, &utf8_text) == 0) {
PL_strcpy(output_p, (char *)utf8_text);
output_p += nsCRT::strlen(utf8_text);
PR_Free(utf8_text);
} else {
PL_strcpy(output_p, "\347\277\275"); /* UTF-8 encoding of U+FFFD */
output_p += 3;
}
output_text = (char *)decoded_text;
PR_ASSERT(output_text != NULL);
PL_strcpy(output_p, (char *)output_text);
output_p += nsCRT::strlen(output_text);
if (output_text != decoded_text)
PR_Free(output_text);
PR_Free(decoded_text); PR_Free(decoded_text);
begin = r + 2;
last_saw_encoded_word = 1;
continue;
badsyntax:
/* copy the part before the encoded-word */
PL_strncpy(output_p, begin, p - begin);
output_p += p - begin;
begin = p;
last_saw_encoded_word = 0;
} }
PL_strcpy(output_p, (char *)begin); /* put the tail back */ PL_strcpy(output_p, (char *)begin); /* put the tail back */
if (work_buf) return retbuff;
PR_Free(work_buf);
if (ret)
{
return retbuff;
}
else
{
PR_Free(retbuff);
PL_strcpy(charset, "us-ascii"); /* charset was not encoded, put us-ascii */
return nsCRT::strdup(header); /* nothing to decode */
}
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -1475,25 +1468,28 @@ extern "C" char *MIME_DecodeMimePartIIStr(const char *header, char *charset,
return nsnull; return nsnull;
// If no MIME encoded then do nothing otherwise decode the input. // If no MIME encoded then do nothing otherwise decode the input.
if (*header != '\0' && intlmime_is_mime_part2_header(header)) { if (*header != '\0' && PL_strstr(header, "=?")) {
result = intl_decode_mime_part2_str(header, charset); result = intl_decode_mime_part2_str(header);
if (charset) PL_strcpy(charset, "UTF-8");
} }
else else if (charset && *charset == '\0') {
{ // no charset name is specified then assume it's us-ascii (or ISO-8859-1 if 8bit)
if (*charset == '\0') { // and dup the input (later change the caller to avoid the duplication)
// no charset name is specified then assume it's us-ascii (or ISO-8859-1 if 8bit) unsigned char *cp = (unsigned char *) header;
// and dup the input (later change the caller to avoid the duplication) PL_strcpy(charset, "us-ascii");
unsigned char *cp = (unsigned char *) header; while (*cp) {
PL_strcpy(charset, "us-ascii"); if (*cp > 127) {
while (*cp) { PL_strcpy(charset, "ISO-8859-1");
if (*cp > 127) { break;
PL_strcpy(charset, "ISO-8859-1");
break;
}
cp++;
} }
cp++;
} }
result = nsCRT::strdup(header); result = nsCRT::strdup(header);
} else if (eatContinuations &&
(PL_strchr(header, '\n') || PL_strchr(header, '\r'))) {
result = nsCRT::strdup(header);
} else {
eatContinuations = PR_FALSE;
} }
if (eatContinuations) if (eatContinuations)
result = MIME_StripContinuations(result); result = MIME_StripContinuations(result);

Просмотреть файл

@ -42,9 +42,6 @@
#include "mimemoz2.h" #include "mimemoz2.h"
// Forward declares... // Forward declares...
extern "C" char *MIME_DecodeMimePartIIStr(const char *header,
char *charset,
PRBool eatContinuations);
extern "C" char *MIME_StripContinuations(char *original); extern "C" char *MIME_StripContinuations(char *original);
int MimeHeaders_build_heads_list(MimeHeaders *hdrs); int MimeHeaders_build_heads_list(MimeHeaders *hdrs);
@ -889,8 +886,6 @@ mime_decode_filename(char *name)
{ {
char *s = name, *d = name; char *s = name, *d = name;
char *cvt, *returnVal = NULL; char *cvt, *returnVal = NULL;
char charsetName[128];
charsetName[0] = 0;
while (*s) while (*s)
{ {
@ -904,29 +899,11 @@ mime_decode_filename(char *name)
*d = 0; *d = 0;
returnVal = name; returnVal = name;
/* If there is a MIME-2 encoded-word in the string, cvt = MIME_DecodeMimePartIIStr(returnVal, 0, PR_TRUE);
get the charset of the first one and decode to that charset. */
s = PL_strstr(returnVal, "=?");
if (s)
{
cvt = MIME_DecodeMimePartIIStr(returnVal, charsetName, PR_TRUE); if (cvt && cvt != returnVal) {
returnVal = cvt;
// rhp - trying to fix header conversion bug
//
if (cvt && cvt != returnVal)
{
char *newString = nsnull;
PRInt32 res = MIME_ConvertString(charsetName, "UTF-8", cvt, &newString);
if ( (res != 0) || (!newString) )
returnVal = cvt;
else
{
PR_FREEIF(cvt);
returnVal = newString;
}
} }
}
return returnVal; return returnVal;
} }

Просмотреть файл

@ -77,9 +77,6 @@ static NS_DEFINE_CID(kIOServiceCID, NS_IOSERVICE_CID);
// Text Scanning... // Text Scanning...
static NS_DEFINE_CID(kTXTToHTMLConvCID, MOZITXTTOHTMLCONV_CID); static NS_DEFINE_CID(kTXTToHTMLConvCID, MOZITXTTOHTMLCONV_CID);
extern "C" char *MIME_DecodeMimePartIIStr(const char *header,
char *charset,
PRBool eatContinuations);
void ValidateRealName(nsMsgAttachmentData *aAttach, MimeHeaders *aHdrs); void ValidateRealName(nsMsgAttachmentData *aAttach, MimeHeaders *aHdrs);
static MimeHeadersState MIME_HeaderType; static MimeHeadersState MIME_HeaderType;