bug 236941 (patch by Jean-Marc Desperrier) : UTF-8 converter loses full lines of text if there's any invalid character (r=jshin, sr=bienvenu)

This commit is contained in:
jshin%mailaps.org 2004-06-01 17:26:27 +00:00
Родитель a9cbdc4b2a
Коммит bf2788d424
2 изменённых файлов: 63 добавлений и 22 удалений

Просмотреть файл

@ -765,10 +765,48 @@ int ConvertUsingEncoderAndDecoder(const char *stringToUse, PRInt32 inLength,
rv = NS_ERROR_OUT_OF_MEMORY;
}
else {
// convert to unicode
rv = decoder->Convert(stringToUse, &srcLen, unichars, &unicharLength);
if (NS_SUCCEEDED(rv)) {
rv = encoder->GetMaxLength(unichars, unicharLength, &dstLength);
// convert to unicode, replacing failed chars with 0xFFFD as in
// the method used in nsXMLHttpRequest::ConvertBodyToText and nsScanner::Append
//
// We will need several passes to convert the whole string if it has invalid characters
// 'totalChars' is where the number of converted characters is accumulated
// 'dataLen' is the number of characters left to convert
// 'outLen' is the number of characters still available in the output buffer as input of decoder->Convert
// and the number of characters written in it as output.
PRInt32 totalChars = 0,
inBufferIndex = 0,
outBufferIndex = 0;
PRInt32 dataLen = srcLen,
outLen = unicharLength;
do {
PRInt32 inBufferLength = dataLen;
rv = decoder->Convert(&stringToUse[inBufferIndex],
&inBufferLength,
&unichars[outBufferIndex],
&outLen);
totalChars += outLen;
// Done if conversion successful
if (NS_SUCCEEDED(rv))
break;
// We consume one byte, replace it with U+FFFD
// and try the conversion again.
outBufferIndex += outLen;
unichars[outBufferIndex++] = PRUnichar(0xFFFD);
// totalChars is updated here
outLen = unicharLength - (++totalChars);
inBufferIndex += inBufferLength + 1;
dataLen -= inBufferLength + 1;
decoder->Reset();
// If there is not at least one byte available after the one we
// consumed, we're done
} while ( dataLen > 0 );
rv = encoder->GetMaxLength(unichars, totalChars, &dstLength);
// allocate an output buffer
dstPtr = (char *) PR_Malloc(dstLength + 1);
if (dstPtr == nsnull) {
@ -779,7 +817,7 @@ int ConvertUsingEncoderAndDecoder(const char *stringToUse, PRInt32 inLength,
// convert from unicode
rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, '?');
if (NS_SUCCEEDED(rv)) {
rv = encoder->Convert(unichars, &unicharLength, dstPtr, &dstLength);
rv = encoder->Convert(unichars, &totalChars, dstPtr, &dstLength);
if (NS_SUCCEEDED(rv)) {
PRInt32 finLen = buffLength - dstLength;
rv = encoder->Finish((char *)(dstPtr+dstLength), &finLen);
@ -792,7 +830,6 @@ int ConvertUsingEncoderAndDecoder(const char *stringToUse, PRInt32 inLength,
}
}
}
}
if (inLength > klocalbufsize)
delete [] unichars;
}

Просмотреть файл

@ -390,6 +390,10 @@ MimeInlineText_convert_and_parse_line(char *line, PRInt32 length, MimeObject *ob
//initiate decoder if not yet
if (text->inputDecoder == nsnull)
MIME_get_unicode_decoder(text->charset, getter_AddRefs(text->inputDecoder));
// If no decoder is found, use "UTF-8", which will map most non-US-ASCII chars as invalid
// A pure-ASCII-only decoder would be better, but there is none
if (text->inputDecoder == nsnull)
MIME_get_unicode_decoder("UTF-8", getter_AddRefs(text->inputDecoder));
if (text->utf8Encoder == nsnull)
MIME_get_unicode_encoder("UTF-8", getter_AddRefs(text->utf8Encoder));