diff --git a/mailnews/mime/src/comi18n.cpp b/mailnews/mime/src/comi18n.cpp
index cda0eb176da9..d9b5476b0572 100644
--- a/mailnews/mime/src/comi18n.cpp
+++ b/mailnews/mime/src/comi18n.cpp
@@ -568,13 +568,13 @@ static PRBool intlmime_only_ascii_str(const char *s)
 
 static unsigned char * utf8_nextchar(unsigned char *str)
 {
-  int len = PL_strlen((char *) str);
   if (*str < 128) {
     return (str+1);
   }
+  int len = PL_strlen((char *) str);
   // RFC 2279 defines more than 3 bytes sequences (0xF0, 0xF8, 0xFC),
   // but I think we won't encounter those cases as long as we're supporting UCS-2 and no surrogate.
-  else if ((len >= 3) && (*str >= 0xE0)) {
+  if ((len >= 3) && (*str >= 0xE0)) {
     return (str+3);
   }
   else if ((len >= 2) && (*str >= 0xC0)) {
@@ -1268,41 +1268,6 @@ static PRInt32 INTL_ConvertFromUnicode(const char* to_charset, const void* uniBu
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-class MimeCharsetConverterClass {
-public:
-  MimeCharsetConverterClass();
-  virtual ~MimeCharsetConverterClass();
-
-  // Initialize converters for charsets, fails if converter not available.
-  //
-  PRInt32 Initialize(const char* from_charset, const char* to_charset,
-                     const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
-
-  // Converts input buffer or duplicates input if converters not available (and returns 0).
-  // Also duplicates input if convertion not needed.
-  // C string is generated for converted string.
-  PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
-                  char** outBuffer, PRInt32* outLength,
-                  PRInt32* numUnConverted);
-
-protected:
-  nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
-  nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
-  PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
-
-private:
-  nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
-  nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
-  nsIUnicodeDecoder *mDecoderDetected; // decoder of detected charset (after when auto detection succeeded)
-  PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
-                              // (-1 for no limit)
-  PRInt32 mNumChars; // accumulated number of characters converted in bytes
-  PRBool mAutoDetect; // true if apply auto detection
-  nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
-  nsString mOutputCharset; // output charset for need conversion check
-  nsIStringCharsetDetector *mDetector; // charset detector
-};
-
 MimeCharsetConverterClass::MimeCharsetConverterClass()
 {
   mDecoder = NULL;
@@ -1583,16 +1548,18 @@ PRInt32 MIME_ConvertCharset(const PRBool autoDetection, const char* from_charset
                             const char* inBuffer, const PRInt32 inLength, char** outBuffer, PRInt32* outLength,
                             PRInt32* numUnConverted)
 {
-  char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
+//  char srcCharset[kMAX_CSNAME+1], dstCharset[kMAX_CSNAME+1];
   MimeCharsetConverterClass aMimeCharsetConverterClass;
   PRInt32 res;
 
-  srcCharset[0] = '\0';
-  dstCharset[0] = '\0';
-  PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
-  PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
+  // commenting out per Naoki's instructions.
+//  srcCharset[0] = '\0';
+//  dstCharset[0] = '\0';
+//  PL_strcpy(srcCharset, PL_strcasecmp(from_charset, "us-ascii") ? (char *) from_charset : "iso-8859-1");
+//  PL_strcpy(dstCharset, PL_strcasecmp(to_charset, "us-ascii") ? (char *) to_charset : "iso-8859-1");
 
-  res = aMimeCharsetConverterClass.Initialize(srcCharset, dstCharset, autoDetection, -1);
+  // Hand the caller's charset names straight to the converter: source first, destination second.
+  res = aMimeCharsetConverterClass.Initialize(from_charset, to_charset, autoDetection, -1);
 
   if (res != -1) {
     res = aMimeCharsetConverterClass.Convert(inBuffer, inLength, outBuffer, outLength, NULL);
diff --git a/mailnews/mime/src/comi18n.h b/mailnews/mime/src/comi18n.h
index 78bb05ef235d..95107c212fc2 100644
--- a/mailnews/mime/src/comi18n.h
+++ b/mailnews/mime/src/comi18n.h
@@ -15,7 +15,10 @@
  * Copyright (C) 1998 Netscape Communications Corporation. All Rights
  * Reserved.
  */
+#ifndef _COMI18N_LOADED_H_
+#define _COMI18N_LOADED_H_
 
+#include "msgCore.h"
 
 #ifndef kMIME_ENCODED_WORD_SIZE
 #define kMIME_ENCODED_WORD_SIZE 75
@@ -25,7 +28,47 @@
 #define kMAX_CSNAME 64
 #endif
 
+class nsIUnicodeDecoder;
+class nsIUnicodeEncoder;
+class nsIStringCharsetDetector;
+
+class MimeCharsetConverterClass {
+public:
+  MimeCharsetConverterClass();
+  virtual ~MimeCharsetConverterClass();
+
+  // Initialize converters for charsets, fails if converter not available.
+  //
+  PRInt32 Initialize(const char* from_charset, const char* to_charset,
+                     const PRBool autoDetect=PR_FALSE, const PRInt32 maxNumCharsDetect=-1);
+
+  // Converts input buffer or duplicates input if converters not available (and returns 0).
+  // Also duplicates input if convertion not needed.
+  // C string is generated for converted string.
+  PRInt32 Convert(const char* inBuffer, const PRInt32 inLength,
+                  char** outBuffer, PRInt32* outLength,
+                  PRInt32* numUnConverted);
+
+protected:
+  nsIUnicodeDecoder * GetUnicodeDecoder() {return (mAutoDetect && NULL != mDecoderDetected) ? mDecoderDetected : mDecoder;}
+  nsIUnicodeEncoder * GetUnicodeEncoder() {return mEncoder;}
+  PRBool NeedCharsetConversion(const nsString& from_charset, const nsString& to_charset);
+
+private:
+  nsIUnicodeDecoder *mDecoder; // decoder (convert to unicode)
+  nsIUnicodeEncoder *mEncoder; // encoder (convert from unicode)
+  nsIUnicodeDecoder *mDecoderDetected; // decoder of detected charset (after when auto detection succeeded)
+  PRInt32 mMaxNumCharsDetect; // maximum number of characters in bytes to abort auto detection
+                              // (-1 for no limit)
+  PRInt32 mNumChars; // accumulated number of characters converted in bytes
+  PRBool mAutoDetect; // true if apply auto detection
+  nsString mInputCharset; // input charset for auto detection hint as well as need conversion check
+  nsString mOutputCharset; // output charset for need conversion check
+  nsIStringCharsetDetector *mDetector; // charset detector
+};
+
+
 
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@@ -145,3 +188,6 @@ char *INTL_EncodeMimePartIIStr_VarLen(char *subject, PRInt16 wincsid, PRBool bUs
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
+
+#endif // _COMI18N_LOADED_H_
+
diff --git a/mailnews/mime/src/nsMsgHeaderParser.cpp b/mailnews/mime/src/nsMsgHeaderParser.cpp
index b5ea4f0a4ed5..2e64e028020a 100644
--- a/mailnews/mime/src/nsMsgHeaderParser.cpp
+++ b/mailnews/mime/src/nsMsgHeaderParser.cpp
@@ -36,6 +36,7 @@
 #define COPY_CHAR(_D,_S) do { if (!_S || !*_S) { *_D++ = 0; }\
                          else { int _LEN = NextChar_UTF8((char *)_S) - _S;\
                          nsCRT::memcpy(_D,_S,_LEN); _D += _LEN; } } while (0)
+//#define NEXT_CHAR(_STR) (_STR = (* (char *) _STR < 128) ? (char *) _STR + 1 : NextChar_UTF8((char *)_STR))
 #define NEXT_CHAR(_STR) (_STR = NextChar_UTF8((char *)_STR))
 #define TRIM_WHITESPACE(_S,_E,_T) do { while (_E > _S && IS_SPACE(_E[-1])) _E--;\
                                   *_E++ = _T; } while (0)
@@ -71,19 +72,43 @@ nsMsgHeaderParser::nsMsgHeaderParser()
 {
   /* the following macro is used to initialize the ref counting data */
   NS_INIT_REFCNT();
+  m_USAsciiToUtf8CharsetConverter = nsnull;
+
 }
 
 nsMsgHeaderParser::~nsMsgHeaderParser()
-{}
+{
+  delete m_USAsciiToUtf8CharsetConverter;
+}
 
 /* the following macros actually implement addref, release and query interface for our component. */
 NS_IMPL_ADDREF(nsMsgHeaderParser)
 NS_IMPL_RELEASE(nsMsgHeaderParser)
 NS_IMPL_QUERY_INTERFACE(nsMsgHeaderParser, nsIMsgHeaderParser::GetIID()); /* we need to pass in the interface ID of this interface */
 
+// Returns the cached us-ascii -> utf-8 converter, creating and initializing it
+// on first use; returns nsnull if the allocation fails. The Initialize()
+// result is not checked.
+// NOTE(review): ParseHeaderAddresses uses this in place of a conversion from
+// "UTF-8" to CHARSET(charset) -- confirm the conversion direction is intended.
+MimeCharsetConverterClass *nsMsgHeaderParser::GetUSAsciiToUtf8CharsetConverter()
+{
+  if (!m_USAsciiToUtf8CharsetConverter)
+  {
+    m_USAsciiToUtf8CharsetConverter = new MimeCharsetConverterClass;
+    if (m_USAsciiToUtf8CharsetConverter)
+    {
+      nsresult rv = m_USAsciiToUtf8CharsetConverter->Initialize("us-ascii","utf-8", PR_FALSE);
+    }
+  }
+  return m_USAsciiToUtf8CharsetConverter;
+}
+
 nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const char *line, char **names, char **addresses, PRUint32 *numAddresses)
 {
   char *utf8Str, *outStrings;
+  MimeCharsetConverterClass *converter = nsnull;
+  nsresult rv;
 
   if (nsnull == line || MIME_ConvertString(CHARSET(charset), "UTF-8", line, &utf8Str) != 0) {
     utf8Str = nsnull;
@@ -102,8 +127,19 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
         s += len;
       }
       // convert array of strings
-      if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
-                              len_all, &outStrings, &outStrLen, NULL) == 0) {
+      if (!charset)
+      {
+        converter = GetUSAsciiToUtf8CharsetConverter();
+        if (converter)
+          rv = converter->Convert(*names, len_all, &outStrings, &outStrLen, nsnull);
+      }
+      if (!converter)
+      {
+        rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *names,
+                                 len_all, &outStrings, &outStrLen, NULL) ;
+      }
+      if (NS_SUCCEEDED(rv))
+      {
         PR_Free(*names);
         *names = outStrings;
       }
@@ -117,8 +153,21 @@ nsresult nsMsgHeaderParser::ParseHeaderAddresses (const char *charset, const cha
         s += len;
       }
       // convert array of strings
-      if (MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
-                              len_all, &outStrings, &outStrLen, NULL) == 0) {
+      if (!charset)
+      {
+        converter = GetUSAsciiToUtf8CharsetConverter();
+        if (converter)
+          rv = converter->Convert(*addresses,
+                                  len_all, &outStrings, &outStrLen, nsnull);
+      }
+      // if non null charset, or couldn't get a converter, use MIME_ function.
+      if (!converter)
+      {
+        rv = MIME_ConvertCharset(PR_FALSE, "UTF-8", CHARSET(charset), *addresses,
+                                 len_all, &outStrings, &outStrLen, NULL);
+      }
+      if (NS_SUCCEEDED(rv))
+      {
         PR_Free(*addresses);
         *addresses = outStrings;
       }
diff --git a/mailnews/mime/src/nsMsgHeaderParser.h b/mailnews/mime/src/nsMsgHeaderParser.h
index 5beb0f8e6f04..13c8d34244ed 100644
--- a/mailnews/mime/src/nsMsgHeaderParser.h
+++ b/mailnews/mime/src/nsMsgHeaderParser.h
@@ -27,6 +27,7 @@
 
 #include "msgCore.h"
 #include "nsIMsgHeaderParser.h" /* include the interface we are going to support */
+#include "comi18n.h"
 
 /*
  * RFC-822 parser
@@ -116,7 +117,11 @@ names to users. e.g. summary file, address book */
 
   NS_IMETHOD UnquotePhraseOrAddr (const char *charset, const char *line,
                                   char** lineout);
-  
+
+  MimeCharsetConverterClass *GetUSAsciiToUtf8CharsetConverter();
+
 protected:
+  // Cached converter: created on first use by GetUSAsciiToUtf8CharsetConverter(), deleted in the destructor.
+  MimeCharsetConverterClass *m_USAsciiToUtf8CharsetConverter;
 private:
 };