/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: NPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Netscape Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Pierre Phaneuf * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the NPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the NPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ // as does this #include "nsICharsetConverterManager.h" #include "nsICharsetAlias.h" #include "nsIPlatformCharset.h" #include "nsIServiceManager.h" #include "nsICharsetConverterManager.h" #include "nsISupports.h" #include "nsIPrefBranch.h" #include "nsIPrefService.h" #include "nsIPrefLocalizedString.h" #include "nsIMimeConverter.h" #include "msgCore.h" #include "nsMsgI18N.h" #include "nsFileSpec.h" #include "nsFileStream.h" #include "nsMsgMimeCID.h" #include "nsMimeTypes.h" #include "nsIEntityConverter.h" #include "nsISaveAsCharset.h" #include "nsHankakuToZenkakuCID.h" #include "nsXPIDLString.h" #include "nsString.h" #include "nsReadableUtils.h" #include "prmem.h" #include "nsFileSpec.h" #include "nsUnicharUtils.h" static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); static NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID); // // International functions necessary for composition // nsresult nsMsgI18NConvertFromUnicode(const nsCString& aCharset, const nsString& inString, nsCString& outString) { if (inString.IsEmpty()) { outString.Truncate(0); return NS_OK; } // Note: this will hide a possible error when the unicode text may contain more than one charset. // (e.g. Latin1 + Japanese). Use nsMsgI18NSaveAsCharset instead to avoid that problem. else if (aCharset.IsEmpty() || aCharset.EqualsIgnoreCase("us-ascii") || aCharset.EqualsIgnoreCase("ISO-8859-1")) { outString.AssignWithConversion(inString); return NS_OK; } else if (aCharset.EqualsIgnoreCase("UTF-8")) { char *s = ToNewUTF8String(inString); if (NULL == s) return NS_ERROR_OUT_OF_MEMORY; outString.Assign(s); Recycle(s); return NS_OK; } nsCAutoString convCharset(NS_LITERAL_CSTRING("ISO-8859-1")); nsresult res; // Resolve charset alias nsCOMPtr calias = do_GetService(NS_CHARSETALIAS_CONTRACTID, &res); if (NS_SUCCEEDED(res)) { if (!aCharset.IsEmpty()) { res = calias->GetPreferred(aCharset, convCharset); } } nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); if(NS_SUCCEEDED(res)) { nsCOMPtr encoder; // get an unicode converter res = ccm->GetUnicodeEncoderRaw(convCharset.get(), getter_AddRefs(encoder)); if(NS_SUCCEEDED(res)) { res = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, '?'); if (NS_SUCCEEDED(res)) { const PRUnichar *originalSrcPtr = inString.get(); PRUnichar *currentSrcPtr = NS_CONST_CAST(PRUnichar *, originalSrcPtr); PRInt32 originalUnicharLength = inString.Length(); PRInt32 srcLength; PRInt32 dstLength; char localbuf[512]; PRInt32 consumedLen = 0; outString.Assign(""); // convert while (consumedLen < originalUnicharLength) { srcLength = originalUnicharLength - consumedLen; dstLength = 512; res = encoder->Convert(currentSrcPtr, &srcLength, localbuf, &dstLength); if (NS_FAILED(res) || dstLength == 0) break; outString.Append(localbuf, dstLength); currentSrcPtr += srcLength; consumedLen = currentSrcPtr - originalSrcPtr; // src length used so far } res = encoder->Finish(localbuf, &dstLength); if (NS_SUCCEEDED(res)) outString.Append(localbuf, dstLength); } } } return res; } nsresult nsMsgI18NConvertToUnicode(const nsCString& aCharset, const nsCString& inString, nsString& outString) { if (inString.IsEmpty()) { outString.Truncate(); return NS_OK; } else if (aCharset.IsEmpty() || aCharset.EqualsIgnoreCase("us-ascii") || aCharset.EqualsIgnoreCase("ISO-8859-1")) { outString.AssignWithConversion(inString.get()); return NS_OK; } nsCAutoString convCharset; nsresult res; // Resolve charset alias nsCOMPtr calias = do_GetService(NS_CHARSETALIAS_CONTRACTID, &res); if (NS_SUCCEEDED(res)) { if (!aCharset.IsEmpty()) { res = calias->GetPreferred(aCharset, convCharset); } } if (NS_FAILED(res)) { return res; } nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); if(NS_SUCCEEDED(res)) { nsCOMPtr decoder; // get an unicode converter res = ccm->GetUnicodeDecoderRaw(convCharset.get(), getter_AddRefs(decoder)); if(NS_SUCCEEDED(res)) { const char *originalSrcPtr = inString.get(); char *currentSrcPtr = NS_CONST_CAST(char *, originalSrcPtr); PRInt32 originalLength = inString.Length(); PRInt32 srcLength; PRInt32 dstLength; PRUnichar localbuf[512]; PRInt32 consumedLen = 0; outString.Assign(NS_LITERAL_STRING("")); // convert while (consumedLen < originalLength) { srcLength = originalLength - consumedLen; dstLength = 512; res = decoder->Convert(currentSrcPtr, &srcLength, localbuf, &dstLength); if (NS_FAILED(res) || dstLength == 0) break; outString.Append(localbuf, dstLength); currentSrcPtr += srcLength; consumedLen = currentSrcPtr - originalSrcPtr; // src length used so far } } } return res; } // Convert an unicode string to a C string with a given charset. nsresult ConvertFromUnicode(const char* aCharset, const nsString& inString, char** outCString) { NS_ENSURE_ARG_POINTER(aCharset); NS_ENSURE_ARG_POINTER(outCString); *outCString = NULL; if (inString.IsEmpty()) { *outCString = nsCRT::strdup(""); return (NULL == *outCString) ? NS_ERROR_OUT_OF_MEMORY : NS_OK; } // Note: this will hide a possible error when the unicode text may contain more than one charset. // (e.g. Latin1 + Japanese). Use nsMsgI18NSaveAsCharset instead to avoid that problem. else if (!*aCharset || !nsCRT::strcasecmp("us-ascii", aCharset) || !nsCRT::strcasecmp("ISO-8859-1", aCharset)) { *outCString = ToNewCString(inString); return *outCString ? NS_OK : NS_ERROR_OUT_OF_MEMORY; } else if (!nsCRT::strcasecmp("UTF-8", aCharset)) { *outCString = ToNewUTF8String(inString); return *outCString ? NS_OK : NS_ERROR_OUT_OF_MEMORY; } nsresult res; nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); NS_ENSURE_SUCCESS(res, res); // get an unicode converter nsCOMPtr encoder; res = ccm->GetUnicodeEncoder(aCharset, getter_AddRefs(encoder)); NS_ENSURE_SUCCESS(res, res); PRUnichar *unichars = (PRUnichar *) inString.get(); PRInt32 unicharLength = inString.Length(); PRInt32 dstLength; res = encoder->GetMaxLength(unichars, unicharLength, &dstLength); NS_ENSURE_SUCCESS(res, res); res = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, '?'); NS_ENSURE_SUCCESS(res, res); // allocale an output buffer *outCString = (char *) PR_Malloc(dstLength + 1); NS_ENSURE_TRUE(*outCString, NS_ERROR_OUT_OF_MEMORY); PRInt32 buffLength = dstLength; **outCString = '\0'; res = encoder->Convert(unichars, &unicharLength, *outCString, &dstLength); if (NS_SUCCEEDED(res)) { PRInt32 finLen = buffLength - dstLength; res = encoder->Finish((char *)(*outCString+dstLength), &finLen); if (NS_SUCCEEDED(res)) { dstLength += finLen; } (*outCString)[dstLength] = '\0'; } return res; } // Convert a C string to an unicode string. nsresult ConvertToUnicode(const char* aCharset, const char* inCString, nsString& outString) { NS_ENSURE_ARG_POINTER(aCharset); NS_ENSURE_ARG_POINTER(inCString); if ('\0' == *inCString) { outString.Truncate(); return NS_OK; } else if ((!*aCharset || !nsCRT::strcasecmp("us-ascii", aCharset) || !nsCRT::strcasecmp("ISO-8859-1", aCharset)) && nsCRT::IsAscii(inCString)) { outString.AssignWithConversion(inCString); return NS_OK; } nsresult res; nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); NS_ENSURE_SUCCESS(res, res); // get an unicode converter nsCOMPtr decoder; res = ccm->GetUnicodeDecoder(aCharset, getter_AddRefs(decoder)); NS_ENSURE_SUCCESS(res, res); PRUnichar *unichars; PRInt32 unicharLength; PRInt32 srcLen = PL_strlen(inCString); // buffer size 144 = // 72 (default line len for compose) // times 2 (converted byte len might be larger) const int klocalbufsize = 144; PRUnichar localbuf[klocalbufsize+1]; PRBool usedlocalbuf; if (srcLen > klocalbufsize) { res = decoder->GetMaxLength(inCString, srcLen, &unicharLength); NS_ENSURE_SUCCESS(res, res); unichars = (PRUnichar *) nsMemory::Alloc(unicharLength * sizeof(PRUnichar)); NS_ENSURE_TRUE(unichars, NS_ERROR_OUT_OF_MEMORY); usedlocalbuf = PR_FALSE; } else { unichars = localbuf; unicharLength = klocalbufsize+1; usedlocalbuf = PR_TRUE; } // convert to unicode res = decoder->Convert(inCString, &srcLen, unichars, &unicharLength); outString.Assign(unichars, unicharLength); if (!usedlocalbuf) nsMemory::Free(unichars); return res; } // Charset to be used for the internatl processing. const char *msgCompHeaderInternalCharset() { // UTF-8 is a super set of us-ascii. // We can use the same string manipulation methods as us-ascii without breaking non us-ascii characters. return "UTF-8"; } // Charset used by the file system. const char * nsMsgI18NFileSystemCharset() { /* Get a charset used for the file. */ static nsCAutoString fileSystemCharset; if (fileSystemCharset.IsEmpty()) { nsresult rv; nsCOMPtr platformCharset = do_GetService(NS_PLATFORMCHARSET_CONTRACTID, &rv); if (NS_SUCCEEDED(rv)) { rv = platformCharset->GetCharset(kPlatformCharsetSel_FileName, fileSystemCharset); } if (NS_FAILED(rv)) fileSystemCharset.Assign("ISO-8859-1"); } return fileSystemCharset.get(); } // MIME encoder, output string should be freed by PR_FREE char * nsMsgI18NEncodeMimePartIIStr(const char *header, PRBool structured, const char *charset, PRInt32 fieldnamelen, PRBool usemime) { // No MIME, convert to the outgoing mail charset. if (PR_FALSE == usemime) { char *convertedStr; if (NS_SUCCEEDED(ConvertFromUnicode(charset, NS_ConvertUTF8toUCS2(header), &convertedStr))) return (convertedStr); else return PL_strdup(header); } char *encodedString = nsnull; nsresult res; nsCOMPtr converter = do_GetService(NS_MIME_CONVERTER_CONTRACTID, &res); if (NS_SUCCEEDED(res) && nsnull != converter) res = converter->EncodeMimePartIIStr_UTF8(header, structured, charset, fieldnamelen, kMIME_ENCODED_WORD_SIZE, &encodedString); return NS_SUCCEEDED(res) ? encodedString : nsnull; } // Return True if a charset is stateful (e.g. JIS). PRBool nsMsgI18Nstateful_charset(const char *charset) { //TODO: use charset manager's service return (nsCRT::strcasecmp(charset, "ISO-2022-JP") == 0); } PRBool nsMsgI18Nmultibyte_charset(const char *charset) { nsresult res; nsCOMPtr ccm = do_GetService(kCharsetConverterManagerCID, &res); PRBool result = PR_FALSE; if (NS_SUCCEEDED(res)) { nsAutoString charsetData; res = ccm->GetCharsetData(charset, NS_LITERAL_STRING(".isMultibyte").get(), charsetData); if (NS_SUCCEEDED(res)) { result = charsetData.EqualsIgnoreCase("true"); } } return result; } PRBool nsMsgI18Ncheck_data_in_charset_range(const char *charset, const PRUnichar* inString, char **fallbackCharset) { if (!charset || !*charset || !inString || !*inString) return PR_TRUE; nsresult res; PRBool result = PR_TRUE; nsCOMPtr ccm = do_GetService(kCharsetConverterManagerCID, &res); if (NS_SUCCEEDED(res)) { nsCOMPtr encoder; // get an unicode converter res = ccm->GetUnicodeEncoderRaw(charset, getter_AddRefs(encoder)); if(NS_SUCCEEDED(res)) { const PRUnichar *originalPtr = inString; PRInt32 originalLen = nsCRT::strlen(inString); PRUnichar *currentSrcPtr = NS_CONST_CAST(PRUnichar *, originalPtr); char localBuff[512]; PRInt32 consumedLen = 0; PRInt32 srcLen; PRInt32 dstLength; // convert from unicode while (consumedLen < originalLen) { srcLen = originalLen - consumedLen; dstLength = 512; res = encoder->Convert(currentSrcPtr, &srcLen, localBuff, &dstLength); if (NS_ERROR_UENC_NOMAPPING == res) { result = PR_FALSE; break; } else if (NS_FAILED(res) || (0 == dstLength)) break; currentSrcPtr += srcLen; consumedLen = currentSrcPtr - originalPtr; // src length used so far } } } // if the conversion was not successful then try fallback to other charsets if (!result && fallbackCharset) { nsXPIDLCString convertedString; res = nsMsgI18NSaveAsCharset("text/plain", charset, inString, getter_Copies(convertedString), fallbackCharset); result = (NS_SUCCEEDED(res) && NS_ERROR_UENC_NOMAPPING != res); } return result; } // Simple parser to parse META charset. // It only supports the case when the description is within one line. const char * nsMsgI18NParseMetaCharset(nsFileSpec* fileSpec) { static char charset[kMAX_CSNAME+1]; char buffer[512]; *charset = '\0'; if (fileSpec->IsDirectory()) { NS_ASSERTION(0,"file is a directory"); return charset; } nsInputFileStream fileStream(*fileSpec); while (!fileStream.eof() && !fileStream.failed() && fileStream.is_open()) { fileStream.readline(buffer, 512); if (*buffer == nsCRT::CR || *buffer == nsCRT::LF || *buffer == 0) continue; PRUint32 len = PL_strlen(buffer); for (PRUint32 i = 0; i < len; i++) { buffer[i] = toupper(buffer[i]); } if (PL_strstr(buffer, "/HEAD")) break; if (PL_strstr(buffer, "META") && PL_strstr(buffer, "HTTP-EQUIV") && PL_strstr(buffer, "CONTENT-TYPE") && PL_strstr(buffer, "CHARSET") ) { char *cp = PL_strstr(PL_strstr(buffer, "CHARSET"), "=") + 1; char seps[] = " \"\'"; char *token; char* newStr; token = nsCRT::strtok(cp, seps, &newStr); if (token != NULL) { PL_strncpy(charset, token, sizeof(charset)); charset[sizeof(charset)-1] = '\0'; // this function cannot parse a file if it is really // encoded by one of the following charsets // so we can say that the charset label must be incorrect for // the .html if we actually see those charsets parsed // and we should ignore them if (!nsCRT::strncasecmp("UTF-16", charset, sizeof("UTF-16")-1) || !nsCRT::strncasecmp("UTF-32", charset, sizeof("UTF-32")-1)) charset[0] = '\0'; break; } } } return charset; } nsresult nsMsgI18NConvertToEntity(const nsString& inString, nsString* outString) { nsresult res; outString->Truncate(); nsCOMPtr entityConv; res = nsComponentManager::CreateInstance(kEntityConverterCID, NULL, NS_GET_IID(nsIEntityConverter), getter_AddRefs(entityConv)); if(NS_SUCCEEDED(res)) { PRUnichar *entities = NULL; res = entityConv->ConvertToEntities(inString.get(), nsIEntityConverter::html40Latin1, &entities); if (NS_SUCCEEDED(res) && (NULL != entities)) { outString->Assign(entities); nsMemory::Free(entities); } } return res; } nsresult nsMsgI18NSaveAsCharset(const char* contentType, const char *charset, const PRUnichar* inString, char** outString, char **fallbackCharset, PRBool *isAsciiOnly) { NS_ENSURE_ARG_POINTER(contentType); NS_ENSURE_ARG_POINTER(charset); NS_ENSURE_ARG_POINTER(inString); NS_ENSURE_ARG_POINTER(outString); *outString = nsnull; if (nsCRT::IsAscii(inString)) { if (isAsciiOnly) *isAsciiOnly = PR_TRUE; *outString = nsCRT::strdup(NS_LossyConvertUCS2toASCII(inString).get()); return (nsnull != *outString) ? NS_OK : NS_ERROR_OUT_OF_MEMORY; } if (isAsciiOnly) *isAsciiOnly = PR_FALSE; PRBool bTEXT_HTML = PR_FALSE; nsresult res; if (!nsCRT::strcasecmp(contentType, TEXT_HTML)) { bTEXT_HTML = PR_TRUE; } else if (nsCRT::strcasecmp(contentType, TEXT_PLAIN)) { return NS_ERROR_ILLEGAL_VALUE; // not supported type } nsCOMPtr calias = do_GetService(NS_CHARSETALIAS_CONTRACTID, &res); NS_ENSURE_SUCCESS(res, res); nsCAutoString charsetName; res = calias->GetPreferred(nsDependentCString(charset), charsetName); NS_ENSURE_SUCCESS(res, res); // charset converter plus entity, NCR generation nsCOMPtr conv = do_CreateInstance(NS_SAVEASCHARSET_CONTRACTID, &res); NS_ENSURE_SUCCESS(res, res); // attribute: // html text - charset conv then fallback to entity or NCR // plain text - charset conv then fallback to '?' if (bTEXT_HTML) // For ISO-8859-1 only, convert to entity first (always generate entites like  ). res = conv->Init(charsetName.get(), charsetName.Equals(NS_LITERAL_CSTRING("ISO-8859-1")) ? nsISaveAsCharset::attr_htmlTextDefault : nsISaveAsCharset::attr_EntityAfterCharsetConv + nsISaveAsCharset::attr_FallbackDecimalNCR, nsIEntityConverter::html32); else // fallback for text/plain: first try transliterate then '?' res = conv->Init(charsetName.get(), nsISaveAsCharset::attr_FallbackQuestionMark + nsISaveAsCharset::attr_EntityAfterCharsetConv, nsIEntityConverter::transliterate); NS_ENSURE_SUCCESS(res, res); const PRUnichar *input = inString; // Mapping characters in a certain range (required for Japanese only) nsAutoString mapped; if (charsetName.Equals(NS_LITERAL_CSTRING("ISO-2022-JP"))) { static PRInt32 sSendHankakuKana = -1; if (sSendHankakuKana < 0) { nsCOMPtr prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID, &res)); NS_ENSURE_SUCCESS(res, res); PRBool sendHankaku; // Get a hidden 4.x pref with no UI, get it only once. if (NS_FAILED(prefBranch->GetBoolPref("mailnews.send_hankaku_kana", &sendHankaku))) sSendHankakuKana = 0; // no pref means need the mapping else sSendHankakuKana = sendHankaku ? 1 : 0; } if (!sSendHankakuKana) { nsCOMPtr textTransform = do_CreateInstance(NS_HANKAKUTOZENKAKU_CONTRACTID, &res); if (NS_SUCCEEDED(res)) { res = textTransform->Change(inString, nsCRT::strlen(inString), mapped); if (NS_SUCCEEDED(res)) input = mapped.get(); } } } // Convert to charset res = conv->Convert(input, outString); // If the converer cannot encode to the charset, // then fallback to pref sepcified charsets. if (NS_ERROR_UENC_NOMAPPING == res && !bTEXT_HTML && fallbackCharset) { nsCOMPtr prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID, &res)); NS_ENSURE_SUCCESS(res, res); nsCAutoString prefString("intl.fallbackCharsetList."); prefString.Append(charset); nsXPIDLCString fallbackList; res = prefBranch->GetCharPref(prefString.get(), getter_Copies(fallbackList)); // do the fallback only if there is a pref for the charset if (NS_FAILED(res) || fallbackList.IsEmpty()) return NS_ERROR_UENC_NOMAPPING; res = conv->Init(fallbackList.get(), nsISaveAsCharset::attr_FallbackQuestionMark + nsISaveAsCharset::attr_EntityAfterCharsetConv + nsISaveAsCharset::attr_CharsetFallback, nsIEntityConverter::transliterate); NS_ENSURE_SUCCESS(res, res); // free whatever we have now PR_FREEIF(*outString); res = conv->Convert(input, outString); NS_ENSURE_SUCCESS(res, res); // get the actual charset used for the conversion if (NS_FAILED(conv->GetCharset(fallbackCharset))) *fallbackCharset = nsnull; } // In case of HTML, non ASCII may be encoded as CER, NCR. // Exclude stateful charset which is 7 bit but not ASCII only. else if (isAsciiOnly && bTEXT_HTML && *outString && !nsMsgI18Nstateful_charset(charsetName.get())) *isAsciiOnly = nsCRT::IsAscii(*outString); return res; } nsresult nsMsgI18NFormatNNTPXPATInNonRFC1522Format(const nsCString& aCharset, const nsString& inString, nsCString& outString) { outString.AssignWithConversion(inString); return NS_OK; } const char * nsMsgI18NGetAcceptLanguage(void) { nsCOMPtr prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID)); if (prefBranch) { nsCOMPtr prefString; prefBranch->GetComplexValue("intl.accept_languages", NS_GET_IID(nsIPrefLocalizedString), getter_AddRefs(prefString)); if (prefString) { nsXPIDLString ucsval; prefString->ToString(getter_Copies(ucsval)); if (!ucsval.IsEmpty()) { static nsCAutoString acceptLang; acceptLang.Assign(NS_LossyConvertUCS2toASCII(ucsval)); return acceptLang.get(); } } } // Default Accept-Language return "en"; } // taken from nsFileSpec::GetNativePathString() void nsMsgGetNativePathString(const char *aPath, nsString& aResult) { if (!aPath) { aResult.Truncate(); return; } if (nsCRT::IsAscii(aPath)) aResult.AssignWithConversion(aPath); else ConvertToUnicode(nsMsgI18NFileSystemCharset(), aPath, aResult); }