зеркало из https://github.com/mozilla/pjs.git
Adding I18N functions for pref migration, bug 14349, r=ftang.
This commit is contained in:
Родитель
e65fae394e
Коммит
9738dce74b
|
@ -49,6 +49,12 @@
|
|||
#include "nsNetUtil.h"
|
||||
#include "nsCRT.h"
|
||||
|
||||
#define NS_IMPL_IDS
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsIPlatformCharset.h"
|
||||
#undef NS_IMPL_IDS
|
||||
|
||||
|
||||
/* Network */
|
||||
|
||||
#include "net.h"
|
||||
|
@ -203,6 +209,8 @@ static NS_DEFINE_CID(kCommonDialogsCID, NS_CommonDialog_CID);
|
|||
static NS_DEFINE_CID(kDialogParamBlockCID, NS_DialogParamBlock_CID);
|
||||
static NS_DEFINE_CID(kFileLocatorCID, NS_FILELOCATOR_CID);
|
||||
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
|
||||
nsPrefMigration* nsPrefMigration::mInstance = nsnull;
|
||||
|
||||
nsPrefMigration *
|
||||
|
@ -1937,3 +1945,189 @@ nsPrefMigration::ShowPMDialogEngine(nsIDialogParamBlock *ioParamBlock, const cha
|
|||
|
||||
#endif /* 0 */
|
||||
|
||||
|
||||
// This is to be called per string pref to check whether the input pref needs
|
||||
// charset conversion (from platform charset to UTF-8) for the pref migration.
|
||||
PRBool
|
||||
nsPrefMigration::PrefStringNeedsCharsetConversion(const char* prefName)
|
||||
{
|
||||
//TODO: we need a complete list of 4.x prefs which are saved as the platform charset.
|
||||
// also may need an extensibility in addition to the hard coded names.
|
||||
const char *names[] = {
|
||||
"mail.identity.username",
|
||||
""
|
||||
};
|
||||
|
||||
for (int i = 0; names[i][0]; i++) {
|
||||
if (!nsCRT::strcasecmp(prefName, names[i]))
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
return PR_FALSE;
|
||||
}
|
||||
|
||||
// Helper macros for UTF-8 detection.
//
// kLeftNBit(s)Mask: byte masks with the N most significant bits set.
|
||||
#define kLeft1BitMask 0x80
|
||||
#define kLeft2BitsMask 0xC0
|
||||
#define kLeft3BitsMask 0xE0
|
||||
#define kLeft4BitsMask 0xF0
|
||||
#define kLeft5BitsMask 0xF8
|
||||
#define kLeft6BitsMask 0xFC
|
||||
#define kLeft7BitsMask 0xFE
|
||||
|
||||
// kNBytesLeadByte: bit pattern of the first byte of an N-byte sequence
// (N high bits set). kTrialByte: bit pattern of a continuation byte.
// NOTE(review): "Trial" looks like a misspelling of "trail"; the names are
// kept because other code in this file refers to them.
#define k2BytesLeadByte kLeft2BitsMask
|
||||
#define k3BytesLeadByte kLeft3BitsMask
|
||||
#define k4BytesLeadByte kLeft4BitsMask
|
||||
#define k5BytesLeadByte kLeft5BitsMask
|
||||
#define k6BytesLeadByte kLeft6BitsMask
|
||||
#define kTrialByte kLeft1BitMask
|
||||
|
||||
// Byte classification. UTF8_1Byte(c) is true when the high bit of c is clear.
// UTF8_NBytes(c) is true when c has exactly N leading 1 bits followed by a
// 0 bit. UTF8_ValidTrialByte(c) is true when c has the form 10xxxxxx.
#define UTF8_1Byte(c) ( 0 == ((c) & kLeft1BitMask))
|
||||
#define UTF8_2Bytes(c) ( k2BytesLeadByte == ((c) & kLeft3BitsMask))
|
||||
#define UTF8_3Bytes(c) ( k3BytesLeadByte == ((c) & kLeft4BitsMask))
|
||||
#define UTF8_4Bytes(c) ( k4BytesLeadByte == ((c) & kLeft5BitsMask))
|
||||
#define UTF8_5Bytes(c) ( k5BytesLeadByte == ((c) & kLeft6BitsMask))
|
||||
#define UTF8_6Bytes(c) ( k6BytesLeadByte == ((c) & kLeft7BitsMask))
|
||||
#define UTF8_ValidTrialByte(c) ( kTrialByte == ((c) & kLeft2BitsMask))
|
||||
|
||||
// Check if the given C string is UTF-8 or not.
|
||||
PRBool
|
||||
nsPrefMigration::IsUTF8String(const unsigned char* utf8)
|
||||
{
|
||||
if(NULL == utf8)
|
||||
return PR_TRUE;
|
||||
return IsUTF8Text(utf8, nsCRT::strlen((char *)utf8));}
|
||||
|
||||
// Check if the given buffer is UTF-8 or not.
|
||||
PRBool
|
||||
nsPrefMigration::IsUTF8Text(const unsigned char* utf8, PRInt32 len)
|
||||
{
|
||||
PRInt32 i;
|
||||
PRInt32 j;
|
||||
PRInt32 clen;
|
||||
for(i =0; i < len; i += clen)
|
||||
{
|
||||
if(UTF8_1Byte(utf8[i]))
|
||||
{
|
||||
clen = 1;
|
||||
} else if(UTF8_2Bytes(utf8[i])) {
|
||||
clen = 2;
|
||||
/* No enough trail bytes */
|
||||
if( (i + clen) > len)
|
||||
return PR_FALSE;
|
||||
/* 0000 0000 - 0000 007F : should encode in less bytes */
|
||||
if(0 == (utf8[i] & 0x1E ))
|
||||
return PR_FALSE;
|
||||
} else if(UTF8_3Bytes(utf8[i])) {
|
||||
clen = 3;
|
||||
/* No enough trail bytes */
|
||||
if( (i + clen) > len)
|
||||
return PR_FALSE;
|
||||
/* a single Surrogate should not show in 3 bytes UTF8, instead, the pair
|
||||
should be intepreted
|
||||
as one single UCS4 char and encoded UTF8 in 4 bytes */
|
||||
if((0xED == utf8[i] ) && (0xA0 == (utf8[i+1] & 0xA0 ) ))
|
||||
return PR_FALSE;
|
||||
/* 0000 0000 - 0000 07FF : should encode in less bytes */
|
||||
if((0 == (utf8[i] & 0x0F )) && (0 == (utf8[i+1] & 0x20 ) ))
|
||||
return PR_FALSE;
|
||||
} else if(UTF8_4Bytes(utf8[i])) {
|
||||
clen = 4;
|
||||
/* No enough trail bytes */
|
||||
if( (i + clen) > len)
|
||||
return PR_FALSE;
|
||||
/* 0000 0000 - 0000 FFFF : should encode in less bytes */
|
||||
if((0 == (utf8[i] & 0x07 )) && (0 == (utf8[i+1] & 0x30 )) )
|
||||
return PR_FALSE;
|
||||
} else if(UTF8_5Bytes(utf8[i])) {
|
||||
clen = 5;
|
||||
/* No enough trail bytes */
|
||||
if( (i + clen) > len)
|
||||
return FALSE;
|
||||
/* 0000 0000 - 001F FFFF : should encode in less bytes */
|
||||
if((0 == (utf8[i] & 0x03 )) && (0 == (utf8[i+1] & 0x38 )) )
|
||||
return PR_FALSE;
|
||||
} else if(UTF8_6Bytes(utf8[i])) {
|
||||
clen = 6;
|
||||
/* No enough trail bytes */
|
||||
if( (i + clen) > len)
|
||||
return PR_FALSE;
|
||||
/* 0000 0000 - 03FF FFFF : should encode in less bytes */
|
||||
if((0 == (utf8[i] & 0x01 )) && (0 == (utf8[i+1] & 0x3E )) )
|
||||
return PR_FALSE;
|
||||
} else {
|
||||
return PR_FALSE;
|
||||
}
|
||||
for(j = 1; j<clen ;j++)
|
||||
{
|
||||
if(! UTF8_ValidTrialByte(utf8[i+j])) /* Trail bytes invalid */
|
||||
return PR_FALSE;
|
||||
}
|
||||
}
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
// A wrapper function to call the interface to get a platform file charset.
|
||||
nsresult
|
||||
nsPrefMigration::GetPlatformCharset(nsAutoString& aCharset)
|
||||
{
|
||||
nsresult rv;
|
||||
|
||||
// we may cache it since the platform charset will not change through application life
|
||||
nsCOMPtr <nsIPlatformCharset> platformCharset;
|
||||
rv = nsComponentManager::CreateInstance(NS_PLATFORMCHARSET_PROGID, nsnull,
|
||||
NS_GET_IID(nsIPlatformCharset), getter_AddRefs(platformCharset));
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
rv = platformCharset->GetCharset(kPlatformCharsetSel_FileName, aCharset);
|
||||
}
|
||||
if (NS_FAILED(rv)) {
|
||||
aCharset.SetString("ISO-8859-1"); // use ISO-8859-1 in case of any error
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
// Apply a charset conversion from the given charset to UTF-8 for the input C string.
|
||||
nsresult
|
||||
nsPrefMigration::ConvertStringToUTF8(nsAutoString& aCharset, const char* inString, char** outString)
|
||||
{
|
||||
if (nsnull == outString)
|
||||
return NS_ERROR_NULL_POINTER;
|
||||
|
||||
nsresult rv;
|
||||
// convert result to unicode
|
||||
NS_WITH_SERVICE(nsICharsetConverterManager, ccm, kCharsetConverterManagerCID, &rv);
|
||||
|
||||
if(NS_SUCCEEDED(rv)) {
|
||||
nsCOMPtr <nsIUnicodeDecoder> decoder; // this may be cached
|
||||
|
||||
rv = ccm->GetUnicodeDecoder(&aCharset, getter_AddRefs(decoder));
|
||||
if(NS_SUCCEEDED(rv) && decoder) {
|
||||
PRInt32 uniLength = 0;
|
||||
PRInt32 srcLength = nsCRT::strlen(inString);
|
||||
rv = decoder->GetMaxLength(inString, srcLength, &uniLength);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
PRUnichar *unichars = new PRUnichar [uniLength];
|
||||
|
||||
if (nsnull != unichars) {
|
||||
// convert to unicode
|
||||
rv = decoder->Convert(inString, &srcLength, unichars, &uniLength);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
nsAutoString aString;
|
||||
aString.SetString(unichars, uniLength);
|
||||
// convert to UTF-8
|
||||
*outString = aString.ToNewUTF8String();
|
||||
}
|
||||
delete [] unichars;
|
||||
}
|
||||
else {
|
||||
rv = NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -121,7 +121,37 @@ class nsPrefMigration: public nsIPrefMigration
|
|||
nsresult GetPremigratedFilePref(const char *pref_name, nsIFileSpec **filePath);
|
||||
#endif /* NEED_TO_COPY_AND_RENAME_NEWSRC_FILES */
|
||||
|
||||
|
||||
// I18N pref migration:
|
||||
//
|
||||
// 5.0 stores pref strings as UTF-8, while 4.x stores them in either the platform charset or UTF-8,
|
||||
// depending on the pref.
|
||||
// The functions here provide two possible ways to deal with the I18N migration.
|
||||
//
|
||||
// 1) Use the knowledge of which 4.x pref strings are in the platform charset.
|
||||
// If PrefStringNeedsCharsetConversion() returns true then the string needs to be converted to UTF-8.
|
||||
//
|
||||
// 2) Apply UTF-8 detection to all string prefs. Apply the conversion if the string is not detected as UTF-8.
|
||||
//
|
||||
// The user of these functions needs to choose between 1) and 2).
|
||||
// The functions to get platform charset and charset conversion code to UTF-8 are also provided.
|
||||
//
|
||||
|
||||
// This is to be called per string pref to check whether the input pref needs
|
||||
// charset conversion (from platform charset to UTF-8) for the pref migration.
|
||||
PRBool PrefStringNeedsCharsetConversion(const char* prefName);
|
||||
|
||||
// Check if the given C string is UTF-8 or not.
|
||||
PRBool IsUTF8String(const unsigned char* utf8);
|
||||
|
||||
// Check if the given buffer is UTF-8 or not.
|
||||
PRBool IsUTF8Text(const unsigned char* utf8, PRInt32 len);
|
||||
|
||||
// A wrapper function to call the interface to get a platform file charset.
|
||||
nsresult GetPlatformCharset(nsAutoString& aCharset);
|
||||
|
||||
// Apply a charset conversion from the given charset to UTF-8 for the input C string.
|
||||
nsresult ConvertStringToUTF8(nsAutoString& aCharset, const char* inString, char** outString);
|
||||
|
||||
nsIPref* m_prefs;
|
||||
nsresult getPrefService();
|
||||
nsCOMPtr<nsIFileSpec> m_prefsFile;
|
||||
|
|
Загрузка…
Ссылка в новой задаче