зеркало из https://github.com/mozilla/pjs.git
fix bug 7964 : Add ISO-2022-KR decoder (to Unicode) for Korean emails. Encoder is NOT included since ISO-2022-KR should not be used for outgoing emails.
This commit is contained in:
Родитель
6dc96ac474
Коммит
0b2e73d509
|
@ -19,7 +19,7 @@
|
|||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
|
||||
*
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
|
@ -35,3 +35,173 @@
|
|||
* the terms of any one of the NPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#include "nsISO2022KRToUnicode.h"
|
||||
#include "nsUCvKOSupport.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
#include "nsIServiceManager.h"
|
||||
|
||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
|
||||
/**
 * Decode a chunk of ISO-2022-KR bytes into UTF-16 code units.
 *
 * The decoder is a byte-driven state machine whose state (mState,
 * mLastLegalState, mData) survives between calls, so the input may be
 * split at any byte boundary.
 *
 * @param aSrc     input bytes
 * @param aSrcLen  [in] number of bytes available in aSrc;
 *                 [out] number of bytes actually consumed
 * @param aDest    output buffer
 * @param aDestLen [in] capacity of aDest in PRUnichar units;
 *                 [out] number of units written
 * @return NS_OK when every input byte was consumed, or
 *         NS_OK_UDEC_MOREOUTPUT when aDest is too small for the next byte.
 *         In the latter case the decoder state is left untouched for that
 *         byte, so the caller can resume at aSrc + *aSrcLen with a fresh
 *         output buffer.
 *
 * Output space is always checked BEFORE anything is written and before the
 * state is changed; a byte is therefore either fully processed or not
 * processed at all.
 */
NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
{
  const unsigned char* src = (const unsigned char*) aSrc;
  const unsigned char* srcEnd = src + *aSrcLen;
  PRUnichar* dest = aDest;
  PRUnichar* destEnd = aDest + *aDestLen;

  while (src < srcEnd)
  {
    // If LF/CR, return to US-ASCII unconditionally.  This is tested on the
    // byte about to be processed (never on the byte after it), so the input
    // buffer is not read past its end and the rule also applies to the
    // first byte of a chunk.
    if (0x0a == *src || 0x0d == *src)
      mState = mState_ASCII;

    switch (mState)
    {
      case mState_ASCII:
        if (0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        }
        else if (0x0e == *src) { // Shift-Out (SO)
          mState = mState_KSX1001_1992;
        }
        else {
          if (dest >= destEnd)
            goto error1;
          // 8-bit bytes are illegal in a 7-bit encoding.
          *dest++ = (0x80 & *src) ? (PRUnichar) 0xFFFD : (PRUnichar) *src;
        }
        break;

      case mState_ESC:
        if ('$' == *src) {
          mState = mState_ESC_24;
        }
        else {
          // Not a designator: pass the ESC and this byte through.
          if ((destEnd - dest) < 2)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (0x80 & *src) ? (PRUnichar) 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24: // ESC $
        if (')' == *src) {
          mState = mState_ESC_24_29;
        }
        else {
          if ((destEnd - dest) < 3)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (0x80 & *src) ? (PRUnichar) 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24_29: // ESC $ )
        if ('C' == *src) {
          // Complete designator "ESC $ ) C": nothing to emit.
          mState = mState_ASCII;
        }
        else {
          // Keep the ESC_24_29 state until we know the pending bytes fit,
          // otherwise they would be lost when the caller resumes.
          if ((destEnd - dest) < 4)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (PRUnichar) ')';
          *dest++ = (0x80 & *src) ? (PRUnichar) 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_KSX1001_1992:
        if (0x20 < *src && *src < 0x7f) {
          // First byte of a two-byte character; remember it.
          mData = (PRUint8) *src;
          mState = mState_KSX1001_1992_2ndbyte;
        }
        else if (0x0f == *src) { // Shift-In (SI)
          mState = mState_ASCII;
        }
        else {
          if (dest >= destEnd)
            goto error1;
          // Allow space and tab between SO and SI (i.e. in Hangul segment);
          // everything else is invalid.
          *dest++ = (0x20 == *src || 0x09 == *src) ? (PRUnichar) *src
                                                   : (PRUnichar) 0xFFFD;
        }
        break;

      case mState_KSX1001_1992_2ndbyte:
        // Every path below emits exactly one unit.
        if (dest >= destEnd)
          goto error1;

        if (0x20 < *src && *src < 0x7f) {
          if (!mEUCKRDecoder) {
            // creating a delegate converter (EUC-KR)
            nsresult rv;
            nsString tmpCharset;
            nsCOMPtr<nsICharsetConverterManager> ccm =
                  do_GetService(kCharsetConverterManagerCID, &rv);
            if (!NS_FAILED(rv)) {
              tmpCharset.AssignWithConversion("EUC-KR");
              rv = ccm->GetUnicodeDecoder(&tmpCharset, &mEUCKRDecoder);
            }
          }

          // Stays U+FFFD if the delegate is missing or cannot convert.
          PRUnichar uni = (PRUnichar) 0xFFFD;
          if (mEUCKRDecoder) {
            unsigned char ksx[2];
            PRUnichar converted = (PRUnichar) 0xFFFD;
            PRInt32 ksxLen = 2, uniLen = 1;
            // mData is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
            ksx[0] = mData | 0x80;
            ksx[1] = *src | 0x80;
            // Convert EUC-KR to unicode.
            nsresult rv = mEUCKRDecoder->Convert((const char *)ksx, &ksxLen,
                                                 &converted, &uniLen);
            if (!NS_FAILED(rv) && 1 == uniLen)
              uni = converted;
          }
          *dest++ = uni;
          mState = mState_KSX1001_1992;
        }
        else { // Invalid second byte
          if (0x0f == *src) { // Shift-In (SI)
            mState = mState_ASCII;
          }
          else {
            mState = mState_KSX1001_1992;
          }
          *dest++ = (PRUnichar) 0xFFFD;
        }
        break;

      case mState_ERROR:
        if (dest >= destEnd)
          goto error1;
        mState = mLastLegalState;
        *dest++ = (PRUnichar) 0xFFFD;
        break;

    } // switch
    ++src;
  }

  // All input consumed; *aSrcLen already holds the consumed byte count.
  *aDestLen = dest - aDest;
  return NS_OK;

error1:
  // Output buffer exhausted: report how far we got so the caller can resume.
  *aDestLen = dest - aDest;
  *aSrcLen = src - (const unsigned char*) aSrc;
  return NS_OK_UDEC_MOREOUTPUT;
}
|
||||
|
||||
|
|
|
@ -35,3 +35,59 @@
|
|||
* the terms of any one of the NPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#ifndef nsISO2022KRToUnicode_h__
|
||||
#define nsISO2022KRToUnicode_h__
|
||||
#include "nsISupports.h"
|
||||
#include "nsUCvKOSupport.h"
|
||||
|
||||
|
||||
|
||||
class nsISO2022KRToUnicode : public nsBasicDecoderSupport
|
||||
{
|
||||
public:
|
||||
nsISO2022KRToUnicode()
|
||||
{
|
||||
mState = mState_ASCII;
|
||||
mLastLegalState = mState_ASCII;
|
||||
mData = 0;
|
||||
mEUCKRDecoder = nsnull;
|
||||
};
|
||||
|
||||
virtual ~nsISO2022KRToUnicode()
|
||||
{
|
||||
NS_IF_RELEASE(mEUCKRDecoder);
|
||||
};
|
||||
|
||||
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
|
||||
PRUnichar * aDest, PRInt32 * aDestLength) ;
|
||||
|
||||
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
|
||||
PRInt32 * aDestLength)
|
||||
{
|
||||
*aDestLength = aSrcLength;
|
||||
return NS_OK;
|
||||
};
|
||||
|
||||
NS_IMETHOD Reset()
|
||||
{
|
||||
mState = mState_ASCII;
|
||||
mLastLegalState = mState_ASCII;
|
||||
return NS_OK;
|
||||
};
|
||||
|
||||
private:
|
||||
enum {
|
||||
mState_ASCII,
|
||||
mState_ESC,
|
||||
mState_ESC_24,
|
||||
mState_ESC_24_29,
|
||||
mState_KSX1001_1992,
|
||||
mState_KSX1001_1992_2ndbyte,
|
||||
mState_ERROR
|
||||
} mState, mLastLegalState;
|
||||
|
||||
PRUint8 mData;
|
||||
|
||||
nsIUnicodeDecoder *mEUCKRDecoder;
|
||||
};
|
||||
#endif // nsISO2022KRToUnicode_h__
|
||||
|
|
|
@ -62,6 +62,7 @@
|
|||
#include "nsUnicodeToJohabNoAscii.h"
|
||||
#include "nsCP949ToUnicode.h"
|
||||
#include "nsUnicodeToCP949.h"
|
||||
#include "nsISO2022KRToUnicode.h"
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Global functions and data [declaration]
|
||||
|
@ -97,6 +98,7 @@ NS_UCONV_REG_UNREG(nsUnicodeToJohab, "Unicode", "x-johab", NS_UNICODETOJOHAB_CI
|
|||
NS_UCONV_REG_UNREG(nsUnicodeToJohabNoAscii, "Unicode", "x-johab-noascii", NS_UNICODETOJOHABNOASCII_CID);
|
||||
NS_UCONV_REG_UNREG(nsCP949ToUnicode, "x-windows-949", "Unicode" , NS_CP949TOUNICODE_CID);
|
||||
NS_UCONV_REG_UNREG(nsUnicodeToCP949, "Unicode", "x-windows-949", NS_UNICODETOCP949_CID);
|
||||
NS_UCONV_REG_UNREG(nsISO2022KRToUnicode, "ISO-2022-KR", "Unicode" , NS_ISO2022KRTOUNICODE_CID);
|
||||
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsEUCKRToUnicode);
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToEUCKR);
|
||||
|
@ -107,6 +109,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohab);
|
|||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohabNoAscii);
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsCP949ToUnicode);
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToCP949);
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsISO2022KRToUnicode);
|
||||
|
||||
static nsModuleComponentInfo components[] =
|
||||
{
|
||||
|
@ -163,6 +166,12 @@ static nsModuleComponentInfo components[] =
|
|||
NS_UNICODEENCODER_CONTRACTID_BASE "x-windows-949",
|
||||
nsUnicodeToCP949Constructor,
|
||||
nsUnicodeToCP949RegSelf, nsUnicodeToCP949UnRegSelf
|
||||
},
|
||||
{
|
||||
DECODER_NAME_BASE "ISO-2022-KR" , NS_ISO2022KRTOUNICODE_CID,
|
||||
NS_UNICODEDECODER_CONTRACTID_BASE "ISO-2022-KR",
|
||||
nsISO2022KRToUnicodeConstructor ,
|
||||
nsISO2022KRToUnicodeRegSelf , nsISO2022KRToUnicodeUnRegSelf
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче