Fix bug 7964: add an ISO-2022-KR decoder (to Unicode) for
Korean emails. An encoder is NOT included because ISO-2022-KR should
not be used for outgoing emails.
This commit is contained in:
jshin%mailaps.org 2001-10-13 07:22:27 +00:00
Родитель 6dc96ac474
Коммит 0b2e73d509
3 изменённых файлов: 236 добавлений и 1 удалений

Просмотреть файл

@ -19,7 +19,7 @@
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Contributor(s): Jungshik Shin <jshin@mailaps.org>
*
*
* Alternatively, the contents of this file may be used under the terms of
@ -35,3 +35,173 @@
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsISO2022KRToUnicode.h"
#include "nsUCvKOSupport.h"
#include "nsICharsetConverterManager.h"
#include "nsIServiceManager.h"
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
/**
 * Decode a chunk of ISO-2022-KR bytes into Unicode (PRUnichar) code units.
 *
 * The decoder is a byte-driven state machine. mState, mLastLegalState and
 * mData live in the object, so an escape sequence or a two-byte KS X 1001
 * character may be split across successive calls.
 *
 * Invariant kept by this implementation: an input byte is only consumed
 * (src advanced) after everything it produces has been stored in aDest.
 * When the output buffer is too small for the current byte, neither the
 * state nor the output is touched for that byte, so the caller can resume
 * at aSrc + *aSrcLen with a fresh output buffer without losing or
 * duplicating characters.
 *
 * @param aSrc     [IN] bytes to decode
 * @param aSrcLen  [IN/OUT] in: number of bytes available at aSrc;
 *                 out: number of bytes consumed when NS_OK_UDEC_MOREOUTPUT
 *                 is returned (left untouched on NS_OK, where every byte
 *                 was consumed)
 * @param aDest    [OUT] buffer receiving the decoded code units
 * @param aDestLen [IN/OUT] in: capacity of aDest in PRUnichar units;
 *                 out: number of units written
 * @return NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when
 *         aDest filled up before the input was exhausted
 */
NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
{
  const unsigned char* srcEnd = (const unsigned char*)aSrc + *aSrcLen;
  const unsigned char* src = (const unsigned char*)aSrc;
  PRUnichar* destEnd = aDest + *aDestLen;
  PRUnichar* dest = aDest;

  while (src < srcEnd)
  {
    // If LF/CR, return to US-ASCII unconditionally. This is tested on the
    // byte about to be processed (instead of peeking at src[1] after the
    // increment) so that nothing is ever read at or beyond srcEnd and the
    // first byte of a follow-up chunk gets the same treatment.
    if (0x0a == *src || 0x0d == *src)
      mState = mState_ASCII;

    switch (mState)
    {
      case mState_ASCII:
        if (0x1b == *src) {
          mLastLegalState = mState;
          mState = mState_ESC;
        }
        else if (0x0e == *src) { // Shift-Out (SO)
          mState = mState_KSX1001_1992;
        }
        else {
          if (dest >= destEnd)
            goto error1;
          // Bytes with the high bit set are illegal in a 7-bit encoding.
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
        }
        break;

      case mState_ESC:
        if ('$' == *src) {
          mState = mState_ESC_24;
        }
        else {
          // Not a designator sequence: emit the swallowed ESC and this byte.
          if ((destEnd - dest) < 2)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24: // ESC $
        if (')' == *src) {
          mState = mState_ESC_24_29;
        }
        else {
          if ((destEnd - dest) < 3)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_ESC_24_29: // ESC $ )
        if ('C' == *src) {
          // Complete designator "ESC $ ) C": nothing to emit.
          mState = mState_ASCII;
        }
        else {
          if ((destEnd - dest) < 4)
            goto error1;
          *dest++ = (PRUnichar) 0x1b;
          *dest++ = (PRUnichar) '$';
          *dest++ = (PRUnichar) ')';
          *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
          mState = mLastLegalState;
        }
        break;

      case mState_KSX1001_1992:
        if (0x20 < *src && *src < 0x7f) {
          // First byte of a two-byte character; remember it.
          mData = (PRUint8) *src;
          mState = mState_KSX1001_1992_2ndbyte;
        }
        else if (0x0f == *src) { // Shift-In (SI)
          mState = mState_ASCII;
        }
        else {
          if (dest >= destEnd)
            goto error1;
          if (0x20 == *src || 0x09 == *src) {
            // Allow space and tab between SO and SI (i.e. in Hangul segment)
            *dest++ = (PRUnichar) *src;
          }
          else { // Everything else is invalid.
            *dest++ = 0xFFFD;
          }
        }
        break;

      case mState_KSX1001_1992_2ndbyte:
        // Every byte in this state produces exactly one output unit.
        if (dest >= destEnd)
          goto error1;
        if (0x20 < *src && *src < 0x7f) {
          if (!mEUCKRDecoder) {
            // creating a delegate converter (EUC-KR)
            nsresult rv;
            nsString tmpCharset;
            nsCOMPtr<nsICharsetConverterManager> ccm =
              do_GetService(kCharsetConverterManagerCID, &rv);
            if (!NS_FAILED(rv)) {
              tmpCharset.AssignWithConversion("EUC-KR");
              rv = ccm->GetUnicodeDecoder(&tmpCharset, &mEUCKRDecoder);
            }
          }
          if (!mEUCKRDecoder) { // failed creating a delegate converter
            *dest++ = 0xFFFD;
          }
          else {
            unsigned char ksx[2];
            PRUnichar uni = 0xFFFD;
            PRInt32 ksxLen = 2, uniLen = 1;
            // mData is the original 1st byte.
            // *src is the present 2nd byte.
            // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
            ksx[0] = mData | 0x80;
            ksx[1] = *src | 0x80;
            // Convert EUC-KR to unicode.
            nsresult convRv =
              mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
            // Never hand out whatever happened to be in 'uni' when the
            // delegate failed or produced nothing.
            if (NS_FAILED(convRv) || uniLen < 1)
              uni = 0xFFFD;
            *dest++ = uni;
          }
          mState = mState_KSX1001_1992;
        }
        else { // Invalid
          if (0x0f == *src) { // Shift-In (SI)
            mState = mState_ASCII;
          }
          else {
            mState = mState_KSX1001_1992;
          }
          *dest++ = 0xFFFD;
        }
        break;

      case mState_ERROR:
        if (dest >= destEnd)
          goto error1;
        mState = mLastLegalState;
        *dest++ = 0xFFFD;
        break;
    } // switch

    src++;
  }

  *aDestLen = dest - aDest;
  return NS_OK;

error1:
  // Output buffer exhausted: report how far we got. 'src' still points at
  // the first byte that has NOT been consumed.
  *aDestLen = dest - aDest;
  *aSrcLen = src - (const unsigned char*)aSrc;
  return NS_OK_UDEC_MOREOUTPUT;
}

Просмотреть файл

@ -35,3 +35,59 @@
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsISO2022KRToUnicode_h__
#define nsISO2022KRToUnicode_h__
#include "nsISupports.h"
#include "nsUCvKOSupport.h"
class nsISO2022KRToUnicode : public nsBasicDecoderSupport
{
public:
nsISO2022KRToUnicode()
{
mState = mState_ASCII;
mLastLegalState = mState_ASCII;
mData = 0;
mEUCKRDecoder = nsnull;
};
virtual ~nsISO2022KRToUnicode()
{
NS_IF_RELEASE(mEUCKRDecoder);
};
NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength,
PRUnichar * aDest, PRInt32 * aDestLength) ;
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength)
{
*aDestLength = aSrcLength;
return NS_OK;
};
NS_IMETHOD Reset()
{
mState = mState_ASCII;
mLastLegalState = mState_ASCII;
return NS_OK;
};
private:
enum {
mState_ASCII,
mState_ESC,
mState_ESC_24,
mState_ESC_24_29,
mState_KSX1001_1992,
mState_KSX1001_1992_2ndbyte,
mState_ERROR
} mState, mLastLegalState;
PRUint8 mData;
nsIUnicodeDecoder *mEUCKRDecoder;
};
#endif // nsISO2022KRToUnicode_h__

Просмотреть файл

@ -62,6 +62,7 @@
#include "nsUnicodeToJohabNoAscii.h"
#include "nsCP949ToUnicode.h"
#include "nsUnicodeToCP949.h"
#include "nsISO2022KRToUnicode.h"
//----------------------------------------------------------------------------
// Global functions and data [declaration]
@ -97,6 +98,7 @@ NS_UCONV_REG_UNREG(nsUnicodeToJohab, "Unicode", "x-johab", NS_UNICODETOJOHAB_CI
NS_UCONV_REG_UNREG(nsUnicodeToJohabNoAscii, "Unicode", "x-johab-noascii", NS_UNICODETOJOHABNOASCII_CID);
NS_UCONV_REG_UNREG(nsCP949ToUnicode, "x-windows-949", "Unicode" , NS_CP949TOUNICODE_CID);
NS_UCONV_REG_UNREG(nsUnicodeToCP949, "Unicode", "x-windows-949", NS_UNICODETOCP949_CID);
NS_UCONV_REG_UNREG(nsISO2022KRToUnicode, "ISO-2022-KR", "Unicode" , NS_ISO2022KRTOUNICODE_CID);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsEUCKRToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToEUCKR);
@ -107,6 +109,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohab);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToJohabNoAscii);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsCP949ToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToCP949);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsISO2022KRToUnicode);
static nsModuleComponentInfo components[] =
{
@ -163,6 +166,12 @@ static nsModuleComponentInfo components[] =
NS_UNICODEENCODER_CONTRACTID_BASE "x-windows-949",
nsUnicodeToCP949Constructor,
nsUnicodeToCP949RegSelf, nsUnicodeToCP949UnRegSelf
},
{
DECODER_NAME_BASE "ISO-2022-KR" , NS_ISO2022KRTOUNICODE_CID,
NS_UNICODEDECODER_CONTRACTID_BASE "ISO-2022-KR",
nsISO2022KRToUnicodeConstructor ,
nsISO2022KRToUnicodeRegSelf , nsISO2022KRToUnicodeUnRegSelf
}
};