[converter] need ISO-2022-CN converters
Added ISO-2022-CN converter. (no new file)
r=ftang,sr=alecf
This commit is contained in:
katakai%japan.sun.com 2002-08-17 01:17:42 +00:00
Родитель 1e70b268c9
Коммит 43237dfbfd
3 изменённых файлов: 650 добавлений и 2 удалений

Просмотреть файл

@ -279,6 +279,8 @@
#include "nsGB2312ToUnicodeV2.h"
#include "nsUnicodeToGB2312V2.h"
#include "nsUnicodeToGB2312GL.h"
#include "nsISO2022CNToUnicode.h"
#include "nsUnicodeToISO2022CN.h"
#include "gbku.h"
#define DECODER_NAME_BASE "Unicode Decoder-"
@ -410,6 +412,7 @@ NS_UCONV_REG_UNREG_ENCODER("gb_2312-80", NS_UNICODETOGB2312GL_CID)
NS_UCONV_REG_UNREG("gb18030", NS_GB18030TOUNICODE_CID, NS_UNICODETOGB18030_CID)
NS_UCONV_REG_UNREG_ENCODER("gb18030.2000-0", NS_UNICODETOGB18030Font0_CID)
NS_UCONV_REG_UNREG_ENCODER("gb18030.2000-1", NS_UNICODETOGB18030Font1_CID)
NS_UCONV_REG_UNREG_DECODER("ISO-2022-CN", NS_ISO2022CNTOUNICODE_CID)
NS_CONVERTER_REGISTRY_END
@ -628,6 +631,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsGB18030ToUnicode);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToGB18030);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToGB18030Font0);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToGB18030Font1);
NS_GENERIC_FACTORY_CONSTRUCTOR(nsISO2022CNToUnicode);
//----------------------------------------------------------------------------
@ -1779,6 +1783,11 @@ static const nsModuleComponentInfo components[] =
NS_UNICODEENCODER_CONTRACTID_BASE "gb18030",
nsUnicodeToGB18030Constructor,
},
{
DECODER_NAME_BASE "ISO-2022-CN" , NS_ISO2022CNTOUNICODE_CID,
NS_UNICODEDECODER_CONTRACTID_BASE "ISO-2022-CN",
nsISO2022CNToUnicodeConstructor,
},
};
NS_IMPL_NSGETMODULE(nsUConvModule, components);

Просмотреть файл

@ -19,7 +19,7 @@
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Contributor(s): Ervin Yan <ervin.yan@sun.com>
*
*
* Alternatively, the contents of this file may be used under the terms of
@ -35,3 +35,549 @@
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsISO2022CNToUnicode.h"
#include "nsUCSupport.h"
#include "nsICharsetConverterManager.h"
#include "nsIServiceManager.h"
// Class ID used to look up the charset converter manager service, from which
// the delegate "GB2312" and "x-euc-tw" decoders are obtained on first use.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
/**
 * Decodes a GB2312 byte sequence by handing it to a delegate "GB2312"
 * decoder, which is fetched from the charset converter manager the first
 * time it is needed and cached in mGB2312_Decoder afterwards.
 *
 * @param aSrc        bytes to decode
 * @param aSrcLength  number of bytes at aSrc
 * @param aDest       output buffer
 * @param aDestLength in: capacity of aDest; out: as set by the delegate
 * @return the delegate's Convert() result, or NS_ERROR_UNEXPECTED when the
 *         delegate decoder cannot be obtained
 */
NS_IMETHODIMP nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
{
  if (!mGB2312_Decoder) {
    // First use: ask the converter manager for the delegate decoder.
    nsresult status;
    nsCOMPtr<nsICharsetConverterManager> manager =
      do_GetService(kCharsetConverterManagerCID, &status);
    if (NS_FAILED(status))
      return NS_ERROR_UNEXPECTED;

    nsAutoString charsetName;
    charsetName.Assign(NS_LITERAL_STRING("GB2312"));
    status = manager->GetUnicodeDecoder(&charsetName, getter_AddRefs(mGB2312_Decoder));

    // Treat both a failure code and a null decoder as "no delegate".
    if (NS_FAILED(status) || !mGB2312_Decoder)
      return NS_ERROR_UNEXPECTED;
  }

  return mGB2312_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
}
/**
 * Decodes an EUC-TW byte sequence by handing it to a delegate "x-euc-tw"
 * decoder, which is fetched from the charset converter manager the first
 * time it is needed and cached in mEUCTW_Decoder afterwards.
 *
 * @param aSrc        bytes to decode
 * @param aSrcLength  number of bytes at aSrc
 * @param aDest       output buffer
 * @param aDestLength in: capacity of aDest; out: as set by the delegate
 * @return the delegate's Convert() result, or NS_ERROR_UNEXPECTED when the
 *         delegate decoder cannot be obtained
 */
NS_IMETHODIMP nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
{
  if (!mEUCTW_Decoder) {
    // First use: ask the converter manager for the delegate decoder.
    nsresult status;
    nsCOMPtr<nsICharsetConverterManager> manager =
      do_GetService(kCharsetConverterManagerCID, &status);
    if (NS_FAILED(status))
      return NS_ERROR_UNEXPECTED;

    nsAutoString charsetName;
    charsetName.Assign(NS_LITERAL_STRING("x-euc-tw"));
    status = manager->GetUnicodeDecoder(&charsetName, getter_AddRefs(mEUCTW_Decoder));

    // Treat both a failure code and a null decoder as "no delegate".
    if (NS_FAILED(status) || !mEUCTW_Decoder)
      return NS_ERROR_UNEXPECTED;
  }

  return mEUCTW_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
}
NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
{
const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
const unsigned char * src = (unsigned char *) aSrc;
PRUnichar* destEnd = aDest + *aDestLen;
PRUnichar* dest = aDest;
nsresult rv;
PRInt32 aLen;
while ((src < srcEnd))
{
switch (mState)
{
case eState_ASCII:
if(ESC == *src) {
mState = eState_ESC;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC: // ESC
if('$' == *src) {
mState = eState_ESC_24;
} else {
if(dest+2 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24: // ESC $
if(')' == *src) {
mState = eState_ESC_24_29;
} else if('*' == *src) {
mState = eState_ESC_24_2A;
} else if('+' == *src) {
mState = eState_ESC_24_2B;
} else {
if(dest+3 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_29: // ESC $ )
if('A' == *src) {
mState = eState_ESC_24_29_A;
} else if('G' == *src) {
mState = eState_ESC_24_29_G;
} else {
if(dest+4 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) ')';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_29_A: // ESC $ ) A
if(SO == *src) {
mState = eState_GB2312_1980;
} else {
if(dest+5 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) ')';
*dest++ = (PRUnichar) 'A';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_GB2312_1980: // ESC $ ) A SO
if(SI == *src) { // Shift-In (SI)
mState = eState_ESC_24_29_A_SO_SI;
} else if(ESC == *src) {
mState = eState_ESC;
} else {
if(0x20 < *src && *src < 0x7f) {
mData = *src;
mState = eState_GB2312_1980_2ndbyte;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
}
break;
case eState_GB2312_1980_2ndbyte: // ESC $ ) A SO
if(0x20 < *src && *src < 0x7f) {
unsigned char gb[2];
PRInt32 gbLen = 2;
gb[0] = mData | 0x80;
gb[1] = *src | 0x80;
aLen = destEnd - dest;
rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
if(rv == NS_OK_UDEC_MOREOUTPUT) {
goto error1;
} else if(NS_FAILED(rv)) {
goto error2;
}
dest += aLen;
} else {
if(dest+2 >= destEnd)
goto error1;
*dest++ = (PRUnichar) mData;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
mState = eState_GB2312_1980;
break;
case eState_ESC_24_29_A_SO_SI: // ESC $ ) A SO SI
if(SO == *src) {
mState = eState_GB2312_1980;
} else if(ESC == *src) {
mState = eState_ESC;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_29_A_SO_SI;
}
break;
case eState_ESC_24_29_G: // ESC $ ) G
if(SO == *src) {
mState = eState_CNS11643_1;
} else {
if(dest+5 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) ')';
*dest++ = (PRUnichar) 'G';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_CNS11643_1: // ESC $ ) G SO
if(SI == *src) { // Shift-In (SI)
mState = eState_ESC_24_29_G_SO_SI;
} else if(ESC == *src) {
mState = eState_ESC;
} else {
if(0x20 < *src && *src < 0x7f) {
mData = *src;
mState = eState_CNS11643_1_2ndbyte;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
}
break;
case eState_CNS11643_1_2ndbyte: // ESC $ ) G SO
if(0x20 < *src && *src < 0x7f) {
unsigned char cns[4];
PRInt32 cnsLen = 2;
cns[0] = mData | 0x80;
cns[1] = *src | 0x80;
aLen = destEnd - dest;
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
if(rv == NS_OK_UDEC_MOREOUTPUT) {
goto error1;
} else if(NS_FAILED(rv)) {
goto error2;
}
dest += aLen;
} else {
if(dest+2 >= destEnd)
goto error1;
*dest++ = (PRUnichar) mData;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
mState = eState_CNS11643_1;
break;
case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
if(SO == *src) {
mState = eState_CNS11643_1;
} else if(ESC == *src) {
mState = eState_ESC;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_29_G_SO_SI;
}
break;
case eState_ESC_24_2A: // ESC $ *
if('H' == *src) {
mState = eState_ESC_24_2A_H;
} else {
if(dest+4 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '*';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_2A_H: // ESC $ * H
if(ESC == *src) {
mState = eState_ESC_24_2A_H_ESC;
} else {
if(dest+5 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '*';
*dest++ = (PRUnichar) 'H';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_2A_H_ESC: // ESC $ * H ESC
if(SS2 == *src) {
mState = eState_CNS11643_2;
} else if('$' == *src) {
mState = eState_ESC_24;
} else {
if(dest+6 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '*';
*dest++ = (PRUnichar) 'H';
*dest++ = (PRUnichar) ESC;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_CNS11643_2: // ESC $ * H ESC SS2
if(SI == *src) { // Shift-In (SI)
mState = eState_ESC_24_2A_H_ESC_SS2_SI;
} else if(ESC == *src) {
mState = eState_ESC_24_2A_H_ESC;
} else {
if(0x20 < *src && *src < 0x7f) {
mData = *src;
mState = eState_CNS11643_2_2ndbyte;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
}
break;
case eState_CNS11643_2_2ndbyte: // ESC $ * H ESC SS2
if(0x20 < *src && *src < 0x7f) {
unsigned char cns[4];
PRInt32 cnsLen = 4;
cns[0] = (unsigned char) MBYTE;
cns[1] = (unsigned char) (PMASK + 2);
cns[2] = mData | 0x80;
cns[3] = *src | 0x80;
aLen = destEnd - dest;
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
if(rv == NS_OK_UDEC_MOREOUTPUT) {
goto error1;
} else if(NS_FAILED(rv)) {
goto error2;
}
dest += aLen;
} else {
if(dest+2 >= destEnd)
goto error1;
*dest++ = (PRUnichar) mData;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
mState = eState_CNS11643_2;
break;
case eState_ESC_24_2A_H_ESC_SS2_SI: // ESC $ * H ESC SS2 SI
if(ESC == *src) {
mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_2A_H_ESC_SS2_SI;
}
break;
case eState_ESC_24_2A_H_ESC_SS2_SI_ESC: // ESC $ * H ESC SS2 SI ESC
if(SS2 == *src) {
mState = eState_CNS11643_2;
} else if('$' == *src) {
mState = eState_ESC_24;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_2A_H_ESC_SS2_SI;
}
break;
case eState_ESC_24_2B: // ESC $ +
if('I' <= *src && *src <= 'M') {
mState = eState_ESC_24_2B_I;
mPlaneID = *src - 'I' + 3;
} else {
if(dest+4 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '+';
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_2B_I: // ESC $ + I
if(ESC == *src) {
mState = eState_ESC_24_2B_I_ESC;
} else {
if(dest+5 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '+';
*dest++ = (PRUnichar) 'I' + mPlaneID - 3;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_ESC_24_2B_I_ESC: // ESC $ + I ESC
if(SS3 == *src) {
mState = eState_CNS11643_3;
} else if('$' == *src) {
mState = eState_ESC_24;
} else {
if(dest+6 >= destEnd)
goto error1;
*dest++ = (PRUnichar) ESC;
*dest++ = (PRUnichar) '$';
*dest++ = (PRUnichar) '+';
*dest++ = (PRUnichar) 'I' + mPlaneID - 3;
*dest++ = (PRUnichar) ESC;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ASCII;
}
break;
case eState_CNS11643_3: // ESC $ + I ESC SS3
if(SI == *src) { // Shift-In (SI)
mState = eState_ESC_24_2B_I_ESC_SS3_SI;
} else if(ESC == *src) {
mState = eState_ESC_24_2B_I_ESC;
} else {
if(0x20 < *src && *src < 0x7f) {
mData = *src;
mState = eState_CNS11643_3_2ndbyte;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
}
break;
case eState_CNS11643_3_2ndbyte: // ESC $ + I ESC SS3
if(0x20 < *src && *src < 0x7f) {
unsigned char cns[4];
PRInt32 cnsLen = 4;
cns[0] = (unsigned char) MBYTE;
cns[1] = (unsigned char) (PMASK + mPlaneID);
cns[2] = mData | 0x80;
cns[3] = *src | 0x80;
aLen = destEnd - dest;
rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
if(rv == NS_OK_UDEC_MOREOUTPUT) {
goto error1;
} else if(NS_FAILED(rv)) {
goto error2;
}
dest += aLen;
} else {
if(dest+2 >= destEnd)
goto error1;
*dest++ = (PRUnichar) mData;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
}
mState = eState_CNS11643_3;
break;
case eState_ESC_24_2B_I_ESC_SS3_SI: // ESC $ + I ESC SS3 SI
if(ESC == *src) {
mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_2B_I_ESC_SS3_SI;
}
break;
case eState_ESC_24_2B_I_ESC_SS3_SI_ESC: // ESC $ + I ESC SS3 SI ESC
if(SS3 == *src) {
mState = eState_CNS11643_3;
} else if('$' == *src) {
mState = eState_ESC_24;
} else {
if(dest+1 >= destEnd)
goto error1;
*dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
mState = eState_ESC_24_2B_I_ESC_SS3_SI;
}
break;
} // switch
src++;
}
*aDestLen = dest- aDest;
return NS_OK;
error1:
*aDestLen = dest-aDest;
src++;
if ((mState == eState_ASCII) && (src == srcEnd)) {
return NS_OK;
}
*aSrcLen = src - (const unsigned char*)aSrc;
return NS_OK_UDEC_MOREOUTPUT;
error2:
*aSrcLen = src - (const unsigned char*)aSrc;
*aDestLen = dest-aDest;
mState = eState_ASCII;
return NS_ERROR_UNEXPECTED;
}

Просмотреть файл

@ -19,7 +19,7 @@
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Contributor(s): Ervin Yan <Ervin.Yan@Sun.Com>
*
*
* Alternatively, the contents of this file may be used under the terms of
@ -35,3 +35,96 @@
* the terms of any one of the NPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#ifndef nsISO2022CNToUnicode_h__
#define nsISO2022CNToUnicode_h__
#include "nsCOMPtr.h"
#include "nsISupports.h"
#include "nsUCSupport.h"
// Lead byte of the four-byte sequences Convert builds for the x-euc-tw
// delegate decoder when decoding CNS 11643 planes 2 and above.
#define MBYTE 0x8e
// Base of the plane-selector byte that follows MBYTE; Convert adds the plane
// number to it.
#define PMASK 0xa0
// Shift-In control byte: leaves a two-byte mode.
#define SI 0x0f
// Shift-Out control byte: enters the two-byte mode designated by ESC $ ) A/G.
#define SO 0x0e
// Escape control byte that starts every sequence Convert recognises.
#define ESC 0x1b
// Byte ('N') that Convert expects after ESC to enter CNS 11643 plane 2.
#define SS2 0x4e
// Byte ('O') that Convert expects after ESC to enter the plane selected by
// ESC $ + I..M.
#define SS3 0x4f
/**
 * Decoder from ISO-2022-CN to Unicode.
 *
 * Convert() runs a byte-at-a-time state machine over the escape and shift
 * sequences and forwards the two-byte characters to delegate decoders
 * ("GB2312" and "x-euc-tw") that are created on first use. The machine state
 * is kept in the object so that sequences may span several Convert() calls.
 */
class nsISO2022CNToUnicode : public nsBasicDecoderSupport
{
public:
  nsISO2022CNToUnicode() :
    mState(eState_ASCII),
    mData(0),
    mPlaneID(0) { NS_INIT_REFCNT(); }

  virtual ~nsISO2022CNToUnicode() {}

  NS_IMETHOD Convert(const char *aSrc, PRInt32 * aSrcLength,
                     PRUnichar * aDest, PRInt32 * aDestLength) ;

  /**
   * Reports an output size sufficient for aSrcLength more input bytes.
   *
   * Each input byte yields at most one code unit of its own, but a call can
   * additionally have to emit up to five bytes of an unfinished escape
   * sequence carried over from earlier calls (e.g. ESC $ * H ESC followed by
   * a byte that does not complete it). Those five, plus one slot of
   * headroom, are added to the input length.
   */
  NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
                          PRInt32 * aDestLength)
  {
    *aDestLength = aSrcLength + 6;
    return NS_OK;
  }

  /** Returns the state machine to its initial (ASCII) state. */
  NS_IMETHOD Reset()
  {
    mState = eState_ASCII;
    mData = 0;
    mPlaneID = 0;
    return NS_OK;
  }

private:
  // State Machine ID. Each comment lists the input seen so far to reach the
  // state.
  enum {
    eState_ASCII,
    eState_ESC,                        // ESC
    eState_ESC_24,                     // ESC $
    eState_ESC_24_29,                  // ESC $ )
    eState_ESC_24_29_A,                // ESC $ ) A
    eState_GB2312_1980,                // ESC $ ) A SO
    eState_GB2312_1980_2ndbyte,        // ESC $ ) A SO + first character byte
    eState_ESC_24_29_A_SO_SI,          // ESC $ ) A SO SI
    eState_ESC_24_29_G,                // ESC $ ) G
    eState_CNS11643_1,                 // ESC $ ) G SO
    eState_CNS11643_1_2ndbyte,         // ESC $ ) G SO + first character byte
    eState_ESC_24_29_G_SO_SI,          // ESC $ ) G SO SI
    eState_ESC_24_2A,                  // ESC $ *
    eState_ESC_24_2A_H,                // ESC $ * H
    eState_ESC_24_2A_H_ESC,            // ESC $ * H ESC
    eState_CNS11643_2,                 // ESC $ * H ESC SS2
    eState_CNS11643_2_2ndbyte,         // ESC $ * H ESC SS2 + first character byte
    eState_ESC_24_2A_H_ESC_SS2_SI,     // ESC $ * H ESC SS2 SI
    eState_ESC_24_2A_H_ESC_SS2_SI_ESC, // ESC $ * H ESC SS2 SI ESC
    eState_ESC_24_2B,                  // ESC $ +
    eState_ESC_24_2B_I,                // ESC $ + I..M
    eState_ESC_24_2B_I_ESC,            // ESC $ + I..M ESC
    eState_CNS11643_3,                 // ESC $ + I..M ESC SS3
    eState_CNS11643_3_2ndbyte,         // ESC $ + I..M ESC SS3 + first character byte
    eState_ESC_24_2B_I_ESC_SS3_SI,     // ESC $ + I..M ESC SS3 SI
    eState_ESC_24_2B_I_ESC_SS3_SI_ESC, // ESC $ + I..M ESC SS3 SI ESC
    eState_ERROR
  } mState;

  // First byte of a two-byte character, held while in a *_2ndbyte state.
  char mData;

  // Plane number for CNS11643 code (3..7, set from the ESC $ + I..M designator)
  int mPlaneID;

  // Delegate decoders, created lazily by the helpers below.
  nsCOMPtr<nsIUnicodeDecoder> mGB2312_Decoder;
  nsCOMPtr<nsIUnicodeDecoder> mEUCTW_Decoder;

  // Decode aSrc through the "GB2312" delegate decoder.
  NS_IMETHOD GB2312_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength,
                               PRUnichar * aDest, PRInt32 * aDestLength) ;

  // Decode aSrc through the "x-euc-tw" delegate decoder.
  NS_IMETHOD EUCTW_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength,
                              PRUnichar * aDest, PRInt32 * aDestLength) ;
};
#endif // nsISO2022CNToUnicode_h__