check in GBK converter for Xu, Yueheng <yueheng.xu@intel.com>

This commit is contained in:
ftang%netscape.com 1999-09-13 19:27:05 +00:00
Родитель f7c03ea9f8
Коммит 4860f5dce0
6 изменённых файлов: 3601 добавлений и 0 удалений

3151
intl/uconv/ucvcn/cp936map.h Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

62
intl/uconv/ucvcn/gbku.h Normal file
Просмотреть файл

@ -0,0 +1,62 @@
// =======================================================================
// Original Author: Yueheng Xu
// email: yueheng.xu@intel.com
// phone: (503)264-2248
// Intel Corporation, Oregon, USA
// Last Update: September 7, 1999
// Revision History:
// 09/07/1999 - initial version.
// =======================================================================
// This table maps the GBK code to its unicode.
// The mapping data of this GBK table is obtained from
// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
// Frank Tang of Netscape wrote the original perl tool to re-align the
// mapping data into an 8-item per line format ( i.e. file cp936map.txt ).
//
// The valid GBK charset range: the left byte is in [0x81, 0xfe] and the
// right byte is in [0x40, 0x7e] or [0x80, 0xfe]. For the convenience of
// index calculation, the table here uses a single consecutive range of
// [0x40, 0xfe] for the right byte. The invalid codes whose right byte
// is 0x7f are mapped to the undefined Unicode value 0xFFFF.
//
// ---- Definitions needed by every includer ------------------------------
// Value stored in the table for byte pairs that have no Unicode mapping.
#define GB_UNDEFINED 0xFFFF
// Number of entries in GBKToUnicodeTable.
#define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
// One double-byte GBK code: lead ("left") byte followed by trail ("right")
// byte.
typedef struct
{
  char leftbyte;
  char rightbyte;
} DByte;

// ---- The table itself ---------------------------------------------------
// A translation unit that defines _GBKU_TABLE_ before including this header
// only gets an extern declaration of the table. Any other includer defines
// _GBKU_TABLE_ itself and provides the table definition, whose initializer
// is pulled in from cp936map.h.
#ifndef _GBKU_TABLE_
#define _GBKU_TABLE_
PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH] =
{
#include "cp936map.h"
};
#else
extern PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH];
#endif /* ifndef _GBKU_TABLE_ */

Просмотреть файл

@ -15,3 +15,141 @@
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/**
* A character set converter from GBK to Unicode.
*
*
* @created 07/Sept/1999
* @author Yueheng Xu, Yueheng.Xu@intel.com
*/
#include "nsGBKToUnicode.h"
#include "nsUCvCnDll.h"
#include "gbku.h"
//----------------------------------------------------------------------
// Global functions and data [declaration]
// Table data handed to the nsMultiTableDecoderSupport base class by the
// nsGBKToUnicode constructor in this file. The names say GB2312, not GBK.
// NOTE(review): ConvertNoBuff() is overridden in this file and never reads
// these tables, so they appear to exist only to satisfy the base-class
// constructor -- confirm before relying on them.

// Shift table listed first in g_GB2312ShiftTableSet, alongside
// g_AsciiMapping in the mapping set.
// NOTE(review): u1ByteCharset presumably selects a one-byte scanner; the
// ShiftCell macro is defined outside this file -- confirm its meaning.
static PRInt16 g_ASCIIShiftTable[] = {
0, u1ByteCharset,
ShiftCell(0,0,0,0,0,0,0,0)
};
// Shift table listed second, alongside g_utGB2312Mapping.
// NOTE(review): u2BytesGRCharset presumably selects a two-byte scanner --
// confirm.
static PRInt16 g_GB2312ShiftTable[] = {
0, u2BytesGRCharset,
ShiftCell(0,0,0,0,0,0,0,0)
};
// The two shift tables, in the same order as the mapping tables and the
// ranges below.
static PRInt16 *g_GB2312ShiftTableSet [] = {
g_ASCIIShiftTable,
g_GB2312ShiftTable
};
// The two mapping tables; both are declared outside this file.
static PRUint16 *g_GB2312MappingTableSet [] ={
g_AsciiMapping,
g_utGB2312Mapping
};
// One byte range per table: 0x00..0x7E for the first entry and 0xA1..0xFE
// for the second.
// NOTE(review): 0xA1..0xFE does not cover GBK lead bytes 0x81..0xA0.
static uRange g_GB2312Ranges[] = {
{ 0x00, 0x7E },
{ 0xA1, 0xFE }
};
//----------------------------------------------------------------------
// Class nsGBKToUnicode [implementation]
// Constructs the decoder by passing the base class a table count of 2 plus
// the file-local range, shift-table and mapping-table arrays. The arrays
// decay to pointers to their first element, which is the same address the
// base class would receive from taking the address of the whole array.
nsGBKToUnicode::nsGBKToUnicode()
  : nsMultiTableDecoderSupport(
        2,
        (uRange *) g_GB2312Ranges,
        (uShiftTable**) g_GB2312ShiftTableSet,
        (uMappingTable**) g_GB2312MappingTableSet)
{
}
/**
 * Factory helper: allocates a new nsGBKToUnicode and stores it in *aResult.
 *
 * @param aResult receives the new object (or NULL when allocation failed)
 * @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY when the allocation
 *         produced a null pointer
 */
nsresult nsGBKToUnicode::CreateInstance(nsISupports ** aResult)
{
  // The result slot is always assigned, even on failure.
  *aResult = new nsGBKToUnicode();
  if (*aResult == NULL)
  {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
//----------------------------------------------------------------------
// Subclassing of nsTablesDecoderSupport class [implementation]
/**
 * Reports how many PRUnichars a destination buffer needs in order to hold
 * the conversion of aSrcLength source bytes.
 *
 * @param aSrc        source bytes (not inspected)
 * @param aSrcLength  number of source bytes
 * @param aDestLength receives aSrcLength
 * @return NS_OK_UDEC_EXACTLENGTH
 */
NS_IMETHODIMP nsGBKToUnicode::GetMaxLength(const char * aSrc,
                                           PRInt32 aSrcLength,
                                           PRInt32 * aDestLength)
{
  // ConvertNoBuff() emits one PRUnichar per ASCII byte and one per
  // two-byte GBK code, so the byte count is an upper bound on the output.
  // NOTE(review): for input containing double-byte codes this is not the
  // exact length, yet the status code below says it is -- confirm whether
  // callers depend on exactness.
  PRInt32 upperBound = aSrcLength;
  *aDestLength = upperBound;
  return NS_OK_UDEC_EXACTLENGTH;
}
//Overwriting the ConvertNoBuff() in nsUCvCnSupport.cpp.
//side effects: all the helper functions called by UCvCnSupport are deprecated
/**
 * Looks up the Unicode value of one double-byte GBK code.
 *
 * The table is laid out with 0xbf consecutive trail-byte slots (0x40..0xfe)
 * per lead byte, starting at lead byte 0x81, so the index is
 * (lead - 0x81) * 0xbf + (trail - 0x40).
 *
 * @param pGBCode  the two source bytes, lead byte in leftbyte and trail
 *                 byte in rightbyte; nothing is written when NULL
 * @param pUnicode receives the table entry, or 0xFFFF (the value gbku.h
 *                 names GB_UNDEFINED) when the byte pair lies outside the
 *                 table; nothing is written when NULL
 */
void GBKToUnicode(DByte *pGBCode, PRUnichar * pUnicode)
{
  if (!pGBCode || !pUnicode)
    return;

  // Plain char may be signed; bytes >= 0x80 would then be negative and
  // every index computed from them would fall outside the table. Go through
  // unsigned char to get the real byte values.
  const int lead  = (unsigned char) pGBCode->leftbyte;
  const int trail = (unsigned char) pGBCode->rightbyte;

  // Always give the output slot a defined value: the caller counts the slot
  // as produced whether or not the lookup succeeds.
  *pUnicode = (PRUnichar) 0xFFFF;

  // Validate each byte separately. An out-of-range trail byte would
  // otherwise still yield an in-range index that belongs to a neighbouring
  // lead byte's row.
  if (lead < 0x81 || lead > 0xfe || trail < 0x40 || trail > 0xfe)
    return;

  const int index = (lead - 0x81) * 0xbf + (trail - 0x40);
  if (index >= 0 && index < MAX_GBK_LENGTH)
    *pUnicode = GBKToUnicodeTable[index];
}
/**
 * Converts a run of GBK bytes to Unicode without intermediate buffering.
 *
 * A byte with the high bit clear is copied through as one PRUnichar. A byte
 * with the high bit set is taken as the lead byte of a two-byte code and is
 * translated by GBKToUnicode().
 *
 * @param aSrc        source bytes
 * @param aSrcLength  in: number of source bytes available;
 *                    out: number of source bytes actually consumed
 * @param aDest       destination buffer
 * @param aDestLength in: capacity of aDest in PRUnichars;
 *                    out: number of PRUnichars written
 * @return NS_OK in all cases. Conversion stops early when the destination
 *         is full or the input ends in the middle of a two-byte code; the
 *         updated lengths are the only indication of that.
 */
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
                                            PRInt32 * aSrcLength,
                                            PRUnichar *aDest,
                                            PRInt32 * aDestLength)
{
  // Keep the full 32-bit lengths: narrowing them to short would mangle any
  // buffer longer than 32767 bytes.
  const PRInt32 srcAvailable = *aSrcLength;
  const PRInt32 destCapacity = *aDestLength;
  PRInt32 consumed = 0;
  PRInt32 produced = 0;

  while (consumed < srcAvailable && produced < destCapacity)
  {
    if ( aSrc[consumed] & 0x80 )
    {
      // Lead byte of a two-byte code. If the trail byte is not in this
      // buffer, stop here and leave the lead byte unconsumed rather than
      // reading past the end of the input.
      if (consumed + 1 >= srcAvailable)
        break;
      GBKToUnicode((DByte *)(aSrc + consumed), aDest + produced);
      consumed += 2;
    }
    else
    {
      // Plain ASCII: the byte value is the Unicode value.
      aDest[produced] = (PRUnichar) ( aSrc[consumed] & 0x00ff );
      consumed++;
    }
    produced++;
  }

  // Report progress unconditionally, including the "nothing converted" case.
  *aSrcLength = consumed;
  *aDestLength = produced;
  return NS_OK;
}

Просмотреть файл

@ -15,3 +15,49 @@
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
#ifndef nsGBKToUnicode_h___
#define nsGBKToUnicode_h___
#include "nsUCvCnSupport.h"
//----------------------------------------------------------------------
// Class nsGBKToUnicode [declaration]
/**
* A character set converter from GBK to Unicode.
*
* Derives from nsMultiTableDecoderSupport and overrides ConvertNoBuff() and
* GetMaxLength().
*
* @created 07/Sept/1999
* @author Yueheng Xu, Yueheng.Xu@intel.com
*/
class nsGBKToUnicode : public nsMultiTableDecoderSupport
{
public:
/**
* Class constructor. Takes no arguments; the base class is initialized
* in the implementation file.
*/
nsGBKToUnicode();
/**
* Static class constructor: creates a new nsGBKToUnicode instance.
*
* @param aResult receives the newly created object
* @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY if allocation failed
*/
static nsresult CreateInstance(nsISupports **aResult);
protected:
//--------------------------------------------------------------------
// Subclassing of nsDecoderSupport class [declaration]
/**
* Converts GBK bytes to Unicode characters.
*
* @param aSrc        source bytes
* @param aSrcLength  in: source bytes available; out: source bytes consumed
* @param aDest       destination buffer
* @param aDestLength in: capacity of aDest in PRUnichars;
*                    out: PRUnichars written
*/
NS_IMETHOD ConvertNoBuff(const char* aSrc,
PRInt32 * aSrcLength,
PRUnichar *aDest,
PRInt32 * aDestLength);
/**
* Reports the destination size, in PRUnichars, needed to convert
* aSrcLength source bytes.
*
* @param aSrc        source bytes
* @param aSrcLength  number of source bytes
* @param aDestLength receives the required number of PRUnichars
*/
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength);
};
#endif /* nsGBKToUnicode_h___ */

Просмотреть файл

@ -15,3 +15,160 @@
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/**
* A character set converter from Unicode to GBK.
*
*
* @created 08/Sept/1999
* @author Yueheng Xu, Yueheng.Xu@intel.com
*/
#include "nsUnicodeToGBK.h"
#include "nsUCvCnDll.h"
#define _GBKU_TABLE_ // to use a shared GBKU table
#include "gbku.h"
//----------------------------------------------------------------------
// Global functions and data [declaration]
// Table data handed to the nsMultiTableEncoderSupport base class by the
// nsUnicodeToGBK constructor in this file. The names say GB2312, not GBK.
// NOTE(review): ConvertNoBuff() is overridden in this file and never reads
// these tables, so they appear to exist only to satisfy the base-class
// constructor -- confirm before relying on them.

// Shift table listed first in g_GB2312ShiftTableSet, alongside
// g_AsciiMapping in the mapping set.
// NOTE(review): u1ByteCharset presumably selects a one-byte generator; the
// ShiftCell macro is defined outside this file -- confirm its meaning.
static PRInt16 g_ASCIIShiftTable[] = {
0, u1ByteCharset,
ShiftCell(0,0,0,0,0,0,0,0)
};
// Shift table listed second, alongside g_ufGB2312Mapping.
// NOTE(review): u2BytesGRCharset presumably selects a two-byte generator --
// confirm.
static PRInt16 g_GB2312ShiftTable[] = {
0, u2BytesGRCharset,
ShiftCell(0,0,0,0,0,0,0,0)
};
// The two shift tables, in the same order as the mapping tables below.
static PRInt16 *g_GB2312ShiftTableSet [] = {
g_ASCIIShiftTable,
g_GB2312ShiftTable
};
// The two mapping tables; both are declared outside this file.
static PRUint16 *g_GB2312MappingTableSet [] ={
g_AsciiMapping,
g_ufGB2312Mapping
};
//----------------------------------------------------------------------
// Class nsUnicodeToGBK [implementation]
// Constructs the encoder by passing the base class a table count of 2 plus
// the file-local shift-table and mapping-table arrays. The arrays decay to
// pointers to their first element, which is the same address the base
// class would receive from taking the address of the whole array.
nsUnicodeToGBK::nsUnicodeToGBK()
  : nsMultiTableEncoderSupport(
        2,
        (uShiftTable**) g_GB2312ShiftTableSet,
        (uMappingTable**) g_GB2312MappingTableSet)
{
}
#define TRUE 1
#define FALSE 0
/**
 * Finds the double-byte GBK code of one Unicode character by scanning
 * GBKToUnicodeTable for the first matching entry.
 *
 * A table index maps back to a byte pair as
 *   lead  = index / 0xbf + 0x81
 *   trail = index % 0xbf + 0x40
 * which mirrors the table layout of 0xbf trail-byte slots per lead byte.
 *
 * @param SrcUnicode the character to encode
 * @param pGBCode    receives the lead byte in leftbyte and the trail byte
 *                   in rightbyte. When the character has no entry in the
 *                   table, it receives 0xA3 0xBF (the GBK full-width
 *                   question mark) so that the caller, which always emits
 *                   two bytes, never outputs uninitialized memory. Nothing
 *                   is written when NULL.
 */
void UnicodeToGBK(PRUnichar SrcUnicode, DByte *pGBCode)
{
  if (!pGBCode)
    return;

  int tableIndex = -1;

  // 0xFFFF is the filler the table uses for undefined byte pairs; a source
  // character with that value must not "match" one of those holes.
  if (SrcUnicode != 0xFFFF)
  {
    for (int i = 0; i < MAX_GBK_LENGTH; i++)
    {
      if ( GBKToUnicodeTable[i] == SrcUnicode )
      {
        tableIndex = i;
        break;
      }
    }
  }

  if (tableIndex < 0)
  {
    // No mapping: substitute a well-formed replacement character.
    pGBCode->leftbyte  = (char) 0xA3;
    pGBCode->rightbyte = (char) 0xBF;
    return;
  }

  // Convert the one-dimensional index to the (left, right) pair. The trail
  // byte is stored as computed: GBK trail bytes legitimately fall in
  // 0x40..0x7e, so forcing the high bit on would turn them into different
  // characters.
  pGBCode->leftbyte  = (char) ( tableIndex / 0x00BF + 0x0081 );
  pGBCode->rightbyte = (char) ( tableIndex % 0x00BF + 0x0040 );
}
/**
 * Converts a run of Unicode characters to GBK without intermediate
 * buffering.
 *
 * Characters below 0x80 are written as one byte. Every other character is
 * passed to UnicodeToGBK() and occupies two bytes.
 *
 * @param aSrc        source characters
 * @param aSrcLength  in: number of source characters available;
 *                    out: number of source characters actually consumed
 * @param aDest       destination buffer
 * @param aDestLength in: capacity of aDest in bytes;
 *                    out: number of bytes written
 * @return NS_OK in all cases. Conversion stops early when the next
 *         character does not fit in the destination; the updated lengths
 *         are the only indication of that.
 */
NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(const PRUnichar * aSrc,
                                            PRInt32 * aSrcLength,
                                            char * aDest,
                                            PRInt32 * aDestLength)
{
  const PRInt32 srcAvailable = *aSrcLength;
  const PRInt32 destCapacity = *aDestLength;
  PRInt32 consumed = 0;
  PRInt32 produced = 0;

  while (consumed < srcAvailable)
  {
    const PRUnichar ch = aSrc[consumed];

    if ( ch & 0xff80 )
    {
      // Not 7-bit ASCII. This includes 0x80..0xFF: emitting those as a
      // single raw byte would produce something a GBK reader takes for a
      // lead byte.
      // Check the room before writing so the buffer is never overrun.
      if (produced + 2 > destCapacity)
        break;
      UnicodeToGBK( ch, (DByte *)(aDest + produced) );
      produced += 2;
    }
    else
    {
      // 7-bit ASCII: take the value itself rather than peeking at the first
      // byte of the PRUnichar in memory, which is only the low byte on
      // little-endian machines.
      if (produced + 1 > destCapacity)
        break;
      aDest[produced] = (char) ch;
      produced += 1;
    }
    consumed++;
  }

  // consumed counts exactly the characters whose bytes were written, so a
  // follow-up call resumes at the right place.
  *aDestLength = produced;
  *aSrcLength = consumed;
  return NS_OK;
}
/**
 * Factory helper: allocates a new nsUnicodeToGBK and hands it back through
 * aResult.
 *
 * @param aResult receives the new object on success; left untouched when
 *                allocation fails
 * @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY when the allocation
 *         produced a null pointer
 */
nsresult nsUnicodeToGBK::CreateInstance(nsISupports ** aResult)
{
  nsIUnicodeEncoder *encoder = new nsUnicodeToGBK();
  if (!encoder)
    return NS_ERROR_OUT_OF_MEMORY;

  *aResult = encoder;
  return NS_OK;
}
//----------------------------------------------------------------------
// Subclassing of nsTableEncoderSupport class [implementation]
/**
 * Reports how many bytes a destination buffer needs in order to hold the
 * conversion of aSrcLength Unicode characters.
 *
 * @param aSrc        source characters (not inspected)
 * @param aSrcLength  number of source characters
 * @param aDestLength receives twice aSrcLength
 * @return NS_OK
 */
NS_IMETHODIMP nsUnicodeToGBK::GetMaxLength(const PRUnichar * aSrc,
                                           PRInt32 aSrcLength,
                                           PRInt32 * aDestLength)
{
  // ConvertNoBuff() writes at most two bytes per source character.
  const PRInt32 bytesPerChar = 2;
  *aDestLength = bytesPerChar * aSrcLength;
  return NS_OK;
}

Просмотреть файл

@ -15,3 +15,50 @@
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/**
* A character set converter from Unicode to GBK.
*
*
* @created 08/Sept/1999
* @author Yueheng Xu, Yueheng.Xu@intel.com
*/
#ifndef nsUnicodeToGBK_h___
#define nsUnicodeToGBK_h___
#include "nsUCvCnSupport.h"
//----------------------------------------------------------------------
// Class nsUnicodeToGBK [declaration]
// A character set converter from Unicode to GBK. Derives from
// nsMultiTableEncoderSupport and overrides ConvertNoBuff() and
// GetMaxLength().
class nsUnicodeToGBK: public nsMultiTableEncoderSupport
{
public:
/**
* Class constructor. Takes no arguments; the base class is initialized
* in the implementation file.
*/
nsUnicodeToGBK();
/**
* Static class constructor: creates a new nsUnicodeToGBK instance.
*
* @param aResult receives the newly created object
* @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY if allocation failed
*/
static nsresult CreateInstance(nsISupports **aResult);
protected:
//--------------------------------------------------------------------
// Subclassing of nsEncoderSupport class [declaration]
/**
* Converts Unicode characters to GBK bytes.
*
* @param aSrc        source characters
* @param aSrcLength  in: source characters available;
*                    out: source characters consumed
* @param aDest       destination buffer
* @param aDestLength in: capacity of aDest in bytes; out: bytes written
*/
NS_IMETHOD ConvertNoBuff(const PRUnichar * aSrc,
PRInt32 * aSrcLength,
char * aDest,
PRInt32 * aDestLength);
/**
* Reports the destination size, in bytes, needed to convert aSrcLength
* source characters.
*
* @param aSrc        source characters
* @param aSrcLength  number of source characters
* @param aDestLength receives the required number of bytes
*/
NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
PRInt32 * aDestLength);
};
#endif /* nsUnicodeToGBK_h___ */