зеркало из https://github.com/mozilla/pjs.git
check in GBK converter for Xu, Yueheng <yueheng.xu@intel.com
This commit is contained in:
Родитель
f7c03ea9f8
Коммит
4860f5dce0
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,62 @@
|
|||
// =======================================================================
|
||||
// Original Author: Yueheng Xu
|
||||
// email: yueheng.xu@intel.com
|
||||
// phone: (503)264-2248
|
||||
// Intel Corporation, Oregon, USA
|
||||
// Last Update: September 7, 1999
|
||||
// Revision History:
|
||||
// 09/07/1999 - initial version.
|
||||
// =======================================================================
|
||||
// This table maps the GBK code to its unicode.
|
||||
// The mapping data of this GBK table is obtained from
|
||||
// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
|
||||
// Frank Tang of Netscape wrote the original perl tool to re-align the
|
||||
// mapping data into an 8-item per line format ( i.e. file cp936map.txt ).
|
||||
//
|
||||
// The valid GBK charset range: left byte is in [0x81, 0xfe], right byte is in
|
||||
// [0x40, 0x7e] and [0x80, 0xfe]. But for the convenience of index
|
||||
// calculation, the table here has a single consecutive range of
|
||||
// [0x40, 0xfe] for the right byte. Those invalid chars whose right byte
|
||||
// is 0x7f will be mapped to undefined unicode 0xFFFF.
|
||||
//
|
||||
|
||||
// Shared declarations for the GBK <-> Unicode mapping table.
//
// Inclusion protocol (unchanged from the original file):
//   * A translation unit that includes this file WITHOUT defining
//     _GBKU_TABLE_ beforehand gets the table DEFINITION, initialised from
//     cp936map.h.  Only one translation unit in a link may do this, because
//     the table has external linkage.
//   * A translation unit that defines _GBKU_TABLE_ before including this
//     file gets only an extern DECLARATION of the table.
//
// The macros and the DByte type are identical in both modes, so they are
// declared once here behind an ordinary include guard.  Previously they were
// duplicated in each branch, and including the file twice in one translation
// unit redefined the anonymous-struct typedef.
#ifndef gbku_h___
#define gbku_h___

// Value stored in table slots that have no Unicode mapping.
#define GB_UNDEFINED 0xFFFF

// Number of table slots: 126 lead bytes times 191 trail-byte slots.
#define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */

// One double-byte GBK code as it appears in a byte stream.
// NOTE: the fields are plain char.  Where plain char is signed, bytes >= 0x80
// read through these fields are negative; cast to unsigned char before doing
// index arithmetic with them.
typedef struct
{
  char leftbyte;
  char rightbyte;
} DByte;

#ifdef _GBKU_TABLE_

// The table is defined in another translation unit.
extern PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH];

#else

#define _GBKU_TABLE_

// Table definition; slot = (lead - 0x81) * 0xbf + (trail - 0x40).
PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH] =
{
#include "cp936map.h"
};

#endif /* ifdef _GBKU_TABLE_ */

#endif /* gbku_h___ */
|
||||
|
|
@ -15,3 +15,141 @@
|
|||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
/**
|
||||
* A character set converter from GBK to Unicode.
|
||||
*
|
||||
*
|
||||
* @created 07/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
|
||||
#include "nsGBKToUnicode.h"
|
||||
#include "nsUCvCnDll.h"
|
||||
|
||||
#include "gbku.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Global functions and data [declaration]
|
||||
|
||||
static PRInt16 g_ASCIIShiftTable[] = {
|
||||
0, u1ByteCharset,
|
||||
ShiftCell(0,0,0,0,0,0,0,0)
|
||||
};
|
||||
|
||||
static PRInt16 g_GB2312ShiftTable[] = {
|
||||
0, u2BytesGRCharset,
|
||||
ShiftCell(0,0,0,0,0,0,0,0)
|
||||
};
|
||||
|
||||
static PRInt16 *g_GB2312ShiftTableSet [] = {
|
||||
g_ASCIIShiftTable,
|
||||
g_GB2312ShiftTable
|
||||
};
|
||||
|
||||
static PRUint16 *g_GB2312MappingTableSet [] ={
|
||||
g_AsciiMapping,
|
||||
g_utGB2312Mapping
|
||||
};
|
||||
|
||||
static uRange g_GB2312Ranges[] = {
|
||||
{ 0x00, 0x7E },
|
||||
{ 0xA1, 0xFE }
|
||||
};
|
||||
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsGBKToUnicode [implementation]
|
||||
|
||||
// Constructs the decoder by passing a two-table configuration (byte ranges,
// shift tables, mapping tables) to the multi-table base class.
nsGBKToUnicode::nsGBKToUnicode()
  : nsMultiTableDecoderSupport(
      2,                                            // number of tables
      (uRange *) &g_GB2312Ranges,
      (uShiftTable**) &g_GB2312ShiftTableSet,
      (uMappingTable**) &g_GB2312MappingTableSet)
{
  // Nothing else to initialise.
}
|
||||
|
||||
// Static factory: allocates a new nsGBKToUnicode and stores it in *aResult.
// Returns NS_ERROR_OUT_OF_MEMORY when the allocation yields a null pointer,
// NS_OK otherwise.
nsresult nsGBKToUnicode::CreateInstance(nsISupports ** aResult)
{
  *aResult = new nsGBKToUnicode();

  if (*aResult == NULL)
  {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  return NS_OK;
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Subclassing of nsMultiTableDecoderSupport class [implementation]
|
||||
|
||||
// Reports how many PRUnichar slots are needed to decode aSrcLength bytes.
//
// @param aSrc        source bytes (not inspected)
// @param aSrcLength  number of source bytes
// @param aDestLength [out] upper bound on the number of characters produced
// @return NS_OK
//
// GBK mixes one-byte (ASCII) and two-byte characters, so N source bytes
// decode to at most N characters, and to fewer whenever a two-byte character
// is present.  The value is therefore an upper bound, not an exact length,
// and the "exact length" status is no longer returned.
NS_IMETHODIMP nsGBKToUnicode::GetMaxLength(const char * aSrc,
                                           PRInt32 aSrcLength,
                                           PRInt32 * aDestLength)
{
  *aDestLength = aSrcLength;
  return NS_OK;
}
|
||||
|
||||
|
||||
|
||||
//Overwriting the ConvertNoBuff() in nsUCvCnSupport.cpp.
|
||||
//side effects: all the helper functions called by UCvCnSupport are deprecated
|
||||
|
||||
// Looks up the Unicode value of one double-byte GBK code.
//
// @param pGBCode  the (lead, trail) byte pair to decode; may be null
// @param pUnicode [out] receives the mapped character, or GB_UNDEFINED when
//                 pGBCode is null or the pair lies outside the table's range
//
// Table slot = (lead - 0x81) * 0xbf + (trail - 0x40), where the lead byte is
// in [0x81, 0xfe] and the trail byte in [0x40, 0xfe].
void GBKToUnicode(DByte *pGBCode, PRUnichar * pUnicode)
{
  if (!pUnicode)
    return;

  // Always give the caller a defined value, even on the failure paths below.
  *pUnicode = GB_UNDEFINED;

  if (!pGBCode)
    return;

  // DByte stores plain char; go through unsigned char so that bytes >= 0x80
  // are not sign-extended into negative numbers before the index arithmetic.
  const int lead  = (unsigned char) pGBCode->leftbyte;
  const int trail = (unsigned char) pGBCode->rightbyte;

  // Reject bytes outside the table's row/column ranges; otherwise an invalid
  // pair would silently alias a cell that belongs to a different code.
  if (lead < 0x81 || lead > 0xfe || trail < 0x40 || trail > 0xfe)
    return;

  const int index = (lead - 0x81) * 0xbf + (trail - 0x40);

  if (index >= 0 && index < MAX_GBK_LENGTH)
    *pUnicode = GBKToUnicodeTable[index];
}
|
||||
|
||||
|
||||
|
||||
// Decodes a run of GBK bytes into Unicode characters.
//
// @param aSrc        source bytes
// @param aSrcLength  [in] number of bytes available in aSrc
//                    [out] number of bytes actually consumed
// @param aDest       destination buffer
// @param aDestLength [in] capacity of aDest in PRUnichar units
//                    [out] number of characters written
// @return NS_OK
//
// A byte with the high bit set starts a two-byte GBK code, which is looked
// up through GBKToUnicode(); any other byte is copied as a one-byte
// character.  Decoding stops when the source is exhausted, when the
// destination is full, or when the last source byte is a lead byte whose
// trail byte has not arrived yet (that byte is left unconsumed).
//
// NOTE(review): the decoder framework probably defines dedicated status
// codes for "need more input" / "need more output"; confirm and return them
// from the two early-stop cases instead of NS_OK.
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
                                            PRInt32 * aSrcLength,
                                            PRUnichar *aDest,
                                            PRInt32 * aDestLength)
{
  // Keep the lengths at full PRInt32 width; narrowing to short would
  // truncate buffers longer than 32767 bytes.
  const PRInt32 srcAvailable  = *aSrcLength;
  const PRInt32 destAvailable = *aDestLength;
  PRInt32 srcUsed  = 0;
  PRInt32 destUsed = 0;

  while (srcUsed < srcAvailable && destUsed < destAvailable)
  {
    if (*aSrc & 0x80)
    {
      // Lead byte of a two-byte code: make sure the trail byte is really
      // inside the buffer before reading it.
      if (srcAvailable - srcUsed < 2)
        break;

      // Pre-set the slot so it holds a defined value even if the helper
      // finds no mapping.
      *aDest = GB_UNDEFINED;
      GBKToUnicode((DByte *)aSrc, aDest);
      aSrc    += 2;
      srcUsed += 2;
    }
    else
    {
      // One-byte character (high bit clear): copy the value through.
      *aDest = (PRUnichar) ((unsigned char)(*aSrc));
      aSrc++;
      srcUsed++;
    }

    aDest++;
    destUsed++;
  }

  // Report progress on every exit path, including when nothing was consumed.
  *aSrcLength  = srcUsed;
  *aDestLength = destUsed;

  return NS_OK;
}
|
||||
|
||||
|
||||
|
|
|
@ -15,3 +15,49 @@
|
|||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
#ifndef nsGBKToUnicode_h___
|
||||
#define nsGBKToUnicode_h___
|
||||
|
||||
#include "nsUCvCnSupport.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsGBKToUnicode [declaration]
|
||||
|
||||
/**
|
||||
* A character set converter from GBK to Unicode.
|
||||
*
|
||||
*
|
||||
* @created 07/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
class nsGBKToUnicode : public nsMultiTableDecoderSupport
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Class constructor.
|
||||
*/
|
||||
nsGBKToUnicode();
|
||||
|
||||
/**
|
||||
* Static class constructor.
|
||||
*/
|
||||
static nsresult CreateInstance(nsISupports **aResult);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// Subclassing of nsDecoderSupport class [declaration]
|
||||
NS_IMETHOD ConvertNoBuff(const char* aSrc,
|
||||
PRInt32 * aSrcLength,
|
||||
PRUnichar *aDest,
|
||||
PRInt32 * aDestLength);
|
||||
|
||||
NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength,
|
||||
PRInt32 * aDestLength);
|
||||
|
||||
};
|
||||
|
||||
#endif /* nsGBKToUnicode_h___ */
|
||||
|
||||
|
|
|
@ -15,3 +15,160 @@
|
|||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
/**
|
||||
* A character set converter from Unicode to GBK.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
|
||||
#include "nsUnicodeToGBK.h"
|
||||
#include "nsUCvCnDll.h"
|
||||
|
||||
|
||||
#define _GBKU_TABLE_ // to use a shared GBKU table
|
||||
#include "gbku.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Global functions and data [declaration]
|
||||
|
||||
|
||||
static PRInt16 g_ASCIIShiftTable[] = {
|
||||
0, u1ByteCharset,
|
||||
ShiftCell(0,0,0,0,0,0,0,0)
|
||||
};
|
||||
|
||||
static PRInt16 g_GB2312ShiftTable[] = {
|
||||
0, u2BytesGRCharset,
|
||||
ShiftCell(0,0,0,0,0,0,0,0)
|
||||
};
|
||||
|
||||
static PRInt16 *g_GB2312ShiftTableSet [] = {
|
||||
g_ASCIIShiftTable,
|
||||
g_GB2312ShiftTable
|
||||
};
|
||||
|
||||
static PRUint16 *g_GB2312MappingTableSet [] ={
|
||||
g_AsciiMapping,
|
||||
g_ufGB2312Mapping
|
||||
};
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsUnicodeToGBK [implementation]
|
||||
|
||||
// Constructs the encoder by passing a two-table configuration (shift tables
// and mapping tables) to the multi-table base class.
nsUnicodeToGBK::nsUnicodeToGBK()
  : nsMultiTableEncoderSupport(
      2,                                            // number of tables
      (uShiftTable**) &g_GB2312ShiftTableSet,
      (uMappingTable**) &g_GB2312MappingTableSet)
{
  // Nothing else to initialise.
}
|
||||
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
void UnicodeToGBK(PRUnichar SrcUnicode, DByte *pGBCode)
|
||||
{
|
||||
short int iRet = FALSE;
|
||||
short int i = 0;
|
||||
short int iGBKToUnicodeIndex = 0;
|
||||
|
||||
|
||||
for ( i=0; i<MAX_GBK_LENGTH; i++)
|
||||
{
|
||||
if ( SrcUnicode == GBKToUnicodeTable[i] )
|
||||
{
|
||||
iGBKToUnicodeIndex = i;
|
||||
iRet = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( iRet )
|
||||
{
|
||||
//convert from one dimensional index to (left, right) pair
|
||||
if(pGBCode)
|
||||
{
|
||||
pGBCode->leftbyte = (char) ( iGBKToUnicodeIndex / 0x00BF + 0x0081) ;
|
||||
pGBCode->leftbyte |= 0x80;
|
||||
pGBCode->rightbyte = (char) ( iGBKToUnicodeIndex % 0x00BF+ 0x0040);
|
||||
pGBCode->rightbyte |= 0x80;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Encodes a run of Unicode characters as GBK bytes.
//
// @param aSrc        source characters
// @param aSrcLength  [in] number of characters available in aSrc
//                    [out] number of characters actually consumed
// @param aDest       destination byte buffer
// @param aDestLength [in] capacity of aDest in bytes
//                    [out] number of bytes written
// @return NS_OK
//
// Characters up to U+007F are written as one byte.  Every other character is
// looked up through UnicodeToGBK() and written as two bytes.  Encoding stops
// before a character whose output would not fit into the remaining space, so
// the destination buffer is never overrun and the reported source count
// always matches what was really converted.
//
// NOTE(review): a character without a GBK mapping is replaced by a single
// '?'.  Confirm this against the encoder framework's convention for
// unmappable characters (it may expect an error status instead).
NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(const PRUnichar * aSrc,
                                            PRInt32 * aSrcLength,
                                            char * aDest,
                                            PRInt32 * aDestLength)
{
  const PRInt32 srcAvailable  = *aSrcLength;
  const PRInt32 destAvailable = *aDestLength;
  PRInt32 srcUsed  = 0;
  PRInt32 destUsed = 0;

  while (srcUsed < srcAvailable)
  {
    const PRUnichar ch = aSrc[srcUsed];

    if (ch > 0x7f)
    {
      // Not ASCII.  U+0080..U+00FF belong here too: writing them as raw
      // bytes would produce stray GBK lead bytes in the output.
      if (destAvailable - destUsed < 2)
        break;                            // no room for a two-byte code

      // Zero the pair first; a valid lead byte is never 0, so a zero lead
      // byte after the call means no mapping was found.
      DByte *pair = (DByte *)(aDest + destUsed);
      pair->leftbyte  = 0;
      pair->rightbyte = 0;

      UnicodeToGBK(ch, pair);

      if (pair->leftbyte != 0)
      {
        destUsed += 2;
      }
      else
      {
        aDest[destUsed] = '?';            // unmappable character
        destUsed += 1;
      }
    }
    else
    {
      // ASCII: take the low byte of the value itself rather than the first
      // byte in memory, which would depend on byte order.
      if (destAvailable - destUsed < 1)
        break;                            // destination full

      aDest[destUsed] = (char) ch;
      destUsed += 1;
    }

    srcUsed++;
  }

  *aDestLength = destUsed;
  *aSrcLength  = srcUsed;

  return NS_OK;
}
|
||||
|
||||
|
||||
// Static factory: allocates a new nsUnicodeToGBK and stores it in *aResult.
// On allocation failure *aResult is left untouched and
// NS_ERROR_OUT_OF_MEMORY is returned; otherwise NS_OK.
nsresult nsUnicodeToGBK::CreateInstance(nsISupports ** aResult)
{
  nsIUnicodeEncoder *encoder = new nsUnicodeToGBK();

  if (!encoder)
    return NS_ERROR_OUT_OF_MEMORY;

  *aResult = encoder;
  return NS_OK;
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Subclassing of nsMultiTableEncoderSupport class [implementation]
|
||||
|
||||
// Reports the destination size to reserve for encoding aSrcLength
// characters: two bytes per source character.
//
// @param aSrc        source characters (not inspected)
// @param aSrcLength  number of source characters
// @param aDestLength [out] twice aSrcLength
// @return NS_OK
NS_IMETHODIMP nsUnicodeToGBK::GetMaxLength(const PRUnichar * aSrc,
                                           PRInt32 aSrcLength,
                                           PRInt32 * aDestLength)
{
  const PRInt32 worstCaseBytes = aSrcLength * 2;

  *aDestLength = worstCaseBytes;
  return NS_OK;
}
|
||||
|
||||
|
|
|
@ -15,3 +15,50 @@
|
|||
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
||||
* Reserved.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A character set converter from Unicode to GBK.
|
||||
*
|
||||
*
|
||||
* @created 08/Sept/1999
|
||||
* @author Yueheng Xu, Yueheng.Xu@intel.com
|
||||
*/
|
||||
|
||||
#ifndef nsUnicodeToGBK_h___
|
||||
#define nsUnicodeToGBK_h___
|
||||
|
||||
#include "nsUCvCnSupport.h"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Class nsUnicodeToGBK [declaration]
|
||||
|
||||
class nsUnicodeToGBK: public nsMultiTableEncoderSupport
|
||||
{
|
||||
public:
|
||||
|
||||
/**
|
||||
* Class constructor.
|
||||
*/
|
||||
nsUnicodeToGBK();
|
||||
|
||||
/**
|
||||
* Static class constructor.
|
||||
*/
|
||||
static nsresult CreateInstance(nsISupports **aResult);
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// Subclassing of nsEncoderSupport class [declaration]
|
||||
NS_IMETHOD ConvertNoBuff(const PRUnichar * aSrc,
|
||||
PRInt32 * aSrcLength,
|
||||
char * aDest,
|
||||
PRInt32 * aDestLength);
|
||||
|
||||
NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
|
||||
PRInt32 * aDestLength);
|
||||
};
|
||||
|
||||
#endif /* nsUnicodeToGBK_h___ */
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче