Fixed #39640 (regression in the UnicodeToGB2312GL converter) and #34626 (optimization of table initialization: build the table only once, the first time it is needed). r=ftang, a=waterson.

This commit is contained in:
yueheng.xu%intel.com 2000-06-02 22:18:42 +00:00
Родитель 3a0ba0cd8e
Коммит 76710f74ed
4 изменённых файлов: 109 добавлений и 76 удалений

Просмотреть файл

@ -33,6 +33,7 @@
// 04/10/1999 - changed leftbyte, rightbyte to PRUint8 in struct DByte;
// added table UnicodeToGBKTable[0x5200]
//
// 05/16/2000 - added gUnicodeToGBKTableInitialized flag for optimization
// ======================================================================================
// Table GBKToUnicode[] maps the GBK code to its unicode.
// The mapping data of this GBK table is obtained from
@ -68,9 +69,11 @@ typedef struct
extern PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH];
#ifdef _UNICODE_TO_GBK_ENCODER_
DByte UnicodeToGBKTable[0x5200];
DByte UnicodeToGBKTable[0x5200];
PRUint16 gUnicodeToGBKTableInitialized = PR_FALSE; // default to not initialized yet
#else
extern DByte UnicodeToGBKTable[0x5200]; // 0xA000 - 0x4E00 = 0x5200
extern DByte UnicodeToGBKTable[0x5200]; // 0xA000 - 0x4E00 = 0x5200
extern PRUint16 gUnicodeToGBKTableInitialized;
#endif
#else
@ -97,11 +100,12 @@ PRUnichar GBKToUnicodeTable[MAX_GBK_LENGTH] =
#ifdef _UNICODE_TO_GBK_ENCODER_
DByte UnicodeToGBKTable[0x5200];
DByte UnicodeToGBKTable[0x5200];
PRUint16 gUnicodeToGBKTableInitialized = PR_FALSE; // default to not initialized yet
#else
extern DByte UnicodeToGBKTable[0x5200]; // 0xA000 - 0x4E00 = 0x5200
extern DByte UnicodeToGBKTable[0x5200]; // 0xA000 - 0x4E00 = 0x5200
extern PRUint16 gUnicodeToGBKTableInitialized;
#endif
#endif /* ifdef _GBKU_TABLE_ */

Просмотреть файл

@ -40,23 +40,27 @@ nsUnicodeToGB2312GL::nsUnicodeToGB2312GL()
PRUnichar unicode;
PRUint16 i;
for ( i=0; i<MAX_GBK_LENGTH; i++ )
if ( !gUnicodeToGBKTableInitialized )
{
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
for ( i=0; i<MAX_GBK_LENGTH; i++ )
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
}
}
}
gUnicodeToGBKTableInitialized = PR_TRUE;
}
}
NS_IMETHODIMP nsUnicodeToGB2312GL::ConvertNoBuff(const PRUnichar * aSrc,
@ -67,15 +71,16 @@ NS_IMETHODIMP nsUnicodeToGB2312GL::ConvertNoBuff(const PRUnichar * aSrc,
PRInt32 i=0;
PRInt32 iSrcLength = 0;
DByte *pDestDBCode;
DByte *pSrcDBCode;
PRInt32 iDestLength = 0;
PRUnichar unicode;
PRUint8 left, right;
nsresult res = NS_OK;
PRUnichar *pSrc = (PRUnichar *)aSrc;
pDestDBCode = (DByte *)aDest;
while( iSrcLength < *aSrcLength )
while( iSrcLength < *aSrcLength)
{
pDestDBCode = (DByte *)aDest;
@ -110,8 +115,8 @@ NS_IMETHODIMP nsUnicodeToGB2312GL::ConvertNoBuff(const PRUnichar * aSrc,
// UnicodeToGBK( *pSrc, pDestDBCode);
aDest += 2; // increment 2 bytes
pDestDBCode = (DByte *)aDest;
iDestLength +=2;
iSrcLength +=2;
iDestLength +=2; // Dest Length in units of chars, each GB char counts as two in string length
iSrcLength++ ; // Each unicode char just count as one in PRUnichar string;
pSrc++; // increment 2 bytes
}
else
@ -121,7 +126,7 @@ NS_IMETHODIMP nsUnicodeToGB2312GL::ConvertNoBuff(const PRUnichar * aSrc,
}
// if dest buffer not big enough, handles it here.
if ( (iDestLength >= *aDestLength) && (iSrcLength > 0) )
if ( (iDestLength >= *aDestLength) && ( iSrcLength < *aSrcLength) )
{
res = NS_OK_UENC_MOREOUTPUT;
break;
@ -172,18 +177,18 @@ NS_IMETHODIMP nsUnicodeToGB2312GL::FillInfo(PRUint32 *aInfo)
for ( i=0x0081;i<0x00FF;i++)
{
// HZ and GB2312 starts at row 0x21|0x80 = 0xA1
if ( i < 0xA1 )
if ( i < 0xA1 )
continue;
// valid GBK columns are in 0x41 to 0xFE
for( j=0x0041;j<0x00FF;j++)
for( j=0x0040;j<0x00FF;j++)
{
//HZ and GB2312 starts at col 0x21 | 0x80 = 0xA1
if ( j < 0xA1 )
continue;
// k is index in GBKU.H table
k = (i - 0x0081)*(0x00FE - 0x0080)+(j-0x0041);
k = (i - 0x0081)*0x00BF +(j-0x0040);
SrcUnicode = GBKToUnicodeTable[k];
if (( SrcUnicode != 0xFFFF ) && (SrcUnicode != 0xFFFD) )

Просмотреть файл

@ -18,6 +18,7 @@
* Rights Reserved.
*
* Contributor(s):
* Yueheng Xu, yueheng.xu@intel.com
*/
#include "nsUnicodeToGB2312V2.h"
@ -38,23 +39,27 @@ nsUnicodeToGB2312V2::nsUnicodeToGB2312V2()
PRUnichar unicode;
PRUint16 i;
for ( i=0; i<MAX_GBK_LENGTH; i++ )
if ( !gUnicodeToGBKTableInitialized )
{
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
for ( i=0; i<MAX_GBK_LENGTH; i++ )
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
}
}
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
}
}
gUnicodeToGBKTableInitialized = PR_TRUE;
}
}
NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
@ -63,17 +68,17 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
PRInt32 * aDestLength)
{
PRInt32 i=0;
PRInt32 iSrcLength = *aSrcLength;
PRInt32 iSrcLength = 0;
DByte *pDestDBCode;
DByte *pSrcDBCode;
PRInt32 iDestLength = 0;
PRUnichar unicode;
PRUint8 left, right;
nsresult res = NS_OK;
PRUnichar *pSrc = (PRUnichar *)aSrc;
pDestDBCode = (DByte *)aDest;
for (i=0;i< iSrcLength;i++)
while (iSrcLength < *aSrcLength)
{
pDestDBCode = (DByte *)aDest;
@ -97,7 +102,7 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
if ( unicode == GBKToUnicodeTable[i] )
{
//this manipulation handles the little endian / big endian issues
left = (char) ( i / 0x00BF + 0x0081) | 0x80 ;
left = (char) ( i / 0x00BF + 0x0081) | 0x80;
right = (char) ( i % 0x00BF+ 0x0040) | 0x80;
pDestDBCode->leftbyte = left;
pDestDBCode->rightbyte = right;
@ -108,7 +113,7 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
// UnicodeToGBK( *pSrc, pDestDBCode);
aDest += 2; // increment 2 bytes
pDestDBCode = (DByte *)aDest;
iDestLength +=2;
iDestLength +=2; // each GB char count as two in char* string
}
else
{
@ -118,18 +123,20 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
aDest++; // increment 1 byte
iDestLength +=1;
}
iSrcLength++ ; // each unicode char just count as one in PRUnichar* string
pSrc++; // increment 2 bytes
if ( iDestLength >= (*aDestLength) )
if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength ))
{
res = NS_OK_UENC_MOREOUTPUT;
break;
}
}
*aDestLength = iDestLength;
*aSrcLength = i;
*aSrcLength = iSrcLength;
return NS_OK;
return res;
}
@ -174,14 +181,14 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::FillInfo(PRUint32 *aInfo)
continue;
// valid GBK columns are in 0x41 to 0xFE
for( j=0x0041;j<0x00FF;j++)
for( j=0x0040;j<0x00FF;j++)
{
//HZ and GB2312 starts at col 0x21 | 0x80 = 0xA1
if ( j < 0xA1 )
continue;
// k is index in GBKU.H table
k = (i - 0x0081)*(0x00FE - 0x0080)+(j-0x0041);
k = (i - 0x0081)*0x00BF+(j-0x0040);
SrcUnicode = GBKToUnicodeTable[k];
if (( SrcUnicode != 0xFFFF ) && (SrcUnicode != 0xFFFD) )
@ -190,6 +197,11 @@ NS_IMETHODIMP nsUnicodeToGB2312V2::FillInfo(PRUint32 *aInfo)
}
}
}
// The GB2312 font lib also has the single-byte (ASCII) characters, so mark
// every code point in 0x0000..0x00FF as representable.
// NOTE: the for header must not be terminated with ';'. A trailing semicolon
// makes the loop body empty, and the braced block below would then run only
// once, after the loop, with SrcUnicode already equal to 0x0100.
for ( SrcUnicode = 0x0000; SrcUnicode <= 0x00FF; SrcUnicode++ )
{
  SET_REPRESENTABLE(aInfo, SrcUnicode);
}
return NS_OK;
}

Просмотреть файл

@ -49,23 +49,27 @@ nsUnicodeToGBK::nsUnicodeToGBK()
PRUnichar unicode;
PRUnichar i;
for ( i=0; i<MAX_GBK_LENGTH; i++ )
{
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
if ( !gUnicodeToGBKTableInitialized )
{
for ( i=0; i<MAX_GBK_LENGTH; i++ )
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
left = ( i / 0x00BF + 0x0081);
right = ( i % 0x00BF+ 0x0040);
unicode = GBKToUnicodeTable[i];
// to reduce size of UnicodeToGBKTable, we only do direct unicode to GB
// table mapping between unicode 0x4E00 and 0xA000. Others by searching
// GBKToUnicodeTable. There is a trade off between memory usage and speed.
if ( (unicode >= 0x4E00 ) && ( unicode <= 0xA000 ))
{
unicode -= 0x4E00;
UnicodeToGBKTable[unicode].leftbyte = left;
UnicodeToGBKTable[unicode].rightbyte = right;
}
}
}
gUnicodeToGBKTableInitialized = PR_TRUE;
}
}
@ -76,17 +80,17 @@ NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(const PRUnichar * aSrc,
{
PRInt32 i=0;
PRInt32 iSrcLength = *aSrcLength;
PRInt32 iSrcLength = 0;
DByte *pDestDBCode;
DByte *pSrcDBCode;
PRInt32 iDestLength = 0;
PRUnichar unicode;
PRUint8 left, right;
nsresult res = NS_OK;
PRUnichar *pSrc = (PRUnichar *)aSrc;
pDestDBCode = (DByte *)aDest;
for (i=0;i< iSrcLength;i++)
while (iSrcLength < *aSrcLength )
{
pDestDBCode = (DByte *)aDest;
@ -132,18 +136,21 @@ NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff(const PRUnichar * aSrc,
aDest++; // increment 1 byte
iDestLength +=1;
}
iSrcLength++ ; // Each unicode char just count as one in PRUnichar string;
pSrc++; // increment 2 bytes
if ( iDestLength >= (*aDestLength) )
if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) )
{
res = NS_OK_UENC_MOREOUTPUT;
break;
}
}
*aDestLength = iDestLength;
*aSrcLength = i;
*aSrcLength = iSrcLength;
return NS_OK;
return res;
}
@ -188,7 +195,7 @@ NS_IMETHODIMP nsUnicodeToGBK::FillInfo(PRUint32 *aInfo)
for( j=0x0040; j<0x00FF; j++)
{
// k is index in GBKU.H table
k = (i - 0x0081)*(0xFE - 0x0080)+(j-0x0040);
k = (i - 0x0081)*0x00BF +(j-0x0040);
SrcUnicode = GBKToUnicodeTable[k];
if (( SrcUnicode != 0xFFFF ) && (SrcUnicode != 0xFFFD) )
{
@ -196,7 +203,12 @@ NS_IMETHODIMP nsUnicodeToGBK::FillInfo(PRUint32 *aInfo)
}
}
}
// The GBK font lib also has the single-byte (ASCII) characters, so mark
// every code point in 0x0000..0x00FF as representable.
// NOTE: the for header must not be terminated with ';'. A trailing semicolon
// makes the loop body empty, and the braced block below would then run only
// once, after the loop, with SrcUnicode already equal to 0x0100.
for ( SrcUnicode = 0x0000; SrcUnicode <= 0x00FF; SrcUnicode++ )
{
  SET_REPRESENTABLE(aInfo, SrcUnicode);
}
return NS_OK;
}