#92806: Need to support GB18030 in the universal detector

r= ftang sr=blizzard@mozilla.org

This fix also adds some comments for:
#33337 Universal Charset Autodetection for mozilla tree
The code is almost the same as in the commercial tree; the 3rd-party detector has been removed, and the names have been changed by roy.

r=shanjian/ftang  sr=blizzard
This commit is contained in:
shanjian%netscape.com 2005-11-02 16:56:50 +00:00
Родитель 701bb6e3eb
Коммит 8083521d2e
3 изменённых файлов: 58 добавлений и 2 удалений

Просмотреть файл

@ -32,6 +32,7 @@ typedef enum {
// Looks up the class of byte c in the current model's packed class table.
// The cast to unsigned char keeps bytes >= 0x80 from turning into a negative
// value on platforms where plain char is signed. Expands to a reference to
// mModel, so it can only be used where mModel is in scope.
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
//state machine model
typedef struct
{
nsPkgInt classTable;
@ -48,12 +49,14 @@ public:
mModel = sm;
};
nsSMState NextState(char c){
//for each byte we get its class , if it is first byte, we also get byte length
PRUint32 byteCls = GETCLASS(c);
if (mCurrentState == eStart)
{
mCurrentBytePos = 0;
mCurrentCharLen = mModel->charLenTable[byteCls];
}
//from byte's class and stateTable, we get its next state
mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
mModel->stateTable);
mCurrentBytePos++;
@ -76,7 +79,7 @@ extern SMModel Big5SMModel;
// Declarations of the per-encoding state machine models. A pointer to one of
// these is handed to nsCodingStateMachine; the definitions live elsewhere.
extern SMModel EUCJPSMModel;
extern SMModel EUCKRSMModel;
extern SMModel EUCTWSMModel;
extern SMModel GB2312SMModel;
extern SMModel GB18030SMModel;
extern SMModel SJISSMModel;
extern SMModel UCS2BESMModel;

Просмотреть файл

@ -29,7 +29,7 @@
class nsGB2312Prober: public nsCharSetProber {
public:
nsGB2312Prober(void){mCodingSM = new nsCodingStateMachine(&GB2312SMModel);
nsGB2312Prober(void){mCodingSM = new nsCodingStateMachine(&GB18030SMModel);
Reset();};
virtual ~nsGB2312Prober(void){delete mCodingSM;};
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);

Просмотреть файл

@ -245,6 +245,7 @@ SMModel EUCTWSMModel = {
"x-euc-tw",
};
/* The GB2312 model is made obsolete by the GB18030 model below; kept here, disabled, for reference.
static PRUint32 GB2312_cls [ 256 / 8 ] = {
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
@ -296,7 +297,59 @@ SMModel GB2312SMModel = {
GB2312CharLenTable,
"GB2312",
};
*/
// Byte-class table for the GB18030 model: one class value per possible byte
// (0x00 - 0xff), eight values packed into each PRUint32 by PCK4BITS, hence
// 256 / 8 words. The trailing comment on each row gives the byte range that
// row covers (arguments presumably in ascending byte order - confirm against
// the PCK4BITS definition).
//
// Classes as assigned below:
//   0 : 0x0e, 0x0f, 0x1b and 0xff
//   1 : every other byte in 0x00 - 0x3f, plus 0x7f
//   2 : 0x81 - 0xfe
//   3 : 0x40 - 0x7e and 0x80
//
// NOTE(review): the digits 0x30 - 0x39 are plain class 1 here, so this table
// appears to describe only one- and two-byte sequences; GB18030 four-byte
// sequences (which use digit bytes in positions 2 and 4) do not seem to be
// modelled - confirm whether that is intended.
static PRUint32 GB18030_cls [ 256 / 8 ] = {
// Earlier variant of the first row, with byte 0x00 in class 0; kept for reference.
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
PCK4BITS(3,3,3,3,3,3,3,3), // 40 - 47
PCK4BITS(3,3,3,3,3,3,3,3), // 48 - 4f
PCK4BITS(3,3,3,3,3,3,3,3), // 50 - 57
PCK4BITS(3,3,3,3,3,3,3,3), // 58 - 5f
PCK4BITS(3,3,3,3,3,3,3,3), // 60 - 67
PCK4BITS(3,3,3,3,3,3,3,3), // 68 - 6f
PCK4BITS(3,3,3,3,3,3,3,3), // 70 - 77
PCK4BITS(3,3,3,3,3,3,3,1), // 78 - 7f
PCK4BITS(3,2,2,2,2,2,2,2), // 80 - 87
PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
};
// State transition table for the GB18030 model: 16 entries packed eight per
// PRUint32. The state machine reads entry (currentState * classFactor +
// byteClass); with the class factor of 4 used by GB18030SMModel this gives
// four rows (states) of four columns (byte classes 0..3). The trailing
// comments are entry indices in hex.
//
// Reading the rows, assuming eStart = 0, eError = 1 and eItsMe = 2 (confirm
// against the nsSMState enum):
//   entries 00-03 (eStart) : class 0 -> eError, 1 -> eStart, 2 -> state 3, 3 -> eStart
//   entries 04-07 (eError) : stays in eError for every class
//   entries 08-0b (eItsMe) : stays in eItsMe for every class
//   entries 0c-0f (state 3): class 0 -> eError, 1 -> eError, 2 -> eStart, 3 -> eStart
static PRUint32 GB18030_st [ 2] = {
PCK4BITS(eError,eStart, 3,eStart,eError,eError,eError,eError),//00-07
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
};
// Character length, indexed by the class of the byte seen while the state
// machine is in eStart: class 1 -> 1 byte, class 2 -> 2 bytes, classes 0 and
// 3 -> 0.
// NOTE(review): class 3 bytes (0x40 - 0x7e, 0x80) get length 0 here although
// the state table seems to accept them as a complete character in the start
// state - confirm whether a length of 1 was intended.
static PRUint32 GB18030CharLenTable[] = {0, 1, 2, 0};
// State machine model for GB18030, tying together the tables defined above.
// Field roles below follow the order of the initializer; the full SMModel
// declaration should be checked to confirm them.
SMModel GB18030SMModel = {
// packed byte -> class table
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
// presumably classFactor: number of byte classes, i.e. the row width of the state table
4,
// packed (state, class) -> next state table
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
// character length per byte class
GB18030CharLenTable,
// charset name for this model
"GB18030",
};
// sjis