#92806 Need to support GB18030 in the universal detector
r=ftang sr=blizzard@mozilla.org. This fix also adds some comments for #33337 (Universal Charset Autodetection for the Mozilla tree). The code is almost the same as in the commercial tree; the 3rd-party detector has been removed, and the name has been changed by roy. r=shanjian/ftang sr=blizzard
This commit is contained in:
Родитель
701bb6e3eb
Коммит
8083521d2e
|
@ -32,6 +32,7 @@ typedef enum {
|
|||
|
||||
// Maps byte c to its class by calling GETFROMPCK on the current model's
// classTable. c is cast to unsigned char before the lookup, so a byte passed
// as a (possibly signed) char is never used as a negative value.
// The expansion references mModel, so the macro can only be used where an
// mModel pointer is in scope.
#define GETCLASS(c) GETFROMPCK(((unsigned char)(c)), mModel->classTable)
|
||||
|
||||
//state machine model
|
||||
typedef struct
|
||||
{
|
||||
nsPkgInt classTable;
|
||||
|
@ -48,12 +49,14 @@ public:
|
|||
mModel = sm;
|
||||
};
|
||||
nsSMState NextState(char c){
|
||||
// For each byte we get its class; if it is the first byte of a character, we also get the character's byte length.
|
||||
PRUint32 byteCls = GETCLASS(c);
|
||||
if (mCurrentState == eStart)
|
||||
{
|
||||
mCurrentBytePos = 0;
|
||||
mCurrentCharLen = mModel->charLenTable[byteCls];
|
||||
}
|
||||
// From the byte's class and the stateTable, we get the next state.
|
||||
mCurrentState=(nsSMState)GETFROMPCK(mCurrentState*(mModel->classFactor)+byteCls,
|
||||
mModel->stateTable);
|
||||
mCurrentBytePos++;
|
||||
|
@ -76,7 +79,7 @@ extern SMModel Big5SMModel;
|
|||
extern SMModel EUCJPSMModel;
|
||||
extern SMModel EUCKRSMModel;
|
||||
extern SMModel EUCTWSMModel;
|
||||
extern SMModel GB2312SMModel;
|
||||
extern SMModel GB18030SMModel;
|
||||
extern SMModel SJISSMModel;
|
||||
extern SMModel UCS2BESMModel;
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
class nsGB2312Prober: public nsCharSetProber {
|
||||
public:
|
||||
nsGB2312Prober(void){mCodingSM = new nsCodingStateMachine(&GB2312SMModel);
|
||||
nsGB2312Prober(void){mCodingSM = new nsCodingStateMachine(&GB18030SMModel);
|
||||
Reset();};
|
||||
virtual ~nsGB2312Prober(void){delete mCodingSM;};
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
|
|
|
@ -245,6 +245,7 @@ SMModel EUCTWSMModel = {
|
|||
"x-euc-tw",
|
||||
};
|
||||
|
||||
/* Obsolete: the GB2312 model is superseded by the GB18030 model
|
||||
static PRUint32 GB2312_cls [ 256 / 8 ] = {
|
||||
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
|
||||
|
@ -296,7 +297,59 @@ SMModel GB2312SMModel = {
|
|||
GB2312CharLenTable,
|
||||
"GB2312",
|
||||
};
|
||||
*/
|
||||
|
||||
// Byte-class table for the GB18030 state machine: 256 / 8 = 32 words, each
// built by PCK4BITS from eight class values, giving one class per byte value
// 0x00-0xff. The trailing range comment on each row names the bytes it covers.
// Assuming PCK4BITS stores its arguments in ascending byte order (as those
// range comments indicate), the classes assigned here are:
//   class 0: 0x0e, 0x0f, 0x1b, 0xff
//   class 1: all other bytes in 0x00-0x3f, and 0x7f
//   class 2: 0x81-0xfe
//   class 3: 0x40-0x7e and 0x80
// The class is what GETCLASS returns and what NextState uses to index the
// model's charLenTable and stateTable.
static PRUint32 GB18030_cls [ 256 / 8 ] = {
|
||||
//PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
|
||||
PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
|
||||
PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
|
||||
PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 40 - 47
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 48 - 4f
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 50 - 57
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 58 - 5f
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 60 - 67
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 68 - 6f
|
||||
PCK4BITS(3,3,3,3,3,3,3,3), // 70 - 77
|
||||
PCK4BITS(3,3,3,3,3,3,3,1), // 78 - 7f
|
||||
PCK4BITS(3,2,2,2,2,2,2,2), // 80 - 87
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
|
||||
PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
|
||||
PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
|
||||
};
|
||||
|
||||
|
||||
// Packed state-transition table for the GB18030 model. NextState reads the
// entry at currentState * classFactor + byteClass; with the classFactor of 4
// set in GB18030SMModel, each group of four entries is one state's row,
// indexed by byte class 0-3.
// NOTE(review): the rows below read as eStart, eError, eItsMe, then state 3.
// That assumes the nsSMState enum values are 0, 1 and 2 in that order, which
// is not visible here; confirm against the enum declaration.
// NOTE(review): under that reading, the last row (the state entered from
// eStart on a class-2 byte) sends class 1 to eError. GB18030_cls places
// 0x30-0x39 in class 1, so the digit second byte of a four-byte GB18030
// sequence would be rejected; confirm whether that is intended.
static PRUint32 GB18030_st [ 2] = {
|
||||
PCK4BITS(eError,eStart, 3,eStart,eError,eError,eError,eError),//00-07
|
||||
PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
|
||||
};
|
||||
|
||||
// Character length indexed by byte class. NextState looks it up for the byte
// seen while in eStart and stores the result in mCurrentCharLen:
// class 0 -> 0, class 1 -> 1, class 2 -> 2, class 3 -> 0.
static PRUint32 GB18030CharLenTable[] = {0, 1, 2, 0};
|
||||
|
||||
// State-machine model for GB18030, passed to nsCodingStateMachine by
// nsGB2312Prober. It bundles the packed byte-class table (GB18030_cls), the
// number 4, the packed state table (GB18030_st), the per-class character
// length table and the string "GB18030".
// NOTE(review): the initializers presumably map, in order, to classTable,
// classFactor, stateTable, charLenTable and a charset-name field. Only
// classTable is visible as the first SMModel member here; confirm the rest
// against the full struct declaration.
SMModel GB18030SMModel = {
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
|
||||
4,
|
||||
{eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
|
||||
GB18030CharLenTable,
|
||||
"GB18030",
|
||||
};
|
||||
|
||||
// sjis
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче