This commit is contained in:
ftang%netscape.com 1999-07-16 20:52:07 +00:00
Родитель 15070efc70
Коммит 27a7e0dd80
1 изменённых файлов: 228 добавлений и 10 удалений

Просмотреть файл

@ -30,12 +30,212 @@
#endif
/**
 * Abstract interface for a statistics collector that is fed a byte stream
 * in chunks.
 *
 * The driver in this file calls HandleData() once per block that was read,
 * then DataEnd() followed by Report() once the input is exhausted.
 */
class nsStatis {
public:
  nsStatis() {}
  virtual ~nsStatis() {}

  /** Consume the next aLen bytes starting at aBuf. */
  virtual PRBool HandleData(const char* aBuf, PRUint32 aLen) = 0;

  /** Signal that no further data will be supplied. */
  virtual void DataEnd() = 0;

  /** Emit whatever the collector has gathered. */
  virtual void Report() = 0;
};
/**
 * Statistics collector for double-byte text.
 *
 * Counts the two-byte characters in the stream, how many of them have a
 * lead byte inside the inclusive range [aL, aH] ("L chars"), and how long
 * the consecutive runs of such characters ("L words") are.
 */
class nsBaseStatis : public nsStatis {
public:
  /**
   * @param aL  lowest lead byte counted as an L char (inclusive)
   * @param aH  highest lead byte counted as an L char (inclusive)
   * @param aR  reference ratio of L chars to two-byte chars that Report()
   *            compares the measured ratio against
   */
  nsBaseStatis(unsigned char aL, unsigned char aH, float aR);
  virtual ~nsBaseStatis() {}

  virtual PRBool HandleData(const char* aBuf, PRUint32 aLen);
  virtual void DataEnd();
  virtual void Report();

protected:
  unsigned char mLWordHi;   // upper bound of the L-char lead-byte range
  unsigned char mLWordLo;   // lower bound of the L-char lead-byte range

private:
  PRUint32 mNumOf2Bytes;    // two-byte characters seen so far
  PRUint32 mNumOfLChar;     // of those, how many were L chars
  PRUint32 mNumOfLWord;     // completed runs of consecutive L chars
  PRUint32 mLWordLength;    // length of the run currently in progress
  PRUint32 mLWordLen[10];   // histogram: [n-1] counts runs of length n;
                            // the last slot also absorbs longer runs
  float mR;                 // reference ratio supplied at construction
  PRBool mTailByte;         // next byte is the 2nd byte of a 2-byte char
  PRBool mLastLChar;        // the most recent character was an L char
};
/**
 * Build a collector with all counters cleared.
 *
 * @param aL  lowest lead byte counted as an L char (stored in mLWordLo)
 * @param aH  highest lead byte counted as an L char (stored in mLWordHi)
 * @param aR  reference ratio stored in mR for use by Report()
 */
nsBaseStatis::nsBaseStatis(unsigned char aL, unsigned char aH, float aR)
{
  mNumOf2Bytes = mNumOfLWord = mLWordLength = mNumOfLChar = 0;
  mTailByte = mLastLChar = PR_FALSE;

  // Take the bound from the array itself. The previous hard-coded limit of
  // 20 ran past the 10-element histogram and overwrote the members that
  // follow it (and memory beyond the object).
  const PRUint32 numBuckets = sizeof(mLWordLen) / sizeof(mLWordLen[0]);
  for (PRUint32 i = 0; i < numBuckets; i++)
    mLWordLen[i] = 0;

  mLWordHi = aH;
  mLWordLo = aL;
  mR = aR;
}
/**
 * Scan one chunk of input and update the character and run statistics.
 *
 * A byte with the high bit set (seen while no trail byte is pending) is
 * treated as the lead byte of a two-byte character; the byte after it is
 * skipped without inspection. State carries over between calls, so a
 * character may straddle two chunks.
 *
 * @param aBuf  start of the chunk
 * @param aLen  number of bytes in the chunk
 * @return always PR_TRUE
 */
PRBool nsBaseStatis::HandleData(const char* aBuf, PRUint32 aLen)
{
  for (PRUint32 pos = 0; pos < aLen; pos++)
  {
    // Second half of a two-byte character: consume it and move on.
    if (mTailByte) {
      mTailByte = PR_FALSE;
      continue;
    }

    const unsigned char lead = (unsigned char) aBuf[pos];
    mTailByte = (0x80 == (lead & 0x80));

    // A single-byte character is never an L char.
    PRBool isLChar = PR_FALSE;
    if (mTailByte) {
      mNumOf2Bytes++;
      isLChar = ((mLWordLo <= lead) && (lead <= mLWordHi));
      if (isLChar) {
        mNumOfLChar++;
        mLWordLength++;
      }
    }

    // The previous character was an L char and this one is not: the run
    // has ended, so record its length in the histogram and start over.
    if (mLastLChar && !isLChar) {
      mNumOfLWord++;
      mLWordLen[(mLWordLength > 10) ? 9 : (mLWordLength - 1)]++;
      mLWordLength = 0;
    }
    mLastLChar = isLChar;
  }
  return PR_TRUE;
}
void nsBaseStatis::DataEnd()
{
if(mLastLChar) {
mNumOfLWord++;
mLWordLen[ (mLWordLength > 10) ? 9 : (mLWordLength-1)]++;
}
}
/**
 * Compare the measured L-char ratio against the reference ratio mR.
 *
 * The score is ((rate - mR) / mR)^2 * 1000 and is printed only when the
 * file is built with EXPERIMENT defined. Nothing is computed if no
 * two-byte character was seen.
 */
void nsBaseStatis::Report()
{
  if (mNumOf2Bytes > 0)
  {
    /* Disabled debug output:
    printf("LChar Ratio = %d : %d ( %5.3f)\n",
           mNumOfLChar,
           mNumOf2Bytes,
           ((float)mNumOfLChar / (float)mNumOf2Bytes) * 100);
    */
    const float rate = (float) mNumOfLChar / (float) mNumOf2Bytes;
    float delta = (rate - mR) / mR;
    delta = delta * (delta * 1000);
#ifdef EXPERIMENT
    printf("Exp = %f \n",delta);
#endif
  }

  /* Disabled debug output: run statistics and the run-length histogram.
  if(mNumOfLChar > 0)
    printf("LWord Word = %d : %d (%5.3f)\n",
           mNumOfLWord,
           mNumOfLChar,
           ((float)mNumOfLWord / (float)mNumOfLChar) * 100);
  if(mNumOfLWord > 0)
  {
    PRUint32 ac =0;
    for(PRUint32 i=0;i<10;i++)
    {
      ac += mLWordLen[i];
      printf("LWord Word Length[%d]= %d -> %5.3f%% %5.3f%%\n", i+1,
             mLWordLen[i],
             (((float)mLWordLen[i] / (float)mNumOfLWord) * 100),
             (((float)ac / (float)mNumOfLWord) * 100));
    }
  }
  */
}
/**
 * Lightweight statistics collector for double-byte text.
 *
 * Tracks only the number of two-byte characters and how many of them have
 * a lead byte inside the inclusive range [aL, aH], and carries a charset
 * label that Report() can print.
 */
class nsSimpleStatis : public nsStatis {
public:
  /**
   * @param aL        lowest lead byte counted as an L char (inclusive)
   * @param aH        highest lead byte counted as an L char (inclusive)
   * @param aR        reference ratio of L chars to two-byte chars
   * @param aCharset  label printed by Report(); the pointer is stored, not
   *                  copied, so it must outlive this object
   */
  nsSimpleStatis(unsigned char aL, unsigned char aH, float aR,
                 const char* aCharset);
  virtual ~nsSimpleStatis() {}

  virtual PRBool HandleData(const char* aBuf, PRUint32 aLen);
  virtual void DataEnd();
  virtual void Report();

protected:
  unsigned char mLWordHi;   // upper bound of the L-char lead-byte range
  unsigned char mLWordLo;   // lower bound of the L-char lead-byte range

private:
  PRUint32 mNumOf2Bytes;    // two-byte characters seen so far
  PRUint32 mNumOfLChar;     // of those, how many were L chars
  float mR;                 // reference ratio supplied at construction
  const char* mCharset;     // charset label (not owned)
  PRBool mTailByte;         // next byte is the 2nd byte of a 2-byte char
};
/**
 * Build a collector with cleared counters.
 *
 * @param aL        lowest lead byte counted as an L char
 * @param aH        highest lead byte counted as an L char
 * @param aR        reference ratio used by Report()
 * @param aCharset  label printed by Report(); stored by pointer
 */
nsSimpleStatis::nsSimpleStatis(unsigned char aL, unsigned char aH, float aR, const char* aCharset)
  : mLWordHi(aH)
  , mLWordLo(aL)
  , mNumOf2Bytes(0)
  , mNumOfLChar(0)
  , mR(aR)
  , mCharset(aCharset)
  , mTailByte(PR_FALSE)
{
}
/**
 * Scan one chunk of input and update the character counters.
 *
 * A byte with the high bit set (seen while no trail byte is pending) is
 * counted as the lead byte of a two-byte character, and the byte after it
 * is skipped. The pending-trail-byte state carries over between calls.
 *
 * @param aBuf  start of the chunk
 * @param aLen  number of bytes in the chunk
 * @return always PR_TRUE
 */
PRBool nsSimpleStatis::HandleData(const char* aBuf, PRUint32 aLen)
{
  PRUint32 pos = 0;
  while (pos < aLen)
  {
    const unsigned char current = (unsigned char) aBuf[pos++];

    // Second half of a two-byte character: nothing to count.
    if (mTailByte) {
      mTailByte = PR_FALSE;
      continue;
    }

    mTailByte = (0x80 == (current & 0x80));
    if (!mTailByte)
      continue;   // plain single-byte character

    mNumOf2Bytes++;
    if ((mLWordLo <= current) && (current <= mLWordHi))
      mNumOfLChar++;
  }
  return PR_TRUE;
}
/** End-of-input hook; this collector has nothing to finalize. */
void nsSimpleStatis::DataEnd() {
  // Intentionally empty.
}
/**
 * Compare the measured L-char ratio against the reference ratio mR.
 *
 * The score is ((rate - mR) / mR)^2 * 100. When built with EXPERIMENT
 * defined, the score is printed, and the charset label is printed as well
 * if the score is below 1.0. Nothing is computed if no two-byte character
 * was seen.
 */
void nsSimpleStatis::Report()
{
  if (mNumOf2Bytes == 0)
    return;

  const float rate = (float) mNumOfLChar / (float) mNumOf2Bytes;
  const float deviation = (rate - mR) / mR;
  const float delta = deviation * deviation * (float)100;
#ifdef EXPERIMENT
  printf("Exp = %f \n",delta);
  if(delta < 1.0)
    printf("This is %s\n" ,mCharset);
#endif
}
//==========================================================
// 1L << 13 == 8192.
// NOTE(review): presumably the upper limit for the "blocksize" command-line
// argument / read buffer; its use is outside this view, so confirm.
#define MAXBSIZE (1L << 13)
void usage() {
printf("Usage: DetectFile detector blocksize\n"
" detector: "
"japsm,"
"kopsm,"
"zhcnpsm,"
"zhtwpsm,"
"zhpsm,"
"cjkpsm,"
"ruprob,"
"ukprob,"
"1stblkdbg,"
"2ndblkdbg,"
"lastblkdbg"
@ -128,25 +328,43 @@ int main(int argc, char** argv) {
size_t sz;
PRBool done = PR_FALSE;
nsSimpleStatis ks(0xb0,0xc8, (float)0.95952, "EUC-KR");
nsSimpleStatis js(0xa4,0xa5, (float)0.45006, "EUC-JP");
nsStatis* stat[2] = {&ks, &js};
PRUint32 i;
do
{
sz = read(0, buf, bs);
if(sz > 0) {
rev = det->DoIt( buf, sz, &done);
if(NS_FAILED(rev))
{
printf("XPCOM ERROR CODE = %x\n", rev);
return(-1);
if(! done) {
rev = det->DoIt( buf, sz, &done);
if(NS_FAILED(rev))
{
printf("XPCOM ERROR CODE = %x\n", rev);
return(-1);
}
}
for(i=0;i<2;i++)
stat[i]->HandleData(buf, sz);
}
} while((sz > 0) && (!done));
rev = det->Done();
if(NS_FAILED(rev))
// } while((sz > 0) && (!done) );
} while(sz > 0);
if(!done)
{
printf("XPCOM ERROR CODE = %x\n", rev);
return(-1);
rev = det->Done();
if(NS_FAILED(rev))
{
printf("XPCOM ERROR CODE = %x\n", rev);
return(-1);
}
}
for(i=0;i<2;i++) {
stat[i]->DataEnd();
stat[i]->Report();
}
printf( "Done\n");
NS_IF_RELEASE(det);
printf( "Done 2\n");
return (0);
}