Add implementation for the word breaker. Add self-test code for the word breaker. Fix GetClass for the line breaker.

This commit is contained in:
ftang%netscape.com 1999-02-19 01:38:12 +00:00
Родитель 3051dee08e
Коммит c88834ab8b
6 изменённых файлов: 358 добавлений и 32 удалений

Просмотреть файл

@ -37,6 +37,7 @@ public:
const PRUnichar* aText2 , PRUint32 aTextLen2,
PRBool *oCanBreak) = 0;
NS_IMETHOD PostionToBoundary(const PRUnichar* aText1 , PRUint32 aTextLen1,
PRUint32 aOffset,
PRUint32 *oWordBegin,
PRUint32 *oWordEnd) = 0;
NS_IMETHOD FirstForwardBreak (nsIBreakState* state) = 0;

Просмотреть файл

@ -188,9 +188,9 @@ PRInt8 nsJISx4501LineBreaker::GetClass(PRUnichar u)
else if( 0x0030 == h)
{
c = GETCLASSFROMTABLE(gLBClass30, l);
} else if (( ( 0x3200 <= h) && ( h <= 0x3300) ) ||
( ( 0x4e00 <= h) && ( h <= 0x9f00) ) ||
( ( 0xf900 <= h) && ( h <= 0xfa00) )
} else if (( ( 0x3200 <= h) && ( h <= 0x33ff) ) ||
( ( 0x4e00 <= h) && ( h <= 0x9fff) ) ||
( ( 0xf900 <= h) && ( h <= 0xfaff) )
)
{
c = 5; // CJK charcter, Han, and Han Compatability
@ -230,6 +230,10 @@ NS_IMPL_ISUPPORTS(nsJISx4501LineBreaker, kILineBreakerIID);
nsresult nsJISx4501LineBreaker::FirstForwardBreak (nsIBreakState* state)
{
NS_PRECONDITION( nsnull != state, "null ptr");
if(nsnull == state )
return NS_ERROR_NULL_POINTER;
nsresult res;
PRUint32 len;
@ -254,6 +258,10 @@ nsresult nsJISx4501LineBreaker::FirstForwardBreak (nsIBreakState* state)
}
nsresult nsJISx4501LineBreaker::NextForwardBreak (nsIBreakState* state)
{
NS_PRECONDITION( nsnull != state, "null ptr");
if(nsnull == state )
return NS_ERROR_NULL_POINTER;
PRBool done;
nsresult res;
res = state->IsDone(&done);

Просмотреть файл

@ -34,9 +34,9 @@ nsLWBreakerFImp::~nsLWBreakerFImp()
}
// Interface ID constants for this factory implementation.
// Each constant is defined exactly once and bound to the IID of the interface
// it is named after; in particular the word-breaker constants use the
// word-breaker IIDs rather than the line-breaker ones.
NS_DEFINE_IID(kILineBreakerFactoryIID, NS_ILINEBREAKERFACTORY_IID);
NS_DEFINE_IID(kIWordBreakerFactoryIID, NS_IWORDBREAKERFACTORY_IID);
NS_DEFINE_IID(kILineBreakerIID, NS_ILINEBREAKER_IID);
NS_DEFINE_IID(kIWordBreakerIID, NS_IWORDBREAKER_IID);
NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);

Просмотреть файл

@ -40,39 +40,190 @@ nsresult nsSampleWordBreaker::BreakInBetween(
const PRUnichar* aText2 , PRUint32 aTextLen2,
PRBool *oCanBreak)
{
// to be implement
NS_PRECONDITION( nsnull != aText1, "null ptr");
NS_PRECONDITION( nsnull != aText2, "null ptr");
if((aText1 == nsnull) || (aText2 == nsnull))
return NS_ERROR_NULL_POINTER;
if( (0 == aTextLen1) || (0 == aTextLen2))
{
*oCanBreak = PR_FALSE;
return NS_OK;
}
*oCanBreak = (this->GetClass(aText1[aTextLen1-1]) != this->GetClass(aText2[0]));
return NS_OK;
}
// Character classes produced by the (admittedly hacky) sample classifier.
// The numeric values are spelled out so they stay stable if entries are
// reordered; GetClass() returns them as small integers.
enum wb_class {
  kWbClassSpace          = 0,  // white space
  kWbClassAlphaLetter    = 1,  // letters and digits (also the fallback class)
  kWbClassPunct          = 2,  // punctuation
  kWbClassHanLetter      = 3,  // Han ideographs
  kWbClassKatakanaLetter = 4,  // Katakana
  kWbClassHiraganaLetter = 5   // Hiragana
};
// Character-range predicates used by the sample word classifier.
// Every macro is fully parenthesized so it can be negated or combined with
// other conditions without precedence surprises.

// True when no bit above the low seven is set, i.e. c is in U+0000..U+007F.
#define IS_ASCII(c) (0 == ((c) & ~0x7F))
#define ASCII_IS_ALPHA(c) ((( 'a' <= (c)) && ((c) <= 'z')) || (( 'A' <= (c)) && ((c) <= 'Z')))
#define ASCII_IS_DIGIT(c) (( '0' <= (c)) && ((c) <= '9'))
#define ASCII_IS_SPACE(c) (( ' ' == (c)) || ( '\t' == (c)) || ( '\r' == (c)) || ( '\n' == (c)))
// U+4E00..U+9FFF or U+F900..U+FAFF.
#define IS_HAN(c) ((( 0x4e00 <= (c)) && ((c) <= 0x9fff)) || (( 0xf900 <= (c)) && ((c) <= 0xfaff)))
// U+30A0..U+30FF.
#define IS_KATAKANA(c) (( 0x30A0 <= (c)) && ((c) <= 0x30FF))
// U+3040..U+309F.
#define IS_HIRAGANA(c) (( 0x3040 <= (c)) && ((c) <= 0x309F))
// Map a single character to one of the wb_class values.
//
// Characters accepted by IS_ASCII are split into space, alphanumeric and
// punctuation; otherwise Han, Katakana and Hiragana each get their own class,
// and everything else is treated as an alphabetic letter.
PRUint8 nsSampleWordBreaker::GetClass(PRUnichar c)
{
  if (IS_ASCII(c))
  {
    if (ASCII_IS_SPACE(c))
      return kWbClassSpace;
    if (ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c))
      return kWbClassAlphaLetter;
    return kWbClassPunct;
  }

  if (IS_HAN(c))
    return kWbClassHanLetter;

  if (IS_KATAKANA(c))
    return kWbClassKatakanaLetter;

  if (IS_HIRAGANA(c))
    return kWbClassHiraganaLetter;

  // Fallback: any other character counts as a letter.
  return kWbClassAlphaLetter;
}
// Find the run of same-class characters (the "word") that contains aOffset.
//
//   aText, aTextLen  text to examine and its length in characters
//   aOffset          position inside the text; must be <= aTextLen
//   oWordBegin       out: index of the first character of the run (inclusive)
//   oWordEnd         out: index one past the last character of the run
//
// Returns NS_ERROR_NULL_POINTER if aText or either out pointer is null,
// NS_ERROR_ILLEGAL_VALUE if aOffset is past the end of the text, and NS_OK
// otherwise. Both outputs are always written on success. When aOffset equals
// aTextLen there is no character at that position, so the empty range
// [aTextLen, aTextLen) is reported.
nsresult nsSampleWordBreaker::PostionToBoundary(
  const PRUnichar* aText , PRUint32 aTextLen,
  PRUint32 aOffset,
  PRUint32 *oWordBegin,
  PRUint32 *oWordEnd)
{
  NS_PRECONDITION( nsnull != aText, "null ptr");
  NS_PRECONDITION( 0 != aTextLen, "len = 0");
  NS_PRECONDITION( nsnull != oWordBegin, "null ptr");
  NS_PRECONDITION( nsnull != oWordEnd, "null ptr");
  NS_PRECONDITION( aOffset <= aTextLen, "aOffset > aTextLen");

  if((nsnull == aText ) || (nsnull == oWordBegin) || (nsnull == oWordEnd))
    return NS_ERROR_NULL_POINTER;

  if( aOffset > aTextLen )
    return NS_ERROR_ILLEGAL_VALUE;

  // Nothing to classify at the very end of the text (covers empty text too).
  if( aOffset == aTextLen )
  {
    *oWordBegin = aTextLen;
    *oWordEnd = aTextLen;
    return NS_OK;
  }

  const PRUint8 c = this->GetClass(aText[aOffset]);
  PRUint32 i;

  // Scan forward: the word ends at the first character of a different
  // class, or at the end of the text if the class never changes.
  *oWordEnd = aTextLen;
  for(i = aOffset + 1; i < aTextLen; i++)
  {
    if( c != this->GetClass(aText[i]))
    {
      *oWordEnd = i;
      break;
    }
  }

  // Scan backward: the index is unsigned, so test aText[i-1] while i > 0
  // instead of letting i run below zero. The word begins right after the
  // nearest preceding character of a different class, or at 0.
  *oWordBegin = 0;
  for(i = aOffset; i > 0; i--)
  {
    if( c != this->GetClass(aText[i - 1]))
    {
      *oWordBegin = i;
      break;
    }
  }

  return NS_OK;
}
// Position the break state on the first word break of its text.
//
// Text shorter than two characters has no interior break, so the state is
// moved to the end of the text and marked done. Otherwise the first break is
// the end of the run of same-class characters starting at index 0.
//
// Returns NS_ERROR_NULL_POINTER if state (or the text it hands back) is null,
// and propagates any failure reported by the state object; otherwise returns
// the result of storing the new position.
nsresult nsSampleWordBreaker::FirstForwardBreak (nsIBreakState* state)
{
  NS_PRECONDITION( nsnull != state, "null ptr");
  if(nsnull == state )
    return NS_ERROR_NULL_POINTER;

  PRUint32 len = 0;
  nsresult res = state->Length(&len);
  if(NS_FAILED(res))
    return res;

  if(len < 2)
    return state->Set(len, PR_TRUE);

  const PRUnichar* text = nsnull;
  res = state->GetText(&text);
  if(NS_FAILED(res))
    return res;
  if(nsnull == text)
    return NS_ERROR_NULL_POINTER;

  // The first scan always starts from the beginning of the text.
  const PRUint32 next = Next(text, len, 0);
  return state->Set(next , (next == len) );
}
// Advance the break state from its current position to the next word break.
//
// Does nothing (and returns NS_OK) when the state is already done. The state
// is marked done when the new position reaches the end of the text.
//
// Returns NS_ERROR_NULL_POINTER if state (or the text it hands back) is null,
// and propagates any failure reported by the state object; otherwise returns
// the result of storing the new position.
nsresult nsSampleWordBreaker::NextForwardBreak (nsIBreakState* state)
{
  NS_PRECONDITION( nsnull != state, "null ptr");
  if(nsnull == state )
    return NS_ERROR_NULL_POINTER;

  PRBool done = PR_FALSE;
  nsresult res = state->IsDone(&done);
  if(NS_FAILED(res))
    return res;
  if(done)
    return NS_OK;

  const PRUnichar* text = nsnull;
  res = state->GetText(&text);
  if(NS_FAILED(res))
    return res;
  if(nsnull == text)
    return NS_ERROR_NULL_POINTER;

  PRUint32 len = 0;
  res = state->Length(&len);
  if(NS_FAILED(res))
    return res;

  PRUint32 cur = 0;
  res = state->Current(&cur);
  if(NS_FAILED(res))
    return res;

  // A position at or past the end leaves nothing to scan; finish here
  // rather than reading beyond the text.
  if(cur >= len)
    return state->Set(len, PR_TRUE);

  const PRUint32 next = Next(text, len, cur);
  return state->Set(next , (next == len) );
}
// Return the index of the first character after aPos whose class differs
// from the class of aText[aPos], or aLen if the class never changes.
//
//   aText  text to scan
//   aLen   number of characters in aText
//   aPos   index to start from
//
// If aPos is not a valid index (aPos >= aLen) nothing is read and aLen is
// returned.
PRUint32 nsSampleWordBreaker::Next(
  const PRUnichar* aText,
  PRUint32 aLen,
  PRUint32 aPos
)
{
  if(aPos >= aLen)
    return aLen;

  // Same type as GetClass() returns, so the comparison involves no
  // signed/unsigned conversion.
  const PRUint8 startClass = this->GetClass(aText[aPos]);

  PRUint32 cur = aPos + 1;
  while((cur < aLen) && (startClass == this->GetClass(aText[cur])))
  {
    cur++;
  }
  return cur;
}

Просмотреть файл

@ -24,6 +24,7 @@
class nsSampleWordBreaker : public nsIWordBreaker
{
NS_DECL_ISUPPORTS
public:
nsSampleWordBreaker() ;
~nsSampleWordBreaker() ;
@ -32,6 +33,7 @@ class nsSampleWordBreaker : public nsIWordBreaker
const PRUnichar* aText2 , PRUint32 aTextLen2,
PRBool *oCanBreak);
NS_IMETHOD PostionToBoundary(const PRUnichar* aText1 , PRUint32 aTextLen1,
PRUint32 aOffset,
PRUint32 *oWordBegin,
PRUint32 *oWordEnd);
@ -47,6 +49,11 @@ class nsSampleWordBreaker : public nsIWordBreaker
NS_IMETHOD NextBackwardConnect (nsIBreakState* state) ;
#endif
protected:
PRUint8 GetClass(PRUnichar aChar);
PRUint32 Next(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
};
#endif /* nsSampleWordBreaker_h__ */

Просмотреть файл

@ -21,14 +21,18 @@
#include "nsIServiceManager.h"
#include "nsILineBreakerFactory.h"
#include "nsILineBreaker.h"
#include "nsIWordBreakerFactory.h"
#include "nsIWordBreaker.h"
#include "nsIBreakState.h"
#include "nsLWBrkCIID.h"
#define WORK_AROUND_SERVICE_MANAGER_ASSERT
IMPL_NS_IBREAKSTATE( nsBreakState )
NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
NS_DEFINE_IID(kILineBreakerFactory, NS_ILINEBREAKERFACTORY_IID);
NS_DEFINE_IID(kIWordBreakerFactory, NS_IWORDBREAKERFACTORY_IID);
static char teng1[] =
@ -41,6 +45,11 @@ static PRUint32 exp1[] = {
67,68,69,70
};
// Expected word-break positions for teng1 (TestWordBreaker, Test 4).
static PRUint32 wexp1[] = {
   4,  5,  7,  8,  9, 10, 14, 15, 17, 18,
  22, 23, 33, 34, 35, 39, 43, 48, 49, 50,
  54, 55, 56, 57, 62, 63, 64, 65, 67, 68,
  69, 70, 72
};
// 1 2 3 4 5 6 7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
static char teng2[] =
@ -49,6 +58,9 @@ static char teng2[] =
// Expected line-break positions for teng2 (TestLineBreaker, Test 5).
static PRUint32 exp2[] = {
   2, 12, 15, 17, 18, 22,
  23, 24, 30, 31, 37, 38,
};
// Expected word-break positions for teng2 (TestWordBreaker, Test 5).
static PRUint32 wexp2[] = {
   4, 12, 13, 14, 15, 16, 17, 18, 22, 24,
  29, 30, 31, 32, 37, 38, 43
};
// 1 2 3 4 5 6 7
//01234567890123456789012345678901234567890123456789012345678901234567890123456789
@ -57,6 +69,9 @@ static char teng3[] =
// Expected line-break positions for teng3 (TestLineBreaker, Test 6).
static PRUint32 exp3[] = {
   4,  5,  6,  7, 11, 12, 14,
  15, 19, 25, 27, 28, 32, 33
};
// Expected word-break positions for teng3 (TestWordBreaker, Test 6).
static PRUint32 wexp3[] = {
   4,  5,  6,  7, 11, 12, 14, 15, 19, 20,
  25, 26, 27, 28, 32, 33, 38
};
// First ruler row, printed directly beneath the test string in the
// break-test output so reported positions can be read off by eye.
// NOTE(review): the spacing inside this literal looks collapsed compared
// with the 73-column ruler2 row -- confirm against the real file.
static char ruler1[] =
" 1 2 3 4 5 6 7 ";
@ -64,7 +79,62 @@ static char ruler2[] =
"0123456789012345678901234567890123456789012345678901234567890123456789012";
// Run the line breaker forward over an ASCII string and compare the break
// positions it reports with an expected list.
//
//   lb      line breaker under test
//   in      NUL-terminated ASCII test string
//   len     size of the test string buffer (currently unused)
//   out     expected break positions
//   outlen  number of entries in out
//
// At most 256 break positions are collected. The string, both rulers, the
// expected and actual positions, and every mismatch are printed to cout.
// Returns PR_TRUE only if the number of breaks and every position match.
PRBool TestASCIILB(nsILineBreaker *lb,
                   const char* in, const PRUint32 len,
                   const PRUint32* out, PRUint32 outlen)
{
  const PRUint32 kMaxBreaks = 256;

  nsAutoString eng1(in);
  nsBreakState bk(eng1.GetUnicode(), eng1.Length());
  PRUint32 i,j;
  PRUint32 res[kMaxBreaks];
  PRBool ok = PR_TRUE;

  // Collect every break position until the state reports it is done.
  for(i = 0, lb->FirstForwardBreak(&bk);
      (! bk.IsDone()) && (i < kMaxBreaks);
      lb->NextForwardBreak(&bk), i++)
  {
    res [i] = bk.Current();
  }

  if (i != outlen)
  {
    ok = PR_FALSE;
    cout << "WARNING!!! return size wrong, expect " << outlen <<
            " but got " << i << "\n";
  }

  cout << "string = \n" << in << "\n";
  cout << ruler1 << "\n";
  cout << ruler2 << "\n";

  cout << "Expect = \n";
  for(j=0;j<outlen;j++)
  {
    cout << out[j] << ",";
  }

  cout << "\nResult = \n";
  for(j=0;j<i;j++)
  {
    cout << res[j] << ",";
  }
  cout << "\n";

  // Compare position by position; results beyond the expected list are
  // reported as additional breaks.
  for(j=0;j<i;j++)
  {
    if(j < outlen)
    {
      if (res[j] != out[j])
      {
        ok = PR_FALSE;
        cout << "[" << j << "] expect " << out[j] << " but got " <<
                res[j] << "\n";
      }
    } else {
      ok = PR_FALSE;
      cout << "[" << j << "] additional " <<
              res[j] << "\n";
    }
  }
  return ok;
}
PRBool TestASCIIWB(nsIWordBreaker *lb,
const char* in, const PRUint32 len,
const PRUint32* out, PRUint32 outlen)
{
@ -137,7 +207,9 @@ PRBool TestLineBreaker()
cout << "\t1st GetService failed\n";
ok = PR_FALSE;
} else {
#ifdef WORD_AROUND_SERVICE_MANAGER_ASSERT
res = nsServiceManager::ReleaseService(kLWBrkCID, t);
#endif
}
res = nsServiceManager::GetService(kLWBrkCID,
@ -160,7 +232,7 @@ PRBool TestLineBreaker()
} else {
cout << "Test 4 - {First,Next}ForwardBreak():\n";
if( TestASCII(lb, teng1, sizeof(teng1)/sizeof(char),
if( TestASCIILB(lb, teng1, sizeof(teng1)/sizeof(char),
exp1, sizeof(exp1)/sizeof(PRUint32)) )
{
cout << "Test 4 Passed\n\n";
@ -170,7 +242,7 @@ PRBool TestLineBreaker()
}
cout << "Test 5 - {First,Next}ForwardBreak():\n";
if(TestASCII(lb, teng2, sizeof(teng2)/sizeof(char),
if(TestASCIILB(lb, teng2, sizeof(teng2)/sizeof(char),
exp2, sizeof(exp2)/sizeof(PRUint32)) )
{
cout << "Test 5 Passed\n\n";
@ -180,7 +252,7 @@ PRBool TestLineBreaker()
}
cout << "Test 6 - {First,Next}ForwardBreak():\n";
if(TestASCII(lb, teng3, sizeof(teng3)/sizeof(char),
if(TestASCIILB(lb, teng3, sizeof(teng3)/sizeof(char),
exp3, sizeof(exp3)/sizeof(PRUint32)) )
{
cout << "Test 6 Passed\n\n";
@ -190,19 +262,102 @@ PRBool TestLineBreaker()
}
NS_IF_RELEASE(lb);
}
#ifdef WORD_AROUND_SERVICE_MANAGER_ASSERT
res = nsServiceManager::ReleaseService(kLWBrkCID, t);
#endif
}
cout << "==================================\n";
cout << "Finish nsILineBreakerFactory Test \n";
cout << "==================================\n";
return ok;
}
// Self-test for the word-breaker factory and the word breaker it returns.
//
// Obtains the factory from the service manager twice (releasing it in
// between), asks it for a word breaker, and runs the three ASCII forward-break
// tests against the wexp1..wexp3 expectations. Progress and failures are
// printed to cout. Returns PR_TRUE only if every step succeeded.
PRBool TestWordBreaker()
{
  cout << "==================================\n";
  cout << "Start nsIWordBreakerFactory Test \n";
  cout << "==================================\n";
  nsIWordBreakerFactory *t = NULL;
  nsresult res;
  PRBool ok = PR_TRUE;
  res = nsServiceManager::GetService(kLWBrkCID,
                                     kIWordBreakerFactory,
                                     (nsISupports**) &t);

  cout << "Test 1 - GetService():\n";
  if(NS_FAILED(res) || ( t == NULL ) ) {
    cout << "\t1st GetService failed\n";
    ok = PR_FALSE;
  } else {
    res = nsServiceManager::ReleaseService(kLWBrkCID, t);
  }

  res = nsServiceManager::GetService(kLWBrkCID,
                                     kIWordBreakerFactory,
                                     (nsISupports**) &t);

  if(NS_FAILED(res) || ( t == NULL ) ) {
    cout << "\t2nd GetService failed\n";
    ok = PR_FALSE;
  } else {

    cout << "Test 3 - GetWordBreaker():\n";
    // Start from NULL so the failure check below never reads an
    // uninitialized pointer when GetBreaker leaves it untouched.
    nsIWordBreaker *lb = NULL;
    nsAutoString lb_arg("");
    res = t->GetBreaker(lb_arg, &lb);
    if(NS_FAILED(res) || (lb == NULL)) {
      cout << "GetBreaker(nsIWordBreaker*) failed\n";
      ok = PR_FALSE;
    } else {

      cout << "Test 4 - {First,Next}ForwardBreak():\n";
      if( TestASCIIWB(lb, teng1, sizeof(teng1)/sizeof(char),
                      wexp1, sizeof(wexp1)/sizeof(PRUint32)) )
      {
        cout << "Test 4 Passed\n\n";
      } else {
        ok = PR_FALSE;
        cout << "Test 4 Failed\n\n";
      }

      cout << "Test 5 - {First,Next}ForwardBreak():\n";
      if(TestASCIIWB(lb, teng2, sizeof(teng2)/sizeof(char),
                     wexp2, sizeof(wexp2)/sizeof(PRUint32)) )
      {
        cout << "Test 5 Passed\n\n";
      } else {
        ok = PR_FALSE;
        cout << "Test 5 Failed\n\n";
      }

      cout << "Test 6 - {First,Next}ForwardBreak():\n";
      if(TestASCIIWB(lb, teng3, sizeof(teng3)/sizeof(char),
                     wexp3, sizeof(wexp3)/sizeof(PRUint32)) )
      {
        cout << "Test 6 Passed\n\n";
      } else {
        ok = PR_FALSE;
        cout << "Test 6 Failed\n\n";
      }

      NS_IF_RELEASE(lb);
    }

    res = nsServiceManager::ReleaseService(kLWBrkCID, t);
  }
  cout << "==================================\n";
  cout << "Finish nsIWordBreakerFactory Test \n";
  cout << "==================================\n";

  return ok;
}
#ifdef XP_PC
#define LWBRK_DLL "lwbrk.dll"
#else
@ -223,8 +378,12 @@ int main(int argc, char** argv) {
NS_SetupRegistry();
// --------------------------------------------
cout << "Test Line Break\n";
PRBool ok ;
ok =TestWordBreaker();
cout << "Test Word Break\n";
ok =TestLineBreaker();
// --------------------------------------------