зеркало из https://github.com/mozilla/gecko-dev.git
b=255990 backing out the previous patch; we should try to find a better fix.
This commit is contained in:
Родитель
4b19461842
Коммит
573e6523ef
|
@ -43,10 +43,10 @@
|
|||
|
||||
#define NS_LINEBREAKER_NEED_MORE_TEXT -1
|
||||
|
||||
// {7509772F-770C-44e8-AAFA-8032E5A35370}
|
||||
// {E86B3375-BF89-11d2-B3AF-00805F8A6670}
|
||||
#define NS_ILINEBREAKER_IID \
|
||||
{ 0x7509772f, 0x770c, 0x44e8, \
|
||||
{ 0xaa, 0xfa, 0x80, 0x32, 0xe5, 0xa3, 0x53, 0x70 } }
|
||||
{ 0xe86b3375, 0xbf89, 0x11d2, \
|
||||
{ 0xb3, 0xaf, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
|
||||
|
||||
|
||||
class nsILineBreaker : public nsISupports
|
||||
|
@ -57,10 +57,6 @@ public:
|
|||
const PRUnichar* aText2 ,
|
||||
PRUint32 aTextLen2) = 0;
|
||||
|
||||
virtual PRBool CanBreakBetweenLatin1(PRUnichar aChar1,
|
||||
PRUnichar aChar2) = 0;
|
||||
|
||||
|
||||
virtual PRInt32 Next( const PRUnichar* aText, PRUint32 aLen,
|
||||
PRUint32 aPos) = 0;
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ static const PRUint32 gLBClass00[32] = {
|
|||
0x55555555, // U+0010 - U+0017
|
||||
0x55555555, // U+0018 - U+001F
|
||||
0x88438815, // U+0020 - U+0027
|
||||
0x11515810, // U+0028 - U+002F
|
||||
0x81515810, // U+0028 - U+002F
|
||||
0x66666666, // U+0030 - U+0037
|
||||
0x11501166, // U+0038 - U+003F
|
||||
0x88888888, // U+0040 - U+0047
|
||||
|
|
|
@ -350,19 +350,12 @@ nsJISx4051LineBreaker::~nsJISx4051LineBreaker()
|
|||
|
||||
NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker, nsILineBreaker)
|
||||
|
||||
#define U_PERIOD PRUnichar('.')
|
||||
#define U_COMMA PRUnichar(',')
|
||||
#define U_COLON PRUnichar(':')
|
||||
#define U_SEMICOLON PRUnichar(';')
|
||||
#define U_SLASH PRUnichar('/')
|
||||
#define U_SPACE PRUnichar(' ')
|
||||
#define U_NULL PRUnichar(0x0000)
|
||||
#define U_RIGHT_SINGLE_QUOTATION_MARK PRUnichar(0x2019)
|
||||
#define U_PERIOD ((PRUnichar) '.')
|
||||
#define U_COMMA ((PRUnichar) ',')
|
||||
#define U_SPACE ((PRUnichar) ' ')
|
||||
#define U_RIGHT_SINGLE_QUOTATION_MARK ((PRUnichar) 0x2019)
|
||||
#define NEED_CONTEXTUAL_ANALYSIS(c) ((c) == U_PERIOD || \
|
||||
(c) == U_COMMA || \
|
||||
(c) == U_COLON || \
|
||||
(c) == U_SEMICOLON || \
|
||||
(c) == U_SLASH || \
|
||||
(c) == U_RIGHT_SINGLE_QUOTATION_MARK)
|
||||
#define NUMERIC_CLASS 6 // JIS x4051 class 15 is now mapped to simplified class 6
|
||||
#define CHARACTER_CLASS 8 // JIS x4051 class 18 is now mapped to simplified class 8
|
||||
|
@ -372,17 +365,17 @@ PRInt8 nsJISx4051LineBreaker::ContextualAnalysis(
|
|||
PRUnichar prev, PRUnichar cur, PRUnichar next
|
||||
)
|
||||
{
|
||||
if(U_COMMA == cur || U_COLON == cur || U_SEMICOLON == cur)
|
||||
if(U_COMMA == cur)
|
||||
{
|
||||
if((IS_ASCII_DIGIT(prev) || prev == U_NULL) && IS_ASCII_DIGIT(next))
|
||||
if(IS_ASCII_DIGIT (prev) && IS_ASCII_DIGIT (next))
|
||||
return NUMERIC_CLASS;
|
||||
}
|
||||
else if(U_PERIOD == cur)
|
||||
{
|
||||
if((IS_ASCII_DIGIT(prev) || prev == U_SPACE || prev == U_NULL) &&
|
||||
IS_ASCII_DIGIT(next))
|
||||
if((IS_ASCII_DIGIT (prev) || (0x0020 == prev)) &&
|
||||
IS_ASCII_DIGIT (next))
|
||||
return NUMERIC_CLASS;
|
||||
|
||||
|
||||
// By assigning a full stop character class only when it's followed by
|
||||
// class 6 (numeric), 7, and 8 (character). Note that class 9 (Thai)
|
||||
// doesn't matter, either way, we prevent lines from breaking around
|
||||
|
@ -394,12 +387,6 @@ PRInt8 nsJISx4051LineBreaker::ContextualAnalysis(
|
|||
if((pc > 5 || pc == 0) && GetClass(next) > 5)
|
||||
return CHARACTER_CLASS;
|
||||
}
|
||||
else if(U_SLASH == cur)
|
||||
{
|
||||
// We don't need to check prev character. Because SLASH breaks only after.
|
||||
if (IS_ASCII_DIGIT(next))
|
||||
return NUMERIC_CLASS;
|
||||
}
|
||||
else if(U_RIGHT_SINGLE_QUOTATION_MARK == cur)
|
||||
{
|
||||
// somehow people use this as ' in "it's" sometimes...
|
||||
|
@ -409,25 +396,6 @@ PRInt8 nsJISx4051LineBreaker::ContextualAnalysis(
|
|||
return this->GetClass(cur);
|
||||
}
|
||||
|
||||
PRBool nsJISx4051LineBreaker::CanBreakBetweenLatin1(PRUnichar aChar1,
|
||||
PRUnichar aChar2)
|
||||
{
|
||||
NS_ASSERTION(aChar1 < 256 && aChar2 < 256, "invalid input");
|
||||
|
||||
PRInt8 c1, c2;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aChar1))
|
||||
c1 = this->ContextualAnalysis(U_NULL, aChar1, aChar2);
|
||||
else
|
||||
c1 = this->GetClass(aChar1);
|
||||
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aChar2))
|
||||
c2 = this->ContextualAnalysis(aChar1, aChar2, U_NULL);
|
||||
else
|
||||
c2 = this->GetClass(aChar2);
|
||||
|
||||
return GetPair(c1, c2);
|
||||
}
|
||||
|
||||
|
||||
PRBool nsJISx4051LineBreaker::BreakInBetween(
|
||||
const PRUnichar* aText1 , PRUint32 aTextLen1,
|
||||
|
@ -440,9 +408,34 @@ PRBool nsJISx4051LineBreaker::BreakInBetween(
|
|||
return PR_FALSE;
|
||||
}
|
||||
|
||||
//search for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRInt32 cur;
|
||||
|
||||
for (cur= aTextLen1-1; cur>=0; cur--)
|
||||
{
|
||||
if (IS_SPACE(aText1[cur]))
|
||||
break;
|
||||
if (IS_CJK_CHAR(aText1[cur]))
|
||||
goto ROUTE_CJK_BETWEEN;
|
||||
}
|
||||
|
||||
for (cur= 0; cur < (PRInt32)aTextLen2; cur++)
|
||||
{
|
||||
if (IS_SPACE(aText2[cur]))
|
||||
break;
|
||||
if (IS_CJK_CHAR(aText2[cur]))
|
||||
goto ROUTE_CJK_BETWEEN;
|
||||
}
|
||||
|
||||
//now apply western rule.
|
||||
return IS_SPACE(aText1[aTextLen1-1]) || IS_SPACE(aText2[0]);
|
||||
|
||||
ROUTE_CJK_BETWEEN:
|
||||
|
||||
PRInt8 c1, c2;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
|
||||
c1 = this->ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:U_NULL,
|
||||
c1 = this->ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:0,
|
||||
aText1[aTextLen1-1],
|
||||
aText2[0]);
|
||||
else
|
||||
|
@ -451,7 +444,7 @@ PRBool nsJISx4051LineBreaker::BreakInBetween(
|
|||
if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
|
||||
c2 = this->ContextualAnalysis(aText1[aTextLen1-1],
|
||||
aText2[0],
|
||||
(aTextLen2>1)?aText2[1]:U_NULL);
|
||||
(aTextLen2>1)?aText2[1]:0);
|
||||
else
|
||||
c2 = this->GetClass(aText2[0]);
|
||||
|
||||
|
@ -473,13 +466,26 @@ PRInt32 nsJISx4051LineBreaker::Next(
|
|||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
|
||||
|
||||
//forward check for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRUint32 cur;
|
||||
for (cur = aPos; cur < aLen; ++cur)
|
||||
{
|
||||
if (IS_SPACE(aText[cur]))
|
||||
return cur;
|
||||
if (IS_CJK_CHAR(aText[cur]))
|
||||
goto ROUTE_CJK_NEXT;
|
||||
}
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
|
||||
ROUTE_CJK_NEXT:
|
||||
PRInt8 c1, c2;
|
||||
PRUint32 cur = aPos;
|
||||
cur = aPos;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
|
||||
{
|
||||
c1 = this->ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
|
||||
c1 = this->ContextualAnalysis((cur>0)?aText[cur-1]:0,
|
||||
aText[cur],
|
||||
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
|
||||
(cur<(aLen-1)) ?aText[cur+1]:0);
|
||||
} else {
|
||||
c1 = this->GetClass(aText[cur]);
|
||||
}
|
||||
|
@ -491,9 +497,9 @@ PRInt32 nsJISx4051LineBreaker::Next(
|
|||
{
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
|
||||
{
|
||||
c2= this->ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
|
||||
c2= this->ContextualAnalysis((cur>0)?aText[cur-1]:0,
|
||||
aText[cur],
|
||||
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
|
||||
(cur<(aLen-1)) ?aText[cur+1]:0);
|
||||
} else {
|
||||
c2 = this->GetClass(aText[cur]);
|
||||
}
|
||||
|
@ -511,13 +517,31 @@ PRInt32 nsJISx4051LineBreaker::Prev(
|
|||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
|
||||
PRUint32 cur = aPos;
|
||||
//backward check for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRUint32 cur;
|
||||
for (cur = aPos - 1; cur > 0; --cur)
|
||||
{
|
||||
if (IS_SPACE(aText[cur]))
|
||||
{
|
||||
if (cur != aPos - 1) // XXXldb Why?
|
||||
++cur;
|
||||
return cur;
|
||||
}
|
||||
if (IS_CJK_CHAR(aText[cur]))
|
||||
goto ROUTE_CJK_PREV;
|
||||
}
|
||||
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
|
||||
ROUTE_CJK_PREV:
|
||||
cur = aPos;
|
||||
PRInt8 c1, c2;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
|
||||
{
|
||||
c2 = this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
|
||||
c2 = this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
|
||||
aText[cur-1],
|
||||
(cur<aLen) ?aText[cur]:U_NULL);
|
||||
(cur<aLen) ?aText[cur]:0);
|
||||
} else {
|
||||
c2 = this->GetClass(aText[cur-1]);
|
||||
}
|
||||
|
@ -529,9 +553,9 @@ PRInt32 nsJISx4051LineBreaker::Prev(
|
|||
{
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
|
||||
{
|
||||
c1= this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
|
||||
c1= this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
|
||||
aText[cur-1],
|
||||
(cur<aLen) ?aText[cur]:U_NULL);
|
||||
(cur<aLen) ?aText[cur]:0);
|
||||
} else {
|
||||
c1 = this->GetClass(aText[cur-1]);
|
||||
}
|
||||
|
|
|
@ -48,9 +48,6 @@ public:
|
|||
nsJISx4051LineBreaker();
|
||||
virtual ~nsJISx4051LineBreaker();
|
||||
|
||||
PRBool CanBreakBetweenLatin1(PRUnichar aChar1,
|
||||
PRUnichar aChar2);
|
||||
|
||||
PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
|
||||
const PRUnichar* aText2 , PRUint32 aTextLen2);
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
0028;;1
|
||||
002F;;2
|
||||
005B;;1
|
||||
007B;;1
|
||||
2018;;1
|
||||
|
|
|
@ -344,11 +344,8 @@ nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
|
|||
bp2 += mBufferPos;
|
||||
}
|
||||
|
||||
PRUnichar prevCh;
|
||||
PRUnichar ch = 0;
|
||||
for (; offset < fragLen; offset++) {
|
||||
prevCh = (ch == ' ') ? CH_NBSP : ch;
|
||||
ch = *cp++;
|
||||
unsigned char ch = *cp++;
|
||||
if (XP_IS_SPACE(ch)) {
|
||||
break;
|
||||
}
|
||||
|
@ -356,10 +353,6 @@ nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
|
|||
ch = ' ';
|
||||
*aWasTransformed = PR_TRUE;
|
||||
}
|
||||
else if (offset != mOffset &&
|
||||
nsContentUtils::LineBreaker()->CanBreakBetweenLatin1(prevCh, ch)) {
|
||||
break;
|
||||
}
|
||||
else if (IS_DISCARDED(ch)) {
|
||||
// Strip discarded characters from the transformed output
|
||||
continue;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
This is a mail with a couple of long lines and
|
||||
then a sig. This is used as test of the format=
|
||||
flowed output in the nsHTMLToTXTSinkstream. If
|
||||
this test fails and none else, it's likely the
|
||||
then a sig. This is used as test of the
|
||||
format=flowed output in the nsHTMLToTXTSinkstream.
|
||||
If this test fails and none else, it's likely the
|
||||
spaces at the ends of the lines that are missing.
|
||||
They aren't easily seen without looking at the
|
||||
data in an editor and checking where the end of
|
||||
|
|
Загрузка…
Ссылка в новой задаче