зеркало из https://github.com/mozilla/gecko-dev.git
Bug 336959. Use Pango to break inside Thai/Lao runs. Patch by Theppitak Karoonboonyanan, r+sr=roc
This commit is contained in:
Родитель
84f164ad4a
Коммит
6ac430a20e
|
@ -96,11 +96,12 @@ public:
|
|||
static inline PRBool IsComplexChar(PRUnichar u)
|
||||
{
|
||||
return IsComplexASCIIChar(u) ||
|
||||
(0x1100 <= u && u <= 0x11ff) ||
|
||||
(0x2000 <= u && u <= 0x21ff) ||
|
||||
(0x2e80 <= u && u <= 0xd7ff) ||
|
||||
(0xf900 <= u && u <= 0xfaff) ||
|
||||
(0xff00 <= u && u <= 0xffef);
|
||||
(0x0e01 <= u && u <= 0x0edf) || // Thai & Lao
|
||||
(0x1100 <= u && u <= 0x11ff) || // Hangul Jamo
|
||||
(0x2000 <= u && u <= 0x21ff) || // Punctuations and Symbols
|
||||
(0x2e80 <= u && u <= 0xd7ff) || // several CJK blocks
|
||||
(0xf900 <= u && u <= 0xfaff) || // CJK Compatibility Idographs
|
||||
(0xff00 <= u && u <= 0xffef); // Halfwidth and Fullwidth Forms
|
||||
}
|
||||
|
||||
// Normally, break opportunities exist at the end of each run of whitespace
|
||||
|
|
|
@ -106,3 +106,13 @@ endif
|
|||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
||||
ifdef MOZ_ENABLE_PANGO
|
||||
CXXFLAGS += \
|
||||
$(MOZ_PANGO_CFLAGS) \
|
||||
$(NULL)
|
||||
|
||||
EXTRA_DSO_LDOPTS += \
|
||||
$(MOZ_PANGO_LIBS) \
|
||||
$(NULL)
|
||||
endif
|
||||
|
||||
|
|
|
@ -43,19 +43,15 @@
|
|||
|
||||
#define NS_LINEBREAKER_NEED_MORE_TEXT -1
|
||||
|
||||
// {C9C5938E-70EF-4db2-ADEE-E7B2CCFBBEE6}
|
||||
// {5ae68851-d9a3-49fd-9388-58586dad8044}
|
||||
#define NS_ILINEBREAKER_IID \
|
||||
{ 0xc9c5938e, 0x70ef, 0x4db2, \
|
||||
{ 0xad, 0xee, 0xe7, 0xb2, 0xcc, 0xfb, 0xbe, 0xe6 } }
|
||||
{ 0x5ae68851, 0xd9a3, 0x49fd, \
|
||||
{ 0x93, 0x88, 0x58, 0x58, 0x6d, 0xad, 0x80, 0x44 } }
|
||||
|
||||
class nsILineBreaker : public nsISupports
|
||||
{
|
||||
public:
|
||||
NS_DECLARE_STATIC_IID_ACCESSOR(NS_ILINEBREAKER_IID)
|
||||
virtual PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
|
||||
const PRUnichar* aText2 ,
|
||||
PRUint32 aTextLen2) = 0;
|
||||
|
||||
virtual PRInt32 Next( const PRUnichar* aText, PRUint32 aLen,
|
||||
PRUint32 aPos) = 0;
|
||||
|
||||
|
|
|
@ -52,12 +52,33 @@ REQUIRES = xpcom \
|
|||
unicharutil \
|
||||
$(NULL)
|
||||
|
||||
CSRCS = rulebrk.c
|
||||
|
||||
CPPSRCS = \
|
||||
nsJISx4501LineBreaker.cpp \
|
||||
nsSampleWordBreaker.cpp \
|
||||
nsSemanticUnitScanner.cpp \
|
||||
$(NULL)
|
||||
|
||||
ifdef MOZ_ENABLE_PANGO
|
||||
CPPSRCS += \
|
||||
nsPangoBreaker.cpp \
|
||||
$(NULL)
|
||||
else
|
||||
CPPSRCS += \
|
||||
nsRuleBreaker.cpp \
|
||||
$(NULL)
|
||||
|
||||
CSRCS = rulebrk.c
|
||||
endif
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
||||
ifdef MOZ_ENABLE_PANGO
|
||||
CXXFLAGS += \
|
||||
$(MOZ_PANGO_CFLAGS) \
|
||||
$(NULL)
|
||||
|
||||
EXTRA_DSO_LDOPTS += \
|
||||
$(MOZ_PANGO_LIBS) \
|
||||
$(NULL)
|
||||
endif
|
||||
|
||||
|
|
|
@ -178,3 +178,38 @@ static const PRUint32 gLBClass30[32] = {
|
|||
0x51111555, // U+30F8 - U+30FF
|
||||
};
|
||||
|
||||
// Line-break class table for the U+0E00 - U+0EFF page; GetClass() reads it
// in its (0x0E00 == h) branch.
// Each PRUint32 packs eight 4-bit class numbers, one per code point, with the
// lowest code point of the row in the least significant nibble (this is the
// layout GETCLASSFROMTABLE decodes). Nibble value 9 is CLASS_COMPLEX.
static const PRUint32 gLBClass0E[32] = {
|
||||
0x99999999, // U+0E00 - U+0E07
|
||||
0x99999999, // U+0E08 - U+0E0F
|
||||
0x99999999, // U+0E10 - U+0E17
|
||||
0x99999999, // U+0E18 - U+0E1F
|
||||
0x99999999, // U+0E20 - U+0E27
|
||||
0x19999999, // U+0E28 - U+0E2F
|
||||
0x99999999, // U+0E30 - U+0E37
|
||||
0x09999999, // U+0E38 - U+0E3F
|
||||
0x91999999, // U+0E40 - U+0E47
|
||||
0x89999999, // U+0E48 - U+0E4F
|
||||
0x66666666, // U+0E50 - U+0E57
|
||||
0x99991166, // U+0E58 - U+0E5F
|
||||
0x99999999, // U+0E60 - U+0E67
|
||||
0x99999999, // U+0E68 - U+0E6F
|
||||
0x99999999, // U+0E70 - U+0E77
|
||||
0x99999999, // U+0E78 - U+0E7F
|
||||
0x99999999, // U+0E80 - U+0E87
|
||||
0x99999999, // U+0E88 - U+0E8F
|
||||
0x99999999, // U+0E90 - U+0E97
|
||||
0x99999999, // U+0E98 - U+0E9F
|
||||
0x99999999, // U+0EA0 - U+0EA7
|
||||
0x19999999, // U+0EA8 - U+0EAF
|
||||
0x99999999, // U+0EB0 - U+0EB7
|
||||
0x99999999, // U+0EB8 - U+0EBF
|
||||
0x91999999, // U+0EC0 - U+0EC7
|
||||
0x99999999, // U+0EC8 - U+0ECF
|
||||
0x66666666, // U+0ED0 - U+0ED7
|
||||
0x99999966, // U+0ED8 - U+0EDF
|
||||
0x99999999, // U+0EE0 - U+0EE7
|
||||
0x99999999, // U+0EE8 - U+0EEF
|
||||
0x99999999, // U+0EF0 - U+0EF7
|
||||
0x99999999, // U+0EF8 - U+0EFF
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2007
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
||||
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
#ifndef nsComplexBreaker_h__
|
||||
#define nsComplexBreaker_h__
|
||||
|
||||
#include "nsString.h"
|
||||
|
||||
/**
|
||||
* Find line break opportunities in aText[] of aLength characters,
|
||||
* filling boolean values indicating line break opportunities for
|
||||
* corresponding characters in aBreakBefore[] on return.
|
||||
*/
|
||||
void
|
||||
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
|
||||
PRPackedBool* aBreakBefore);
|
||||
|
||||
#endif /* nsComplexBreaker_h__ */
|
|
@ -42,9 +42,8 @@
|
|||
#include "pratom.h"
|
||||
#include "nsLWBRKDll.h"
|
||||
#include "jisx4501class.h"
|
||||
#define TH_UNICODE
|
||||
#include "th_char.h"
|
||||
#include "rulebrk.h"
|
||||
#include "nsComplexBreaker.h"
|
||||
#include "nsTArray.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
|
||||
/*
|
||||
|
@ -141,15 +140,16 @@
|
|||
|
||||
|
||||
|
||||
4. We add THAI characters and make it breakable w/ all ther class
|
||||
4. We add COMPLEX characters and make it breakable w/ all other classes
|
||||
except after class 1 and before class [a]
|
||||
|
||||
Class of
|
||||
Leading Class of Trailing Char Class
|
||||
Char
|
||||
|
||||
1 [a] 7 8 9 [b]15 16 18 THAI
|
||||
1 [a] 7 8 9 [b]15 16 18 COMPLEX
|
||||
|
||||
1 X X X X X X X X X
|
||||
1 X X X X X X X X X X
|
||||
[a] X
|
||||
7 X X
|
||||
8 X X
|
||||
|
@ -158,7 +158,7 @@
|
|||
15 X X X X
|
||||
16 X X X
|
||||
18 X X X X
|
||||
THAI T
|
||||
COMPLEX X T
|
||||
|
||||
T : need special handling
|
||||
|
||||
|
@ -167,7 +167,7 @@
|
|||
|
||||
18 <- 1
|
||||
|
||||
1 0000 0001 1111 1111 = 0x01FF
|
||||
1 0000 0011 1111 1111 = 0x03FF
|
||||
[a] 0000 0000 0000 0010 = 0x0002
|
||||
7 0000 0000 0000 0110 = 0x0006
|
||||
8 0000 0000 0100 0010 = 0x0042
|
||||
|
@ -176,7 +176,7 @@
|
|||
15 0000 0001 0101 0010 = 0x0152
|
||||
16 0000 0001 1000 0010 = 0x0182
|
||||
18 0000 0001 1100 0010 = 0x01C2
|
||||
THAI 0000 0000 0000 0000 = 0x0000
|
||||
COMPLEX 0000 0010 0000 0010 = 0x0202
|
||||
|
||||
5. Now we map the class to number
|
||||
|
||||
|
@ -189,14 +189,14 @@
|
|||
6: 15
|
||||
7: 16
|
||||
8: 18
|
||||
9: THAI
|
||||
9: COMPLEX
|
||||
|
||||
*/
|
||||
|
||||
#define MAX_CLASSES 10
|
||||
|
||||
static const PRUint16 gPair[MAX_CLASSES] = {
|
||||
0x01FF,
|
||||
0x03FF,
|
||||
0x0002,
|
||||
0x0006,
|
||||
0x0042,
|
||||
|
@ -205,7 +205,7 @@ static const PRUint16 gPair[MAX_CLASSES] = {
|
|||
0x0152,
|
||||
0x0182,
|
||||
0x01C2,
|
||||
0x0000
|
||||
0x0202
|
||||
};
|
||||
|
||||
|
||||
|
@ -215,7 +215,7 @@ GETCLASSFROMTABLE(const PRUint32* t, PRUint16 l)
|
|||
return ((((t)[(l>>3)]) >> ((l & 0x0007)<<2)) & 0x000f);
|
||||
}
|
||||
|
||||
#define CLASS_THAI 9
|
||||
#define CLASS_COMPLEX 9
|
||||
|
||||
|
||||
|
||||
|
@ -234,6 +234,12 @@ IS_CJK_CHAR(PRUnichar u)
|
|||
(0xff00 <= (u) && (u) <= 0xffef) );
|
||||
}
|
||||
|
||||
static inline int
|
||||
IS_COMPLEX(PRUnichar u)
|
||||
{
|
||||
return (0x0e01 <= (u) && (u) <= 0x0e5b);
|
||||
}
|
||||
|
||||
static inline int
|
||||
IS_SPACE(PRUnichar u)
|
||||
{
|
||||
|
@ -251,9 +257,9 @@ static PRInt8 GetClass(PRUnichar u)
|
|||
{
|
||||
c = GETCLASSFROMTABLE(gLBClass00, l);
|
||||
}
|
||||
else if(th_isthai(u))
|
||||
else if( 0x0E00 == h)
|
||||
{
|
||||
c = CLASS_THAI;
|
||||
c = GETCLASSFROMTABLE(gLBClass0E, l);
|
||||
}
|
||||
else if( 0x2000 == h)
|
||||
{
|
||||
|
@ -412,175 +418,64 @@ static PRInt8 ContextualAnalysis(
|
|||
}
|
||||
|
||||
|
||||
PRBool nsJISx4051LineBreaker::BreakInBetween(
|
||||
const PRUnichar* aText1 , PRUint32 aTextLen1,
|
||||
const PRUnichar* aText2 , PRUint32 aTextLen2)
|
||||
PRInt32 nsJISx4051LineBreaker::WordMove(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos, PRInt8 aDirection)
|
||||
{
|
||||
if(!aText1 || !aText2 || (0 == aTextLen1) || (0==aTextLen2) ||
|
||||
NS_IS_HIGH_SURROGATE(aText1[aTextLen1-1]) &&
|
||||
NS_IS_LOW_SURROGATE(aText2[0]) ) //Do not separate a surrogate pair
|
||||
{
|
||||
return PR_FALSE;
|
||||
PRBool textNeedsJISx4051 = PR_FALSE;
|
||||
PRInt32 begin, end;
|
||||
|
||||
for (begin = aPos; begin > 0 && !IS_SPACE(aText[begin - 1]); --begin) {
|
||||
if (IS_CJK_CHAR(aText[begin]) || IS_COMPLEX(aText[begin])) {
|
||||
textNeedsJISx4051 = PR_TRUE;
|
||||
}
|
||||
}
|
||||
for (end = aPos + 1; end < PRInt32(aLen) && !IS_SPACE(aText[end]); ++end) {
|
||||
if (IS_CJK_CHAR(aText[end]) || IS_COMPLEX(aText[end])) {
|
||||
textNeedsJISx4051 = PR_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
//search for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRInt32 cur;
|
||||
PRInt32 ret;
|
||||
nsAutoTArray<PRPackedBool, 2000> breakState;
|
||||
if (!textNeedsJISx4051 || !breakState.AppendElements(end - begin)) {
|
||||
// No complex text character, do not try to do complex line break.
|
||||
// (This is required for serializers. See Bug #344816.)
|
||||
// Also fall back to this when out of memory.
|
||||
if (aDirection < 0) {
|
||||
ret = (begin == PRInt32(aPos)) ? begin - 1 : begin;
|
||||
} else {
|
||||
ret = end;
|
||||
}
|
||||
} else {
|
||||
GetJISx4051Breaks(aText + begin, end - begin, breakState.Elements());
|
||||
|
||||
for (cur= aTextLen1-1; cur>=0; cur--)
|
||||
{
|
||||
if (IS_SPACE(aText1[cur]))
|
||||
break;
|
||||
if (IS_CJK_CHAR(aText1[cur]))
|
||||
goto ROUTE_CJK_BETWEEN;
|
||||
ret = aPos;
|
||||
do {
|
||||
ret += aDirection;
|
||||
} while (begin < ret && ret < end && !breakState[ret - begin]);
|
||||
}
|
||||
|
||||
for (cur= 0; cur < (PRInt32)aTextLen2; cur++)
|
||||
{
|
||||
if (IS_SPACE(aText2[cur]))
|
||||
break;
|
||||
if (IS_CJK_CHAR(aText2[cur]))
|
||||
goto ROUTE_CJK_BETWEEN;
|
||||
}
|
||||
|
||||
//now apply western rule.
|
||||
return IS_SPACE(aText1[aTextLen1-1]) || IS_SPACE(aText2[0]);
|
||||
|
||||
ROUTE_CJK_BETWEEN:
|
||||
|
||||
PRInt8 c1, c2;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
|
||||
c1 = ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:U_NULL,
|
||||
aText1[aTextLen1-1],
|
||||
aText2[0]);
|
||||
else
|
||||
c1 = GetClass(aText1[aTextLen1-1]);
|
||||
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
|
||||
c2 = ContextualAnalysis(aText1[aTextLen1-1],
|
||||
aText2[0],
|
||||
(aTextLen2>1)?aText2[1]:U_NULL);
|
||||
else
|
||||
c2 = GetClass(aText2[0]);
|
||||
|
||||
/* Handle cases for THAI */
|
||||
if((CLASS_THAI == c1) && (CLASS_THAI == c2))
|
||||
{
|
||||
return (0 == TrbWordBreakPos(aText1, aTextLen1, aText2, aTextLen2));
|
||||
}
|
||||
else
|
||||
{
|
||||
return GetPair(c1,c2);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
PRInt32 nsJISx4051LineBreaker::Next(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
NS_ASSERTION(aLen > aPos, "Illegal value (length > position)");
|
||||
|
||||
//forward check for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRUint32 cur;
|
||||
for (cur = aPos; cur < aLen; ++cur)
|
||||
{
|
||||
if (IS_SPACE(aText[cur]))
|
||||
return cur;
|
||||
if (IS_CJK_CHAR(aText[cur]))
|
||||
goto ROUTE_CJK_NEXT;
|
||||
}
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
|
||||
ROUTE_CJK_NEXT:
|
||||
PRInt8 c1, c2;
|
||||
cur = aPos;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
|
||||
{
|
||||
c1 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
|
||||
aText[cur],
|
||||
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
|
||||
} else {
|
||||
c1 = GetClass(aText[cur]);
|
||||
}
|
||||
|
||||
if(CLASS_THAI == c1)
|
||||
return PRUint32(TrbFollowing(aText, aLen, aPos));
|
||||
|
||||
for(cur++; cur <aLen; cur++)
|
||||
{
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur]))
|
||||
{
|
||||
c2 = ContextualAnalysis((cur>0)?aText[cur-1]:U_NULL,
|
||||
aText[cur],
|
||||
(cur<(aLen-1)) ?aText[cur+1]:U_NULL);
|
||||
} else {
|
||||
c2 = GetClass(aText[cur]);
|
||||
}
|
||||
|
||||
if(GetPair(c1, c2)) {
|
||||
return cur;
|
||||
}
|
||||
c1 = c2;
|
||||
}
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
PRInt32 nextPos = WordMove(aText, aLen, aPos, 1);
|
||||
return nextPos < PRInt32(aLen) ? nextPos : NS_LINEBREAKER_NEED_MORE_TEXT;
|
||||
}
|
||||
|
||||
PRInt32 nsJISx4051LineBreaker::Prev(
|
||||
const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
NS_ASSERTION(aLen >= aPos, "Illegal value (length >= position)");
|
||||
|
||||
//backward check for CJK characters until a space is found.
|
||||
//if CJK char is found before space, use 4051, otherwise western
|
||||
PRUint32 cur;
|
||||
for (cur = aPos - 1; cur > 0; --cur)
|
||||
{
|
||||
if (IS_SPACE(aText[cur]))
|
||||
{
|
||||
if (cur != aPos - 1) // XXXldb Why?
|
||||
++cur;
|
||||
return cur;
|
||||
}
|
||||
if (IS_CJK_CHAR(aText[cur]))
|
||||
goto ROUTE_CJK_PREV;
|
||||
}
|
||||
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
|
||||
ROUTE_CJK_PREV:
|
||||
cur = aPos;
|
||||
PRInt8 c1, c2;
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
|
||||
{
|
||||
c2 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
|
||||
aText[cur-1],
|
||||
(cur<aLen) ?aText[cur]:U_NULL);
|
||||
} else {
|
||||
c2 = GetClass(aText[cur-1]);
|
||||
}
|
||||
// To Do:
|
||||
//
|
||||
// Should handle CLASS_THAI here
|
||||
//
|
||||
for(cur--; cur > 0; cur--)
|
||||
{
|
||||
if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
|
||||
{
|
||||
c1 = ContextualAnalysis(((cur-1)>0)?aText[cur-2]:U_NULL,
|
||||
aText[cur-1],
|
||||
(cur<aLen) ?aText[cur]:U_NULL);
|
||||
} else {
|
||||
c1 = GetClass(aText[cur-1]);
|
||||
}
|
||||
|
||||
if(GetPair(c1, c2)) {
|
||||
return cur;
|
||||
}
|
||||
c2 = c1;
|
||||
}
|
||||
return NS_LINEBREAKER_NEED_MORE_TEXT; // Need more text
|
||||
PRInt32 prevPos = WordMove(aText, aLen, aPos, -1);
|
||||
return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -604,16 +499,29 @@ nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar* aChars, PRUint32 aLeng
|
|||
|
||||
PRBool allowBreak;
|
||||
if (cur > 0) {
|
||||
if (CLASS_THAI == lastClass && CLASS_THAI == cl) {
|
||||
allowBreak = 0 == TrbWordBreakPos(aChars, cur, aChars + cur, aLength - cur);
|
||||
} else {
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
}
|
||||
NS_ASSERTION(CLASS_COMPLEX != lastClass || CLASS_COMPLEX != cl,
|
||||
"Loop should have prevented adjacent complex chars here");
|
||||
allowBreak = GetPair(lastClass, cl);
|
||||
} else {
|
||||
allowBreak = PR_FALSE;
|
||||
}
|
||||
aBreakBefore[cur] = allowBreak;
|
||||
lastClass = cl;
|
||||
if (CLASS_COMPLEX == cl) {
|
||||
PRUint32 end = cur + 1;
|
||||
|
||||
while (end < aLength && CLASS_COMPLEX == GetClass(aChars[end])) {
|
||||
++end;
|
||||
}
|
||||
|
||||
NS_GetComplexLineBreaks(aChars + cur, end - cur, aBreakBefore + cur);
|
||||
|
||||
// restore breakability at chunk begin, which was always set to false
|
||||
// by the complex line breaker
|
||||
aBreakBefore[cur] = allowBreak;
|
||||
|
||||
cur = end - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -48,9 +48,6 @@ public:
|
|||
nsJISx4051LineBreaker();
|
||||
virtual ~nsJISx4051LineBreaker();
|
||||
|
||||
PRBool BreakInBetween( const PRUnichar* aText1 , PRUint32 aTextLen1,
|
||||
const PRUnichar* aText2 , PRUint32 aTextLen2);
|
||||
|
||||
PRInt32 Next( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
|
||||
|
||||
PRInt32 Prev( const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos);
|
||||
|
@ -59,6 +56,10 @@ public:
|
|||
PRPackedBool* aBreakBefore);
|
||||
virtual void GetJISx4051Breaks(const PRUint8* aText, PRUint32 aLength,
|
||||
PRPackedBool* aBreakBefore);
|
||||
|
||||
private:
|
||||
PRInt32 WordMove(const PRUnichar* aText, PRUint32 aLen, PRUint32 aPos,
|
||||
PRInt8 aDirection);
|
||||
};
|
||||
|
||||
#endif /* nsJISx4501LineBreaker_h__ */
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2007
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
||||
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "nsComplexBreaker.h"
|
||||
|
||||
#include <pango/pango-break.h>
|
||||
#include "nsUTF8Utils.h"
|
||||
#include "nsString.h"
|
||||
#include "nsTArray.h"
|
||||
|
||||
void
|
||||
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
|
||||
PRPackedBool* aBreakBefore)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
|
||||
nsAutoTArray<PangoLogAttr, 2000> attrBuffer;
|
||||
if (!attrBuffer.AppendElements(aLength + 1))
|
||||
{
|
||||
// out of memory, behave as if there were no complex line breaker
|
||||
for (PRUint32 i = 0; i < aLength; ++i) {
|
||||
aBreakBefore[i] = PR_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
NS_ConvertUTF16toUTF8 aUTF8(aText, aLength);
|
||||
|
||||
const gchar* p = aUTF8.Data();
|
||||
const gchar* end = p + aUTF8.Length();
|
||||
PRUint32 u16Offset = 0;
|
||||
|
||||
static PangoLanguage* language = pango_language_from_string("en");
|
||||
|
||||
while (p < end)
|
||||
{
|
||||
PangoLogAttr* attr = attrBuffer.Elements();
|
||||
pango_get_log_attrs(p, end - p, -1, language, attr, attrBuffer.Length());
|
||||
|
||||
while (p < end)
|
||||
{
|
||||
aBreakBefore[u16Offset] = attr->is_line_break;
|
||||
if (NS_IS_LOW_SURROGATE(aText[u16Offset]))
|
||||
aBreakBefore[++u16Offset] = PR_FALSE; // Skip high surrogate
|
||||
++u16Offset;
|
||||
|
||||
PRUint32 ch = UTF8CharEnumerator::NextChar(&p, end);
|
||||
++attr;
|
||||
|
||||
if (ch == 0) {
|
||||
// pango_break (pango 1.16.2) only analyses text before the
|
||||
// first NUL (but sets one extra attr). Workaround loop to call
|
||||
// pango_break again to analyse after the NUL is done somewhere else
|
||||
// (gfx/thebes/src/gfxPangoFonts.cpp: SetupClusterBoundaries()).
|
||||
// So, we do the same here for pango_get_log_attrs.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Theppitak Karoonboonyanan <thep@linux.thai.net>.
|
||||
* Portions created by the Initial Developer are Copyright (C) 2007
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* - Theppitak Karoonboonyanan <thep@linux.thai.net>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
||||
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "nsComplexBreaker.h"
|
||||
|
||||
#define TH_UNICODE
|
||||
#include "rulebrk.h"
|
||||
|
||||
void
|
||||
NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
|
||||
PRPackedBool* aBreakBefore)
|
||||
{
|
||||
NS_ASSERTION(aText, "aText shouldn't be null");
|
||||
|
||||
for (PRUint32 i = 0; i < aLength; i++)
|
||||
aBreakBefore[i] = (0 == TrbWordBreakPos(aText, i, aText + i, aLength - i));
|
||||
}
|
||||
|
|
@ -54,9 +54,9 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>14</TD>
|
||||
<TD>2</TD>
|
||||
<TD>3</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>16</TD>
|
||||
<TD BGCOLOR=white>17</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -79,7 +79,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD>13</TD>
|
||||
<TD>1</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
|
@ -89,20 +89,20 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
</TR>
|
||||
<TR><TH>01_[a]<TH>
|
||||
<TD></TD>
|
||||
<TD>27</TD>
|
||||
<TD>31</TD>
|
||||
<TD>2</TD>
|
||||
<TD></TD>
|
||||
<TD>30</TD>
|
||||
<TD>32</TD>
|
||||
<TD>6</TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>65</TD>
|
||||
<TD BGCOLOR=white>71</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>5</TD>
|
||||
<TD>22</TD>
|
||||
<TD>7</TD>
|
||||
<TD>24</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -116,7 +116,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD>14</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>14</TD>
|
||||
<TD>16</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>2</TD>
|
||||
|
@ -286,11 +286,11 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>10</TD>
|
||||
<TD>30</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD BGCOLOR=white>10</TD>
|
||||
<TD BGCOLOR=white>30</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -303,7 +303,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>10</TD>
|
||||
<TD>30</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -362,22 +362,22 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
</TR>
|
||||
<TR><TH>08_18<TH>
|
||||
<TD>10</TD>
|
||||
<TD>660</TD>
|
||||
<TD>659</TD>
|
||||
<TD>4</TD>
|
||||
<TD>130</TD>
|
||||
<TD>55</TD>
|
||||
<TD>940</TD>
|
||||
<TD>56</TD>
|
||||
<TD>941</TD>
|
||||
<TD>2</TD>
|
||||
<TD BGCOLOR=white>1801</TD>
|
||||
<TD BGCOLOR=white>1802</TD>
|
||||
<TD></TD>
|
||||
<TD>10</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>367</TD>
|
||||
<TD>368</TD>
|
||||
<TD>1</TD>
|
||||
<TD>5</TD>
|
||||
<TD>4</TD>
|
||||
<TD></TD>
|
||||
<TD>287</TD>
|
||||
<TD>286</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>4</TD>
|
||||
|
@ -389,17 +389,17 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD>3</TD>
|
||||
<TD>4</TD>
|
||||
<TD>6</TD>
|
||||
<TD>29</TD>
|
||||
<TD>30</TD>
|
||||
<TD>5</TD>
|
||||
<TD>12</TD>
|
||||
<TD>10</TD>
|
||||
<TD>273</TD>
|
||||
<TD>645</TD>
|
||||
<TD>646</TD>
|
||||
<TD>1</TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>09_nbsp<TH>
|
||||
<TR><TH>09_COMPLEX<TH>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
|
@ -489,7 +489,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD BGCOLOR=red>06_15</TD>
|
||||
<TD BGCOLOR=red>07_16</TD>
|
||||
<TD BGCOLOR=red>08_18</TD>
|
||||
<TD BGCOLOR=red>09_nbsp</TD>
|
||||
<TD BGCOLOR=red>09_COMPLEX</TD>
|
||||
<TD BGCOLOR=red>X</TD>
|
||||
</TR>
|
||||
<TR><TH>00<TH>
|
||||
|
@ -557,6 +557,19 @@ Analysis of JIS X 4051 to Unicode General Category Mapping
|
|||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>0E<TH>
|
||||
<TD>1</TD>
|
||||
<TD>6</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
<TD>20</TD>
|
||||
<TD></TD>
|
||||
<TD>1</TD>
|
||||
<TD></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TH>20<TH>
|
||||
<TD></TD>
|
||||
<TD>5</TD>
|
||||
|
|
|
@ -400,6 +400,7 @@ printarray("00", "8");
|
|||
printarray("20", "8");
|
||||
printarray("21", "8");
|
||||
printarray("30", "5");
|
||||
printarray("0E", "9");
|
||||
|
||||
#print %rangecount;
|
||||
|
||||
|
|
|
@ -190,3 +190,12 @@
|
|||
2776;2794;18
|
||||
2798;27AF;18
|
||||
27B1;27BE;18
|
||||
0E3F;;1
|
||||
0E2F;;4
|
||||
0E46;;4
|
||||
0E5A;0E5B;4
|
||||
0E50;0E59;15
|
||||
0E4F;;18
|
||||
0EAF;;4
|
||||
0EC6;;4
|
||||
0ED0;0ED9;15
|
||||
|
|
|
@ -18,4 +18,4 @@
|
|||
18;08_18
|
||||
19;X
|
||||
20;X
|
||||
21;09_nbsp
|
||||
21;09_COMPLEX
|
||||
|
|
Загрузка…
Ссылка в новой задаче