diff --git a/intl/lwbrk/src/jisx4501class.h b/intl/lwbrk/src/jisx4501class.h index e3032db9b21..5f6483813c6 100644 --- a/intl/lwbrk/src/jisx4501class.h +++ b/intl/lwbrk/src/jisx4501class.h @@ -46,7 +46,7 @@ static const PRUint32 gLBClass00[32] = { 0x7AABAAA5, // U+0020 - U+0027 0x7A7AAAA9, // U+0028 - U+002F 0x66666666, // U+0030 - U+0037 -0x1AA9AA66, // U+0038 - U+003F +0xAAA9AA66, // U+0038 - U+003F 0x77777777, // U+0040 - U+0047 0x77777777, // U+0048 - U+004F 0x77777777, // U+0050 - U+0057 @@ -76,9 +76,9 @@ static const PRUint32 gLBClass00[32] = { static const PRUint32 gLBClass20[32] = { 0xB5555555, // U+2000 - U+2007 0x77775555, // U+2008 - U+200F -0x777211B1, // U+2010 - U+2017 +0x777277B7, // U+2010 - U+2017 0x77777777, // U+2018 - U+201F -0xA2227777, // U+2020 - U+2027 +0xAAAA7777, // U+2020 - U+2027 0xB7777777, // U+2028 - U+202F 0x77744444, // U+2030 - U+2037 0x7A115107, // U+2038 - U+203F diff --git a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp index b1fd3ea6fba..1e279da35ca 100644 --- a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp +++ b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp @@ -382,12 +382,14 @@ static const PRUint16 gPairConservative[MAX_CLASSES] = { #define U_EQUAL PRUnichar('=') #define U_PERCENT PRUnichar('%') #define U_AMPERSAND PRUnichar('&') +#define U_SEMICOLON PRUnichar(';') #define U_BACKSLASH PRUnichar('\\') #define NEED_CONTEXTUAL_ANALYSIS(c) (IS_HYPHEN(c) || \ (c) == U_SLASH || \ (c) == U_PERCENT || \ (c) == U_AMPERSAND || \ + (c) == U_SEMICOLON || \ (c) == U_BACKSLASH) #define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039) @@ -431,7 +433,8 @@ IS_HYPHEN(PRUnichar u) return (u == U_HYPHEN || u == 0x058A || // ARMENIAN HYPHEN u == 0x2010 || // HYPHEN - u == 0x2012); // FIGURE DASH + u == 0x2012 || // FIGURE DASH + u == 0x2013); // EN DASH } static PRInt8 @@ -608,11 +611,9 @@ public: // is near it. // Note that index is always larger than CONSERVATIVE_BREAK_RANGE here. - for (PRUint32 i = index - 1; index - CONSERVATIVE_BREAK_RANGE < i; --i) { - if (IS_NONBREAKABLE_SPACE(GetCharAt(i))) + for (PRUint32 i = index; index - CONSERVATIVE_BREAK_RANGE < i; --i) { + if (IS_NONBREAKABLE_SPACE(GetCharAt(i - 1))) return PR_TRUE; - if (i == 0) - break; } // Note that index is always less than mLength - CONSERVATIVE_BREAK_RANGE. for (PRUint32 i = index + 1; i < index + CONSERVATIVE_BREAK_RANGE; ++i) { @@ -624,13 +625,9 @@ public: PRBool HasCharacterAlready(PRUnichar aCh) { // Be careful for the index being unsigned. - if (mIndex == 0) - return PR_FALSE; - for (PRUint32 i = mIndex - 1; 0 < i; --i) { - if (GetCharAt(i) == aCh) + for (PRUint32 i = mIndex; i > 0; --i) { + if (GetCharAt(i - 1) == aCh) return PR_TRUE; - if (i == 0) - break; } return PR_FALSE; } @@ -639,14 +636,10 @@ public: NS_ASSERTION(IS_HYPHEN(GetCharAt(mIndex)), "current character isn't hyphen"); // Be careful for the index being unsigned. - if (mIndex == 0) - return PR_FALSE; - for (PRUint32 i = mIndex - 1; 0 < i; --i) { - PRUnichar ch = GetCharAt(i); + for (PRUint32 i = mIndex; i > 0; --i) { + PRUnichar ch = GetCharAt(i - 1); if (!IS_HYPHEN(ch)) return ch; - if (i == 0) - break; } return U_NULL; } @@ -725,7 +718,7 @@ ContextualAnalysis(PRUnichar prev, PRUnichar cur, PRUnichar next, aState.GetCharAt(aState.Index() + 3) == U_PERCENT) return CLASS_OPEN; } - } else if (cur == U_AMPERSAND) { + } else if (cur == U_AMPERSAND || cur == U_SEMICOLON) { // If this may be a separator of params of URL, we should break after. if (!aState.UseConservativeBreaking(1) && aState.HasCharacterAlready(U_EQUAL)) diff --git a/intl/lwbrk/tools/anzx4501.html b/intl/lwbrk/tools/anzx4501.html index 44da8ee3917..9e5f900cf43 100644 --- a/intl/lwbrk/tools/anzx4501.html +++ b/intl/lwbrk/tools/anzx4501.html @@ -92,10 +92,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 31 2 -28 +24 3 -64 +60 @@ -112,11 +112,11 @@ Analysis of JIS X 4051 to Unicode General Category Mapping -4 +1 12 1 -11 +10 2 @@ -131,10 +131,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping -4 +1 -4 +1 @@ -155,7 +155,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping -3 + @@ -326,10 +326,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 157 33 -54 +57 125 3 -391 +394 19 @@ -346,7 +346,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 3 30 4 -2 +5 2 2 4 @@ -443,10 +443,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 2 6 -21 +25 16 -46 +50 1 @@ -467,7 +467,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 3 1 1 -16 +20 2 3 @@ -574,7 +574,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 00 -1 + @@ -583,7 +583,7 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 126 7 -44 +45 2 @@ -604,16 +604,16 @@ Analysis of JIS X 4051 to Unicode General Category Mapping 20 2 -11 -4 +8 +1 5 12 -101 +104 -2 +5 3 diff --git a/intl/lwbrk/tools/jisx4501class.txt b/intl/lwbrk/tools/jisx4501class.txt index 81e1c29706b..ac1e19bee01 100644 --- a/intl/lwbrk/tools/jisx4501class.txt +++ b/intl/lwbrk/tools/jisx4501class.txt @@ -8,7 +8,6 @@ 0021;002F;23 0030;0039;15 003C;;22 -003F;;4 003A;003F;23 0040;;18 0041;005A;18 @@ -44,15 +43,15 @@ 2007;;24 2000;200B;17 200C;200F;18 -2010;;2 +2010;;18 2011;;24 -2012;2013;2 +2012;2013;18 2014;;7 2015;;18 2016;2017;18 2018;201F;18 2020;2023;18 -2024;2026;7 +2024;2026;23 2027;;23 2028;202E;18 202F;;24