diff --git a/intl/lwbrk/src/jisx4501class.h b/intl/lwbrk/src/jisx4501class.h index e3032db9b21..5f6483813c6 100644 --- a/intl/lwbrk/src/jisx4501class.h +++ b/intl/lwbrk/src/jisx4501class.h @@ -46,7 +46,7 @@ static const PRUint32 gLBClass00[32] = { 0x7AABAAA5, // U+0020 - U+0027 0x7A7AAAA9, // U+0028 - U+002F 0x66666666, // U+0030 - U+0037 -0x1AA9AA66, // U+0038 - U+003F +0xAAA9AA66, // U+0038 - U+003F 0x77777777, // U+0040 - U+0047 0x77777777, // U+0048 - U+004F 0x77777777, // U+0050 - U+0057 @@ -76,9 +76,9 @@ static const PRUint32 gLBClass00[32] = { static const PRUint32 gLBClass20[32] = { 0xB5555555, // U+2000 - U+2007 0x77775555, // U+2008 - U+200F -0x777211B1, // U+2010 - U+2017 +0x777277B7, // U+2010 - U+2017 0x77777777, // U+2018 - U+201F -0xA2227777, // U+2020 - U+2027 +0xAAAA7777, // U+2020 - U+2027 0xB7777777, // U+2028 - U+202F 0x77744444, // U+2030 - U+2037 0x7A115107, // U+2038 - U+203F diff --git a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp index b1fd3ea6fba..1e279da35ca 100644 --- a/intl/lwbrk/src/nsJISx4501LineBreaker.cpp +++ b/intl/lwbrk/src/nsJISx4501LineBreaker.cpp @@ -382,12 +382,14 @@ static const PRUint16 gPairConservative[MAX_CLASSES] = { #define U_EQUAL PRUnichar('=') #define U_PERCENT PRUnichar('%') #define U_AMPERSAND PRUnichar('&') +#define U_SEMICOLON PRUnichar(';') #define U_BACKSLASH PRUnichar('\\') #define NEED_CONTEXTUAL_ANALYSIS(c) (IS_HYPHEN(c) || \ (c) == U_SLASH || \ (c) == U_PERCENT || \ (c) == U_AMPERSAND || \ + (c) == U_SEMICOLON || \ (c) == U_BACKSLASH) #define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039) @@ -431,7 +433,8 @@ IS_HYPHEN(PRUnichar u) return (u == U_HYPHEN || u == 0x058A || // ARMENIAN HYPHEN u == 0x2010 || // HYPHEN - u == 0x2012); // FIGURE DASH + u == 0x2012 || // FIGURE DASH + u == 0x2013); // EN DASH } static PRInt8 @@ -608,11 +611,9 @@ public: // is near it. // Note that index is always larger than CONSERVATIVE_BREAK_RANGE here. - for (PRUint32 i = index - 1; index - CONSERVATIVE_BREAK_RANGE < i; --i) { - if (IS_NONBREAKABLE_SPACE(GetCharAt(i))) + for (PRUint32 i = index; index - CONSERVATIVE_BREAK_RANGE < i; --i) { + if (IS_NONBREAKABLE_SPACE(GetCharAt(i - 1))) return PR_TRUE; - if (i == 0) - break; } // Note that index is always less than mLength - CONSERVATIVE_BREAK_RANGE. for (PRUint32 i = index + 1; i < index + CONSERVATIVE_BREAK_RANGE; ++i) { @@ -624,13 +625,9 @@ public: PRBool HasCharacterAlready(PRUnichar aCh) { // Be careful for the index being unsigned. - if (mIndex == 0) - return PR_FALSE; - for (PRUint32 i = mIndex - 1; 0 < i; --i) { - if (GetCharAt(i) == aCh) + for (PRUint32 i = mIndex; i > 0; --i) { + if (GetCharAt(i - 1) == aCh) return PR_TRUE; - if (i == 0) - break; } return PR_FALSE; } @@ -639,14 +636,10 @@ public: NS_ASSERTION(IS_HYPHEN(GetCharAt(mIndex)), "current character isn't hyphen"); // Be careful for the index being unsigned. - if (mIndex == 0) - return PR_FALSE; - for (PRUint32 i = mIndex - 1; 0 < i; --i) { - PRUnichar ch = GetCharAt(i); + for (PRUint32 i = mIndex; i > 0; --i) { + PRUnichar ch = GetCharAt(i - 1); if (!IS_HYPHEN(ch)) return ch; - if (i == 0) - break; } return U_NULL; } @@ -725,7 +718,7 @@ ContextualAnalysis(PRUnichar prev, PRUnichar cur, PRUnichar next, aState.GetCharAt(aState.Index() + 3) == U_PERCENT) return CLASS_OPEN; } - } else if (cur == U_AMPERSAND) { + } else if (cur == U_AMPERSAND || cur == U_SEMICOLON) { // If this may be a separator of params of URL, we should break after. if (!aState.UseConservativeBreaking(1) && aState.HasCharacterAlready(U_EQUAL)) diff --git a/intl/lwbrk/tools/anzx4501.html b/intl/lwbrk/tools/anzx4501.html index 44da8ee3917..9e5f900cf43 100644 --- a/intl/lwbrk/tools/anzx4501.html +++ b/intl/lwbrk/tools/anzx4501.html @@ -92,10 +92,10 @@ Analysis of JIS X 4051 to Unicode General Category Mapping